1 /* Analysis Utilities for Loop Vectorization.
2    Copyright (C) 2006-2018 Free Software Foundation, Inc.
3    Contributed by Dorit Nuzman <dorit@il.ibm.com>
4 
5 This file is part of GCC.
6 
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
11 
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
15 for more details.
16 
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3.  If not see
19 <http://www.gnu.org/licenses/>.  */
20 
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "backend.h"
25 #include "rtl.h"
26 #include "tree.h"
27 #include "gimple.h"
28 #include "ssa.h"
29 #include "expmed.h"
30 #include "optabs-tree.h"
31 #include "insn-config.h"
32 #include "recog.h"		/* FIXME: for insn_data */
33 #include "fold-const.h"
34 #include "stor-layout.h"
35 #include "tree-eh.h"
36 #include "gimplify.h"
37 #include "gimple-iterator.h"
38 #include "cfgloop.h"
39 #include "tree-vectorizer.h"
40 #include "dumpfile.h"
41 #include "builtins.h"
42 #include "internal-fn.h"
43 #include "case-cfn-macros.h"
44 #include "fold-const-call.h"
45 #include "attribs.h"
46 #include "cgraph.h"
47 #include "omp-simd-clone.h"
48 
49 /* Pattern recognition functions  */
50 static gimple *vect_recog_widen_sum_pattern (vec<gimple *> *, tree *,
51 					    tree *);
52 static gimple *vect_recog_widen_mult_pattern (vec<gimple *> *, tree *,
53 					     tree *);
54 static gimple *vect_recog_dot_prod_pattern (vec<gimple *> *, tree *,
55 					   tree *);
56 static gimple *vect_recog_sad_pattern (vec<gimple *> *, tree *,
57 				      tree *);
58 static gimple *vect_recog_pow_pattern (vec<gimple *> *, tree *, tree *);
59 static gimple *vect_recog_over_widening_pattern (vec<gimple *> *, tree *,
60                                                  tree *);
61 static gimple *vect_recog_widen_shift_pattern (vec<gimple *> *,
62 	                                tree *, tree *);
63 static gimple *vect_recog_rotate_pattern (vec<gimple *> *, tree *, tree *);
64 static gimple *vect_recog_vector_vector_shift_pattern (vec<gimple *> *,
65 						      tree *, tree *);
66 static gimple *vect_recog_divmod_pattern (vec<gimple *> *,
67 					 tree *, tree *);
68 
69 static gimple *vect_recog_mult_pattern (vec<gimple *> *,
70 				       tree *, tree *);
71 
72 static gimple *vect_recog_mixed_size_cond_pattern (vec<gimple *> *,
73 						  tree *, tree *);
74 static gimple *vect_recog_bool_pattern (vec<gimple *> *, tree *, tree *);
75 static gimple *vect_recog_mask_conversion_pattern (vec<gimple *> *, tree *, tree *);
76 static gimple *vect_recog_gather_scatter_pattern (vec<gimple *> *, tree *,
77 						  tree *);
78 
79 struct vect_recog_func
80 {
81   vect_recog_func_ptr fn;
82   const char *name;
83 };
84 
85 /* Note that ordering matters - the first pattern matching on a stmt
86    is taken which means usually the more complex one needs to preceed
87    the less comples onex (widen_sum only after dot_prod or sad for example).  */
88 static vect_recog_func vect_vect_recog_func_ptrs[NUM_PATTERNS] = {
89       { vect_recog_widen_mult_pattern, "widen_mult" },
90       { vect_recog_dot_prod_pattern, "dot_prod" },
91       { vect_recog_sad_pattern, "sad" },
92       { vect_recog_widen_sum_pattern, "widen_sum" },
93       { vect_recog_pow_pattern, "pow" },
94       { vect_recog_widen_shift_pattern, "widen_shift" },
95       { vect_recog_over_widening_pattern, "over_widening" },
96       { vect_recog_rotate_pattern, "rotate" },
97       { vect_recog_vector_vector_shift_pattern, "vector_vector_shift" },
98       {	vect_recog_divmod_pattern, "divmod" },
99       {	vect_recog_mult_pattern, "mult" },
100       {	vect_recog_mixed_size_cond_pattern, "mixed_size_cond" },
101       {	vect_recog_bool_pattern, "bool" },
102       /* This must come before mask conversion, and includes the parts
103 	 of mask conversion that are needed for gather and scatter
104 	 internal functions.  */
105       { vect_recog_gather_scatter_pattern, "gather_scatter" },
106       {	vect_recog_mask_conversion_pattern, "mask_conversion" }
107 };
108 
109 static inline void
append_pattern_def_seq(stmt_vec_info stmt_info,gimple * stmt)110 append_pattern_def_seq (stmt_vec_info stmt_info, gimple *stmt)
111 {
112   gimple_seq_add_stmt_without_update (&STMT_VINFO_PATTERN_DEF_SEQ (stmt_info),
113 				      stmt);
114 }
115 
116 static inline void
new_pattern_def_seq(stmt_vec_info stmt_info,gimple * stmt)117 new_pattern_def_seq (stmt_vec_info stmt_info, gimple *stmt)
118 {
119   STMT_VINFO_PATTERN_DEF_SEQ (stmt_info) = NULL;
120   append_pattern_def_seq (stmt_info, stmt);
121 }
122 
123 /* Check whether STMT2 is in the same loop or basic block as STMT1.
124    Which of the two applies depends on whether we're currently doing
125    loop-based or basic-block-based vectorization, as determined by
126    the vinfo_for_stmt for STMT1 (which must be defined).
127 
128    If this returns true, vinfo_for_stmt for STMT2 is guaranteed
129    to be defined as well.  */
130 
131 static bool
vect_same_loop_or_bb_p(gimple * stmt1,gimple * stmt2)132 vect_same_loop_or_bb_p (gimple *stmt1, gimple *stmt2)
133 {
134   stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt1);
135   return vect_stmt_in_region_p (stmt_vinfo->vinfo, stmt2);
136 }
137 
138 /* If the LHS of DEF_STMT has a single use, and that statement is
139    in the same loop or basic block, return it.  */
140 
141 static gimple *
vect_single_imm_use(gimple * def_stmt)142 vect_single_imm_use (gimple *def_stmt)
143 {
144   tree lhs = gimple_assign_lhs (def_stmt);
145   use_operand_p use_p;
146   gimple *use_stmt;
147 
148   if (!single_imm_use (lhs, &use_p, &use_stmt))
149     return NULL;
150 
151   if (!vect_same_loop_or_bb_p (def_stmt, use_stmt))
152     return NULL;
153 
154   return use_stmt;
155 }
156 
157 /* Check whether NAME, an ssa-name used in USE_STMT,
158    is a result of a type promotion, such that:
159      DEF_STMT: NAME = NOP (name0)
160    If CHECK_SIGN is TRUE, check that either both types are signed or both are
161    unsigned.  */
162 
163 static bool
type_conversion_p(tree name,gimple * use_stmt,bool check_sign,tree * orig_type,gimple ** def_stmt,bool * promotion)164 type_conversion_p (tree name, gimple *use_stmt, bool check_sign,
165 		   tree *orig_type, gimple **def_stmt, bool *promotion)
166 {
167   gimple *dummy_gimple;
168   stmt_vec_info stmt_vinfo;
169   tree type = TREE_TYPE (name);
170   tree oprnd0;
171   enum vect_def_type dt;
172 
173   stmt_vinfo = vinfo_for_stmt (use_stmt);
174   if (!vect_is_simple_use (name, stmt_vinfo->vinfo, def_stmt, &dt))
175     return false;
176 
177   if (dt != vect_internal_def
178       && dt != vect_external_def && dt != vect_constant_def)
179     return false;
180 
181   if (!*def_stmt)
182     return false;
183 
184   if (dt == vect_internal_def)
185     {
186       stmt_vec_info def_vinfo = vinfo_for_stmt (*def_stmt);
187       if (STMT_VINFO_IN_PATTERN_P (def_vinfo))
188 	return false;
189     }
190 
191   if (!is_gimple_assign (*def_stmt))
192     return false;
193 
194   if (!CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (*def_stmt)))
195     return false;
196 
197   oprnd0 = gimple_assign_rhs1 (*def_stmt);
198 
199   *orig_type = TREE_TYPE (oprnd0);
200   if (!INTEGRAL_TYPE_P (type) || !INTEGRAL_TYPE_P (*orig_type)
201       || ((TYPE_UNSIGNED (type) != TYPE_UNSIGNED (*orig_type)) && check_sign))
202     return false;
203 
204   if (TYPE_PRECISION (type) >= (TYPE_PRECISION (*orig_type) * 2))
205     *promotion = true;
206   else
207     *promotion = false;
208 
209   if (!vect_is_simple_use (oprnd0, stmt_vinfo->vinfo, &dummy_gimple, &dt))
210     return false;
211 
212   return true;
213 }
214 
215 /* Helper to return a new temporary for pattern of TYPE for STMT.  If STMT
216    is NULL, the caller must set SSA_NAME_DEF_STMT for the returned SSA var. */
217 
218 static tree
vect_recog_temp_ssa_var(tree type,gimple * stmt)219 vect_recog_temp_ssa_var (tree type, gimple *stmt)
220 {
221   return make_temp_ssa_name (type, stmt, "patt");
222 }
223 
224 /* Return true if STMT_VINFO describes a reduction for which reassociation
225    is allowed.  If STMT_INFO is part of a group, assume that it's part of
226    a reduction chain and optimistically assume that all statements
227    except the last allow reassociation.  */
228 
229 static bool
vect_reassociating_reduction_p(stmt_vec_info stmt_vinfo)230 vect_reassociating_reduction_p (stmt_vec_info stmt_vinfo)
231 {
232   return (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
233 	  ? STMT_VINFO_REDUC_TYPE (stmt_vinfo) != FOLD_LEFT_REDUCTION
234 	  : GROUP_FIRST_ELEMENT (stmt_vinfo) != NULL);
235 }
236 
237 /* Function vect_recog_dot_prod_pattern
238 
239    Try to find the following pattern:
240 
241      type x_t, y_t;
242      TYPE1 prod;
243      TYPE2 sum = init;
244    loop:
245      sum_0 = phi <init, sum_1>
246      S1  x_t = ...
247      S2  y_t = ...
248      S3  x_T = (TYPE1) x_t;
249      S4  y_T = (TYPE1) y_t;
250      S5  prod = x_T * y_T;
251      [S6  prod = (TYPE2) prod;  #optional]
252      S7  sum_1 = prod + sum_0;
253 
254    where 'TYPE1' is exactly double the size of type 'type', and 'TYPE2' is the
255    same size of 'TYPE1' or bigger. This is a special case of a reduction
256    computation.
257 
258    Input:
259 
260    * STMTS: Contains a stmt from which the pattern search begins.  In the
261    example, when this function is called with S7, the pattern {S3,S4,S5,S6,S7}
262    will be detected.
263 
264    Output:
265 
266    * TYPE_IN: The type of the input arguments to the pattern.
267 
268    * TYPE_OUT: The type of the output  of this pattern.
269 
270    * Return value: A new stmt that will be used to replace the sequence of
271    stmts that constitute the pattern. In this case it will be:
272         WIDEN_DOT_PRODUCT <x_t, y_t, sum_0>
273 
274    Note: The dot-prod idiom is a widening reduction pattern that is
275          vectorized without preserving all the intermediate results. It
276          produces only N/2 (widened) results (by summing up pairs of
277          intermediate results) rather than all N results.  Therefore, we
278          cannot allow this pattern when we want to get all the results and in
279          the correct order (as is the case when this computation is in an
280          inner-loop nested in an outer-loop that us being vectorized).  */
281 
282 static gimple *
vect_recog_dot_prod_pattern(vec<gimple * > * stmts,tree * type_in,tree * type_out)283 vect_recog_dot_prod_pattern (vec<gimple *> *stmts, tree *type_in,
284 			     tree *type_out)
285 {
286   gimple *stmt, *last_stmt = (*stmts)[0];
287   tree oprnd0, oprnd1;
288   tree oprnd00, oprnd01;
289   stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
290   tree type, half_type;
291   gimple *pattern_stmt;
292   tree prod_type;
293   loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
294   struct loop *loop;
295   tree var;
296   bool promotion;
297 
298   if (!loop_info)
299     return NULL;
300 
301   loop = LOOP_VINFO_LOOP (loop_info);
302 
303   /* We don't allow changing the order of the computation in the inner-loop
304      when doing outer-loop vectorization.  */
305   if (loop && nested_in_vect_loop_p (loop, last_stmt))
306     return NULL;
307 
308   if (!is_gimple_assign (last_stmt))
309     return NULL;
310 
311   type = gimple_expr_type (last_stmt);
312 
313   /* Look for the following pattern
314           DX = (TYPE1) X;
315           DY = (TYPE1) Y;
316           DPROD = DX * DY;
317           DDPROD = (TYPE2) DPROD;
318           sum_1 = DDPROD + sum_0;
319      In which
320      - DX is double the size of X
321      - DY is double the size of Y
322      - DX, DY, DPROD all have the same type
323      - sum is the same size of DPROD or bigger
324      - sum has been recognized as a reduction variable.
325 
326      This is equivalent to:
327        DPROD = X w* Y;          #widen mult
328        sum_1 = DPROD w+ sum_0;  #widen summation
329      or
330        DPROD = X w* Y;          #widen mult
331        sum_1 = DPROD + sum_0;   #summation
332    */
333 
334   /* Starting from LAST_STMT, follow the defs of its uses in search
335      of the above pattern.  */
336 
337   if (gimple_assign_rhs_code (last_stmt) != PLUS_EXPR)
338     return NULL;
339 
340   if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
341     {
342       /* Has been detected as widening-summation?  */
343 
344       stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
345       type = gimple_expr_type (stmt);
346       if (gimple_assign_rhs_code (stmt) != WIDEN_SUM_EXPR)
347         return NULL;
348       oprnd0 = gimple_assign_rhs1 (stmt);
349       oprnd1 = gimple_assign_rhs2 (stmt);
350       half_type = TREE_TYPE (oprnd0);
351     }
352   else
353     {
354       gimple *def_stmt;
355 
356       if (!vect_reassociating_reduction_p (stmt_vinfo))
357 	return NULL;
358       oprnd0 = gimple_assign_rhs1 (last_stmt);
359       oprnd1 = gimple_assign_rhs2 (last_stmt);
360       if (!types_compatible_p (TREE_TYPE (oprnd0), type)
361 	  || !types_compatible_p (TREE_TYPE (oprnd1), type))
362         return NULL;
363       stmt = last_stmt;
364 
365       if (type_conversion_p (oprnd0, stmt, true, &half_type, &def_stmt,
366 			     &promotion)
367 	  && promotion)
368         {
369           stmt = def_stmt;
370           oprnd0 = gimple_assign_rhs1 (stmt);
371         }
372       else
373         half_type = type;
374     }
375 
376   /* So far so good.  Since last_stmt was detected as a (summation) reduction,
377      we know that oprnd1 is the reduction variable (defined by a loop-header
378      phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
379      Left to check that oprnd0 is defined by a (widen_)mult_expr  */
380   if (TREE_CODE (oprnd0) != SSA_NAME)
381     return NULL;
382 
383   prod_type = half_type;
384   stmt = SSA_NAME_DEF_STMT (oprnd0);
385 
386   /* It could not be the dot_prod pattern if the stmt is outside the loop.  */
387   if (!gimple_bb (stmt) || !flow_bb_inside_loop_p (loop, gimple_bb (stmt)))
388     return NULL;
389 
390   /* FORNOW.  Can continue analyzing the def-use chain when this stmt in a phi
391      inside the loop (in case we are analyzing an outer-loop).  */
392   if (!is_gimple_assign (stmt))
393     return NULL;
394   stmt_vinfo = vinfo_for_stmt (stmt);
395   gcc_assert (stmt_vinfo);
396   if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_internal_def)
397     return NULL;
398   if (gimple_assign_rhs_code (stmt) != MULT_EXPR)
399     return NULL;
400   if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
401     {
402       /* Has been detected as a widening multiplication?  */
403 
404       stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
405       if (gimple_assign_rhs_code (stmt) != WIDEN_MULT_EXPR)
406         return NULL;
407       stmt_vinfo = vinfo_for_stmt (stmt);
408       gcc_assert (stmt_vinfo);
409       gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_internal_def);
410       oprnd00 = gimple_assign_rhs1 (stmt);
411       oprnd01 = gimple_assign_rhs2 (stmt);
412       STMT_VINFO_PATTERN_DEF_SEQ (vinfo_for_stmt (last_stmt))
413 	  = STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo);
414     }
415   else
416     {
417       tree half_type0, half_type1;
418       gimple *def_stmt;
419       tree oprnd0, oprnd1;
420 
421       oprnd0 = gimple_assign_rhs1 (stmt);
422       oprnd1 = gimple_assign_rhs2 (stmt);
423       if (!types_compatible_p (TREE_TYPE (oprnd0), prod_type)
424           || !types_compatible_p (TREE_TYPE (oprnd1), prod_type))
425         return NULL;
426       if (!type_conversion_p (oprnd0, stmt, true, &half_type0, &def_stmt,
427 			      &promotion)
428 	  || !promotion)
429         return NULL;
430       oprnd00 = gimple_assign_rhs1 (def_stmt);
431       if (!type_conversion_p (oprnd1, stmt, true, &half_type1, &def_stmt,
432 			      &promotion)
433 	  || !promotion)
434         return NULL;
435       oprnd01 = gimple_assign_rhs1 (def_stmt);
436       if (!types_compatible_p (half_type0, half_type1))
437         return NULL;
438       if (TYPE_PRECISION (prod_type) != TYPE_PRECISION (half_type0) * 2)
439 	return NULL;
440     }
441 
442   half_type = TREE_TYPE (oprnd00);
443   *type_in = half_type;
444   *type_out = type;
445 
446   /* Pattern detected. Create a stmt to be used to replace the pattern: */
447   var = vect_recog_temp_ssa_var (type, NULL);
448   pattern_stmt = gimple_build_assign (var, DOT_PROD_EXPR,
449 				      oprnd00, oprnd01, oprnd1);
450 
451   if (dump_enabled_p ())
452     {
453       dump_printf_loc (MSG_NOTE, vect_location,
454                        "vect_recog_dot_prod_pattern: detected: ");
455       dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
456     }
457 
458   return pattern_stmt;
459 }
460 
461 
462 /* Function vect_recog_sad_pattern
463 
464    Try to find the following Sum of Absolute Difference (SAD) pattern:
465 
466      type x_t, y_t;
467      signed TYPE1 diff, abs_diff;
468      TYPE2 sum = init;
469    loop:
470      sum_0 = phi <init, sum_1>
471      S1  x_t = ...
472      S2  y_t = ...
473      S3  x_T = (TYPE1) x_t;
474      S4  y_T = (TYPE1) y_t;
475      S5  diff = x_T - y_T;
476      S6  abs_diff = ABS_EXPR <diff>;
477      [S7  abs_diff = (TYPE2) abs_diff;  #optional]
478      S8  sum_1 = abs_diff + sum_0;
479 
480    where 'TYPE1' is at least double the size of type 'type', and 'TYPE2' is the
481    same size of 'TYPE1' or bigger. This is a special case of a reduction
482    computation.
483 
484    Input:
485 
486    * STMTS: Contains a stmt from which the pattern search begins.  In the
487    example, when this function is called with S8, the pattern
488    {S3,S4,S5,S6,S7,S8} will be detected.
489 
490    Output:
491 
492    * TYPE_IN: The type of the input arguments to the pattern.
493 
494    * TYPE_OUT: The type of the output of this pattern.
495 
496    * Return value: A new stmt that will be used to replace the sequence of
497    stmts that constitute the pattern. In this case it will be:
498         SAD_EXPR <x_t, y_t, sum_0>
499   */
500 
501 static gimple *
vect_recog_sad_pattern(vec<gimple * > * stmts,tree * type_in,tree * type_out)502 vect_recog_sad_pattern (vec<gimple *> *stmts, tree *type_in,
503 			     tree *type_out)
504 {
505   gimple *last_stmt = (*stmts)[0];
506   tree sad_oprnd0, sad_oprnd1;
507   stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
508   tree half_type;
509   loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
510   struct loop *loop;
511   bool promotion;
512 
513   if (!loop_info)
514     return NULL;
515 
516   loop = LOOP_VINFO_LOOP (loop_info);
517 
518   /* We don't allow changing the order of the computation in the inner-loop
519      when doing outer-loop vectorization.  */
520   if (loop && nested_in_vect_loop_p (loop, last_stmt))
521     return NULL;
522 
523   if (!is_gimple_assign (last_stmt))
524     return NULL;
525 
526   tree sum_type = gimple_expr_type (last_stmt);
527 
528   /* Look for the following pattern
529           DX = (TYPE1) X;
530           DY = (TYPE1) Y;
531           DDIFF = DX - DY;
532           DAD = ABS_EXPR <DDIFF>;
533           DDPROD = (TYPE2) DPROD;
534           sum_1 = DAD + sum_0;
535      In which
536      - DX is at least double the size of X
537      - DY is at least double the size of Y
538      - DX, DY, DDIFF, DAD all have the same type
539      - sum is the same size of DAD or bigger
540      - sum has been recognized as a reduction variable.
541 
542      This is equivalent to:
543        DDIFF = X w- Y;          #widen sub
544        DAD = ABS_EXPR <DDIFF>;
545        sum_1 = DAD w+ sum_0;    #widen summation
546      or
547        DDIFF = X w- Y;          #widen sub
548        DAD = ABS_EXPR <DDIFF>;
549        sum_1 = DAD + sum_0;     #summation
550    */
551 
552   /* Starting from LAST_STMT, follow the defs of its uses in search
553      of the above pattern.  */
554 
555   if (gimple_assign_rhs_code (last_stmt) != PLUS_EXPR)
556     return NULL;
557 
558   tree plus_oprnd0, plus_oprnd1;
559 
560   if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
561     {
562       /* Has been detected as widening-summation?  */
563 
564       gimple *stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
565       sum_type = gimple_expr_type (stmt);
566       if (gimple_assign_rhs_code (stmt) != WIDEN_SUM_EXPR)
567         return NULL;
568       plus_oprnd0 = gimple_assign_rhs1 (stmt);
569       plus_oprnd1 = gimple_assign_rhs2 (stmt);
570       half_type = TREE_TYPE (plus_oprnd0);
571     }
572   else
573     {
574       gimple *def_stmt;
575 
576       if (!vect_reassociating_reduction_p (stmt_vinfo))
577 	return NULL;
578       plus_oprnd0 = gimple_assign_rhs1 (last_stmt);
579       plus_oprnd1 = gimple_assign_rhs2 (last_stmt);
580       if (!types_compatible_p (TREE_TYPE (plus_oprnd0), sum_type)
581 	  || !types_compatible_p (TREE_TYPE (plus_oprnd1), sum_type))
582         return NULL;
583 
584       /* The type conversion could be promotion, demotion,
585          or just signed -> unsigned.  */
586       if (type_conversion_p (plus_oprnd0, last_stmt, false,
587                              &half_type, &def_stmt, &promotion))
588         plus_oprnd0 = gimple_assign_rhs1 (def_stmt);
589       else
590         half_type = sum_type;
591     }
592 
593   /* So far so good.  Since last_stmt was detected as a (summation) reduction,
594      we know that plus_oprnd1 is the reduction variable (defined by a loop-header
595      phi), and plus_oprnd0 is an ssa-name defined by a stmt in the loop body.
596      Then check that plus_oprnd0 is defined by an abs_expr.  */
597 
598   if (TREE_CODE (plus_oprnd0) != SSA_NAME)
599     return NULL;
600 
601   tree abs_type = half_type;
602   gimple *abs_stmt = SSA_NAME_DEF_STMT (plus_oprnd0);
603 
604   /* It could not be the sad pattern if the abs_stmt is outside the loop.  */
605   if (!gimple_bb (abs_stmt) || !flow_bb_inside_loop_p (loop, gimple_bb (abs_stmt)))
606     return NULL;
607 
608   /* FORNOW.  Can continue analyzing the def-use chain when this stmt in a phi
609      inside the loop (in case we are analyzing an outer-loop).  */
610   if (!is_gimple_assign (abs_stmt))
611     return NULL;
612 
613   stmt_vec_info abs_stmt_vinfo = vinfo_for_stmt (abs_stmt);
614   gcc_assert (abs_stmt_vinfo);
615   if (STMT_VINFO_DEF_TYPE (abs_stmt_vinfo) != vect_internal_def)
616     return NULL;
617   if (gimple_assign_rhs_code (abs_stmt) != ABS_EXPR)
618     return NULL;
619 
620   tree abs_oprnd = gimple_assign_rhs1 (abs_stmt);
621   if (!types_compatible_p (TREE_TYPE (abs_oprnd), abs_type))
622     return NULL;
623   if (TYPE_UNSIGNED (abs_type))
624     return NULL;
625 
626   /* We then detect if the operand of abs_expr is defined by a minus_expr.  */
627 
628   if (TREE_CODE (abs_oprnd) != SSA_NAME)
629     return NULL;
630 
631   gimple *diff_stmt = SSA_NAME_DEF_STMT (abs_oprnd);
632 
633   /* It could not be the sad pattern if the diff_stmt is outside the loop.  */
634   if (!gimple_bb (diff_stmt)
635       || !flow_bb_inside_loop_p (loop, gimple_bb (diff_stmt)))
636     return NULL;
637 
638   /* FORNOW.  Can continue analyzing the def-use chain when this stmt in a phi
639      inside the loop (in case we are analyzing an outer-loop).  */
640   if (!is_gimple_assign (diff_stmt))
641     return NULL;
642 
643   stmt_vec_info diff_stmt_vinfo = vinfo_for_stmt (diff_stmt);
644   gcc_assert (diff_stmt_vinfo);
645   if (STMT_VINFO_DEF_TYPE (diff_stmt_vinfo) != vect_internal_def)
646     return NULL;
647   if (gimple_assign_rhs_code (diff_stmt) != MINUS_EXPR)
648     return NULL;
649 
650   tree half_type0, half_type1;
651   gimple *def_stmt;
652 
653   tree minus_oprnd0 = gimple_assign_rhs1 (diff_stmt);
654   tree minus_oprnd1 = gimple_assign_rhs2 (diff_stmt);
655 
656   if (!types_compatible_p (TREE_TYPE (minus_oprnd0), abs_type)
657       || !types_compatible_p (TREE_TYPE (minus_oprnd1), abs_type))
658     return NULL;
659   if (!type_conversion_p (minus_oprnd0, diff_stmt, false,
660                           &half_type0, &def_stmt, &promotion)
661       || !promotion)
662     return NULL;
663   sad_oprnd0 = gimple_assign_rhs1 (def_stmt);
664 
665   if (!type_conversion_p (minus_oprnd1, diff_stmt, false,
666                           &half_type1, &def_stmt, &promotion)
667       || !promotion)
668     return NULL;
669   sad_oprnd1 = gimple_assign_rhs1 (def_stmt);
670 
671   if (!types_compatible_p (half_type0, half_type1))
672     return NULL;
673   if (TYPE_PRECISION (abs_type) < TYPE_PRECISION (half_type0) * 2
674       || TYPE_PRECISION (sum_type) < TYPE_PRECISION (half_type0) * 2)
675     return NULL;
676 
677   *type_in = TREE_TYPE (sad_oprnd0);
678   *type_out = sum_type;
679 
680   /* Pattern detected. Create a stmt to be used to replace the pattern: */
681   tree var = vect_recog_temp_ssa_var (sum_type, NULL);
682   gimple *pattern_stmt = gimple_build_assign (var, SAD_EXPR, sad_oprnd0,
683 					      sad_oprnd1, plus_oprnd1);
684 
685   if (dump_enabled_p ())
686     {
687       dump_printf_loc (MSG_NOTE, vect_location,
688                        "vect_recog_sad_pattern: detected: ");
689       dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
690     }
691 
692   return pattern_stmt;
693 }
694 
695 
696 /* Handle widening operation by a constant.  At the moment we support MULT_EXPR
697    and LSHIFT_EXPR.
698 
699    For MULT_EXPR we check that CONST_OPRND fits HALF_TYPE, and for LSHIFT_EXPR
700    we check that CONST_OPRND is less or equal to the size of HALF_TYPE.
701 
702    Otherwise, if the type of the result (TYPE) is at least 4 times bigger than
703    HALF_TYPE, and there is an intermediate type (2 times smaller than TYPE)
704    that satisfies the above restrictions,  we can perform a widening opeartion
705    from the intermediate type to TYPE and replace a_T = (TYPE) a_t;
706    with a_it = (interm_type) a_t;  Store such operation in *WSTMT.  */
707 
708 static bool
vect_handle_widen_op_by_const(gimple * stmt,enum tree_code code,tree const_oprnd,tree * oprnd,gimple ** wstmt,tree type,tree * half_type,gimple * def_stmt)709 vect_handle_widen_op_by_const (gimple *stmt, enum tree_code code,
710 		               tree const_oprnd, tree *oprnd,
711 			       gimple **wstmt, tree type,
712 			       tree *half_type, gimple *def_stmt)
713 {
714   tree new_type, new_oprnd;
715 
716   if (code != MULT_EXPR && code != LSHIFT_EXPR)
717     return false;
718 
719   if (((code == MULT_EXPR && int_fits_type_p (const_oprnd, *half_type))
720         || (code == LSHIFT_EXPR
721             && compare_tree_int (const_oprnd, TYPE_PRECISION (*half_type))
722 	    	!= 1))
723       && TYPE_PRECISION (type) == (TYPE_PRECISION (*half_type) * 2))
724     {
725       /* CONST_OPRND is a constant of HALF_TYPE.  */
726       *oprnd = gimple_assign_rhs1 (def_stmt);
727       return true;
728     }
729 
730   if (TYPE_PRECISION (type) < (TYPE_PRECISION (*half_type) * 4))
731     return false;
732 
733   if (!vect_same_loop_or_bb_p (stmt, def_stmt))
734     return false;
735 
736   /* TYPE is 4 times bigger than HALF_TYPE, try widening operation for
737      a type 2 times bigger than HALF_TYPE.  */
738   new_type = build_nonstandard_integer_type (TYPE_PRECISION (type) / 2,
739                                              TYPE_UNSIGNED (type));
740   if ((code == MULT_EXPR && !int_fits_type_p (const_oprnd, new_type))
741       || (code == LSHIFT_EXPR
742           && compare_tree_int (const_oprnd, TYPE_PRECISION (new_type)) == 1))
743     return false;
744 
745   /* Use NEW_TYPE for widening operation and create a_T = (NEW_TYPE) a_t;  */
746   *oprnd = gimple_assign_rhs1 (def_stmt);
747   new_oprnd = make_ssa_name (new_type);
748   *wstmt = gimple_build_assign (new_oprnd, NOP_EXPR, *oprnd);
749   *oprnd = new_oprnd;
750 
751   *half_type = new_type;
752   return true;
753 }
754 
755 
756 /* Function vect_recog_widen_mult_pattern
757 
758    Try to find the following pattern:
759 
760      type1 a_t;
761      type2 b_t;
762      TYPE a_T, b_T, prod_T;
763 
764      S1  a_t = ;
765      S2  b_t = ;
766      S3  a_T = (TYPE) a_t;
767      S4  b_T = (TYPE) b_t;
768      S5  prod_T = a_T * b_T;
769 
770    where type 'TYPE' is at least double the size of type 'type1' and 'type2'.
771 
772    Also detect unsigned cases:
773 
774      unsigned type1 a_t;
775      unsigned type2 b_t;
776      unsigned TYPE u_prod_T;
777      TYPE a_T, b_T, prod_T;
778 
779      S1  a_t = ;
780      S2  b_t = ;
781      S3  a_T = (TYPE) a_t;
782      S4  b_T = (TYPE) b_t;
783      S5  prod_T = a_T * b_T;
784      S6  u_prod_T = (unsigned TYPE) prod_T;
785 
786    and multiplication by constants:
787 
788      type a_t;
789      TYPE a_T, prod_T;
790 
791      S1  a_t = ;
792      S3  a_T = (TYPE) a_t;
793      S5  prod_T = a_T * CONST;
794 
795    A special case of multiplication by constants is when 'TYPE' is 4 times
796    bigger than 'type', but CONST fits an intermediate type 2 times smaller
797    than 'TYPE'.  In that case we create an additional pattern stmt for S3
798    to create a variable of the intermediate type, and perform widen-mult
799    on the intermediate type as well:
800 
801      type a_t;
802      interm_type a_it;
803      TYPE a_T, prod_T,  prod_T';
804 
805      S1  a_t = ;
806      S3  a_T = (TYPE) a_t;
807            '--> a_it = (interm_type) a_t;
808      S5  prod_T = a_T * CONST;
809            '--> prod_T' = a_it w* CONST;
810 
811    Input/Output:
812 
813    * STMTS: Contains a stmt from which the pattern search begins.  In the
814    example, when this function is called with S5, the pattern {S3,S4,S5,(S6)}
815    is detected.  In case of unsigned widen-mult, the original stmt (S5) is
816    replaced with S6 in STMTS.  In case of multiplication by a constant
817    of an intermediate type (the last case above), STMTS also contains S3
818    (inserted before S5).
819 
820    Output:
821 
822    * TYPE_IN: The type of the input arguments to the pattern.
823 
824    * TYPE_OUT: The type of the output of this pattern.
825 
826    * Return value: A new stmt that will be used to replace the sequence of
827    stmts that constitute the pattern.  In this case it will be:
828         WIDEN_MULT <a_t, b_t>
829    If the result of WIDEN_MULT needs to be converted to a larger type, the
830    returned stmt will be this type conversion stmt.
831 */
832 
833 static gimple *
vect_recog_widen_mult_pattern(vec<gimple * > * stmts,tree * type_in,tree * type_out)834 vect_recog_widen_mult_pattern (vec<gimple *> *stmts,
835                                tree *type_in, tree *type_out)
836 {
837   gimple *last_stmt = stmts->pop ();
838   gimple *def_stmt0, *def_stmt1;
839   tree oprnd0, oprnd1;
840   tree type, half_type0, half_type1;
841   gimple *new_stmt = NULL, *pattern_stmt = NULL;
842   tree vectype, vecitype;
843   tree var;
844   enum tree_code dummy_code;
845   int dummy_int;
846   vec<tree> dummy_vec;
847   bool op1_ok;
848   bool promotion;
849 
850   if (!is_gimple_assign (last_stmt))
851     return NULL;
852 
853   type = gimple_expr_type (last_stmt);
854 
855   /* Starting from LAST_STMT, follow the defs of its uses in search
856      of the above pattern.  */
857 
858   if (gimple_assign_rhs_code (last_stmt) != MULT_EXPR)
859     return NULL;
860 
861   oprnd0 = gimple_assign_rhs1 (last_stmt);
862   oprnd1 = gimple_assign_rhs2 (last_stmt);
863   if (!types_compatible_p (TREE_TYPE (oprnd0), type)
864       || !types_compatible_p (TREE_TYPE (oprnd1), type))
865     return NULL;
866 
867   /* Check argument 0.  */
868   if (!type_conversion_p (oprnd0, last_stmt, false, &half_type0, &def_stmt0,
869                          &promotion)
870       || !promotion)
871      return NULL;
872   /* Check argument 1.  */
873   op1_ok = type_conversion_p (oprnd1, last_stmt, false, &half_type1,
874                               &def_stmt1, &promotion);
875 
876   if (op1_ok && promotion)
877     {
878       oprnd0 = gimple_assign_rhs1 (def_stmt0);
879       oprnd1 = gimple_assign_rhs1 (def_stmt1);
880     }
881   else
882     {
883       if (TREE_CODE (oprnd1) == INTEGER_CST
884           && TREE_CODE (half_type0) == INTEGER_TYPE
885           && vect_handle_widen_op_by_const (last_stmt, MULT_EXPR, oprnd1,
886 		                            &oprnd0, &new_stmt, type,
887 					    &half_type0, def_stmt0))
888 	{
889 	  half_type1 = half_type0;
890 	  oprnd1 = fold_convert (half_type1, oprnd1);
891 	}
892       else
893         return NULL;
894     }
895 
896   /* If the two arguments have different sizes, convert the one with
897      the smaller type into the larger type.  */
898   if (TYPE_PRECISION (half_type0) != TYPE_PRECISION (half_type1))
899     {
900       /* If we already used up the single-stmt slot give up.  */
901       if (new_stmt)
902 	return NULL;
903 
904       tree* oprnd = NULL;
905       gimple *def_stmt = NULL;
906 
907       if (TYPE_PRECISION (half_type0) < TYPE_PRECISION (half_type1))
908 	{
909 	  def_stmt = def_stmt0;
910 	  half_type0 = half_type1;
911 	  oprnd = &oprnd0;
912 	}
913       else
914 	{
915 	  def_stmt = def_stmt1;
916 	  half_type1 = half_type0;
917 	  oprnd = &oprnd1;
918 	}
919 
920       tree old_oprnd = gimple_assign_rhs1 (def_stmt);
921       tree new_oprnd = make_ssa_name (half_type0);
922       new_stmt = gimple_build_assign (new_oprnd, NOP_EXPR, old_oprnd);
923       *oprnd = new_oprnd;
924     }
925 
926   /* Handle unsigned case.  Look for
927      S6  u_prod_T = (unsigned TYPE) prod_T;
928      Use unsigned TYPE as the type for WIDEN_MULT_EXPR.  */
929   if (TYPE_UNSIGNED (type) != TYPE_UNSIGNED (half_type0))
930     {
931       gimple *use_stmt;
932       tree use_lhs;
933       tree use_type;
934 
935       if (TYPE_UNSIGNED (type) == TYPE_UNSIGNED (half_type1))
936         return NULL;
937 
938       use_stmt = vect_single_imm_use (last_stmt);
939       if (!use_stmt || !is_gimple_assign (use_stmt)
940 	  || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (use_stmt)))
941         return NULL;
942 
943       use_lhs = gimple_assign_lhs (use_stmt);
944       use_type = TREE_TYPE (use_lhs);
945       if (!INTEGRAL_TYPE_P (use_type)
946           || (TYPE_UNSIGNED (type) == TYPE_UNSIGNED (use_type))
947           || (TYPE_PRECISION (type) != TYPE_PRECISION (use_type)))
948         return NULL;
949 
950       type = use_type;
951       last_stmt = use_stmt;
952     }
953 
954   if (!types_compatible_p (half_type0, half_type1))
955     return NULL;
956 
957   /* If TYPE is more than twice larger than HALF_TYPE, we use WIDEN_MULT
958      to get an intermediate result of type ITYPE.  In this case we need
959      to build a statement to convert this intermediate result to type TYPE.  */
960   tree itype = type;
961   if (TYPE_PRECISION (type) > TYPE_PRECISION (half_type0) * 2)
962     itype = build_nonstandard_integer_type
963 	      (GET_MODE_BITSIZE (SCALAR_TYPE_MODE (half_type0)) * 2,
964 	       TYPE_UNSIGNED (type));
965 
966   /* Pattern detected.  */
967   if (dump_enabled_p ())
968     dump_printf_loc (MSG_NOTE, vect_location,
969                      "vect_recog_widen_mult_pattern: detected:\n");
970 
971   /* Check target support  */
972   vectype = get_vectype_for_scalar_type (half_type0);
973   vecitype = get_vectype_for_scalar_type (itype);
974   if (!vectype
975       || !vecitype
976       || !supportable_widening_operation (WIDEN_MULT_EXPR, last_stmt,
977 					  vecitype, vectype,
978 					  &dummy_code, &dummy_code,
979 					  &dummy_int, &dummy_vec))
980     return NULL;
981 
982   *type_in = vectype;
983   *type_out = get_vectype_for_scalar_type (type);
984 
985   /* Pattern supported. Create a stmt to be used to replace the pattern: */
986   var = vect_recog_temp_ssa_var (itype, NULL);
987   pattern_stmt = gimple_build_assign (var, WIDEN_MULT_EXPR, oprnd0, oprnd1);
988 
989   stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
990   STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;
991 
992   /* If the original two operands have different sizes, we may need to convert
993      the smaller one into the larget type.  If this is the case, at this point
994      the new stmt is already built.  */
995   if (new_stmt)
996     {
997       append_pattern_def_seq (stmt_vinfo, new_stmt);
998       stmt_vec_info new_stmt_info
999         = new_stmt_vec_info (new_stmt, stmt_vinfo->vinfo);
1000       set_vinfo_for_stmt (new_stmt, new_stmt_info);
1001       STMT_VINFO_VECTYPE (new_stmt_info) = vectype;
1002     }
1003 
1004   /* If ITYPE is not TYPE, we need to build a type convertion stmt to convert
1005      the result of the widen-mult operation into type TYPE.  */
1006   if (itype != type)
1007     {
1008       append_pattern_def_seq (stmt_vinfo, pattern_stmt);
1009       stmt_vec_info pattern_stmt_info
1010         = new_stmt_vec_info (pattern_stmt, stmt_vinfo->vinfo);
1011       set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
1012       STMT_VINFO_VECTYPE (pattern_stmt_info) = vecitype;
1013       pattern_stmt = gimple_build_assign (vect_recog_temp_ssa_var (type, NULL),
1014 					  NOP_EXPR,
1015 					  gimple_assign_lhs (pattern_stmt));
1016     }
1017 
1018   if (dump_enabled_p ())
1019     dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt, 0);
1020 
1021   stmts->safe_push (last_stmt);
1022   return pattern_stmt;
1023 }
1024 
1025 
1026 /* Function vect_recog_pow_pattern
1027 
1028    Try to find the following pattern:
1029 
1030      x = POW (y, N);
1031 
1032    with POW being one of pow, powf, powi, powif and N being
1033    either 2 or 0.5.
1034 
1035    Input:
1036 
1037    * LAST_STMT: A stmt from which the pattern search begins.
1038 
1039    Output:
1040 
1041    * TYPE_IN: The type of the input arguments to the pattern.
1042 
1043    * TYPE_OUT: The type of the output of this pattern.
1044 
1045    * Return value: A new stmt that will be used to replace the sequence of
1046    stmts that constitute the pattern. In this case it will be:
1047         x = x * x
1048    or
1049 	x = sqrt (x)
1050 */
1051 
1052 static gimple *
vect_recog_pow_pattern(vec<gimple * > * stmts,tree * type_in,tree * type_out)1053 vect_recog_pow_pattern (vec<gimple *> *stmts, tree *type_in,
1054 			tree *type_out)
1055 {
1056   gimple *last_stmt = (*stmts)[0];
1057   tree base, exp;
1058   gimple *stmt;
1059   tree var;
1060 
1061   if (!is_gimple_call (last_stmt) || gimple_call_lhs (last_stmt) == NULL)
1062     return NULL;
1063 
1064   switch (gimple_call_combined_fn (last_stmt))
1065     {
1066     CASE_CFN_POW:
1067     CASE_CFN_POWI:
1068       break;
1069 
1070     default:
1071       return NULL;
1072     }
1073 
1074   base = gimple_call_arg (last_stmt, 0);
1075   exp = gimple_call_arg (last_stmt, 1);
1076   if (TREE_CODE (exp) != REAL_CST
1077       && TREE_CODE (exp) != INTEGER_CST)
1078     {
1079       if (flag_unsafe_math_optimizations
1080 	  && TREE_CODE (base) == REAL_CST
1081 	  && !gimple_call_internal_p (last_stmt))
1082 	{
1083 	  combined_fn log_cfn;
1084 	  built_in_function exp_bfn;
1085 	  switch (DECL_FUNCTION_CODE (gimple_call_fndecl (last_stmt)))
1086 	    {
1087 	    case BUILT_IN_POW:
1088 	      log_cfn = CFN_BUILT_IN_LOG;
1089 	      exp_bfn = BUILT_IN_EXP;
1090 	      break;
1091 	    case BUILT_IN_POWF:
1092 	      log_cfn = CFN_BUILT_IN_LOGF;
1093 	      exp_bfn = BUILT_IN_EXPF;
1094 	      break;
1095 	    case BUILT_IN_POWL:
1096 	      log_cfn = CFN_BUILT_IN_LOGL;
1097 	      exp_bfn = BUILT_IN_EXPL;
1098 	      break;
1099 	    default:
1100 	      return NULL;
1101 	    }
1102 	  tree logc = fold_const_call (log_cfn, TREE_TYPE (base), base);
1103 	  tree exp_decl = builtin_decl_implicit (exp_bfn);
1104 	  /* Optimize pow (C, x) as exp (log (C) * x).  Normally match.pd
1105 	     does that, but if C is a power of 2, we want to use
1106 	     exp2 (log2 (C) * x) in the non-vectorized version, but for
1107 	     vectorization we don't have vectorized exp2.  */
1108 	  if (logc
1109 	      && TREE_CODE (logc) == REAL_CST
1110 	      && exp_decl
1111 	      && lookup_attribute ("omp declare simd",
1112 				   DECL_ATTRIBUTES (exp_decl)))
1113 	    {
1114 	      cgraph_node *node = cgraph_node::get_create (exp_decl);
1115 	      if (node->simd_clones == NULL)
1116 		{
1117 		  if (targetm.simd_clone.compute_vecsize_and_simdlen == NULL
1118 		      || node->definition)
1119 		    return NULL;
1120 		  expand_simd_clones (node);
1121 		  if (node->simd_clones == NULL)
1122 		    return NULL;
1123 		}
1124 	      stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
1125 	      tree def = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
1126 	      gimple *g = gimple_build_assign (def, MULT_EXPR, exp, logc);
1127 	      new_pattern_def_seq (stmt_vinfo, g);
1128 	      *type_in = TREE_TYPE (base);
1129 	      *type_out = NULL_TREE;
1130 	      tree res = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
1131 	      g = gimple_build_call (exp_decl, 1, def);
1132 	      gimple_call_set_lhs (g, res);
1133 	      return g;
1134 	    }
1135 	}
1136 
1137       return NULL;
1138     }
1139 
1140   /* We now have a pow or powi builtin function call with a constant
1141      exponent.  */
1142 
1143   *type_out = NULL_TREE;
1144 
1145   /* Catch squaring.  */
1146   if ((tree_fits_shwi_p (exp)
1147        && tree_to_shwi (exp) == 2)
1148       || (TREE_CODE (exp) == REAL_CST
1149           && real_equal (&TREE_REAL_CST (exp), &dconst2)))
1150     {
1151       *type_in = TREE_TYPE (base);
1152 
1153       var = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
1154       stmt = gimple_build_assign (var, MULT_EXPR, base, base);
1155       return stmt;
1156     }
1157 
1158   /* Catch square root.  */
1159   if (TREE_CODE (exp) == REAL_CST
1160       && real_equal (&TREE_REAL_CST (exp), &dconsthalf))
1161     {
1162       *type_in = get_vectype_for_scalar_type (TREE_TYPE (base));
1163       if (*type_in
1164 	  && direct_internal_fn_supported_p (IFN_SQRT, *type_in,
1165 					     OPTIMIZE_FOR_SPEED))
1166 	{
1167 	  gcall *stmt = gimple_build_call_internal (IFN_SQRT, 1, base);
1168 	  var = vect_recog_temp_ssa_var (TREE_TYPE (base), stmt);
1169 	  gimple_call_set_lhs (stmt, var);
1170 	  gimple_call_set_nothrow (stmt, true);
1171 	  return stmt;
1172 	}
1173     }
1174 
1175   return NULL;
1176 }
1177 
1178 
1179 /* Function vect_recog_widen_sum_pattern
1180 
1181    Try to find the following pattern:
1182 
1183      type x_t;
1184      TYPE x_T, sum = init;
1185    loop:
1186      sum_0 = phi <init, sum_1>
1187      S1  x_t = *p;
1188      S2  x_T = (TYPE) x_t;
1189      S3  sum_1 = x_T + sum_0;
1190 
1191    where type 'TYPE' is at least double the size of type 'type', i.e - we're
1192    summing elements of type 'type' into an accumulator of type 'TYPE'. This is
1193    a special case of a reduction computation.
1194 
1195    Input:
1196 
1197    * LAST_STMT: A stmt from which the pattern search begins. In the example,
1198    when this function is called with S3, the pattern {S2,S3} will be detected.
1199 
1200    Output:
1201 
1202    * TYPE_IN: The type of the input arguments to the pattern.
1203 
1204    * TYPE_OUT: The type of the output of this pattern.
1205 
1206    * Return value: A new stmt that will be used to replace the sequence of
1207    stmts that constitute the pattern. In this case it will be:
1208         WIDEN_SUM <x_t, sum_0>
1209 
1210    Note: The widening-sum idiom is a widening reduction pattern that is
1211 	 vectorized without preserving all the intermediate results. It
1212          produces only N/2 (widened) results (by summing up pairs of
1213 	 intermediate results) rather than all N results.  Therefore, we
1214 	 cannot allow this pattern when we want to get all the results and in
1215 	 the correct order (as is the case when this computation is in an
1216 	 inner-loop nested in an outer-loop that us being vectorized).  */
1217 
1218 static gimple *
vect_recog_widen_sum_pattern(vec<gimple * > * stmts,tree * type_in,tree * type_out)1219 vect_recog_widen_sum_pattern (vec<gimple *> *stmts, tree *type_in,
1220 			      tree *type_out)
1221 {
1222   gimple *stmt, *last_stmt = (*stmts)[0];
1223   tree oprnd0, oprnd1;
1224   stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
1225   tree type, half_type;
1226   gimple *pattern_stmt;
1227   loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1228   struct loop *loop;
1229   tree var;
1230   bool promotion;
1231 
1232   if (!loop_info)
1233     return NULL;
1234 
1235   loop = LOOP_VINFO_LOOP (loop_info);
1236 
1237   /* We don't allow changing the order of the computation in the inner-loop
1238      when doing outer-loop vectorization.  */
1239   if (loop && nested_in_vect_loop_p (loop, last_stmt))
1240     return NULL;
1241 
1242   if (!is_gimple_assign (last_stmt))
1243     return NULL;
1244 
1245   type = gimple_expr_type (last_stmt);
1246 
1247   /* Look for the following pattern
1248           DX = (TYPE) X;
1249           sum_1 = DX + sum_0;
1250      In which DX is at least double the size of X, and sum_1 has been
1251      recognized as a reduction variable.
1252    */
1253 
1254   /* Starting from LAST_STMT, follow the defs of its uses in search
1255      of the above pattern.  */
1256 
1257   if (gimple_assign_rhs_code (last_stmt) != PLUS_EXPR)
1258     return NULL;
1259 
1260   if (!vect_reassociating_reduction_p (stmt_vinfo))
1261     return NULL;
1262 
1263   oprnd0 = gimple_assign_rhs1 (last_stmt);
1264   oprnd1 = gimple_assign_rhs2 (last_stmt);
1265   if (!types_compatible_p (TREE_TYPE (oprnd0), type)
1266       || !types_compatible_p (TREE_TYPE (oprnd1), type))
1267     return NULL;
1268 
1269   /* So far so good.  Since last_stmt was detected as a (summation) reduction,
1270      we know that oprnd1 is the reduction variable (defined by a loop-header
1271      phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
1272      Left to check that oprnd0 is defined by a cast from type 'type' to type
1273      'TYPE'.  */
1274 
1275   if (!type_conversion_p (oprnd0, last_stmt, true, &half_type, &stmt,
1276                           &promotion)
1277       || !promotion)
1278      return NULL;
1279 
1280   oprnd0 = gimple_assign_rhs1 (stmt);
1281   *type_in = half_type;
1282   *type_out = type;
1283 
1284   /* Pattern detected. Create a stmt to be used to replace the pattern: */
1285   var = vect_recog_temp_ssa_var (type, NULL);
1286   pattern_stmt = gimple_build_assign (var, WIDEN_SUM_EXPR, oprnd0, oprnd1);
1287 
1288   if (dump_enabled_p ())
1289     {
1290       dump_printf_loc (MSG_NOTE, vect_location,
1291                        "vect_recog_widen_sum_pattern: detected: ");
1292       dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
1293     }
1294 
1295   return pattern_stmt;
1296 }
1297 
1298 
1299 /* Return TRUE if the operation in STMT can be performed on a smaller type.
1300 
1301    Input:
1302    STMT - a statement to check.
1303    DEF - we support operations with two operands, one of which is constant.
1304          The other operand can be defined by a demotion operation, or by a
1305          previous statement in a sequence of over-promoted operations.  In the
1306          later case DEF is used to replace that operand.  (It is defined by a
1307          pattern statement we created for the previous statement in the
1308          sequence).
1309 
1310    Input/output:
1311    NEW_TYPE - Output: a smaller type that we are trying to use.  Input: if not
1312          NULL, it's the type of DEF.
1313    STMTS - additional pattern statements.  If a pattern statement (type
1314          conversion) is created in this function, its original statement is
1315          added to STMTS.
1316 
1317    Output:
1318    OP0, OP1 - if the operation fits a smaller type, OP0 and OP1 are the new
1319          operands to use in the new pattern statement for STMT (will be created
1320          in vect_recog_over_widening_pattern ()).
1321    NEW_DEF_STMT - in case DEF has to be promoted, we create two pattern
1322          statements for STMT: the first one is a type promotion and the second
1323          one is the operation itself.  We return the type promotion statement
1324 	 in NEW_DEF_STMT and further store it in STMT_VINFO_PATTERN_DEF_SEQ of
1325          the second pattern statement.  */
1326 
1327 static bool
vect_operation_fits_smaller_type(gimple * stmt,tree def,tree * new_type,tree * op0,tree * op1,gimple ** new_def_stmt,vec<gimple * > * stmts)1328 vect_operation_fits_smaller_type (gimple *stmt, tree def, tree *new_type,
1329 				  tree *op0, tree *op1, gimple **new_def_stmt,
1330 				  vec<gimple *> *stmts)
1331 {
1332   enum tree_code code;
1333   tree const_oprnd, oprnd;
1334   tree interm_type = NULL_TREE, half_type, new_oprnd, type;
1335   gimple *def_stmt, *new_stmt;
1336   bool first = false;
1337   bool promotion;
1338 
1339   *op0 = NULL_TREE;
1340   *op1 = NULL_TREE;
1341   *new_def_stmt = NULL;
1342 
1343   if (!is_gimple_assign (stmt))
1344     return false;
1345 
1346   code = gimple_assign_rhs_code (stmt);
1347   if (code != LSHIFT_EXPR && code != RSHIFT_EXPR
1348       && code != BIT_IOR_EXPR && code != BIT_XOR_EXPR && code != BIT_AND_EXPR)
1349     return false;
1350 
1351   oprnd = gimple_assign_rhs1 (stmt);
1352   const_oprnd = gimple_assign_rhs2 (stmt);
1353   type = gimple_expr_type (stmt);
1354 
1355   if (TREE_CODE (oprnd) != SSA_NAME
1356       || TREE_CODE (const_oprnd) != INTEGER_CST)
1357     return false;
1358 
1359   /* If oprnd has other uses besides that in stmt we cannot mark it
1360      as being part of a pattern only.  */
1361   if (!has_single_use (oprnd))
1362     return false;
1363 
1364   /* If we are in the middle of a sequence, we use DEF from a previous
1365      statement.  Otherwise, OPRND has to be a result of type promotion.  */
1366   if (*new_type)
1367     {
1368       half_type = *new_type;
1369       oprnd = def;
1370     }
1371   else
1372     {
1373       first = true;
1374       if (!type_conversion_p (oprnd, stmt, false, &half_type, &def_stmt,
1375 			      &promotion)
1376 	  || !promotion
1377 	  || !vect_same_loop_or_bb_p (stmt, def_stmt))
1378         return false;
1379     }
1380 
1381   /* Can we perform the operation on a smaller type?  */
1382   switch (code)
1383     {
1384       case BIT_IOR_EXPR:
1385       case BIT_XOR_EXPR:
1386       case BIT_AND_EXPR:
1387         if (!int_fits_type_p (const_oprnd, half_type))
1388           {
1389             /* HALF_TYPE is not enough.  Try a bigger type if possible.  */
1390             if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4))
1391               return false;
1392 
1393             interm_type = build_nonstandard_integer_type (
1394                         TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type));
1395             if (!int_fits_type_p (const_oprnd, interm_type))
1396               return false;
1397           }
1398 
1399         break;
1400 
1401       case LSHIFT_EXPR:
1402         /* Try intermediate type - HALF_TYPE is not enough for sure.  */
1403         if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4))
1404           return false;
1405 
1406         /* Check that HALF_TYPE size + shift amount <= INTERM_TYPE size.
1407           (e.g., if the original value was char, the shift amount is at most 8
1408            if we want to use short).  */
1409         if (compare_tree_int (const_oprnd, TYPE_PRECISION (half_type)) == 1)
1410           return false;
1411 
1412         interm_type = build_nonstandard_integer_type (
1413                         TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type));
1414 
1415         if (!vect_supportable_shift (code, interm_type))
1416           return false;
1417 
1418         break;
1419 
1420       case RSHIFT_EXPR:
1421         if (vect_supportable_shift (code, half_type))
1422           break;
1423 
1424         /* Try intermediate type - HALF_TYPE is not supported.  */
1425         if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4))
1426           return false;
1427 
1428         interm_type = build_nonstandard_integer_type (
1429                         TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type));
1430 
1431         if (!vect_supportable_shift (code, interm_type))
1432           return false;
1433 
1434         break;
1435 
1436       default:
1437         gcc_unreachable ();
1438     }
1439 
1440   /* There are four possible cases:
1441      1. OPRND is defined by a type promotion (in that case FIRST is TRUE, it's
1442         the first statement in the sequence)
1443         a. The original, HALF_TYPE, is not enough - we replace the promotion
1444            from HALF_TYPE to TYPE with a promotion to INTERM_TYPE.
1445         b. HALF_TYPE is sufficient, OPRND is set as the RHS of the original
1446            promotion.
1447      2. OPRND is defined by a pattern statement we created.
1448         a. Its type is not sufficient for the operation, we create a new stmt:
1449            a type conversion for OPRND from HALF_TYPE to INTERM_TYPE.  We store
1450            this statement in NEW_DEF_STMT, and it is later put in
1451 	   STMT_VINFO_PATTERN_DEF_SEQ of the pattern statement for STMT.
1452         b. OPRND is good to use in the new statement.  */
1453   if (first)
1454     {
1455       if (interm_type)
1456         {
1457           /* Replace the original type conversion HALF_TYPE->TYPE with
1458              HALF_TYPE->INTERM_TYPE.  */
1459           if (STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)))
1460             {
1461               new_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt));
1462               /* Check if the already created pattern stmt is what we need.  */
1463               if (!is_gimple_assign (new_stmt)
1464                   || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (new_stmt))
1465                   || TREE_TYPE (gimple_assign_lhs (new_stmt)) != interm_type)
1466                 return false;
1467 
1468 	      stmts->safe_push (def_stmt);
1469               oprnd = gimple_assign_lhs (new_stmt);
1470             }
1471           else
1472             {
1473               /* Create NEW_OPRND = (INTERM_TYPE) OPRND.  */
1474               oprnd = gimple_assign_rhs1 (def_stmt);
1475 	      new_oprnd = make_ssa_name (interm_type);
1476 	      new_stmt = gimple_build_assign (new_oprnd, NOP_EXPR, oprnd);
1477               STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)) = new_stmt;
1478               stmts->safe_push (def_stmt);
1479               oprnd = new_oprnd;
1480             }
1481         }
1482       else
1483         {
1484           /* Retrieve the operand before the type promotion.  */
1485           oprnd = gimple_assign_rhs1 (def_stmt);
1486         }
1487     }
1488   else
1489     {
1490       if (interm_type)
1491         {
1492           /* Create a type conversion HALF_TYPE->INTERM_TYPE.  */
1493 	  new_oprnd = make_ssa_name (interm_type);
1494 	  new_stmt = gimple_build_assign (new_oprnd, NOP_EXPR, oprnd);
1495           oprnd = new_oprnd;
1496           *new_def_stmt = new_stmt;
1497         }
1498 
1499       /* Otherwise, OPRND is already set.  */
1500     }
1501 
1502   if (interm_type)
1503     *new_type = interm_type;
1504   else
1505     *new_type = half_type;
1506 
1507   *op0 = oprnd;
1508   *op1 = fold_convert (*new_type, const_oprnd);
1509 
1510   return true;
1511 }
1512 
1513 
1514 /* Try to find a statement or a sequence of statements that can be performed
1515    on a smaller type:
1516 
1517      type x_t;
1518      TYPE x_T, res0_T, res1_T;
1519    loop:
1520      S1  x_t = *p;
1521      S2  x_T = (TYPE) x_t;
1522      S3  res0_T = op (x_T, C0);
1523      S4  res1_T = op (res0_T, C1);
1524      S5  ... = () res1_T;  - type demotion
1525 
1526    where type 'TYPE' is at least double the size of type 'type', C0 and C1 are
1527    constants.
1528    Check if S3 and S4 can be done on a smaller type than 'TYPE', it can either
1529    be 'type' or some intermediate type.  For now, we expect S5 to be a type
1530    demotion operation.  We also check that S3 and S4 have only one use.  */
1531 
1532 static gimple *
vect_recog_over_widening_pattern(vec<gimple * > * stmts,tree * type_in,tree * type_out)1533 vect_recog_over_widening_pattern (vec<gimple *> *stmts,
1534                                   tree *type_in, tree *type_out)
1535 {
1536   gimple *stmt = stmts->pop ();
1537   gimple *pattern_stmt = NULL, *new_def_stmt, *prev_stmt = NULL,
1538 	 *use_stmt = NULL;
1539   tree op0, op1, vectype = NULL_TREE, use_lhs, use_type;
1540   tree var = NULL_TREE, new_type = NULL_TREE, new_oprnd;
1541   bool first;
1542   tree type = NULL;
1543 
1544   first = true;
1545   while (1)
1546     {
1547       if (!vinfo_for_stmt (stmt)
1548           || STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (stmt)))
1549         return NULL;
1550 
1551       new_def_stmt = NULL;
1552       if (!vect_operation_fits_smaller_type (stmt, var, &new_type,
1553                                              &op0, &op1, &new_def_stmt,
1554                                              stmts))
1555         {
1556           if (first)
1557             return NULL;
1558           else
1559             break;
1560         }
1561 
1562       /* STMT can be performed on a smaller type.  Check its uses.  */
1563       use_stmt = vect_single_imm_use (stmt);
1564       if (!use_stmt || !is_gimple_assign (use_stmt))
1565         return NULL;
1566 
1567       /* Create pattern statement for STMT.  */
1568       vectype = get_vectype_for_scalar_type (new_type);
1569       if (!vectype)
1570         return NULL;
1571 
1572       /* We want to collect all the statements for which we create pattern
1573          statetments, except for the case when the last statement in the
1574          sequence doesn't have a corresponding pattern statement.  In such
1575          case we associate the last pattern statement with the last statement
1576          in the sequence.  Therefore, we only add the original statement to
1577          the list if we know that it is not the last.  */
1578       if (prev_stmt)
1579         stmts->safe_push (prev_stmt);
1580 
1581       var = vect_recog_temp_ssa_var (new_type, NULL);
1582       pattern_stmt
1583 	= gimple_build_assign (var, gimple_assign_rhs_code (stmt), op0, op1);
1584       STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt)) = pattern_stmt;
1585       new_pattern_def_seq (vinfo_for_stmt (stmt), new_def_stmt);
1586 
1587       if (dump_enabled_p ())
1588         {
1589           dump_printf_loc (MSG_NOTE, vect_location,
1590                            "created pattern stmt: ");
1591           dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
1592         }
1593 
1594       type = gimple_expr_type (stmt);
1595       prev_stmt = stmt;
1596       stmt = use_stmt;
1597 
1598       first = false;
1599     }
1600 
1601   /* We got a sequence.  We expect it to end with a type demotion operation.
1602      Otherwise, we quit (for now).  There are three possible cases: the
1603      conversion is to NEW_TYPE (we don't do anything), the conversion is to
1604      a type bigger than NEW_TYPE and/or the signedness of USE_TYPE and
1605      NEW_TYPE differs (we create a new conversion statement).  */
1606   if (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (use_stmt)))
1607     {
1608       use_lhs = gimple_assign_lhs (use_stmt);
1609       use_type = TREE_TYPE (use_lhs);
1610       /* Support only type demotion or signedess change.  */
1611       if (!INTEGRAL_TYPE_P (use_type)
1612 	  || TYPE_PRECISION (type) <= TYPE_PRECISION (use_type))
1613         return NULL;
1614 
1615       /* Check that NEW_TYPE is not bigger than the conversion result.  */
1616       if (TYPE_PRECISION (new_type) > TYPE_PRECISION (use_type))
1617 	return NULL;
1618 
1619       if (TYPE_UNSIGNED (new_type) != TYPE_UNSIGNED (use_type)
1620           || TYPE_PRECISION (new_type) != TYPE_PRECISION (use_type))
1621         {
1622           /* Create NEW_TYPE->USE_TYPE conversion.  */
1623 	  new_oprnd = make_ssa_name (use_type);
1624 	  pattern_stmt = gimple_build_assign (new_oprnd, NOP_EXPR, var);
1625           STMT_VINFO_RELATED_STMT (vinfo_for_stmt (use_stmt)) = pattern_stmt;
1626 
1627           *type_in = get_vectype_for_scalar_type (new_type);
1628           *type_out = get_vectype_for_scalar_type (use_type);
1629 
1630           /* We created a pattern statement for the last statement in the
1631              sequence, so we don't need to associate it with the pattern
1632              statement created for PREV_STMT.  Therefore, we add PREV_STMT
1633              to the list in order to mark it later in vect_pattern_recog_1.  */
1634           if (prev_stmt)
1635             stmts->safe_push (prev_stmt);
1636         }
1637       else
1638         {
1639           if (prev_stmt)
1640 	    STMT_VINFO_PATTERN_DEF_SEQ (vinfo_for_stmt (use_stmt))
1641 	       = STMT_VINFO_PATTERN_DEF_SEQ (vinfo_for_stmt (prev_stmt));
1642 
1643           *type_in = vectype;
1644           *type_out = NULL_TREE;
1645         }
1646 
1647       stmts->safe_push (use_stmt);
1648     }
1649   else
1650     /* TODO: support general case, create a conversion to the correct type.  */
1651     return NULL;
1652 
1653   /* Pattern detected.  */
1654   if (dump_enabled_p ())
1655     {
1656       dump_printf_loc (MSG_NOTE, vect_location,
1657                        "vect_recog_over_widening_pattern: detected: ");
1658       dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
1659     }
1660 
1661   return pattern_stmt;
1662 }
1663 
1664 /* Detect widening shift pattern:
1665 
1666    type a_t;
1667    TYPE a_T, res_T;
1668 
1669    S1 a_t = ;
1670    S2 a_T = (TYPE) a_t;
1671    S3 res_T = a_T << CONST;
1672 
1673   where type 'TYPE' is at least double the size of type 'type'.
1674 
1675   Also detect cases where the shift result is immediately converted
1676   to another type 'result_type' that is no larger in size than 'TYPE'.
1677   In those cases we perform a widen-shift that directly results in
1678   'result_type', to avoid a possible over-widening situation:
1679 
1680   type a_t;
1681   TYPE a_T, res_T;
1682   result_type res_result;
1683 
1684   S1 a_t = ;
1685   S2 a_T = (TYPE) a_t;
1686   S3 res_T = a_T << CONST;
1687   S4 res_result = (result_type) res_T;
1688       '--> res_result' = a_t w<< CONST;
1689 
1690   And a case when 'TYPE' is 4 times bigger than 'type'.  In that case we
1691   create an additional pattern stmt for S2 to create a variable of an
1692   intermediate type, and perform widen-shift on the intermediate type:
1693 
1694   type a_t;
1695   interm_type a_it;
1696   TYPE a_T, res_T, res_T';
1697 
1698   S1 a_t = ;
1699   S2 a_T = (TYPE) a_t;
1700       '--> a_it = (interm_type) a_t;
1701   S3 res_T = a_T << CONST;
1702       '--> res_T' = a_it <<* CONST;
1703 
1704   Input/Output:
1705 
1706   * STMTS: Contains a stmt from which the pattern search begins.
1707     In case of unsigned widen-shift, the original stmt (S3) is replaced with S4
1708     in STMTS.  When an intermediate type is used and a pattern statement is
1709     created for S2, we also put S2 here (before S3).
1710 
1711   Output:
1712 
1713   * TYPE_IN: The type of the input arguments to the pattern.
1714 
1715   * TYPE_OUT: The type of the output of this pattern.
1716 
1717   * Return value: A new stmt that will be used to replace the sequence of
1718     stmts that constitute the pattern.  In this case it will be:
1719     WIDEN_LSHIFT_EXPR <a_t, CONST>.  */
1720 
1721 static gimple *
vect_recog_widen_shift_pattern(vec<gimple * > * stmts,tree * type_in,tree * type_out)1722 vect_recog_widen_shift_pattern (vec<gimple *> *stmts,
1723 				tree *type_in, tree *type_out)
1724 {
1725   gimple *last_stmt = stmts->pop ();
1726   gimple *def_stmt0;
1727   tree oprnd0, oprnd1;
1728   tree type, half_type0;
1729   gimple *pattern_stmt;
1730   tree vectype, vectype_out = NULL_TREE;
1731   tree var;
1732   enum tree_code dummy_code;
1733   int dummy_int;
1734   vec<tree>  dummy_vec;
1735   gimple *use_stmt;
1736   bool promotion;
1737 
1738   if (!is_gimple_assign (last_stmt) || !vinfo_for_stmt (last_stmt))
1739     return NULL;
1740 
1741   if (STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (last_stmt)))
1742     return NULL;
1743 
1744   if (gimple_assign_rhs_code (last_stmt) != LSHIFT_EXPR)
1745     return NULL;
1746 
1747   oprnd0 = gimple_assign_rhs1 (last_stmt);
1748   oprnd1 = gimple_assign_rhs2 (last_stmt);
1749   if (TREE_CODE (oprnd0) != SSA_NAME || TREE_CODE (oprnd1) != INTEGER_CST)
1750     return NULL;
1751 
1752   /* Check operand 0: it has to be defined by a type promotion.  */
1753   if (!type_conversion_p (oprnd0, last_stmt, false, &half_type0, &def_stmt0,
1754 			  &promotion)
1755       || !promotion)
1756      return NULL;
1757 
1758   /* Check operand 1: has to be positive.  We check that it fits the type
1759      in vect_handle_widen_op_by_const ().  */
1760   if (tree_int_cst_compare (oprnd1, size_zero_node) <= 0)
1761     return NULL;
1762 
1763   oprnd0 = gimple_assign_rhs1 (def_stmt0);
1764   type = gimple_expr_type (last_stmt);
1765 
1766   /* Check for subsequent conversion to another type.  */
1767   use_stmt = vect_single_imm_use (last_stmt);
1768   if (use_stmt && is_gimple_assign (use_stmt)
1769       && CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (use_stmt))
1770       && !STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (use_stmt)))
1771     {
1772       tree use_lhs = gimple_assign_lhs (use_stmt);
1773       tree use_type = TREE_TYPE (use_lhs);
1774 
1775       if (INTEGRAL_TYPE_P (use_type)
1776 	  && TYPE_PRECISION (use_type) <= TYPE_PRECISION (type))
1777 	{
1778 	  last_stmt = use_stmt;
1779 	  type = use_type;
1780 	}
1781     }
1782 
1783   /* Check if this a widening operation.  */
1784   gimple *wstmt = NULL;
1785   if (!vect_handle_widen_op_by_const (last_stmt, LSHIFT_EXPR, oprnd1,
1786        				      &oprnd0, &wstmt,
1787 	                              type, &half_type0, def_stmt0))
1788     return NULL;
1789 
1790   /* Pattern detected.  */
1791   if (dump_enabled_p ())
1792     dump_printf_loc (MSG_NOTE, vect_location,
1793                      "vect_recog_widen_shift_pattern: detected:\n");
1794 
1795   /* Check target support.  */
1796   vectype = get_vectype_for_scalar_type (half_type0);
1797   vectype_out = get_vectype_for_scalar_type (type);
1798 
1799   if (!vectype
1800       || !vectype_out
1801       || !supportable_widening_operation (WIDEN_LSHIFT_EXPR, last_stmt,
1802 					  vectype_out, vectype,
1803 					  &dummy_code, &dummy_code,
1804 					  &dummy_int, &dummy_vec))
1805     return NULL;
1806 
1807   *type_in = vectype;
1808   *type_out = vectype_out;
1809 
1810   /* Pattern supported.  Create a stmt to be used to replace the pattern.  */
1811   var = vect_recog_temp_ssa_var (type, NULL);
1812   pattern_stmt
1813     = gimple_build_assign (var, WIDEN_LSHIFT_EXPR, oprnd0, oprnd1);
1814   if (wstmt)
1815     {
1816       stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
1817       new_pattern_def_seq (stmt_vinfo, wstmt);
1818       stmt_vec_info new_stmt_info
1819 	= new_stmt_vec_info (wstmt, stmt_vinfo->vinfo);
1820       set_vinfo_for_stmt (wstmt, new_stmt_info);
1821       STMT_VINFO_VECTYPE (new_stmt_info) = vectype;
1822     }
1823 
1824   if (dump_enabled_p ())
1825     dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt, 0);
1826 
1827   stmts->safe_push (last_stmt);
1828   return pattern_stmt;
1829 }
1830 
1831 /* Detect a rotate pattern wouldn't be otherwise vectorized:
1832 
1833    type a_t, b_t, c_t;
1834 
1835    S0 a_t = b_t r<< c_t;
1836 
1837   Input/Output:
1838 
1839   * STMTS: Contains a stmt from which the pattern search begins,
1840     i.e. the shift/rotate stmt.  The original stmt (S0) is replaced
1841     with a sequence:
1842 
1843    S1 d_t = -c_t;
1844    S2 e_t = d_t & (B - 1);
1845    S3 f_t = b_t << c_t;
1846    S4 g_t = b_t >> e_t;
1847    S0 a_t = f_t | g_t;
1848 
1849     where B is element bitsize of type.
1850 
1851   Output:
1852 
1853   * TYPE_IN: The type of the input arguments to the pattern.
1854 
1855   * TYPE_OUT: The type of the output of this pattern.
1856 
1857   * Return value: A new stmt that will be used to replace the rotate
1858     S0 stmt.  */
1859 
1860 static gimple *
vect_recog_rotate_pattern(vec<gimple * > * stmts,tree * type_in,tree * type_out)1861 vect_recog_rotate_pattern (vec<gimple *> *stmts, tree *type_in, tree *type_out)
1862 {
1863   gimple *last_stmt = stmts->pop ();
1864   tree oprnd0, oprnd1, lhs, var, var1, var2, vectype, type, stype, def, def2;
1865   gimple *pattern_stmt, *def_stmt;
1866   enum tree_code rhs_code;
1867   stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
1868   vec_info *vinfo = stmt_vinfo->vinfo;
1869   enum vect_def_type dt;
1870   optab optab1, optab2;
1871   edge ext_def = NULL;
1872 
1873   if (!is_gimple_assign (last_stmt))
1874     return NULL;
1875 
1876   rhs_code = gimple_assign_rhs_code (last_stmt);
1877   switch (rhs_code)
1878     {
1879     case LROTATE_EXPR:
1880     case RROTATE_EXPR:
1881       break;
1882     default:
1883       return NULL;
1884     }
1885 
1886   if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
1887     return NULL;
1888 
1889   lhs = gimple_assign_lhs (last_stmt);
1890   oprnd0 = gimple_assign_rhs1 (last_stmt);
1891   type = TREE_TYPE (oprnd0);
1892   oprnd1 = gimple_assign_rhs2 (last_stmt);
1893   if (TREE_CODE (oprnd0) != SSA_NAME
1894       || TYPE_PRECISION (TREE_TYPE (lhs)) != TYPE_PRECISION (type)
1895       || !INTEGRAL_TYPE_P (type)
1896       || !TYPE_UNSIGNED (type))
1897     return NULL;
1898 
1899   if (!vect_is_simple_use (oprnd1, vinfo, &def_stmt, &dt))
1900     return NULL;
1901 
1902   if (dt != vect_internal_def
1903       && dt != vect_constant_def
1904       && dt != vect_external_def)
1905     return NULL;
1906 
1907   vectype = get_vectype_for_scalar_type (type);
1908   if (vectype == NULL_TREE)
1909     return NULL;
1910 
1911   /* If vector/vector or vector/scalar rotate is supported by the target,
1912      don't do anything here.  */
1913   optab1 = optab_for_tree_code (rhs_code, vectype, optab_vector);
1914   if (optab1
1915       && optab_handler (optab1, TYPE_MODE (vectype)) != CODE_FOR_nothing)
1916     return NULL;
1917 
1918   if (is_a <bb_vec_info> (vinfo) || dt != vect_internal_def)
1919     {
1920       optab2 = optab_for_tree_code (rhs_code, vectype, optab_scalar);
1921       if (optab2
1922 	  && optab_handler (optab2, TYPE_MODE (vectype)) != CODE_FOR_nothing)
1923 	return NULL;
1924     }
1925 
1926   /* If vector/vector or vector/scalar shifts aren't supported by the target,
1927      don't do anything here either.  */
1928   optab1 = optab_for_tree_code (LSHIFT_EXPR, vectype, optab_vector);
1929   optab2 = optab_for_tree_code (RSHIFT_EXPR, vectype, optab_vector);
1930   if (!optab1
1931       || optab_handler (optab1, TYPE_MODE (vectype)) == CODE_FOR_nothing
1932       || !optab2
1933       || optab_handler (optab2, TYPE_MODE (vectype)) == CODE_FOR_nothing)
1934     {
1935       if (! is_a <bb_vec_info> (vinfo) && dt == vect_internal_def)
1936 	return NULL;
1937       optab1 = optab_for_tree_code (LSHIFT_EXPR, vectype, optab_scalar);
1938       optab2 = optab_for_tree_code (RSHIFT_EXPR, vectype, optab_scalar);
1939       if (!optab1
1940 	  || optab_handler (optab1, TYPE_MODE (vectype)) == CODE_FOR_nothing
1941 	  || !optab2
1942 	  || optab_handler (optab2, TYPE_MODE (vectype)) == CODE_FOR_nothing)
1943 	return NULL;
1944     }
1945 
1946   *type_in = vectype;
1947   *type_out = vectype;
1948   if (*type_in == NULL_TREE)
1949     return NULL;
1950 
1951   if (dt == vect_external_def
1952       && TREE_CODE (oprnd1) == SSA_NAME
1953       && is_a <loop_vec_info> (vinfo))
1954     {
1955       struct loop *loop = as_a <loop_vec_info> (vinfo)->loop;
1956       ext_def = loop_preheader_edge (loop);
1957       if (!SSA_NAME_IS_DEFAULT_DEF (oprnd1))
1958 	{
1959 	  basic_block bb = gimple_bb (SSA_NAME_DEF_STMT (oprnd1));
1960 	  if (bb == NULL
1961 	      || !dominated_by_p (CDI_DOMINATORS, ext_def->dest, bb))
1962 	    ext_def = NULL;
1963 	}
1964     }
1965 
1966   def = NULL_TREE;
1967   scalar_int_mode mode = SCALAR_INT_TYPE_MODE (type);
1968   if (dt != vect_internal_def || TYPE_MODE (TREE_TYPE (oprnd1)) == mode)
1969     def = oprnd1;
1970   else if (def_stmt && gimple_assign_cast_p (def_stmt))
1971     {
1972       tree rhs1 = gimple_assign_rhs1 (def_stmt);
1973       if (TYPE_MODE (TREE_TYPE (rhs1)) == mode
1974 	  && TYPE_PRECISION (TREE_TYPE (rhs1))
1975 	     == TYPE_PRECISION (type))
1976 	def = rhs1;
1977     }
1978 
1979   STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;
1980   if (def == NULL_TREE)
1981     {
1982       def = vect_recog_temp_ssa_var (type, NULL);
1983       def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd1);
1984       append_pattern_def_seq (stmt_vinfo, def_stmt);
1985     }
1986   stype = TREE_TYPE (def);
1987   scalar_int_mode smode = SCALAR_INT_TYPE_MODE (stype);
1988 
1989   if (TREE_CODE (def) == INTEGER_CST)
1990     {
1991       if (!tree_fits_uhwi_p (def)
1992 	  || tree_to_uhwi (def) >= GET_MODE_PRECISION (mode)
1993 	  || integer_zerop (def))
1994 	return NULL;
1995       def2 = build_int_cst (stype,
1996 			    GET_MODE_PRECISION (mode) - tree_to_uhwi (def));
1997     }
1998   else
1999     {
2000       tree vecstype = get_vectype_for_scalar_type (stype);
2001       stmt_vec_info def_stmt_vinfo;
2002 
2003       if (vecstype == NULL_TREE)
2004 	return NULL;
2005       def2 = vect_recog_temp_ssa_var (stype, NULL);
2006       def_stmt = gimple_build_assign (def2, NEGATE_EXPR, def);
2007       if (ext_def)
2008 	{
2009 	  basic_block new_bb
2010 	    = gsi_insert_on_edge_immediate (ext_def, def_stmt);
2011 	  gcc_assert (!new_bb);
2012 	}
2013       else
2014 	{
2015 	  def_stmt_vinfo = new_stmt_vec_info (def_stmt, vinfo);
2016 	  set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
2017 	  STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecstype;
2018 	  append_pattern_def_seq (stmt_vinfo, def_stmt);
2019 	}
2020 
2021       def2 = vect_recog_temp_ssa_var (stype, NULL);
2022       tree mask = build_int_cst (stype, GET_MODE_PRECISION (smode) - 1);
2023       def_stmt = gimple_build_assign (def2, BIT_AND_EXPR,
2024 				      gimple_assign_lhs (def_stmt), mask);
2025       if (ext_def)
2026 	{
2027 	  basic_block new_bb
2028 	    = gsi_insert_on_edge_immediate (ext_def, def_stmt);
2029 	  gcc_assert (!new_bb);
2030 	}
2031       else
2032 	{
2033 	  def_stmt_vinfo = new_stmt_vec_info (def_stmt, vinfo);
2034 	  set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
2035 	  STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecstype;
2036 	  append_pattern_def_seq (stmt_vinfo, def_stmt);
2037 	}
2038     }
2039 
2040   var1 = vect_recog_temp_ssa_var (type, NULL);
2041   def_stmt = gimple_build_assign (var1, rhs_code == LROTATE_EXPR
2042 					? LSHIFT_EXPR : RSHIFT_EXPR,
2043 				  oprnd0, def);
2044   append_pattern_def_seq (stmt_vinfo, def_stmt);
2045 
2046   var2 = vect_recog_temp_ssa_var (type, NULL);
2047   def_stmt = gimple_build_assign (var2, rhs_code == LROTATE_EXPR
2048 					? RSHIFT_EXPR : LSHIFT_EXPR,
2049 				  oprnd0, def2);
2050   append_pattern_def_seq (stmt_vinfo, def_stmt);
2051 
2052   /* Pattern detected.  */
2053   if (dump_enabled_p ())
2054     dump_printf_loc (MSG_NOTE, vect_location,
2055 		     "vect_recog_rotate_pattern: detected:\n");
2056 
2057   /* Pattern supported.  Create a stmt to be used to replace the pattern.  */
2058   var = vect_recog_temp_ssa_var (type, NULL);
2059   pattern_stmt = gimple_build_assign (var, BIT_IOR_EXPR, var1, var2);
2060 
2061   if (dump_enabled_p ())
2062     dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt, 0);
2063 
2064   stmts->safe_push (last_stmt);
2065   return pattern_stmt;
2066 }
2067 
2068 /* Detect a vector by vector shift pattern that wouldn't be otherwise
2069    vectorized:
2070 
2071    type a_t;
2072    TYPE b_T, res_T;
2073 
2074    S1 a_t = ;
2075    S2 b_T = ;
2076    S3 res_T = b_T op a_t;
2077 
2078   where type 'TYPE' is a type with different size than 'type',
2079   and op is <<, >> or rotate.
2080 
2081   Also detect cases:
2082 
2083    type a_t;
2084    TYPE b_T, c_T, res_T;
2085 
2086    S0 c_T = ;
2087    S1 a_t = (type) c_T;
2088    S2 b_T = ;
2089    S3 res_T = b_T op a_t;
2090 
2091   Input/Output:
2092 
2093   * STMTS: Contains a stmt from which the pattern search begins,
2094     i.e. the shift/rotate stmt.  The original stmt (S3) is replaced
2095     with a shift/rotate which has same type on both operands, in the
2096     second case just b_T op c_T, in the first case with added cast
2097     from a_t to c_T in STMT_VINFO_PATTERN_DEF_SEQ.
2098 
2099   Output:
2100 
2101   * TYPE_IN: The type of the input arguments to the pattern.
2102 
2103   * TYPE_OUT: The type of the output of this pattern.
2104 
2105   * Return value: A new stmt that will be used to replace the shift/rotate
2106     S3 stmt.  */
2107 
2108 static gimple *
vect_recog_vector_vector_shift_pattern(vec<gimple * > * stmts,tree * type_in,tree * type_out)2109 vect_recog_vector_vector_shift_pattern (vec<gimple *> *stmts,
2110 					tree *type_in, tree *type_out)
2111 {
2112   gimple *last_stmt = stmts->pop ();
2113   tree oprnd0, oprnd1, lhs, var;
2114   gimple *pattern_stmt, *def_stmt;
2115   enum tree_code rhs_code;
2116   stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
2117   vec_info *vinfo = stmt_vinfo->vinfo;
2118   enum vect_def_type dt;
2119 
2120   if (!is_gimple_assign (last_stmt))
2121     return NULL;
2122 
2123   rhs_code = gimple_assign_rhs_code (last_stmt);
2124   switch (rhs_code)
2125     {
2126     case LSHIFT_EXPR:
2127     case RSHIFT_EXPR:
2128     case LROTATE_EXPR:
2129     case RROTATE_EXPR:
2130       break;
2131     default:
2132       return NULL;
2133     }
2134 
2135   if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
2136     return NULL;
2137 
2138   lhs = gimple_assign_lhs (last_stmt);
2139   oprnd0 = gimple_assign_rhs1 (last_stmt);
2140   oprnd1 = gimple_assign_rhs2 (last_stmt);
2141   if (TREE_CODE (oprnd0) != SSA_NAME
2142       || TREE_CODE (oprnd1) != SSA_NAME
2143       || TYPE_MODE (TREE_TYPE (oprnd0)) == TYPE_MODE (TREE_TYPE (oprnd1))
2144       || !type_has_mode_precision_p (TREE_TYPE (oprnd1))
2145       || TYPE_PRECISION (TREE_TYPE (lhs))
2146 	 != TYPE_PRECISION (TREE_TYPE (oprnd0)))
2147     return NULL;
2148 
2149   if (!vect_is_simple_use (oprnd1, vinfo, &def_stmt, &dt))
2150     return NULL;
2151 
2152   if (dt != vect_internal_def)
2153     return NULL;
2154 
2155   *type_in = get_vectype_for_scalar_type (TREE_TYPE (oprnd0));
2156   *type_out = *type_in;
2157   if (*type_in == NULL_TREE)
2158     return NULL;
2159 
2160   tree def = NULL_TREE;
2161   stmt_vec_info def_vinfo = vinfo_for_stmt (def_stmt);
2162   if (!STMT_VINFO_IN_PATTERN_P (def_vinfo) && gimple_assign_cast_p (def_stmt))
2163     {
2164       tree rhs1 = gimple_assign_rhs1 (def_stmt);
2165       if (TYPE_MODE (TREE_TYPE (rhs1)) == TYPE_MODE (TREE_TYPE (oprnd0))
2166 	  && TYPE_PRECISION (TREE_TYPE (rhs1))
2167 	     == TYPE_PRECISION (TREE_TYPE (oprnd0)))
2168 	{
2169 	  if (TYPE_PRECISION (TREE_TYPE (oprnd1))
2170 	      >= TYPE_PRECISION (TREE_TYPE (rhs1)))
2171 	    def = rhs1;
2172 	  else
2173 	    {
2174 	      tree mask
2175 		= build_low_bits_mask (TREE_TYPE (rhs1),
2176 				       TYPE_PRECISION (TREE_TYPE (oprnd1)));
2177 	      def = vect_recog_temp_ssa_var (TREE_TYPE (rhs1), NULL);
2178 	      def_stmt = gimple_build_assign (def, BIT_AND_EXPR, rhs1, mask);
2179 	      new_pattern_def_seq (stmt_vinfo, def_stmt);
2180 	    }
2181 	}
2182     }
2183 
2184   if (def == NULL_TREE)
2185     {
2186       def = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
2187       def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd1);
2188       new_pattern_def_seq (stmt_vinfo, def_stmt);
2189     }
2190 
2191   /* Pattern detected.  */
2192   if (dump_enabled_p ())
2193     dump_printf_loc (MSG_NOTE, vect_location,
2194                      "vect_recog_vector_vector_shift_pattern: detected:\n");
2195 
2196   /* Pattern supported.  Create a stmt to be used to replace the pattern.  */
2197   var = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
2198   pattern_stmt = gimple_build_assign (var, rhs_code, oprnd0, def);
2199 
2200   if (dump_enabled_p ())
2201     dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt, 0);
2202 
2203   stmts->safe_push (last_stmt);
2204   return pattern_stmt;
2205 }
2206 
2207 /* Return true iff the target has a vector optab implementing the operation
2208    CODE on type VECTYPE.  */
2209 
2210 static bool
target_has_vecop_for_code(tree_code code,tree vectype)2211 target_has_vecop_for_code (tree_code code, tree vectype)
2212 {
2213   optab voptab = optab_for_tree_code (code, vectype, optab_vector);
2214   return voptab
2215 	 && optab_handler (voptab, TYPE_MODE (vectype)) != CODE_FOR_nothing;
2216 }
2217 
2218 /* Verify that the target has optabs of VECTYPE to perform all the steps
2219    needed by the multiplication-by-immediate synthesis algorithm described by
2220    ALG and VAR.  If SYNTH_SHIFT_P is true ensure that vector addition is
2221    present.  Return true iff the target supports all the steps.  */
2222 
2223 static bool
target_supports_mult_synth_alg(struct algorithm * alg,mult_variant var,tree vectype,bool synth_shift_p)2224 target_supports_mult_synth_alg (struct algorithm *alg, mult_variant var,
2225 				 tree vectype, bool synth_shift_p)
2226 {
2227   if (alg->op[0] != alg_zero && alg->op[0] != alg_m)
2228     return false;
2229 
2230   bool supports_vminus = target_has_vecop_for_code (MINUS_EXPR, vectype);
2231   bool supports_vplus = target_has_vecop_for_code (PLUS_EXPR, vectype);
2232 
2233   if (var == negate_variant
2234       && !target_has_vecop_for_code (NEGATE_EXPR, vectype))
2235     return false;
2236 
2237   /* If we must synthesize shifts with additions make sure that vector
2238      addition is available.  */
2239   if ((var == add_variant || synth_shift_p) && !supports_vplus)
2240     return false;
2241 
2242   for (int i = 1; i < alg->ops; i++)
2243     {
2244       switch (alg->op[i])
2245 	{
2246 	case alg_shift:
2247 	  break;
2248 	case alg_add_t_m2:
2249 	case alg_add_t2_m:
2250 	case alg_add_factor:
2251 	  if (!supports_vplus)
2252 	    return false;
2253 	  break;
2254 	case alg_sub_t_m2:
2255 	case alg_sub_t2_m:
2256 	case alg_sub_factor:
2257 	  if (!supports_vminus)
2258 	    return false;
2259 	  break;
2260 	case alg_unknown:
2261 	case alg_m:
2262 	case alg_zero:
2263 	case alg_impossible:
2264 	  return false;
2265 	default:
2266 	  gcc_unreachable ();
2267 	}
2268     }
2269 
2270   return true;
2271 }
2272 
2273 /* Synthesize a left shift of OP by AMNT bits using a series of additions and
2274    putting the final result in DEST.  Append all statements but the last into
2275    VINFO.  Return the last statement.  */
2276 
2277 static gimple *
synth_lshift_by_additions(tree dest,tree op,HOST_WIDE_INT amnt,stmt_vec_info vinfo)2278 synth_lshift_by_additions (tree dest, tree op, HOST_WIDE_INT amnt,
2279 			   stmt_vec_info vinfo)
2280 {
2281   HOST_WIDE_INT i;
2282   tree itype = TREE_TYPE (op);
2283   tree prev_res = op;
2284   gcc_assert (amnt >= 0);
2285   for (i = 0; i < amnt; i++)
2286     {
2287       tree tmp_var = (i < amnt - 1) ? vect_recog_temp_ssa_var (itype, NULL)
2288 		      : dest;
2289       gimple *stmt
2290         = gimple_build_assign (tmp_var, PLUS_EXPR, prev_res, prev_res);
2291       prev_res = tmp_var;
2292       if (i < amnt - 1)
2293 	append_pattern_def_seq (vinfo, stmt);
2294       else
2295 	return stmt;
2296     }
2297   gcc_unreachable ();
2298   return NULL;
2299 }
2300 
2301 /* Helper for vect_synth_mult_by_constant.  Apply a binary operation
2302    CODE to operands OP1 and OP2, creating a new temporary SSA var in
2303    the process if necessary.  Append the resulting assignment statements
2304    to the sequence in STMT_VINFO.  Return the SSA variable that holds the
2305    result of the binary operation.  If SYNTH_SHIFT_P is true synthesize
2306    left shifts using additions.  */
2307 
2308 static tree
apply_binop_and_append_stmt(tree_code code,tree op1,tree op2,stmt_vec_info stmt_vinfo,bool synth_shift_p)2309 apply_binop_and_append_stmt (tree_code code, tree op1, tree op2,
2310 			     stmt_vec_info stmt_vinfo, bool synth_shift_p)
2311 {
2312   if (integer_zerop (op2)
2313       && (code == LSHIFT_EXPR
2314 	  || code == PLUS_EXPR))
2315     {
2316       gcc_assert (TREE_CODE (op1) == SSA_NAME);
2317       return op1;
2318     }
2319 
2320   gimple *stmt;
2321   tree itype = TREE_TYPE (op1);
2322   tree tmp_var = vect_recog_temp_ssa_var (itype, NULL);
2323 
2324   if (code == LSHIFT_EXPR
2325       && synth_shift_p)
2326     {
2327       stmt = synth_lshift_by_additions (tmp_var, op1, TREE_INT_CST_LOW (op2),
2328 					 stmt_vinfo);
2329       append_pattern_def_seq (stmt_vinfo, stmt);
2330       return tmp_var;
2331     }
2332 
2333   stmt = gimple_build_assign (tmp_var, code, op1, op2);
2334   append_pattern_def_seq (stmt_vinfo, stmt);
2335   return tmp_var;
2336 }
2337 
2338 /* Synthesize a multiplication of OP by an INTEGER_CST VAL using shifts
2339    and simple arithmetic operations to be vectorized.  Record the statements
2340    produced in STMT_VINFO and return the last statement in the sequence or
2341    NULL if it's not possible to synthesize such a multiplication.
2342    This function mirrors the behavior of expand_mult_const in expmed.c but
2343    works on tree-ssa form.  */
2344 
2345 static gimple *
vect_synth_mult_by_constant(tree op,tree val,stmt_vec_info stmt_vinfo)2346 vect_synth_mult_by_constant (tree op, tree val,
2347 			     stmt_vec_info stmt_vinfo)
2348 {
2349   tree itype = TREE_TYPE (op);
2350   machine_mode mode = TYPE_MODE (itype);
2351   struct algorithm alg;
2352   mult_variant variant;
2353   if (!tree_fits_shwi_p (val))
2354     return NULL;
2355 
2356   /* Multiplication synthesis by shifts, adds and subs can introduce
2357      signed overflow where the original operation didn't.  Perform the
2358      operations on an unsigned type and cast back to avoid this.
2359      In the future we may want to relax this for synthesis algorithms
2360      that we can prove do not cause unexpected overflow.  */
2361   bool cast_to_unsigned_p = !TYPE_OVERFLOW_WRAPS (itype);
2362 
2363   tree multtype = cast_to_unsigned_p ? unsigned_type_for (itype) : itype;
2364 
2365   /* Targets that don't support vector shifts but support vector additions
2366      can synthesize shifts that way.  */
2367   bool synth_shift_p = !vect_supportable_shift (LSHIFT_EXPR, multtype);
2368 
2369   HOST_WIDE_INT hwval = tree_to_shwi (val);
2370   /* Use MAX_COST here as we don't want to limit the sequence on rtx costs.
2371      The vectorizer's benefit analysis will decide whether it's beneficial
2372      to do this.  */
2373   bool possible = choose_mult_variant (mode, hwval, &alg,
2374 					&variant, MAX_COST);
2375   if (!possible)
2376     return NULL;
2377 
2378   tree vectype = get_vectype_for_scalar_type (multtype);
2379 
2380   if (!vectype
2381       || !target_supports_mult_synth_alg (&alg, variant,
2382 					   vectype, synth_shift_p))
2383     return NULL;
2384 
2385   tree accumulator;
2386 
2387   /* Clear out the sequence of statements so we can populate it below.  */
2388   STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;
2389   gimple *stmt = NULL;
2390 
2391   if (cast_to_unsigned_p)
2392     {
2393       tree tmp_op = vect_recog_temp_ssa_var (multtype, NULL);
2394       stmt = gimple_build_assign (tmp_op, CONVERT_EXPR, op);
2395       append_pattern_def_seq (stmt_vinfo, stmt);
2396       op = tmp_op;
2397     }
2398 
2399   if (alg.op[0] == alg_zero)
2400     accumulator = build_int_cst (multtype, 0);
2401   else
2402     accumulator = op;
2403 
2404   bool needs_fixup = (variant == negate_variant)
2405 		      || (variant == add_variant);
2406 
2407   for (int i = 1; i < alg.ops; i++)
2408     {
2409       tree shft_log = build_int_cst (multtype, alg.log[i]);
2410       tree accum_tmp = vect_recog_temp_ssa_var (multtype, NULL);
2411       tree tmp_var = NULL_TREE;
2412 
2413       switch (alg.op[i])
2414 	{
2415 	case alg_shift:
2416 	  if (synth_shift_p)
2417 	    stmt
2418 	      = synth_lshift_by_additions (accum_tmp, accumulator, alg.log[i],
2419 					    stmt_vinfo);
2420 	  else
2421 	    stmt = gimple_build_assign (accum_tmp, LSHIFT_EXPR, accumulator,
2422 					 shft_log);
2423 	  break;
2424 	case alg_add_t_m2:
2425 	  tmp_var
2426 	    = apply_binop_and_append_stmt (LSHIFT_EXPR, op, shft_log,
2427 					    stmt_vinfo, synth_shift_p);
2428 	  stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, accumulator,
2429 				       tmp_var);
2430 	  break;
2431 	case alg_sub_t_m2:
2432 	  tmp_var = apply_binop_and_append_stmt (LSHIFT_EXPR, op,
2433 						  shft_log, stmt_vinfo,
2434 						  synth_shift_p);
2435 	  /* In some algorithms the first step involves zeroing the
2436 	     accumulator.  If subtracting from such an accumulator
2437 	     just emit the negation directly.  */
2438 	  if (integer_zerop (accumulator))
2439 	    stmt = gimple_build_assign (accum_tmp, NEGATE_EXPR, tmp_var);
2440 	  else
2441 	    stmt = gimple_build_assign (accum_tmp, MINUS_EXPR, accumulator,
2442 					tmp_var);
2443 	  break;
2444 	case alg_add_t2_m:
2445 	  tmp_var
2446 	    = apply_binop_and_append_stmt (LSHIFT_EXPR, accumulator, shft_log,
2447 					   stmt_vinfo, synth_shift_p);
2448 	  stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, tmp_var, op);
2449 	  break;
2450 	case alg_sub_t2_m:
2451 	  tmp_var
2452 	    = apply_binop_and_append_stmt (LSHIFT_EXPR, accumulator, shft_log,
2453 					   stmt_vinfo, synth_shift_p);
2454 	  stmt = gimple_build_assign (accum_tmp, MINUS_EXPR, tmp_var, op);
2455 	  break;
2456 	case alg_add_factor:
2457 	  tmp_var
2458 	    = apply_binop_and_append_stmt (LSHIFT_EXPR, accumulator, shft_log,
2459 					    stmt_vinfo, synth_shift_p);
2460 	  stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, accumulator,
2461 				       tmp_var);
2462 	  break;
2463 	case alg_sub_factor:
2464 	  tmp_var
2465 	    = apply_binop_and_append_stmt (LSHIFT_EXPR, accumulator, shft_log,
2466 					   stmt_vinfo, synth_shift_p);
2467 	  stmt = gimple_build_assign (accum_tmp, MINUS_EXPR, tmp_var,
2468 				      accumulator);
2469 	  break;
2470 	default:
2471 	  gcc_unreachable ();
2472 	}
2473       /* We don't want to append the last stmt in the sequence to stmt_vinfo
2474 	 but rather return it directly.  */
2475 
2476       if ((i < alg.ops - 1) || needs_fixup || cast_to_unsigned_p)
2477 	append_pattern_def_seq (stmt_vinfo, stmt);
2478       accumulator = accum_tmp;
2479     }
2480   if (variant == negate_variant)
2481     {
2482       tree accum_tmp = vect_recog_temp_ssa_var (multtype, NULL);
2483       stmt = gimple_build_assign (accum_tmp, NEGATE_EXPR, accumulator);
2484       accumulator = accum_tmp;
2485       if (cast_to_unsigned_p)
2486 	append_pattern_def_seq (stmt_vinfo, stmt);
2487     }
2488   else if (variant == add_variant)
2489     {
2490       tree accum_tmp = vect_recog_temp_ssa_var (multtype, NULL);
2491       stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, accumulator, op);
2492       accumulator = accum_tmp;
2493       if (cast_to_unsigned_p)
2494 	append_pattern_def_seq (stmt_vinfo, stmt);
2495     }
2496   /* Move back to a signed if needed.  */
2497   if (cast_to_unsigned_p)
2498     {
2499       tree accum_tmp = vect_recog_temp_ssa_var (itype, NULL);
2500       stmt = gimple_build_assign (accum_tmp, CONVERT_EXPR, accumulator);
2501     }
2502 
2503   return stmt;
2504 }
2505 
2506 /* Detect multiplication by constant and convert it into a sequence of
2507    shifts and additions, subtractions, negations.  We reuse the
2508    choose_mult_variant algorithms from expmed.c
2509 
2510    Input/Output:
2511 
2512    STMTS: Contains a stmt from which the pattern search begins,
2513    i.e. the mult stmt.
2514 
2515  Output:
2516 
2517   * TYPE_IN: The type of the input arguments to the pattern.
2518 
2519   * TYPE_OUT: The type of the output of this pattern.
2520 
2521   * Return value: A new stmt that will be used to replace
2522     the multiplication.  */
2523 
2524 static gimple *
vect_recog_mult_pattern(vec<gimple * > * stmts,tree * type_in,tree * type_out)2525 vect_recog_mult_pattern (vec<gimple *> *stmts,
2526 			 tree *type_in, tree *type_out)
2527 {
2528   gimple *last_stmt = stmts->pop ();
2529   tree oprnd0, oprnd1, vectype, itype;
2530   gimple *pattern_stmt;
2531   stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
2532 
2533   if (!is_gimple_assign (last_stmt))
2534     return NULL;
2535 
2536   if (gimple_assign_rhs_code (last_stmt) != MULT_EXPR)
2537     return NULL;
2538 
2539   oprnd0 = gimple_assign_rhs1 (last_stmt);
2540   oprnd1 = gimple_assign_rhs2 (last_stmt);
2541   itype = TREE_TYPE (oprnd0);
2542 
2543   if (TREE_CODE (oprnd0) != SSA_NAME
2544       || TREE_CODE (oprnd1) != INTEGER_CST
2545       || !INTEGRAL_TYPE_P (itype)
2546       || !type_has_mode_precision_p (itype))
2547     return NULL;
2548 
2549   vectype = get_vectype_for_scalar_type (itype);
2550   if (vectype == NULL_TREE)
2551     return NULL;
2552 
2553   /* If the target can handle vectorized multiplication natively,
2554      don't attempt to optimize this.  */
2555   optab mul_optab = optab_for_tree_code (MULT_EXPR, vectype, optab_default);
2556   if (mul_optab != unknown_optab)
2557     {
2558       machine_mode vec_mode = TYPE_MODE (vectype);
2559       int icode = (int) optab_handler (mul_optab, vec_mode);
2560       if (icode != CODE_FOR_nothing)
2561        return NULL;
2562     }
2563 
2564   pattern_stmt = vect_synth_mult_by_constant (oprnd0, oprnd1, stmt_vinfo);
2565   if (!pattern_stmt)
2566     return NULL;
2567 
2568   /* Pattern detected.  */
2569   if (dump_enabled_p ())
2570     dump_printf_loc (MSG_NOTE, vect_location,
2571 		     "vect_recog_mult_pattern: detected:\n");
2572 
2573   if (dump_enabled_p ())
2574     dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM,
2575 			  pattern_stmt,0);
2576 
2577   stmts->safe_push (last_stmt);
2578   *type_in = vectype;
2579   *type_out = vectype;
2580 
2581   return pattern_stmt;
2582 }
2583 
2584 /* Detect a signed division by a constant that wouldn't be
2585    otherwise vectorized:
2586 
2587    type a_t, b_t;
2588 
2589    S1 a_t = b_t / N;
2590 
2591   where type 'type' is an integral type and N is a constant.
2592 
2593   Similarly handle modulo by a constant:
2594 
2595    S4 a_t = b_t % N;
2596 
2597   Input/Output:
2598 
2599   * STMTS: Contains a stmt from which the pattern search begins,
2600     i.e. the division stmt.  S1 is replaced by if N is a power
2601     of two constant and type is signed:
2602   S3  y_t = b_t < 0 ? N - 1 : 0;
2603   S2  x_t = b_t + y_t;
2604   S1' a_t = x_t >> log2 (N);
2605 
2606     S4 is replaced if N is a power of two constant and
2607     type is signed by (where *_T temporaries have unsigned type):
2608   S9  y_T = b_t < 0 ? -1U : 0U;
2609   S8  z_T = y_T >> (sizeof (type_t) * CHAR_BIT - log2 (N));
2610   S7  z_t = (type) z_T;
2611   S6  w_t = b_t + z_t;
2612   S5  x_t = w_t & (N - 1);
2613   S4' a_t = x_t - z_t;
2614 
2615   Output:
2616 
2617   * TYPE_IN: The type of the input arguments to the pattern.
2618 
2619   * TYPE_OUT: The type of the output of this pattern.
2620 
2621   * Return value: A new stmt that will be used to replace the division
2622     S1 or modulo S4 stmt.  */
2623 
2624 static gimple *
vect_recog_divmod_pattern(vec<gimple * > * stmts,tree * type_in,tree * type_out)2625 vect_recog_divmod_pattern (vec<gimple *> *stmts,
2626 			   tree *type_in, tree *type_out)
2627 {
2628   gimple *last_stmt = stmts->pop ();
2629   tree oprnd0, oprnd1, vectype, itype, cond;
2630   gimple *pattern_stmt, *def_stmt;
2631   enum tree_code rhs_code;
2632   stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
2633   vec_info *vinfo = stmt_vinfo->vinfo;
2634   optab optab;
2635   tree q;
2636   int dummy_int, prec;
2637   stmt_vec_info def_stmt_vinfo;
2638 
2639   if (!is_gimple_assign (last_stmt))
2640     return NULL;
2641 
2642   rhs_code = gimple_assign_rhs_code (last_stmt);
2643   switch (rhs_code)
2644     {
2645     case TRUNC_DIV_EXPR:
2646     case TRUNC_MOD_EXPR:
2647       break;
2648     default:
2649       return NULL;
2650     }
2651 
2652   if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
2653     return NULL;
2654 
2655   oprnd0 = gimple_assign_rhs1 (last_stmt);
2656   oprnd1 = gimple_assign_rhs2 (last_stmt);
2657   itype = TREE_TYPE (oprnd0);
2658   if (TREE_CODE (oprnd0) != SSA_NAME
2659       || TREE_CODE (oprnd1) != INTEGER_CST
2660       || TREE_CODE (itype) != INTEGER_TYPE
2661       || !type_has_mode_precision_p (itype))
2662     return NULL;
2663 
2664   scalar_int_mode itype_mode = SCALAR_INT_TYPE_MODE (itype);
2665   vectype = get_vectype_for_scalar_type (itype);
2666   if (vectype == NULL_TREE)
2667     return NULL;
2668 
2669   /* If the target can handle vectorized division or modulo natively,
2670      don't attempt to optimize this.  */
2671   optab = optab_for_tree_code (rhs_code, vectype, optab_default);
2672   if (optab != unknown_optab)
2673     {
2674       machine_mode vec_mode = TYPE_MODE (vectype);
2675       int icode = (int) optab_handler (optab, vec_mode);
2676       if (icode != CODE_FOR_nothing)
2677 	return NULL;
2678     }
2679 
2680   prec = TYPE_PRECISION (itype);
2681   if (integer_pow2p (oprnd1))
2682     {
2683       if (TYPE_UNSIGNED (itype) || tree_int_cst_sgn (oprnd1) != 1)
2684 	return NULL;
2685 
2686       /* Pattern detected.  */
2687       if (dump_enabled_p ())
2688         dump_printf_loc (MSG_NOTE, vect_location,
2689                          "vect_recog_divmod_pattern: detected:\n");
2690 
2691       cond = build2 (LT_EXPR, boolean_type_node, oprnd0,
2692 		     build_int_cst (itype, 0));
2693       if (rhs_code == TRUNC_DIV_EXPR)
2694 	{
2695 	  tree var = vect_recog_temp_ssa_var (itype, NULL);
2696 	  tree shift;
2697 	  def_stmt
2698 	    = gimple_build_assign (var, COND_EXPR, cond,
2699 				   fold_build2 (MINUS_EXPR, itype, oprnd1,
2700 						build_int_cst (itype, 1)),
2701 				   build_int_cst (itype, 0));
2702 	  new_pattern_def_seq (stmt_vinfo, def_stmt);
2703 	  var = vect_recog_temp_ssa_var (itype, NULL);
2704 	  def_stmt
2705 	    = gimple_build_assign (var, PLUS_EXPR, oprnd0,
2706 				   gimple_assign_lhs (def_stmt));
2707 	  append_pattern_def_seq (stmt_vinfo, def_stmt);
2708 
2709 	  shift = build_int_cst (itype, tree_log2 (oprnd1));
2710 	  pattern_stmt
2711 	    = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
2712 				   RSHIFT_EXPR, var, shift);
2713 	}
2714       else
2715 	{
2716 	  tree signmask;
2717 	  STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;
2718 	  if (compare_tree_int (oprnd1, 2) == 0)
2719 	    {
2720 	      signmask = vect_recog_temp_ssa_var (itype, NULL);
2721 	      def_stmt = gimple_build_assign (signmask, COND_EXPR, cond,
2722 					      build_int_cst (itype, 1),
2723 					      build_int_cst (itype, 0));
2724 	      append_pattern_def_seq (stmt_vinfo, def_stmt);
2725 	    }
2726 	  else
2727 	    {
2728 	      tree utype
2729 		= build_nonstandard_integer_type (prec, 1);
2730 	      tree vecutype = get_vectype_for_scalar_type (utype);
2731 	      tree shift
2732 		= build_int_cst (utype, GET_MODE_BITSIZE (itype_mode)
2733 					- tree_log2 (oprnd1));
2734 	      tree var = vect_recog_temp_ssa_var (utype, NULL);
2735 
2736 	      def_stmt = gimple_build_assign (var, COND_EXPR, cond,
2737 					      build_int_cst (utype, -1),
2738 					      build_int_cst (utype, 0));
2739 	      def_stmt_vinfo = new_stmt_vec_info (def_stmt, vinfo);
2740 	      set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
2741 	      STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecutype;
2742 	      append_pattern_def_seq (stmt_vinfo, def_stmt);
2743 	      var = vect_recog_temp_ssa_var (utype, NULL);
2744 	      def_stmt = gimple_build_assign (var, RSHIFT_EXPR,
2745 					      gimple_assign_lhs (def_stmt),
2746 					      shift);
2747 	      def_stmt_vinfo = new_stmt_vec_info (def_stmt, vinfo);
2748 	      set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
2749 	      STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecutype;
2750 	      append_pattern_def_seq (stmt_vinfo, def_stmt);
2751 	      signmask = vect_recog_temp_ssa_var (itype, NULL);
2752 	      def_stmt
2753 		= gimple_build_assign (signmask, NOP_EXPR, var);
2754 	      append_pattern_def_seq (stmt_vinfo, def_stmt);
2755 	    }
2756 	  def_stmt
2757 	    = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
2758 				   PLUS_EXPR, oprnd0, signmask);
2759 	  append_pattern_def_seq (stmt_vinfo, def_stmt);
2760 	  def_stmt
2761 	    = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
2762 				   BIT_AND_EXPR, gimple_assign_lhs (def_stmt),
2763 				   fold_build2 (MINUS_EXPR, itype, oprnd1,
2764 						build_int_cst (itype, 1)));
2765 	  append_pattern_def_seq (stmt_vinfo, def_stmt);
2766 
2767 	  pattern_stmt
2768 	    = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
2769 				   MINUS_EXPR, gimple_assign_lhs (def_stmt),
2770 				   signmask);
2771 	}
2772 
2773       if (dump_enabled_p ())
2774 	dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt,
2775                               0);
2776 
2777       stmts->safe_push (last_stmt);
2778 
2779       *type_in = vectype;
2780       *type_out = vectype;
2781       return pattern_stmt;
2782     }
2783 
2784   if (prec > HOST_BITS_PER_WIDE_INT
2785       || integer_zerop (oprnd1))
2786     return NULL;
2787 
2788   if (!can_mult_highpart_p (TYPE_MODE (vectype), TYPE_UNSIGNED (itype)))
2789     return NULL;
2790 
2791   STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;
2792 
2793   if (TYPE_UNSIGNED (itype))
2794     {
2795       unsigned HOST_WIDE_INT mh, ml;
2796       int pre_shift, post_shift;
2797       unsigned HOST_WIDE_INT d = (TREE_INT_CST_LOW (oprnd1)
2798 				  & GET_MODE_MASK (itype_mode));
2799       tree t1, t2, t3, t4;
2800 
2801       if (d >= (HOST_WIDE_INT_1U << (prec - 1)))
2802 	/* FIXME: Can transform this into oprnd0 >= oprnd1 ? 1 : 0.  */
2803 	return NULL;
2804 
2805       /* Find a suitable multiplier and right shift count
2806 	 instead of multiplying with D.  */
2807       mh = choose_multiplier (d, prec, prec, &ml, &post_shift, &dummy_int);
2808 
2809       /* If the suggested multiplier is more than SIZE bits, we can do better
2810 	 for even divisors, using an initial right shift.  */
2811       if (mh != 0 && (d & 1) == 0)
2812 	{
2813 	  pre_shift = ctz_or_zero (d);
2814 	  mh = choose_multiplier (d >> pre_shift, prec, prec - pre_shift,
2815 				  &ml, &post_shift, &dummy_int);
2816 	  gcc_assert (!mh);
2817 	}
2818       else
2819 	pre_shift = 0;
2820 
2821       if (mh != 0)
2822 	{
2823 	  if (post_shift - 1 >= prec)
2824 	    return NULL;
2825 
2826 	  /* t1 = oprnd0 h* ml;
2827 	     t2 = oprnd0 - t1;
2828 	     t3 = t2 >> 1;
2829 	     t4 = t1 + t3;
2830 	     q = t4 >> (post_shift - 1);  */
2831 	  t1 = vect_recog_temp_ssa_var (itype, NULL);
2832 	  def_stmt = gimple_build_assign (t1, MULT_HIGHPART_EXPR, oprnd0,
2833 					  build_int_cst (itype, ml));
2834 	  append_pattern_def_seq (stmt_vinfo, def_stmt);
2835 
2836 	  t2 = vect_recog_temp_ssa_var (itype, NULL);
2837 	  def_stmt
2838 	    = gimple_build_assign (t2, MINUS_EXPR, oprnd0, t1);
2839 	  append_pattern_def_seq (stmt_vinfo, def_stmt);
2840 
2841 	  t3 = vect_recog_temp_ssa_var (itype, NULL);
2842 	  def_stmt
2843 	    = gimple_build_assign (t3, RSHIFT_EXPR, t2, integer_one_node);
2844 	  append_pattern_def_seq (stmt_vinfo, def_stmt);
2845 
2846 	  t4 = vect_recog_temp_ssa_var (itype, NULL);
2847 	  def_stmt
2848 	    = gimple_build_assign (t4, PLUS_EXPR, t1, t3);
2849 
2850 	  if (post_shift != 1)
2851 	    {
2852 	      append_pattern_def_seq (stmt_vinfo, def_stmt);
2853 
2854 	      q = vect_recog_temp_ssa_var (itype, NULL);
2855 	      pattern_stmt
2856 		= gimple_build_assign (q, RSHIFT_EXPR, t4,
2857 				       build_int_cst (itype, post_shift - 1));
2858 	    }
2859 	  else
2860 	    {
2861 	      q = t4;
2862 	      pattern_stmt = def_stmt;
2863 	    }
2864 	}
2865       else
2866 	{
2867 	  if (pre_shift >= prec || post_shift >= prec)
2868 	    return NULL;
2869 
2870 	  /* t1 = oprnd0 >> pre_shift;
2871 	     t2 = t1 h* ml;
2872 	     q = t2 >> post_shift;  */
2873 	  if (pre_shift)
2874 	    {
2875 	      t1 = vect_recog_temp_ssa_var (itype, NULL);
2876 	      def_stmt
2877 		= gimple_build_assign (t1, RSHIFT_EXPR, oprnd0,
2878 				       build_int_cst (NULL, pre_shift));
2879 	      append_pattern_def_seq (stmt_vinfo, def_stmt);
2880 	    }
2881 	  else
2882 	    t1 = oprnd0;
2883 
2884 	  t2 = vect_recog_temp_ssa_var (itype, NULL);
2885 	  def_stmt = gimple_build_assign (t2, MULT_HIGHPART_EXPR, t1,
2886 					  build_int_cst (itype, ml));
2887 
2888 	  if (post_shift)
2889 	    {
2890 	      append_pattern_def_seq (stmt_vinfo, def_stmt);
2891 
2892 	      q = vect_recog_temp_ssa_var (itype, NULL);
2893 	      def_stmt
2894 		= gimple_build_assign (q, RSHIFT_EXPR, t2,
2895 				       build_int_cst (itype, post_shift));
2896 	    }
2897 	  else
2898 	    q = t2;
2899 
2900 	  pattern_stmt = def_stmt;
2901 	}
2902     }
2903   else
2904     {
2905       unsigned HOST_WIDE_INT ml;
2906       int post_shift;
2907       HOST_WIDE_INT d = TREE_INT_CST_LOW (oprnd1);
2908       unsigned HOST_WIDE_INT abs_d;
2909       bool add = false;
2910       tree t1, t2, t3, t4;
2911 
2912       /* Give up for -1.  */
2913       if (d == -1)
2914 	return NULL;
2915 
2916       /* Since d might be INT_MIN, we have to cast to
2917 	 unsigned HOST_WIDE_INT before negating to avoid
2918 	 undefined signed overflow.  */
2919       abs_d = (d >= 0
2920 	       ? (unsigned HOST_WIDE_INT) d
2921 	       : - (unsigned HOST_WIDE_INT) d);
2922 
2923       /* n rem d = n rem -d */
2924       if (rhs_code == TRUNC_MOD_EXPR && d < 0)
2925 	{
2926 	  d = abs_d;
2927 	  oprnd1 = build_int_cst (itype, abs_d);
2928 	}
2929       else if (HOST_BITS_PER_WIDE_INT >= prec
2930 	       && abs_d == HOST_WIDE_INT_1U << (prec - 1))
2931 	/* This case is not handled correctly below.  */
2932 	return NULL;
2933 
2934       choose_multiplier (abs_d, prec, prec - 1, &ml, &post_shift, &dummy_int);
2935       if (ml >= HOST_WIDE_INT_1U << (prec - 1))
2936 	{
2937 	  add = true;
2938 	  ml |= HOST_WIDE_INT_M1U << (prec - 1);
2939 	}
2940       if (post_shift >= prec)
2941 	return NULL;
2942 
2943       /* t1 = oprnd0 h* ml;  */
2944       t1 = vect_recog_temp_ssa_var (itype, NULL);
2945       def_stmt = gimple_build_assign (t1, MULT_HIGHPART_EXPR, oprnd0,
2946 				      build_int_cst (itype, ml));
2947 
2948       if (add)
2949 	{
2950 	  /* t2 = t1 + oprnd0;  */
2951 	  append_pattern_def_seq (stmt_vinfo, def_stmt);
2952 	  t2 = vect_recog_temp_ssa_var (itype, NULL);
2953 	  def_stmt = gimple_build_assign (t2, PLUS_EXPR, t1, oprnd0);
2954 	}
2955       else
2956 	t2 = t1;
2957 
2958       if (post_shift)
2959 	{
2960 	  /* t3 = t2 >> post_shift;  */
2961 	  append_pattern_def_seq (stmt_vinfo, def_stmt);
2962 	  t3 = vect_recog_temp_ssa_var (itype, NULL);
2963 	  def_stmt = gimple_build_assign (t3, RSHIFT_EXPR, t2,
2964 					  build_int_cst (itype, post_shift));
2965 	}
2966       else
2967 	t3 = t2;
2968 
2969       wide_int oprnd0_min, oprnd0_max;
2970       int msb = 1;
2971       if (get_range_info (oprnd0, &oprnd0_min, &oprnd0_max) == VR_RANGE)
2972 	{
2973 	  if (!wi::neg_p (oprnd0_min, TYPE_SIGN (itype)))
2974 	    msb = 0;
2975 	  else if (wi::neg_p (oprnd0_max, TYPE_SIGN (itype)))
2976 	    msb = -1;
2977 	}
2978 
2979       if (msb == 0 && d >= 0)
2980 	{
2981 	  /* q = t3;  */
2982 	  q = t3;
2983 	  pattern_stmt = def_stmt;
2984 	}
2985       else
2986 	{
2987 	  /* t4 = oprnd0 >> (prec - 1);
2988 	     or if we know from VRP that oprnd0 >= 0
2989 	     t4 = 0;
2990 	     or if we know from VRP that oprnd0 < 0
2991 	     t4 = -1;  */
2992 	  append_pattern_def_seq (stmt_vinfo, def_stmt);
2993 	  t4 = vect_recog_temp_ssa_var (itype, NULL);
2994 	  if (msb != 1)
2995 	    def_stmt = gimple_build_assign (t4, INTEGER_CST,
2996 					    build_int_cst (itype, msb));
2997 	  else
2998 	    def_stmt = gimple_build_assign (t4, RSHIFT_EXPR, oprnd0,
2999 					    build_int_cst (itype, prec - 1));
3000 	  append_pattern_def_seq (stmt_vinfo, def_stmt);
3001 
3002 	  /* q = t3 - t4;  or q = t4 - t3;  */
3003 	  q = vect_recog_temp_ssa_var (itype, NULL);
3004 	  pattern_stmt = gimple_build_assign (q, MINUS_EXPR, d < 0 ? t4 : t3,
3005 					      d < 0 ? t3 : t4);
3006 	}
3007     }
3008 
3009   if (rhs_code == TRUNC_MOD_EXPR)
3010     {
3011       tree r, t1;
3012 
3013       /* We divided.  Now finish by:
3014 	 t1 = q * oprnd1;
3015 	 r = oprnd0 - t1;  */
3016       append_pattern_def_seq (stmt_vinfo, pattern_stmt);
3017 
3018       t1 = vect_recog_temp_ssa_var (itype, NULL);
3019       def_stmt = gimple_build_assign (t1, MULT_EXPR, q, oprnd1);
3020       append_pattern_def_seq (stmt_vinfo, def_stmt);
3021 
3022       r = vect_recog_temp_ssa_var (itype, NULL);
3023       pattern_stmt = gimple_build_assign (r, MINUS_EXPR, oprnd0, t1);
3024     }
3025 
3026   /* Pattern detected.  */
3027   if (dump_enabled_p ())
3028     {
3029       dump_printf_loc (MSG_NOTE, vect_location,
3030                        "vect_recog_divmod_pattern: detected: ");
3031       dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
3032     }
3033 
3034   stmts->safe_push (last_stmt);
3035 
3036   *type_in = vectype;
3037   *type_out = vectype;
3038   return pattern_stmt;
3039 }
3040 
3041 /* Function vect_recog_mixed_size_cond_pattern
3042 
3043    Try to find the following pattern:
3044 
3045      type x_t, y_t;
3046      TYPE a_T, b_T, c_T;
3047    loop:
3048      S1  a_T = x_t CMP y_t ? b_T : c_T;
3049 
3050    where type 'TYPE' is an integral type which has different size
3051    from 'type'.  b_T and c_T are either constants (and if 'TYPE' is wider
3052    than 'type', the constants need to fit into an integer type
3053    with the same width as 'type') or results of conversion from 'type'.
3054 
3055    Input:
3056 
3057    * LAST_STMT: A stmt from which the pattern search begins.
3058 
3059    Output:
3060 
3061    * TYPE_IN: The type of the input arguments to the pattern.
3062 
3063    * TYPE_OUT: The type of the output of this pattern.
3064 
3065    * Return value: A new stmt that will be used to replace the pattern.
3066 	Additionally a def_stmt is added.
3067 
3068 	a_it = x_t CMP y_t ? b_it : c_it;
3069 	a_T = (TYPE) a_it;  */
3070 
3071 static gimple *
vect_recog_mixed_size_cond_pattern(vec<gimple * > * stmts,tree * type_in,tree * type_out)3072 vect_recog_mixed_size_cond_pattern (vec<gimple *> *stmts, tree *type_in,
3073 				    tree *type_out)
3074 {
3075   gimple *last_stmt = (*stmts)[0];
3076   tree cond_expr, then_clause, else_clause;
3077   stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt), def_stmt_info;
3078   tree type, vectype, comp_vectype, itype = NULL_TREE, vecitype;
3079   gimple *pattern_stmt, *def_stmt;
3080   vec_info *vinfo = stmt_vinfo->vinfo;
3081   tree orig_type0 = NULL_TREE, orig_type1 = NULL_TREE;
3082   gimple *def_stmt0 = NULL, *def_stmt1 = NULL;
3083   bool promotion;
3084   tree comp_scalar_type;
3085 
3086   if (!is_gimple_assign (last_stmt)
3087       || gimple_assign_rhs_code (last_stmt) != COND_EXPR
3088       || STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_internal_def)
3089     return NULL;
3090 
3091   cond_expr = gimple_assign_rhs1 (last_stmt);
3092   then_clause = gimple_assign_rhs2 (last_stmt);
3093   else_clause = gimple_assign_rhs3 (last_stmt);
3094 
3095   if (!COMPARISON_CLASS_P (cond_expr))
3096     return NULL;
3097 
3098   comp_scalar_type = TREE_TYPE (TREE_OPERAND (cond_expr, 0));
3099   comp_vectype = get_vectype_for_scalar_type (comp_scalar_type);
3100   if (comp_vectype == NULL_TREE)
3101     return NULL;
3102 
3103   type = gimple_expr_type (last_stmt);
3104   if (types_compatible_p (type, comp_scalar_type)
3105       || ((TREE_CODE (then_clause) != INTEGER_CST
3106 	   || TREE_CODE (else_clause) != INTEGER_CST)
3107 	  && !INTEGRAL_TYPE_P (comp_scalar_type))
3108       || !INTEGRAL_TYPE_P (type))
3109     return NULL;
3110 
3111   if ((TREE_CODE (then_clause) != INTEGER_CST
3112        && !type_conversion_p (then_clause, last_stmt, false, &orig_type0,
3113                               &def_stmt0, &promotion))
3114       || (TREE_CODE (else_clause) != INTEGER_CST
3115           && !type_conversion_p (else_clause, last_stmt, false, &orig_type1,
3116                                  &def_stmt1, &promotion)))
3117     return NULL;
3118 
3119   if (orig_type0 && orig_type1
3120       && !types_compatible_p (orig_type0, orig_type1))
3121     return NULL;
3122 
3123   if (orig_type0)
3124     {
3125       if (!types_compatible_p (orig_type0, comp_scalar_type))
3126 	return NULL;
3127       then_clause = gimple_assign_rhs1 (def_stmt0);
3128       itype = orig_type0;
3129     }
3130 
3131   if (orig_type1)
3132     {
3133       if (!types_compatible_p (orig_type1, comp_scalar_type))
3134 	return NULL;
3135       else_clause = gimple_assign_rhs1 (def_stmt1);
3136       itype = orig_type1;
3137     }
3138 
3139 
3140   HOST_WIDE_INT cmp_mode_size
3141     = GET_MODE_UNIT_BITSIZE (TYPE_MODE (comp_vectype));
3142 
3143   scalar_int_mode type_mode = SCALAR_INT_TYPE_MODE (type);
3144   if (GET_MODE_BITSIZE (type_mode) == cmp_mode_size)
3145     return NULL;
3146 
3147   vectype = get_vectype_for_scalar_type (type);
3148   if (vectype == NULL_TREE)
3149     return NULL;
3150 
3151   if (expand_vec_cond_expr_p (vectype, comp_vectype, TREE_CODE (cond_expr)))
3152     return NULL;
3153 
3154   if (itype == NULL_TREE)
3155     itype = build_nonstandard_integer_type (cmp_mode_size,
3156   					    TYPE_UNSIGNED (type));
3157 
3158   if (itype == NULL_TREE
3159       || GET_MODE_BITSIZE (SCALAR_TYPE_MODE (itype)) != cmp_mode_size)
3160     return NULL;
3161 
3162   vecitype = get_vectype_for_scalar_type (itype);
3163   if (vecitype == NULL_TREE)
3164     return NULL;
3165 
3166   if (!expand_vec_cond_expr_p (vecitype, comp_vectype, TREE_CODE (cond_expr)))
3167     return NULL;
3168 
3169   if (GET_MODE_BITSIZE (type_mode) > cmp_mode_size)
3170     {
3171       if ((TREE_CODE (then_clause) == INTEGER_CST
3172 	   && !int_fits_type_p (then_clause, itype))
3173 	  || (TREE_CODE (else_clause) == INTEGER_CST
3174 	      && !int_fits_type_p (else_clause, itype)))
3175 	return NULL;
3176     }
3177 
3178   def_stmt = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3179 				  COND_EXPR, unshare_expr (cond_expr),
3180 				  fold_convert (itype, then_clause),
3181 				  fold_convert (itype, else_clause));
3182   pattern_stmt = gimple_build_assign (vect_recog_temp_ssa_var (type, NULL),
3183 				      NOP_EXPR, gimple_assign_lhs (def_stmt));
3184 
3185   new_pattern_def_seq (stmt_vinfo, def_stmt);
3186   def_stmt_info = new_stmt_vec_info (def_stmt, vinfo);
3187   set_vinfo_for_stmt (def_stmt, def_stmt_info);
3188   STMT_VINFO_VECTYPE (def_stmt_info) = vecitype;
3189   *type_in = vecitype;
3190   *type_out = vectype;
3191 
3192   if (dump_enabled_p ())
3193     dump_printf_loc (MSG_NOTE, vect_location,
3194                      "vect_recog_mixed_size_cond_pattern: detected:\n");
3195 
3196   return pattern_stmt;
3197 }
3198 
3199 
3200 /* Helper function of vect_recog_bool_pattern.  Called recursively, return
3201    true if bool VAR can and should be optimized that way.  Assume it shouldn't
3202    in case it's a result of a comparison which can be directly vectorized into
3203    a vector comparison.  Fills in STMTS with all stmts visited during the
3204    walk.  */
3205 
3206 static bool
check_bool_pattern(tree var,vec_info * vinfo,hash_set<gimple * > & stmts)3207 check_bool_pattern (tree var, vec_info *vinfo, hash_set<gimple *> &stmts)
3208 {
3209   gimple *def_stmt;
3210   enum vect_def_type dt;
3211   tree rhs1;
3212   enum tree_code rhs_code;
3213 
3214   if (!vect_is_simple_use (var, vinfo, &def_stmt, &dt))
3215     return false;
3216 
3217   if (dt != vect_internal_def)
3218     return false;
3219 
3220   if (!is_gimple_assign (def_stmt))
3221     return false;
3222 
3223   if (stmts.contains (def_stmt))
3224     return true;
3225 
3226   rhs1 = gimple_assign_rhs1 (def_stmt);
3227   rhs_code = gimple_assign_rhs_code (def_stmt);
3228   switch (rhs_code)
3229     {
3230     case SSA_NAME:
3231       if (! check_bool_pattern (rhs1, vinfo, stmts))
3232 	return false;
3233       break;
3234 
3235     CASE_CONVERT:
3236       if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs1)))
3237 	return false;
3238       if (! check_bool_pattern (rhs1, vinfo, stmts))
3239 	return false;
3240       break;
3241 
3242     case BIT_NOT_EXPR:
3243       if (! check_bool_pattern (rhs1, vinfo, stmts))
3244 	return false;
3245       break;
3246 
3247     case BIT_AND_EXPR:
3248     case BIT_IOR_EXPR:
3249     case BIT_XOR_EXPR:
3250       if (! check_bool_pattern (rhs1, vinfo, stmts)
3251 	  || ! check_bool_pattern (gimple_assign_rhs2 (def_stmt), vinfo, stmts))
3252 	return false;
3253       break;
3254 
3255     default:
3256       if (TREE_CODE_CLASS (rhs_code) == tcc_comparison)
3257 	{
3258 	  tree vecitype, comp_vectype;
3259 
3260 	  /* If the comparison can throw, then is_gimple_condexpr will be
3261 	     false and we can't make a COND_EXPR/VEC_COND_EXPR out of it.  */
3262 	  if (stmt_could_throw_p (def_stmt))
3263 	    return false;
3264 
3265 	  comp_vectype = get_vectype_for_scalar_type (TREE_TYPE (rhs1));
3266 	  if (comp_vectype == NULL_TREE)
3267 	    return false;
3268 
3269 	  tree mask_type = get_mask_type_for_scalar_type (TREE_TYPE (rhs1));
3270 	  if (mask_type
3271 	      && expand_vec_cmp_expr_p (comp_vectype, mask_type, rhs_code))
3272 	    return false;
3273 
3274 	  if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE)
3275 	    {
3276 	      scalar_mode mode = SCALAR_TYPE_MODE (TREE_TYPE (rhs1));
3277 	      tree itype
3278 		= build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
3279 	      vecitype = get_vectype_for_scalar_type (itype);
3280 	      if (vecitype == NULL_TREE)
3281 		return false;
3282 	    }
3283 	  else
3284 	    vecitype = comp_vectype;
3285 	  if (! expand_vec_cond_expr_p (vecitype, comp_vectype, rhs_code))
3286 	    return false;
3287 	}
3288       else
3289 	return false;
3290       break;
3291     }
3292 
3293   bool res = stmts.add (def_stmt);
3294   /* We can't end up recursing when just visiting SSA defs but not PHIs.  */
3295   gcc_assert (!res);
3296 
3297   return true;
3298 }
3299 
3300 
3301 /* Helper function of adjust_bool_pattern.  Add a cast to TYPE to a previous
3302    stmt (SSA_NAME_DEF_STMT of VAR) adding a cast to STMT_INFOs
3303    pattern sequence.  */
3304 
3305 static tree
adjust_bool_pattern_cast(tree type,tree var,stmt_vec_info stmt_info)3306 adjust_bool_pattern_cast (tree type, tree var, stmt_vec_info stmt_info)
3307 {
3308   gimple *cast_stmt = gimple_build_assign (vect_recog_temp_ssa_var (type, NULL),
3309 					   NOP_EXPR, var);
3310   stmt_vec_info patt_vinfo = new_stmt_vec_info (cast_stmt, stmt_info->vinfo);
3311   set_vinfo_for_stmt (cast_stmt, patt_vinfo);
3312   STMT_VINFO_VECTYPE (patt_vinfo) = get_vectype_for_scalar_type (type);
3313   append_pattern_def_seq (stmt_info, cast_stmt);
3314   return gimple_assign_lhs (cast_stmt);
3315 }
3316 
3317 /* Helper function of vect_recog_bool_pattern.  Do the actual transformations.
3318    VAR is an SSA_NAME that should be transformed from bool to a wider integer
3319    type, OUT_TYPE is the desired final integer type of the whole pattern.
3320    STMT_INFO is the info of the pattern root and is where pattern stmts should
3321    be associated with.  DEFS is a map of pattern defs.  */
3322 
3323 static void
adjust_bool_pattern(tree var,tree out_type,stmt_vec_info stmt_info,hash_map<tree,tree> & defs)3324 adjust_bool_pattern (tree var, tree out_type,
3325 		     stmt_vec_info stmt_info, hash_map <tree, tree> &defs)
3326 {
3327   gimple *stmt = SSA_NAME_DEF_STMT (var);
3328   enum tree_code rhs_code, def_rhs_code;
3329   tree itype, cond_expr, rhs1, rhs2, irhs1, irhs2;
3330   location_t loc;
3331   gimple *pattern_stmt, *def_stmt;
3332   tree trueval = NULL_TREE;
3333 
3334   rhs1 = gimple_assign_rhs1 (stmt);
3335   rhs2 = gimple_assign_rhs2 (stmt);
3336   rhs_code = gimple_assign_rhs_code (stmt);
3337   loc = gimple_location (stmt);
3338   switch (rhs_code)
3339     {
3340     case SSA_NAME:
3341     CASE_CONVERT:
3342       irhs1 = *defs.get (rhs1);
3343       itype = TREE_TYPE (irhs1);
3344       pattern_stmt
3345 	= gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3346 			       SSA_NAME, irhs1);
3347       break;
3348 
3349     case BIT_NOT_EXPR:
3350       irhs1 = *defs.get (rhs1);
3351       itype = TREE_TYPE (irhs1);
3352       pattern_stmt
3353 	= gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3354 			       BIT_XOR_EXPR, irhs1, build_int_cst (itype, 1));
3355       break;
3356 
3357     case BIT_AND_EXPR:
3358       /* Try to optimize x = y & (a < b ? 1 : 0); into
3359 	 x = (a < b ? y : 0);
3360 
3361 	 E.g. for:
3362 	   bool a_b, b_b, c_b;
3363 	   TYPE d_T;
3364 
3365 	   S1  a_b = x1 CMP1 y1;
3366 	   S2  b_b = x2 CMP2 y2;
3367 	   S3  c_b = a_b & b_b;
3368 	   S4  d_T = (TYPE) c_b;
3369 
3370 	 we would normally emit:
3371 
3372 	   S1'  a_T = x1 CMP1 y1 ? 1 : 0;
3373 	   S2'  b_T = x2 CMP2 y2 ? 1 : 0;
3374 	   S3'  c_T = a_T & b_T;
3375 	   S4'  d_T = c_T;
3376 
3377 	 but we can save one stmt by using the
3378 	 result of one of the COND_EXPRs in the other COND_EXPR and leave
3379 	 BIT_AND_EXPR stmt out:
3380 
3381 	   S1'  a_T = x1 CMP1 y1 ? 1 : 0;
3382 	   S3'  c_T = x2 CMP2 y2 ? a_T : 0;
3383 	   S4'  f_T = c_T;
3384 
3385 	 At least when VEC_COND_EXPR is implemented using masks
3386 	 cond ? 1 : 0 is as expensive as cond ? var : 0, in both cases it
3387 	 computes the comparison masks and ands it, in one case with
3388 	 all ones vector, in the other case with a vector register.
3389 	 Don't do this for BIT_IOR_EXPR, because cond ? 1 : var; is
3390 	 often more expensive.  */
3391       def_stmt = SSA_NAME_DEF_STMT (rhs2);
3392       def_rhs_code = gimple_assign_rhs_code (def_stmt);
3393       if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
3394 	{
3395 	  irhs1 = *defs.get (rhs1);
3396 	  tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
3397 	  if (TYPE_PRECISION (TREE_TYPE (irhs1))
3398 	      == GET_MODE_BITSIZE (SCALAR_TYPE_MODE (TREE_TYPE (def_rhs1))))
3399 	    {
3400 	      rhs_code = def_rhs_code;
3401 	      rhs1 = def_rhs1;
3402 	      rhs2 = gimple_assign_rhs2 (def_stmt);
3403 	      trueval = irhs1;
3404 	      goto do_compare;
3405 	    }
3406 	  else
3407 	    irhs2 = *defs.get (rhs2);
3408 	  goto and_ior_xor;
3409 	}
3410       def_stmt = SSA_NAME_DEF_STMT (rhs1);
3411       def_rhs_code = gimple_assign_rhs_code (def_stmt);
3412       if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
3413 	{
3414 	  irhs2 = *defs.get (rhs2);
3415 	  tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
3416 	  if (TYPE_PRECISION (TREE_TYPE (irhs2))
3417 	      == GET_MODE_BITSIZE (SCALAR_TYPE_MODE (TREE_TYPE (def_rhs1))))
3418 	    {
3419 	      rhs_code = def_rhs_code;
3420 	      rhs1 = def_rhs1;
3421 	      rhs2 = gimple_assign_rhs2 (def_stmt);
3422 	      trueval = irhs2;
3423 	      goto do_compare;
3424 	    }
3425 	  else
3426 	    irhs1 = *defs.get (rhs1);
3427 	  goto and_ior_xor;
3428 	}
3429       /* FALLTHRU */
3430     case BIT_IOR_EXPR:
3431     case BIT_XOR_EXPR:
3432       irhs1 = *defs.get (rhs1);
3433       irhs2 = *defs.get (rhs2);
3434     and_ior_xor:
3435       if (TYPE_PRECISION (TREE_TYPE (irhs1))
3436 	  != TYPE_PRECISION (TREE_TYPE (irhs2)))
3437 	{
3438 	  int prec1 = TYPE_PRECISION (TREE_TYPE (irhs1));
3439 	  int prec2 = TYPE_PRECISION (TREE_TYPE (irhs2));
3440 	  int out_prec = TYPE_PRECISION (out_type);
3441 	  if (absu_hwi (out_prec - prec1) < absu_hwi (out_prec - prec2))
3442 	    irhs2 = adjust_bool_pattern_cast (TREE_TYPE (irhs1), irhs2,
3443 					      stmt_info);
3444 	  else if (absu_hwi (out_prec - prec1) > absu_hwi (out_prec - prec2))
3445 	    irhs1 = adjust_bool_pattern_cast (TREE_TYPE (irhs2), irhs1,
3446 					      stmt_info);
3447 	  else
3448 	    {
3449 	      irhs1 = adjust_bool_pattern_cast (out_type, irhs1, stmt_info);
3450 	      irhs2 = adjust_bool_pattern_cast (out_type, irhs2, stmt_info);
3451 	    }
3452 	}
3453       itype = TREE_TYPE (irhs1);
3454       pattern_stmt
3455 	= gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3456 			       rhs_code, irhs1, irhs2);
3457       break;
3458 
3459     default:
3460     do_compare:
3461       gcc_assert (TREE_CODE_CLASS (rhs_code) == tcc_comparison);
3462       if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE
3463 	  || !TYPE_UNSIGNED (TREE_TYPE (rhs1))
3464 	  || maybe_ne (TYPE_PRECISION (TREE_TYPE (rhs1)),
3465 		       GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (rhs1)))))
3466 	{
3467 	  scalar_mode mode = SCALAR_TYPE_MODE (TREE_TYPE (rhs1));
3468 	  itype
3469 	    = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
3470 	}
3471       else
3472 	itype = TREE_TYPE (rhs1);
3473       cond_expr = build2_loc (loc, rhs_code, itype, rhs1, rhs2);
3474       if (trueval == NULL_TREE)
3475 	trueval = build_int_cst (itype, 1);
3476       else
3477 	gcc_checking_assert (useless_type_conversion_p (itype,
3478 							TREE_TYPE (trueval)));
3479       pattern_stmt
3480 	= gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3481 			       COND_EXPR, cond_expr, trueval,
3482 			       build_int_cst (itype, 0));
3483       break;
3484     }
3485 
3486   gimple_set_location (pattern_stmt, loc);
3487   /* ???  Why does vect_mark_pattern_stmts set the vector type on all
3488      pattern def seq stmts instead of just letting auto-detection do
3489      its work?  */
3490   stmt_vec_info patt_vinfo = new_stmt_vec_info (pattern_stmt, stmt_info->vinfo);
3491   set_vinfo_for_stmt (pattern_stmt, patt_vinfo);
3492   STMT_VINFO_VECTYPE (patt_vinfo) = get_vectype_for_scalar_type (itype);
3493   append_pattern_def_seq (stmt_info, pattern_stmt);
3494   defs.put (var, gimple_assign_lhs (pattern_stmt));
3495 }
3496 
3497 /* Comparison function to qsort a vector of gimple stmts after UID.  */
3498 
3499 static int
sort_after_uid(const void * p1,const void * p2)3500 sort_after_uid (const void *p1, const void *p2)
3501 {
3502   const gimple *stmt1 = *(const gimple * const *)p1;
3503   const gimple *stmt2 = *(const gimple * const *)p2;
3504   return gimple_uid (stmt1) - gimple_uid (stmt2);
3505 }
3506 
3507 /* Create pattern stmts for all stmts participating in the bool pattern
3508    specified by BOOL_STMT_SET and its root STMT with the desired type
3509    OUT_TYPE.  Return the def of the pattern root.  */
3510 
3511 static tree
adjust_bool_stmts(hash_set<gimple * > & bool_stmt_set,tree out_type,gimple * stmt)3512 adjust_bool_stmts (hash_set <gimple *> &bool_stmt_set,
3513 		   tree out_type, gimple *stmt)
3514 {
3515   /* Gather original stmts in the bool pattern in their order of appearance
3516      in the IL.  */
3517   auto_vec<gimple *> bool_stmts (bool_stmt_set.elements ());
3518   for (hash_set <gimple *>::iterator i = bool_stmt_set.begin ();
3519        i != bool_stmt_set.end (); ++i)
3520     bool_stmts.quick_push (*i);
3521   bool_stmts.qsort (sort_after_uid);
3522 
3523   /* Now process them in that order, producing pattern stmts.  */
3524   hash_map <tree, tree> defs;
3525   for (unsigned i = 0; i < bool_stmts.length (); ++i)
3526     adjust_bool_pattern (gimple_assign_lhs (bool_stmts[i]),
3527 			 out_type, vinfo_for_stmt (stmt), defs);
3528 
3529   /* Pop the last pattern seq stmt and install it as pattern root for STMT.  */
3530   gimple *pattern_stmt
3531     = gimple_seq_last_stmt (STMT_VINFO_PATTERN_DEF_SEQ (vinfo_for_stmt (stmt)));
3532   return gimple_assign_lhs (pattern_stmt);
3533 }
3534 
3535 /* Helper for search_type_for_mask.  */
3536 
3537 static tree
search_type_for_mask_1(tree var,vec_info * vinfo,hash_map<gimple *,tree> & cache)3538 search_type_for_mask_1 (tree var, vec_info *vinfo,
3539 			hash_map<gimple *, tree> &cache)
3540 {
3541   gimple *def_stmt;
3542   enum vect_def_type dt;
3543   tree rhs1;
3544   enum tree_code rhs_code;
3545   tree res = NULL_TREE, res2;
3546 
3547   if (TREE_CODE (var) != SSA_NAME)
3548     return NULL_TREE;
3549 
3550   if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (var)))
3551     return NULL_TREE;
3552 
3553   if (!vect_is_simple_use (var, vinfo, &def_stmt, &dt))
3554     return NULL_TREE;
3555 
3556   if (dt != vect_internal_def)
3557     return NULL_TREE;
3558 
3559   if (!is_gimple_assign (def_stmt))
3560     return NULL_TREE;
3561 
3562   tree *c = cache.get (def_stmt);
3563   if (c)
3564     return *c;
3565 
3566   rhs_code = gimple_assign_rhs_code (def_stmt);
3567   rhs1 = gimple_assign_rhs1 (def_stmt);
3568 
3569   switch (rhs_code)
3570     {
3571     case SSA_NAME:
3572     case BIT_NOT_EXPR:
3573     CASE_CONVERT:
3574       res = search_type_for_mask_1 (rhs1, vinfo, cache);
3575       break;
3576 
3577     case BIT_AND_EXPR:
3578     case BIT_IOR_EXPR:
3579     case BIT_XOR_EXPR:
3580       res = search_type_for_mask_1 (rhs1, vinfo, cache);
3581       res2 = search_type_for_mask_1 (gimple_assign_rhs2 (def_stmt), vinfo,
3582 				     cache);
3583       if (!res || (res2 && TYPE_PRECISION (res) > TYPE_PRECISION (res2)))
3584 	res = res2;
3585       break;
3586 
3587     default:
3588       if (TREE_CODE_CLASS (rhs_code) == tcc_comparison)
3589 	{
3590 	  tree comp_vectype, mask_type;
3591 
3592 	  if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs1)))
3593 	    {
3594 	      res = search_type_for_mask_1 (rhs1, vinfo, cache);
3595 	      res2 = search_type_for_mask_1 (gimple_assign_rhs2 (def_stmt),
3596 					     vinfo, cache);
3597 	      if (!res || (res2 && TYPE_PRECISION (res) > TYPE_PRECISION (res2)))
3598 		res = res2;
3599 	      break;
3600 	    }
3601 
3602 	  comp_vectype = get_vectype_for_scalar_type (TREE_TYPE (rhs1));
3603 	  if (comp_vectype == NULL_TREE)
3604 	    {
3605 	      res = NULL_TREE;
3606 	      break;
3607 	    }
3608 
3609 	  mask_type = get_mask_type_for_scalar_type (TREE_TYPE (rhs1));
3610 	  if (!mask_type
3611 	      || !expand_vec_cmp_expr_p (comp_vectype, mask_type, rhs_code))
3612 	    {
3613 	      res = NULL_TREE;
3614 	      break;
3615 	    }
3616 
3617 	  if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE
3618 	      || !TYPE_UNSIGNED (TREE_TYPE (rhs1)))
3619 	    {
3620 	      scalar_mode mode = SCALAR_TYPE_MODE (TREE_TYPE (rhs1));
3621 	      res = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
3622 	    }
3623 	  else
3624 	    res = TREE_TYPE (rhs1);
3625 	}
3626     }
3627 
3628   cache.put (def_stmt, res);
3629   return res;
3630 }
3631 
3632 /* Return the proper type for converting bool VAR into
3633    an integer value or NULL_TREE if no such type exists.
3634    The type is chosen so that converted value has the
3635    same number of elements as VAR's vector type.  */
3636 
3637 static tree
search_type_for_mask(tree var,vec_info * vinfo)3638 search_type_for_mask (tree var, vec_info *vinfo)
3639 {
3640   hash_map<gimple *, tree> cache;
3641   return search_type_for_mask_1 (var, vinfo, cache);
3642 }
3643 
3644 /* Function vect_recog_bool_pattern
3645 
3646    Try to find pattern like following:
3647 
3648      bool a_b, b_b, c_b, d_b, e_b;
3649      TYPE f_T;
3650    loop:
3651      S1  a_b = x1 CMP1 y1;
3652      S2  b_b = x2 CMP2 y2;
3653      S3  c_b = a_b & b_b;
3654      S4  d_b = x3 CMP3 y3;
3655      S5  e_b = c_b | d_b;
3656      S6  f_T = (TYPE) e_b;
3657 
3658    where type 'TYPE' is an integral type.  Or a similar pattern
3659    ending in
3660 
3661      S6  f_Y = e_b ? r_Y : s_Y;
3662 
3663    as results from if-conversion of a complex condition.
3664 
3665    Input:
3666 
3667    * LAST_STMT: A stmt at the end from which the pattern
3668 		search begins, i.e. cast of a bool to
3669 		an integer type.
3670 
3671    Output:
3672 
3673    * TYPE_IN: The type of the input arguments to the pattern.
3674 
3675    * TYPE_OUT: The type of the output of this pattern.
3676 
3677    * Return value: A new stmt that will be used to replace the pattern.
3678 
3679 	Assuming size of TYPE is the same as size of all comparisons
3680 	(otherwise some casts would be added where needed), the above
3681 	sequence we create related pattern stmts:
3682 	S1'  a_T = x1 CMP1 y1 ? 1 : 0;
3683 	S3'  c_T = x2 CMP2 y2 ? a_T : 0;
3684 	S4'  d_T = x3 CMP3 y3 ? 1 : 0;
3685 	S5'  e_T = c_T | d_T;
3686 	S6'  f_T = e_T;
3687 
3688 	Instead of the above S3' we could emit:
3689 	S2'  b_T = x2 CMP2 y2 ? 1 : 0;
3690 	S3'  c_T = a_T | b_T;
3691 	but the above is more efficient.  */
3692 
3693 static gimple *
vect_recog_bool_pattern(vec<gimple * > * stmts,tree * type_in,tree * type_out)3694 vect_recog_bool_pattern (vec<gimple *> *stmts, tree *type_in,
3695 			 tree *type_out)
3696 {
3697   gimple *last_stmt = stmts->pop ();
3698   enum tree_code rhs_code;
3699   tree var, lhs, rhs, vectype;
3700   stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
3701   stmt_vec_info new_stmt_info;
3702   vec_info *vinfo = stmt_vinfo->vinfo;
3703   gimple *pattern_stmt;
3704 
3705   if (!is_gimple_assign (last_stmt))
3706     return NULL;
3707 
3708   var = gimple_assign_rhs1 (last_stmt);
3709   lhs = gimple_assign_lhs (last_stmt);
3710 
3711   if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (var)))
3712     return NULL;
3713 
3714   hash_set<gimple *> bool_stmts;
3715 
3716   rhs_code = gimple_assign_rhs_code (last_stmt);
3717   if (CONVERT_EXPR_CODE_P (rhs_code))
3718     {
3719       if (! INTEGRAL_TYPE_P (TREE_TYPE (lhs))
3720 	  || TYPE_PRECISION (TREE_TYPE (lhs)) == 1)
3721 	return NULL;
3722       vectype = get_vectype_for_scalar_type (TREE_TYPE (lhs));
3723       if (vectype == NULL_TREE)
3724 	return NULL;
3725 
3726       if (check_bool_pattern (var, vinfo, bool_stmts))
3727 	{
3728 	  rhs = adjust_bool_stmts (bool_stmts, TREE_TYPE (lhs), last_stmt);
3729 	  lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
3730 	  if (useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
3731 	    pattern_stmt = gimple_build_assign (lhs, SSA_NAME, rhs);
3732 	  else
3733 	    pattern_stmt
3734 	      = gimple_build_assign (lhs, NOP_EXPR, rhs);
3735 	}
3736       else
3737 	{
3738 	  tree type = search_type_for_mask (var, vinfo);
3739 	  tree cst0, cst1, tmp;
3740 
3741 	  if (!type)
3742 	    return NULL;
3743 
3744 	  /* We may directly use cond with narrowed type to avoid
3745 	     multiple cond exprs with following result packing and
3746 	     perform single cond with packed mask instead.  In case
3747 	     of widening we better make cond first and then extract
3748 	     results.  */
3749 	  if (TYPE_MODE (type) == TYPE_MODE (TREE_TYPE (lhs)))
3750 	    type = TREE_TYPE (lhs);
3751 
3752 	  cst0 = build_int_cst (type, 0);
3753 	  cst1 = build_int_cst (type, 1);
3754 	  tmp = vect_recog_temp_ssa_var (type, NULL);
3755 	  pattern_stmt = gimple_build_assign (tmp, COND_EXPR, var, cst1, cst0);
3756 
3757 	  if (!useless_type_conversion_p (type, TREE_TYPE (lhs)))
3758 	    {
3759 	      tree new_vectype = get_vectype_for_scalar_type (type);
3760 	      new_stmt_info = new_stmt_vec_info (pattern_stmt, vinfo);
3761 	      set_vinfo_for_stmt (pattern_stmt, new_stmt_info);
3762 	      STMT_VINFO_VECTYPE (new_stmt_info) = new_vectype;
3763 	      new_pattern_def_seq (stmt_vinfo, pattern_stmt);
3764 
3765 	      lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
3766 	      pattern_stmt = gimple_build_assign (lhs, CONVERT_EXPR, tmp);
3767 	    }
3768 	}
3769 
3770       *type_out = vectype;
3771       *type_in = vectype;
3772       stmts->safe_push (last_stmt);
3773       if (dump_enabled_p ())
3774 	dump_printf_loc (MSG_NOTE, vect_location,
3775                          "vect_recog_bool_pattern: detected:\n");
3776 
3777       return pattern_stmt;
3778     }
3779   else if (rhs_code == COND_EXPR
3780 	   && TREE_CODE (var) == SSA_NAME)
3781     {
3782       vectype = get_vectype_for_scalar_type (TREE_TYPE (lhs));
3783       if (vectype == NULL_TREE)
3784 	return NULL;
3785 
3786       /* Build a scalar type for the boolean result that when
3787          vectorized matches the vector type of the result in
3788 	 size and number of elements.  */
3789       unsigned prec
3790 	= vector_element_size (tree_to_poly_uint64 (TYPE_SIZE (vectype)),
3791 			       TYPE_VECTOR_SUBPARTS (vectype));
3792 
3793       tree type
3794 	= build_nonstandard_integer_type (prec,
3795 					  TYPE_UNSIGNED (TREE_TYPE (var)));
3796       if (get_vectype_for_scalar_type (type) == NULL_TREE)
3797 	return NULL;
3798 
3799       if (!check_bool_pattern (var, vinfo, bool_stmts))
3800 	return NULL;
3801 
3802       rhs = adjust_bool_stmts (bool_stmts, type, last_stmt);
3803 
3804       lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
3805       pattern_stmt
3806 	  = gimple_build_assign (lhs, COND_EXPR,
3807 				 build2 (NE_EXPR, boolean_type_node,
3808 					 rhs, build_int_cst (type, 0)),
3809 				 gimple_assign_rhs2 (last_stmt),
3810 				 gimple_assign_rhs3 (last_stmt));
3811       *type_out = vectype;
3812       *type_in = vectype;
3813       stmts->safe_push (last_stmt);
3814       if (dump_enabled_p ())
3815 	dump_printf_loc (MSG_NOTE, vect_location,
3816                          "vect_recog_bool_pattern: detected:\n");
3817 
3818       return pattern_stmt;
3819     }
3820   else if (rhs_code == SSA_NAME
3821 	   && STMT_VINFO_DATA_REF (stmt_vinfo))
3822     {
3823       stmt_vec_info pattern_stmt_info;
3824       vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
3825       gcc_assert (vectype != NULL_TREE);
3826       if (!VECTOR_MODE_P (TYPE_MODE (vectype)))
3827 	return NULL;
3828 
3829       if (check_bool_pattern (var, vinfo, bool_stmts))
3830 	rhs = adjust_bool_stmts (bool_stmts, TREE_TYPE (vectype), last_stmt);
3831       else
3832 	{
3833 	  tree type = search_type_for_mask (var, vinfo);
3834 	  tree cst0, cst1, new_vectype;
3835 
3836 	  if (!type)
3837 	    return NULL;
3838 
3839 	  if (TYPE_MODE (type) == TYPE_MODE (TREE_TYPE (vectype)))
3840 	    type = TREE_TYPE (vectype);
3841 
3842 	  cst0 = build_int_cst (type, 0);
3843 	  cst1 = build_int_cst (type, 1);
3844 	  new_vectype = get_vectype_for_scalar_type (type);
3845 
3846 	  rhs = vect_recog_temp_ssa_var (type, NULL);
3847 	  pattern_stmt = gimple_build_assign (rhs, COND_EXPR, var, cst1, cst0);
3848 
3849 	  pattern_stmt_info = new_stmt_vec_info (pattern_stmt, vinfo);
3850 	  set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
3851 	  STMT_VINFO_VECTYPE (pattern_stmt_info) = new_vectype;
3852 	  append_pattern_def_seq (stmt_vinfo, pattern_stmt);
3853 	}
3854 
3855       lhs = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (vectype), lhs);
3856       if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
3857 	{
3858 	  tree rhs2 = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
3859 	  gimple *cast_stmt = gimple_build_assign (rhs2, NOP_EXPR, rhs);
3860 	  append_pattern_def_seq (stmt_vinfo, cast_stmt);
3861 	  rhs = rhs2;
3862 	}
3863       pattern_stmt = gimple_build_assign (lhs, SSA_NAME, rhs);
3864       pattern_stmt_info = new_stmt_vec_info (pattern_stmt, vinfo);
3865       set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
3866       STMT_VINFO_DATA_REF (pattern_stmt_info)
3867 	= STMT_VINFO_DATA_REF (stmt_vinfo);
3868       STMT_VINFO_DR_WRT_VEC_LOOP (pattern_stmt_info)
3869 	= STMT_VINFO_DR_WRT_VEC_LOOP (stmt_vinfo);
3870       DR_STMT (STMT_VINFO_DATA_REF (stmt_vinfo)) = pattern_stmt;
3871       *type_out = vectype;
3872       *type_in = vectype;
3873       stmts->safe_push (last_stmt);
3874       if (dump_enabled_p ())
3875 	dump_printf_loc (MSG_NOTE, vect_location,
3876                          "vect_recog_bool_pattern: detected:\n");
3877       return pattern_stmt;
3878     }
3879   else
3880     return NULL;
3881 }
3882 
3883 
3884 /* A helper for vect_recog_mask_conversion_pattern.  Build
3885    conversion of MASK to a type suitable for masking VECTYPE.
3886    Built statement gets required vectype and is appended to
3887    a pattern sequence of STMT_VINFO.
3888 
3889    Return converted mask.  */
3890 
3891 static tree
build_mask_conversion(tree mask,tree vectype,stmt_vec_info stmt_vinfo,vec_info * vinfo)3892 build_mask_conversion (tree mask, tree vectype, stmt_vec_info stmt_vinfo,
3893 		       vec_info *vinfo)
3894 {
3895   gimple *stmt;
3896   tree masktype, tmp;
3897   stmt_vec_info new_stmt_info;
3898 
3899   masktype = build_same_sized_truth_vector_type (vectype);
3900   tmp = vect_recog_temp_ssa_var (TREE_TYPE (masktype), NULL);
3901   stmt = gimple_build_assign (tmp, CONVERT_EXPR, mask);
3902   new_stmt_info = new_stmt_vec_info (stmt, vinfo);
3903   set_vinfo_for_stmt (stmt, new_stmt_info);
3904   STMT_VINFO_VECTYPE (new_stmt_info) = masktype;
3905   append_pattern_def_seq (stmt_vinfo, stmt);
3906 
3907   return tmp;
3908 }
3909 
3910 
3911 /* Function vect_recog_mask_conversion_pattern
3912 
3913    Try to find statements which require boolean type
3914    converison.  Additional conversion statements are
3915    added to handle such cases.  For example:
3916 
3917    bool m_1, m_2, m_3;
3918    int i_4, i_5;
3919    double d_6, d_7;
3920    char c_1, c_2, c_3;
3921 
3922    S1   m_1 = i_4 > i_5;
3923    S2   m_2 = d_6 < d_7;
3924    S3   m_3 = m_1 & m_2;
3925    S4   c_1 = m_3 ? c_2 : c_3;
3926 
3927    Will be transformed into:
3928 
3929    S1   m_1 = i_4 > i_5;
3930    S2   m_2 = d_6 < d_7;
3931    S3'' m_2' = (_Bool[bitsize=32])m_2
3932    S3'  m_3' = m_1 & m_2';
3933    S4'' m_3'' = (_Bool[bitsize=8])m_3'
3934    S4'  c_1' = m_3'' ? c_2 : c_3;  */
3935 
3936 static gimple *
vect_recog_mask_conversion_pattern(vec<gimple * > * stmts,tree * type_in,tree * type_out)3937 vect_recog_mask_conversion_pattern (vec<gimple *> *stmts, tree *type_in,
3938 				    tree *type_out)
3939 {
3940   gimple *last_stmt = stmts->pop ();
3941   enum tree_code rhs_code;
3942   tree lhs = NULL_TREE, rhs1, rhs2, tmp, rhs1_type, rhs2_type;
3943   tree vectype1, vectype2;
3944   stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
3945   stmt_vec_info pattern_stmt_info;
3946   vec_info *vinfo = stmt_vinfo->vinfo;
3947 
3948   /* Check for MASK_LOAD ans MASK_STORE calls requiring mask conversion.  */
3949   if (is_gimple_call (last_stmt)
3950       && gimple_call_internal_p (last_stmt)
3951       && (gimple_call_internal_fn (last_stmt) == IFN_MASK_STORE
3952 	  || gimple_call_internal_fn (last_stmt) == IFN_MASK_LOAD))
3953     {
3954       gcall *pattern_stmt;
3955       bool load = (gimple_call_internal_fn (last_stmt) == IFN_MASK_LOAD);
3956 
3957       if (load)
3958 	{
3959 	  lhs = gimple_call_lhs (last_stmt);
3960 	  vectype1 = get_vectype_for_scalar_type (TREE_TYPE (lhs));
3961 	}
3962       else
3963 	{
3964 	  rhs2 = gimple_call_arg (last_stmt, 3);
3965 	  vectype1 = get_vectype_for_scalar_type (TREE_TYPE (rhs2));
3966 	}
3967 
3968       rhs1 = gimple_call_arg (last_stmt, 2);
3969       rhs1_type = search_type_for_mask (rhs1, vinfo);
3970       if (!rhs1_type)
3971 	return NULL;
3972       vectype2 = get_mask_type_for_scalar_type (rhs1_type);
3973 
3974       if (!vectype1 || !vectype2
3975 	  || known_eq (TYPE_VECTOR_SUBPARTS (vectype1),
3976 		       TYPE_VECTOR_SUBPARTS (vectype2)))
3977 	return NULL;
3978 
3979       tmp = build_mask_conversion (rhs1, vectype1, stmt_vinfo, vinfo);
3980 
3981       if (load)
3982 	{
3983 	  lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
3984 	  pattern_stmt
3985 	    = gimple_build_call_internal (IFN_MASK_LOAD, 3,
3986 					  gimple_call_arg (last_stmt, 0),
3987 					  gimple_call_arg (last_stmt, 1),
3988 					  tmp);
3989 	  gimple_call_set_lhs (pattern_stmt, lhs);
3990 	}
3991       else
3992 	  pattern_stmt
3993 	    = gimple_build_call_internal (IFN_MASK_STORE, 4,
3994 					  gimple_call_arg (last_stmt, 0),
3995 					  gimple_call_arg (last_stmt, 1),
3996 					  tmp,
3997 					  gimple_call_arg (last_stmt, 3));
3998 
3999       gimple_call_set_nothrow (pattern_stmt, true);
4000 
4001       pattern_stmt_info = new_stmt_vec_info (pattern_stmt, vinfo);
4002       set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
4003       STMT_VINFO_DATA_REF (pattern_stmt_info)
4004 	= STMT_VINFO_DATA_REF (stmt_vinfo);
4005       STMT_VINFO_DR_WRT_VEC_LOOP (pattern_stmt_info)
4006 	= STMT_VINFO_DR_WRT_VEC_LOOP (stmt_vinfo);
4007       DR_STMT (STMT_VINFO_DATA_REF (stmt_vinfo)) = pattern_stmt;
4008 
4009       *type_out = vectype1;
4010       *type_in = vectype1;
4011       stmts->safe_push (last_stmt);
4012       if (dump_enabled_p ())
4013 	dump_printf_loc (MSG_NOTE, vect_location,
4014                          "vect_recog_mask_conversion_pattern: detected:\n");
4015 
4016       return pattern_stmt;
4017     }
4018 
4019   if (!is_gimple_assign (last_stmt))
4020     return NULL;
4021 
4022   gimple *pattern_stmt;
4023   lhs = gimple_assign_lhs (last_stmt);
4024   rhs1 = gimple_assign_rhs1 (last_stmt);
4025   rhs_code = gimple_assign_rhs_code (last_stmt);
4026 
4027   /* Check for cond expression requiring mask conversion.  */
4028   if (rhs_code == COND_EXPR)
4029     {
4030       /* vect_recog_mixed_size_cond_pattern could apply.
4031 	 Do nothing then.  */
4032       if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
4033 	return NULL;
4034 
4035       vectype1 = get_vectype_for_scalar_type (TREE_TYPE (lhs));
4036 
4037       if (TREE_CODE (rhs1) == SSA_NAME)
4038 	{
4039 	  rhs1_type = search_type_for_mask (rhs1, vinfo);
4040 	  if (!rhs1_type)
4041 	    return NULL;
4042 	}
4043       else if (COMPARISON_CLASS_P (rhs1))
4044 	{
4045 	  /* Check whether we're comparing scalar booleans and (if so)
4046 	     whether a better mask type exists than the mask associated
4047 	     with boolean-sized elements.  This avoids unnecessary packs
4048 	     and unpacks if the booleans are set from comparisons of
4049 	     wider types.  E.g. in:
4050 
4051 	       int x1, x2, x3, x4, y1, y1;
4052 	       ...
4053 	       bool b1 = (x1 == x2);
4054 	       bool b2 = (x3 == x4);
4055 	       ... = b1 == b2 ? y1 : y2;
4056 
4057 	     it is better for b1 and b2 to use the mask type associated
4058 	     with int elements rather bool (byte) elements.  */
4059 	  rhs1_type = search_type_for_mask (TREE_OPERAND (rhs1, 0), vinfo);
4060 	  if (!rhs1_type)
4061 	    rhs1_type = TREE_TYPE (TREE_OPERAND (rhs1, 0));
4062 	}
4063       else
4064 	return NULL;
4065 
4066       vectype2 = get_mask_type_for_scalar_type (rhs1_type);
4067 
4068       if (!vectype1 || !vectype2)
4069 	return NULL;
4070 
4071       /* Continue if a conversion is needed.  Also continue if we have
4072 	 a comparison whose vector type would normally be different from
4073 	 VECTYPE2 when considered in isolation.  In that case we'll
4074 	 replace the comparison with an SSA name (so that we can record
4075 	 its vector type) and behave as though the comparison was an SSA
4076 	 name from the outset.  */
4077       if (known_eq (TYPE_VECTOR_SUBPARTS (vectype1),
4078 		    TYPE_VECTOR_SUBPARTS (vectype2))
4079 	  && (TREE_CODE (rhs1) == SSA_NAME
4080 	      || rhs1_type == TREE_TYPE (TREE_OPERAND (rhs1, 0))))
4081 	return NULL;
4082 
4083       /* If rhs1 is invariant and we can promote it leave the COND_EXPR
4084          in place, we can handle it in vectorizable_condition.  This avoids
4085 	 unnecessary promotion stmts and increased vectorization factor.  */
4086       if (COMPARISON_CLASS_P (rhs1)
4087 	  && INTEGRAL_TYPE_P (rhs1_type)
4088 	  && known_le (TYPE_VECTOR_SUBPARTS (vectype1),
4089 		       TYPE_VECTOR_SUBPARTS (vectype2)))
4090 	{
4091 	  gimple *dummy;
4092 	  enum vect_def_type dt;
4093 	  if (vect_is_simple_use (TREE_OPERAND (rhs1, 0), stmt_vinfo->vinfo,
4094 				  &dummy, &dt)
4095 	      && dt == vect_external_def
4096 	      && vect_is_simple_use (TREE_OPERAND (rhs1, 1), stmt_vinfo->vinfo,
4097 				     &dummy, &dt)
4098 	      && (dt == vect_external_def
4099 		  || dt == vect_constant_def))
4100 	    {
4101 	      tree wide_scalar_type = build_nonstandard_integer_type
4102 		(tree_to_uhwi (TYPE_SIZE (TREE_TYPE (vectype1))),
4103 		 TYPE_UNSIGNED (rhs1_type));
4104 	      tree vectype3 = get_vectype_for_scalar_type (wide_scalar_type);
4105 	      if (expand_vec_cond_expr_p (vectype1, vectype3, TREE_CODE (rhs1)))
4106 		return NULL;
4107 	    }
4108 	}
4109 
4110       /* If rhs1 is a comparison we need to move it into a
4111 	 separate statement.  */
4112       if (TREE_CODE (rhs1) != SSA_NAME)
4113 	{
4114 	  tmp = vect_recog_temp_ssa_var (TREE_TYPE (rhs1), NULL);
4115 	  pattern_stmt = gimple_build_assign (tmp, rhs1);
4116 	  rhs1 = tmp;
4117 
4118 	  pattern_stmt_info = new_stmt_vec_info (pattern_stmt, vinfo);
4119 	  set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
4120 	  STMT_VINFO_VECTYPE (pattern_stmt_info) = vectype2;
4121 	  append_pattern_def_seq (stmt_vinfo, pattern_stmt);
4122 	}
4123 
4124       if (maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1),
4125 		    TYPE_VECTOR_SUBPARTS (vectype2)))
4126 	tmp = build_mask_conversion (rhs1, vectype1, stmt_vinfo, vinfo);
4127       else
4128 	tmp = rhs1;
4129 
4130       lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
4131       pattern_stmt = gimple_build_assign (lhs, COND_EXPR, tmp,
4132 					  gimple_assign_rhs2 (last_stmt),
4133 					  gimple_assign_rhs3 (last_stmt));
4134 
4135       *type_out = vectype1;
4136       *type_in = vectype1;
4137       stmts->safe_push (last_stmt);
4138       if (dump_enabled_p ())
4139 	dump_printf_loc (MSG_NOTE, vect_location,
4140                          "vect_recog_mask_conversion_pattern: detected:\n");
4141 
4142       return pattern_stmt;
4143     }
4144 
4145   /* Now check for binary boolean operations requiring conversion for
4146      one of operands.  */
4147   if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (lhs)))
4148     return NULL;
4149 
4150   if (rhs_code != BIT_IOR_EXPR
4151       && rhs_code != BIT_XOR_EXPR
4152       && rhs_code != BIT_AND_EXPR
4153       && TREE_CODE_CLASS (rhs_code) != tcc_comparison)
4154     return NULL;
4155 
4156   rhs2 = gimple_assign_rhs2 (last_stmt);
4157 
4158   rhs1_type = search_type_for_mask (rhs1, vinfo);
4159   rhs2_type = search_type_for_mask (rhs2, vinfo);
4160 
4161   if (!rhs1_type || !rhs2_type
4162       || TYPE_PRECISION (rhs1_type) == TYPE_PRECISION (rhs2_type))
4163     return NULL;
4164 
4165   if (TYPE_PRECISION (rhs1_type) < TYPE_PRECISION (rhs2_type))
4166     {
4167       vectype1 = get_mask_type_for_scalar_type (rhs1_type);
4168       if (!vectype1)
4169 	return NULL;
4170       rhs2 = build_mask_conversion (rhs2, vectype1, stmt_vinfo, vinfo);
4171     }
4172   else
4173     {
4174       vectype1 = get_mask_type_for_scalar_type (rhs2_type);
4175       if (!vectype1)
4176 	return NULL;
4177       rhs1 = build_mask_conversion (rhs1, vectype1, stmt_vinfo, vinfo);
4178     }
4179 
4180   lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
4181   pattern_stmt = gimple_build_assign (lhs, rhs_code, rhs1, rhs2);
4182 
4183   *type_out = vectype1;
4184   *type_in = vectype1;
4185   stmts->safe_push (last_stmt);
4186   if (dump_enabled_p ())
4187     dump_printf_loc (MSG_NOTE, vect_location,
4188 		     "vect_recog_mask_conversion_pattern: detected:\n");
4189 
4190   return pattern_stmt;
4191 }
4192 
4193 /* STMT is a load or store.  If the load or store is conditional, return
4194    the boolean condition under which it occurs, otherwise return null.  */
4195 
4196 static tree
vect_get_load_store_mask(gimple * stmt)4197 vect_get_load_store_mask (gimple *stmt)
4198 {
4199   if (gassign *def_assign = dyn_cast <gassign *> (stmt))
4200     {
4201       gcc_assert (gimple_assign_single_p (def_assign));
4202       return NULL_TREE;
4203     }
4204 
4205   if (gcall *def_call = dyn_cast <gcall *> (stmt))
4206     {
4207       internal_fn ifn = gimple_call_internal_fn (def_call);
4208       int mask_index = internal_fn_mask_index (ifn);
4209       return gimple_call_arg (def_call, mask_index);
4210     }
4211 
4212   gcc_unreachable ();
4213 }
4214 
4215 /* Return the scalar offset type that an internal gather/scatter function
4216    should use.  GS_INFO describes the gather/scatter operation.  */
4217 
4218 static tree
vect_get_gather_scatter_offset_type(gather_scatter_info * gs_info)4219 vect_get_gather_scatter_offset_type (gather_scatter_info *gs_info)
4220 {
4221   tree offset_type = TREE_TYPE (gs_info->offset);
4222   unsigned int element_bits = tree_to_uhwi (TYPE_SIZE (gs_info->element_type));
4223 
4224   /* Enforced by vect_check_gather_scatter.  */
4225   unsigned int offset_bits = TYPE_PRECISION (offset_type);
4226   gcc_assert (element_bits >= offset_bits);
4227 
4228   /* If the offset is narrower than the elements, extend it according
4229      to its sign.  */
4230   if (element_bits > offset_bits)
4231     return build_nonstandard_integer_type (element_bits,
4232 					   TYPE_UNSIGNED (offset_type));
4233 
4234   return offset_type;
4235 }
4236 
4237 /* Return MASK if MASK is suitable for masking an operation on vectors
4238    of type VECTYPE, otherwise convert it into such a form and return
4239    the result.  Associate any conversion statements with STMT_INFO's
4240    pattern.  */
4241 
4242 static tree
vect_convert_mask_for_vectype(tree mask,tree vectype,stmt_vec_info stmt_info,vec_info * vinfo)4243 vect_convert_mask_for_vectype (tree mask, tree vectype,
4244 			       stmt_vec_info stmt_info, vec_info *vinfo)
4245 {
4246   tree mask_type = search_type_for_mask (mask, vinfo);
4247   if (mask_type)
4248     {
4249       tree mask_vectype = get_mask_type_for_scalar_type (mask_type);
4250       if (mask_vectype
4251 	  && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype),
4252 		       TYPE_VECTOR_SUBPARTS (mask_vectype)))
4253 	mask = build_mask_conversion (mask, vectype, stmt_info, vinfo);
4254     }
4255   return mask;
4256 }
4257 
4258 /* Return the equivalent of:
4259 
4260      fold_convert (TYPE, VALUE)
4261 
4262    with the expectation that the operation will be vectorized.
4263    If new statements are needed, add them as pattern statements
4264    to STMT_INFO.  */
4265 
4266 static tree
vect_add_conversion_to_patterm(tree type,tree value,stmt_vec_info stmt_info,vec_info * vinfo)4267 vect_add_conversion_to_patterm (tree type, tree value,
4268 				stmt_vec_info stmt_info,
4269 				vec_info *vinfo)
4270 {
4271   if (useless_type_conversion_p (type, TREE_TYPE (value)))
4272     return value;
4273 
4274   tree new_value = vect_recog_temp_ssa_var (type, NULL);
4275   gassign *conversion = gimple_build_assign (new_value, CONVERT_EXPR, value);
4276   stmt_vec_info new_stmt_info = new_stmt_vec_info (conversion, vinfo);
4277   set_vinfo_for_stmt (conversion, new_stmt_info);
4278   STMT_VINFO_VECTYPE (new_stmt_info) = get_vectype_for_scalar_type (type);
4279   append_pattern_def_seq (stmt_info, conversion);
4280   return new_value;
4281 }
4282 
4283 /* Try to convert STMT into a call to a gather load or scatter store
4284    internal function.  Return the final statement on success and set
4285    *TYPE_IN and *TYPE_OUT to the vector type being loaded or stored.
4286 
4287    This function only handles gathers and scatters that were recognized
4288    as such from the outset (indicated by STMT_VINFO_GATHER_SCATTER_P).  */
4289 
4290 static gimple *
vect_try_gather_scatter_pattern(gimple * stmt,stmt_vec_info last_stmt_info,tree * type_in,tree * type_out)4291 vect_try_gather_scatter_pattern (gimple *stmt, stmt_vec_info last_stmt_info,
4292 				 tree *type_in, tree *type_out)
4293 {
4294   /* Currently we only support this for loop vectorization.  */
4295   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4296   loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (stmt_info->vinfo);
4297   if (!loop_vinfo)
4298     return NULL;
4299 
4300   /* Make sure that we're looking at a gather load or scatter store.  */
4301   data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
4302   if (!dr || !STMT_VINFO_GATHER_SCATTER_P (stmt_info))
4303     return NULL;
4304 
4305   /* Get the boolean that controls whether the load or store happens.
4306      This is null if the operation is unconditional.  */
4307   tree mask = vect_get_load_store_mask (stmt);
4308 
4309   /* Make sure that the target supports an appropriate internal
4310      function for the gather/scatter operation.  */
4311   gather_scatter_info gs_info;
4312   if (!vect_check_gather_scatter (stmt, loop_vinfo, &gs_info)
4313       || gs_info.decl)
4314     return NULL;
4315 
4316   /* Convert the mask to the right form.  */
4317   tree gs_vectype = get_vectype_for_scalar_type (gs_info.element_type);
4318   if (mask)
4319     mask = vect_convert_mask_for_vectype (mask, gs_vectype, last_stmt_info,
4320 					  loop_vinfo);
4321 
4322   /* Get the invariant base and non-invariant offset, converting the
4323      latter to the same width as the vector elements.  */
4324   tree base = gs_info.base;
4325   tree offset_type = vect_get_gather_scatter_offset_type (&gs_info);
4326   tree offset = vect_add_conversion_to_patterm (offset_type, gs_info.offset,
4327 						last_stmt_info, loop_vinfo);
4328 
4329   /* Build the new pattern statement.  */
4330   tree scale = size_int (gs_info.scale);
4331   gcall *pattern_stmt;
4332   if (DR_IS_READ (dr))
4333     {
4334       if (mask != NULL)
4335 	pattern_stmt = gimple_build_call_internal (gs_info.ifn, 4, base,
4336 						   offset, scale, mask);
4337       else
4338 	pattern_stmt = gimple_build_call_internal (gs_info.ifn, 3, base,
4339 						   offset, scale);
4340       tree load_lhs = vect_recog_temp_ssa_var (gs_info.element_type, NULL);
4341       gimple_call_set_lhs (pattern_stmt, load_lhs);
4342     }
4343   else
4344     {
4345       tree rhs = vect_get_store_rhs (stmt);
4346       if (mask != NULL)
4347 	pattern_stmt = gimple_build_call_internal (IFN_MASK_SCATTER_STORE, 5,
4348 						   base, offset, scale, rhs,
4349 						   mask);
4350       else
4351 	pattern_stmt = gimple_build_call_internal (IFN_SCATTER_STORE, 4,
4352 						   base, offset, scale, rhs);
4353     }
4354   gimple_call_set_nothrow (pattern_stmt, true);
4355 
4356   /* Copy across relevant vectorization info and associate DR with the
4357      new pattern statement instead of the original statement.  */
4358   stmt_vec_info pattern_stmt_info = new_stmt_vec_info (pattern_stmt,
4359 						       loop_vinfo);
4360   set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
4361   STMT_VINFO_DATA_REF (pattern_stmt_info) = dr;
4362   STMT_VINFO_DR_WRT_VEC_LOOP (pattern_stmt_info)
4363     = STMT_VINFO_DR_WRT_VEC_LOOP (stmt_info);
4364   STMT_VINFO_GATHER_SCATTER_P (pattern_stmt_info)
4365     = STMT_VINFO_GATHER_SCATTER_P (stmt_info);
4366   DR_STMT (dr) = pattern_stmt;
4367 
4368   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4369   *type_out = vectype;
4370   *type_in = vectype;
4371 
4372   if (dump_enabled_p ())
4373     dump_printf_loc (MSG_NOTE, vect_location,
4374 		     "gather/scatter pattern detected:\n");
4375 
4376   return pattern_stmt;
4377 }
4378 
4379 /* Pattern wrapper around vect_try_gather_scatter_pattern.  */
4380 
4381 static gimple *
vect_recog_gather_scatter_pattern(vec<gimple * > * stmts,tree * type_in,tree * type_out)4382 vect_recog_gather_scatter_pattern (vec<gimple *> *stmts, tree *type_in,
4383 				   tree *type_out)
4384 {
4385   gimple *last_stmt = stmts->pop ();
4386   stmt_vec_info last_stmt_info = vinfo_for_stmt (last_stmt);
4387   gimple *pattern_stmt = vect_try_gather_scatter_pattern (last_stmt,
4388 							  last_stmt_info,
4389 							  type_in, type_out);
4390   if (pattern_stmt)
4391     stmts->safe_push (last_stmt);
4392   return pattern_stmt;
4393 }
4394 
4395 /* Mark statements that are involved in a pattern.  */
4396 
4397 static inline void
vect_mark_pattern_stmts(gimple * orig_stmt,gimple * pattern_stmt,tree pattern_vectype)4398 vect_mark_pattern_stmts (gimple *orig_stmt, gimple *pattern_stmt,
4399                          tree pattern_vectype)
4400 {
4401   stmt_vec_info pattern_stmt_info, def_stmt_info;
4402   stmt_vec_info orig_stmt_info = vinfo_for_stmt (orig_stmt);
4403   vec_info *vinfo = orig_stmt_info->vinfo;
4404   gimple *def_stmt;
4405 
4406   pattern_stmt_info = vinfo_for_stmt (pattern_stmt);
4407   if (pattern_stmt_info == NULL)
4408     {
4409       pattern_stmt_info = new_stmt_vec_info (pattern_stmt, vinfo);
4410       set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
4411     }
4412   gimple_set_bb (pattern_stmt, gimple_bb (orig_stmt));
4413 
4414   STMT_VINFO_RELATED_STMT (pattern_stmt_info) = orig_stmt;
4415   STMT_VINFO_DEF_TYPE (pattern_stmt_info)
4416     = STMT_VINFO_DEF_TYPE (orig_stmt_info);
4417   STMT_VINFO_VECTYPE (pattern_stmt_info) = pattern_vectype;
4418   STMT_VINFO_IN_PATTERN_P (orig_stmt_info) = true;
4419   STMT_VINFO_RELATED_STMT (orig_stmt_info) = pattern_stmt;
4420   STMT_VINFO_PATTERN_DEF_SEQ (pattern_stmt_info)
4421     = STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt_info);
4422   if (STMT_VINFO_PATTERN_DEF_SEQ (pattern_stmt_info))
4423     {
4424       gimple_stmt_iterator si;
4425       for (si = gsi_start (STMT_VINFO_PATTERN_DEF_SEQ (pattern_stmt_info));
4426 	   !gsi_end_p (si); gsi_next (&si))
4427 	{
4428 	  def_stmt = gsi_stmt (si);
4429 	  def_stmt_info = vinfo_for_stmt (def_stmt);
4430 	  if (def_stmt_info == NULL)
4431 	    {
4432 	      def_stmt_info = new_stmt_vec_info (def_stmt, vinfo);
4433 	      set_vinfo_for_stmt (def_stmt, def_stmt_info);
4434 	    }
4435 	  gimple_set_bb (def_stmt, gimple_bb (orig_stmt));
4436 	  STMT_VINFO_RELATED_STMT (def_stmt_info) = orig_stmt;
4437 	  STMT_VINFO_DEF_TYPE (def_stmt_info) = vect_internal_def;
4438 	  if (STMT_VINFO_VECTYPE (def_stmt_info) == NULL_TREE)
4439 	    STMT_VINFO_VECTYPE (def_stmt_info) = pattern_vectype;
4440 	}
4441     }
4442 }
4443 
4444 /* Function vect_pattern_recog_1
4445 
4446    Input:
4447    PATTERN_RECOG_FUNC: A pointer to a function that detects a certain
4448         computation pattern.
4449    STMT: A stmt from which the pattern search should start.
4450 
4451    If PATTERN_RECOG_FUNC successfully detected the pattern, it creates an
4452    expression that computes the same functionality and can be used to
4453    replace the sequence of stmts that are involved in the pattern.
4454 
4455    Output:
4456    This function checks if the expression returned by PATTERN_RECOG_FUNC is
4457    supported in vector form by the target.  We use 'TYPE_IN' to obtain the
4458    relevant vector type. If 'TYPE_IN' is already a vector type, then this
4459    indicates that target support had already been checked by PATTERN_RECOG_FUNC.
4460    If 'TYPE_OUT' is also returned by PATTERN_RECOG_FUNC, we check that it fits
4461    to the available target pattern.
4462 
4463    This function also does some bookkeeping, as explained in the documentation
4464    for vect_recog_pattern.  */
4465 
4466 static bool
vect_pattern_recog_1(vect_recog_func * recog_func,gimple_stmt_iterator si,vec<gimple * > * stmts_to_replace)4467 vect_pattern_recog_1 (vect_recog_func *recog_func,
4468 		      gimple_stmt_iterator si,
4469 		      vec<gimple *> *stmts_to_replace)
4470 {
4471   gimple *stmt = gsi_stmt (si), *pattern_stmt;
4472   stmt_vec_info stmt_info;
4473   loop_vec_info loop_vinfo;
4474   tree pattern_vectype;
4475   tree type_in, type_out;
4476   enum tree_code code;
4477   int i;
4478   gimple *next;
4479 
4480   stmts_to_replace->truncate (0);
4481   stmts_to_replace->quick_push (stmt);
4482   pattern_stmt = recog_func->fn (stmts_to_replace, &type_in, &type_out);
4483   if (!pattern_stmt)
4484     return false;
4485 
4486   stmt = stmts_to_replace->last ();
4487   stmt_info = vinfo_for_stmt (stmt);
4488   loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4489 
4490   if (VECTOR_BOOLEAN_TYPE_P (type_in)
4491       || VECTOR_TYPE_P (type_in))
4492     {
4493       /* No need to check target support (already checked by the pattern
4494          recognition function).  */
4495       pattern_vectype = type_out ? type_out : type_in;
4496     }
4497   else
4498     {
4499       /* Check target support  */
4500       type_in = get_vectype_for_scalar_type (type_in);
4501       if (!type_in)
4502 	return false;
4503       if (type_out)
4504 	type_out = get_vectype_for_scalar_type (type_out);
4505       else
4506 	type_out = type_in;
4507       if (!type_out)
4508 	return false;
4509       pattern_vectype = type_out;
4510 
4511       if (is_gimple_assign (pattern_stmt))
4512 	{
4513 	  enum insn_code icode;
4514 	  code = gimple_assign_rhs_code (pattern_stmt);
4515 	  optab optab = optab_for_tree_code (code, type_in, optab_default);
4516 	  machine_mode vec_mode = TYPE_MODE (type_in);
4517 	  if (!optab
4518 	      || (icode = optab_handler (optab, vec_mode)) == CODE_FOR_nothing
4519 	      || (insn_data[icode].operand[0].mode != TYPE_MODE (type_out)))
4520 	    return false;
4521 	}
4522       else
4523 	gcc_assert (is_gimple_call (pattern_stmt));
4524     }
4525 
4526   /* Found a vectorizable pattern.  */
4527   if (dump_enabled_p ())
4528     {
4529       dump_printf_loc (MSG_NOTE, vect_location,
4530                        "%s pattern recognized: ", recog_func->name);
4531       dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
4532     }
4533 
4534   /* Mark the stmts that are involved in the pattern. */
4535   vect_mark_pattern_stmts (stmt, pattern_stmt, pattern_vectype);
4536 
4537   /* Patterns cannot be vectorized using SLP, because they change the order of
4538      computation.  */
4539   if (loop_vinfo)
4540     FOR_EACH_VEC_ELT (LOOP_VINFO_REDUCTIONS (loop_vinfo), i, next)
4541       if (next == stmt)
4542         LOOP_VINFO_REDUCTIONS (loop_vinfo).ordered_remove (i);
4543 
4544   /* It is possible that additional pattern stmts are created and inserted in
4545      STMTS_TO_REPLACE.  We create a stmt_info for each of them, and mark the
4546      relevant statements.  */
4547   for (i = 0; stmts_to_replace->iterate (i, &stmt)
4548 	      && (unsigned) i < (stmts_to_replace->length () - 1);
4549        i++)
4550     {
4551       stmt_info = vinfo_for_stmt (stmt);
4552       pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
4553       if (dump_enabled_p ())
4554         {
4555           dump_printf_loc (MSG_NOTE, vect_location,
4556                            "additional pattern stmt: ");
4557           dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
4558         }
4559 
4560       vect_mark_pattern_stmts (stmt, pattern_stmt, NULL_TREE);
4561     }
4562 
4563   return true;
4564 }
4565 
4566 
4567 /* Function vect_pattern_recog
4568 
4569    Input:
4570    LOOP_VINFO - a struct_loop_info of a loop in which we want to look for
4571         computation idioms.
4572 
4573    Output - for each computation idiom that is detected we create a new stmt
4574         that provides the same functionality and that can be vectorized.  We
4575         also record some information in the struct_stmt_info of the relevant
4576         stmts, as explained below:
4577 
4578    At the entry to this function we have the following stmts, with the
4579    following initial value in the STMT_VINFO fields:
4580 
4581          stmt                     in_pattern_p  related_stmt    vec_stmt
4582          S1: a_i = ....                 -       -               -
4583          S2: a_2 = ..use(a_i)..         -       -               -
4584          S3: a_1 = ..use(a_2)..         -       -               -
4585          S4: a_0 = ..use(a_1)..         -       -               -
4586          S5: ... = ..use(a_0)..         -       -               -
4587 
4588    Say the sequence {S1,S2,S3,S4} was detected as a pattern that can be
4589    represented by a single stmt.  We then:
4590    - create a new stmt S6 equivalent to the pattern (the stmt is not
4591      inserted into the code)
4592    - fill in the STMT_VINFO fields as follows:
4593 
4594                                   in_pattern_p  related_stmt    vec_stmt
4595          S1: a_i = ....                 -       -               -
4596          S2: a_2 = ..use(a_i)..         -       -               -
4597          S3: a_1 = ..use(a_2)..         -       -               -
4598          S4: a_0 = ..use(a_1)..         true    S6              -
4599           '---> S6: a_new = ....        -       S4              -
4600          S5: ... = ..use(a_0)..         -       -               -
4601 
4602    (the last stmt in the pattern (S4) and the new pattern stmt (S6) point
4603    to each other through the RELATED_STMT field).
4604 
4605    S6 will be marked as relevant in vect_mark_stmts_to_be_vectorized instead
4606    of S4 because it will replace all its uses.  Stmts {S1,S2,S3} will
4607    remain irrelevant unless used by stmts other than S4.
4608 
4609    If vectorization succeeds, vect_transform_stmt will skip over {S1,S2,S3}
4610    (because they are marked as irrelevant).  It will vectorize S6, and record
4611    a pointer to the new vector stmt VS6 from S6 (as usual).
4612    S4 will be skipped, and S5 will be vectorized as usual:
4613 
4614                                   in_pattern_p  related_stmt    vec_stmt
4615          S1: a_i = ....                 -       -               -
4616          S2: a_2 = ..use(a_i)..         -       -               -
4617          S3: a_1 = ..use(a_2)..         -       -               -
4618        > VS6: va_new = ....             -       -               -
4619          S4: a_0 = ..use(a_1)..         true    S6              VS6
4620           '---> S6: a_new = ....        -       S4              VS6
4621        > VS5: ... = ..vuse(va_new)..    -       -               -
4622          S5: ... = ..use(a_0)..         -       -               -
4623 
4624    DCE could then get rid of {S1,S2,S3,S4,S5} (if their defs are not used
4625    elsewhere), and we'll end up with:
4626 
4627         VS6: va_new = ....
4628         VS5: ... = ..vuse(va_new)..
4629 
4630    In case of more than one pattern statements, e.g., widen-mult with
4631    intermediate type:
4632 
4633      S1  a_t = ;
4634      S2  a_T = (TYPE) a_t;
4635            '--> S3: a_it = (interm_type) a_t;
4636      S4  prod_T = a_T * CONST;
4637            '--> S5: prod_T' = a_it w* CONST;
4638 
4639    there may be other users of a_T outside the pattern.  In that case S2 will
4640    be marked as relevant (as well as S3), and both S2 and S3 will be analyzed
4641    and vectorized.  The vector stmt VS2 will be recorded in S2, and VS3 will
4642    be recorded in S3.  */
4643 
4644 void
vect_pattern_recog(vec_info * vinfo)4645 vect_pattern_recog (vec_info *vinfo)
4646 {
4647   struct loop *loop;
4648   basic_block *bbs;
4649   unsigned int nbbs;
4650   gimple_stmt_iterator si;
4651   unsigned int i, j;
4652   auto_vec<gimple *, 1> stmts_to_replace;
4653   gimple *stmt;
4654 
4655   if (dump_enabled_p ())
4656     dump_printf_loc (MSG_NOTE, vect_location,
4657                      "=== vect_pattern_recog ===\n");
4658 
4659   if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
4660     {
4661       loop = LOOP_VINFO_LOOP (loop_vinfo);
4662       bbs = LOOP_VINFO_BBS (loop_vinfo);
4663       nbbs = loop->num_nodes;
4664 
4665       /* Scan through the loop stmts, applying the pattern recognition
4666 	 functions starting at each stmt visited:  */
4667       for (i = 0; i < nbbs; i++)
4668 	{
4669 	  basic_block bb = bbs[i];
4670 	  for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
4671 	    {
4672 	      /* Scan over all generic vect_recog_xxx_pattern functions.  */
4673 	      for (j = 0; j < NUM_PATTERNS; j++)
4674 		if (vect_pattern_recog_1 (&vect_vect_recog_func_ptrs[j], si,
4675 					  &stmts_to_replace))
4676 		  break;
4677 	    }
4678 	}
4679     }
4680   else
4681     {
4682       bb_vec_info bb_vinfo = as_a <bb_vec_info> (vinfo);
4683       for (si = bb_vinfo->region_begin;
4684 	   gsi_stmt (si) != gsi_stmt (bb_vinfo->region_end); gsi_next (&si))
4685 	{
4686 	  if ((stmt = gsi_stmt (si))
4687 	      && vinfo_for_stmt (stmt)
4688 	      && !STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (stmt)))
4689 	    continue;
4690 
4691 	  /* Scan over all generic vect_recog_xxx_pattern functions.  */
4692 	  for (j = 0; j < NUM_PATTERNS; j++)
4693 	    if (vect_pattern_recog_1 (&vect_vect_recog_func_ptrs[j], si,
4694 				      &stmts_to_replace))
4695 	      break;
4696 	}
4697     }
4698 }
4699