1 /* Analysis Utilities for Loop Vectorization.
2    Copyright (C) 2006-2014 Free Software Foundation, Inc.
3    Contributed by Dorit Nuzman <dorit@il.ibm.com>
4 
5 This file is part of GCC.
6 
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
11 
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
15 for more details.
16 
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3.  If not see
19 <http://www.gnu.org/licenses/>.  */
20 
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "tm.h"
25 #include "tree.h"
26 #include "stor-layout.h"
27 #include "target.h"
28 #include "basic-block.h"
29 #include "gimple-pretty-print.h"
30 #include "tree-ssa-alias.h"
31 #include "internal-fn.h"
32 #include "tree-eh.h"
33 #include "gimple-expr.h"
34 #include "is-a.h"
35 #include "gimple.h"
36 #include "gimplify.h"
37 #include "gimple-iterator.h"
38 #include "gimple-ssa.h"
39 #include "tree-phinodes.h"
40 #include "ssa-iterators.h"
41 #include "stringpool.h"
42 #include "tree-ssanames.h"
43 #include "cfgloop.h"
44 #include "expr.h"
45 #include "optabs.h"
46 #include "params.h"
47 #include "tree-data-ref.h"
48 #include "tree-vectorizer.h"
49 #include "recog.h"		/* FIXME: for insn_data */
50 #include "diagnostic-core.h"
51 #include "dumpfile.h"
52 
/* Pattern recognition functions.  Each recognizer inspects the stmt(s) in
   its vec<gimple> argument and, on success, returns a replacement pattern
   stmt and fills in the input/output scalar or vector types.  */
static gimple vect_recog_widen_sum_pattern (vec<gimple> *, tree *,
					    tree *);
static gimple vect_recog_widen_mult_pattern (vec<gimple> *, tree *,
					     tree *);
static gimple vect_recog_dot_prod_pattern (vec<gimple> *, tree *,
					   tree *);
static gimple vect_recog_pow_pattern (vec<gimple> *, tree *, tree *);
static gimple vect_recog_over_widening_pattern (vec<gimple> *, tree *,
                                                 tree *);
static gimple vect_recog_widen_shift_pattern (vec<gimple> *,
	                                tree *, tree *);
static gimple vect_recog_rotate_pattern (vec<gimple> *, tree *, tree *);
static gimple vect_recog_vector_vector_shift_pattern (vec<gimple> *,
						      tree *, tree *);
static gimple vect_recog_divmod_pattern (vec<gimple> *,
					 tree *, tree *);
static gimple vect_recog_mixed_size_cond_pattern (vec<gimple> *,
						  tree *, tree *);
static gimple vect_recog_bool_pattern (vec<gimple> *, tree *, tree *);

/* Table of all pattern recognizers; it must contain exactly NUM_PATTERNS
   entries.  NOTE(review): recognizers are presumably tried in table order
   by the consumer of this table -- confirm before reordering entries.  */
static vect_recog_func_ptr vect_vect_recog_func_ptrs[NUM_PATTERNS] = {
	vect_recog_widen_mult_pattern,
	vect_recog_widen_sum_pattern,
	vect_recog_dot_prod_pattern,
	vect_recog_pow_pattern,
	vect_recog_widen_shift_pattern,
	vect_recog_over_widening_pattern,
	vect_recog_rotate_pattern,
	vect_recog_vector_vector_shift_pattern,
	vect_recog_divmod_pattern,
	vect_recog_mixed_size_cond_pattern,
	vect_recog_bool_pattern};
85 
86 static inline void
append_pattern_def_seq(stmt_vec_info stmt_info,gimple stmt)87 append_pattern_def_seq (stmt_vec_info stmt_info, gimple stmt)
88 {
89   gimple_seq_add_stmt_without_update (&STMT_VINFO_PATTERN_DEF_SEQ (stmt_info),
90 				      stmt);
91 }
92 
93 static inline void
new_pattern_def_seq(stmt_vec_info stmt_info,gimple stmt)94 new_pattern_def_seq (stmt_vec_info stmt_info, gimple stmt)
95 {
96   STMT_VINFO_PATTERN_DEF_SEQ (stmt_info) = NULL;
97   append_pattern_def_seq (stmt_info, stmt);
98 }
99 
100 /* Check whether STMT2 is in the same loop or basic block as STMT1.
101    Which of the two applies depends on whether we're currently doing
102    loop-based or basic-block-based vectorization, as determined by
103    the vinfo_for_stmt for STMT1 (which must be defined).
104 
105    If this returns true, vinfo_for_stmt for STMT2 is guaranteed
106    to be defined as well.  */
107 
108 static bool
vect_same_loop_or_bb_p(gimple stmt1,gimple stmt2)109 vect_same_loop_or_bb_p (gimple stmt1, gimple stmt2)
110 {
111   stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt1);
112   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
113   bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
114 
115   if (!gimple_bb (stmt2))
116     return false;
117 
118   if (loop_vinfo)
119     {
120       struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
121       if (!flow_bb_inside_loop_p (loop, gimple_bb (stmt2)))
122 	return false;
123     }
124   else
125     {
126       if (gimple_bb (stmt2) != BB_VINFO_BB (bb_vinfo)
127 	  || gimple_code (stmt2) == GIMPLE_PHI)
128 	return false;
129     }
130 
131   gcc_assert (vinfo_for_stmt (stmt2));
132   return true;
133 }
134 
135 /* If the LHS of DEF_STMT has a single use, and that statement is
136    in the same loop or basic block, return it.  */
137 
138 static gimple
vect_single_imm_use(gimple def_stmt)139 vect_single_imm_use (gimple def_stmt)
140 {
141   tree lhs = gimple_assign_lhs (def_stmt);
142   use_operand_p use_p;
143   gimple use_stmt;
144 
145   if (!single_imm_use (lhs, &use_p, &use_stmt))
146     return NULL;
147 
148   if (!vect_same_loop_or_bb_p (def_stmt, use_stmt))
149     return NULL;
150 
151   return use_stmt;
152 }
153 
/* Check whether NAME, an ssa-name used in USE_STMT,
   is a result of a type promotion or demotion, such that:
     DEF_STMT: NAME = NOP (name0)
   where the type of name0 (ORIG_TYPE) is smaller/bigger than the type of NAME.
   If CHECK_SIGN is TRUE, check that either both types are signed or both are
   unsigned.

   On success, set *ORIG_TYPE to the type of name0, *DEF_STMT to the
   conversion statement, and *PROMOTION to true if NAME's type is at
   least twice as wide as name0's (a promotion) or false if name0's
   type is at least twice as wide as NAME's (a demotion).  */

static bool
type_conversion_p (tree name, gimple use_stmt, bool check_sign,
		   tree *orig_type, gimple *def_stmt, bool *promotion)
{
  tree dummy;
  gimple dummy_gimple;
  loop_vec_info loop_vinfo;
  stmt_vec_info stmt_vinfo;
  tree type = TREE_TYPE (name);
  tree oprnd0;
  enum vect_def_type dt;
  tree def;
  bb_vec_info bb_vinfo;

  stmt_vinfo = vinfo_for_stmt (use_stmt);
  loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
  /* This also fills in *DEF_STMT with NAME's defining statement.  */
  if (!vect_is_simple_use (name, use_stmt, loop_vinfo, bb_vinfo, def_stmt,
			   &def, &dt))
    return false;

  if (dt != vect_internal_def
      && dt != vect_external_def && dt != vect_constant_def)
    return false;

  if (!*def_stmt)
    return false;

  if (!is_gimple_assign (*def_stmt))
    return false;

  /* NAME must be defined by a conversion (NOP_EXPR/CONVERT_EXPR).  */
  if (!CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (*def_stmt)))
    return false;

  oprnd0 = gimple_assign_rhs1 (*def_stmt);

  /* Only integral <-> integral conversions are handled; optionally
     require that the signedness of both types agree.  */
  *orig_type = TREE_TYPE (oprnd0);
  if (!INTEGRAL_TYPE_P (type) || !INTEGRAL_TYPE_P (*orig_type)
      || ((TYPE_UNSIGNED (type) != TYPE_UNSIGNED (*orig_type)) && check_sign))
    return false;

  /* Classify as promotion or demotion; precision changes of less than
     a factor of two are rejected.  */
  if (TYPE_PRECISION (type) >= (TYPE_PRECISION (*orig_type) * 2))
    *promotion = true;
  else if (TYPE_PRECISION (*orig_type) >= (TYPE_PRECISION (type) * 2))
    *promotion = false;
  else
    return false;

  /* The converted operand itself must also be a vectorizable operand.  */
  if (!vect_is_simple_use (oprnd0, *def_stmt, loop_vinfo,
			   bb_vinfo, &dummy_gimple, &dummy, &dt))
    return false;

  return true;
}
215 
216 /* Helper to return a new temporary for pattern of TYPE for STMT.  If STMT
217    is NULL, the caller must set SSA_NAME_DEF_STMT for the returned SSA var. */
218 
219 static tree
vect_recog_temp_ssa_var(tree type,gimple stmt)220 vect_recog_temp_ssa_var (tree type, gimple stmt)
221 {
222   return make_temp_ssa_name (type, stmt, "patt");
223 }
224 
225 /* Function vect_recog_dot_prod_pattern
226 
227    Try to find the following pattern:
228 
229      type x_t, y_t;
230      TYPE1 prod;
231      TYPE2 sum = init;
232    loop:
233      sum_0 = phi <init, sum_1>
234      S1  x_t = ...
235      S2  y_t = ...
236      S3  x_T = (TYPE1) x_t;
237      S4  y_T = (TYPE1) y_t;
238      S5  prod = x_T * y_T;
239      [S6  prod = (TYPE2) prod;  #optional]
240      S7  sum_1 = prod + sum_0;
241 
242    where 'TYPE1' is exactly double the size of type 'type', and 'TYPE2' is the
243    same size of 'TYPE1' or bigger. This is a special case of a reduction
244    computation.
245 
246    Input:
247 
248    * STMTS: Contains a stmt from which the pattern search begins.  In the
249    example, when this function is called with S7, the pattern {S3,S4,S5,S6,S7}
250    will be detected.
251 
252    Output:
253 
254    * TYPE_IN: The type of the input arguments to the pattern.
255 
256    * TYPE_OUT: The type of the output  of this pattern.
257 
258    * Return value: A new stmt that will be used to replace the sequence of
259    stmts that constitute the pattern. In this case it will be:
260         WIDEN_DOT_PRODUCT <x_t, y_t, sum_0>
261 
262    Note: The dot-prod idiom is a widening reduction pattern that is
263          vectorized without preserving all the intermediate results. It
264          produces only N/2 (widened) results (by summing up pairs of
265          intermediate results) rather than all N results.  Therefore, we
266          cannot allow this pattern when we want to get all the results and in
267          the correct order (as is the case when this computation is in an
268          inner-loop nested in an outer-loop that us being vectorized).  */
269 
static gimple
vect_recog_dot_prod_pattern (vec<gimple> *stmts, tree *type_in,
			     tree *type_out)
{
  gimple stmt, last_stmt = (*stmts)[0];
  tree oprnd0, oprnd1;
  tree oprnd00, oprnd01;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
  tree type, half_type;
  gimple pattern_stmt;
  tree prod_type;
  loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  struct loop *loop;
  tree var;
  bool promotion;

  /* The dot-product idiom only applies to loop vectorization.  */
  if (!loop_info)
    return NULL;

  loop = LOOP_VINFO_LOOP (loop_info);

  if (!is_gimple_assign (last_stmt))
    return NULL;

  type = gimple_expr_type (last_stmt);

  /* Look for the following pattern
          DX = (TYPE1) X;
          DY = (TYPE1) Y;
          DPROD = DX * DY;
          DDPROD = (TYPE2) DPROD;
          sum_1 = DDPROD + sum_0;
     In which
     - DX is double the size of X
     - DY is double the size of Y
     - DX, DY, DPROD all have the same type
     - sum is the same size of DPROD or bigger
     - sum has been recognized as a reduction variable.

     This is equivalent to:
       DPROD = X w* Y;          #widen mult
       sum_1 = DPROD w+ sum_0;  #widen summation
     or
       DPROD = X w* Y;          #widen mult
       sum_1 = DPROD + sum_0;   #summation
   */

  /* Starting from LAST_STMT, follow the defs of its uses in search
     of the above pattern.  */

  if (gimple_assign_rhs_code (last_stmt) != PLUS_EXPR)
    return NULL;

  if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
    {
      /* Has been detected as widening-summation?  If so, pick up the
	 operands from the WIDEN_SUM pattern stmt instead.  */

      stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
      type = gimple_expr_type (stmt);
      if (gimple_assign_rhs_code (stmt) != WIDEN_SUM_EXPR)
        return NULL;
      oprnd0 = gimple_assign_rhs1 (stmt);
      oprnd1 = gimple_assign_rhs2 (stmt);
      half_type = TREE_TYPE (oprnd0);
    }
  else
    {
      gimple def_stmt;

      /* The summation must have been classified as a reduction.  */
      if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def)
        return NULL;
      oprnd0 = gimple_assign_rhs1 (last_stmt);
      oprnd1 = gimple_assign_rhs2 (last_stmt);
      if (!types_compatible_p (TREE_TYPE (oprnd0), type)
	  || !types_compatible_p (TREE_TYPE (oprnd1), type))
        return NULL;
      stmt = last_stmt;

      /* Look through an optional promotion of the product (S6 above);
	 otherwise the product type equals the sum type.  */
      if (type_conversion_p (oprnd0, stmt, true, &half_type, &def_stmt,
                               &promotion)
         && promotion)
        {
          stmt = def_stmt;
          oprnd0 = gimple_assign_rhs1 (stmt);
        }
      else
        half_type = type;
    }

  /* So far so good.  Since last_stmt was detected as a (summation) reduction,
     we know that oprnd1 is the reduction variable (defined by a loop-header
     phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
     Left to check that oprnd0 is defined by a (widen_)mult_expr  */
  if (TREE_CODE (oprnd0) != SSA_NAME)
    return NULL;

  prod_type = half_type;
  stmt = SSA_NAME_DEF_STMT (oprnd0);

  /* It could not be the dot_prod pattern if the stmt is outside the loop.  */
  if (!gimple_bb (stmt) || !flow_bb_inside_loop_p (loop, gimple_bb (stmt)))
    return NULL;

  /* FORNOW.  Can continue analyzing the def-use chain when this stmt in a phi
     inside the loop (in case we are analyzing an outer-loop).  */
  if (!is_gimple_assign (stmt))
    return NULL;
  stmt_vinfo = vinfo_for_stmt (stmt);
  gcc_assert (stmt_vinfo);
  if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_internal_def)
    return NULL;
  if (gimple_assign_rhs_code (stmt) != MULT_EXPR)
    return NULL;
  if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
    {
      /* Has been detected as a widening multiplication?  If so, take the
	 narrow multiplicands directly from the WIDEN_MULT pattern stmt.  */

      stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
      if (gimple_assign_rhs_code (stmt) != WIDEN_MULT_EXPR)
        return NULL;
      stmt_vinfo = vinfo_for_stmt (stmt);
      gcc_assert (stmt_vinfo);
      gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_internal_def);
      oprnd00 = gimple_assign_rhs1 (stmt);
      oprnd01 = gimple_assign_rhs2 (stmt);
    }
  else
    {
      tree half_type0, half_type1;
      gimple def_stmt;
      tree oprnd0, oprnd1;

      /* Otherwise both multiplicands must be promotions (S3, S4 above)
	 from compatible half-width types, exactly half the precision of
	 the product.  */
      oprnd0 = gimple_assign_rhs1 (stmt);
      oprnd1 = gimple_assign_rhs2 (stmt);
      if (!types_compatible_p (TREE_TYPE (oprnd0), prod_type)
          || !types_compatible_p (TREE_TYPE (oprnd1), prod_type))
        return NULL;
      if (!type_conversion_p (oprnd0, stmt, true, &half_type0, &def_stmt,
                                &promotion)
          || !promotion)
        return NULL;
      oprnd00 = gimple_assign_rhs1 (def_stmt);
      if (!type_conversion_p (oprnd1, stmt, true, &half_type1, &def_stmt,
                                &promotion)
          || !promotion)
        return NULL;
      oprnd01 = gimple_assign_rhs1 (def_stmt);
      if (!types_compatible_p (half_type0, half_type1))
        return NULL;
      if (TYPE_PRECISION (prod_type) != TYPE_PRECISION (half_type0) * 2)
	return NULL;
    }

  half_type = TREE_TYPE (oprnd00);
  *type_in = half_type;
  *type_out = type;

  /* Pattern detected. Create a stmt to be used to replace the pattern: */
  var = vect_recog_temp_ssa_var (type, NULL);
  pattern_stmt = gimple_build_assign_with_ops (DOT_PROD_EXPR, var,
					       oprnd00, oprnd01, oprnd1);

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
                       "vect_recog_dot_prod_pattern: detected: ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
      dump_printf (MSG_NOTE, "\n");
    }

  /* We don't allow changing the order of the computation in the inner-loop
     when doing outer-loop vectorization.  */
  gcc_assert (!nested_in_vect_loop_p (loop, last_stmt));

  return pattern_stmt;
}
446 
447 
/* Handle widening operation by a constant.  At the moment we support MULT_EXPR
   and LSHIFT_EXPR.

   For MULT_EXPR we check that CONST_OPRND fits HALF_TYPE, and for LSHIFT_EXPR
   we check that CONST_OPRND is less or equal to the size of HALF_TYPE.

   Otherwise, if the type of the result (TYPE) is at least 4 times bigger than
   HALF_TYPE, and there is an intermediate type (2 times smaller than TYPE)
   that satisfies the above restrictions, we can perform a widening operation
   from the intermediate type to TYPE and replace a_T = (TYPE) a_t;
   with a_it = (interm_type) a_t;

   On success return true, set *OPRND to the (possibly newly created)
   narrow operand and *HALF_TYPE to the type used for the widening
   operation; DEF_STMT may be pushed onto STMTS when an intermediate
   conversion pattern stmt is involved.  */

static bool
vect_handle_widen_op_by_const (gimple stmt, enum tree_code code,
		               tree const_oprnd, tree *oprnd,
   		               vec<gimple> *stmts, tree type,
			       tree *half_type, gimple def_stmt)
{
  tree new_type, new_oprnd;
  gimple new_stmt;

  if (code != MULT_EXPR && code != LSHIFT_EXPR)
    return false;

  /* Easy case: the constant already fits HALF_TYPE (or, for shifts, does
     not exceed its precision) and TYPE is exactly twice as wide.  */
  if (((code == MULT_EXPR && int_fits_type_p (const_oprnd, *half_type))
        || (code == LSHIFT_EXPR
            && compare_tree_int (const_oprnd, TYPE_PRECISION (*half_type))
	    	!= 1))
      && TYPE_PRECISION (type) == (TYPE_PRECISION (*half_type) * 2))
    {
      /* CONST_OPRND is a constant of HALF_TYPE.  */
      *oprnd = gimple_assign_rhs1 (def_stmt);
      return true;
    }

  /* The intermediate-type fallback needs TYPE at least 4x HALF_TYPE.  */
  if (TYPE_PRECISION (type) < (TYPE_PRECISION (*half_type) * 4))
    return false;

  if (!vect_same_loop_or_bb_p (stmt, def_stmt))
    return false;

  /* TYPE is 4 times bigger than HALF_TYPE, try widening operation for
     a type 2 times bigger than HALF_TYPE.  */
  new_type = build_nonstandard_integer_type (TYPE_PRECISION (type) / 2,
                                             TYPE_UNSIGNED (type));
  if ((code == MULT_EXPR && !int_fits_type_p (const_oprnd, new_type))
      || (code == LSHIFT_EXPR
          && compare_tree_int (const_oprnd, TYPE_PRECISION (new_type)) == 1))
    return false;

  /* Use NEW_TYPE for widening operation.  */
  if (STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)))
    {
      new_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt));
      /* Check if the already created pattern stmt is what we need.  */
      if (!is_gimple_assign (new_stmt)
          || gimple_assign_rhs_code (new_stmt) != NOP_EXPR
          || TREE_TYPE (gimple_assign_lhs (new_stmt)) != new_type)
        return false;

      stmts->safe_push (def_stmt);
      *oprnd = gimple_assign_lhs (new_stmt);
    }
  else
    {
      /* Create a_T = (NEW_TYPE) a_t;  */
      *oprnd = gimple_assign_rhs1 (def_stmt);
      new_oprnd = make_ssa_name (new_type, NULL);
      new_stmt = gimple_build_assign_with_ops (NOP_EXPR, new_oprnd, *oprnd,
					       NULL_TREE);
      /* Record the new conversion as DEF_STMT's pattern stmt so later
	 callers can reuse it instead of creating another one.  */
      STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)) = new_stmt;
      stmts->safe_push (def_stmt);
      *oprnd = new_oprnd;
    }

  *half_type = new_type;
  return true;
}
526 
527 
528 /* Function vect_recog_widen_mult_pattern
529 
530    Try to find the following pattern:
531 
532      type a_t, b_t;
533      TYPE a_T, b_T, prod_T;
534 
535      S1  a_t = ;
536      S2  b_t = ;
537      S3  a_T = (TYPE) a_t;
538      S4  b_T = (TYPE) b_t;
539      S5  prod_T = a_T * b_T;
540 
541    where type 'TYPE' is at least double the size of type 'type'.
542 
543    Also detect unsigned cases:
544 
545      unsigned type a_t, b_t;
546      unsigned TYPE u_prod_T;
547      TYPE a_T, b_T, prod_T;
548 
549      S1  a_t = ;
550      S2  b_t = ;
551      S3  a_T = (TYPE) a_t;
552      S4  b_T = (TYPE) b_t;
553      S5  prod_T = a_T * b_T;
554      S6  u_prod_T = (unsigned TYPE) prod_T;
555 
556    and multiplication by constants:
557 
558      type a_t;
559      TYPE a_T, prod_T;
560 
561      S1  a_t = ;
562      S3  a_T = (TYPE) a_t;
563      S5  prod_T = a_T * CONST;
564 
565    A special case of multiplication by constants is when 'TYPE' is 4 times
566    bigger than 'type', but CONST fits an intermediate type 2 times smaller
567    than 'TYPE'.  In that case we create an additional pattern stmt for S3
568    to create a variable of the intermediate type, and perform widen-mult
569    on the intermediate type as well:
570 
571      type a_t;
572      interm_type a_it;
573      TYPE a_T, prod_T,  prod_T';
574 
575      S1  a_t = ;
576      S3  a_T = (TYPE) a_t;
577            '--> a_it = (interm_type) a_t;
578      S5  prod_T = a_T * CONST;
579            '--> prod_T' = a_it w* CONST;
580 
581    Input/Output:
582 
583    * STMTS: Contains a stmt from which the pattern search begins.  In the
584    example, when this function is called with S5, the pattern {S3,S4,S5,(S6)}
585    is detected.  In case of unsigned widen-mult, the original stmt (S5) is
586    replaced with S6 in STMTS.  In case of multiplication by a constant
587    of an intermediate type (the last case above), STMTS also contains S3
588    (inserted before S5).
589 
590    Output:
591 
592    * TYPE_IN: The type of the input arguments to the pattern.
593 
594    * TYPE_OUT: The type of the output of this pattern.
595 
596    * Return value: A new stmt that will be used to replace the sequence of
597    stmts that constitute the pattern.  In this case it will be:
598         WIDEN_MULT <a_t, b_t>
599 */
600 
static gimple
vect_recog_widen_mult_pattern (vec<gimple> *stmts,
                               tree *type_in, tree *type_out)
{
  gimple last_stmt = stmts->pop ();
  gimple def_stmt0, def_stmt1;
  tree oprnd0, oprnd1;
  tree type, half_type0, half_type1;
  gimple pattern_stmt;
  tree vectype, vectype_out = NULL_TREE;
  tree var;
  enum tree_code dummy_code;
  int dummy_int;
  vec<tree> dummy_vec;
  bool op1_ok;
  bool promotion;

  if (!is_gimple_assign (last_stmt))
    return NULL;

  type = gimple_expr_type (last_stmt);

  /* Starting from LAST_STMT, follow the defs of its uses in search
     of the above pattern.  */

  if (gimple_assign_rhs_code (last_stmt) != MULT_EXPR)
    return NULL;

  oprnd0 = gimple_assign_rhs1 (last_stmt);
  oprnd1 = gimple_assign_rhs2 (last_stmt);
  if (!types_compatible_p (TREE_TYPE (oprnd0), type)
      || !types_compatible_p (TREE_TYPE (oprnd1), type))
    return NULL;

  /* Check argument 0: it must always be a promotion (S3 above).  */
  if (!type_conversion_p (oprnd0, last_stmt, false, &half_type0, &def_stmt0,
                         &promotion)
      || !promotion)
     return NULL;
  /* Check argument 1: either a promotion (S4) or a constant handled by
     vect_handle_widen_op_by_const below.  */
  op1_ok = type_conversion_p (oprnd1, last_stmt, false, &half_type1,
                              &def_stmt1, &promotion);

  if (op1_ok && promotion)
    {
      /* Both operands promoted from narrow variables: use those.  */
      oprnd0 = gimple_assign_rhs1 (def_stmt0);
      oprnd1 = gimple_assign_rhs1 (def_stmt1);
    }
  else
    {
      /* Multiplication by a constant (the "a_T * CONST" case).  */
      if (TREE_CODE (oprnd1) == INTEGER_CST
          && TREE_CODE (half_type0) == INTEGER_TYPE
          && vect_handle_widen_op_by_const (last_stmt, MULT_EXPR, oprnd1,
		                            &oprnd0, stmts, type,
					    &half_type0, def_stmt0))
	{
	  half_type1 = half_type0;
	  oprnd1 = fold_convert (half_type1, oprnd1);
	}
      else
        return NULL;
    }

  /* Handle unsigned case.  Look for
     S6  u_prod_T = (unsigned TYPE) prod_T;
     Use unsigned TYPE as the type for WIDEN_MULT_EXPR.  */
  if (TYPE_UNSIGNED (type) != TYPE_UNSIGNED (half_type0))
    {
      gimple use_stmt;
      tree use_lhs;
      tree use_type;

      if (TYPE_UNSIGNED (type) == TYPE_UNSIGNED (half_type1))
        return NULL;

      /* The product must have a single use that converts it to a type
	 of the same precision but opposite signedness.  */
      use_stmt = vect_single_imm_use (last_stmt);
      if (!use_stmt || !is_gimple_assign (use_stmt)
	  || gimple_assign_rhs_code (use_stmt) != NOP_EXPR)
        return NULL;

      use_lhs = gimple_assign_lhs (use_stmt);
      use_type = TREE_TYPE (use_lhs);
      if (!INTEGRAL_TYPE_P (use_type)
          || (TYPE_UNSIGNED (type) == TYPE_UNSIGNED (use_type))
          || (TYPE_PRECISION (type) != TYPE_PRECISION (use_type)))
        return NULL;

      /* S6 replaces S5 as the pattern's root statement.  */
      type = use_type;
      last_stmt = use_stmt;
    }

  if (!types_compatible_p (half_type0, half_type1))
    return NULL;

  /* Pattern detected.  */
  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "vect_recog_widen_mult_pattern: detected:\n");

  /* Check target support  */
  vectype = get_vectype_for_scalar_type (half_type0);
  vectype_out = get_vectype_for_scalar_type (type);
  if (!vectype
      || !vectype_out
      || !supportable_widening_operation (WIDEN_MULT_EXPR, last_stmt,
					  vectype_out, vectype,
					  &dummy_code, &dummy_code,
					  &dummy_int, &dummy_vec))
    return NULL;

  *type_in = vectype;
  *type_out = vectype_out;

  /* Pattern supported. Create a stmt to be used to replace the pattern: */
  var = vect_recog_temp_ssa_var (type, NULL);
  pattern_stmt = gimple_build_assign_with_ops (WIDEN_MULT_EXPR, var, oprnd0,
					       oprnd1);

  if (dump_enabled_p ())
    dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt, 0);

  stmts->safe_push (last_stmt);
  return pattern_stmt;
}
725 
726 
727 /* Function vect_recog_pow_pattern
728 
729    Try to find the following pattern:
730 
731      x = POW (y, N);
732 
733    with POW being one of pow, powf, powi, powif and N being
734    either 2 or 0.5.
735 
736    Input:
737 
738    * LAST_STMT: A stmt from which the pattern search begins.
739 
740    Output:
741 
742    * TYPE_IN: The type of the input arguments to the pattern.
743 
744    * TYPE_OUT: The type of the output of this pattern.
745 
746    * Return value: A new stmt that will be used to replace the sequence of
747    stmts that constitute the pattern. In this case it will be:
748         x = x * x
749    or
750 	x = sqrt (x)
751 */
752 
static gimple
vect_recog_pow_pattern (vec<gimple> *stmts, tree *type_in,
			tree *type_out)
{
  gimple last_stmt = (*stmts)[0];
  tree fn, base, exp = NULL;
  gimple stmt;
  tree var;

  /* Only calls whose result is used can form the pattern.  */
  if (!is_gimple_call (last_stmt) || gimple_call_lhs (last_stmt) == NULL)
    return NULL;

  fn = gimple_call_fndecl (last_stmt);
  if (fn == NULL_TREE || DECL_BUILT_IN_CLASS (fn) != BUILT_IN_NORMAL)
   return NULL;

  switch (DECL_FUNCTION_CODE (fn))
    {
    case BUILT_IN_POWIF:
    case BUILT_IN_POWI:
    case BUILT_IN_POWF:
    case BUILT_IN_POW:
      base = gimple_call_arg (last_stmt, 0);
      exp = gimple_call_arg (last_stmt, 1);
      /* The exponent must be a compile-time constant.  */
      if (TREE_CODE (exp) != REAL_CST
	  && TREE_CODE (exp) != INTEGER_CST)
        return NULL;
      break;

    default:
      return NULL;
    }

  /* We now have a pow or powi builtin function call with a constant
     exponent.  */

  *type_out = NULL_TREE;

  /* Catch squaring: pow (x, 2) or powi (x, 2) -> x * x.  */
  if ((tree_fits_shwi_p (exp)
       && tree_to_shwi (exp) == 2)
      || (TREE_CODE (exp) == REAL_CST
          && REAL_VALUES_EQUAL (TREE_REAL_CST (exp), dconst2)))
    {
      *type_in = TREE_TYPE (base);

      var = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
      stmt = gimple_build_assign_with_ops (MULT_EXPR, var, base, base);
      return stmt;
    }

  /* Catch square root: pow (x, 0.5) -> sqrt (x), if the target can
     vectorize the sqrt call.  */
  if (TREE_CODE (exp) == REAL_CST
      && REAL_VALUES_EQUAL (TREE_REAL_CST (exp), dconsthalf))
    {
      tree newfn = mathfn_built_in (TREE_TYPE (base), BUILT_IN_SQRT);
      *type_in = get_vectype_for_scalar_type (TREE_TYPE (base));
      if (*type_in)
	{
	  gimple stmt = gimple_build_call (newfn, 1, base);
	  if (vectorizable_function (stmt, *type_in, *type_in)
	      != NULL_TREE)
	    {
	      var = vect_recog_temp_ssa_var (TREE_TYPE (base), stmt);
	      gimple_call_set_lhs (stmt, var);
	      return stmt;
	    }
	}
    }

  return NULL;
}
825 
826 
827 /* Function vect_recog_widen_sum_pattern
828 
829    Try to find the following pattern:
830 
831      type x_t;
832      TYPE x_T, sum = init;
833    loop:
834      sum_0 = phi <init, sum_1>
835      S1  x_t = *p;
836      S2  x_T = (TYPE) x_t;
837      S3  sum_1 = x_T + sum_0;
838 
839    where type 'TYPE' is at least double the size of type 'type', i.e - we're
840    summing elements of type 'type' into an accumulator of type 'TYPE'. This is
841    a special case of a reduction computation.
842 
843    Input:
844 
845    * LAST_STMT: A stmt from which the pattern search begins. In the example,
846    when this function is called with S3, the pattern {S2,S3} will be detected.
847 
848    Output:
849 
850    * TYPE_IN: The type of the input arguments to the pattern.
851 
852    * TYPE_OUT: The type of the output of this pattern.
853 
854    * Return value: A new stmt that will be used to replace the sequence of
855    stmts that constitute the pattern. In this case it will be:
856         WIDEN_SUM <x_t, sum_0>
857 
858    Note: The widening-sum idiom is a widening reduction pattern that is
859 	 vectorized without preserving all the intermediate results. It
860          produces only N/2 (widened) results (by summing up pairs of
861 	 intermediate results) rather than all N results.  Therefore, we
862 	 cannot allow this pattern when we want to get all the results and in
863 	 the correct order (as is the case when this computation is in an
864 	 inner-loop nested in an outer-loop that us being vectorized).  */
865 
static gimple
vect_recog_widen_sum_pattern (vec<gimple> *stmts, tree *type_in,
			      tree *type_out)
{
  gimple stmt, last_stmt = (*stmts)[0];
  tree oprnd0, oprnd1;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
  tree type, half_type;
  gimple pattern_stmt;
  loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  struct loop *loop;
  tree var;
  bool promotion;

  /* The widen-sum idiom only applies to loop vectorization.  */
  if (!loop_info)
    return NULL;

  loop = LOOP_VINFO_LOOP (loop_info);

  if (!is_gimple_assign (last_stmt))
    return NULL;

  type = gimple_expr_type (last_stmt);

  /* Look for the following pattern
          DX = (TYPE) X;
          sum_1 = DX + sum_0;
     In which DX is at least double the size of X, and sum_1 has been
     recognized as a reduction variable.
   */

  /* Starting from LAST_STMT, follow the defs of its uses in search
     of the above pattern.  */

  if (gimple_assign_rhs_code (last_stmt) != PLUS_EXPR)
    return NULL;

  if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def)
    return NULL;

  oprnd0 = gimple_assign_rhs1 (last_stmt);
  oprnd1 = gimple_assign_rhs2 (last_stmt);
  if (!types_compatible_p (TREE_TYPE (oprnd0), type)
      || !types_compatible_p (TREE_TYPE (oprnd1), type))
    return NULL;

  /* So far so good.  Since last_stmt was detected as a (summation) reduction,
     we know that oprnd1 is the reduction variable (defined by a loop-header
     phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
     Left to check that oprnd0 is defined by a cast from type 'type' to type
     'TYPE'.  */

  if (!type_conversion_p (oprnd0, last_stmt, true, &half_type, &stmt,
                          &promotion)
      || !promotion)
     return NULL;

  /* Use the narrow, pre-promotion operand in the pattern stmt.  */
  oprnd0 = gimple_assign_rhs1 (stmt);
  *type_in = half_type;
  *type_out = type;

  /* Pattern detected. Create a stmt to be used to replace the pattern: */
  var = vect_recog_temp_ssa_var (type, NULL);
  pattern_stmt = gimple_build_assign_with_ops (WIDEN_SUM_EXPR, var,
					       oprnd0, oprnd1);

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
                       "vect_recog_widen_sum_pattern: detected: ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
      dump_printf (MSG_NOTE, "\n");
    }

  /* We don't allow changing the order of the computation in the inner-loop
     when doing outer-loop vectorization.  */
  gcc_assert (!nested_in_vect_loop_p (loop, last_stmt));

  return pattern_stmt;
}
946 
947 
948 /* Return TRUE if the operation in STMT can be performed on a smaller type.
949 
950    Input:
951    STMT - a statement to check.
952    DEF - we support operations with two operands, one of which is constant.
953          The other operand can be defined by a demotion operation, or by a
954          previous statement in a sequence of over-promoted operations.  In the
         latter case DEF is used to replace that operand.  (It is defined by a
956          pattern statement we created for the previous statement in the
957          sequence).
958 
959    Input/output:
960    NEW_TYPE - Output: a smaller type that we are trying to use.  Input: if not
961          NULL, it's the type of DEF.
962    STMTS - additional pattern statements.  If a pattern statement (type
963          conversion) is created in this function, its original statement is
964          added to STMTS.
965 
966    Output:
967    OP0, OP1 - if the operation fits a smaller type, OP0 and OP1 are the new
968          operands to use in the new pattern statement for STMT (will be created
969          in vect_recog_over_widening_pattern ()).
970    NEW_DEF_STMT - in case DEF has to be promoted, we create two pattern
971          statements for STMT: the first one is a type promotion and the second
972          one is the operation itself.  We return the type promotion statement
973 	 in NEW_DEF_STMT and further store it in STMT_VINFO_PATTERN_DEF_SEQ of
974          the second pattern statement.  */
975 
static bool
vect_operation_fits_smaller_type (gimple stmt, tree def, tree *new_type,
                                  tree *op0, tree *op1, gimple *new_def_stmt,
                                  vec<gimple> *stmts)
{
  enum tree_code code;
  tree const_oprnd, oprnd;
  tree interm_type = NULL_TREE, half_type, new_oprnd, type;
  gimple def_stmt, new_stmt;
  bool first = false;		/* True for the first stmt of a sequence.  */
  bool promotion;

  *op0 = NULL_TREE;
  *op1 = NULL_TREE;
  *new_def_stmt = NULL;

  if (!is_gimple_assign (stmt))
    return false;

  /* Only shifts and bitwise operations are candidates; all of them are
     required below to have a constant second operand.  */
  code = gimple_assign_rhs_code (stmt);
  if (code != LSHIFT_EXPR && code != RSHIFT_EXPR
      && code != BIT_IOR_EXPR && code != BIT_XOR_EXPR && code != BIT_AND_EXPR)
    return false;

  oprnd = gimple_assign_rhs1 (stmt);
  const_oprnd = gimple_assign_rhs2 (stmt);
  type = gimple_expr_type (stmt);

  if (TREE_CODE (oprnd) != SSA_NAME
      || TREE_CODE (const_oprnd) != INTEGER_CST)
    return false;

  /* If oprnd has other uses besides that in stmt we cannot mark it
     as being part of a pattern only.  */
  if (!has_single_use (oprnd))
    return false;

  /* If we are in the middle of a sequence, we use DEF from a previous
     statement.  Otherwise, OPRND has to be a result of type promotion.  */
  if (*new_type)
    {
      half_type = *new_type;
      oprnd = def;
    }
  else
    {
      first = true;
      if (!type_conversion_p (oprnd, stmt, false, &half_type, &def_stmt,
			      &promotion)
	  || !promotion
	  || !vect_same_loop_or_bb_p (stmt, def_stmt))
        return false;
    }

  /* Can we perform the operation on a smaller type?  */
  switch (code)
    {
      case BIT_IOR_EXPR:
      case BIT_XOR_EXPR:
      case BIT_AND_EXPR:
        if (!int_fits_type_p (const_oprnd, half_type))
          {
            /* HALF_TYPE is not enough.  Try a bigger type if possible.  */
            if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4))
              return false;

            interm_type = build_nonstandard_integer_type (
                        TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type));
            if (!int_fits_type_p (const_oprnd, interm_type))
              return false;
          }

        break;

      case LSHIFT_EXPR:
        /* Try intermediate type - HALF_TYPE is not enough for sure.  */
        if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4))
          return false;

        /* Check that HALF_TYPE size + shift amount <= INTERM_TYPE size.
          (e.g., if the original value was char, the shift amount is at most 8
           if we want to use short).  */
        if (compare_tree_int (const_oprnd, TYPE_PRECISION (half_type)) == 1)
          return false;

        interm_type = build_nonstandard_integer_type (
                        TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type));

        if (!vect_supportable_shift (code, interm_type))
          return false;

        break;

      case RSHIFT_EXPR:
        if (vect_supportable_shift (code, half_type))
          break;

        /* Try intermediate type - HALF_TYPE is not supported.  */
        if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4))
          return false;

        interm_type = build_nonstandard_integer_type (
                        TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type));

        if (!vect_supportable_shift (code, interm_type))
          return false;

        break;

      default:
        gcc_unreachable ();
    }

  /* There are four possible cases:
     1. OPRND is defined by a type promotion (in that case FIRST is TRUE, it's
        the first statement in the sequence)
        a. The original, HALF_TYPE, is not enough - we replace the promotion
           from HALF_TYPE to TYPE with a promotion to INTERM_TYPE.
        b. HALF_TYPE is sufficient, OPRND is set as the RHS of the original
           promotion.
     2. OPRND is defined by a pattern statement we created.
        a. Its type is not sufficient for the operation, we create a new stmt:
           a type conversion for OPRND from HALF_TYPE to INTERM_TYPE.  We store
           this statement in NEW_DEF_STMT, and it is later put in
	   STMT_VINFO_PATTERN_DEF_SEQ of the pattern statement for STMT.
        b. OPRND is good to use in the new statement.  */
  if (first)
    {
      if (interm_type)
        {
          /* Replace the original type conversion HALF_TYPE->TYPE with
             HALF_TYPE->INTERM_TYPE.  */
          if (STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)))
            {
              new_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt));
              /* Check if the already created pattern stmt is what we need.  */
              if (!is_gimple_assign (new_stmt)
                  || gimple_assign_rhs_code (new_stmt) != NOP_EXPR
                  || TREE_TYPE (gimple_assign_lhs (new_stmt)) != interm_type)
                return false;

	      stmts->safe_push (def_stmt);
              oprnd = gimple_assign_lhs (new_stmt);
            }
          else
            {
              /* Create NEW_OPRND = (INTERM_TYPE) OPRND.  */
              oprnd = gimple_assign_rhs1 (def_stmt);
              new_oprnd = make_ssa_name (interm_type, NULL);
              new_stmt = gimple_build_assign_with_ops (NOP_EXPR, new_oprnd,
                                                       oprnd, NULL_TREE);
              STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)) = new_stmt;
              stmts->safe_push (def_stmt);
              oprnd = new_oprnd;
            }
        }
      else
        {
          /* Retrieve the operand before the type promotion.  */
          oprnd = gimple_assign_rhs1 (def_stmt);
        }
    }
  else
    {
      if (interm_type)
        {
          /* Create a type conversion HALF_TYPE->INTERM_TYPE.  */
          new_oprnd = make_ssa_name (interm_type, NULL);
          new_stmt = gimple_build_assign_with_ops (NOP_EXPR, new_oprnd,
                                                   oprnd, NULL_TREE);
          oprnd = new_oprnd;
          *new_def_stmt = new_stmt;
        }

      /* Otherwise, OPRND is already set.  */
    }

  /* Report back the smaller type we managed to use, and the adjusted
     operands for the new pattern statement.  */
  if (interm_type)
    *new_type = interm_type;
  else
    *new_type = half_type;

  *op0 = oprnd;
  *op1 = fold_convert (*new_type, const_oprnd);

  return true;
}
1163 
1164 
1165 /* Try to find a statement or a sequence of statements that can be performed
1166    on a smaller type:
1167 
1168      type x_t;
1169      TYPE x_T, res0_T, res1_T;
1170    loop:
1171      S1  x_t = *p;
1172      S2  x_T = (TYPE) x_t;
1173      S3  res0_T = op (x_T, C0);
1174      S4  res1_T = op (res0_T, C1);
1175      S5  ... = () res1_T;  - type demotion
1176 
1177    where type 'TYPE' is at least double the size of type 'type', C0 and C1 are
1178    constants.
1179    Check if S3 and S4 can be done on a smaller type than 'TYPE', it can either
1180    be 'type' or some intermediate type.  For now, we expect S5 to be a type
1181    demotion operation.  We also check that S3 and S4 have only one use.  */
1182 
static gimple
vect_recog_over_widening_pattern (vec<gimple> *stmts,
                                  tree *type_in, tree *type_out)
{
  gimple stmt = stmts->pop ();
  gimple pattern_stmt = NULL, new_def_stmt, prev_stmt = NULL, use_stmt = NULL;
  tree op0, op1, vectype = NULL_TREE, use_lhs, use_type;
  tree var = NULL_TREE, new_type = NULL_TREE, new_oprnd;
  bool first;
  tree type = NULL;

  /* Walk forward through the single-use chain of over-widened
     operations, creating a narrower pattern statement for each one
     that fits a smaller type.  */
  first = true;
  while (1)
    {
      if (!vinfo_for_stmt (stmt)
          || STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (stmt)))
        return NULL;

      new_def_stmt = NULL;
      if (!vect_operation_fits_smaller_type (stmt, var, &new_type,
                                             &op0, &op1, &new_def_stmt,
                                             stmts))
        {
          if (first)
            return NULL;
          else
            break;
        }

      /* STMT can be performed on a smaller type.  Check its uses.  */
      use_stmt = vect_single_imm_use (stmt);
      if (!use_stmt || !is_gimple_assign (use_stmt))
        return NULL;

      /* Create pattern statement for STMT.  */
      vectype = get_vectype_for_scalar_type (new_type);
      if (!vectype)
        return NULL;

      /* We want to collect all the statements for which we create pattern
         statements, except for the case when the last statement in the
         sequence doesn't have a corresponding pattern statement.  In such
         case we associate the last pattern statement with the last statement
         in the sequence.  Therefore, we only add the original statement to
         the list if we know that it is not the last.  */
      if (prev_stmt)
        stmts->safe_push (prev_stmt);

      var = vect_recog_temp_ssa_var (new_type, NULL);
      pattern_stmt
	= gimple_build_assign_with_ops (gimple_assign_rhs_code (stmt), var,
					op0, op1);
      STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt)) = pattern_stmt;
      new_pattern_def_seq (vinfo_for_stmt (stmt), new_def_stmt);

      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_NOTE, vect_location,
                           "created pattern stmt: ");
          dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
          dump_printf (MSG_NOTE, "\n");
        }

      type = gimple_expr_type (stmt);
      prev_stmt = stmt;
      stmt = use_stmt;

      first = false;
    }

  /* We got a sequence.  We expect it to end with a type demotion operation.
     Otherwise, we quit (for now).  There are three possible cases: the
     conversion is to NEW_TYPE (we don't do anything), the conversion is to
     a type bigger than NEW_TYPE and/or the signedness of USE_TYPE and
     NEW_TYPE differs (we create a new conversion statement).  */
  if (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (use_stmt)))
    {
      use_lhs = gimple_assign_lhs (use_stmt);
      use_type = TREE_TYPE (use_lhs);
      /* Support only type demotion or signedness change.  */
      if (!INTEGRAL_TYPE_P (use_type)
	  || TYPE_PRECISION (type) <= TYPE_PRECISION (use_type))
        return NULL;

      /* Check that NEW_TYPE is not bigger than the conversion result.  */
      if (TYPE_PRECISION (new_type) > TYPE_PRECISION (use_type))
	return NULL;

      if (TYPE_UNSIGNED (new_type) != TYPE_UNSIGNED (use_type)
          || TYPE_PRECISION (new_type) != TYPE_PRECISION (use_type))
        {
          /* Create NEW_TYPE->USE_TYPE conversion.  */
          new_oprnd = make_ssa_name (use_type, NULL);
          pattern_stmt = gimple_build_assign_with_ops (NOP_EXPR, new_oprnd,
                                                       var, NULL_TREE);
          STMT_VINFO_RELATED_STMT (vinfo_for_stmt (use_stmt)) = pattern_stmt;

          *type_in = get_vectype_for_scalar_type (new_type);
          *type_out = get_vectype_for_scalar_type (use_type);

          /* We created a pattern statement for the last statement in the
             sequence, so we don't need to associate it with the pattern
             statement created for PREV_STMT.  Therefore, we add PREV_STMT
             to the list in order to mark it later in vect_pattern_recog_1.  */
          if (prev_stmt)
            stmts->safe_push (prev_stmt);
        }
      else
        {
          if (prev_stmt)
	    STMT_VINFO_PATTERN_DEF_SEQ (vinfo_for_stmt (use_stmt))
	       = STMT_VINFO_PATTERN_DEF_SEQ (vinfo_for_stmt (prev_stmt));

          *type_in = vectype;
          *type_out = NULL_TREE;
        }

      stmts->safe_push (use_stmt);
    }
  else
    /* TODO: support general case, create a conversion to the correct type.  */
    return NULL;

  /* Pattern detected.  */
  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
                       "vect_recog_over_widening_pattern: detected: ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
      dump_printf (MSG_NOTE, "\n");
    }

  return pattern_stmt;
}
1317 
1318 /* Detect widening shift pattern:
1319 
1320    type a_t;
1321    TYPE a_T, res_T;
1322 
1323    S1 a_t = ;
1324    S2 a_T = (TYPE) a_t;
1325    S3 res_T = a_T << CONST;
1326 
1327   where type 'TYPE' is at least double the size of type 'type'.
1328 
1329   Also detect cases where the shift result is immediately converted
1330   to another type 'result_type' that is no larger in size than 'TYPE'.
1331   In those cases we perform a widen-shift that directly results in
1332   'result_type', to avoid a possible over-widening situation:
1333 
1334   type a_t;
1335   TYPE a_T, res_T;
1336   result_type res_result;
1337 
1338   S1 a_t = ;
1339   S2 a_T = (TYPE) a_t;
1340   S3 res_T = a_T << CONST;
1341   S4 res_result = (result_type) res_T;
1342       '--> res_result' = a_t w<< CONST;
1343 
1344   And a case when 'TYPE' is 4 times bigger than 'type'.  In that case we
1345   create an additional pattern stmt for S2 to create a variable of an
1346   intermediate type, and perform widen-shift on the intermediate type:
1347 
1348   type a_t;
1349   interm_type a_it;
1350   TYPE a_T, res_T, res_T';
1351 
1352   S1 a_t = ;
1353   S2 a_T = (TYPE) a_t;
1354       '--> a_it = (interm_type) a_t;
1355   S3 res_T = a_T << CONST;
1356       '--> res_T' = a_it <<* CONST;
1357 
1358   Input/Output:
1359 
1360   * STMTS: Contains a stmt from which the pattern search begins.
1361     In case of unsigned widen-shift, the original stmt (S3) is replaced with S4
1362     in STMTS.  When an intermediate type is used and a pattern statement is
1363     created for S2, we also put S2 here (before S3).
1364 
1365   Output:
1366 
1367   * TYPE_IN: The type of the input arguments to the pattern.
1368 
1369   * TYPE_OUT: The type of the output of this pattern.
1370 
1371   * Return value: A new stmt that will be used to replace the sequence of
1372     stmts that constitute the pattern.  In this case it will be:
1373     WIDEN_LSHIFT_EXPR <a_t, CONST>.  */
1374 
1375 static gimple
vect_recog_widen_shift_pattern(vec<gimple> * stmts,tree * type_in,tree * type_out)1376 vect_recog_widen_shift_pattern (vec<gimple> *stmts,
1377 				tree *type_in, tree *type_out)
1378 {
1379   gimple last_stmt = stmts->pop ();
1380   gimple def_stmt0;
1381   tree oprnd0, oprnd1;
1382   tree type, half_type0;
1383   gimple pattern_stmt;
1384   tree vectype, vectype_out = NULL_TREE;
1385   tree var;
1386   enum tree_code dummy_code;
1387   int dummy_int;
1388   vec<tree>  dummy_vec;
1389   gimple use_stmt;
1390   bool promotion;
1391 
1392   if (!is_gimple_assign (last_stmt) || !vinfo_for_stmt (last_stmt))
1393     return NULL;
1394 
1395   if (STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (last_stmt)))
1396     return NULL;
1397 
1398   if (gimple_assign_rhs_code (last_stmt) != LSHIFT_EXPR)
1399     return NULL;
1400 
1401   oprnd0 = gimple_assign_rhs1 (last_stmt);
1402   oprnd1 = gimple_assign_rhs2 (last_stmt);
1403   if (TREE_CODE (oprnd0) != SSA_NAME || TREE_CODE (oprnd1) != INTEGER_CST)
1404     return NULL;
1405 
1406   /* Check operand 0: it has to be defined by a type promotion.  */
1407   if (!type_conversion_p (oprnd0, last_stmt, false, &half_type0, &def_stmt0,
1408                           &promotion)
1409       || !promotion)
1410      return NULL;
1411 
1412   /* Check operand 1: has to be positive.  We check that it fits the type
1413      in vect_handle_widen_op_by_const ().  */
1414   if (tree_int_cst_compare (oprnd1, size_zero_node) <= 0)
1415     return NULL;
1416 
1417   oprnd0 = gimple_assign_rhs1 (def_stmt0);
1418   type = gimple_expr_type (last_stmt);
1419 
1420   /* Check for subsequent conversion to another type.  */
1421   use_stmt = vect_single_imm_use (last_stmt);
1422   if (use_stmt && is_gimple_assign (use_stmt)
1423       && CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (use_stmt))
1424       && !STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (use_stmt)))
1425     {
1426       tree use_lhs = gimple_assign_lhs (use_stmt);
1427       tree use_type = TREE_TYPE (use_lhs);
1428 
1429       if (INTEGRAL_TYPE_P (use_type)
1430 	  && TYPE_PRECISION (use_type) <= TYPE_PRECISION (type))
1431 	{
1432 	  last_stmt = use_stmt;
1433 	  type = use_type;
1434 	}
1435     }
1436 
1437   /* Check if this a widening operation.  */
1438   if (!vect_handle_widen_op_by_const (last_stmt, LSHIFT_EXPR, oprnd1,
1439        				      &oprnd0, stmts,
1440 	                              type, &half_type0, def_stmt0))
1441     return NULL;
1442 
1443   /* Pattern detected.  */
1444   if (dump_enabled_p ())
1445     dump_printf_loc (MSG_NOTE, vect_location,
1446                      "vect_recog_widen_shift_pattern: detected:\n");
1447 
1448   /* Check target support.  */
1449   vectype = get_vectype_for_scalar_type (half_type0);
1450   vectype_out = get_vectype_for_scalar_type (type);
1451 
1452   if (!vectype
1453       || !vectype_out
1454       || !supportable_widening_operation (WIDEN_LSHIFT_EXPR, last_stmt,
1455 					  vectype_out, vectype,
1456 					  &dummy_code, &dummy_code,
1457 					  &dummy_int, &dummy_vec))
1458     return NULL;
1459 
1460   *type_in = vectype;
1461   *type_out = vectype_out;
1462 
1463   /* Pattern supported.  Create a stmt to be used to replace the pattern.  */
1464   var = vect_recog_temp_ssa_var (type, NULL);
1465   pattern_stmt =
1466     gimple_build_assign_with_ops (WIDEN_LSHIFT_EXPR, var, oprnd0, oprnd1);
1467 
1468   if (dump_enabled_p ())
1469     dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt, 0);
1470 
1471   stmts->safe_push (last_stmt);
1472   return pattern_stmt;
1473 }
1474 
/* Detect a rotate pattern that wouldn't be otherwise vectorized:
1476 
1477    type a_t, b_t, c_t;
1478 
1479    S0 a_t = b_t r<< c_t;
1480 
1481   Input/Output:
1482 
1483   * STMTS: Contains a stmt from which the pattern search begins,
1484     i.e. the shift/rotate stmt.  The original stmt (S0) is replaced
1485     with a sequence:
1486 
1487    S1 d_t = -c_t;
1488    S2 e_t = d_t & (B - 1);
1489    S3 f_t = b_t << c_t;
1490    S4 g_t = b_t >> e_t;
1491    S0 a_t = f_t | g_t;
1492 
1493     where B is element bitsize of type.
1494 
1495   Output:
1496 
1497   * TYPE_IN: The type of the input arguments to the pattern.
1498 
1499   * TYPE_OUT: The type of the output of this pattern.
1500 
1501   * Return value: A new stmt that will be used to replace the rotate
1502     S0 stmt.  */
1503 
static gimple
vect_recog_rotate_pattern (vec<gimple> *stmts, tree *type_in, tree *type_out)
{
  gimple last_stmt = stmts->pop ();
  tree oprnd0, oprnd1, lhs, var, var1, var2, vectype, type, stype, def, def2;
  gimple pattern_stmt, def_stmt;
  enum tree_code rhs_code;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
  enum vect_def_type dt;
  optab optab1, optab2;
  edge ext_def = NULL;		/* Preheader edge to hoist invariant defs to.  */

  if (!is_gimple_assign (last_stmt))
    return NULL;

  /* Only rotates are handled here.  */
  rhs_code = gimple_assign_rhs_code (last_stmt);
  switch (rhs_code)
    {
    case LROTATE_EXPR:
    case RROTATE_EXPR:
      break;
    default:
      return NULL;
    }

  if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
    return NULL;

  /* The rotated value must be an unsigned integer whose precision is
     preserved by the statement.  */
  lhs = gimple_assign_lhs (last_stmt);
  oprnd0 = gimple_assign_rhs1 (last_stmt);
  type = TREE_TYPE (oprnd0);
  oprnd1 = gimple_assign_rhs2 (last_stmt);
  if (TREE_CODE (oprnd0) != SSA_NAME
      || TYPE_PRECISION (TREE_TYPE (lhs)) != TYPE_PRECISION (type)
      || !INTEGRAL_TYPE_P (type)
      || !TYPE_UNSIGNED (type))
    return NULL;

  if (!vect_is_simple_use (oprnd1, last_stmt, loop_vinfo, bb_vinfo, &def_stmt,
			   &def, &dt))
    return NULL;

  if (dt != vect_internal_def
      && dt != vect_constant_def
      && dt != vect_external_def)
    return NULL;

  vectype = get_vectype_for_scalar_type (type);
  if (vectype == NULL_TREE)
    return NULL;

  /* If vector/vector or vector/scalar rotate is supported by the target,
     don't do anything here.  */
  optab1 = optab_for_tree_code (rhs_code, vectype, optab_vector);
  if (optab1
      && optab_handler (optab1, TYPE_MODE (vectype)) != CODE_FOR_nothing)
    return NULL;

  if (bb_vinfo != NULL || dt != vect_internal_def)
    {
      optab2 = optab_for_tree_code (rhs_code, vectype, optab_scalar);
      if (optab2
	  && optab_handler (optab2, TYPE_MODE (vectype)) != CODE_FOR_nothing)
	return NULL;
    }

  /* If vector/vector or vector/scalar shifts aren't supported by the target,
     don't do anything here either.  */
  optab1 = optab_for_tree_code (LSHIFT_EXPR, vectype, optab_vector);
  optab2 = optab_for_tree_code (RSHIFT_EXPR, vectype, optab_vector);
  if (!optab1
      || optab_handler (optab1, TYPE_MODE (vectype)) == CODE_FOR_nothing
      || !optab2
      || optab_handler (optab2, TYPE_MODE (vectype)) == CODE_FOR_nothing)
    {
      if (bb_vinfo == NULL && dt == vect_internal_def)
	return NULL;
      optab1 = optab_for_tree_code (LSHIFT_EXPR, vectype, optab_scalar);
      optab2 = optab_for_tree_code (RSHIFT_EXPR, vectype, optab_scalar);
      if (!optab1
	  || optab_handler (optab1, TYPE_MODE (vectype)) == CODE_FOR_nothing
	  || !optab2
	  || optab_handler (optab2, TYPE_MODE (vectype)) == CODE_FOR_nothing)
	return NULL;
    }

  *type_in = vectype;
  *type_out = vectype;
  if (*type_in == NULL_TREE)
    return NULL;

  /* For a loop-invariant shift amount, compute the helper statements on
     the loop preheader edge if its definition dominates it.  */
  if (dt == vect_external_def
      && TREE_CODE (oprnd1) == SSA_NAME
      && loop_vinfo)
    {
      struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
      ext_def = loop_preheader_edge (loop);
      if (!SSA_NAME_IS_DEFAULT_DEF (oprnd1))
	{
	  basic_block bb = gimple_bb (SSA_NAME_DEF_STMT (oprnd1));
	  if (bb == NULL
	      || !dominated_by_p (CDI_DOMINATORS, ext_def->dest, bb))
	    ext_def = NULL;
	}
    }

  /* DEF becomes the shift amount in the mode of TYPE, reusing an
     existing value where possible.  */
  def = NULL_TREE;
  if (TREE_CODE (oprnd1) == INTEGER_CST
      || TYPE_MODE (TREE_TYPE (oprnd1)) == TYPE_MODE (type))
    def = oprnd1;
  else if (def_stmt && gimple_assign_cast_p (def_stmt))
    {
      tree rhs1 = gimple_assign_rhs1 (def_stmt);
      if (TYPE_MODE (TREE_TYPE (rhs1)) == TYPE_MODE (type)
	  && TYPE_PRECISION (TREE_TYPE (rhs1))
	     == TYPE_PRECISION (type))
	def = rhs1;
    }

  STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;
  if (def == NULL_TREE)
    {
      /* Otherwise convert the shift amount to TYPE.  */
      def = vect_recog_temp_ssa_var (type, NULL);
      def_stmt = gimple_build_assign_with_ops (NOP_EXPR, def, oprnd1,
					       NULL_TREE);
      if (ext_def)
	{
	  basic_block new_bb
	    = gsi_insert_on_edge_immediate (ext_def, def_stmt);
	  gcc_assert (!new_bb);
	}
      else
	append_pattern_def_seq (stmt_vinfo, def_stmt);
    }
  stype = TREE_TYPE (def);

  /* DEF2 is the complementary shift amount: precision - DEF.  */
  if (TREE_CODE (def) == INTEGER_CST)
    {
      /* A constant amount must be in (0, precision); 0 would make the
	 complementary shift count equal to the full precision (UB).  */
      if (!tree_fits_uhwi_p (def)
	  || tree_to_uhwi (def) >= GET_MODE_PRECISION (TYPE_MODE (type))
	  || integer_zerop (def))
	return NULL;
      def2 = build_int_cst (stype,
			    GET_MODE_PRECISION (TYPE_MODE (type))
			    - tree_to_uhwi (def));
    }
  else
    {
      tree vecstype = get_vectype_for_scalar_type (stype);
      stmt_vec_info def_stmt_vinfo;

      if (vecstype == NULL_TREE)
	return NULL;
      /* Compute DEF2 = -DEF ...  */
      def2 = vect_recog_temp_ssa_var (stype, NULL);
      def_stmt = gimple_build_assign_with_ops (NEGATE_EXPR, def2, def,
					       NULL_TREE);
      if (ext_def)
	{
	  basic_block new_bb
	    = gsi_insert_on_edge_immediate (ext_def, def_stmt);
	  gcc_assert (!new_bb);
	}
      else
	{
	  def_stmt_vinfo = new_stmt_vec_info (def_stmt, loop_vinfo, bb_vinfo);
	  set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
	  STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecstype;
	  append_pattern_def_seq (stmt_vinfo, def_stmt);
	}

      /* ... and mask it to (-DEF) & (precision - 1), which equals
	 precision - DEF modulo the precision.  */
      def2 = vect_recog_temp_ssa_var (stype, NULL);
      tree mask
	= build_int_cst (stype, GET_MODE_PRECISION (TYPE_MODE (stype)) - 1);
      def_stmt = gimple_build_assign_with_ops (BIT_AND_EXPR, def2,
					       gimple_assign_lhs (def_stmt),
					       mask);
      if (ext_def)
	{
	  basic_block new_bb
	    = gsi_insert_on_edge_immediate (ext_def, def_stmt);
	  gcc_assert (!new_bb);
	}
      else
	{
	  def_stmt_vinfo = new_stmt_vec_info (def_stmt, loop_vinfo, bb_vinfo);
	  set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
	  STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecstype;
	  append_pattern_def_seq (stmt_vinfo, def_stmt);
	}
    }

  /* Expand the rotate as two opposite shifts...  */
  var1 = vect_recog_temp_ssa_var (type, NULL);
  def_stmt = gimple_build_assign_with_ops (rhs_code == LROTATE_EXPR
					   ? LSHIFT_EXPR : RSHIFT_EXPR,
					   var1, oprnd0, def);
  append_pattern_def_seq (stmt_vinfo, def_stmt);

  var2 = vect_recog_temp_ssa_var (type, NULL);
  def_stmt = gimple_build_assign_with_ops (rhs_code == LROTATE_EXPR
					   ? RSHIFT_EXPR : LSHIFT_EXPR,
					   var2, oprnd0, def2);
  append_pattern_def_seq (stmt_vinfo, def_stmt);

  /* Pattern detected.  */
  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "vect_recog_rotate_pattern: detected:\n");

  /* ... combined with a bitwise OR.  Create a stmt to be used to
     replace the pattern.  */
  var = vect_recog_temp_ssa_var (type, NULL);
  pattern_stmt = gimple_build_assign_with_ops (BIT_IOR_EXPR, var, var1, var2);

  if (dump_enabled_p ())
    dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt, 0);

  stmts->safe_push (last_stmt);
  return pattern_stmt;
}
1724 
1725 /* Detect a vector by vector shift pattern that wouldn't be otherwise
1726    vectorized:
1727 
1728    type a_t;
1729    TYPE b_T, res_T;
1730 
1731    S1 a_t = ;
1732    S2 b_T = ;
1733    S3 res_T = b_T op a_t;
1734 
1735   where type 'TYPE' is a type with different size than 'type',
1736   and op is <<, >> or rotate.
1737 
1738   Also detect cases:
1739 
1740    type a_t;
1741    TYPE b_T, c_T, res_T;
1742 
1743    S0 c_T = ;
1744    S1 a_t = (type) c_T;
1745    S2 b_T = ;
1746    S3 res_T = b_T op a_t;
1747 
1748   Input/Output:
1749 
1750   * STMTS: Contains a stmt from which the pattern search begins,
1751     i.e. the shift/rotate stmt.  The original stmt (S3) is replaced
1752     with a shift/rotate which has same type on both operands, in the
1753     second case just b_T op c_T, in the first case with added cast
1754     from a_t to c_T in STMT_VINFO_PATTERN_DEF_SEQ.
1755 
1756   Output:
1757 
1758   * TYPE_IN: The type of the input arguments to the pattern.
1759 
1760   * TYPE_OUT: The type of the output of this pattern.
1761 
1762   * Return value: A new stmt that will be used to replace the shift/rotate
1763     S3 stmt.  */
1764 
static gimple
vect_recog_vector_vector_shift_pattern (vec<gimple> *stmts,
					tree *type_in, tree *type_out)
{
  gimple last_stmt = stmts->pop ();
  tree oprnd0, oprnd1, lhs, var;
  gimple pattern_stmt, def_stmt;
  enum tree_code rhs_code;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
  enum vect_def_type dt;
  tree def;

  if (!is_gimple_assign (last_stmt))
    return NULL;

  /* Only shifts and rotates can match this pattern.  */
  rhs_code = gimple_assign_rhs_code (last_stmt);
  switch (rhs_code)
    {
    case LSHIFT_EXPR:
    case RSHIFT_EXPR:
    case LROTATE_EXPR:
    case RROTATE_EXPR:
      break;
    default:
      return NULL;
    }

  /* Don't recognize a stmt that is already part of another pattern.  */
  if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
    return NULL;

  lhs = gimple_assign_lhs (last_stmt);
  oprnd0 = gimple_assign_rhs1 (last_stmt);
  oprnd1 = gimple_assign_rhs2 (last_stmt);
  /* The pattern only applies when the shift count (oprnd1) has a mode
     different from the shifted value (oprnd0) -- otherwise both operands
     already have the same type -- and when both the count and the result
     use the full precision of their respective modes.  */
  if (TREE_CODE (oprnd0) != SSA_NAME
      || TREE_CODE (oprnd1) != SSA_NAME
      || TYPE_MODE (TREE_TYPE (oprnd0)) == TYPE_MODE (TREE_TYPE (oprnd1))
      || TYPE_PRECISION (TREE_TYPE (oprnd1))
	 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (oprnd1)))
      || TYPE_PRECISION (TREE_TYPE (lhs))
	 != TYPE_PRECISION (TREE_TYPE (oprnd0)))
    return NULL;

  /* The shift count must be defined inside the loop/bb being vectorized.
     Note DEF_STMT is set as a side effect and inspected below.  */
  if (!vect_is_simple_use (oprnd1, last_stmt, loop_vinfo, bb_vinfo, &def_stmt,
			   &def, &dt))
    return NULL;

  if (dt != vect_internal_def)
    return NULL;

  *type_in = get_vectype_for_scalar_type (TREE_TYPE (oprnd0));
  *type_out = *type_in;
  if (*type_in == NULL_TREE)
    return NULL;

  /* If the count is a cast from a value that already has the mode and
     precision of oprnd0 (the S0/S1 case in the comment above), shift by
     that original value directly.  */
  def = NULL_TREE;
  if (gimple_assign_cast_p (def_stmt))
    {
      tree rhs1 = gimple_assign_rhs1 (def_stmt);
      if (TYPE_MODE (TREE_TYPE (rhs1)) == TYPE_MODE (TREE_TYPE (oprnd0))
	  && TYPE_PRECISION (TREE_TYPE (rhs1))
	     == TYPE_PRECISION (TREE_TYPE (oprnd0)))
	def = rhs1;
    }

  /* Otherwise emit a cast of the count to oprnd0's type as an extra
     pattern def stmt.  */
  if (def == NULL_TREE)
    {
      def = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
      def_stmt = gimple_build_assign_with_ops (NOP_EXPR, def, oprnd1,
					       NULL_TREE);
      new_pattern_def_seq (stmt_vinfo, def_stmt);
    }

  /* Pattern detected.  */
  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "vect_recog_vector_vector_shift_pattern: detected:\n");

  /* Pattern supported.  Create a stmt to be used to replace the pattern.  */
  var = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
  pattern_stmt = gimple_build_assign_with_ops (rhs_code, var, oprnd0, def);

  if (dump_enabled_p ())
    dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt, 0);

  stmts->safe_push (last_stmt);
  return pattern_stmt;
}
1854 
1855 /* Detect a signed division by a constant that wouldn't be
1856    otherwise vectorized:
1857 
1858    type a_t, b_t;
1859 
1860    S1 a_t = b_t / N;
1861 
1862   where type 'type' is an integral type and N is a constant.
1863 
1864   Similarly handle modulo by a constant:
1865 
1866    S4 a_t = b_t % N;
1867 
1868   Input/Output:
1869 
1870   * STMTS: Contains a stmt from which the pattern search begins,
1871     i.e. the division stmt.  S1 is replaced by if N is a power
1872     of two constant and type is signed:
1873   S3  y_t = b_t < 0 ? N - 1 : 0;
1874   S2  x_t = b_t + y_t;
1875   S1' a_t = x_t >> log2 (N);
1876 
1877     S4 is replaced if N is a power of two constant and
1878     type is signed by (where *_T temporaries have unsigned type):
1879   S9  y_T = b_t < 0 ? -1U : 0U;
1880   S8  z_T = y_T >> (sizeof (type_t) * CHAR_BIT - log2 (N));
1881   S7  z_t = (type) z_T;
1882   S6  w_t = b_t + z_t;
1883   S5  x_t = w_t & (N - 1);
1884   S4' a_t = x_t - z_t;
1885 
1886   Output:
1887 
1888   * TYPE_IN: The type of the input arguments to the pattern.
1889 
1890   * TYPE_OUT: The type of the output of this pattern.
1891 
1892   * Return value: A new stmt that will be used to replace the division
1893     S1 or modulo S4 stmt.  */
1894 
static gimple
vect_recog_divmod_pattern (vec<gimple> *stmts,
			   tree *type_in, tree *type_out)
{
  gimple last_stmt = stmts->pop ();
  tree oprnd0, oprnd1, vectype, itype, cond;
  gimple pattern_stmt, def_stmt;
  enum tree_code rhs_code;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
  optab optab;
  tree q;
  int dummy_int, prec;
  stmt_vec_info def_stmt_vinfo;

  if (!is_gimple_assign (last_stmt))
    return NULL;

  /* Only truncating division and modulo are handled.  */
  rhs_code = gimple_assign_rhs_code (last_stmt);
  switch (rhs_code)
    {
    case TRUNC_DIV_EXPR:
    case TRUNC_MOD_EXPR:
      break;
    default:
      return NULL;
    }

  if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
    return NULL;

  /* The dividend must be an SSA_NAME of a full-precision integer type
     and the divisor an integer constant.  */
  oprnd0 = gimple_assign_rhs1 (last_stmt);
  oprnd1 = gimple_assign_rhs2 (last_stmt);
  itype = TREE_TYPE (oprnd0);
  if (TREE_CODE (oprnd0) != SSA_NAME
      || TREE_CODE (oprnd1) != INTEGER_CST
      || TREE_CODE (itype) != INTEGER_TYPE
      || TYPE_PRECISION (itype) != GET_MODE_PRECISION (TYPE_MODE (itype)))
    return NULL;

  vectype = get_vectype_for_scalar_type (itype);
  if (vectype == NULL_TREE)
    return NULL;

  /* If the target can handle vectorized division or modulo natively,
     don't attempt to optimize this.  */
  optab = optab_for_tree_code (rhs_code, vectype, optab_default);
  if (optab != unknown_optab)
    {
      enum machine_mode vec_mode = TYPE_MODE (vectype);
      int icode = (int) optab_handler (optab, vec_mode);
      if (icode != CODE_FOR_nothing)
	return NULL;
    }

  /* Case 1: signed division/modulo by a positive power of two; this is
     expanded with shifts and masks (sequences S1'-S3' resp. S4'-S9' in
     the comment above).  */
  prec = TYPE_PRECISION (itype);
  if (integer_pow2p (oprnd1))
    {
      /* Only signed types and strictly positive divisors are handled
	 on this path.  */
      if (TYPE_UNSIGNED (itype) || tree_int_cst_sgn (oprnd1) != 1)
	return NULL;

      /* Pattern detected.  */
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "vect_recog_divmod_pattern: detected:\n");

      /* cond = oprnd0 < 0, used to bias negative dividends.  */
      cond = build2 (LT_EXPR, boolean_type_node, oprnd0,
		     build_int_cst (itype, 0));
      if (rhs_code == TRUNC_DIV_EXPR)
	{
	  /* y = oprnd0 < 0 ? N - 1 : 0;
	     x = oprnd0 + y;
	     result = x >> log2 (N);  */
	  tree var = vect_recog_temp_ssa_var (itype, NULL);
	  tree shift;
	  def_stmt
	    = gimple_build_assign_with_ops (COND_EXPR, var, cond,
					    fold_build2 (MINUS_EXPR, itype,
							 oprnd1,
							 build_int_cst (itype,
									1)),
					    build_int_cst (itype, 0));
	  new_pattern_def_seq (stmt_vinfo, def_stmt);
	  var = vect_recog_temp_ssa_var (itype, NULL);
	  def_stmt
	    = gimple_build_assign_with_ops (PLUS_EXPR, var, oprnd0,
					    gimple_assign_lhs (def_stmt));
	  append_pattern_def_seq (stmt_vinfo, def_stmt);

	  shift = build_int_cst (itype, tree_log2 (oprnd1));
	  pattern_stmt
	    = gimple_build_assign_with_ops (RSHIFT_EXPR,
					    vect_recog_temp_ssa_var (itype,
								     NULL),
					    var, shift);
	}
      else
	{
	  /* TRUNC_MOD_EXPR: compute SIGNMASK = (oprnd0 < 0 ? N - 1 : 0)
	     adjusted so that
	     result = ((oprnd0 + signmask) & (N - 1)) - signmask.  */
	  tree signmask;
	  STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;
	  if (compare_tree_int (oprnd1, 2) == 0)
	    {
	      /* For N == 2 the mask is simply 0 or 1.  */
	      signmask = vect_recog_temp_ssa_var (itype, NULL);
	      def_stmt
		= gimple_build_assign_with_ops (COND_EXPR, signmask, cond,
						build_int_cst (itype, 1),
						build_int_cst (itype, 0));
	      append_pattern_def_seq (stmt_vinfo, def_stmt);
	    }
	  else
	    {
	      /* General power of two: build the mask in an unsigned copy
		 of the type via a logical right shift of all-ones
		 (stmts S9-S7 in the comment above).  The intermediate
		 stmts get an explicit unsigned vectype.  */
	      tree utype
		= build_nonstandard_integer_type (prec, 1);
	      tree vecutype = get_vectype_for_scalar_type (utype);
	      tree shift
		= build_int_cst (utype, GET_MODE_BITSIZE (TYPE_MODE (itype))
					- tree_log2 (oprnd1));
	      tree var = vect_recog_temp_ssa_var (utype, NULL);

	      def_stmt
		= gimple_build_assign_with_ops (COND_EXPR, var, cond,
						build_int_cst (utype, -1),
						build_int_cst (utype, 0));
	      def_stmt_vinfo
		= new_stmt_vec_info (def_stmt, loop_vinfo, bb_vinfo);
	      set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
	      STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecutype;
	      append_pattern_def_seq (stmt_vinfo, def_stmt);
	      var = vect_recog_temp_ssa_var (utype, NULL);
	      def_stmt
		= gimple_build_assign_with_ops (RSHIFT_EXPR, var,
						gimple_assign_lhs (def_stmt),
						shift);
	      def_stmt_vinfo
		= new_stmt_vec_info (def_stmt, loop_vinfo, bb_vinfo);
	      set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
	      STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecutype;
	      append_pattern_def_seq (stmt_vinfo, def_stmt);
	      signmask = vect_recog_temp_ssa_var (itype, NULL);
	      def_stmt
		= gimple_build_assign_with_ops (NOP_EXPR, signmask, var,
						NULL_TREE);
	      append_pattern_def_seq (stmt_vinfo, def_stmt);
	    }
	  /* w = oprnd0 + signmask;
	     x = w & (N - 1);
	     result = x - signmask;  */
	  def_stmt
	    = gimple_build_assign_with_ops (PLUS_EXPR,
					    vect_recog_temp_ssa_var (itype,
								     NULL),
					    oprnd0, signmask);
	  append_pattern_def_seq (stmt_vinfo, def_stmt);
	  def_stmt
	    = gimple_build_assign_with_ops (BIT_AND_EXPR,
					    vect_recog_temp_ssa_var (itype,
								     NULL),
					    gimple_assign_lhs (def_stmt),
					    fold_build2 (MINUS_EXPR, itype,
							 oprnd1,
							 build_int_cst (itype,
									1)));
	  append_pattern_def_seq (stmt_vinfo, def_stmt);

	  pattern_stmt
	    = gimple_build_assign_with_ops (MINUS_EXPR,
					    vect_recog_temp_ssa_var (itype,
								     NULL),
					    gimple_assign_lhs (def_stmt),
					    signmask);
	}

      if (dump_enabled_p ())
	dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt,
                              0);

      stmts->safe_push (last_stmt);

      *type_in = vectype;
      *type_out = vectype;
      return pattern_stmt;
    }

  /* Case 2: arbitrary constant divisor, handled via a highpart multiply
     by a precomputed constant (see choose_multiplier).  The magic
     constants must fit in a HOST_WIDE_INT and division by zero is
     rejected outright.  */
  if (prec > HOST_BITS_PER_WIDE_INT
      || integer_zerop (oprnd1))
    return NULL;

  if (!can_mult_highpart_p (TYPE_MODE (vectype), TYPE_UNSIGNED (itype)))
    return NULL;

  STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;

  if (TYPE_UNSIGNED (itype))
    {
      unsigned HOST_WIDE_INT mh, ml;
      int pre_shift, post_shift;
      unsigned HOST_WIDE_INT d = (TREE_INT_CST_LOW (oprnd1)
				  & GET_MODE_MASK (TYPE_MODE (itype)));
      tree t1, t2, t3, t4;

      if (d >= ((unsigned HOST_WIDE_INT) 1 << (prec - 1)))
	/* FIXME: Can transform this into oprnd0 >= oprnd1 ? 1 : 0.  */
	return NULL;

      /* Find a suitable multiplier and right shift count
	 instead of multiplying with D.  */
      mh = choose_multiplier (d, prec, prec, &ml, &post_shift, &dummy_int);

      /* If the suggested multiplier is more than SIZE bits, we can do better
	 for even divisors, using an initial right shift.  */
      if (mh != 0 && (d & 1) == 0)
	{
	  pre_shift = floor_log2 (d & -d);
	  mh = choose_multiplier (d >> pre_shift, prec, prec - pre_shift,
				  &ml, &post_shift, &dummy_int);
	  gcc_assert (!mh);
	}
      else
	pre_shift = 0;

      if (mh != 0)
	{
	  if (post_shift - 1 >= prec)
	    return NULL;

	  /* t1 = oprnd0 h* ml;
	     t2 = oprnd0 - t1;
	     t3 = t2 >> 1;
	     t4 = t1 + t3;
	     q = t4 >> (post_shift - 1);  */
	  t1 = vect_recog_temp_ssa_var (itype, NULL);
	  def_stmt
	    = gimple_build_assign_with_ops (MULT_HIGHPART_EXPR, t1, oprnd0,
					    build_int_cst (itype, ml));
	  append_pattern_def_seq (stmt_vinfo, def_stmt);

	  t2 = vect_recog_temp_ssa_var (itype, NULL);
	  def_stmt
	    = gimple_build_assign_with_ops (MINUS_EXPR, t2, oprnd0, t1);
	  append_pattern_def_seq (stmt_vinfo, def_stmt);

	  t3 = vect_recog_temp_ssa_var (itype, NULL);
	  def_stmt
	    = gimple_build_assign_with_ops (RSHIFT_EXPR, t3, t2,
					    integer_one_node);
	  append_pattern_def_seq (stmt_vinfo, def_stmt);

	  t4 = vect_recog_temp_ssa_var (itype, NULL);
	  def_stmt
	    = gimple_build_assign_with_ops (PLUS_EXPR, t4, t1, t3);

	  if (post_shift != 1)
	    {
	      append_pattern_def_seq (stmt_vinfo, def_stmt);

	      q = vect_recog_temp_ssa_var (itype, NULL);
	      pattern_stmt
		= gimple_build_assign_with_ops (RSHIFT_EXPR, q, t4,
						build_int_cst (itype,
							       post_shift
							       - 1));
	    }
	  else
	    {
	      q = t4;
	      pattern_stmt = def_stmt;
	    }
	}
      else
	{
	  if (pre_shift >= prec || post_shift >= prec)
	    return NULL;

	  /* t1 = oprnd0 >> pre_shift;
	     t2 = t1 h* ml;
	     q = t2 >> post_shift;  */
	  if (pre_shift)
	    {
	      t1 = vect_recog_temp_ssa_var (itype, NULL);
	      def_stmt
		= gimple_build_assign_with_ops (RSHIFT_EXPR, t1, oprnd0,
						build_int_cst (NULL,
							       pre_shift));
	      append_pattern_def_seq (stmt_vinfo, def_stmt);
	    }
	  else
	    t1 = oprnd0;

	  t2 = vect_recog_temp_ssa_var (itype, NULL);
	  def_stmt
	    = gimple_build_assign_with_ops (MULT_HIGHPART_EXPR, t2, t1,
					    build_int_cst (itype, ml));

	  if (post_shift)
	    {
	      append_pattern_def_seq (stmt_vinfo, def_stmt);

	      q = vect_recog_temp_ssa_var (itype, NULL);
	      def_stmt
		= gimple_build_assign_with_ops (RSHIFT_EXPR, q, t2,
						build_int_cst (itype,
							       post_shift));
	    }
	  else
	    q = t2;

	  pattern_stmt = def_stmt;
	}
    }
  else
    {
      unsigned HOST_WIDE_INT ml;
      int post_shift;
      HOST_WIDE_INT d = TREE_INT_CST_LOW (oprnd1);
      unsigned HOST_WIDE_INT abs_d;
      bool add = false;
      tree t1, t2, t3, t4;

      /* Give up for -1.  */
      if (d == -1)
	return NULL;

      /* Since d might be INT_MIN, we have to cast to
	 unsigned HOST_WIDE_INT before negating to avoid
	 undefined signed overflow.  */
      abs_d = (d >= 0
	       ? (unsigned HOST_WIDE_INT) d
	       : - (unsigned HOST_WIDE_INT) d);

      /* n rem d = n rem -d */
      if (rhs_code == TRUNC_MOD_EXPR && d < 0)
	{
	  d = abs_d;
	  oprnd1 = build_int_cst (itype, abs_d);
	}
      else if (HOST_BITS_PER_WIDE_INT >= prec
	       && abs_d == (unsigned HOST_WIDE_INT) 1 << (prec - 1))
	/* This case is not handled correctly below.  */
	return NULL;

      choose_multiplier (abs_d, prec, prec - 1, &ml, &post_shift, &dummy_int);
      /* If ML has its top bit set, sign-extend it and switch to the
	 "add the dividend after the highpart multiply" variant.  */
      if (ml >= (unsigned HOST_WIDE_INT) 1 << (prec - 1))
	{
	  add = true;
	  ml |= (~(unsigned HOST_WIDE_INT) 0) << (prec - 1);
	}
      if (post_shift >= prec)
	return NULL;

      /* t1 = oprnd0 h* ml;  */
      t1 = vect_recog_temp_ssa_var (itype, NULL);
      def_stmt
	= gimple_build_assign_with_ops (MULT_HIGHPART_EXPR, t1, oprnd0,
					build_int_cst (itype, ml));

      if (add)
	{
	  /* t2 = t1 + oprnd0;  */
	  append_pattern_def_seq (stmt_vinfo, def_stmt);
	  t2 = vect_recog_temp_ssa_var (itype, NULL);
	  def_stmt
	    = gimple_build_assign_with_ops (PLUS_EXPR, t2, t1, oprnd0);
	}
      else
	t2 = t1;

      if (post_shift)
	{
	  /* t3 = t2 >> post_shift;  */
	  append_pattern_def_seq (stmt_vinfo, def_stmt);
	  t3 = vect_recog_temp_ssa_var (itype, NULL);
	  def_stmt
	    = gimple_build_assign_with_ops (RSHIFT_EXPR, t3, t2,
					    build_int_cst (itype, post_shift));
	}
      else
	t3 = t2;

      /* Use range info, if available, to possibly prove the sign of
	 oprnd0: MSB stays 1 when the sign is unknown, becomes 0 when
	 oprnd0 is known non-negative and -1 when known negative.  */
      double_int oprnd0_min, oprnd0_max;
      int msb = 1;
      if (get_range_info (oprnd0, &oprnd0_min, &oprnd0_max) == VR_RANGE)
	{
	  if (!oprnd0_min.is_negative ())
	    msb = 0;
	  else if (oprnd0_max.is_negative ())
	    msb = -1;
	}

      if (msb == 0 && d >= 0)
	{
	  /* q = t3;  */
	  q = t3;
	  pattern_stmt = def_stmt;
	}
      else
	{
	  /* t4 = oprnd0 >> (prec - 1);
	     or if we know from VRP that oprnd0 >= 0
	     t4 = 0;
	     or if we know from VRP that oprnd0 < 0
	     t4 = -1;  */
	  append_pattern_def_seq (stmt_vinfo, def_stmt);
	  t4 = vect_recog_temp_ssa_var (itype, NULL);
	  if (msb != 1)
	    def_stmt
	      = gimple_build_assign_with_ops (INTEGER_CST,
					      t4, build_int_cst (itype, msb),
					      NULL_TREE)
	  else
	    def_stmt
	      = gimple_build_assign_with_ops (RSHIFT_EXPR, t4, oprnd0,
					      build_int_cst (itype, prec - 1));
	  append_pattern_def_seq (stmt_vinfo, def_stmt);

	  /* q = t3 - t4;  or q = t4 - t3;  */
	  q = vect_recog_temp_ssa_var (itype, NULL);
	  pattern_stmt
	    = gimple_build_assign_with_ops (MINUS_EXPR, q, d < 0 ? t4 : t3,
					    d < 0 ? t3 : t4);
	}
    }

  if (rhs_code == TRUNC_MOD_EXPR)
    {
      tree r, t1;

      /* We divided.  Now finish by:
	 t1 = q * oprnd1;
	 r = oprnd0 - t1;  */
      append_pattern_def_seq (stmt_vinfo, pattern_stmt);

      t1 = vect_recog_temp_ssa_var (itype, NULL);
      def_stmt
	= gimple_build_assign_with_ops (MULT_EXPR, t1, q, oprnd1);
      append_pattern_def_seq (stmt_vinfo, def_stmt);

      r = vect_recog_temp_ssa_var (itype, NULL);
      pattern_stmt
	= gimple_build_assign_with_ops (MINUS_EXPR, r, oprnd0, t1);
    }

  /* Pattern detected.  */
  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
                       "vect_recog_divmod_pattern: detected: ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
      dump_printf (MSG_NOTE, "\n");
    }

  stmts->safe_push (last_stmt);

  *type_in = vectype;
  *type_out = vectype;
  return pattern_stmt;
}
2346 
2347 /* Function vect_recog_mixed_size_cond_pattern
2348 
2349    Try to find the following pattern:
2350 
2351      type x_t, y_t;
2352      TYPE a_T, b_T, c_T;
2353    loop:
2354      S1  a_T = x_t CMP y_t ? b_T : c_T;
2355 
2356    where type 'TYPE' is an integral type which has different size
2357    from 'type'.  b_T and c_T are either constants (and if 'TYPE' is wider
2358    than 'type', the constants need to fit into an integer type
2359    with the same width as 'type') or results of conversion from 'type'.
2360 
2361    Input:
2362 
2363    * LAST_STMT: A stmt from which the pattern search begins.
2364 
2365    Output:
2366 
2367    * TYPE_IN: The type of the input arguments to the pattern.
2368 
2369    * TYPE_OUT: The type of the output of this pattern.
2370 
2371    * Return value: A new stmt that will be used to replace the pattern.
2372 	Additionally a def_stmt is added.
2373 
2374 	a_it = x_t CMP y_t ? b_it : c_it;
2375 	a_T = (TYPE) a_it;  */
2376 
static gimple
vect_recog_mixed_size_cond_pattern (vec<gimple> *stmts, tree *type_in,
				    tree *type_out)
{
  gimple last_stmt = (*stmts)[0];
  tree cond_expr, then_clause, else_clause;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt), def_stmt_info;
  tree type, vectype, comp_vectype, itype = NULL_TREE, vecitype;
  enum machine_mode cmpmode;
  gimple pattern_stmt, def_stmt;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
  tree orig_type0 = NULL_TREE, orig_type1 = NULL_TREE;
  gimple def_stmt0 = NULL, def_stmt1 = NULL;
  bool promotion;
  tree comp_scalar_type;

  /* Only a COND_EXPR assignment that is an internal def can match.  */
  if (!is_gimple_assign (last_stmt)
      || gimple_assign_rhs_code (last_stmt) != COND_EXPR
      || STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_internal_def)
    return NULL;

  cond_expr = gimple_assign_rhs1 (last_stmt);
  then_clause = gimple_assign_rhs2 (last_stmt);
  else_clause = gimple_assign_rhs3 (last_stmt);

  if (!COMPARISON_CLASS_P (cond_expr))
    return NULL;

  /* The comparison operands must themselves be vectorizable.  */
  comp_scalar_type = TREE_TYPE (TREE_OPERAND (cond_expr, 0));
  comp_vectype = get_vectype_for_scalar_type (comp_scalar_type);
  if (comp_vectype == NULL_TREE)
    return NULL;

  /* The pattern exists precisely because the result type differs from
     the comparison type; same-type COND_EXPRs need no help.  */
  type = gimple_expr_type (last_stmt);
  if (types_compatible_p (type, comp_scalar_type)
      || ((TREE_CODE (then_clause) != INTEGER_CST
	   || TREE_CODE (else_clause) != INTEGER_CST)
	  && !INTEGRAL_TYPE_P (comp_scalar_type))
      || !INTEGRAL_TYPE_P (type))
    return NULL;

  /* Each non-constant clause must be a conversion from some original
     type; record the conversion's def stmt and source type.  */
  if ((TREE_CODE (then_clause) != INTEGER_CST
       && !type_conversion_p (then_clause, last_stmt, false, &orig_type0,
                              &def_stmt0, &promotion))
      || (TREE_CODE (else_clause) != INTEGER_CST
          && !type_conversion_p (else_clause, last_stmt, false, &orig_type1,
                                 &def_stmt1, &promotion)))
    return NULL;

  /* If both clauses were converted, they must come from the same type.  */
  if (orig_type0 && orig_type1
      && !types_compatible_p (orig_type0, orig_type1))
    return NULL;

  /* Strip the conversions and operate on the original values; ITYPE
     becomes the intermediate type of the narrowed COND_EXPR.  */
  if (orig_type0)
    {
      if (!types_compatible_p (orig_type0, comp_scalar_type))
	return NULL;
      then_clause = gimple_assign_rhs1 (def_stmt0);
      itype = orig_type0;
    }

  if (orig_type1)
    {
      if (!types_compatible_p (orig_type1, comp_scalar_type))
	return NULL;
      else_clause = gimple_assign_rhs1 (def_stmt1);
      itype = orig_type1;
    }

  cmpmode = GET_MODE_INNER (TYPE_MODE (comp_vectype));

  /* Same-width result and comparison need no pattern.  */
  if (GET_MODE_BITSIZE (TYPE_MODE (type)) == GET_MODE_BITSIZE (cmpmode))
    return NULL;

  vectype = get_vectype_for_scalar_type (type);
  if (vectype == NULL_TREE)
    return NULL;

  /* If the mixed-size VEC_COND_EXPR is directly supported, leave it.  */
  if (expand_vec_cond_expr_p (vectype, comp_vectype))
    return NULL;

  /* No conversion was stripped: make up an intermediate integer type
     of the comparison's width.  */
  if (itype == NULL_TREE)
    itype = build_nonstandard_integer_type (GET_MODE_BITSIZE (cmpmode),
					    TYPE_UNSIGNED (type));

  if (itype == NULL_TREE
      || GET_MODE_BITSIZE (TYPE_MODE (itype)) != GET_MODE_BITSIZE (cmpmode))
    return NULL;

  vecitype = get_vectype_for_scalar_type (itype);
  if (vecitype == NULL_TREE)
    return NULL;

  /* The same-width VEC_COND_EXPR must be supported instead.  */
  if (!expand_vec_cond_expr_p (vecitype, comp_vectype))
    return NULL;

  /* When narrowing the clauses, constants must fit the narrower type.  */
  if (GET_MODE_BITSIZE (TYPE_MODE (type)) > GET_MODE_BITSIZE (cmpmode))
    {
      if ((TREE_CODE (then_clause) == INTEGER_CST
	   && !int_fits_type_p (then_clause, itype))
	  || (TREE_CODE (else_clause) == INTEGER_CST
	      && !int_fits_type_p (else_clause, itype)))
	return NULL;
    }

  /* Emit: a_it = cond ? b_it : c_it;  a_T = (TYPE) a_it;  */
  def_stmt
    = gimple_build_assign_with_ops (COND_EXPR,
				    vect_recog_temp_ssa_var (itype, NULL),
				    unshare_expr (cond_expr),
				    fold_convert (itype, then_clause),
				    fold_convert (itype, else_clause));
  pattern_stmt
    = gimple_build_assign_with_ops (NOP_EXPR,
				    vect_recog_temp_ssa_var (type, NULL),
				    gimple_assign_lhs (def_stmt), NULL_TREE);

  new_pattern_def_seq (stmt_vinfo, def_stmt);
  def_stmt_info = new_stmt_vec_info (def_stmt, loop_vinfo, bb_vinfo);
  set_vinfo_for_stmt (def_stmt, def_stmt_info);
  STMT_VINFO_VECTYPE (def_stmt_info) = vecitype;
  *type_in = vecitype;
  *type_out = vectype;

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "vect_recog_mixed_size_cond_pattern: detected:\n");

  return pattern_stmt;
}
2507 
2508 
2509 /* Helper function of vect_recog_bool_pattern.  Called recursively, return
2510    true if bool VAR can be optimized that way.  */
2511 
2512 static bool
check_bool_pattern(tree var,loop_vec_info loop_vinfo,bb_vec_info bb_vinfo)2513 check_bool_pattern (tree var, loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
2514 {
2515   gimple def_stmt;
2516   enum vect_def_type dt;
2517   tree def, rhs1;
2518   enum tree_code rhs_code;
2519 
2520   if (!vect_is_simple_use (var, NULL, loop_vinfo, bb_vinfo, &def_stmt, &def,
2521 			   &dt))
2522     return false;
2523 
2524   if (dt != vect_internal_def)
2525     return false;
2526 
2527   if (!is_gimple_assign (def_stmt))
2528     return false;
2529 
2530   if (!has_single_use (def))
2531     return false;
2532 
2533   rhs1 = gimple_assign_rhs1 (def_stmt);
2534   rhs_code = gimple_assign_rhs_code (def_stmt);
2535   switch (rhs_code)
2536     {
2537     case SSA_NAME:
2538       return check_bool_pattern (rhs1, loop_vinfo, bb_vinfo);
2539 
2540     CASE_CONVERT:
2541       if ((TYPE_PRECISION (TREE_TYPE (rhs1)) != 1
2542 	   || !TYPE_UNSIGNED (TREE_TYPE (rhs1)))
2543 	  && TREE_CODE (TREE_TYPE (rhs1)) != BOOLEAN_TYPE)
2544 	return false;
2545       return check_bool_pattern (rhs1, loop_vinfo, bb_vinfo);
2546 
2547     case BIT_NOT_EXPR:
2548       return check_bool_pattern (rhs1, loop_vinfo, bb_vinfo);
2549 
2550     case BIT_AND_EXPR:
2551     case BIT_IOR_EXPR:
2552     case BIT_XOR_EXPR:
2553       if (!check_bool_pattern (rhs1, loop_vinfo, bb_vinfo))
2554 	return false;
2555       return check_bool_pattern (gimple_assign_rhs2 (def_stmt), loop_vinfo,
2556 				 bb_vinfo);
2557 
2558     default:
2559       if (TREE_CODE_CLASS (rhs_code) == tcc_comparison)
2560 	{
2561 	  tree vecitype, comp_vectype;
2562 
2563 	  /* If the comparison can throw, then is_gimple_condexpr will be
2564 	     false and we can't make a COND_EXPR/VEC_COND_EXPR out of it.  */
2565 	  if (stmt_could_throw_p (def_stmt))
2566 	    return false;
2567 
2568 	  comp_vectype = get_vectype_for_scalar_type (TREE_TYPE (rhs1));
2569 	  if (comp_vectype == NULL_TREE)
2570 	    return false;
2571 
2572 	  if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE)
2573 	    {
2574 	      enum machine_mode mode = TYPE_MODE (TREE_TYPE (rhs1));
2575 	      tree itype
2576 		= build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
2577 	      vecitype = get_vectype_for_scalar_type (itype);
2578 	      if (vecitype == NULL_TREE)
2579 		return false;
2580 	    }
2581 	  else
2582 	    vecitype = comp_vectype;
2583 	  return expand_vec_cond_expr_p (vecitype, comp_vectype);
2584 	}
2585       return false;
2586     }
2587 }
2588 
2589 
2590 /* Helper function of adjust_bool_pattern.  Add a cast to TYPE to a previous
2591    stmt (SSA_NAME_DEF_STMT of VAR) by moving the COND_EXPR from RELATED_STMT
2592    to PATTERN_DEF_SEQ and adding a cast as RELATED_STMT.  */
2593 
2594 static tree
adjust_bool_pattern_cast(tree type,tree var)2595 adjust_bool_pattern_cast (tree type, tree var)
2596 {
2597   stmt_vec_info stmt_vinfo = vinfo_for_stmt (SSA_NAME_DEF_STMT (var));
2598   gimple cast_stmt, pattern_stmt;
2599 
2600   gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo));
2601   pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
2602   new_pattern_def_seq (stmt_vinfo, pattern_stmt);
2603   cast_stmt
2604     = gimple_build_assign_with_ops (NOP_EXPR,
2605 				    vect_recog_temp_ssa_var (type, NULL),
2606 				    gimple_assign_lhs (pattern_stmt),
2607 				    NULL_TREE);
2608   STMT_VINFO_RELATED_STMT (stmt_vinfo) = cast_stmt;
2609   return gimple_assign_lhs (cast_stmt);
2610 }
2611 
2612 
2613 /* Helper function of vect_recog_bool_pattern.  Do the actual transformations,
2614    recursively.  VAR is an SSA_NAME that should be transformed from bool
2615    to a wider integer type, OUT_TYPE is the desired final integer type of
2616    the whole pattern, TRUEVAL should be NULL unless optimizing
2617    BIT_AND_EXPR into a COND_EXPR with one integer from one of the operands
2618    in the then_clause, STMTS is where statements with added pattern stmts
2619    should be pushed to.  */
2620 
2621 static tree
adjust_bool_pattern(tree var,tree out_type,tree trueval,vec<gimple> * stmts)2622 adjust_bool_pattern (tree var, tree out_type, tree trueval,
2623 		     vec<gimple> *stmts)
2624 {
2625   gimple stmt = SSA_NAME_DEF_STMT (var);
2626   enum tree_code rhs_code, def_rhs_code;
2627   tree itype, cond_expr, rhs1, rhs2, irhs1, irhs2;
2628   location_t loc;
2629   gimple pattern_stmt, def_stmt;
2630 
2631   rhs1 = gimple_assign_rhs1 (stmt);
2632   rhs2 = gimple_assign_rhs2 (stmt);
2633   rhs_code = gimple_assign_rhs_code (stmt);
2634   loc = gimple_location (stmt);
2635   switch (rhs_code)
2636     {
2637     case SSA_NAME:
2638     CASE_CONVERT:
2639       irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
2640       itype = TREE_TYPE (irhs1);
2641       pattern_stmt
2642 	= gimple_build_assign_with_ops (SSA_NAME,
2643 					vect_recog_temp_ssa_var (itype, NULL),
2644 					irhs1, NULL_TREE);
2645       break;
2646 
2647     case BIT_NOT_EXPR:
2648       irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
2649       itype = TREE_TYPE (irhs1);
2650       pattern_stmt
2651 	= gimple_build_assign_with_ops (BIT_XOR_EXPR,
2652 					vect_recog_temp_ssa_var (itype, NULL),
2653 					irhs1, build_int_cst (itype, 1));
2654       break;
2655 
2656     case BIT_AND_EXPR:
2657       /* Try to optimize x = y & (a < b ? 1 : 0); into
2658 	 x = (a < b ? y : 0);
2659 
2660 	 E.g. for:
2661 	   bool a_b, b_b, c_b;
2662 	   TYPE d_T;
2663 
2664 	   S1  a_b = x1 CMP1 y1;
2665 	   S2  b_b = x2 CMP2 y2;
2666 	   S3  c_b = a_b & b_b;
2667 	   S4  d_T = (TYPE) c_b;
2668 
2669 	 we would normally emit:
2670 
2671 	   S1'  a_T = x1 CMP1 y1 ? 1 : 0;
2672 	   S2'  b_T = x2 CMP2 y2 ? 1 : 0;
2673 	   S3'  c_T = a_T & b_T;
2674 	   S4'  d_T = c_T;
2675 
2676 	 but we can save one stmt by using the
2677 	 result of one of the COND_EXPRs in the other COND_EXPR and leave
2678 	 BIT_AND_EXPR stmt out:
2679 
2680 	   S1'  a_T = x1 CMP1 y1 ? 1 : 0;
2681 	   S3'  c_T = x2 CMP2 y2 ? a_T : 0;
	   S4'  d_T = c_T;
2683 
2684 	 At least when VEC_COND_EXPR is implemented using masks
2685 	 cond ? 1 : 0 is as expensive as cond ? var : 0, in both cases it
2686 	 computes the comparison masks and ands it, in one case with
2687 	 all ones vector, in the other case with a vector register.
2688 	 Don't do this for BIT_IOR_EXPR, because cond ? 1 : var; is
2689 	 often more expensive.  */
2690       def_stmt = SSA_NAME_DEF_STMT (rhs2);
2691       def_rhs_code = gimple_assign_rhs_code (def_stmt);
2692       if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
2693 	{
2694 	  tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
2695 	  irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
2696 	  if (TYPE_PRECISION (TREE_TYPE (irhs1))
2697 	      == GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (def_rhs1))))
2698 	    {
2699 	      gimple tstmt;
2700 	      stmt_vec_info stmt_def_vinfo = vinfo_for_stmt (def_stmt);
2701 	      irhs2 = adjust_bool_pattern (rhs2, out_type, irhs1, stmts);
2702 	      tstmt = stmts->pop ();
2703 	      gcc_assert (tstmt == def_stmt);
2704 	      stmts->quick_push (stmt);
2705 	      STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt))
2706 		= STMT_VINFO_RELATED_STMT (stmt_def_vinfo);
2707 	      gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_def_vinfo));
2708 	      STMT_VINFO_RELATED_STMT (stmt_def_vinfo) = NULL;
2709 	      return irhs2;
2710 	    }
2711 	  else
2712 	    irhs2 = adjust_bool_pattern (rhs2, out_type, NULL_TREE, stmts);
2713 	  goto and_ior_xor;
2714 	}
2715       def_stmt = SSA_NAME_DEF_STMT (rhs1);
2716       def_rhs_code = gimple_assign_rhs_code (def_stmt);
2717       if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
2718 	{
2719 	  tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
2720 	  irhs2 = adjust_bool_pattern (rhs2, out_type, NULL_TREE, stmts);
2721 	  if (TYPE_PRECISION (TREE_TYPE (irhs2))
2722 	      == GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (def_rhs1))))
2723 	    {
2724 	      gimple tstmt;
2725 	      stmt_vec_info stmt_def_vinfo = vinfo_for_stmt (def_stmt);
2726 	      irhs1 = adjust_bool_pattern (rhs1, out_type, irhs2, stmts);
2727 	      tstmt = stmts->pop ();
2728 	      gcc_assert (tstmt == def_stmt);
2729 	      stmts->quick_push (stmt);
2730 	      STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt))
2731 		= STMT_VINFO_RELATED_STMT (stmt_def_vinfo);
2732 	      gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_def_vinfo));
2733 	      STMT_VINFO_RELATED_STMT (stmt_def_vinfo) = NULL;
2734 	      return irhs1;
2735 	    }
2736 	  else
2737 	    irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
2738 	  goto and_ior_xor;
2739 	}
2740       /* FALLTHRU */
2741     case BIT_IOR_EXPR:
2742     case BIT_XOR_EXPR:
2743       irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
2744       irhs2 = adjust_bool_pattern (rhs2, out_type, NULL_TREE, stmts);
2745     and_ior_xor:
2746       if (TYPE_PRECISION (TREE_TYPE (irhs1))
2747 	  != TYPE_PRECISION (TREE_TYPE (irhs2)))
2748 	{
2749 	  int prec1 = TYPE_PRECISION (TREE_TYPE (irhs1));
2750 	  int prec2 = TYPE_PRECISION (TREE_TYPE (irhs2));
2751 	  int out_prec = TYPE_PRECISION (out_type);
2752 	  if (absu_hwi (out_prec - prec1) < absu_hwi (out_prec - prec2))
2753 	    irhs2 = adjust_bool_pattern_cast (TREE_TYPE (irhs1), rhs2);
2754 	  else if (absu_hwi (out_prec - prec1) > absu_hwi (out_prec - prec2))
2755 	    irhs1 = adjust_bool_pattern_cast (TREE_TYPE (irhs2), rhs1);
2756 	  else
2757 	    {
2758 	      irhs1 = adjust_bool_pattern_cast (out_type, rhs1);
2759 	      irhs2 = adjust_bool_pattern_cast (out_type, rhs2);
2760 	    }
2761 	}
2762       itype = TREE_TYPE (irhs1);
2763       pattern_stmt
2764 	= gimple_build_assign_with_ops (rhs_code,
2765 					vect_recog_temp_ssa_var (itype, NULL),
2766 					irhs1, irhs2);
2767       break;
2768 
2769     default:
2770       gcc_assert (TREE_CODE_CLASS (rhs_code) == tcc_comparison);
2771       if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE
2772 	  || !TYPE_UNSIGNED (TREE_TYPE (rhs1))
2773 	  || (TYPE_PRECISION (TREE_TYPE (rhs1))
2774 	      != GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (rhs1)))))
2775 	{
2776 	  enum machine_mode mode = TYPE_MODE (TREE_TYPE (rhs1));
2777 	  itype
2778 	    = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
2779 	}
2780       else
2781 	itype = TREE_TYPE (rhs1);
2782       cond_expr = build2_loc (loc, rhs_code, itype, rhs1, rhs2);
2783       if (trueval == NULL_TREE)
2784 	trueval = build_int_cst (itype, 1);
2785       else
2786 	gcc_checking_assert (useless_type_conversion_p (itype,
2787 							TREE_TYPE (trueval)));
2788       pattern_stmt
2789 	= gimple_build_assign_with_ops (COND_EXPR,
2790 					vect_recog_temp_ssa_var (itype, NULL),
2791 					cond_expr, trueval,
2792 					build_int_cst (itype, 0));
2793       break;
2794     }
2795 
2796   stmts->safe_push (stmt);
2797   gimple_set_location (pattern_stmt, loc);
2798   STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt)) = pattern_stmt;
2799   return gimple_assign_lhs (pattern_stmt);
2800 }
2801 
2802 
2803 /* Function vect_recog_bool_pattern
2804 
2805    Try to find pattern like following:
2806 
2807      bool a_b, b_b, c_b, d_b, e_b;
2808      TYPE f_T;
2809    loop:
2810      S1  a_b = x1 CMP1 y1;
2811      S2  b_b = x2 CMP2 y2;
2812      S3  c_b = a_b & b_b;
2813      S4  d_b = x3 CMP3 y3;
2814      S5  e_b = c_b | d_b;
2815      S6  f_T = (TYPE) e_b;
2816 
2817    where type 'TYPE' is an integral type.
2818 
2819    Input:
2820 
2821    * LAST_STMT: A stmt at the end from which the pattern
2822 		search begins, i.e. cast of a bool to
2823 		an integer type.
2824 
2825    Output:
2826 
2827    * TYPE_IN: The type of the input arguments to the pattern.
2828 
2829    * TYPE_OUT: The type of the output of this pattern.
2830 
2831    * Return value: A new stmt that will be used to replace the pattern.
2832 
2833 	Assuming size of TYPE is the same as size of all comparisons
2834 	(otherwise some casts would be added where needed), for the above
2835 	sequence we create related pattern stmts:
2836 	S1'  a_T = x1 CMP1 y1 ? 1 : 0;
2837 	S3'  c_T = x2 CMP2 y2 ? a_T : 0;
2838 	S4'  d_T = x3 CMP3 y3 ? 1 : 0;
2839 	S5'  e_T = c_T | d_T;
2840 	S6'  f_T = e_T;
2841 
2842 	Instead of the above S3' we could emit:
2843 	S2'  b_T = x2 CMP2 y2 ? 1 : 0;
2844 	S3'  c_T = a_T | b_T;
2845 	but the above is more efficient.  */
2846 
2847 static gimple
vect_recog_bool_pattern(vec<gimple> * stmts,tree * type_in,tree * type_out)2848 vect_recog_bool_pattern (vec<gimple> *stmts, tree *type_in,
2849 			 tree *type_out)
2850 {
2851   gimple last_stmt = stmts->pop ();
2852   enum tree_code rhs_code;
2853   tree var, lhs, rhs, vectype;
2854   stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
2855   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
2856   bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
2857   gimple pattern_stmt;
2858 
2859   if (!is_gimple_assign (last_stmt))
2860     return NULL;
2861 
2862   var = gimple_assign_rhs1 (last_stmt);
2863   lhs = gimple_assign_lhs (last_stmt);
2864 
2865   if ((TYPE_PRECISION (TREE_TYPE (var)) != 1
2866        || !TYPE_UNSIGNED (TREE_TYPE (var)))
2867       && TREE_CODE (TREE_TYPE (var)) != BOOLEAN_TYPE)
2868     return NULL;
2869 
2870   rhs_code = gimple_assign_rhs_code (last_stmt);
2871   if (CONVERT_EXPR_CODE_P (rhs_code))
2872     {
2873       if (TREE_CODE (TREE_TYPE (lhs)) != INTEGER_TYPE
2874 	  || TYPE_PRECISION (TREE_TYPE (lhs)) == 1)
2875 	return NULL;
2876       vectype = get_vectype_for_scalar_type (TREE_TYPE (lhs));
2877       if (vectype == NULL_TREE)
2878 	return NULL;
2879 
2880       if (!check_bool_pattern (var, loop_vinfo, bb_vinfo))
2881 	return NULL;
2882 
2883       rhs = adjust_bool_pattern (var, TREE_TYPE (lhs), NULL_TREE, stmts);
2884       lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
2885       if (useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
2886 	pattern_stmt
2887 	  = gimple_build_assign_with_ops (SSA_NAME, lhs, rhs, NULL_TREE);
2888       else
2889 	pattern_stmt
2890 	  = gimple_build_assign_with_ops (NOP_EXPR, lhs, rhs, NULL_TREE);
2891       *type_out = vectype;
2892       *type_in = vectype;
2893       stmts->safe_push (last_stmt);
2894       if (dump_enabled_p ())
2895 	dump_printf_loc (MSG_NOTE, vect_location,
2896                          "vect_recog_bool_pattern: detected:\n");
2897 
2898       return pattern_stmt;
2899     }
2900   else if (rhs_code == SSA_NAME
2901 	   && STMT_VINFO_DATA_REF (stmt_vinfo))
2902     {
2903       stmt_vec_info pattern_stmt_info;
2904       vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
2905       gcc_assert (vectype != NULL_TREE);
2906       if (!VECTOR_MODE_P (TYPE_MODE (vectype)))
2907 	return NULL;
2908       if (!check_bool_pattern (var, loop_vinfo, bb_vinfo))
2909 	return NULL;
2910 
2911       rhs = adjust_bool_pattern (var, TREE_TYPE (vectype), NULL_TREE, stmts);
2912       lhs = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (vectype), lhs);
2913       if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
2914 	{
2915 	  tree rhs2 = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
2916 	  gimple cast_stmt
2917 	    = gimple_build_assign_with_ops (NOP_EXPR, rhs2, rhs, NULL_TREE);
2918 	  new_pattern_def_seq (stmt_vinfo, cast_stmt);
2919 	  rhs = rhs2;
2920 	}
2921       pattern_stmt
2922 	= gimple_build_assign_with_ops (SSA_NAME, lhs, rhs, NULL_TREE);
2923       pattern_stmt_info = new_stmt_vec_info (pattern_stmt, loop_vinfo,
2924 						bb_vinfo);
2925       set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
2926       STMT_VINFO_DATA_REF (pattern_stmt_info)
2927 	= STMT_VINFO_DATA_REF (stmt_vinfo);
2928       STMT_VINFO_DR_BASE_ADDRESS (pattern_stmt_info)
2929 	= STMT_VINFO_DR_BASE_ADDRESS (stmt_vinfo);
2930       STMT_VINFO_DR_INIT (pattern_stmt_info) = STMT_VINFO_DR_INIT (stmt_vinfo);
2931       STMT_VINFO_DR_OFFSET (pattern_stmt_info)
2932 	= STMT_VINFO_DR_OFFSET (stmt_vinfo);
2933       STMT_VINFO_DR_STEP (pattern_stmt_info) = STMT_VINFO_DR_STEP (stmt_vinfo);
2934       STMT_VINFO_DR_ALIGNED_TO (pattern_stmt_info)
2935 	= STMT_VINFO_DR_ALIGNED_TO (stmt_vinfo);
2936       DR_STMT (STMT_VINFO_DATA_REF (stmt_vinfo)) = pattern_stmt;
2937       *type_out = vectype;
2938       *type_in = vectype;
2939       stmts->safe_push (last_stmt);
2940       if (dump_enabled_p ())
2941 	dump_printf_loc (MSG_NOTE, vect_location,
2942                          "vect_recog_bool_pattern: detected:\n");
2943       return pattern_stmt;
2944     }
2945   else
2946     return NULL;
2947 }
2948 
2949 
2950 /* Mark statements that are involved in a pattern.  */
2951 
2952 static inline void
vect_mark_pattern_stmts(gimple orig_stmt,gimple pattern_stmt,tree pattern_vectype)2953 vect_mark_pattern_stmts (gimple orig_stmt, gimple pattern_stmt,
2954                          tree pattern_vectype)
2955 {
2956   stmt_vec_info pattern_stmt_info, def_stmt_info;
2957   stmt_vec_info orig_stmt_info = vinfo_for_stmt (orig_stmt);
2958   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (orig_stmt_info);
2959   bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (orig_stmt_info);
2960   gimple def_stmt;
2961 
2962   pattern_stmt_info = vinfo_for_stmt (pattern_stmt);
2963   if (pattern_stmt_info == NULL)
2964     {
2965       pattern_stmt_info = new_stmt_vec_info (pattern_stmt, loop_vinfo,
2966 						bb_vinfo);
2967       set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
2968     }
2969   gimple_set_bb (pattern_stmt, gimple_bb (orig_stmt));
2970 
2971   STMT_VINFO_RELATED_STMT (pattern_stmt_info) = orig_stmt;
2972   STMT_VINFO_DEF_TYPE (pattern_stmt_info)
2973     = STMT_VINFO_DEF_TYPE (orig_stmt_info);
2974   STMT_VINFO_VECTYPE (pattern_stmt_info) = pattern_vectype;
2975   STMT_VINFO_IN_PATTERN_P (orig_stmt_info) = true;
2976   STMT_VINFO_RELATED_STMT (orig_stmt_info) = pattern_stmt;
2977   STMT_VINFO_PATTERN_DEF_SEQ (pattern_stmt_info)
2978     = STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt_info);
2979   if (STMT_VINFO_PATTERN_DEF_SEQ (pattern_stmt_info))
2980     {
2981       gimple_stmt_iterator si;
2982       for (si = gsi_start (STMT_VINFO_PATTERN_DEF_SEQ (pattern_stmt_info));
2983 	   !gsi_end_p (si); gsi_next (&si))
2984 	{
2985 	  def_stmt = gsi_stmt (si);
2986 	  def_stmt_info = vinfo_for_stmt (def_stmt);
2987 	  if (def_stmt_info == NULL)
2988 	    {
2989 	      def_stmt_info = new_stmt_vec_info (def_stmt, loop_vinfo,
2990 						 bb_vinfo);
2991 	      set_vinfo_for_stmt (def_stmt, def_stmt_info);
2992 	    }
2993 	  gimple_set_bb (def_stmt, gimple_bb (orig_stmt));
2994 	  STMT_VINFO_RELATED_STMT (def_stmt_info) = orig_stmt;
2995 	  STMT_VINFO_DEF_TYPE (def_stmt_info)
2996 	    = STMT_VINFO_DEF_TYPE (orig_stmt_info);
2997 	  if (STMT_VINFO_VECTYPE (def_stmt_info) == NULL_TREE)
2998 	    STMT_VINFO_VECTYPE (def_stmt_info) = pattern_vectype;
2999 	}
3000     }
3001 }
3002 
3003 /* Function vect_pattern_recog_1
3004 
3005    Input:
3006    PATTERN_RECOG_FUNC: A pointer to a function that detects a certain
3007         computation pattern.
3008    STMT: A stmt from which the pattern search should start.
3009 
3010    If PATTERN_RECOG_FUNC successfully detected the pattern, it creates an
3011    expression that computes the same functionality and can be used to
3012    replace the sequence of stmts that are involved in the pattern.
3013 
3014    Output:
3015    This function checks if the expression returned by PATTERN_RECOG_FUNC is
3016    supported in vector form by the target.  We use 'TYPE_IN' to obtain the
3017    relevant vector type. If 'TYPE_IN' is already a vector type, then this
3018    indicates that target support had already been checked by PATTERN_RECOG_FUNC.
3019    If 'TYPE_OUT' is also returned by PATTERN_RECOG_FUNC, we check that it fits
3020    to the available target pattern.
3021 
3022    This function also does some bookkeeping, as explained in the documentation
3023    for vect_pattern_recog.  */
3024 
static void
vect_pattern_recog_1 (vect_recog_func_ptr vect_recog_func,
		      gimple_stmt_iterator si,
		      vec<gimple> *stmts_to_replace)
{
  gimple stmt = gsi_stmt (si), pattern_stmt;
  stmt_vec_info stmt_info;
  loop_vec_info loop_vinfo;
  tree pattern_vectype;
  tree type_in, type_out;
  enum tree_code code;
  int i;
  gimple next;

  /* Seed the worklist with the stmt at SI.  The recognizer may pop it
     and push other stmts that make up the pattern instead.  */
  stmts_to_replace->truncate (0);
  stmts_to_replace->quick_push (stmt);
  pattern_stmt = (* vect_recog_func) (stmts_to_replace, &type_in, &type_out);
  if (!pattern_stmt)
    return;

  /* The last stmt in STMTS_TO_REPLACE is the one the pattern stmt will
     replace; use its stmt_vec_info from here on.  */
  stmt = stmts_to_replace->last ();
  stmt_info = vinfo_for_stmt (stmt);
  loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);

  if (VECTOR_MODE_P (TYPE_MODE (type_in)))
    {
      /* No need to check target support (already checked by the pattern
         recognition function).  */
      pattern_vectype = type_out ? type_out : type_in;
    }
  else
    {
      enum machine_mode vec_mode;
      enum insn_code icode;
      optab optab;

      /* Check target support  */
      type_in = get_vectype_for_scalar_type (type_in);
      if (!type_in)
	return;
      if (type_out)
	type_out = get_vectype_for_scalar_type (type_out);
      else
	type_out = type_in;
      if (!type_out)
	return;
      pattern_vectype = type_out;

      /* The pattern stmt is either an assignment or a call; anything
	 else would indicate a broken recognizer.  */
      if (is_gimple_assign (pattern_stmt))
	code = gimple_assign_rhs_code (pattern_stmt);
      else
        {
	  gcc_assert (is_gimple_call (pattern_stmt));
	  code = CALL_EXPR;
	}

      /* Bail out unless the target has an insn implementing CODE on
	 vectors of TYPE_IN that produces TYPE_OUT.  */
      optab = optab_for_tree_code (code, type_in, optab_default);
      vec_mode = TYPE_MODE (type_in);
      if (!optab
          || (icode = optab_handler (optab, vec_mode)) == CODE_FOR_nothing
          || (insn_data[icode].operand[0].mode != TYPE_MODE (type_out)))
	return;
    }

  /* Found a vectorizable pattern.  */
  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
                       "pattern recognized: ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
      dump_printf (MSG_NOTE, "\n");
    }

  /* Mark the stmts that are involved in the pattern. */
  vect_mark_pattern_stmts (stmt, pattern_stmt, pattern_vectype);

  /* Patterns cannot be vectorized using SLP, because they change the order of
     computation.  */
  if (loop_vinfo)
    FOR_EACH_VEC_ELT (LOOP_VINFO_REDUCTIONS (loop_vinfo), i, next)
      if (next == stmt)
        LOOP_VINFO_REDUCTIONS (loop_vinfo).ordered_remove (i);

  /* It is possible that additional pattern stmts are created and inserted in
     STMTS_TO_REPLACE.  We create a stmt_info for each of them, and mark the
     relevant statements.  Note the loop deliberately stops before the last
     element, which was already handled above.  */
  for (i = 0; stmts_to_replace->iterate (i, &stmt)
	      && (unsigned) i < (stmts_to_replace->length () - 1);
       i++)
    {
      stmt_info = vinfo_for_stmt (stmt);
      pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_NOTE, vect_location,
                           "additional pattern stmt: ");
          dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
          dump_printf (MSG_NOTE, "\n");
        }

      vect_mark_pattern_stmts (stmt, pattern_stmt, NULL_TREE);
    }
}
3128 
3129 
3130 /* Function vect_pattern_recog
3131 
3132    Input:
3133    LOOP_VINFO - a struct_loop_info of a loop in which we want to look for
3134         computation idioms.
3135 
3136    Output - for each computation idiom that is detected we create a new stmt
3137         that provides the same functionality and that can be vectorized.  We
3138         also record some information in the struct_stmt_info of the relevant
3139         stmts, as explained below:
3140 
3141    At the entry to this function we have the following stmts, with the
3142    following initial value in the STMT_VINFO fields:
3143 
3144          stmt                     in_pattern_p  related_stmt    vec_stmt
3145          S1: a_i = ....                 -       -               -
3146          S2: a_2 = ..use(a_i)..         -       -               -
3147          S3: a_1 = ..use(a_2)..         -       -               -
3148          S4: a_0 = ..use(a_1)..         -       -               -
3149          S5: ... = ..use(a_0)..         -       -               -
3150 
3151    Say the sequence {S1,S2,S3,S4} was detected as a pattern that can be
3152    represented by a single stmt.  We then:
3153    - create a new stmt S6 equivalent to the pattern (the stmt is not
3154      inserted into the code)
3155    - fill in the STMT_VINFO fields as follows:
3156 
3157                                   in_pattern_p  related_stmt    vec_stmt
3158          S1: a_i = ....                 -       -               -
3159          S2: a_2 = ..use(a_i)..         -       -               -
3160          S3: a_1 = ..use(a_2)..         -       -               -
3161          S4: a_0 = ..use(a_1)..         true    S6              -
3162           '---> S6: a_new = ....        -       S4              -
3163          S5: ... = ..use(a_0)..         -       -               -
3164 
3165    (the last stmt in the pattern (S4) and the new pattern stmt (S6) point
3166    to each other through the RELATED_STMT field).
3167 
3168    S6 will be marked as relevant in vect_mark_stmts_to_be_vectorized instead
3169    of S4 because it will replace all its uses.  Stmts {S1,S2,S3} will
3170    remain irrelevant unless used by stmts other than S4.
3171 
3172    If vectorization succeeds, vect_transform_stmt will skip over {S1,S2,S3}
3173    (because they are marked as irrelevant).  It will vectorize S6, and record
3174    a pointer to the new vector stmt VS6 from S6 (as usual).
3175    S4 will be skipped, and S5 will be vectorized as usual:
3176 
3177                                   in_pattern_p  related_stmt    vec_stmt
3178          S1: a_i = ....                 -       -               -
3179          S2: a_2 = ..use(a_i)..         -       -               -
3180          S3: a_1 = ..use(a_2)..         -       -               -
3181        > VS6: va_new = ....             -       -               -
3182          S4: a_0 = ..use(a_1)..         true    S6              VS6
3183           '---> S6: a_new = ....        -       S4              VS6
3184        > VS5: ... = ..vuse(va_new)..    -       -               -
3185          S5: ... = ..use(a_0)..         -       -               -
3186 
3187    DCE could then get rid of {S1,S2,S3,S4,S5} (if their defs are not used
3188    elsewhere), and we'll end up with:
3189 
3190         VS6: va_new = ....
3191         VS5: ... = ..vuse(va_new)..
3192 
3193    In case of more than one pattern statements, e.g., widen-mult with
3194    intermediate type:
3195 
3196      S1  a_t = ;
3197      S2  a_T = (TYPE) a_t;
3198            '--> S3: a_it = (interm_type) a_t;
3199      S4  prod_T = a_T * CONST;
3200            '--> S5: prod_T' = a_it w* CONST;
3201 
3202    there may be other users of a_T outside the pattern.  In that case S2 will
3203    be marked as relevant (as well as S3), and both S2 and S3 will be analyzed
3204    and vectorized.  The vector stmt VS2 will be recorded in S2, and VS3 will
3205    be recorded in S3.  */
3206 
3207 void
vect_pattern_recog(loop_vec_info loop_vinfo,bb_vec_info bb_vinfo)3208 vect_pattern_recog (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
3209 {
3210   struct loop *loop;
3211   basic_block *bbs;
3212   unsigned int nbbs;
3213   gimple_stmt_iterator si;
3214   unsigned int i, j;
3215   vect_recog_func_ptr vect_recog_func;
3216   auto_vec<gimple, 1> stmts_to_replace;
3217   gimple stmt;
3218 
3219   if (dump_enabled_p ())
3220     dump_printf_loc (MSG_NOTE, vect_location,
3221                      "=== vect_pattern_recog ===\n");
3222 
3223   if (loop_vinfo)
3224     {
3225       loop = LOOP_VINFO_LOOP (loop_vinfo);
3226       bbs = LOOP_VINFO_BBS (loop_vinfo);
3227       nbbs = loop->num_nodes;
3228     }
3229   else
3230     {
3231       bbs = &BB_VINFO_BB (bb_vinfo);
3232       nbbs = 1;
3233     }
3234 
3235   /* Scan through the loop stmts, applying the pattern recognition
3236      functions starting at each stmt visited:  */
3237   for (i = 0; i < nbbs; i++)
3238     {
3239       basic_block bb = bbs[i];
3240       for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
3241         {
3242 	  if (bb_vinfo && (stmt = gsi_stmt (si))
3243 	      && vinfo_for_stmt (stmt)
3244 	      && !STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (stmt)))
3245 	   continue;
3246 
3247           /* Scan over all generic vect_recog_xxx_pattern functions.  */
3248           for (j = 0; j < NUM_PATTERNS; j++)
3249             {
3250 	      vect_recog_func = vect_vect_recog_func_ptrs[j];
3251 	      vect_pattern_recog_1 (vect_recog_func, si,
3252 				    &stmts_to_replace);
3253             }
3254         }
3255     }
3256 }
3257