/* Analysis Utilities for Loop Vectorization.
   Copyright (C) 2006-2013 Free Software Foundation, Inc.
   Contributed by Dorit Nuzman <dorit@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "ggc.h"
#include "tree.h"
#include "target.h"
#include "basic-block.h"
#include "gimple-pretty-print.h"
#include "tree-flow.h"
#include "cfgloop.h"
#include "expr.h"
#include "optabs.h"
#include "params.h"
#include "tree-data-ref.h"
#include "tree-vectorizer.h"
#include "recog.h"		/* FIXME: for insn_data */
#include "diagnostic-core.h"
#include "dumpfile.h"

/* Pattern recognition functions  */
static gimple vect_recog_widen_sum_pattern (vec<gimple> *, tree *,
					    tree *);
static gimple vect_recog_widen_mult_pattern (vec<gimple> *, tree *,
					     tree *);
static gimple vect_recog_dot_prod_pattern (vec<gimple> *, tree *,
					   tree *);
static gimple vect_recog_pow_pattern (vec<gimple> *, tree *, tree *);
static gimple vect_recog_over_widening_pattern (vec<gimple> *, tree *,
                                                 tree *);
static gimple vect_recog_widen_shift_pattern (vec<gimple> *,
	                                tree *, tree *);
static gimple vect_recog_vector_vector_shift_pattern (vec<gimple> *,
						      tree *, tree *);
static gimple vect_recog_divmod_pattern (vec<gimple> *,
					 tree *, tree *);
static gimple vect_recog_mixed_size_cond_pattern (vec<gimple> *,
						  tree *, tree *);
static gimple vect_recog_bool_pattern (vec<gimple> *, tree *, tree *);
static vect_recog_func_ptr vect_vect_recog_func_ptrs[NUM_PATTERNS] = {
	vect_recog_widen_mult_pattern,
	vect_recog_widen_sum_pattern,
	vect_recog_dot_prod_pattern,
	vect_recog_pow_pattern,
	vect_recog_widen_shift_pattern,
	vect_recog_over_widening_pattern,
	vect_recog_vector_vector_shift_pattern,
	vect_recog_divmod_pattern,
	vect_recog_mixed_size_cond_pattern,
	vect_recog_bool_pattern};

static inline void
append_pattern_def_seq (stmt_vec_info stmt_info, gimple stmt)
{
  gimple_seq_add_stmt_without_update (&STMT_VINFO_PATTERN_DEF_SEQ (stmt_info),
				      stmt);
}

static inline void
new_pattern_def_seq (stmt_vec_info stmt_info, gimple stmt)
{
  STMT_VINFO_PATTERN_DEF_SEQ (stmt_info) = NULL;
  append_pattern_def_seq (stmt_info, stmt);
}

/* Check whether STMT2 is in the same loop or basic block as STMT1.
   Which of the two applies depends on whether we're currently doing
   loop-based or basic-block-based vectorization, as determined by
   the vinfo_for_stmt for STMT1 (which must be defined).

   If this returns true, vinfo_for_stmt for STMT2 is guaranteed
   to be defined as well.  */

static bool
vect_same_loop_or_bb_p (gimple stmt1, gimple stmt2)
{
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt1);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);

  if (!gimple_bb (stmt2))
    return false;

  if (loop_vinfo)
    {
      struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
      if (!flow_bb_inside_loop_p (loop, gimple_bb (stmt2)))
	return false;
    }
  else
    {
      if (gimple_bb (stmt2) != BB_VINFO_BB (bb_vinfo)
	  || gimple_code (stmt2) == GIMPLE_PHI)
	return false;
    }

  gcc_assert (vinfo_for_stmt (stmt2));
  return true;
}

/* If the LHS of DEF_STMT has a single use, and that statement is
   in the same loop or basic block, return it.  */

static gimple
vect_single_imm_use (gimple def_stmt)
{
  tree lhs = gimple_assign_lhs (def_stmt);
  use_operand_p use_p;
  gimple use_stmt;

  if (!single_imm_use (lhs, &use_p, &use_stmt))
    return NULL;

  if (!vect_same_loop_or_bb_p (def_stmt, use_stmt))
    return NULL;

  return use_stmt;
}

/* Check whether NAME, an ssa-name used in USE_STMT,
   is a result of a type promotion or demotion, such that:
     DEF_STMT: NAME = NOP (name0)
   where the type of name0 (ORIG_TYPE) is smaller/bigger than the type of NAME.
   If CHECK_SIGN is TRUE, check that either both types are signed or both are
   unsigned.  */
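
/* For illustration, a hedged sketch of the promotion case this helper
   detects (SSA names and widths are hypothetical, assuming 16-bit short
   and 32-bit int):

     short s_4 = ...;
     int i_5 = (int) s_4;     <-- DEF_STMT: NAME = NOP (name0)
     ... = i_5 + 1;           <-- USE_STMT, where NAME is i_5

   Here ORIG_TYPE is short and TYPE_PRECISION (int) >= 2 *
   TYPE_PRECISION (short), so *PROMOTION is set to true.  */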

static bool
type_conversion_p (tree name, gimple use_stmt, bool check_sign,
		   tree *orig_type, gimple *def_stmt, bool *promotion)
{
  tree dummy;
  gimple dummy_gimple;
  loop_vec_info loop_vinfo;
  stmt_vec_info stmt_vinfo;
  tree type = TREE_TYPE (name);
  tree oprnd0;
  enum vect_def_type dt;
  tree def;
  bb_vec_info bb_vinfo;

  stmt_vinfo = vinfo_for_stmt (use_stmt);
  loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
  if (!vect_is_simple_use (name, use_stmt, loop_vinfo, bb_vinfo, def_stmt,
			   &def, &dt))
    return false;

  if (dt != vect_internal_def
      && dt != vect_external_def && dt != vect_constant_def)
    return false;

  if (!*def_stmt)
    return false;

  if (!is_gimple_assign (*def_stmt))
    return false;

  if (!CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (*def_stmt)))
    return false;

  oprnd0 = gimple_assign_rhs1 (*def_stmt);

  *orig_type = TREE_TYPE (oprnd0);
  if (!INTEGRAL_TYPE_P (type) || !INTEGRAL_TYPE_P (*orig_type)
      || ((TYPE_UNSIGNED (type) != TYPE_UNSIGNED (*orig_type)) && check_sign))
    return false;

  if (TYPE_PRECISION (type) >= (TYPE_PRECISION (*orig_type) * 2))
    *promotion = true;
  else if (TYPE_PRECISION (*orig_type) >= (TYPE_PRECISION (type) * 2))
    *promotion = false;
  else
    return false;

  if (!vect_is_simple_use (oprnd0, *def_stmt, loop_vinfo,
			   bb_vinfo, &dummy_gimple, &dummy, &dt))
    return false;

  return true;
}

/* Helper to return a new temporary for pattern of TYPE for STMT.  If STMT
   is NULL, the caller must set SSA_NAME_DEF_STMT for the returned SSA var. */

static tree
vect_recog_temp_ssa_var (tree type, gimple stmt)
{
  return make_temp_ssa_name (type, stmt, "patt");
}

/* Function vect_recog_dot_prod_pattern

   Try to find the following pattern:

     type x_t, y_t;
     TYPE1 prod;
     TYPE2 sum = init;
   loop:
     sum_0 = phi <init, sum_1>
     S1  x_t = ...
     S2  y_t = ...
     S3  x_T = (TYPE1) x_t;
     S4  y_T = (TYPE1) y_t;
     S5  prod = x_T * y_T;
     [S6  prod = (TYPE2) prod;  #optional]
     S7  sum_1 = prod + sum_0;

   where 'TYPE1' is exactly double the size of type 'type', and 'TYPE2' is
   the same size as 'TYPE1' or bigger.  This is a special case of a reduction
   computation.

   Input:

   * STMTS: Contains a stmt from which the pattern search begins.  In the
   example, when this function is called with S7, the pattern {S3,S4,S5,S6,S7}
   will be detected.

   Output:

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  In this case it will be:
        WIDEN_DOT_PRODUCT <x_t, y_t, sum_0>

   Note: The dot-prod idiom is a widening reduction pattern that is
         vectorized without preserving all the intermediate results.  It
         produces only N/2 (widened) results (by summing up pairs of
         intermediate results) rather than all N results.  Therefore, we
         cannot allow this pattern when we want to get all the results and in
         the correct order (as is the case when this computation is in an
         inner-loop nested in an outer-loop that is being vectorized).  */
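
/* As a hedged, C-level illustration (array names and the bound are
   hypothetical, with 16-bit short and 32-bit int assumed), the canonical
   source loop this pattern targets is:

     short x[256], y[256];
     int sum = 0;
     for (int i = 0; i < 256; i++)
       sum += x[i] * y[i];

   Here 'type' is short, TYPE1 is int (exactly double the size) holding
   the product, and TYPE2 is also int, so S3-S7 match and the loop body
   becomes a single DOT_PROD_EXPR accumulation.  */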

static gimple
vect_recog_dot_prod_pattern (vec<gimple> *stmts, tree *type_in,
			     tree *type_out)
{
  gimple stmt, last_stmt = (*stmts)[0];
  tree oprnd0, oprnd1;
  tree oprnd00, oprnd01;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
  tree type, half_type;
  gimple pattern_stmt;
  tree prod_type;
  loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  struct loop *loop;
  tree var;
  bool promotion;

  if (!loop_info)
    return NULL;

  loop = LOOP_VINFO_LOOP (loop_info);

  if (!is_gimple_assign (last_stmt))
    return NULL;

  type = gimple_expr_type (last_stmt);

  /* Look for the following pattern
          DX = (TYPE1) X;
          DY = (TYPE1) Y;
          DPROD = DX * DY;
          DDPROD = (TYPE2) DPROD;
          sum_1 = DDPROD + sum_0;
     In which
     - DX is double the size of X
     - DY is double the size of Y
     - DX, DY, DPROD all have the same type
     - sum is the same size as DPROD or bigger
     - sum has been recognized as a reduction variable.

     This is equivalent to:
       DPROD = X w* Y;          #widen mult
       sum_1 = DPROD w+ sum_0;  #widen summation
     or
       DPROD = X w* Y;          #widen mult
       sum_1 = DPROD + sum_0;   #summation
   */

  /* Starting from LAST_STMT, follow the defs of its uses in search
     of the above pattern.  */

  if (gimple_assign_rhs_code (last_stmt) != PLUS_EXPR)
    return NULL;

  if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
    {
      /* Has been detected as widening-summation?  */

      stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
      type = gimple_expr_type (stmt);
      if (gimple_assign_rhs_code (stmt) != WIDEN_SUM_EXPR)
        return NULL;
      oprnd0 = gimple_assign_rhs1 (stmt);
      oprnd1 = gimple_assign_rhs2 (stmt);
      half_type = TREE_TYPE (oprnd0);
    }
  else
    {
      gimple def_stmt;

      if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def)
        return NULL;
      oprnd0 = gimple_assign_rhs1 (last_stmt);
      oprnd1 = gimple_assign_rhs2 (last_stmt);
      if (!types_compatible_p (TREE_TYPE (oprnd0), type)
	  || !types_compatible_p (TREE_TYPE (oprnd1), type))
        return NULL;
      stmt = last_stmt;

      if (type_conversion_p (oprnd0, stmt, true, &half_type, &def_stmt,
                               &promotion)
         && promotion)
        {
          stmt = def_stmt;
          oprnd0 = gimple_assign_rhs1 (stmt);
        }
      else
        half_type = type;
    }

  /* So far so good.  Since last_stmt was detected as a (summation) reduction,
     we know that oprnd1 is the reduction variable (defined by a loop-header
     phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
     Left to check that oprnd0 is defined by a (widen_)mult_expr  */
  if (TREE_CODE (oprnd0) != SSA_NAME)
    return NULL;

  prod_type = half_type;
  stmt = SSA_NAME_DEF_STMT (oprnd0);

  /* It could not be the dot_prod pattern if the stmt is outside the loop.  */
  if (!gimple_bb (stmt) || !flow_bb_inside_loop_p (loop, gimple_bb (stmt)))
    return NULL;

  /* FORNOW.  We could continue analyzing the def-use chain when this stmt
     is a phi inside the loop (in case we are analyzing an outer-loop).  */
  if (!is_gimple_assign (stmt))
    return NULL;
  stmt_vinfo = vinfo_for_stmt (stmt);
  gcc_assert (stmt_vinfo);
  if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_internal_def)
    return NULL;
  if (gimple_assign_rhs_code (stmt) != MULT_EXPR)
    return NULL;
  if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
    {
      /* Has been detected as a widening multiplication?  */

      stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
      if (gimple_assign_rhs_code (stmt) != WIDEN_MULT_EXPR)
        return NULL;
      stmt_vinfo = vinfo_for_stmt (stmt);
      gcc_assert (stmt_vinfo);
      gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_internal_def);
      oprnd00 = gimple_assign_rhs1 (stmt);
      oprnd01 = gimple_assign_rhs2 (stmt);
    }
  else
    {
      tree half_type0, half_type1;
      gimple def_stmt;
      tree oprnd0, oprnd1;

      oprnd0 = gimple_assign_rhs1 (stmt);
      oprnd1 = gimple_assign_rhs2 (stmt);
      if (!types_compatible_p (TREE_TYPE (oprnd0), prod_type)
          || !types_compatible_p (TREE_TYPE (oprnd1), prod_type))
        return NULL;
      if (!type_conversion_p (oprnd0, stmt, true, &half_type0, &def_stmt,
                                &promotion)
          || !promotion)
        return NULL;
      oprnd00 = gimple_assign_rhs1 (def_stmt);
      /* The second conversion must be checked on the second operand;
	 checking oprnd0 again would duplicate the first operand.  */
      if (!type_conversion_p (oprnd1, stmt, true, &half_type1, &def_stmt,
                                &promotion)
          || !promotion)
        return NULL;
      oprnd01 = gimple_assign_rhs1 (def_stmt);
      if (!types_compatible_p (half_type0, half_type1))
        return NULL;
      if (TYPE_PRECISION (prod_type) != TYPE_PRECISION (half_type0) * 2)
	return NULL;
    }

  half_type = TREE_TYPE (oprnd00);
  *type_in = half_type;
  *type_out = type;

  /* Pattern detected.  Create a stmt to be used to replace the pattern: */
  var = vect_recog_temp_ssa_var (type, NULL);
  pattern_stmt = gimple_build_assign_with_ops (DOT_PROD_EXPR, var,
					       oprnd00, oprnd01, oprnd1);

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
                       "vect_recog_dot_prod_pattern: detected: ");
      dump_gimple_stmt (MSG_OPTIMIZED_LOCATIONS, TDF_SLIM, pattern_stmt, 0);
    }

  /* We don't allow changing the order of the computation in the inner-loop
     when doing outer-loop vectorization.  */
  gcc_assert (!nested_in_vect_loop_p (loop, last_stmt));

  return pattern_stmt;
}


/* Handle widening operation by a constant.  At the moment we support MULT_EXPR
   and LSHIFT_EXPR.

   For MULT_EXPR we check that CONST_OPRND fits HALF_TYPE, and for LSHIFT_EXPR
   we check that CONST_OPRND is less than or equal to the size of HALF_TYPE.

   Otherwise, if the type of the result (TYPE) is at least 4 times bigger than
   HALF_TYPE, and there is an intermediate type (2 times smaller than TYPE)
   that satisfies the above restrictions, we can perform a widening operation
   from the intermediate type to TYPE and replace a_T = (TYPE) a_t;
   with a_it = (interm_type) a_t;  */
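
/* A hedged worked example of the intermediate-type case (widths assume
   8/16/32-bit char/short/int):

     unsigned char a_t;
     unsigned int a_T = (unsigned int) a_t;
     unsigned int res = a_T << 12;

   TYPE (32 bits) is 4 times HALF_TYPE (8 bits); the shift amount 12
   exceeds the 8-bit precision but fits the 16-bit intermediate type, so
   the promotion is replaced by a_it = (unsigned short) a_t and the
   widening shift is performed from 16 to 32 bits.  */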

static bool
vect_handle_widen_op_by_const (gimple stmt, enum tree_code code,
			       tree const_oprnd, tree *oprnd,
			       vec<gimple> *stmts, tree type,
			       tree *half_type, gimple def_stmt)
{
  tree new_type, new_oprnd;
  gimple new_stmt;

  if (code != MULT_EXPR && code != LSHIFT_EXPR)
    return false;

  if (((code == MULT_EXPR && int_fits_type_p (const_oprnd, *half_type))
        || (code == LSHIFT_EXPR
            && compare_tree_int (const_oprnd, TYPE_PRECISION (*half_type))
	       != 1))
      && TYPE_PRECISION (type) == (TYPE_PRECISION (*half_type) * 2))
    {
      /* CONST_OPRND is a constant of HALF_TYPE.  */
      *oprnd = gimple_assign_rhs1 (def_stmt);
      return true;
    }

  if (TYPE_PRECISION (type) < (TYPE_PRECISION (*half_type) * 4))
    return false;

  if (!vect_same_loop_or_bb_p (stmt, def_stmt))
    return false;

  /* TYPE is 4 times bigger than HALF_TYPE, try widening operation for
     a type 2 times bigger than HALF_TYPE.  */
  new_type = build_nonstandard_integer_type (TYPE_PRECISION (type) / 2,
                                             TYPE_UNSIGNED (type));
  if ((code == MULT_EXPR && !int_fits_type_p (const_oprnd, new_type))
      || (code == LSHIFT_EXPR
          && compare_tree_int (const_oprnd, TYPE_PRECISION (new_type)) == 1))
    return false;

  /* Use NEW_TYPE for widening operation.  */
  if (STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)))
    {
      new_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt));
      /* Check if the already created pattern stmt is what we need.  */
      if (!is_gimple_assign (new_stmt)
          || gimple_assign_rhs_code (new_stmt) != NOP_EXPR
          || TREE_TYPE (gimple_assign_lhs (new_stmt)) != new_type)
        return false;

      stmts->safe_push (def_stmt);
      *oprnd = gimple_assign_lhs (new_stmt);
    }
  else
    {
      /* Create a_T = (NEW_TYPE) a_t;  */
      *oprnd = gimple_assign_rhs1 (def_stmt);
      new_oprnd = make_ssa_name (new_type, NULL);
      new_stmt = gimple_build_assign_with_ops (NOP_EXPR, new_oprnd, *oprnd,
					       NULL_TREE);
      STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)) = new_stmt;
      stmts->safe_push (def_stmt);
      *oprnd = new_oprnd;
    }

  *half_type = new_type;
  return true;
}


/* Function vect_recog_widen_mult_pattern

   Try to find the following pattern:

     type a_t, b_t;
     TYPE a_T, b_T, prod_T;

     S1  a_t = ;
     S2  b_t = ;
     S3  a_T = (TYPE) a_t;
     S4  b_T = (TYPE) b_t;
     S5  prod_T = a_T * b_T;

   where type 'TYPE' is at least double the size of type 'type'.

   Also detect unsigned cases:

     unsigned type a_t, b_t;
     unsigned TYPE u_prod_T;
     TYPE a_T, b_T, prod_T;

     S1  a_t = ;
     S2  b_t = ;
     S3  a_T = (TYPE) a_t;
     S4  b_T = (TYPE) b_t;
     S5  prod_T = a_T * b_T;
     S6  u_prod_T = (unsigned TYPE) prod_T;

   and multiplication by constants:

     type a_t;
     TYPE a_T, prod_T;

     S1  a_t = ;
     S3  a_T = (TYPE) a_t;
     S5  prod_T = a_T * CONST;

   A special case of multiplication by constants is when 'TYPE' is 4 times
   bigger than 'type', but CONST fits an intermediate type 2 times smaller
   than 'TYPE'.  In that case we create an additional pattern stmt for S3
   to create a variable of the intermediate type, and perform widen-mult
   on the intermediate type as well:

     type a_t;
     interm_type a_it;
     TYPE a_T, prod_T,  prod_T';

     S1  a_t = ;
     S3  a_T = (TYPE) a_t;
           '--> a_it = (interm_type) a_t;
     S5  prod_T = a_T * CONST;
           '--> prod_T' = a_it w* CONST;

   Input/Output:

   * STMTS: Contains a stmt from which the pattern search begins.  In the
   example, when this function is called with S5, the pattern {S3,S4,S5,(S6)}
   is detected.  In case of unsigned widen-mult, the original stmt (S5) is
   replaced with S6 in STMTS.  In case of multiplication by a constant
   of an intermediate type (the last case above), STMTS also contains S3
   (inserted before S5).

   Output:

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  In this case it will be:
        WIDEN_MULT <a_t, b_t>
*/
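
/* Hedged C-level illustrations (names hypothetical; short/int/long long
   assumed 16/32/64 bits):

     short a, b;
     int prod = (int) a * (int) b;          <-- S3-S5, plain widen-mult

     short c;
     long long p = (long long) c * 70000;   <-- constant case: 'TYPE' is
                                                4x 'type', and 70000 fits
                                                the 32-bit intermediate
                                                type but not short

   In the second case an extra pattern stmt c_it = (int) c is created and
   the widen-mult is done from 32 to 64 bits.  */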

static gimple
vect_recog_widen_mult_pattern (vec<gimple> *stmts,
                               tree *type_in, tree *type_out)
{
  gimple last_stmt = stmts->pop ();
  gimple def_stmt0, def_stmt1;
  tree oprnd0, oprnd1;
  tree type, half_type0, half_type1;
  gimple pattern_stmt;
  tree vectype, vectype_out = NULL_TREE;
  tree var;
  enum tree_code dummy_code;
  int dummy_int;
  vec<tree> dummy_vec;
  bool op1_ok;
  bool promotion;

  if (!is_gimple_assign (last_stmt))
    return NULL;

  type = gimple_expr_type (last_stmt);

  /* Starting from LAST_STMT, follow the defs of its uses in search
     of the above pattern.  */

  if (gimple_assign_rhs_code (last_stmt) != MULT_EXPR)
    return NULL;

  oprnd0 = gimple_assign_rhs1 (last_stmt);
  oprnd1 = gimple_assign_rhs2 (last_stmt);
  if (!types_compatible_p (TREE_TYPE (oprnd0), type)
      || !types_compatible_p (TREE_TYPE (oprnd1), type))
    return NULL;

  /* Check argument 0.  */
  if (!type_conversion_p (oprnd0, last_stmt, false, &half_type0, &def_stmt0,
                         &promotion)
      || !promotion)
     return NULL;
  /* Check argument 1.  */
  op1_ok = type_conversion_p (oprnd1, last_stmt, false, &half_type1,
                              &def_stmt1, &promotion);

  if (op1_ok && promotion)
    {
      oprnd0 = gimple_assign_rhs1 (def_stmt0);
      oprnd1 = gimple_assign_rhs1 (def_stmt1);
    }
  else
    {
      if (TREE_CODE (oprnd1) == INTEGER_CST
          && TREE_CODE (half_type0) == INTEGER_TYPE
          && vect_handle_widen_op_by_const (last_stmt, MULT_EXPR, oprnd1,
		                            &oprnd0, stmts, type,
					    &half_type0, def_stmt0))
        half_type1 = half_type0;
      else
        return NULL;
    }

  /* Handle unsigned case.  Look for
     S6  u_prod_T = (unsigned TYPE) prod_T;
     Use unsigned TYPE as the type for WIDEN_MULT_EXPR.  */
  if (TYPE_UNSIGNED (type) != TYPE_UNSIGNED (half_type0))
    {
      gimple use_stmt;
      tree use_lhs;
      tree use_type;

      if (TYPE_UNSIGNED (type) == TYPE_UNSIGNED (half_type1))
        return NULL;

      use_stmt = vect_single_imm_use (last_stmt);
      if (!use_stmt || !is_gimple_assign (use_stmt)
	  || gimple_assign_rhs_code (use_stmt) != NOP_EXPR)
        return NULL;

      use_lhs = gimple_assign_lhs (use_stmt);
      use_type = TREE_TYPE (use_lhs);
      if (!INTEGRAL_TYPE_P (use_type)
          || (TYPE_UNSIGNED (type) == TYPE_UNSIGNED (use_type))
          || (TYPE_PRECISION (type) != TYPE_PRECISION (use_type)))
        return NULL;

      type = use_type;
      last_stmt = use_stmt;
    }

  if (!types_compatible_p (half_type0, half_type1))
    return NULL;

  /* Pattern detected.  */
  if (dump_enabled_p ())
    dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
                     "vect_recog_widen_mult_pattern: detected: ");

  /* Check target support  */
  vectype = get_vectype_for_scalar_type (half_type0);
  vectype_out = get_vectype_for_scalar_type (type);
  if (!vectype
      || !vectype_out
      || !supportable_widening_operation (WIDEN_MULT_EXPR, last_stmt,
					  vectype_out, vectype,
					  &dummy_code, &dummy_code,
					  &dummy_int, &dummy_vec))
    return NULL;

  *type_in = vectype;
  *type_out = vectype_out;

  /* Pattern supported.  Create a stmt to be used to replace the pattern: */
  var = vect_recog_temp_ssa_var (type, NULL);
  pattern_stmt = gimple_build_assign_with_ops (WIDEN_MULT_EXPR, var, oprnd0,
					       oprnd1);

  if (dump_enabled_p ())
    dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt, 0);

  stmts->safe_push (last_stmt);
  return pattern_stmt;
}


/* Function vect_recog_pow_pattern

   Try to find the following pattern:

     x = POW (y, N);

   with POW being one of pow, powf, powi, powif and N being
   either 2 or 0.5.

   Input:

   * LAST_STMT: A stmt from which the pattern search begins.

   Output:

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern. In this case it will be:
        x = x * x
   or
	x = sqrt (x)
*/
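
/* For instance (a sketch; 'x' and 'y' are hypothetical):

     double x = pow (y, 2.0);     <-- becomes x = y * y
     double r = pow (y, 0.5);     <-- becomes r = sqrt (y), provided the
                                      target can vectorize sqrt

   The exponent must be a literal 2 or 0.5 in the call; a variable
   exponent is not recognized.  */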

static gimple
vect_recog_pow_pattern (vec<gimple> *stmts, tree *type_in,
			tree *type_out)
{
  gimple last_stmt = (*stmts)[0];
  tree fn, base, exp = NULL;
  gimple stmt;
  tree var;

  if (!is_gimple_call (last_stmt) || gimple_call_lhs (last_stmt) == NULL)
    return NULL;

  fn = gimple_call_fndecl (last_stmt);
  if (fn == NULL_TREE || DECL_BUILT_IN_CLASS (fn) != BUILT_IN_NORMAL)
   return NULL;

  switch (DECL_FUNCTION_CODE (fn))
    {
    case BUILT_IN_POWIF:
    case BUILT_IN_POWI:
    case BUILT_IN_POWF:
    case BUILT_IN_POW:
      base = gimple_call_arg (last_stmt, 0);
      exp = gimple_call_arg (last_stmt, 1);
      if (TREE_CODE (exp) != REAL_CST
	  && TREE_CODE (exp) != INTEGER_CST)
        return NULL;
      break;

    default:
      return NULL;
    }

  /* We now have a pow or powi builtin function call with a constant
     exponent.  */

  *type_out = NULL_TREE;

  /* Catch squaring.  */
  if ((host_integerp (exp, 0)
       && tree_low_cst (exp, 0) == 2)
      || (TREE_CODE (exp) == REAL_CST
          && REAL_VALUES_EQUAL (TREE_REAL_CST (exp), dconst2)))
    {
      *type_in = TREE_TYPE (base);

      var = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
      stmt = gimple_build_assign_with_ops (MULT_EXPR, var, base, base);
      return stmt;
    }

  /* Catch square root.  */
  if (TREE_CODE (exp) == REAL_CST
      && REAL_VALUES_EQUAL (TREE_REAL_CST (exp), dconsthalf))
    {
      tree newfn = mathfn_built_in (TREE_TYPE (base), BUILT_IN_SQRT);
      *type_in = get_vectype_for_scalar_type (TREE_TYPE (base));
      if (*type_in)
	{
	  gimple stmt = gimple_build_call (newfn, 1, base);
	  if (vectorizable_function (stmt, *type_in, *type_in)
	      != NULL_TREE)
	    {
	      var = vect_recog_temp_ssa_var (TREE_TYPE (base), stmt);
	      gimple_call_set_lhs (stmt, var);
	      return stmt;
	    }
	}
    }

  return NULL;
}


/* Function vect_recog_widen_sum_pattern

   Try to find the following pattern:

     type x_t;
     TYPE x_T, sum = init;
   loop:
     sum_0 = phi <init, sum_1>
     S1  x_t = *p;
     S2  x_T = (TYPE) x_t;
     S3  sum_1 = x_T + sum_0;

   where type 'TYPE' is at least double the size of type 'type', i.e - we're
   summing elements of type 'type' into an accumulator of type 'TYPE'. This is
   a special case of a reduction computation.

   Input:

   * LAST_STMT: A stmt from which the pattern search begins. In the example,
   when this function is called with S3, the pattern {S2,S3} will be detected.

   Output:

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern. In this case it will be:
        WIDEN_SUM <x_t, sum_0>

   Note: The widening-sum idiom is a widening reduction pattern that is
	 vectorized without preserving all the intermediate results. It
         produces only N/2 (widened) results (by summing up pairs of
	 intermediate results) rather than all N results.  Therefore, we
	 cannot allow this pattern when we want to get all the results and in
	 the correct order (as is the case when this computation is in an
	 inner-loop nested in an outer-loop that is being vectorized).  */
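
/* A hedged C-level example (names hypothetical; 16-bit short and 32-bit
   int assumed):

     short x[256];
     int sum = 0;
     for (int i = 0; i < 256; i++)
       sum += x[i];               <-- S2 + S3: promote, then accumulate

   matches the pattern and is replaced by WIDEN_SUM <x_t, sum_0>.  */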

static gimple
vect_recog_widen_sum_pattern (vec<gimple> *stmts, tree *type_in,
			      tree *type_out)
{
  gimple stmt, last_stmt = (*stmts)[0];
  tree oprnd0, oprnd1;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
  tree type, half_type;
  gimple pattern_stmt;
  loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  struct loop *loop;
  tree var;
  bool promotion;

  if (!loop_info)
    return NULL;

  loop = LOOP_VINFO_LOOP (loop_info);

  if (!is_gimple_assign (last_stmt))
    return NULL;

  type = gimple_expr_type (last_stmt);

  /* Look for the following pattern
          DX = (TYPE) X;
          sum_1 = DX + sum_0;
     In which DX is at least double the size of X, and sum_1 has been
     recognized as a reduction variable.
   */

  /* Starting from LAST_STMT, follow the defs of its uses in search
     of the above pattern.  */

  if (gimple_assign_rhs_code (last_stmt) != PLUS_EXPR)
    return NULL;

  if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def)
    return NULL;

  oprnd0 = gimple_assign_rhs1 (last_stmt);
  oprnd1 = gimple_assign_rhs2 (last_stmt);
  if (!types_compatible_p (TREE_TYPE (oprnd0), type)
      || !types_compatible_p (TREE_TYPE (oprnd1), type))
    return NULL;

  /* So far so good.  Since last_stmt was detected as a (summation) reduction,
     we know that oprnd1 is the reduction variable (defined by a loop-header
     phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
     Left to check that oprnd0 is defined by a cast from type 'type' to type
     'TYPE'.  */

  if (!type_conversion_p (oprnd0, last_stmt, true, &half_type, &stmt,
                          &promotion)
      || !promotion)
     return NULL;

  oprnd0 = gimple_assign_rhs1 (stmt);
  *type_in = half_type;
  *type_out = type;

  /* Pattern detected.  Create a stmt to be used to replace the pattern: */
  var = vect_recog_temp_ssa_var (type, NULL);
  pattern_stmt = gimple_build_assign_with_ops (WIDEN_SUM_EXPR, var,
					       oprnd0, oprnd1);

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
                       "vect_recog_widen_sum_pattern: detected: ");
      dump_gimple_stmt (MSG_OPTIMIZED_LOCATIONS, TDF_SLIM, pattern_stmt, 0);
    }

  /* We don't allow changing the order of the computation in the inner-loop
     when doing outer-loop vectorization.  */
  gcc_assert (!nested_in_vect_loop_p (loop, last_stmt));

  return pattern_stmt;
}


/* Return TRUE if the operation in STMT can be performed on a smaller type.

   Input:
   STMT - a statement to check.
   DEF - we support operations with two operands, one of which is constant.
         The other operand can be defined by a demotion operation, or by a
         previous statement in a sequence of over-promoted operations.  In the
         latter case DEF is used to replace that operand.  (It is defined by a
         pattern statement we created for the previous statement in the
         sequence).

   Input/output:
   NEW_TYPE - Output: a smaller type that we are trying to use.  Input: if not
         NULL, it's the type of DEF.
   STMTS - additional pattern statements.  If a pattern statement (type
         conversion) is created in this function, its original statement is
         added to STMTS.

   Output:
   OP0, OP1 - if the operation fits a smaller type, OP0 and OP1 are the new
         operands to use in the new pattern statement for STMT (will be created
         in vect_recog_over_widening_pattern ()).
   NEW_DEF_STMT - in case DEF has to be promoted, we create two pattern
         statements for STMT: the first one is a type promotion and the second
         one is the operation itself.  We return the type promotion statement
	 in NEW_DEF_STMT and further store it in STMT_VINFO_PATTERN_DEF_SEQ of
         the second pattern statement.  */
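
/* A hedged worked example of the type selection below (widths assume
   8-bit char and 32-bit int):  for

     res_T = x_T | 300;

   where x_T was promoted from unsigned char (HALF_TYPE), the constant
   300 does not fit the 8-bit HALF_TYPE, so INTERM_TYPE is built with
   twice the precision (16 bits); 300 fits that, and the operation can be
   done on the 16-bit type instead of the full 32-bit TYPE.  */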

static bool
vect_operation_fits_smaller_type (gimple stmt, tree def, tree *new_type,
                                  tree *op0, tree *op1, gimple *new_def_stmt,
                                  vec<gimple> *stmts)
{
  enum tree_code code;
  tree const_oprnd, oprnd;
  tree interm_type = NULL_TREE, half_type, new_oprnd, type;
  gimple def_stmt, new_stmt;
  bool first = false;
  bool promotion;

  *op0 = NULL_TREE;
  *op1 = NULL_TREE;
  *new_def_stmt = NULL;

  if (!is_gimple_assign (stmt))
    return false;

  code = gimple_assign_rhs_code (stmt);
  if (code != LSHIFT_EXPR && code != RSHIFT_EXPR
      && code != BIT_IOR_EXPR && code != BIT_XOR_EXPR && code != BIT_AND_EXPR)
    return false;

  oprnd = gimple_assign_rhs1 (stmt);
  const_oprnd = gimple_assign_rhs2 (stmt);
  type = gimple_expr_type (stmt);

  if (TREE_CODE (oprnd) != SSA_NAME
      || TREE_CODE (const_oprnd) != INTEGER_CST)
    return false;

  /* If oprnd has other uses besides that in stmt we cannot mark it
     as being part of a pattern only.  */
  if (!has_single_use (oprnd))
    return false;

  /* If we are in the middle of a sequence, we use DEF from a previous
     statement.  Otherwise, OPRND has to be a result of type promotion.  */
  if (*new_type)
    {
      half_type = *new_type;
      oprnd = def;
    }
  else
    {
      first = true;
      if (!type_conversion_p (oprnd, stmt, false, &half_type, &def_stmt,
			      &promotion)
	  || !promotion
	  || !vect_same_loop_or_bb_p (stmt, def_stmt))
        return false;
    }

  /* Can we perform the operation on a smaller type?  */
  switch (code)
    {
      case BIT_IOR_EXPR:
      case BIT_XOR_EXPR:
      case BIT_AND_EXPR:
        if (!int_fits_type_p (const_oprnd, half_type))
          {
            /* HALF_TYPE is not enough.  Try a bigger type if possible.  */
            if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4))
              return false;

            interm_type = build_nonstandard_integer_type (
                        TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type));
            if (!int_fits_type_p (const_oprnd, interm_type))
              return false;
          }

        break;

      case LSHIFT_EXPR:
        /* Try intermediate type - HALF_TYPE is not enough for sure.  */
        if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4))
          return false;

        /* Check that HALF_TYPE size + shift amount <= INTERM_TYPE size.
          (e.g., if the original value was char, the shift amount is at most 8
           if we want to use short).  */
        if (compare_tree_int (const_oprnd, TYPE_PRECISION (half_type)) == 1)
          return false;

        interm_type = build_nonstandard_integer_type (
                        TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type));

        if (!vect_supportable_shift (code, interm_type))
          return false;

        break;

      case RSHIFT_EXPR:
        if (vect_supportable_shift (code, half_type))
          break;

        /* Try intermediate type - HALF_TYPE is not supported.  */
        if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4))
          return false;

        interm_type = build_nonstandard_integer_type (
                        TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type));

        if (!vect_supportable_shift (code, interm_type))
          return false;

        break;

      default:
        gcc_unreachable ();
    }

  /* There are four possible cases:
     1. OPRND is defined by a type promotion (in that case FIRST is TRUE, it's
        the first statement in the sequence)
        a. The original, HALF_TYPE, is not enough - we replace the promotion
           from HALF_TYPE to TYPE with a promotion to INTERM_TYPE.
        b. HALF_TYPE is sufficient, OPRND is set as the RHS of the original
           promotion.
     2. OPRND is defined by a pattern statement we created.
        a. Its type is not sufficient for the operation, we create a new stmt:
           a type conversion for OPRND from HALF_TYPE to INTERM_TYPE.  We store
           this statement in NEW_DEF_STMT, and it is later put in
	   STMT_VINFO_PATTERN_DEF_SEQ of the pattern statement for STMT.
        b. OPRND is good to use in the new statement.  */
  if (first)
    {
      if (interm_type)
        {
          /* Replace the original type conversion HALF_TYPE->TYPE with
             HALF_TYPE->INTERM_TYPE.  */
          if (STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)))
            {
              new_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt));
              /* Check if the already created pattern stmt is what we need.  */
              if (!is_gimple_assign (new_stmt)
                  || gimple_assign_rhs_code (new_stmt) != NOP_EXPR
                  || TREE_TYPE (gimple_assign_lhs (new_stmt)) != interm_type)
                return false;

	      stmts->safe_push (def_stmt);
              oprnd = gimple_assign_lhs (new_stmt);
            }
          else
            {
              /* Create NEW_OPRND = (INTERM_TYPE) OPRND.  */
              oprnd = gimple_assign_rhs1 (def_stmt);
              new_oprnd = make_ssa_name (interm_type, NULL);
              new_stmt = gimple_build_assign_with_ops (NOP_EXPR, new_oprnd,
                                                       oprnd, NULL_TREE);
              STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)) = new_stmt;
              stmts->safe_push (def_stmt);
              oprnd = new_oprnd;
            }
        }
      else
        {
          /* Retrieve the operand before the type promotion.  */
          oprnd = gimple_assign_rhs1 (def_stmt);
        }
    }
  else
    {
      if (interm_type)
        {
          /* Create a type conversion HALF_TYPE->INTERM_TYPE.  */
          new_oprnd = make_ssa_name (interm_type, NULL);
          new_stmt = gimple_build_assign_with_ops (NOP_EXPR, new_oprnd,
                                                   oprnd, NULL_TREE);
          oprnd = new_oprnd;
          *new_def_stmt = new_stmt;
        }

      /* Otherwise, OPRND is already set.  */
    }

  if (interm_type)
    *new_type = interm_type;
  else
    *new_type = half_type;

  *op0 = oprnd;
  *op1 = fold_convert (*new_type, const_oprnd);

  return true;
}


/* Try to find a statement or a sequence of statements that can be performed
   on a smaller type:

     type x_t;
     TYPE x_T, res0_T, res1_T;
   loop:
     S1  x_t = *p;
     S2  x_T = (TYPE) x_t;
     S3  res0_T = op (x_T, C0);
     S4  res1_T = op (res0_T, C1);
     S5  ... = () res1_T;  - type demotion

   where type 'TYPE' is at least double the size of type 'type', and C0 and
   C1 are constants.
   Check if S3 and S4 can be done on a smaller type than 'TYPE'; it can either
   be 'type' or some intermediate type.  For now, we expect S5 to be a type
   demotion operation.  We also check that S3 and S4 have only one use.  */
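
/* A hedged end-to-end C example (names hypothetical; 8-bit char and
   32-bit int assumed):

     unsigned char in[256], out[256];
     for (int i = 0; i < 256; i++)
       out[i] = (in[i] << 1) | 3;   <-- computed in int by the C promotion
                                        rules, then demoted back to char

   Both the shift and the OR can be carried out on a 16-bit intermediate
   type, so the over-widening to 32 bits is avoided.  */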

static gimple
vect_recog_over_widening_pattern (vec<gimple> *stmts,
                                  tree *type_in, tree *type_out)
{
  gimple stmt = stmts->pop ();
  gimple pattern_stmt = NULL, new_def_stmt, prev_stmt = NULL, use_stmt = NULL;
  tree op0, op1, vectype = NULL_TREE, use_lhs, use_type;
  tree var = NULL_TREE, new_type = NULL_TREE, new_oprnd;
  bool first;
  tree type = NULL;

  first = true;
  while (1)
    {
      if (!vinfo_for_stmt (stmt)
          || STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (stmt)))
        return NULL;

      new_def_stmt = NULL;
      if (!vect_operation_fits_smaller_type (stmt, var, &new_type,
                                             &op0, &op1, &new_def_stmt,
                                             stmts))
        {
          if (first)
            return NULL;
          else
            break;
        }

      /* STMT can be performed on a smaller type.  Check its uses.  */
      use_stmt = vect_single_imm_use (stmt);
      if (!use_stmt || !is_gimple_assign (use_stmt))
        return NULL;

      /* Create pattern statement for STMT.  */
      vectype = get_vectype_for_scalar_type (new_type);
      if (!vectype)
        return NULL;

      /* We want to collect all the statements for which we create pattern
         statements, except for the case when the last statement in the
         sequence doesn't have a corresponding pattern statement.  In such
         a case we associate the last pattern statement with the last
         statement in the sequence.  Therefore, we only add the original
         statement to the list if we know that it is not the last.  */
      if (prev_stmt)
        stmts->safe_push (prev_stmt);

      var = vect_recog_temp_ssa_var (new_type, NULL);
      pattern_stmt
	= gimple_build_assign_with_ops (gimple_assign_rhs_code (stmt), var,
					op0, op1);
      STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt)) = pattern_stmt;
      new_pattern_def_seq (vinfo_for_stmt (stmt), new_def_stmt);

      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
                           "created pattern stmt: ");
          dump_gimple_stmt (MSG_OPTIMIZED_LOCATIONS, TDF_SLIM, pattern_stmt, 0);
        }

      type = gimple_expr_type (stmt);
      prev_stmt = stmt;
      stmt = use_stmt;

      first = false;
    }

  /* We got a sequence.  We expect it to end with a type demotion operation.
     Otherwise, we quit (for now).  There are three possible cases: the
     conversion is to NEW_TYPE (we don't do anything), the conversion is to
     a type bigger than NEW_TYPE and/or the signedness of USE_TYPE and
     NEW_TYPE differs (we create a new conversion statement).  */
  if (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (use_stmt)))
    {
      use_lhs = gimple_assign_lhs (use_stmt);
      use_type = TREE_TYPE (use_lhs);
      /* Support only type demotion or signedness change.  */
      if (!INTEGRAL_TYPE_P (use_type)
	  || TYPE_PRECISION (type) <= TYPE_PRECISION (use_type))
        return NULL;

      /* Check that NEW_TYPE is not bigger than the conversion result.  */
      if (TYPE_PRECISION (new_type) > TYPE_PRECISION (use_type))
	return NULL;

      if (TYPE_UNSIGNED (new_type) != TYPE_UNSIGNED (use_type)
          || TYPE_PRECISION (new_type) != TYPE_PRECISION (use_type))
        {
          /* Create NEW_TYPE->USE_TYPE conversion.  */
          new_oprnd = make_ssa_name (use_type, NULL);
          pattern_stmt = gimple_build_assign_with_ops (NOP_EXPR, new_oprnd,
                                                       var, NULL_TREE);
          STMT_VINFO_RELATED_STMT (vinfo_for_stmt (use_stmt)) = pattern_stmt;

          *type_in = get_vectype_for_scalar_type (new_type);
          *type_out = get_vectype_for_scalar_type (use_type);

          /* We created a pattern statement for the last statement in the
             sequence, so we don't need to associate it with the pattern
             statement created for PREV_STMT.  Therefore, we add PREV_STMT
             to the list in order to mark it later in vect_pattern_recog_1.  */
          if (prev_stmt)
            stmts->safe_push (prev_stmt);
        }
      else
        {
          if (prev_stmt)
	    STMT_VINFO_PATTERN_DEF_SEQ (vinfo_for_stmt (use_stmt))
	       = STMT_VINFO_PATTERN_DEF_SEQ (vinfo_for_stmt (prev_stmt));

          *type_in = vectype;
          *type_out = NULL_TREE;
        }

      stmts->safe_push (use_stmt);
    }
  else
    /* TODO: support general case, create a conversion to the correct type.  */
    return NULL;

  /* Pattern detected.  */
  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
                       "vect_recog_over_widening_pattern: detected: ");
      dump_gimple_stmt (MSG_OPTIMIZED_LOCATIONS, TDF_SLIM, pattern_stmt, 0);
    }

  return pattern_stmt;
}

/* Detect widening shift pattern:

   type a_t;
   TYPE a_T, res_T;

   S1 a_t = ;
   S2 a_T = (TYPE) a_t;
   S3 res_T = a_T << CONST;

  where type 'TYPE' is at least double the size of type 'type'.

  Also detect cases where the shift result is immediately converted
  to another type 'result_type' that is no larger in size than 'TYPE'.
  In those cases we perform a widen-shift that directly results in
  'result_type', to avoid a possible over-widening situation:

  type a_t;
  TYPE a_T, res_T;
  result_type res_result;

  S1 a_t = ;
  S2 a_T = (TYPE) a_t;
  S3 res_T = a_T << CONST;
  S4 res_result = (result_type) res_T;
      '--> res_result' = a_t w<< CONST;

  And a case when 'TYPE' is 4 times bigger than 'type'.  In that case we
  create an additional pattern stmt for S2 to create a variable of an
  intermediate type, and perform widen-shift on the intermediate type:

  type a_t;
  interm_type a_it;
  TYPE a_T, res_T, res_T';

  S1 a_t = ;
  S2 a_T = (TYPE) a_t;
      '--> a_it = (interm_type) a_t;
  S3 res_T = a_T << CONST;
      '--> res_T' = a_it <<* CONST;

  Input/Output:

  * STMTS: Contains a stmt from which the pattern search begins.
    In case of unsigned widen-shift, the original stmt (S3) is replaced with S4
    in STMTS.  When an intermediate type is used and a pattern statement is
    created for S2, we also put S2 here (before S3).

  Output:

  * TYPE_IN: The type of the input arguments to the pattern.

  * TYPE_OUT: The type of the output of this pattern.

  * Return value: A new stmt that will be used to replace the sequence of
    stmts that constitute the pattern.  In this case it will be:
    WIDEN_LSHIFT_EXPR <a_t, CONST>.  */
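
/* A hedged C-level example (names hypothetical; 16-bit short and 32-bit
   int assumed):

     short a[256];
     int res[256];
     for (int i = 0; i < 256; i++)
       res[i] = a[i] << 4;        <-- S2 + S3: promote, then shift by CONST

   becomes a single WIDEN_LSHIFT_EXPR <a_t, 4> producing the 32-bit
   result directly.  */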

static gimple
vect_recog_widen_shift_pattern (vec<gimple> *stmts,
				tree *type_in, tree *type_out)
{
  gimple last_stmt = stmts->pop ();
  gimple def_stmt0;
  tree oprnd0, oprnd1;
  tree type, half_type0;
  gimple pattern_stmt;
  tree vectype, vectype_out = NULL_TREE;
  tree var;
  enum tree_code dummy_code;
  int dummy_int;
  vec<tree>  dummy_vec;
  gimple use_stmt;
  bool promotion;

  if (!is_gimple_assign (last_stmt) || !vinfo_for_stmt (last_stmt))
    return NULL;

  if (STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (last_stmt)))
    return NULL;

  if (gimple_assign_rhs_code (last_stmt) != LSHIFT_EXPR)
    return NULL;

  oprnd0 = gimple_assign_rhs1 (last_stmt);
  oprnd1 = gimple_assign_rhs2 (last_stmt);
  if (TREE_CODE (oprnd0) != SSA_NAME || TREE_CODE (oprnd1) != INTEGER_CST)
    return NULL;

  /* Check operand 0: it has to be defined by a type promotion.  */
  if (!type_conversion_p (oprnd0, last_stmt, false, &half_type0, &def_stmt0,
                          &promotion)
      || !promotion)
     return NULL;

  /* Check operand 1: has to be positive.  We check that it fits the type
     in vect_handle_widen_op_by_const ().  */
  if (tree_int_cst_compare (oprnd1, size_zero_node) <= 0)
    return NULL;

  oprnd0 = gimple_assign_rhs1 (def_stmt0);
  type = gimple_expr_type (last_stmt);

  /* Check for subsequent conversion to another type.  */
  use_stmt = vect_single_imm_use (last_stmt);
  if (use_stmt && is_gimple_assign (use_stmt)
      && CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (use_stmt))
      && !STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (use_stmt)))
    {
      tree use_lhs = gimple_assign_lhs (use_stmt);
      tree use_type = TREE_TYPE (use_lhs);

      if (INTEGRAL_TYPE_P (use_type)
	  && TYPE_PRECISION (use_type) <= TYPE_PRECISION (type))
	{
	  last_stmt = use_stmt;
	  type = use_type;
	}
    }

  /* Check if this is a widening operation.  */
  if (!vect_handle_widen_op_by_const (last_stmt, LSHIFT_EXPR, oprnd1,
       				      &oprnd0, stmts,
	                              type, &half_type0, def_stmt0))
    return NULL;

  /* Pattern detected.  */
  if (dump_enabled_p ())
    dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
                     "vect_recog_widen_shift_pattern: detected: ");

  /* Check target support.  */
  vectype = get_vectype_for_scalar_type (half_type0);
  vectype_out = get_vectype_for_scalar_type (type);

  if (!vectype
      || !vectype_out
      || !supportable_widening_operation (WIDEN_LSHIFT_EXPR, last_stmt,
					  vectype_out, vectype,
					  &dummy_code, &dummy_code,
					  &dummy_int, &dummy_vec))
    return NULL;

  *type_in = vectype;
  *type_out = vectype_out;

  /* Pattern supported.  Create a stmt to be used to replace the pattern.  */
  var = vect_recog_temp_ssa_var (type, NULL);
  pattern_stmt =
    gimple_build_assign_with_ops (WIDEN_LSHIFT_EXPR, var, oprnd0, oprnd1);

  if (dump_enabled_p ())
    dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt, 0);

  stmts->safe_push (last_stmt);
  return pattern_stmt;
}

/* Detect a vector by vector shift pattern that wouldn't be otherwise
   vectorized:

   type a_t;
   TYPE b_T, res_T;

   S1 a_t = ;
   S2 b_T = ;
   S3 res_T = b_T op a_t;

  where type 'TYPE' is a type with a different size than 'type',
  and op is <<, >> or rotate.

  Also detect cases:

   type a_t;
   TYPE b_T, c_T, res_T;

   S0 c_T = ;
   S1 a_t = (type) c_T;
   S2 b_T = ;
   S3 res_T = b_T op a_t;

  Input/Output:

  * STMTS: Contains a stmt from which the pattern search begins,
    i.e. the shift/rotate stmt.  The original stmt (S3) is replaced
    with a shift/rotate which has the same type on both operands, in the
    second case just b_T op c_T, in the first case with an added cast
    from a_t to c_T in STMT_VINFO_PATTERN_DEF_SEQ.

  Output:

  * TYPE_IN: The type of the input arguments to the pattern.

  * TYPE_OUT: The type of the output of this pattern.

  * Return value: A new stmt that will be used to replace the shift/rotate
    S3 stmt.  */
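
/* A hedged C-level example (names hypothetical):

     int b[256];
     unsigned char amt[256];
     for (int i = 0; i < 256; i++)
       b[i] = b[i] << amt[i];

   The shift amount is defined in a narrower type than the shifted value,
   so the pattern emits a cast of the amount to the type of b[i] (in
   STMT_VINFO_PATTERN_DEF_SEQ) and a shift with matching operand types
   that maps to a vector-by-vector shift.  */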

static gimple
vect_recog_vector_vector_shift_pattern (vec<gimple> *stmts,
					tree *type_in, tree *type_out)
{
  gimple last_stmt = stmts->pop ();
  tree oprnd0, oprnd1, lhs, var;
  gimple pattern_stmt, def_stmt;
  enum tree_code rhs_code;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
  enum vect_def_type dt;
  tree def;

  if (!is_gimple_assign (last_stmt))
    return NULL;

  rhs_code = gimple_assign_rhs_code (last_stmt);
  switch (rhs_code)
    {
    case LSHIFT_EXPR:
    case RSHIFT_EXPR:
    case LROTATE_EXPR:
    case RROTATE_EXPR:
      break;
    default:
      return NULL;
    }

  if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
    return NULL;

  lhs = gimple_assign_lhs (last_stmt);
  oprnd0 = gimple_assign_rhs1 (last_stmt);
  oprnd1 = gimple_assign_rhs2 (last_stmt);
  if (TREE_CODE (oprnd0) != SSA_NAME
      || TREE_CODE (oprnd1) != SSA_NAME
      || TYPE_MODE (TREE_TYPE (oprnd0)) == TYPE_MODE (TREE_TYPE (oprnd1))
      || TYPE_PRECISION (TREE_TYPE (oprnd1))
	 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (oprnd1)))
      || TYPE_PRECISION (TREE_TYPE (lhs))
	 != TYPE_PRECISION (TREE_TYPE (oprnd0)))
    return NULL;

  if (!vect_is_simple_use (oprnd1, last_stmt, loop_vinfo, bb_vinfo, &def_stmt,
			   &def, &dt))
    return NULL;

  if (dt != vect_internal_def)
    return NULL;

  *type_in = get_vectype_for_scalar_type (TREE_TYPE (oprnd0));
  *type_out = *type_in;
  if (*type_in == NULL_TREE)
    return NULL;

  def = NULL_TREE;
  if (gimple_assign_cast_p (def_stmt))
    {
      tree rhs1 = gimple_assign_rhs1 (def_stmt);
      if (TYPE_MODE (TREE_TYPE (rhs1)) == TYPE_MODE (TREE_TYPE (oprnd0))
	  && TYPE_PRECISION (TREE_TYPE (rhs1))
	     == TYPE_PRECISION (TREE_TYPE (oprnd0)))
	def = rhs1;
    }

  if (def == NULL_TREE)
    {
      def = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
      def_stmt = gimple_build_assign_with_ops (NOP_EXPR, def, oprnd1,
					       NULL_TREE);
      new_pattern_def_seq (stmt_vinfo, def_stmt);
    }

  /* Pattern detected.  */
  if (dump_enabled_p ())
    dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
                     "vect_recog_vector_vector_shift_pattern: detected: ");

  /* Pattern supported.  Create a stmt to be used to replace the pattern.  */
  var = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
  pattern_stmt = gimple_build_assign_with_ops (rhs_code, var, oprnd0, def);

  if (dump_enabled_p ())
    dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt, 0);

  stmts->safe_push (last_stmt);
  return pattern_stmt;
}

/* Detect a division by a constant that wouldn't otherwise be
   vectorized:

   type a_t, b_t;

   S1 a_t = b_t / N;

  where type 'type' is an integral type and N is a constant.

  Similarly handle modulo by a constant:

   S4 a_t = b_t % N;

  Input/Output:

  * STMTS: Contains a stmt from which the pattern search begins,
    i.e. the division stmt.  If N is a power of two constant and
    type is signed, S1 is replaced by:
  S3  y_t = b_t < 0 ? N - 1 : 0;
  S2  x_t = b_t + y_t;
  S1' a_t = x_t >> log2 (N);

    If N is a power of two constant and type is signed, S4 is
    replaced by (where *_T temporaries have unsigned type):
  S9  y_T = b_t < 0 ? -1U : 0U;
  S8  z_T = y_T >> (sizeof (type_t) * CHAR_BIT - log2 (N));
  S7  z_t = (type) z_T;
  S6  w_t = b_t + z_t;
  S5  x_t = w_t & (N - 1);
  S4' a_t = x_t - z_t;

  Output:

  * TYPE_IN: The type of the input arguments to the pattern.

  * TYPE_OUT: The type of the output of this pattern.

  * Return value: A new stmt that will be used to replace the division
    S1 or modulo S4 stmt.  */
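
/* Worked example (illustrative only, assuming 8-bit 'type' and N = 8):
   for b_t = -13, the division sequence computes
     y_t = -13 < 0 ? 7 : 0   -->   7
     x_t = -13 + 7           -->  -6
     a_t = -6 >> 3           -->  -1
   which matches trunc (-13 / 8) = -1, whereas the plain arithmetic
   shift -13 >> 3 = -2 would round toward negative infinity.  */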

static gimple
vect_recog_divmod_pattern (vec<gimple> *stmts,
			   tree *type_in, tree *type_out)
{
  gimple last_stmt = stmts->pop ();
  tree oprnd0, oprnd1, vectype, itype, cond;
  gimple pattern_stmt, def_stmt;
  enum tree_code rhs_code;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
  optab optab;
  tree q;
  int dummy_int, prec;
  stmt_vec_info def_stmt_vinfo;

  if (!is_gimple_assign (last_stmt))
    return NULL;

  rhs_code = gimple_assign_rhs_code (last_stmt);
  switch (rhs_code)
    {
    case TRUNC_DIV_EXPR:
    case TRUNC_MOD_EXPR:
      break;
    default:
      return NULL;
    }

  if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
    return NULL;

  oprnd0 = gimple_assign_rhs1 (last_stmt);
  oprnd1 = gimple_assign_rhs2 (last_stmt);
  itype = TREE_TYPE (oprnd0);
  if (TREE_CODE (oprnd0) != SSA_NAME
      || TREE_CODE (oprnd1) != INTEGER_CST
      || TREE_CODE (itype) != INTEGER_TYPE
      || TYPE_PRECISION (itype) != GET_MODE_PRECISION (TYPE_MODE (itype)))
    return NULL;

  vectype = get_vectype_for_scalar_type (itype);
  if (vectype == NULL_TREE)
    return NULL;

  /* If the target can handle vectorized division or modulo natively,
     don't attempt to optimize this.  */
  optab = optab_for_tree_code (rhs_code, vectype, optab_default);
  if (optab != unknown_optab)
    {
      enum machine_mode vec_mode = TYPE_MODE (vectype);
      int icode = (int) optab_handler (optab, vec_mode);
      if (icode != CODE_FOR_nothing)
	return NULL;
    }

  prec = TYPE_PRECISION (itype);
  if (integer_pow2p (oprnd1))
    {
      if (TYPE_UNSIGNED (itype) || tree_int_cst_sgn (oprnd1) != 1)
	return NULL;

      /* Pattern detected.  */
      if (dump_enabled_p ())
        dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
                         "vect_recog_divmod_pattern: detected: ");

      cond = build2 (LT_EXPR, boolean_type_node, oprnd0,
		     build_int_cst (itype, 0));
      if (rhs_code == TRUNC_DIV_EXPR)
	{
	  tree var = vect_recog_temp_ssa_var (itype, NULL);
	  tree shift;
	  def_stmt
	    = gimple_build_assign_with_ops (COND_EXPR, var, cond,
					    fold_build2 (MINUS_EXPR, itype,
							 oprnd1,
							 build_int_cst (itype,
									1)),
					    build_int_cst (itype, 0));
	  new_pattern_def_seq (stmt_vinfo, def_stmt);
	  var = vect_recog_temp_ssa_var (itype, NULL);
	  def_stmt
	    = gimple_build_assign_with_ops (PLUS_EXPR, var, oprnd0,
					    gimple_assign_lhs (def_stmt));
	  append_pattern_def_seq (stmt_vinfo, def_stmt);

	  shift = build_int_cst (itype, tree_log2 (oprnd1));
	  pattern_stmt
	    = gimple_build_assign_with_ops (RSHIFT_EXPR,
					    vect_recog_temp_ssa_var (itype,
								     NULL),
					    var, shift);
	}
      else
	{
	  tree signmask;
	  STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;
	  if (compare_tree_int (oprnd1, 2) == 0)
	    {
	      signmask = vect_recog_temp_ssa_var (itype, NULL);
	      def_stmt
		= gimple_build_assign_with_ops (COND_EXPR, signmask, cond,
						build_int_cst (itype, 1),
						build_int_cst (itype, 0));
	      append_pattern_def_seq (stmt_vinfo, def_stmt);
	    }
	  else
	    {
	      tree utype
		= build_nonstandard_integer_type (prec, 1);
	      tree vecutype = get_vectype_for_scalar_type (utype);
	      tree shift
		= build_int_cst (utype, GET_MODE_BITSIZE (TYPE_MODE (itype))
					- tree_log2 (oprnd1));
	      tree var = vect_recog_temp_ssa_var (utype, NULL);

	      def_stmt
		= gimple_build_assign_with_ops (COND_EXPR, var, cond,
						build_int_cst (utype, -1),
						build_int_cst (utype, 0));
	      def_stmt_vinfo
		= new_stmt_vec_info (def_stmt, loop_vinfo, bb_vinfo);
	      set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
	      STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecutype;
	      append_pattern_def_seq (stmt_vinfo, def_stmt);
	      var = vect_recog_temp_ssa_var (utype, NULL);
	      def_stmt
		= gimple_build_assign_with_ops (RSHIFT_EXPR, var,
						gimple_assign_lhs (def_stmt),
						shift);
	      def_stmt_vinfo
		= new_stmt_vec_info (def_stmt, loop_vinfo, bb_vinfo);
	      set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
	      STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecutype;
	      append_pattern_def_seq (stmt_vinfo, def_stmt);
	      signmask = vect_recog_temp_ssa_var (itype, NULL);
	      def_stmt
		= gimple_build_assign_with_ops (NOP_EXPR, signmask, var,
						NULL_TREE);
	      append_pattern_def_seq (stmt_vinfo, def_stmt);
	    }
	  def_stmt
	    = gimple_build_assign_with_ops (PLUS_EXPR,
					    vect_recog_temp_ssa_var (itype,
								     NULL),
					    oprnd0, signmask);
	  append_pattern_def_seq (stmt_vinfo, def_stmt);
	  def_stmt
	    = gimple_build_assign_with_ops (BIT_AND_EXPR,
					    vect_recog_temp_ssa_var (itype,
								     NULL),
					    gimple_assign_lhs (def_stmt),
					    fold_build2 (MINUS_EXPR, itype,
							 oprnd1,
							 build_int_cst (itype,
									1)));
	  append_pattern_def_seq (stmt_vinfo, def_stmt);

	  pattern_stmt
	    = gimple_build_assign_with_ops (MINUS_EXPR,
					    vect_recog_temp_ssa_var (itype,
								     NULL),
					    gimple_assign_lhs (def_stmt),
					    signmask);
	}

      if (dump_enabled_p ())
	dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt,
                              0);

      stmts->safe_push (last_stmt);

      *type_in = vectype;
      *type_out = vectype;
      return pattern_stmt;
    }

  if (!host_integerp (oprnd1, TYPE_UNSIGNED (itype))
      || integer_zerop (oprnd1)
      || prec > HOST_BITS_PER_WIDE_INT)
    return NULL;

  if (!can_mult_highpart_p (TYPE_MODE (vectype), TYPE_UNSIGNED (itype)))
    return NULL;

  STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;

  if (TYPE_UNSIGNED (itype))
    {
      unsigned HOST_WIDE_INT mh, ml;
      int pre_shift, post_shift;
      unsigned HOST_WIDE_INT d = tree_low_cst (oprnd1, 1)
				 & GET_MODE_MASK (TYPE_MODE (itype));
      tree t1, t2, t3, t4;

      if (d >= ((unsigned HOST_WIDE_INT) 1 << (prec - 1)))
	/* FIXME: Can transform this into oprnd0 >= oprnd1 ? 1 : 0.  */
	return NULL;

      /* Find a suitable multiplier and right shift count
	 instead of multiplying with D.  */
      mh = choose_multiplier (d, prec, prec, &ml, &post_shift, &dummy_int);

      /* If the suggested multiplier is more than SIZE bits, we can do better
	 for even divisors, using an initial right shift.  */
      if (mh != 0 && (d & 1) == 0)
	{
	  pre_shift = floor_log2 (d & -d);
	  mh = choose_multiplier (d >> pre_shift, prec, prec - pre_shift,
				  &ml, &post_shift, &dummy_int);
	  gcc_assert (!mh);
	}
      else
	pre_shift = 0;

      if (mh != 0)
	{
	  if (post_shift - 1 >= prec)
	    return NULL;

	  /* t1 = oprnd0 h* ml;
	     t2 = oprnd0 - t1;
	     t3 = t2 >> 1;
	     t4 = t1 + t3;
	     q = t4 >> (post_shift - 1);  */
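	  /* Illustrative instance (assuming prec == 32): for division by
	     d = 7, choose_multiplier yields ml = 0x24924925 with mh != 0
	     and post_shift = 3, giving the well-known sequence
	       t1 = mulhi (oprnd0, 0x24924925);
	       t2 = oprnd0 - t1;
	       t3 = t2 >> 1;
	       t4 = t1 + t3;
	       q  = t4 >> 2;
	     the extra subtract/add steps recover the 33rd multiplier bit
	     that does not fit in 32 bits.  */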
	  t1 = vect_recog_temp_ssa_var (itype, NULL);
	  def_stmt
	    = gimple_build_assign_with_ops (MULT_HIGHPART_EXPR, t1, oprnd0,
					    build_int_cst (itype, ml));
	  append_pattern_def_seq (stmt_vinfo, def_stmt);

	  t2 = vect_recog_temp_ssa_var (itype, NULL);
	  def_stmt
	    = gimple_build_assign_with_ops (MINUS_EXPR, t2, oprnd0, t1);
	  append_pattern_def_seq (stmt_vinfo, def_stmt);

	  t3 = vect_recog_temp_ssa_var (itype, NULL);
	  def_stmt
	    = gimple_build_assign_with_ops (RSHIFT_EXPR, t3, t2,
					    integer_one_node);
	  append_pattern_def_seq (stmt_vinfo, def_stmt);

	  t4 = vect_recog_temp_ssa_var (itype, NULL);
	  def_stmt
	    = gimple_build_assign_with_ops (PLUS_EXPR, t4, t1, t3);

	  if (post_shift != 1)
	    {
	      append_pattern_def_seq (stmt_vinfo, def_stmt);

	      q = vect_recog_temp_ssa_var (itype, NULL);
	      pattern_stmt
		= gimple_build_assign_with_ops (RSHIFT_EXPR, q, t4,
						build_int_cst (itype,
							       post_shift
							       - 1));
	    }
	  else
	    {
	      q = t4;
	      pattern_stmt = def_stmt;
	    }
	}
      else
	{
	  if (pre_shift >= prec || post_shift >= prec)
	    return NULL;

	  /* t1 = oprnd0 >> pre_shift;
	     t2 = t1 h* ml;
	     q = t2 >> post_shift;  */
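	  /* Illustrative instance: for an even divisor such as d = 14 the
	     multiplier for 14 itself needs prec + 1 bits (mh != 0), but
	     after pre_shift = 1 the shifted dividend has only prec - 1
	     significant bits, so a prec-bit multiplier for 7 suffices
	     and the plain mulhi/shift sequence below applies.  */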
	  if (pre_shift)
	    {
	      t1 = vect_recog_temp_ssa_var (itype, NULL);
	      def_stmt
		= gimple_build_assign_with_ops (RSHIFT_EXPR, t1, oprnd0,
						build_int_cst (NULL,
							       pre_shift));
	      append_pattern_def_seq (stmt_vinfo, def_stmt);
	    }
	  else
	    t1 = oprnd0;

	  t2 = vect_recog_temp_ssa_var (itype, NULL);
	  def_stmt
	    = gimple_build_assign_with_ops (MULT_HIGHPART_EXPR, t2, t1,
					    build_int_cst (itype, ml));

	  if (post_shift)
	    {
	      append_pattern_def_seq (stmt_vinfo, def_stmt);

	      q = vect_recog_temp_ssa_var (itype, NULL);
	      def_stmt
		= gimple_build_assign_with_ops (RSHIFT_EXPR, q, t2,
						build_int_cst (itype,
							       post_shift));
	    }
	  else
	    q = t2;

	  pattern_stmt = def_stmt;
	}
    }
  else
    {
      unsigned HOST_WIDE_INT ml;
      int post_shift;
      HOST_WIDE_INT d = tree_low_cst (oprnd1, 0);
      unsigned HOST_WIDE_INT abs_d;
      bool add = false;
      tree t1, t2, t3, t4;

      /* Give up for -1.  */
      if (d == -1)
	return NULL;

      /* Since d might be INT_MIN, we have to cast to
	 unsigned HOST_WIDE_INT before negating to avoid
	 undefined signed overflow.  */
      abs_d = (d >= 0
	       ? (unsigned HOST_WIDE_INT) d
	       : - (unsigned HOST_WIDE_INT) d);

      /* n rem d = n rem -d */
      if (rhs_code == TRUNC_MOD_EXPR && d < 0)
	{
	  d = abs_d;
	  oprnd1 = build_int_cst (itype, abs_d);
	}
      else if (HOST_BITS_PER_WIDE_INT >= prec
	       && abs_d == (unsigned HOST_WIDE_INT) 1 << (prec - 1))
	/* This case is not handled correctly below.  */
	return NULL;

      choose_multiplier (abs_d, prec, prec - 1, &ml, &post_shift, &dummy_int);
      if (ml >= (unsigned HOST_WIDE_INT) 1 << (prec - 1))
	{
	  add = true;
	  ml |= (~(unsigned HOST_WIDE_INT) 0) << (prec - 1);
	}
      if (post_shift >= prec)
	return NULL;

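      /* Illustrative instance (assuming prec == 32): for division by
	 d = 3, choose_multiplier yields ml = 0x55555556 with
	 post_shift = 0, so the sequence below effectively computes
	   t1 = mulhs (oprnd0, 0x55555556);
	   t4 = oprnd0 >> 31;
	   q  = t1 - t4;
	 subtracting the -1/0 sign word rounds the quotient toward zero
	 for negative dividends.  */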
      /* t1 = oprnd0 h* ml;  */
      t1 = vect_recog_temp_ssa_var (itype, NULL);
      def_stmt
	= gimple_build_assign_with_ops (MULT_HIGHPART_EXPR, t1, oprnd0,
					build_int_cst (itype, ml));
      append_pattern_def_seq (stmt_vinfo, def_stmt);

      if (add)
	{
	  /* t2 = t1 + oprnd0;  */
	  t2 = vect_recog_temp_ssa_var (itype, NULL);
	  def_stmt
	    = gimple_build_assign_with_ops (PLUS_EXPR, t2, t1, oprnd0);
	  append_pattern_def_seq (stmt_vinfo, def_stmt);
	}
      else
	t2 = t1;

      if (post_shift)
	{
	  /* t3 = t2 >> post_shift;  */
	  t3 = vect_recog_temp_ssa_var (itype, NULL);
	  def_stmt
	    = gimple_build_assign_with_ops (RSHIFT_EXPR, t3, t2,
					    build_int_cst (itype, post_shift));
	  append_pattern_def_seq (stmt_vinfo, def_stmt);
	}
      else
	t3 = t2;

      /* t4 = oprnd0 >> (prec - 1);  */
      t4 = vect_recog_temp_ssa_var (itype, NULL);
      def_stmt
	= gimple_build_assign_with_ops (RSHIFT_EXPR, t4, oprnd0,
					build_int_cst (itype, prec - 1));
      append_pattern_def_seq (stmt_vinfo, def_stmt);

      /* q = t3 - t4;  or q = t4 - t3;  */
      q = vect_recog_temp_ssa_var (itype, NULL);
      pattern_stmt
	= gimple_build_assign_with_ops (MINUS_EXPR, q, d < 0 ? t4 : t3,
					d < 0 ? t3 : t4);
    }

  if (rhs_code == TRUNC_MOD_EXPR)
    {
      tree r, t1;

      /* We divided.  Now finish by:
	 t1 = q * oprnd1;
	 r = oprnd0 - t1;  */
      append_pattern_def_seq (stmt_vinfo, pattern_stmt);

      t1 = vect_recog_temp_ssa_var (itype, NULL);
      def_stmt
	= gimple_build_assign_with_ops (MULT_EXPR, t1, q, oprnd1);
      append_pattern_def_seq (stmt_vinfo, def_stmt);

      r = vect_recog_temp_ssa_var (itype, NULL);
      pattern_stmt
	= gimple_build_assign_with_ops (MINUS_EXPR, r, oprnd0, t1);
    }

  /* Pattern detected.  */
  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
                       "vect_recog_divmod_pattern: detected: ");
      dump_gimple_stmt (MSG_OPTIMIZED_LOCATIONS, TDF_SLIM, pattern_stmt, 0);
    }

  stmts->safe_push (last_stmt);

  *type_in = vectype;
  *type_out = vectype;
  return pattern_stmt;
}

/* Function vect_recog_mixed_size_cond_pattern

   Try to find the following pattern:

     type x_t, y_t;
     TYPE a_T, b_T, c_T;
   loop:
     S1  a_T = x_t CMP y_t ? b_T : c_T;

   where type 'TYPE' is an integral type which has a different size
   from 'type'.  b_T and c_T are either constants (and if 'TYPE' is wider
   than 'type', the constants need to fit into an integer type
   with the same width as 'type') or results of conversion from 'type'.

   Input:

   * LAST_STMT: A stmt from which the pattern search begins.

   Output:

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the pattern.
	Additionally a def_stmt is added:

	a_it = x_t CMP y_t ? b_it : c_it;
	a_T = (TYPE) a_it;  */
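
/* Illustrative sketch (not part of the pattern code): a scalar source
   loop that this pattern targets, assuming 16-bit short and 32-bit int.
   The comparison is done on shorts while the selected values are ints,
   so the natural VEC_COND_EXPR would mix vector types with different
   element sizes:

     void
     cond_loop (short *x, short *y, int *a, int n)
     {
       for (int i = 0; i < n; i++)
	 a[i] = x[i] < y[i] ? 17 : 42;	// S1; both constants fit in short

   The pattern performs the COND_EXPR in a short-sized integer type and
   widens the result to int afterwards.  */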

static gimple
vect_recog_mixed_size_cond_pattern (vec<gimple> *stmts, tree *type_in,
				    tree *type_out)
{
  gimple last_stmt = (*stmts)[0];
  tree cond_expr, then_clause, else_clause;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt), def_stmt_info;
  tree type, vectype, comp_vectype, itype = NULL_TREE, vecitype;
  enum machine_mode cmpmode;
  gimple pattern_stmt, def_stmt;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
  tree orig_type0 = NULL_TREE, orig_type1 = NULL_TREE;
  gimple def_stmt0 = NULL, def_stmt1 = NULL;
  bool promotion;
  tree comp_scalar_type;

  if (!is_gimple_assign (last_stmt)
      || gimple_assign_rhs_code (last_stmt) != COND_EXPR
      || STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_internal_def)
    return NULL;

  cond_expr = gimple_assign_rhs1 (last_stmt);
  then_clause = gimple_assign_rhs2 (last_stmt);
  else_clause = gimple_assign_rhs3 (last_stmt);

  if (!COMPARISON_CLASS_P (cond_expr))
    return NULL;

  comp_scalar_type = TREE_TYPE (TREE_OPERAND (cond_expr, 0));
  comp_vectype = get_vectype_for_scalar_type (comp_scalar_type);
  if (comp_vectype == NULL_TREE)
    return NULL;

  type = gimple_expr_type (last_stmt);
  if (types_compatible_p (type, comp_scalar_type)
      || ((TREE_CODE (then_clause) != INTEGER_CST
	   || TREE_CODE (else_clause) != INTEGER_CST)
	  && !INTEGRAL_TYPE_P (comp_scalar_type))
      || !INTEGRAL_TYPE_P (type))
    return NULL;

  if ((TREE_CODE (then_clause) != INTEGER_CST
       && !type_conversion_p (then_clause, last_stmt, false, &orig_type0,
                              &def_stmt0, &promotion))
      || (TREE_CODE (else_clause) != INTEGER_CST
          && !type_conversion_p (else_clause, last_stmt, false, &orig_type1,
                                 &def_stmt1, &promotion)))
    return NULL;

  if (orig_type0 && orig_type1
      && !types_compatible_p (orig_type0, orig_type1))
    return NULL;

  if (orig_type0)
    {
      if (!types_compatible_p (orig_type0, comp_scalar_type))
	return NULL;
      then_clause = gimple_assign_rhs1 (def_stmt0);
      itype = orig_type0;
    }

  if (orig_type1)
    {
      if (!types_compatible_p (orig_type1, comp_scalar_type))
	return NULL;
      else_clause = gimple_assign_rhs1 (def_stmt1);
      itype = orig_type1;
    }

  cmpmode = GET_MODE_INNER (TYPE_MODE (comp_vectype));

  if (GET_MODE_BITSIZE (TYPE_MODE (type)) == GET_MODE_BITSIZE (cmpmode))
    return NULL;

  vectype = get_vectype_for_scalar_type (type);
  if (vectype == NULL_TREE)
    return NULL;

  if (expand_vec_cond_expr_p (vectype, comp_vectype))
    return NULL;

  if (itype == NULL_TREE)
    itype = build_nonstandard_integer_type (GET_MODE_BITSIZE (cmpmode),
					    TYPE_UNSIGNED (type));

  if (itype == NULL_TREE
      || GET_MODE_BITSIZE (TYPE_MODE (itype)) != GET_MODE_BITSIZE (cmpmode))
    return NULL;

  vecitype = get_vectype_for_scalar_type (itype);
  if (vecitype == NULL_TREE)
    return NULL;

  if (!expand_vec_cond_expr_p (vecitype, comp_vectype))
    return NULL;

  if (GET_MODE_BITSIZE (TYPE_MODE (type)) > GET_MODE_BITSIZE (cmpmode))
    {
      if ((TREE_CODE (then_clause) == INTEGER_CST
	   && !int_fits_type_p (then_clause, itype))
	  || (TREE_CODE (else_clause) == INTEGER_CST
	      && !int_fits_type_p (else_clause, itype)))
	return NULL;
    }

  def_stmt
    = gimple_build_assign_with_ops (COND_EXPR,
				    vect_recog_temp_ssa_var (itype, NULL),
				    unshare_expr (cond_expr),
				    fold_convert (itype, then_clause),
				    fold_convert (itype, else_clause));
  pattern_stmt
    = gimple_build_assign_with_ops (NOP_EXPR,
				    vect_recog_temp_ssa_var (type, NULL),
				    gimple_assign_lhs (def_stmt), NULL_TREE);

  new_pattern_def_seq (stmt_vinfo, def_stmt);
  def_stmt_info = new_stmt_vec_info (def_stmt, loop_vinfo, bb_vinfo);
  set_vinfo_for_stmt (def_stmt, def_stmt_info);
  STMT_VINFO_VECTYPE (def_stmt_info) = vecitype;
  *type_in = vecitype;
  *type_out = vectype;

  if (dump_enabled_p ())
    dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
                     "vect_recog_mixed_size_cond_pattern: detected: ");

  return pattern_stmt;
}


/* Helper function of vect_recog_bool_pattern.  Called recursively, return
   true if bool VAR can be optimized that way.  */
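
/* For instance (illustrative only), for

     bool a = x < y;
     bool b = p != q;
     bool c = a & ~b;

   check_bool_pattern (c, ...) recurses through the BIT_AND_EXPR and
   BIT_NOT_EXPR defs down to the two comparisons and verifies that each
   comparison can be turned into a VEC_COND_EXPR on the target.  */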

static bool
check_bool_pattern (tree var, loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
{
  gimple def_stmt;
  enum vect_def_type dt;
  tree def, rhs1;
  enum tree_code rhs_code;

  if (!vect_is_simple_use (var, NULL, loop_vinfo, bb_vinfo, &def_stmt, &def,
			   &dt))
    return false;

  if (dt != vect_internal_def)
    return false;

  if (!is_gimple_assign (def_stmt))
    return false;

  if (!has_single_use (def))
    return false;

  rhs1 = gimple_assign_rhs1 (def_stmt);
  rhs_code = gimple_assign_rhs_code (def_stmt);
  switch (rhs_code)
    {
    case SSA_NAME:
      return check_bool_pattern (rhs1, loop_vinfo, bb_vinfo);

    CASE_CONVERT:
      if ((TYPE_PRECISION (TREE_TYPE (rhs1)) != 1
	   || !TYPE_UNSIGNED (TREE_TYPE (rhs1)))
	  && TREE_CODE (TREE_TYPE (rhs1)) != BOOLEAN_TYPE)
	return false;
      return check_bool_pattern (rhs1, loop_vinfo, bb_vinfo);

    case BIT_NOT_EXPR:
      return check_bool_pattern (rhs1, loop_vinfo, bb_vinfo);

    case BIT_AND_EXPR:
    case BIT_IOR_EXPR:
    case BIT_XOR_EXPR:
      if (!check_bool_pattern (rhs1, loop_vinfo, bb_vinfo))
	return false;
      return check_bool_pattern (gimple_assign_rhs2 (def_stmt), loop_vinfo,
				 bb_vinfo);

    default:
      if (TREE_CODE_CLASS (rhs_code) == tcc_comparison)
	{
	  tree vecitype, comp_vectype;

	  /* If the comparison can throw, then is_gimple_condexpr will be
	     false and we can't make a COND_EXPR/VEC_COND_EXPR out of it.  */
	  if (stmt_could_throw_p (def_stmt))
	    return false;

	  comp_vectype = get_vectype_for_scalar_type (TREE_TYPE (rhs1));
	  if (comp_vectype == NULL_TREE)
	    return false;

	  if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE)
	    {
	      enum machine_mode mode = TYPE_MODE (TREE_TYPE (rhs1));
	      tree itype
		= build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
	      vecitype = get_vectype_for_scalar_type (itype);
	      if (vecitype == NULL_TREE)
		return false;
	    }
	  else
	    vecitype = comp_vectype;
	  return expand_vec_cond_expr_p (vecitype, comp_vectype);
	}
      return false;
    }
}


/* Helper function of adjust_bool_pattern.  Add a cast to TYPE to a previous
   stmt (SSA_NAME_DEF_STMT of VAR) by moving the COND_EXPR from RELATED_STMT
   to PATTERN_DEF_SEQ and adding a cast as RELATED_STMT.  */

static tree
adjust_bool_pattern_cast (tree type, tree var)
{
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (SSA_NAME_DEF_STMT (var));
  gimple cast_stmt, pattern_stmt;

  gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo));
  pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
  new_pattern_def_seq (stmt_vinfo, pattern_stmt);
  cast_stmt
    = gimple_build_assign_with_ops (NOP_EXPR,
				    vect_recog_temp_ssa_var (type, NULL),
				    gimple_assign_lhs (pattern_stmt),
				    NULL_TREE);
  STMT_VINFO_RELATED_STMT (stmt_vinfo) = cast_stmt;
  return gimple_assign_lhs (cast_stmt);
}


/* Helper function of vect_recog_bool_pattern.  Do the actual transformations,
   recursively.  VAR is an SSA_NAME that should be transformed from bool
   to a wider integer type, OUT_TYPE is the desired final integer type of
   the whole pattern.  TRUEVAL should be NULL unless optimizing
   BIT_AND_EXPR into a COND_EXPR with one integer from one of the operands
   in the then_clause; STMTS is where statements with added pattern stmts
   should be pushed to.  */
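
/* For example (illustrative only), with OUT_TYPE 'int' the chain

     bool a = x < y;
     bool c = ~a;

   is rewritten as

     a' = x < y ? 1 : 0;
     c' = a' ^ 1;

   in an unsigned integer type of the comparison's width, with casts
   inserted where operand widths disagree.  */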

static tree
adjust_bool_pattern (tree var, tree out_type, tree trueval,
		     vec<gimple> *stmts)
{
  gimple stmt = SSA_NAME_DEF_STMT (var);
  enum tree_code rhs_code, def_rhs_code;
  tree itype, cond_expr, rhs1, rhs2, irhs1, irhs2;
  location_t loc;
  gimple pattern_stmt, def_stmt;

  rhs1 = gimple_assign_rhs1 (stmt);
  rhs2 = gimple_assign_rhs2 (stmt);
  rhs_code = gimple_assign_rhs_code (stmt);
  loc = gimple_location (stmt);
  switch (rhs_code)
    {
    case SSA_NAME:
    CASE_CONVERT:
      irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
      itype = TREE_TYPE (irhs1);
      pattern_stmt
	= gimple_build_assign_with_ops (SSA_NAME,
					vect_recog_temp_ssa_var (itype, NULL),
					irhs1, NULL_TREE);
      break;

    case BIT_NOT_EXPR:
      irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
      itype = TREE_TYPE (irhs1);
      pattern_stmt
	= gimple_build_assign_with_ops (BIT_XOR_EXPR,
					vect_recog_temp_ssa_var (itype, NULL),
					irhs1, build_int_cst (itype, 1));
      break;

    case BIT_AND_EXPR:
      /* Try to optimize x = y & (a < b ? 1 : 0); into
	 x = (a < b ? y : 0);

	 E.g. for:
	   bool a_b, b_b, c_b;
	   TYPE d_T;

	   S1  a_b = x1 CMP1 y1;
	   S2  b_b = x2 CMP2 y2;
	   S3  c_b = a_b & b_b;
	   S4  d_T = (TYPE) c_b;

	 we would normally emit:

	   S1'  a_T = x1 CMP1 y1 ? 1 : 0;
	   S2'  b_T = x2 CMP2 y2 ? 1 : 0;
	   S3'  c_T = a_T & b_T;
	   S4'  d_T = c_T;

	 but we can save one stmt by using the
	 result of one of the COND_EXPRs in the other COND_EXPR and leave
	 the BIT_AND_EXPR stmt out:

	   S1'  a_T = x1 CMP1 y1 ? 1 : 0;
	   S3'  c_T = x2 CMP2 y2 ? a_T : 0;
	   S4'  d_T = c_T;

	 At least when VEC_COND_EXPR is implemented using masks
	 cond ? 1 : 0 is as expensive as cond ? var : 0; in both cases it
	 computes the comparison mask and ANDs it, in one case with an
	 all-ones vector, in the other case with a vector register.
	 Don't do this for BIT_IOR_EXPR, because cond ? 1 : var; is
	 often more expensive.  */
      def_stmt = SSA_NAME_DEF_STMT (rhs2);
      def_rhs_code = gimple_assign_rhs_code (def_stmt);
      if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
	{
	  tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
	  irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
	  if (TYPE_PRECISION (TREE_TYPE (irhs1))
	      == GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (def_rhs1))))
	    {
	      gimple tstmt;
	      stmt_vec_info stmt_def_vinfo = vinfo_for_stmt (def_stmt);
	      irhs2 = adjust_bool_pattern (rhs2, out_type, irhs1, stmts);
	      tstmt = stmts->pop ();
	      gcc_assert (tstmt == def_stmt);
	      stmts->quick_push (stmt);
	      STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt))
		= STMT_VINFO_RELATED_STMT (stmt_def_vinfo);
	      gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_def_vinfo));
	      STMT_VINFO_RELATED_STMT (stmt_def_vinfo) = NULL;
	      return irhs2;
	    }
	  else
	    irhs2 = adjust_bool_pattern (rhs2, out_type, NULL_TREE, stmts);
	  goto and_ior_xor;
	}
      def_stmt = SSA_NAME_DEF_STMT (rhs1);
      def_rhs_code = gimple_assign_rhs_code (def_stmt);
      if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
	{
	  tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
	  irhs2 = adjust_bool_pattern (rhs2, out_type, NULL_TREE, stmts);
	  if (TYPE_PRECISION (TREE_TYPE (irhs2))
	      == GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (def_rhs1))))
	    {
	      gimple tstmt;
	      stmt_vec_info stmt_def_vinfo = vinfo_for_stmt (def_stmt);
	      irhs1 = adjust_bool_pattern (rhs1, out_type, irhs2, stmts);
	      tstmt = stmts->pop ();
	      gcc_assert (tstmt == def_stmt);
	      stmts->quick_push (stmt);
	      STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt))
		= STMT_VINFO_RELATED_STMT (stmt_def_vinfo);
	      gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_def_vinfo));
	      STMT_VINFO_RELATED_STMT (stmt_def_vinfo) = NULL;
	      return irhs1;
	    }
	  else
	    irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
	  goto and_ior_xor;
	}
      /* FALLTHRU */
    case BIT_IOR_EXPR:
    case BIT_XOR_EXPR:
      irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
      irhs2 = adjust_bool_pattern (rhs2, out_type, NULL_TREE, stmts);
    and_ior_xor:
      if (TYPE_PRECISION (TREE_TYPE (irhs1))
	  != TYPE_PRECISION (TREE_TYPE (irhs2)))
	{
	  int prec1 = TYPE_PRECISION (TREE_TYPE (irhs1));
	  int prec2 = TYPE_PRECISION (TREE_TYPE (irhs2));
	  int out_prec = TYPE_PRECISION (out_type);
	  if (absu_hwi (out_prec - prec1) < absu_hwi (out_prec - prec2))
	    irhs2 = adjust_bool_pattern_cast (TREE_TYPE (irhs1), rhs2);
	  else if (absu_hwi (out_prec - prec1) > absu_hwi (out_prec - prec2))
	    irhs1 = adjust_bool_pattern_cast (TREE_TYPE (irhs2), rhs1);
	  else
	    {
	      irhs1 = adjust_bool_pattern_cast (out_type, rhs1);
	      irhs2 = adjust_bool_pattern_cast (out_type, rhs2);
	    }
	}
      itype = TREE_TYPE (irhs1);
      pattern_stmt
	= gimple_build_assign_with_ops (rhs_code,
					vect_recog_temp_ssa_var (itype, NULL),
					irhs1, irhs2);
      break;

    default:
      gcc_assert (TREE_CODE_CLASS (rhs_code) == tcc_comparison);
      if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE
	  || !TYPE_UNSIGNED (TREE_TYPE (rhs1))
	  || (TYPE_PRECISION (TREE_TYPE (rhs1))
	      != GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (rhs1)))))
	{
	  enum machine_mode mode = TYPE_MODE (TREE_TYPE (rhs1));
	  itype
	    = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
	}
      else
	itype = TREE_TYPE (rhs1);
      cond_expr = build2_loc (loc, rhs_code, itype, rhs1, rhs2);
      if (trueval == NULL_TREE)
	trueval = build_int_cst (itype, 1);
      else
	gcc_checking_assert (useless_type_conversion_p (itype,
							TREE_TYPE (trueval)));
      pattern_stmt
	= gimple_build_assign_with_ops (COND_EXPR,
					vect_recog_temp_ssa_var (itype, NULL),
					cond_expr, trueval,
					build_int_cst (itype, 0));
      break;
    }

  stmts->safe_push (stmt);
  gimple_set_location (pattern_stmt, loc);
  STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt)) = pattern_stmt;
  return gimple_assign_lhs (pattern_stmt);
}


/* Function vect_recog_bool_pattern

   Try to find a pattern like the following:

     bool a_b, b_b, c_b, d_b, e_b;
     TYPE f_T;
   loop:
     S1  a_b = x1 CMP1 y1;
     S2  b_b = x2 CMP2 y2;
     S3  c_b = a_b & b_b;
     S4  d_b = x3 CMP3 y3;
     S5  e_b = c_b | d_b;
     S6  f_T = (TYPE) e_b;

   where type 'TYPE' is an integral type.

   Input:

   * LAST_STMT: A stmt at the end from which the pattern
		search begins, i.e. cast of a bool to
		an integer type.

   Output:

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the pattern.

	Assuming the size of TYPE is the same as the size of all comparisons
	(otherwise some casts would be added where needed), for the above
	sequence we create the related pattern stmts:
	S1'  a_T = x1 CMP1 y1 ? 1 : 0;
	S3'  c_T = x2 CMP2 y2 ? a_T : 0;
	S4'  d_T = x3 CMP3 y3 ? 1 : 0;
	S5'  e_T = c_T | d_T;
	S6'  f_T = e_T;

	Instead of the above S3' we could emit:
	S2'  b_T = x2 CMP2 y2 ? 1 : 0;
	S3'  c_T = a_T & b_T;
	but the above is more efficient.  */
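
/* Illustrative sketch (not part of the pattern code): a scalar source
   loop that this pattern targets:

     void
     bool_loop (int *x1, int *y1, int *x2, int *y2, int *f, int n)
     {
       for (int i = 0; i < n; i++)
	 {
	   _Bool a = x1[i] < y1[i];	// S1
	   _Bool b = x2[i] < y2[i];	// S2
	   f[i] = a & b;		// S3 + S6: int = (int) (a & b)
	 }

   The bool operations are rewritten into integer COND_EXPRs so the
   whole computation can be carried out on vector integer masks.  */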

static gimple
vect_recog_bool_pattern (vec<gimple> *stmts, tree *type_in,
			 tree *type_out)
{
  gimple last_stmt = stmts->pop ();
  enum tree_code rhs_code;
  tree var, lhs, rhs, vectype;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
  gimple pattern_stmt;

  if (!is_gimple_assign (last_stmt))
    return NULL;

  var = gimple_assign_rhs1 (last_stmt);
  lhs = gimple_assign_lhs (last_stmt);

  if ((TYPE_PRECISION (TREE_TYPE (var)) != 1
       || !TYPE_UNSIGNED (TREE_TYPE (var)))
      && TREE_CODE (TREE_TYPE (var)) != BOOLEAN_TYPE)
    return NULL;

  rhs_code = gimple_assign_rhs_code (last_stmt);
  if (CONVERT_EXPR_CODE_P (rhs_code))
    {
      if (TREE_CODE (TREE_TYPE (lhs)) != INTEGER_TYPE
	  || TYPE_PRECISION (TREE_TYPE (lhs)) == 1)
	return NULL;
      vectype = get_vectype_for_scalar_type (TREE_TYPE (lhs));
      if (vectype == NULL_TREE)
	return NULL;

      if (!check_bool_pattern (var, loop_vinfo, bb_vinfo))
	return NULL;

      rhs = adjust_bool_pattern (var, TREE_TYPE (lhs), NULL_TREE, stmts);
      lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
      if (useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
	pattern_stmt
	  = gimple_build_assign_with_ops (SSA_NAME, lhs, rhs, NULL_TREE);
      else
	pattern_stmt
	  = gimple_build_assign_with_ops (NOP_EXPR, lhs, rhs, NULL_TREE);
      *type_out = vectype;
      *type_in = vectype;
      stmts->safe_push (last_stmt);
      if (dump_enabled_p ())
	dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
                         "vect_recog_bool_pattern: detected: ");

      return pattern_stmt;
    }
  else if (rhs_code == SSA_NAME
	   && STMT_VINFO_DATA_REF (stmt_vinfo))
    {
      stmt_vec_info pattern_stmt_info;
      vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
      gcc_assert (vectype != NULL_TREE);
      if (!VECTOR_MODE_P (TYPE_MODE (vectype)))
	return NULL;
      if (!check_bool_pattern (var, loop_vinfo, bb_vinfo))
	return NULL;

      rhs = adjust_bool_pattern (var, TREE_TYPE (vectype), NULL_TREE, stmts);
      lhs = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (vectype), lhs);
      if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
	{
	  tree rhs2 = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
	  gimple cast_stmt
	    = gimple_build_assign_with_ops (NOP_EXPR, rhs2, rhs, NULL_TREE);
	  new_pattern_def_seq (stmt_vinfo, cast_stmt);
	  rhs = rhs2;
	}
      pattern_stmt
	= gimple_build_assign_with_ops (SSA_NAME, lhs, rhs, NULL_TREE);
      pattern_stmt_info = new_stmt_vec_info (pattern_stmt, loop_vinfo,
					     bb_vinfo);
      set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
      STMT_VINFO_DATA_REF (pattern_stmt_info)
	= STMT_VINFO_DATA_REF (stmt_vinfo);
      STMT_VINFO_DR_BASE_ADDRESS (pattern_stmt_info)
	= STMT_VINFO_DR_BASE_ADDRESS (stmt_vinfo);
      STMT_VINFO_DR_INIT (pattern_stmt_info) = STMT_VINFO_DR_INIT (stmt_vinfo);
      STMT_VINFO_DR_OFFSET (pattern_stmt_info)
	= STMT_VINFO_DR_OFFSET (stmt_vinfo);
      STMT_VINFO_DR_STEP (pattern_stmt_info) = STMT_VINFO_DR_STEP (stmt_vinfo);
      STMT_VINFO_DR_ALIGNED_TO (pattern_stmt_info)
	= STMT_VINFO_DR_ALIGNED_TO (stmt_vinfo);
      DR_STMT (STMT_VINFO_DATA_REF (stmt_vinfo)) = pattern_stmt;
      *type_out = vectype;
      *type_in = vectype;
      stmts->safe_push (last_stmt);
      if (dump_enabled_p ())
	dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
                         "vect_recog_bool_pattern: detected: ");
      return pattern_stmt;
    }
  else
    return NULL;
}


/* Mark statements that are involved in a pattern.  */

static inline void
vect_mark_pattern_stmts (gimple orig_stmt, gimple pattern_stmt,
                         tree pattern_vectype)
{
  stmt_vec_info pattern_stmt_info, def_stmt_info;
  stmt_vec_info orig_stmt_info = vinfo_for_stmt (orig_stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (orig_stmt_info);
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (orig_stmt_info);
  gimple def_stmt;

  pattern_stmt_info = vinfo_for_stmt (pattern_stmt);
  if (pattern_stmt_info == NULL)
    {
      pattern_stmt_info = new_stmt_vec_info (pattern_stmt, loop_vinfo,
					     bb_vinfo);
      set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
    }
  gimple_set_bb (pattern_stmt, gimple_bb (orig_stmt));

  STMT_VINFO_RELATED_STMT (pattern_stmt_info) = orig_stmt;
  STMT_VINFO_DEF_TYPE (pattern_stmt_info)
    = STMT_VINFO_DEF_TYPE (orig_stmt_info);
  STMT_VINFO_VECTYPE (pattern_stmt_info) = pattern_vectype;
  STMT_VINFO_IN_PATTERN_P (orig_stmt_info) = true;
  STMT_VINFO_RELATED_STMT (orig_stmt_info) = pattern_stmt;
  STMT_VINFO_PATTERN_DEF_SEQ (pattern_stmt_info)
    = STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt_info);
  if (STMT_VINFO_PATTERN_DEF_SEQ (pattern_stmt_info))
    {
      gimple_stmt_iterator si;
      for (si = gsi_start (STMT_VINFO_PATTERN_DEF_SEQ (pattern_stmt_info));
	   !gsi_end_p (si); gsi_next (&si))
	{
	  def_stmt = gsi_stmt (si);
	  def_stmt_info = vinfo_for_stmt (def_stmt);
	  if (def_stmt_info == NULL)
	    {
	      def_stmt_info = new_stmt_vec_info (def_stmt, loop_vinfo,
						 bb_vinfo);
	      set_vinfo_for_stmt (def_stmt, def_stmt_info);
	    }
	  gimple_set_bb (def_stmt, gimple_bb (orig_stmt));
	  STMT_VINFO_RELATED_STMT (def_stmt_info) = orig_stmt;
	  STMT_VINFO_DEF_TYPE (def_stmt_info)
	    = STMT_VINFO_DEF_TYPE (orig_stmt_info);
	  if (STMT_VINFO_VECTYPE (def_stmt_info) == NULL_TREE)
	    STMT_VINFO_VECTYPE (def_stmt_info) = pattern_vectype;
	}
    }
}

/* Function vect_pattern_recog_1

   Input:
   PATTERN_RECOG_FUNC: A pointer to a function that detects a certain
        computation pattern.
   STMT: A stmt from which the pattern search should start.

   If PATTERN_RECOG_FUNC successfully detected the pattern, it creates an
   expression that computes the same functionality and can be used to
   replace the sequence of stmts that are involved in the pattern.

   Output:
   This function checks if the expression returned by PATTERN_RECOG_FUNC is
   supported in vector form by the target.  We use 'TYPE_IN' to obtain the
   relevant vector type.  If 'TYPE_IN' is already a vector type, then this
   indicates that target support has already been checked by
   PATTERN_RECOG_FUNC.  If 'TYPE_OUT' is also returned by PATTERN_RECOG_FUNC,
   we check that it fits the available target pattern.

   This function also does some bookkeeping, as explained in the documentation
   for vect_pattern_recog.  */

static void
vect_pattern_recog_1 (vect_recog_func_ptr vect_recog_func,
		      gimple_stmt_iterator si,
		      vec<gimple> *stmts_to_replace)
{
  gimple stmt = gsi_stmt (si), pattern_stmt;
  stmt_vec_info stmt_info;
  loop_vec_info loop_vinfo;
  tree pattern_vectype;
  tree type_in, type_out;
  enum tree_code code;
  int i;
  gimple next;

  stmts_to_replace->truncate (0);
  stmts_to_replace->quick_push (stmt);
  pattern_stmt = (* vect_recog_func) (stmts_to_replace, &type_in, &type_out);
  if (!pattern_stmt)
    return;

  stmt = stmts_to_replace->last ();
  stmt_info = vinfo_for_stmt (stmt);
  loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);

  if (VECTOR_MODE_P (TYPE_MODE (type_in)))
    {
      /* No need to check target support (already checked by the pattern
         recognition function).  */
      pattern_vectype = type_out ? type_out : type_in;
    }
  else
    {
      enum machine_mode vec_mode;
      enum insn_code icode;
      optab optab;

      /* Check target support.  */
      type_in = get_vectype_for_scalar_type (type_in);
      if (!type_in)
	return;
      if (type_out)
	type_out = get_vectype_for_scalar_type (type_out);
      else
	type_out = type_in;
      if (!type_out)
	return;
      pattern_vectype = type_out;

      if (is_gimple_assign (pattern_stmt))
	code = gimple_assign_rhs_code (pattern_stmt);
      else
        {
	  gcc_assert (is_gimple_call (pattern_stmt));
	  code = CALL_EXPR;
	}

      optab = optab_for_tree_code (code, type_in, optab_default);
      vec_mode = TYPE_MODE (type_in);
      if (!optab
          || (icode = optab_handler (optab, vec_mode)) == CODE_FOR_nothing
          || (insn_data[icode].operand[0].mode != TYPE_MODE (type_out)))
	return;
    }

  /* Found a vectorizable pattern.  */
  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
                       "pattern recognized: ");
      dump_gimple_stmt (MSG_OPTIMIZED_LOCATIONS, TDF_SLIM, pattern_stmt, 0);
    }

  /* Mark the stmts that are involved in the pattern.  */
  vect_mark_pattern_stmts (stmt, pattern_stmt, pattern_vectype);

  /* Patterns cannot be vectorized using SLP, because they change the order of
     computation.  */
  if (loop_vinfo)
    FOR_EACH_VEC_ELT (LOOP_VINFO_REDUCTIONS (loop_vinfo), i, next)
      if (next == stmt)
        LOOP_VINFO_REDUCTIONS (loop_vinfo).ordered_remove (i);

  /* It is possible that additional pattern stmts are created and inserted in
     STMTS_TO_REPLACE.  We create a stmt_info for each of them, and mark the
     relevant statements.  */
  for (i = 0; stmts_to_replace->iterate (i, &stmt)
	      && (unsigned) i < (stmts_to_replace->length () - 1);
       i++)
    {
      stmt_info = vinfo_for_stmt (stmt);
      pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
                           "additional pattern stmt: ");
          dump_gimple_stmt (MSG_OPTIMIZED_LOCATIONS, TDF_SLIM, pattern_stmt, 0);
        }

      vect_mark_pattern_stmts (stmt, pattern_stmt, NULL_TREE);
    }
}


/* Function vect_pattern_recog

   Input:
   LOOP_VINFO - a struct_loop_info of a loop in which we want to look for
        computation idioms.

   Output - for each computation idiom that is detected we create a new stmt
        that provides the same functionality and that can be vectorized.  We
        also record some information in the struct_stmt_info of the relevant
        stmts, as explained below:

   At the entry to this function we have the following stmts, with the
   following initial value in the STMT_VINFO fields:

         stmt                     in_pattern_p  related_stmt    vec_stmt
         S1: a_i = ....                 -       -               -
         S2: a_2 = ..use(a_i)..         -       -               -
         S3: a_1 = ..use(a_2)..         -       -               -
         S4: a_0 = ..use(a_1)..         -       -               -
         S5: ... = ..use(a_0)..         -       -               -

   Say the sequence {S1,S2,S3,S4} was detected as a pattern that can be
   represented by a single stmt.  We then:
   - create a new stmt S6 equivalent to the pattern (the stmt is not
     inserted into the code)
   - fill in the STMT_VINFO fields as follows:

                                  in_pattern_p  related_stmt    vec_stmt
         S1: a_i = ....                 -       -               -
         S2: a_2 = ..use(a_i)..         -       -               -
         S3: a_1 = ..use(a_2)..         -       -               -
         S4: a_0 = ..use(a_1)..         true    S6              -
          '---> S6: a_new = ....        -       S4              -
         S5: ... = ..use(a_0)..         -       -               -

   (the last stmt in the pattern (S4) and the new pattern stmt (S6) point
   to each other through the RELATED_STMT field).

   S6 will be marked as relevant in vect_mark_stmts_to_be_vectorized instead
   of S4 because it will replace all its uses.  Stmts {S1,S2,S3} will
   remain irrelevant unless used by stmts other than S4.

   If vectorization succeeds, vect_transform_stmt will skip over {S1,S2,S3}
   (because they are marked as irrelevant).  It will vectorize S6, and record
   a pointer to the new vector stmt VS6 from S6 (as usual).
   S4 will be skipped, and S5 will be vectorized as usual:

                                  in_pattern_p  related_stmt    vec_stmt
         S1: a_i = ....                 -       -               -
         S2: a_2 = ..use(a_i)..         -       -               -
         S3: a_1 = ..use(a_2)..         -       -               -
       > VS6: va_new = ....             -       -               -
         S4: a_0 = ..use(a_1)..         true    S6              VS6
          '---> S6: a_new = ....        -       S4              VS6
       > VS5: ... = ..vuse(va_new)..    -       -               -
         S5: ... = ..use(a_0)..         -       -               -

   DCE could then get rid of {S1,S2,S3,S4,S5} (if their defs are not used
   elsewhere), and we'll end up with:

        VS6: va_new = ....
        VS5: ... = ..vuse(va_new)..

   In case of more than one pattern statements, e.g., widen-mult with
   intermediate type:

     S1  a_t = ;
     S2  a_T = (TYPE) a_t;
           '--> S3: a_it = (interm_type) a_t;
     S4  prod_T = a_T * CONST;
           '--> S5: prod_T' = a_it w* CONST;

   there may be other users of a_T outside the pattern.  In that case S2 will
   be marked as relevant (as well as S3), and both S2 and S3 will be analyzed
   and vectorized.  The vector stmt VS2 will be recorded in S2, and VS3 will
   be recorded in S3.  */

void
vect_pattern_recog (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
{
  struct loop *loop;
  basic_block *bbs;
  unsigned int nbbs;
  gimple_stmt_iterator si;
  unsigned int i, j;
  vect_recog_func_ptr vect_recog_func;
  vec<gimple> stmts_to_replace;
  stmts_to_replace.create (1);
  gimple stmt;

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "=== vect_pattern_recog ===");

  if (loop_vinfo)
    {
      loop = LOOP_VINFO_LOOP (loop_vinfo);
      bbs = LOOP_VINFO_BBS (loop_vinfo);
      nbbs = loop->num_nodes;
    }
  else
    {
      bbs = &BB_VINFO_BB (bb_vinfo);
      nbbs = 1;
    }

  /* Scan through the loop stmts, applying the pattern recognition
     functions starting at each stmt visited:  */
  for (i = 0; i < nbbs; i++)
    {
      basic_block bb = bbs[i];
      for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
        {
	  if (bb_vinfo && (stmt = gsi_stmt (si))
	      && vinfo_for_stmt (stmt)
	      && !STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (stmt)))
	    continue;

          /* Scan over all generic vect_recog_xxx_pattern functions.  */
          for (j = 0; j < NUM_PATTERNS; j++)
            {
	      vect_recog_func = vect_vect_recog_func_ptrs[j];
	      vect_pattern_recog_1 (vect_recog_func, si,
				    &stmts_to_replace);
            }
        }
    }

  stmts_to_replace.release ();
}