1*e4b17023SJohn Marino /* Analysis Utilities for Loop Vectorization.
2*e4b17023SJohn Marino    Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012
3*e4b17023SJohn Marino    Free Software Foundation, Inc.
4*e4b17023SJohn Marino    Contributed by Dorit Nuzman <dorit@il.ibm.com>
5*e4b17023SJohn Marino 
6*e4b17023SJohn Marino This file is part of GCC.
7*e4b17023SJohn Marino 
8*e4b17023SJohn Marino GCC is free software; you can redistribute it and/or modify it under
9*e4b17023SJohn Marino the terms of the GNU General Public License as published by the Free
10*e4b17023SJohn Marino Software Foundation; either version 3, or (at your option) any later
11*e4b17023SJohn Marino version.
12*e4b17023SJohn Marino 
13*e4b17023SJohn Marino GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14*e4b17023SJohn Marino WARRANTY; without even the implied warranty of MERCHANTABILITY or
15*e4b17023SJohn Marino FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
16*e4b17023SJohn Marino for more details.
17*e4b17023SJohn Marino 
18*e4b17023SJohn Marino You should have received a copy of the GNU General Public License
19*e4b17023SJohn Marino along with GCC; see the file COPYING3.  If not see
20*e4b17023SJohn Marino <http://www.gnu.org/licenses/>.  */
21*e4b17023SJohn Marino 
22*e4b17023SJohn Marino #include "config.h"
23*e4b17023SJohn Marino #include "system.h"
24*e4b17023SJohn Marino #include "coretypes.h"
25*e4b17023SJohn Marino #include "tm.h"
26*e4b17023SJohn Marino #include "ggc.h"
27*e4b17023SJohn Marino #include "tree.h"
28*e4b17023SJohn Marino #include "target.h"
29*e4b17023SJohn Marino #include "basic-block.h"
30*e4b17023SJohn Marino #include "gimple-pretty-print.h"
31*e4b17023SJohn Marino #include "tree-flow.h"
32*e4b17023SJohn Marino #include "tree-dump.h"
33*e4b17023SJohn Marino #include "cfgloop.h"
34*e4b17023SJohn Marino #include "expr.h"
35*e4b17023SJohn Marino #include "optabs.h"
36*e4b17023SJohn Marino #include "params.h"
37*e4b17023SJohn Marino #include "tree-data-ref.h"
38*e4b17023SJohn Marino #include "tree-vectorizer.h"
39*e4b17023SJohn Marino #include "recog.h"
40*e4b17023SJohn Marino #include "diagnostic-core.h"
41*e4b17023SJohn Marino 
42*e4b17023SJohn Marino /* Pattern recognition functions  */
43*e4b17023SJohn Marino static gimple vect_recog_widen_sum_pattern (VEC (gimple, heap) **, tree *,
44*e4b17023SJohn Marino 					    tree *);
45*e4b17023SJohn Marino static gimple vect_recog_widen_mult_pattern (VEC (gimple, heap) **, tree *,
46*e4b17023SJohn Marino 					     tree *);
47*e4b17023SJohn Marino static gimple vect_recog_dot_prod_pattern (VEC (gimple, heap) **, tree *,
48*e4b17023SJohn Marino 					   tree *);
49*e4b17023SJohn Marino static gimple vect_recog_pow_pattern (VEC (gimple, heap) **, tree *, tree *);
50*e4b17023SJohn Marino static gimple vect_recog_over_widening_pattern (VEC (gimple, heap) **, tree *,
51*e4b17023SJohn Marino                                                  tree *);
52*e4b17023SJohn Marino static gimple vect_recog_widen_shift_pattern (VEC (gimple, heap) **,
53*e4b17023SJohn Marino 	                                tree *, tree *);
54*e4b17023SJohn Marino static gimple vect_recog_vector_vector_shift_pattern (VEC (gimple, heap) **,
55*e4b17023SJohn Marino 						      tree *, tree *);
56*e4b17023SJohn Marino static gimple vect_recog_sdivmod_pow2_pattern (VEC (gimple, heap) **,
57*e4b17023SJohn Marino 					       tree *, tree *);
58*e4b17023SJohn Marino static gimple vect_recog_mixed_size_cond_pattern (VEC (gimple, heap) **,
59*e4b17023SJohn Marino 						  tree *, tree *);
60*e4b17023SJohn Marino static gimple vect_recog_bool_pattern (VEC (gimple, heap) **, tree *, tree *);
61*e4b17023SJohn Marino static vect_recog_func_ptr vect_vect_recog_func_ptrs[NUM_PATTERNS] = {
62*e4b17023SJohn Marino 	vect_recog_widen_mult_pattern,
63*e4b17023SJohn Marino 	vect_recog_widen_sum_pattern,
64*e4b17023SJohn Marino 	vect_recog_dot_prod_pattern,
65*e4b17023SJohn Marino 	vect_recog_pow_pattern,
66*e4b17023SJohn Marino 	vect_recog_widen_shift_pattern,
67*e4b17023SJohn Marino 	vect_recog_over_widening_pattern,
68*e4b17023SJohn Marino 	vect_recog_vector_vector_shift_pattern,
69*e4b17023SJohn Marino 	vect_recog_sdivmod_pow2_pattern,
70*e4b17023SJohn Marino 	vect_recog_mixed_size_cond_pattern,
71*e4b17023SJohn Marino 	vect_recog_bool_pattern};
72*e4b17023SJohn Marino 
73*e4b17023SJohn Marino static inline void
append_pattern_def_seq(stmt_vec_info stmt_info,gimple stmt)74*e4b17023SJohn Marino append_pattern_def_seq (stmt_vec_info stmt_info, gimple stmt)
75*e4b17023SJohn Marino {
76*e4b17023SJohn Marino   gimple_seq_add_stmt_without_update (&STMT_VINFO_PATTERN_DEF_SEQ (stmt_info),
77*e4b17023SJohn Marino 				      stmt);
78*e4b17023SJohn Marino }
79*e4b17023SJohn Marino 
80*e4b17023SJohn Marino static inline void
new_pattern_def_seq(stmt_vec_info stmt_info,gimple stmt)81*e4b17023SJohn Marino new_pattern_def_seq (stmt_vec_info stmt_info, gimple stmt)
82*e4b17023SJohn Marino {
83*e4b17023SJohn Marino   STMT_VINFO_PATTERN_DEF_SEQ (stmt_info) = NULL;
84*e4b17023SJohn Marino   append_pattern_def_seq (stmt_info, stmt);
85*e4b17023SJohn Marino }
86*e4b17023SJohn Marino 
87*e4b17023SJohn Marino /* If the LHS of DEF_STMT has a single use, and that statement is
88*e4b17023SJohn Marino    in the same loop, return it.  */
89*e4b17023SJohn Marino 
90*e4b17023SJohn Marino static gimple
vect_single_imm_use(gimple def_stmt)91*e4b17023SJohn Marino vect_single_imm_use (gimple def_stmt)
92*e4b17023SJohn Marino {
93*e4b17023SJohn Marino   stmt_vec_info stmt_vinfo = vinfo_for_stmt (def_stmt);
94*e4b17023SJohn Marino   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
95*e4b17023SJohn Marino   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
96*e4b17023SJohn Marino   tree lhs = gimple_assign_lhs (def_stmt);
97*e4b17023SJohn Marino   use_operand_p use_p;
98*e4b17023SJohn Marino   gimple use_stmt;
99*e4b17023SJohn Marino 
100*e4b17023SJohn Marino   if (!single_imm_use (lhs, &use_p, &use_stmt))
101*e4b17023SJohn Marino     return NULL;
102*e4b17023SJohn Marino 
103*e4b17023SJohn Marino   if (!gimple_bb (use_stmt))
104*e4b17023SJohn Marino     return NULL;
105*e4b17023SJohn Marino 
106*e4b17023SJohn Marino   if (!flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
107*e4b17023SJohn Marino     return NULL;
108*e4b17023SJohn Marino 
109*e4b17023SJohn Marino   gcc_assert (vinfo_for_stmt (use_stmt));
110*e4b17023SJohn Marino   return use_stmt;
111*e4b17023SJohn Marino }
112*e4b17023SJohn Marino 
113*e4b17023SJohn Marino /* Function widened_name_p
114*e4b17023SJohn Marino 
115*e4b17023SJohn Marino    Check whether NAME, an ssa-name used in USE_STMT,
116*e4b17023SJohn Marino    is a result of a type-promotion, such that:
117*e4b17023SJohn Marino      DEF_STMT: NAME = NOP (name0)
118*e4b17023SJohn Marino    where the type of name0 (HALF_TYPE) is smaller than the type of NAME.
119*e4b17023SJohn Marino    If CHECK_SIGN is TRUE, check that either both types are signed or both are
120*e4b17023SJohn Marino    unsigned.  */
121*e4b17023SJohn Marino 
122*e4b17023SJohn Marino static bool
widened_name_p(tree name,gimple use_stmt,tree * half_type,gimple * def_stmt,bool check_sign)123*e4b17023SJohn Marino widened_name_p (tree name, gimple use_stmt, tree *half_type, gimple *def_stmt,
124*e4b17023SJohn Marino 		bool check_sign)
125*e4b17023SJohn Marino {
126*e4b17023SJohn Marino   tree dummy;
127*e4b17023SJohn Marino   gimple dummy_gimple;
128*e4b17023SJohn Marino   loop_vec_info loop_vinfo;
129*e4b17023SJohn Marino   stmt_vec_info stmt_vinfo;
130*e4b17023SJohn Marino   tree type = TREE_TYPE (name);
131*e4b17023SJohn Marino   tree oprnd0;
132*e4b17023SJohn Marino   enum vect_def_type dt;
133*e4b17023SJohn Marino   tree def;
134*e4b17023SJohn Marino 
135*e4b17023SJohn Marino   stmt_vinfo = vinfo_for_stmt (use_stmt);
136*e4b17023SJohn Marino   loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
137*e4b17023SJohn Marino 
138*e4b17023SJohn Marino   if (!vect_is_simple_use (name, use_stmt, loop_vinfo, NULL, def_stmt, &def,
139*e4b17023SJohn Marino 			   &dt))
140*e4b17023SJohn Marino     return false;
141*e4b17023SJohn Marino 
142*e4b17023SJohn Marino   if (dt != vect_internal_def
143*e4b17023SJohn Marino       && dt != vect_external_def && dt != vect_constant_def)
144*e4b17023SJohn Marino     return false;
145*e4b17023SJohn Marino 
146*e4b17023SJohn Marino   if (! *def_stmt)
147*e4b17023SJohn Marino     return false;
148*e4b17023SJohn Marino 
149*e4b17023SJohn Marino   if (!is_gimple_assign (*def_stmt))
150*e4b17023SJohn Marino     return false;
151*e4b17023SJohn Marino 
152*e4b17023SJohn Marino   if (gimple_assign_rhs_code (*def_stmt) != NOP_EXPR)
153*e4b17023SJohn Marino     return false;
154*e4b17023SJohn Marino 
155*e4b17023SJohn Marino   oprnd0 = gimple_assign_rhs1 (*def_stmt);
156*e4b17023SJohn Marino 
157*e4b17023SJohn Marino   *half_type = TREE_TYPE (oprnd0);
158*e4b17023SJohn Marino   if (!INTEGRAL_TYPE_P (type) || !INTEGRAL_TYPE_P (*half_type)
159*e4b17023SJohn Marino       || ((TYPE_UNSIGNED (type) != TYPE_UNSIGNED (*half_type)) && check_sign)
160*e4b17023SJohn Marino       || (TYPE_PRECISION (type) < (TYPE_PRECISION (*half_type) * 2)))
161*e4b17023SJohn Marino     return false;
162*e4b17023SJohn Marino 
163*e4b17023SJohn Marino   if (!vect_is_simple_use (oprnd0, *def_stmt, loop_vinfo,
164*e4b17023SJohn Marino 			   NULL, &dummy_gimple, &dummy, &dt))
165*e4b17023SJohn Marino     return false;
166*e4b17023SJohn Marino 
167*e4b17023SJohn Marino   return true;
168*e4b17023SJohn Marino }
169*e4b17023SJohn Marino 
170*e4b17023SJohn Marino /* Helper to return a new temporary for pattern of TYPE for STMT.  If STMT
171*e4b17023SJohn Marino    is NULL, the caller must set SSA_NAME_DEF_STMT for the returned SSA var. */
172*e4b17023SJohn Marino 
173*e4b17023SJohn Marino static tree
vect_recog_temp_ssa_var(tree type,gimple stmt)174*e4b17023SJohn Marino vect_recog_temp_ssa_var (tree type, gimple stmt)
175*e4b17023SJohn Marino {
176*e4b17023SJohn Marino   tree var = create_tmp_var (type, "patt");
177*e4b17023SJohn Marino 
178*e4b17023SJohn Marino   add_referenced_var (var);
179*e4b17023SJohn Marino   var = make_ssa_name (var, stmt);
180*e4b17023SJohn Marino   return var;
181*e4b17023SJohn Marino }
182*e4b17023SJohn Marino 
183*e4b17023SJohn Marino /* Function vect_recog_dot_prod_pattern
184*e4b17023SJohn Marino 
185*e4b17023SJohn Marino    Try to find the following pattern:
186*e4b17023SJohn Marino 
187*e4b17023SJohn Marino      type x_t, y_t;
188*e4b17023SJohn Marino      TYPE1 prod;
189*e4b17023SJohn Marino      TYPE2 sum = init;
190*e4b17023SJohn Marino    loop:
191*e4b17023SJohn Marino      sum_0 = phi <init, sum_1>
192*e4b17023SJohn Marino      S1  x_t = ...
193*e4b17023SJohn Marino      S2  y_t = ...
194*e4b17023SJohn Marino      S3  x_T = (TYPE1) x_t;
195*e4b17023SJohn Marino      S4  y_T = (TYPE1) y_t;
196*e4b17023SJohn Marino      S5  prod = x_T * y_T;
197*e4b17023SJohn Marino      [S6  prod = (TYPE2) prod;  #optional]
198*e4b17023SJohn Marino      S7  sum_1 = prod + sum_0;
199*e4b17023SJohn Marino 
200*e4b17023SJohn Marino    where 'TYPE1' is exactly double the size of type 'type', and 'TYPE2' is the
201*e4b17023SJohn Marino    same size of 'TYPE1' or bigger. This is a special case of a reduction
202*e4b17023SJohn Marino    computation.
203*e4b17023SJohn Marino 
204*e4b17023SJohn Marino    Input:
205*e4b17023SJohn Marino 
206*e4b17023SJohn Marino    * STMTS: Contains a stmt from which the pattern search begins.  In the
207*e4b17023SJohn Marino    example, when this function is called with S7, the pattern {S3,S4,S5,S6,S7}
208*e4b17023SJohn Marino    will be detected.
209*e4b17023SJohn Marino 
210*e4b17023SJohn Marino    Output:
211*e4b17023SJohn Marino 
212*e4b17023SJohn Marino    * TYPE_IN: The type of the input arguments to the pattern.
213*e4b17023SJohn Marino 
214*e4b17023SJohn Marino    * TYPE_OUT: The type of the output  of this pattern.
215*e4b17023SJohn Marino 
216*e4b17023SJohn Marino    * Return value: A new stmt that will be used to replace the sequence of
217*e4b17023SJohn Marino    stmts that constitute the pattern. In this case it will be:
218*e4b17023SJohn Marino         WIDEN_DOT_PRODUCT <x_t, y_t, sum_0>
219*e4b17023SJohn Marino 
220*e4b17023SJohn Marino    Note: The dot-prod idiom is a widening reduction pattern that is
221*e4b17023SJohn Marino          vectorized without preserving all the intermediate results. It
222*e4b17023SJohn Marino          produces only N/2 (widened) results (by summing up pairs of
223*e4b17023SJohn Marino          intermediate results) rather than all N results.  Therefore, we
224*e4b17023SJohn Marino          cannot allow this pattern when we want to get all the results and in
225*e4b17023SJohn Marino          the correct order (as is the case when this computation is in an
226*e4b17023SJohn Marino          inner-loop nested in an outer-loop that us being vectorized).  */
227*e4b17023SJohn Marino 
228*e4b17023SJohn Marino static gimple
vect_recog_dot_prod_pattern(VEC (gimple,heap)** stmts,tree * type_in,tree * type_out)229*e4b17023SJohn Marino vect_recog_dot_prod_pattern (VEC (gimple, heap) **stmts, tree *type_in,
230*e4b17023SJohn Marino 			     tree *type_out)
231*e4b17023SJohn Marino {
232*e4b17023SJohn Marino   gimple stmt, last_stmt = VEC_index (gimple, *stmts, 0);
233*e4b17023SJohn Marino   tree oprnd0, oprnd1;
234*e4b17023SJohn Marino   tree oprnd00, oprnd01;
235*e4b17023SJohn Marino   stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
236*e4b17023SJohn Marino   tree type, half_type;
237*e4b17023SJohn Marino   gimple pattern_stmt;
238*e4b17023SJohn Marino   tree prod_type;
239*e4b17023SJohn Marino   loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
240*e4b17023SJohn Marino   struct loop *loop = LOOP_VINFO_LOOP (loop_info);
241*e4b17023SJohn Marino   tree var;
242*e4b17023SJohn Marino 
243*e4b17023SJohn Marino   if (!is_gimple_assign (last_stmt))
244*e4b17023SJohn Marino     return NULL;
245*e4b17023SJohn Marino 
246*e4b17023SJohn Marino   type = gimple_expr_type (last_stmt);
247*e4b17023SJohn Marino 
248*e4b17023SJohn Marino   /* Look for the following pattern
249*e4b17023SJohn Marino           DX = (TYPE1) X;
250*e4b17023SJohn Marino           DY = (TYPE1) Y;
251*e4b17023SJohn Marino           DPROD = DX * DY;
252*e4b17023SJohn Marino           DDPROD = (TYPE2) DPROD;
253*e4b17023SJohn Marino           sum_1 = DDPROD + sum_0;
254*e4b17023SJohn Marino      In which
255*e4b17023SJohn Marino      - DX is double the size of X
256*e4b17023SJohn Marino      - DY is double the size of Y
257*e4b17023SJohn Marino      - DX, DY, DPROD all have the same type
258*e4b17023SJohn Marino      - sum is the same size of DPROD or bigger
259*e4b17023SJohn Marino      - sum has been recognized as a reduction variable.
260*e4b17023SJohn Marino 
261*e4b17023SJohn Marino      This is equivalent to:
262*e4b17023SJohn Marino        DPROD = X w* Y;          #widen mult
263*e4b17023SJohn Marino        sum_1 = DPROD w+ sum_0;  #widen summation
264*e4b17023SJohn Marino      or
265*e4b17023SJohn Marino        DPROD = X w* Y;          #widen mult
266*e4b17023SJohn Marino        sum_1 = DPROD + sum_0;   #summation
267*e4b17023SJohn Marino    */
268*e4b17023SJohn Marino 
269*e4b17023SJohn Marino   /* Starting from LAST_STMT, follow the defs of its uses in search
270*e4b17023SJohn Marino      of the above pattern.  */
271*e4b17023SJohn Marino 
272*e4b17023SJohn Marino   if (gimple_assign_rhs_code (last_stmt) != PLUS_EXPR)
273*e4b17023SJohn Marino     return NULL;
274*e4b17023SJohn Marino 
275*e4b17023SJohn Marino   if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
276*e4b17023SJohn Marino     {
277*e4b17023SJohn Marino       /* Has been detected as widening-summation?  */
278*e4b17023SJohn Marino 
279*e4b17023SJohn Marino       stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
280*e4b17023SJohn Marino       type = gimple_expr_type (stmt);
281*e4b17023SJohn Marino       if (gimple_assign_rhs_code (stmt) != WIDEN_SUM_EXPR)
282*e4b17023SJohn Marino         return NULL;
283*e4b17023SJohn Marino       oprnd0 = gimple_assign_rhs1 (stmt);
284*e4b17023SJohn Marino       oprnd1 = gimple_assign_rhs2 (stmt);
285*e4b17023SJohn Marino       half_type = TREE_TYPE (oprnd0);
286*e4b17023SJohn Marino     }
287*e4b17023SJohn Marino   else
288*e4b17023SJohn Marino     {
289*e4b17023SJohn Marino       gimple def_stmt;
290*e4b17023SJohn Marino 
291*e4b17023SJohn Marino       if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def)
292*e4b17023SJohn Marino         return NULL;
293*e4b17023SJohn Marino       oprnd0 = gimple_assign_rhs1 (last_stmt);
294*e4b17023SJohn Marino       oprnd1 = gimple_assign_rhs2 (last_stmt);
295*e4b17023SJohn Marino       if (!types_compatible_p (TREE_TYPE (oprnd0), type)
296*e4b17023SJohn Marino 	  || !types_compatible_p (TREE_TYPE (oprnd1), type))
297*e4b17023SJohn Marino         return NULL;
298*e4b17023SJohn Marino       stmt = last_stmt;
299*e4b17023SJohn Marino 
300*e4b17023SJohn Marino       if (widened_name_p (oprnd0, stmt, &half_type, &def_stmt, true))
301*e4b17023SJohn Marino         {
302*e4b17023SJohn Marino           stmt = def_stmt;
303*e4b17023SJohn Marino           oprnd0 = gimple_assign_rhs1 (stmt);
304*e4b17023SJohn Marino         }
305*e4b17023SJohn Marino       else
306*e4b17023SJohn Marino         half_type = type;
307*e4b17023SJohn Marino     }
308*e4b17023SJohn Marino 
309*e4b17023SJohn Marino   /* So far so good.  Since last_stmt was detected as a (summation) reduction,
310*e4b17023SJohn Marino      we know that oprnd1 is the reduction variable (defined by a loop-header
311*e4b17023SJohn Marino      phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
312*e4b17023SJohn Marino      Left to check that oprnd0 is defined by a (widen_)mult_expr  */
313*e4b17023SJohn Marino   if (TREE_CODE (oprnd0) != SSA_NAME)
314*e4b17023SJohn Marino     return NULL;
315*e4b17023SJohn Marino 
316*e4b17023SJohn Marino   prod_type = half_type;
317*e4b17023SJohn Marino   stmt = SSA_NAME_DEF_STMT (oprnd0);
318*e4b17023SJohn Marino 
319*e4b17023SJohn Marino   /* It could not be the dot_prod pattern if the stmt is outside the loop.  */
320*e4b17023SJohn Marino   if (!gimple_bb (stmt) || !flow_bb_inside_loop_p (loop, gimple_bb (stmt)))
321*e4b17023SJohn Marino     return NULL;
322*e4b17023SJohn Marino 
323*e4b17023SJohn Marino   /* FORNOW.  Can continue analyzing the def-use chain when this stmt in a phi
324*e4b17023SJohn Marino      inside the loop (in case we are analyzing an outer-loop).  */
325*e4b17023SJohn Marino   if (!is_gimple_assign (stmt))
326*e4b17023SJohn Marino     return NULL;
327*e4b17023SJohn Marino   stmt_vinfo = vinfo_for_stmt (stmt);
328*e4b17023SJohn Marino   gcc_assert (stmt_vinfo);
329*e4b17023SJohn Marino   if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_internal_def)
330*e4b17023SJohn Marino     return NULL;
331*e4b17023SJohn Marino   if (gimple_assign_rhs_code (stmt) != MULT_EXPR)
332*e4b17023SJohn Marino     return NULL;
333*e4b17023SJohn Marino   if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
334*e4b17023SJohn Marino     {
335*e4b17023SJohn Marino       /* Has been detected as a widening multiplication?  */
336*e4b17023SJohn Marino 
337*e4b17023SJohn Marino       stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
338*e4b17023SJohn Marino       if (gimple_assign_rhs_code (stmt) != WIDEN_MULT_EXPR)
339*e4b17023SJohn Marino         return NULL;
340*e4b17023SJohn Marino       stmt_vinfo = vinfo_for_stmt (stmt);
341*e4b17023SJohn Marino       gcc_assert (stmt_vinfo);
342*e4b17023SJohn Marino       gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_internal_def);
343*e4b17023SJohn Marino       oprnd00 = gimple_assign_rhs1 (stmt);
344*e4b17023SJohn Marino       oprnd01 = gimple_assign_rhs2 (stmt);
345*e4b17023SJohn Marino     }
346*e4b17023SJohn Marino   else
347*e4b17023SJohn Marino     {
348*e4b17023SJohn Marino       tree half_type0, half_type1;
349*e4b17023SJohn Marino       gimple def_stmt;
350*e4b17023SJohn Marino       tree oprnd0, oprnd1;
351*e4b17023SJohn Marino 
352*e4b17023SJohn Marino       oprnd0 = gimple_assign_rhs1 (stmt);
353*e4b17023SJohn Marino       oprnd1 = gimple_assign_rhs2 (stmt);
354*e4b17023SJohn Marino       if (!types_compatible_p (TREE_TYPE (oprnd0), prod_type)
355*e4b17023SJohn Marino           || !types_compatible_p (TREE_TYPE (oprnd1), prod_type))
356*e4b17023SJohn Marino         return NULL;
357*e4b17023SJohn Marino       if (!widened_name_p (oprnd0, stmt, &half_type0, &def_stmt, true))
358*e4b17023SJohn Marino         return NULL;
359*e4b17023SJohn Marino       oprnd00 = gimple_assign_rhs1 (def_stmt);
360*e4b17023SJohn Marino       if (!widened_name_p (oprnd1, stmt, &half_type1, &def_stmt, true))
361*e4b17023SJohn Marino         return NULL;
362*e4b17023SJohn Marino       oprnd01 = gimple_assign_rhs1 (def_stmt);
363*e4b17023SJohn Marino       if (!types_compatible_p (half_type0, half_type1))
364*e4b17023SJohn Marino         return NULL;
365*e4b17023SJohn Marino       if (TYPE_PRECISION (prod_type) != TYPE_PRECISION (half_type0) * 2)
366*e4b17023SJohn Marino 	return NULL;
367*e4b17023SJohn Marino     }
368*e4b17023SJohn Marino 
369*e4b17023SJohn Marino   half_type = TREE_TYPE (oprnd00);
370*e4b17023SJohn Marino   *type_in = half_type;
371*e4b17023SJohn Marino   *type_out = type;
372*e4b17023SJohn Marino 
373*e4b17023SJohn Marino   /* Pattern detected. Create a stmt to be used to replace the pattern: */
374*e4b17023SJohn Marino   var = vect_recog_temp_ssa_var (type, NULL);
375*e4b17023SJohn Marino   pattern_stmt = gimple_build_assign_with_ops3 (DOT_PROD_EXPR, var,
376*e4b17023SJohn Marino 						oprnd00, oprnd01, oprnd1);
377*e4b17023SJohn Marino 
378*e4b17023SJohn Marino   if (vect_print_dump_info (REPORT_DETAILS))
379*e4b17023SJohn Marino     {
380*e4b17023SJohn Marino       fprintf (vect_dump, "vect_recog_dot_prod_pattern: detected: ");
381*e4b17023SJohn Marino       print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM);
382*e4b17023SJohn Marino     }
383*e4b17023SJohn Marino 
384*e4b17023SJohn Marino   /* We don't allow changing the order of the computation in the inner-loop
385*e4b17023SJohn Marino      when doing outer-loop vectorization.  */
386*e4b17023SJohn Marino   gcc_assert (!nested_in_vect_loop_p (loop, last_stmt));
387*e4b17023SJohn Marino 
388*e4b17023SJohn Marino   return pattern_stmt;
389*e4b17023SJohn Marino }
390*e4b17023SJohn Marino 
391*e4b17023SJohn Marino 
392*e4b17023SJohn Marino /* Handle widening operation by a constant.  At the moment we support MULT_EXPR
393*e4b17023SJohn Marino    and LSHIFT_EXPR.
394*e4b17023SJohn Marino 
395*e4b17023SJohn Marino    For MULT_EXPR we check that CONST_OPRND fits HALF_TYPE, and for LSHIFT_EXPR
396*e4b17023SJohn Marino    we check that CONST_OPRND is less or equal to the size of HALF_TYPE.
397*e4b17023SJohn Marino 
398*e4b17023SJohn Marino    Otherwise, if the type of the result (TYPE) is at least 4 times bigger than
399*e4b17023SJohn Marino    HALF_TYPE, and there is an intermediate type (2 times smaller than TYPE)
400*e4b17023SJohn Marino    that satisfies the above restrictions,  we can perform a widening opeartion
401*e4b17023SJohn Marino    from the intermediate type to TYPE and replace a_T = (TYPE) a_t;
402*e4b17023SJohn Marino    with a_it = (interm_type) a_t;  */
403*e4b17023SJohn Marino 
404*e4b17023SJohn Marino static bool
vect_handle_widen_op_by_const(gimple stmt,enum tree_code code,tree const_oprnd,tree * oprnd,VEC (gimple,heap)** stmts,tree type,tree * half_type,gimple def_stmt)405*e4b17023SJohn Marino vect_handle_widen_op_by_const (gimple stmt, enum tree_code code,
406*e4b17023SJohn Marino 		               tree const_oprnd, tree *oprnd,
407*e4b17023SJohn Marino    		               VEC (gimple, heap) **stmts, tree type,
408*e4b17023SJohn Marino 			       tree *half_type, gimple def_stmt)
409*e4b17023SJohn Marino {
410*e4b17023SJohn Marino   tree new_type, new_oprnd, tmp;
411*e4b17023SJohn Marino   gimple new_stmt;
412*e4b17023SJohn Marino   loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (vinfo_for_stmt (stmt));
413*e4b17023SJohn Marino   struct loop *loop = LOOP_VINFO_LOOP (loop_info);
414*e4b17023SJohn Marino 
415*e4b17023SJohn Marino   if (code != MULT_EXPR && code != LSHIFT_EXPR)
416*e4b17023SJohn Marino     return false;
417*e4b17023SJohn Marino 
418*e4b17023SJohn Marino   if (((code == MULT_EXPR && int_fits_type_p (const_oprnd, *half_type))
419*e4b17023SJohn Marino         || (code == LSHIFT_EXPR
420*e4b17023SJohn Marino             && compare_tree_int (const_oprnd, TYPE_PRECISION (*half_type))
421*e4b17023SJohn Marino 	    	!= 1))
422*e4b17023SJohn Marino       && TYPE_PRECISION (type) == (TYPE_PRECISION (*half_type) * 2))
423*e4b17023SJohn Marino     {
424*e4b17023SJohn Marino       /* CONST_OPRND is a constant of HALF_TYPE.  */
425*e4b17023SJohn Marino       *oprnd = gimple_assign_rhs1 (def_stmt);
426*e4b17023SJohn Marino       return true;
427*e4b17023SJohn Marino     }
428*e4b17023SJohn Marino 
429*e4b17023SJohn Marino   if (TYPE_PRECISION (type) < (TYPE_PRECISION (*half_type) * 4)
430*e4b17023SJohn Marino       || !gimple_bb (def_stmt)
431*e4b17023SJohn Marino       || !flow_bb_inside_loop_p (loop, gimple_bb (def_stmt))
432*e4b17023SJohn Marino       || !vinfo_for_stmt (def_stmt))
433*e4b17023SJohn Marino     return false;
434*e4b17023SJohn Marino 
435*e4b17023SJohn Marino   /* TYPE is 4 times bigger than HALF_TYPE, try widening operation for
436*e4b17023SJohn Marino      a type 2 times bigger than HALF_TYPE.  */
437*e4b17023SJohn Marino   new_type = build_nonstandard_integer_type (TYPE_PRECISION (type) / 2,
438*e4b17023SJohn Marino                                              TYPE_UNSIGNED (type));
439*e4b17023SJohn Marino   if ((code == MULT_EXPR && !int_fits_type_p (const_oprnd, new_type))
440*e4b17023SJohn Marino       || (code == LSHIFT_EXPR
441*e4b17023SJohn Marino           && compare_tree_int (const_oprnd, TYPE_PRECISION (new_type)) == 1))
442*e4b17023SJohn Marino     return false;
443*e4b17023SJohn Marino 
444*e4b17023SJohn Marino   /* Use NEW_TYPE for widening operation.  */
445*e4b17023SJohn Marino   if (STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)))
446*e4b17023SJohn Marino     {
447*e4b17023SJohn Marino       new_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt));
448*e4b17023SJohn Marino       /* Check if the already created pattern stmt is what we need.  */
449*e4b17023SJohn Marino       if (!is_gimple_assign (new_stmt)
450*e4b17023SJohn Marino           || gimple_assign_rhs_code (new_stmt) != NOP_EXPR
451*e4b17023SJohn Marino           || TREE_TYPE (gimple_assign_lhs (new_stmt)) != new_type)
452*e4b17023SJohn Marino         return false;
453*e4b17023SJohn Marino 
454*e4b17023SJohn Marino       VEC_safe_push (gimple, heap, *stmts, def_stmt);
455*e4b17023SJohn Marino       *oprnd = gimple_assign_lhs (new_stmt);
456*e4b17023SJohn Marino     }
457*e4b17023SJohn Marino   else
458*e4b17023SJohn Marino     {
459*e4b17023SJohn Marino       /* Create a_T = (NEW_TYPE) a_t;  */
460*e4b17023SJohn Marino       *oprnd = gimple_assign_rhs1 (def_stmt);
461*e4b17023SJohn Marino       tmp = create_tmp_var (new_type, NULL);
462*e4b17023SJohn Marino       add_referenced_var (tmp);
463*e4b17023SJohn Marino       new_oprnd = make_ssa_name (tmp, NULL);
464*e4b17023SJohn Marino       new_stmt = gimple_build_assign_with_ops (NOP_EXPR, new_oprnd, *oprnd,
465*e4b17023SJohn Marino 					       NULL_TREE);
466*e4b17023SJohn Marino       STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)) = new_stmt;
467*e4b17023SJohn Marino       VEC_safe_push (gimple, heap, *stmts, def_stmt);
468*e4b17023SJohn Marino       *oprnd = new_oprnd;
469*e4b17023SJohn Marino     }
470*e4b17023SJohn Marino 
471*e4b17023SJohn Marino   *half_type = new_type;
472*e4b17023SJohn Marino   return true;
473*e4b17023SJohn Marino }
474*e4b17023SJohn Marino 
475*e4b17023SJohn Marino 
476*e4b17023SJohn Marino /* Function vect_recog_widen_mult_pattern
477*e4b17023SJohn Marino 
478*e4b17023SJohn Marino    Try to find the following pattern:
479*e4b17023SJohn Marino 
480*e4b17023SJohn Marino      type a_t, b_t;
481*e4b17023SJohn Marino      TYPE a_T, b_T, prod_T;
482*e4b17023SJohn Marino 
483*e4b17023SJohn Marino      S1  a_t = ;
484*e4b17023SJohn Marino      S2  b_t = ;
485*e4b17023SJohn Marino      S3  a_T = (TYPE) a_t;
486*e4b17023SJohn Marino      S4  b_T = (TYPE) b_t;
487*e4b17023SJohn Marino      S5  prod_T = a_T * b_T;
488*e4b17023SJohn Marino 
489*e4b17023SJohn Marino    where type 'TYPE' is at least double the size of type 'type'.
490*e4b17023SJohn Marino 
491*e4b17023SJohn Marino    Also detect unsgigned cases:
492*e4b17023SJohn Marino 
493*e4b17023SJohn Marino      unsigned type a_t, b_t;
494*e4b17023SJohn Marino      unsigned TYPE u_prod_T;
495*e4b17023SJohn Marino      TYPE a_T, b_T, prod_T;
496*e4b17023SJohn Marino 
497*e4b17023SJohn Marino      S1  a_t = ;
498*e4b17023SJohn Marino      S2  b_t = ;
499*e4b17023SJohn Marino      S3  a_T = (TYPE) a_t;
500*e4b17023SJohn Marino      S4  b_T = (TYPE) b_t;
501*e4b17023SJohn Marino      S5  prod_T = a_T * b_T;
502*e4b17023SJohn Marino      S6  u_prod_T = (unsigned TYPE) prod_T;
503*e4b17023SJohn Marino 
504*e4b17023SJohn Marino    and multiplication by constants:
505*e4b17023SJohn Marino 
506*e4b17023SJohn Marino      type a_t;
507*e4b17023SJohn Marino      TYPE a_T, prod_T;
508*e4b17023SJohn Marino 
509*e4b17023SJohn Marino      S1  a_t = ;
510*e4b17023SJohn Marino      S3  a_T = (TYPE) a_t;
511*e4b17023SJohn Marino      S5  prod_T = a_T * CONST;
512*e4b17023SJohn Marino 
513*e4b17023SJohn Marino    A special case of multiplication by constants is when 'TYPE' is 4 times
514*e4b17023SJohn Marino    bigger than 'type', but CONST fits an intermediate type 2 times smaller
515*e4b17023SJohn Marino    than 'TYPE'.  In that case we create an additional pattern stmt for S3
516*e4b17023SJohn Marino    to create a variable of the intermediate type, and perform widen-mult
517*e4b17023SJohn Marino    on the intermediate type as well:
518*e4b17023SJohn Marino 
519*e4b17023SJohn Marino      type a_t;
520*e4b17023SJohn Marino      interm_type a_it;
521*e4b17023SJohn Marino      TYPE a_T, prod_T,  prod_T';
522*e4b17023SJohn Marino 
523*e4b17023SJohn Marino      S1  a_t = ;
524*e4b17023SJohn Marino      S3  a_T = (TYPE) a_t;
525*e4b17023SJohn Marino            '--> a_it = (interm_type) a_t;
526*e4b17023SJohn Marino      S5  prod_T = a_T * CONST;
527*e4b17023SJohn Marino            '--> prod_T' = a_it w* CONST;
528*e4b17023SJohn Marino 
529*e4b17023SJohn Marino    Input/Output:
530*e4b17023SJohn Marino 
531*e4b17023SJohn Marino    * STMTS: Contains a stmt from which the pattern search begins.  In the
532*e4b17023SJohn Marino    example, when this function is called with S5, the pattern {S3,S4,S5,(S6)}
533*e4b17023SJohn Marino    is detected.  In case of unsigned widen-mult, the original stmt (S5) is
534*e4b17023SJohn Marino    replaced with S6 in STMTS.  In case of multiplication by a constant
535*e4b17023SJohn Marino    of an intermediate type (the last case above), STMTS also contains S3
536*e4b17023SJohn Marino    (inserted before S5).
537*e4b17023SJohn Marino 
538*e4b17023SJohn Marino    Output:
539*e4b17023SJohn Marino 
540*e4b17023SJohn Marino    * TYPE_IN: The type of the input arguments to the pattern.
541*e4b17023SJohn Marino 
542*e4b17023SJohn Marino    * TYPE_OUT: The type of the output of this pattern.
543*e4b17023SJohn Marino 
544*e4b17023SJohn Marino    * Return value: A new stmt that will be used to replace the sequence of
545*e4b17023SJohn Marino    stmts that constitute the pattern.  In this case it will be:
546*e4b17023SJohn Marino         WIDEN_MULT <a_t, b_t>
547*e4b17023SJohn Marino */
548*e4b17023SJohn Marino 
549*e4b17023SJohn Marino static gimple
vect_recog_widen_mult_pattern(VEC (gimple,heap)** stmts,tree * type_in,tree * type_out)550*e4b17023SJohn Marino vect_recog_widen_mult_pattern (VEC (gimple, heap) **stmts,
551*e4b17023SJohn Marino                                tree *type_in, tree *type_out)
552*e4b17023SJohn Marino {
553*e4b17023SJohn Marino   gimple last_stmt = VEC_pop (gimple, *stmts);
554*e4b17023SJohn Marino   gimple def_stmt0, def_stmt1;
555*e4b17023SJohn Marino   tree oprnd0, oprnd1;
556*e4b17023SJohn Marino   tree type, half_type0, half_type1;
557*e4b17023SJohn Marino   gimple pattern_stmt;
558*e4b17023SJohn Marino   tree vectype, vectype_out = NULL_TREE;
559*e4b17023SJohn Marino   tree dummy;
560*e4b17023SJohn Marino   tree var;
561*e4b17023SJohn Marino   enum tree_code dummy_code;
562*e4b17023SJohn Marino   int dummy_int;
563*e4b17023SJohn Marino   VEC (tree, heap) *dummy_vec;
564*e4b17023SJohn Marino   bool op1_ok;
565*e4b17023SJohn Marino 
566*e4b17023SJohn Marino   if (!is_gimple_assign (last_stmt))
567*e4b17023SJohn Marino     return NULL;
568*e4b17023SJohn Marino 
569*e4b17023SJohn Marino   type = gimple_expr_type (last_stmt);
570*e4b17023SJohn Marino 
571*e4b17023SJohn Marino   /* Starting from LAST_STMT, follow the defs of its uses in search
572*e4b17023SJohn Marino      of the above pattern.  */
573*e4b17023SJohn Marino 
574*e4b17023SJohn Marino   if (gimple_assign_rhs_code (last_stmt) != MULT_EXPR)
575*e4b17023SJohn Marino     return NULL;
576*e4b17023SJohn Marino 
577*e4b17023SJohn Marino   oprnd0 = gimple_assign_rhs1 (last_stmt);
578*e4b17023SJohn Marino   oprnd1 = gimple_assign_rhs2 (last_stmt);
579*e4b17023SJohn Marino   if (!types_compatible_p (TREE_TYPE (oprnd0), type)
580*e4b17023SJohn Marino       || !types_compatible_p (TREE_TYPE (oprnd1), type))
581*e4b17023SJohn Marino     return NULL;
582*e4b17023SJohn Marino 
583*e4b17023SJohn Marino   /* Check argument 0.  */
584*e4b17023SJohn Marino   if (!widened_name_p (oprnd0, last_stmt, &half_type0, &def_stmt0, false))
585*e4b17023SJohn Marino     return NULL;
586*e4b17023SJohn Marino   /* Check argument 1.  */
587*e4b17023SJohn Marino   op1_ok = widened_name_p (oprnd1, last_stmt, &half_type1, &def_stmt1, false);
588*e4b17023SJohn Marino 
589*e4b17023SJohn Marino   if (op1_ok)
590*e4b17023SJohn Marino     {
591*e4b17023SJohn Marino       oprnd0 = gimple_assign_rhs1 (def_stmt0);
592*e4b17023SJohn Marino       oprnd1 = gimple_assign_rhs1 (def_stmt1);
593*e4b17023SJohn Marino     }
594*e4b17023SJohn Marino   else
595*e4b17023SJohn Marino     {
596*e4b17023SJohn Marino       if (TREE_CODE (oprnd1) == INTEGER_CST
597*e4b17023SJohn Marino           && TREE_CODE (half_type0) == INTEGER_TYPE
598*e4b17023SJohn Marino           && vect_handle_widen_op_by_const (last_stmt, MULT_EXPR, oprnd1,
599*e4b17023SJohn Marino 		                            &oprnd0, stmts, type,
600*e4b17023SJohn Marino 					    &half_type0, def_stmt0))
601*e4b17023SJohn Marino         half_type1 = half_type0;
602*e4b17023SJohn Marino       else
603*e4b17023SJohn Marino         return NULL;
604*e4b17023SJohn Marino     }
605*e4b17023SJohn Marino 
606*e4b17023SJohn Marino   /* Handle unsigned case.  Look for
607*e4b17023SJohn Marino      S6  u_prod_T = (unsigned TYPE) prod_T;
608*e4b17023SJohn Marino      Use unsigned TYPE as the type for WIDEN_MULT_EXPR.  */
609*e4b17023SJohn Marino   if (TYPE_UNSIGNED (type) != TYPE_UNSIGNED (half_type0))
610*e4b17023SJohn Marino     {
611*e4b17023SJohn Marino       gimple use_stmt;
612*e4b17023SJohn Marino       tree use_lhs;
613*e4b17023SJohn Marino       tree use_type;
614*e4b17023SJohn Marino 
615*e4b17023SJohn Marino       if (TYPE_UNSIGNED (type) == TYPE_UNSIGNED (half_type1))
616*e4b17023SJohn Marino         return NULL;
617*e4b17023SJohn Marino 
618*e4b17023SJohn Marino       use_stmt = vect_single_imm_use (last_stmt);
619*e4b17023SJohn Marino       if (!use_stmt || !is_gimple_assign (use_stmt)
620*e4b17023SJohn Marino           || gimple_assign_rhs_code (use_stmt) != NOP_EXPR)
621*e4b17023SJohn Marino         return NULL;
622*e4b17023SJohn Marino 
623*e4b17023SJohn Marino       use_lhs = gimple_assign_lhs (use_stmt);
624*e4b17023SJohn Marino       use_type = TREE_TYPE (use_lhs);
625*e4b17023SJohn Marino       if (!INTEGRAL_TYPE_P (use_type)
626*e4b17023SJohn Marino           || (TYPE_UNSIGNED (type) == TYPE_UNSIGNED (use_type))
627*e4b17023SJohn Marino           || (TYPE_PRECISION (type) != TYPE_PRECISION (use_type)))
628*e4b17023SJohn Marino         return NULL;
629*e4b17023SJohn Marino 
630*e4b17023SJohn Marino       type = use_type;
631*e4b17023SJohn Marino       last_stmt = use_stmt;
632*e4b17023SJohn Marino     }
633*e4b17023SJohn Marino 
634*e4b17023SJohn Marino   if (!types_compatible_p (half_type0, half_type1))
635*e4b17023SJohn Marino     return NULL;
636*e4b17023SJohn Marino 
637*e4b17023SJohn Marino   /* Pattern detected.  */
638*e4b17023SJohn Marino   if (vect_print_dump_info (REPORT_DETAILS))
639*e4b17023SJohn Marino     fprintf (vect_dump, "vect_recog_widen_mult_pattern: detected: ");
640*e4b17023SJohn Marino 
641*e4b17023SJohn Marino   /* Check target support  */
642*e4b17023SJohn Marino   vectype = get_vectype_for_scalar_type (half_type0);
643*e4b17023SJohn Marino   vectype_out = get_vectype_for_scalar_type (type);
644*e4b17023SJohn Marino   if (!vectype
645*e4b17023SJohn Marino       || !vectype_out
646*e4b17023SJohn Marino       || !supportable_widening_operation (WIDEN_MULT_EXPR, last_stmt,
647*e4b17023SJohn Marino 					  vectype_out, vectype,
648*e4b17023SJohn Marino 					  &dummy, &dummy, &dummy_code,
649*e4b17023SJohn Marino 					  &dummy_code, &dummy_int, &dummy_vec))
650*e4b17023SJohn Marino     return NULL;
651*e4b17023SJohn Marino 
652*e4b17023SJohn Marino   *type_in = vectype;
653*e4b17023SJohn Marino   *type_out = vectype_out;
654*e4b17023SJohn Marino 
655*e4b17023SJohn Marino   /* Pattern supported. Create a stmt to be used to replace the pattern: */
656*e4b17023SJohn Marino   var = vect_recog_temp_ssa_var (type, NULL);
657*e4b17023SJohn Marino   pattern_stmt = gimple_build_assign_with_ops (WIDEN_MULT_EXPR, var, oprnd0,
658*e4b17023SJohn Marino 					       oprnd1);
659*e4b17023SJohn Marino 
660*e4b17023SJohn Marino   if (vect_print_dump_info (REPORT_DETAILS))
661*e4b17023SJohn Marino     print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM);
662*e4b17023SJohn Marino 
663*e4b17023SJohn Marino   VEC_safe_push (gimple, heap, *stmts, last_stmt);
664*e4b17023SJohn Marino   return pattern_stmt;
665*e4b17023SJohn Marino }
666*e4b17023SJohn Marino 
667*e4b17023SJohn Marino 
668*e4b17023SJohn Marino /* Function vect_recog_pow_pattern
669*e4b17023SJohn Marino 
670*e4b17023SJohn Marino    Try to find the following pattern:
671*e4b17023SJohn Marino 
672*e4b17023SJohn Marino      x = POW (y, N);
673*e4b17023SJohn Marino 
674*e4b17023SJohn Marino    with POW being one of pow, powf, powi, powif and N being
675*e4b17023SJohn Marino    either 2 or 0.5.
676*e4b17023SJohn Marino 
677*e4b17023SJohn Marino    Input:
678*e4b17023SJohn Marino 
679*e4b17023SJohn Marino    * LAST_STMT: A stmt from which the pattern search begins.
680*e4b17023SJohn Marino 
681*e4b17023SJohn Marino    Output:
682*e4b17023SJohn Marino 
683*e4b17023SJohn Marino    * TYPE_IN: The type of the input arguments to the pattern.
684*e4b17023SJohn Marino 
685*e4b17023SJohn Marino    * TYPE_OUT: The type of the output of this pattern.
686*e4b17023SJohn Marino 
687*e4b17023SJohn Marino    * Return value: A new stmt that will be used to replace the sequence of
688*e4b17023SJohn Marino    stmts that constitute the pattern. In this case it will be:
689*e4b17023SJohn Marino         x = x * x
690*e4b17023SJohn Marino    or
691*e4b17023SJohn Marino 	x = sqrt (x)
692*e4b17023SJohn Marino */
693*e4b17023SJohn Marino 
694*e4b17023SJohn Marino static gimple
vect_recog_pow_pattern(VEC (gimple,heap)** stmts,tree * type_in,tree * type_out)695*e4b17023SJohn Marino vect_recog_pow_pattern (VEC (gimple, heap) **stmts, tree *type_in,
696*e4b17023SJohn Marino 			tree *type_out)
697*e4b17023SJohn Marino {
698*e4b17023SJohn Marino   gimple last_stmt = VEC_index (gimple, *stmts, 0);
699*e4b17023SJohn Marino   tree fn, base, exp = NULL;
700*e4b17023SJohn Marino   gimple stmt;
701*e4b17023SJohn Marino   tree var;
702*e4b17023SJohn Marino 
703*e4b17023SJohn Marino   if (!is_gimple_call (last_stmt) || gimple_call_lhs (last_stmt) == NULL)
704*e4b17023SJohn Marino     return NULL;
705*e4b17023SJohn Marino 
706*e4b17023SJohn Marino   fn = gimple_call_fndecl (last_stmt);
707*e4b17023SJohn Marino   if (fn == NULL_TREE || DECL_BUILT_IN_CLASS (fn) != BUILT_IN_NORMAL)
708*e4b17023SJohn Marino    return NULL;
709*e4b17023SJohn Marino 
710*e4b17023SJohn Marino   switch (DECL_FUNCTION_CODE (fn))
711*e4b17023SJohn Marino     {
712*e4b17023SJohn Marino     case BUILT_IN_POWIF:
713*e4b17023SJohn Marino     case BUILT_IN_POWI:
714*e4b17023SJohn Marino     case BUILT_IN_POWF:
715*e4b17023SJohn Marino     case BUILT_IN_POW:
716*e4b17023SJohn Marino       base = gimple_call_arg (last_stmt, 0);
717*e4b17023SJohn Marino       exp = gimple_call_arg (last_stmt, 1);
718*e4b17023SJohn Marino       if (TREE_CODE (exp) != REAL_CST
719*e4b17023SJohn Marino 	  && TREE_CODE (exp) != INTEGER_CST)
720*e4b17023SJohn Marino         return NULL;
721*e4b17023SJohn Marino       break;
722*e4b17023SJohn Marino 
723*e4b17023SJohn Marino     default:
724*e4b17023SJohn Marino       return NULL;
725*e4b17023SJohn Marino     }
726*e4b17023SJohn Marino 
727*e4b17023SJohn Marino   /* We now have a pow or powi builtin function call with a constant
728*e4b17023SJohn Marino      exponent.  */
729*e4b17023SJohn Marino 
730*e4b17023SJohn Marino   *type_out = NULL_TREE;
731*e4b17023SJohn Marino 
732*e4b17023SJohn Marino   /* Catch squaring.  */
733*e4b17023SJohn Marino   if ((host_integerp (exp, 0)
734*e4b17023SJohn Marino        && tree_low_cst (exp, 0) == 2)
735*e4b17023SJohn Marino       || (TREE_CODE (exp) == REAL_CST
736*e4b17023SJohn Marino           && REAL_VALUES_EQUAL (TREE_REAL_CST (exp), dconst2)))
737*e4b17023SJohn Marino     {
738*e4b17023SJohn Marino       *type_in = TREE_TYPE (base);
739*e4b17023SJohn Marino 
740*e4b17023SJohn Marino       var = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
741*e4b17023SJohn Marino       stmt = gimple_build_assign_with_ops (MULT_EXPR, var, base, base);
742*e4b17023SJohn Marino       return stmt;
743*e4b17023SJohn Marino     }
744*e4b17023SJohn Marino 
745*e4b17023SJohn Marino   /* Catch square root.  */
746*e4b17023SJohn Marino   if (TREE_CODE (exp) == REAL_CST
747*e4b17023SJohn Marino       && REAL_VALUES_EQUAL (TREE_REAL_CST (exp), dconsthalf))
748*e4b17023SJohn Marino     {
749*e4b17023SJohn Marino       tree newfn = mathfn_built_in (TREE_TYPE (base), BUILT_IN_SQRT);
750*e4b17023SJohn Marino       *type_in = get_vectype_for_scalar_type (TREE_TYPE (base));
751*e4b17023SJohn Marino       if (*type_in)
752*e4b17023SJohn Marino 	{
753*e4b17023SJohn Marino 	  gimple stmt = gimple_build_call (newfn, 1, base);
754*e4b17023SJohn Marino 	  if (vectorizable_function (stmt, *type_in, *type_in)
755*e4b17023SJohn Marino 	      != NULL_TREE)
756*e4b17023SJohn Marino 	    {
757*e4b17023SJohn Marino 	      var = vect_recog_temp_ssa_var (TREE_TYPE (base), stmt);
758*e4b17023SJohn Marino 	      gimple_call_set_lhs (stmt, var);
759*e4b17023SJohn Marino 	      return stmt;
760*e4b17023SJohn Marino 	    }
761*e4b17023SJohn Marino 	}
762*e4b17023SJohn Marino     }
763*e4b17023SJohn Marino 
764*e4b17023SJohn Marino   return NULL;
765*e4b17023SJohn Marino }
766*e4b17023SJohn Marino 
767*e4b17023SJohn Marino 
768*e4b17023SJohn Marino /* Function vect_recog_widen_sum_pattern
769*e4b17023SJohn Marino 
770*e4b17023SJohn Marino    Try to find the following pattern:
771*e4b17023SJohn Marino 
772*e4b17023SJohn Marino      type x_t;
773*e4b17023SJohn Marino      TYPE x_T, sum = init;
774*e4b17023SJohn Marino    loop:
775*e4b17023SJohn Marino      sum_0 = phi <init, sum_1>
776*e4b17023SJohn Marino      S1  x_t = *p;
777*e4b17023SJohn Marino      S2  x_T = (TYPE) x_t;
778*e4b17023SJohn Marino      S3  sum_1 = x_T + sum_0;
779*e4b17023SJohn Marino 
780*e4b17023SJohn Marino    where type 'TYPE' is at least double the size of type 'type', i.e - we're
781*e4b17023SJohn Marino    summing elements of type 'type' into an accumulator of type 'TYPE'. This is
782*e4b17023SJohn Marino    a special case of a reduction computation.
783*e4b17023SJohn Marino 
784*e4b17023SJohn Marino    Input:
785*e4b17023SJohn Marino 
786*e4b17023SJohn Marino    * LAST_STMT: A stmt from which the pattern search begins. In the example,
787*e4b17023SJohn Marino    when this function is called with S3, the pattern {S2,S3} will be detected.
788*e4b17023SJohn Marino 
789*e4b17023SJohn Marino    Output:
790*e4b17023SJohn Marino 
791*e4b17023SJohn Marino    * TYPE_IN: The type of the input arguments to the pattern.
792*e4b17023SJohn Marino 
793*e4b17023SJohn Marino    * TYPE_OUT: The type of the output of this pattern.
794*e4b17023SJohn Marino 
795*e4b17023SJohn Marino    * Return value: A new stmt that will be used to replace the sequence of
796*e4b17023SJohn Marino    stmts that constitute the pattern. In this case it will be:
797*e4b17023SJohn Marino         WIDEN_SUM <x_t, sum_0>
798*e4b17023SJohn Marino 
799*e4b17023SJohn Marino    Note: The widening-sum idiom is a widening reduction pattern that is
800*e4b17023SJohn Marino 	 vectorized without preserving all the intermediate results. It
801*e4b17023SJohn Marino          produces only N/2 (widened) results (by summing up pairs of
802*e4b17023SJohn Marino 	 intermediate results) rather than all N results.  Therefore, we
803*e4b17023SJohn Marino 	 cannot allow this pattern when we want to get all the results and in
804*e4b17023SJohn Marino 	 the correct order (as is the case when this computation is in an
805*e4b17023SJohn Marino 	 inner-loop nested in an outer-loop that us being vectorized).  */
806*e4b17023SJohn Marino 
807*e4b17023SJohn Marino static gimple
vect_recog_widen_sum_pattern(VEC (gimple,heap)** stmts,tree * type_in,tree * type_out)808*e4b17023SJohn Marino vect_recog_widen_sum_pattern (VEC (gimple, heap) **stmts, tree *type_in,
809*e4b17023SJohn Marino 			      tree *type_out)
810*e4b17023SJohn Marino {
811*e4b17023SJohn Marino   gimple stmt, last_stmt = VEC_index (gimple, *stmts, 0);
812*e4b17023SJohn Marino   tree oprnd0, oprnd1;
813*e4b17023SJohn Marino   stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
814*e4b17023SJohn Marino   tree type, half_type;
815*e4b17023SJohn Marino   gimple pattern_stmt;
816*e4b17023SJohn Marino   loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
817*e4b17023SJohn Marino   struct loop *loop = LOOP_VINFO_LOOP (loop_info);
818*e4b17023SJohn Marino   tree var;
819*e4b17023SJohn Marino 
820*e4b17023SJohn Marino   if (!is_gimple_assign (last_stmt))
821*e4b17023SJohn Marino     return NULL;
822*e4b17023SJohn Marino 
823*e4b17023SJohn Marino   type = gimple_expr_type (last_stmt);
824*e4b17023SJohn Marino 
825*e4b17023SJohn Marino   /* Look for the following pattern
826*e4b17023SJohn Marino           DX = (TYPE) X;
827*e4b17023SJohn Marino           sum_1 = DX + sum_0;
828*e4b17023SJohn Marino      In which DX is at least double the size of X, and sum_1 has been
829*e4b17023SJohn Marino      recognized as a reduction variable.
830*e4b17023SJohn Marino    */
831*e4b17023SJohn Marino 
832*e4b17023SJohn Marino   /* Starting from LAST_STMT, follow the defs of its uses in search
833*e4b17023SJohn Marino      of the above pattern.  */
834*e4b17023SJohn Marino 
835*e4b17023SJohn Marino   if (gimple_assign_rhs_code (last_stmt) != PLUS_EXPR)
836*e4b17023SJohn Marino     return NULL;
837*e4b17023SJohn Marino 
838*e4b17023SJohn Marino   if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def)
839*e4b17023SJohn Marino     return NULL;
840*e4b17023SJohn Marino 
841*e4b17023SJohn Marino   oprnd0 = gimple_assign_rhs1 (last_stmt);
842*e4b17023SJohn Marino   oprnd1 = gimple_assign_rhs2 (last_stmt);
843*e4b17023SJohn Marino   if (!types_compatible_p (TREE_TYPE (oprnd0), type)
844*e4b17023SJohn Marino       || !types_compatible_p (TREE_TYPE (oprnd1), type))
845*e4b17023SJohn Marino     return NULL;
846*e4b17023SJohn Marino 
847*e4b17023SJohn Marino   /* So far so good.  Since last_stmt was detected as a (summation) reduction,
848*e4b17023SJohn Marino      we know that oprnd1 is the reduction variable (defined by a loop-header
849*e4b17023SJohn Marino      phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
850*e4b17023SJohn Marino      Left to check that oprnd0 is defined by a cast from type 'type' to type
851*e4b17023SJohn Marino      'TYPE'.  */
852*e4b17023SJohn Marino 
853*e4b17023SJohn Marino   if (!widened_name_p (oprnd0, last_stmt, &half_type, &stmt, true))
854*e4b17023SJohn Marino     return NULL;
855*e4b17023SJohn Marino 
856*e4b17023SJohn Marino   oprnd0 = gimple_assign_rhs1 (stmt);
857*e4b17023SJohn Marino   *type_in = half_type;
858*e4b17023SJohn Marino   *type_out = type;
859*e4b17023SJohn Marino 
860*e4b17023SJohn Marino   /* Pattern detected. Create a stmt to be used to replace the pattern: */
861*e4b17023SJohn Marino   var = vect_recog_temp_ssa_var (type, NULL);
862*e4b17023SJohn Marino   pattern_stmt = gimple_build_assign_with_ops (WIDEN_SUM_EXPR, var,
863*e4b17023SJohn Marino 					       oprnd0, oprnd1);
864*e4b17023SJohn Marino 
865*e4b17023SJohn Marino   if (vect_print_dump_info (REPORT_DETAILS))
866*e4b17023SJohn Marino     {
867*e4b17023SJohn Marino       fprintf (vect_dump, "vect_recog_widen_sum_pattern: detected: ");
868*e4b17023SJohn Marino       print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM);
869*e4b17023SJohn Marino     }
870*e4b17023SJohn Marino 
871*e4b17023SJohn Marino   /* We don't allow changing the order of the computation in the inner-loop
872*e4b17023SJohn Marino      when doing outer-loop vectorization.  */
873*e4b17023SJohn Marino   gcc_assert (!nested_in_vect_loop_p (loop, last_stmt));
874*e4b17023SJohn Marino 
875*e4b17023SJohn Marino   return pattern_stmt;
876*e4b17023SJohn Marino }
877*e4b17023SJohn Marino 
878*e4b17023SJohn Marino 
879*e4b17023SJohn Marino /* Return TRUE if the operation in STMT can be performed on a smaller type.
880*e4b17023SJohn Marino 
881*e4b17023SJohn Marino    Input:
882*e4b17023SJohn Marino    STMT - a statement to check.
883*e4b17023SJohn Marino    DEF - we support operations with two operands, one of which is constant.
884*e4b17023SJohn Marino          The other operand can be defined by a demotion operation, or by a
885*e4b17023SJohn Marino          previous statement in a sequence of over-promoted operations.  In the
886*e4b17023SJohn Marino          later case DEF is used to replace that operand.  (It is defined by a
887*e4b17023SJohn Marino          pattern statement we created for the previous statement in the
888*e4b17023SJohn Marino          sequence).
889*e4b17023SJohn Marino 
890*e4b17023SJohn Marino    Input/output:
891*e4b17023SJohn Marino    NEW_TYPE - Output: a smaller type that we are trying to use.  Input: if not
892*e4b17023SJohn Marino          NULL, it's the type of DEF.
893*e4b17023SJohn Marino    STMTS - additional pattern statements.  If a pattern statement (type
894*e4b17023SJohn Marino          conversion) is created in this function, its original statement is
895*e4b17023SJohn Marino          added to STMTS.
896*e4b17023SJohn Marino 
897*e4b17023SJohn Marino    Output:
898*e4b17023SJohn Marino    OP0, OP1 - if the operation fits a smaller type, OP0 and OP1 are the new
899*e4b17023SJohn Marino          operands to use in the new pattern statement for STMT (will be created
900*e4b17023SJohn Marino          in vect_recog_over_widening_pattern ()).
901*e4b17023SJohn Marino    NEW_DEF_STMT - in case DEF has to be promoted, we create two pattern
902*e4b17023SJohn Marino          statements for STMT: the first one is a type promotion and the second
903*e4b17023SJohn Marino          one is the operation itself.  We return the type promotion statement
904*e4b17023SJohn Marino 	 in NEW_DEF_STMT and further store it in STMT_VINFO_PATTERN_DEF_SEQ of
905*e4b17023SJohn Marino          the second pattern statement.  */
906*e4b17023SJohn Marino 
907*e4b17023SJohn Marino static bool
vect_operation_fits_smaller_type(gimple stmt,tree def,tree * new_type,tree * op0,tree * op1,gimple * new_def_stmt,VEC (gimple,heap)** stmts)908*e4b17023SJohn Marino vect_operation_fits_smaller_type (gimple stmt, tree def, tree *new_type,
909*e4b17023SJohn Marino                                   tree *op0, tree *op1, gimple *new_def_stmt,
910*e4b17023SJohn Marino                                   VEC (gimple, heap) **stmts)
911*e4b17023SJohn Marino {
912*e4b17023SJohn Marino   enum tree_code code;
913*e4b17023SJohn Marino   tree const_oprnd, oprnd;
914*e4b17023SJohn Marino   tree interm_type = NULL_TREE, half_type, tmp, new_oprnd, type;
915*e4b17023SJohn Marino   gimple def_stmt, new_stmt;
916*e4b17023SJohn Marino   bool first = false;
917*e4b17023SJohn Marino   loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (vinfo_for_stmt (stmt));
918*e4b17023SJohn Marino   struct loop *loop = LOOP_VINFO_LOOP (loop_info);
919*e4b17023SJohn Marino 
920*e4b17023SJohn Marino   *op0 = NULL_TREE;
921*e4b17023SJohn Marino   *op1 = NULL_TREE;
922*e4b17023SJohn Marino   *new_def_stmt = NULL;
923*e4b17023SJohn Marino 
924*e4b17023SJohn Marino   if (!is_gimple_assign (stmt))
925*e4b17023SJohn Marino     return false;
926*e4b17023SJohn Marino 
927*e4b17023SJohn Marino   code = gimple_assign_rhs_code (stmt);
928*e4b17023SJohn Marino   if (code != LSHIFT_EXPR && code != RSHIFT_EXPR
929*e4b17023SJohn Marino       && code != BIT_IOR_EXPR && code != BIT_XOR_EXPR && code != BIT_AND_EXPR)
930*e4b17023SJohn Marino     return false;
931*e4b17023SJohn Marino 
932*e4b17023SJohn Marino   oprnd = gimple_assign_rhs1 (stmt);
933*e4b17023SJohn Marino   const_oprnd = gimple_assign_rhs2 (stmt);
934*e4b17023SJohn Marino   type = gimple_expr_type (stmt);
935*e4b17023SJohn Marino 
936*e4b17023SJohn Marino   if (TREE_CODE (oprnd) != SSA_NAME
937*e4b17023SJohn Marino       || TREE_CODE (const_oprnd) != INTEGER_CST)
938*e4b17023SJohn Marino     return false;
939*e4b17023SJohn Marino 
940*e4b17023SJohn Marino   /* If oprnd has other uses besides that in stmt we cannot mark it
941*e4b17023SJohn Marino      as being part of a pattern only.  */
942*e4b17023SJohn Marino   if (!has_single_use (oprnd))
943*e4b17023SJohn Marino     return false;
944*e4b17023SJohn Marino 
945*e4b17023SJohn Marino   /* If we are in the middle of a sequence, we use DEF from a previous
946*e4b17023SJohn Marino      statement.  Otherwise, OPRND has to be a result of type promotion.  */
947*e4b17023SJohn Marino   if (*new_type)
948*e4b17023SJohn Marino     {
949*e4b17023SJohn Marino       half_type = *new_type;
950*e4b17023SJohn Marino       oprnd = def;
951*e4b17023SJohn Marino     }
952*e4b17023SJohn Marino   else
953*e4b17023SJohn Marino     {
954*e4b17023SJohn Marino       first = true;
955*e4b17023SJohn Marino       if (!widened_name_p (oprnd, stmt, &half_type, &def_stmt, false)
956*e4b17023SJohn Marino           || !gimple_bb (def_stmt)
957*e4b17023SJohn Marino           || !flow_bb_inside_loop_p (loop, gimple_bb (def_stmt))
958*e4b17023SJohn Marino           || !vinfo_for_stmt (def_stmt))
959*e4b17023SJohn Marino         return false;
960*e4b17023SJohn Marino     }
961*e4b17023SJohn Marino 
962*e4b17023SJohn Marino   /* Can we perform the operation on a smaller type?  */
963*e4b17023SJohn Marino   switch (code)
964*e4b17023SJohn Marino     {
965*e4b17023SJohn Marino       case BIT_IOR_EXPR:
966*e4b17023SJohn Marino       case BIT_XOR_EXPR:
967*e4b17023SJohn Marino       case BIT_AND_EXPR:
968*e4b17023SJohn Marino         if (!int_fits_type_p (const_oprnd, half_type))
969*e4b17023SJohn Marino           {
970*e4b17023SJohn Marino             /* HALF_TYPE is not enough.  Try a bigger type if possible.  */
971*e4b17023SJohn Marino             if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4))
972*e4b17023SJohn Marino               return false;
973*e4b17023SJohn Marino 
974*e4b17023SJohn Marino             interm_type = build_nonstandard_integer_type (
975*e4b17023SJohn Marino                         TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type));
976*e4b17023SJohn Marino             if (!int_fits_type_p (const_oprnd, interm_type))
977*e4b17023SJohn Marino               return false;
978*e4b17023SJohn Marino           }
979*e4b17023SJohn Marino 
980*e4b17023SJohn Marino         break;
981*e4b17023SJohn Marino 
982*e4b17023SJohn Marino       case LSHIFT_EXPR:
983*e4b17023SJohn Marino         /* Try intermediate type - HALF_TYPE is not enough for sure.  */
984*e4b17023SJohn Marino         if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4))
985*e4b17023SJohn Marino           return false;
986*e4b17023SJohn Marino 
987*e4b17023SJohn Marino         /* Check that HALF_TYPE size + shift amount <= INTERM_TYPE size.
988*e4b17023SJohn Marino           (e.g., if the original value was char, the shift amount is at most 8
989*e4b17023SJohn Marino            if we want to use short).  */
990*e4b17023SJohn Marino         if (compare_tree_int (const_oprnd, TYPE_PRECISION (half_type)) == 1)
991*e4b17023SJohn Marino           return false;
992*e4b17023SJohn Marino 
993*e4b17023SJohn Marino         interm_type = build_nonstandard_integer_type (
994*e4b17023SJohn Marino                         TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type));
995*e4b17023SJohn Marino 
996*e4b17023SJohn Marino         if (!vect_supportable_shift (code, interm_type))
997*e4b17023SJohn Marino           return false;
998*e4b17023SJohn Marino 
999*e4b17023SJohn Marino         break;
1000*e4b17023SJohn Marino 
1001*e4b17023SJohn Marino       case RSHIFT_EXPR:
1002*e4b17023SJohn Marino         if (vect_supportable_shift (code, half_type))
1003*e4b17023SJohn Marino           break;
1004*e4b17023SJohn Marino 
1005*e4b17023SJohn Marino         /* Try intermediate type - HALF_TYPE is not supported.  */
1006*e4b17023SJohn Marino         if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4))
1007*e4b17023SJohn Marino           return false;
1008*e4b17023SJohn Marino 
1009*e4b17023SJohn Marino         interm_type = build_nonstandard_integer_type (
1010*e4b17023SJohn Marino                         TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type));
1011*e4b17023SJohn Marino 
1012*e4b17023SJohn Marino         if (!vect_supportable_shift (code, interm_type))
1013*e4b17023SJohn Marino           return false;
1014*e4b17023SJohn Marino 
1015*e4b17023SJohn Marino         break;
1016*e4b17023SJohn Marino 
1017*e4b17023SJohn Marino       default:
1018*e4b17023SJohn Marino         gcc_unreachable ();
1019*e4b17023SJohn Marino     }
1020*e4b17023SJohn Marino 
1021*e4b17023SJohn Marino   /* There are four possible cases:
1022*e4b17023SJohn Marino      1. OPRND is defined by a type promotion (in that case FIRST is TRUE, it's
1023*e4b17023SJohn Marino         the first statement in the sequence)
1024*e4b17023SJohn Marino         a. The original, HALF_TYPE, is not enough - we replace the promotion
1025*e4b17023SJohn Marino            from HALF_TYPE to TYPE with a promotion to INTERM_TYPE.
1026*e4b17023SJohn Marino         b. HALF_TYPE is sufficient, OPRND is set as the RHS of the original
1027*e4b17023SJohn Marino            promotion.
1028*e4b17023SJohn Marino      2. OPRND is defined by a pattern statement we created.
1029*e4b17023SJohn Marino         a. Its type is not sufficient for the operation, we create a new stmt:
1030*e4b17023SJohn Marino            a type conversion for OPRND from HALF_TYPE to INTERM_TYPE.  We store
1031*e4b17023SJohn Marino            this statement in NEW_DEF_STMT, and it is later put in
1032*e4b17023SJohn Marino 	   STMT_VINFO_PATTERN_DEF_SEQ of the pattern statement for STMT.
1033*e4b17023SJohn Marino         b. OPRND is good to use in the new statement.  */
1034*e4b17023SJohn Marino   if (first)
1035*e4b17023SJohn Marino     {
1036*e4b17023SJohn Marino       if (interm_type)
1037*e4b17023SJohn Marino         {
1038*e4b17023SJohn Marino           /* Replace the original type conversion HALF_TYPE->TYPE with
1039*e4b17023SJohn Marino              HALF_TYPE->INTERM_TYPE.  */
1040*e4b17023SJohn Marino           if (STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)))
1041*e4b17023SJohn Marino             {
1042*e4b17023SJohn Marino               new_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt));
1043*e4b17023SJohn Marino               /* Check if the already created pattern stmt is what we need.  */
1044*e4b17023SJohn Marino               if (!is_gimple_assign (new_stmt)
1045*e4b17023SJohn Marino                   || gimple_assign_rhs_code (new_stmt) != NOP_EXPR
1046*e4b17023SJohn Marino                   || TREE_TYPE (gimple_assign_lhs (new_stmt)) != interm_type)
1047*e4b17023SJohn Marino                 return false;
1048*e4b17023SJohn Marino 
1049*e4b17023SJohn Marino 	      VEC_safe_push (gimple, heap, *stmts, def_stmt);
1050*e4b17023SJohn Marino               oprnd = gimple_assign_lhs (new_stmt);
1051*e4b17023SJohn Marino             }
1052*e4b17023SJohn Marino           else
1053*e4b17023SJohn Marino             {
1054*e4b17023SJohn Marino               /* Create NEW_OPRND = (INTERM_TYPE) OPRND.  */
1055*e4b17023SJohn Marino               oprnd = gimple_assign_rhs1 (def_stmt);
1056*e4b17023SJohn Marino               tmp = create_tmp_reg (interm_type, NULL);
1057*e4b17023SJohn Marino               add_referenced_var (tmp);
1058*e4b17023SJohn Marino               new_oprnd = make_ssa_name (tmp, NULL);
1059*e4b17023SJohn Marino               new_stmt = gimple_build_assign_with_ops (NOP_EXPR, new_oprnd,
1060*e4b17023SJohn Marino                                                        oprnd, NULL_TREE);
1061*e4b17023SJohn Marino               STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)) = new_stmt;
1062*e4b17023SJohn Marino               VEC_safe_push (gimple, heap, *stmts, def_stmt);
1063*e4b17023SJohn Marino               oprnd = new_oprnd;
1064*e4b17023SJohn Marino             }
1065*e4b17023SJohn Marino         }
1066*e4b17023SJohn Marino       else
1067*e4b17023SJohn Marino         {
1068*e4b17023SJohn Marino           /* Retrieve the operand before the type promotion.  */
1069*e4b17023SJohn Marino           oprnd = gimple_assign_rhs1 (def_stmt);
1070*e4b17023SJohn Marino         }
1071*e4b17023SJohn Marino     }
1072*e4b17023SJohn Marino   else
1073*e4b17023SJohn Marino     {
1074*e4b17023SJohn Marino       if (interm_type)
1075*e4b17023SJohn Marino         {
1076*e4b17023SJohn Marino           /* Create a type conversion HALF_TYPE->INTERM_TYPE.  */
1077*e4b17023SJohn Marino           tmp = create_tmp_reg (interm_type, NULL);
1078*e4b17023SJohn Marino           add_referenced_var (tmp);
1079*e4b17023SJohn Marino           new_oprnd = make_ssa_name (tmp, NULL);
1080*e4b17023SJohn Marino           new_stmt = gimple_build_assign_with_ops (NOP_EXPR, new_oprnd,
1081*e4b17023SJohn Marino                                                    oprnd, NULL_TREE);
1082*e4b17023SJohn Marino           oprnd = new_oprnd;
1083*e4b17023SJohn Marino           *new_def_stmt = new_stmt;
1084*e4b17023SJohn Marino         }
1085*e4b17023SJohn Marino 
1086*e4b17023SJohn Marino       /* Otherwise, OPRND is already set.  */
1087*e4b17023SJohn Marino     }
1088*e4b17023SJohn Marino 
1089*e4b17023SJohn Marino   if (interm_type)
1090*e4b17023SJohn Marino     *new_type = interm_type;
1091*e4b17023SJohn Marino   else
1092*e4b17023SJohn Marino     *new_type = half_type;
1093*e4b17023SJohn Marino 
1094*e4b17023SJohn Marino   *op0 = oprnd;
1095*e4b17023SJohn Marino   *op1 = fold_convert (*new_type, const_oprnd);
1096*e4b17023SJohn Marino 
1097*e4b17023SJohn Marino   return true;
1098*e4b17023SJohn Marino }
1099*e4b17023SJohn Marino 
1100*e4b17023SJohn Marino 
1101*e4b17023SJohn Marino /* Try to find a statement or a sequence of statements that can be performed
1102*e4b17023SJohn Marino    on a smaller type:
1103*e4b17023SJohn Marino 
1104*e4b17023SJohn Marino      type x_t;
1105*e4b17023SJohn Marino      TYPE x_T, res0_T, res1_T;
1106*e4b17023SJohn Marino    loop:
1107*e4b17023SJohn Marino      S1  x_t = *p;
1108*e4b17023SJohn Marino      S2  x_T = (TYPE) x_t;
1109*e4b17023SJohn Marino      S3  res0_T = op (x_T, C0);
1110*e4b17023SJohn Marino      S4  res1_T = op (res0_T, C1);
1111*e4b17023SJohn Marino      S5  ... = () res1_T;  - type demotion
1112*e4b17023SJohn Marino 
1113*e4b17023SJohn Marino    where type 'TYPE' is at least double the size of type 'type', C0 and C1 are
1114*e4b17023SJohn Marino    constants.
1115*e4b17023SJohn Marino    Check if S3 and S4 can be done on a smaller type than 'TYPE', it can either
1116*e4b17023SJohn Marino    be 'type' or some intermediate type.  For now, we expect S5 to be a type
1117*e4b17023SJohn Marino    demotion operation.  We also check that S3 and S4 have only one use.  */
1118*e4b17023SJohn Marino 
1119*e4b17023SJohn Marino static gimple
vect_recog_over_widening_pattern(VEC (gimple,heap)** stmts,tree * type_in,tree * type_out)1120*e4b17023SJohn Marino vect_recog_over_widening_pattern (VEC (gimple, heap) **stmts,
1121*e4b17023SJohn Marino                                   tree *type_in, tree *type_out)
1122*e4b17023SJohn Marino {
1123*e4b17023SJohn Marino   gimple stmt = VEC_pop (gimple, *stmts);
1124*e4b17023SJohn Marino   gimple pattern_stmt = NULL, new_def_stmt, prev_stmt = NULL, use_stmt = NULL;
1125*e4b17023SJohn Marino   tree op0, op1, vectype = NULL_TREE, use_lhs, use_type;
1126*e4b17023SJohn Marino   tree var = NULL_TREE, new_type = NULL_TREE, tmp, new_oprnd;
1127*e4b17023SJohn Marino   bool first;
1128*e4b17023SJohn Marino   tree type = NULL;
1129*e4b17023SJohn Marino 
1130*e4b17023SJohn Marino   first = true;
1131*e4b17023SJohn Marino   while (1)
1132*e4b17023SJohn Marino     {
1133*e4b17023SJohn Marino       if (!vinfo_for_stmt (stmt)
1134*e4b17023SJohn Marino           || STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (stmt)))
1135*e4b17023SJohn Marino         return NULL;
1136*e4b17023SJohn Marino 
1137*e4b17023SJohn Marino       new_def_stmt = NULL;
1138*e4b17023SJohn Marino       if (!vect_operation_fits_smaller_type (stmt, var, &new_type,
1139*e4b17023SJohn Marino                                              &op0, &op1, &new_def_stmt,
1140*e4b17023SJohn Marino                                              stmts))
1141*e4b17023SJohn Marino         {
1142*e4b17023SJohn Marino           if (first)
1143*e4b17023SJohn Marino             return NULL;
1144*e4b17023SJohn Marino           else
1145*e4b17023SJohn Marino             break;
1146*e4b17023SJohn Marino         }
1147*e4b17023SJohn Marino 
1148*e4b17023SJohn Marino       /* STMT can be performed on a smaller type.  Check its uses.  */
1149*e4b17023SJohn Marino       use_stmt = vect_single_imm_use (stmt);
1150*e4b17023SJohn Marino       if (!use_stmt || !is_gimple_assign (use_stmt))
1151*e4b17023SJohn Marino         return NULL;
1152*e4b17023SJohn Marino 
1153*e4b17023SJohn Marino       /* Create pattern statement for STMT.  */
1154*e4b17023SJohn Marino       vectype = get_vectype_for_scalar_type (new_type);
1155*e4b17023SJohn Marino       if (!vectype)
1156*e4b17023SJohn Marino         return NULL;
1157*e4b17023SJohn Marino 
1158*e4b17023SJohn Marino       /* We want to collect all the statements for which we create pattern
1159*e4b17023SJohn Marino          statetments, except for the case when the last statement in the
1160*e4b17023SJohn Marino          sequence doesn't have a corresponding pattern statement.  In such
1161*e4b17023SJohn Marino          case we associate the last pattern statement with the last statement
1162*e4b17023SJohn Marino          in the sequence.  Therefore, we only add the original statement to
1163*e4b17023SJohn Marino          the list if we know that it is not the last.  */
1164*e4b17023SJohn Marino       if (prev_stmt)
1165*e4b17023SJohn Marino         VEC_safe_push (gimple, heap, *stmts, prev_stmt);
1166*e4b17023SJohn Marino 
1167*e4b17023SJohn Marino       var = vect_recog_temp_ssa_var (new_type, NULL);
1168*e4b17023SJohn Marino       pattern_stmt
1169*e4b17023SJohn Marino 	= gimple_build_assign_with_ops (gimple_assign_rhs_code (stmt), var,
1170*e4b17023SJohn Marino 					op0, op1);
1171*e4b17023SJohn Marino       STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt)) = pattern_stmt;
1172*e4b17023SJohn Marino       new_pattern_def_seq (vinfo_for_stmt (stmt), new_def_stmt);
1173*e4b17023SJohn Marino 
1174*e4b17023SJohn Marino       if (vect_print_dump_info (REPORT_DETAILS))
1175*e4b17023SJohn Marino         {
1176*e4b17023SJohn Marino           fprintf (vect_dump, "created pattern stmt: ");
1177*e4b17023SJohn Marino           print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM);
1178*e4b17023SJohn Marino         }
1179*e4b17023SJohn Marino 
1180*e4b17023SJohn Marino       type = gimple_expr_type (stmt);
1181*e4b17023SJohn Marino       prev_stmt = stmt;
1182*e4b17023SJohn Marino       stmt = use_stmt;
1183*e4b17023SJohn Marino 
1184*e4b17023SJohn Marino       first = false;
1185*e4b17023SJohn Marino     }
1186*e4b17023SJohn Marino 
1187*e4b17023SJohn Marino   /* We got a sequence.  We expect it to end with a type demotion operation.
1188*e4b17023SJohn Marino      Otherwise, we quit (for now).  There are three possible cases: the
1189*e4b17023SJohn Marino      conversion is to NEW_TYPE (we don't do anything), the conversion is to
1190*e4b17023SJohn Marino      a type bigger than NEW_TYPE and/or the signedness of USE_TYPE and
1191*e4b17023SJohn Marino      NEW_TYPE differs (we create a new conversion statement).  */
1192*e4b17023SJohn Marino   if (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (use_stmt)))
1193*e4b17023SJohn Marino     {
1194*e4b17023SJohn Marino       use_lhs = gimple_assign_lhs (use_stmt);
1195*e4b17023SJohn Marino       use_type = TREE_TYPE (use_lhs);
1196*e4b17023SJohn Marino       /* Support only type demotion or signedess change.  */
1197*e4b17023SJohn Marino       if (!INTEGRAL_TYPE_P (use_type)
1198*e4b17023SJohn Marino 	  || TYPE_PRECISION (type) <= TYPE_PRECISION (use_type))
1199*e4b17023SJohn Marino         return NULL;
1200*e4b17023SJohn Marino 
1201*e4b17023SJohn Marino       /* Check that NEW_TYPE is not bigger than the conversion result.  */
1202*e4b17023SJohn Marino       if (TYPE_PRECISION (new_type) > TYPE_PRECISION (use_type))
1203*e4b17023SJohn Marino 	return NULL;
1204*e4b17023SJohn Marino 
1205*e4b17023SJohn Marino       if (TYPE_UNSIGNED (new_type) != TYPE_UNSIGNED (use_type)
1206*e4b17023SJohn Marino           || TYPE_PRECISION (new_type) != TYPE_PRECISION (use_type))
1207*e4b17023SJohn Marino         {
1208*e4b17023SJohn Marino           /* Create NEW_TYPE->USE_TYPE conversion.  */
1209*e4b17023SJohn Marino           tmp = create_tmp_reg (use_type, NULL);
1210*e4b17023SJohn Marino           add_referenced_var (tmp);
1211*e4b17023SJohn Marino           new_oprnd = make_ssa_name (tmp, NULL);
1212*e4b17023SJohn Marino           pattern_stmt = gimple_build_assign_with_ops (NOP_EXPR, new_oprnd,
1213*e4b17023SJohn Marino                                                        var, NULL_TREE);
1214*e4b17023SJohn Marino           STMT_VINFO_RELATED_STMT (vinfo_for_stmt (use_stmt)) = pattern_stmt;
1215*e4b17023SJohn Marino 
1216*e4b17023SJohn Marino           *type_in = get_vectype_for_scalar_type (new_type);
1217*e4b17023SJohn Marino           *type_out = get_vectype_for_scalar_type (use_type);
1218*e4b17023SJohn Marino 
1219*e4b17023SJohn Marino           /* We created a pattern statement for the last statement in the
1220*e4b17023SJohn Marino              sequence, so we don't need to associate it with the pattern
1221*e4b17023SJohn Marino              statement created for PREV_STMT.  Therefore, we add PREV_STMT
1222*e4b17023SJohn Marino              to the list in order to mark it later in vect_pattern_recog_1.  */
1223*e4b17023SJohn Marino           if (prev_stmt)
1224*e4b17023SJohn Marino             VEC_safe_push (gimple, heap, *stmts, prev_stmt);
1225*e4b17023SJohn Marino         }
1226*e4b17023SJohn Marino       else
1227*e4b17023SJohn Marino         {
1228*e4b17023SJohn Marino           if (prev_stmt)
1229*e4b17023SJohn Marino 	    STMT_VINFO_PATTERN_DEF_SEQ (vinfo_for_stmt (use_stmt))
1230*e4b17023SJohn Marino 	       = STMT_VINFO_PATTERN_DEF_SEQ (vinfo_for_stmt (prev_stmt));
1231*e4b17023SJohn Marino 
1232*e4b17023SJohn Marino           *type_in = vectype;
1233*e4b17023SJohn Marino           *type_out = NULL_TREE;
1234*e4b17023SJohn Marino         }
1235*e4b17023SJohn Marino 
1236*e4b17023SJohn Marino       VEC_safe_push (gimple, heap, *stmts, use_stmt);
1237*e4b17023SJohn Marino     }
1238*e4b17023SJohn Marino   else
1239*e4b17023SJohn Marino     /* TODO: support general case, create a conversion to the correct type.  */
1240*e4b17023SJohn Marino     return NULL;
1241*e4b17023SJohn Marino 
1242*e4b17023SJohn Marino   /* Pattern detected.  */
1243*e4b17023SJohn Marino   if (vect_print_dump_info (REPORT_DETAILS))
1244*e4b17023SJohn Marino     {
1245*e4b17023SJohn Marino       fprintf (vect_dump, "vect_recog_over_widening_pattern: detected: ");
1246*e4b17023SJohn Marino       print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM);
1247*e4b17023SJohn Marino     }
1248*e4b17023SJohn Marino 
1249*e4b17023SJohn Marino   return pattern_stmt;
1250*e4b17023SJohn Marino }
1251*e4b17023SJohn Marino 
1252*e4b17023SJohn Marino /* Detect widening shift pattern:
1253*e4b17023SJohn Marino 
1254*e4b17023SJohn Marino    type a_t;
1255*e4b17023SJohn Marino    TYPE a_T, res_T;
1256*e4b17023SJohn Marino 
1257*e4b17023SJohn Marino    S1 a_t = ;
1258*e4b17023SJohn Marino    S2 a_T = (TYPE) a_t;
1259*e4b17023SJohn Marino    S3 res_T = a_T << CONST;
1260*e4b17023SJohn Marino 
1261*e4b17023SJohn Marino   where type 'TYPE' is at least double the size of type 'type'.
1262*e4b17023SJohn Marino 
1263*e4b17023SJohn Marino   Also detect cases where the shift result is immediately converted
1264*e4b17023SJohn Marino   to another type 'result_type' that is no larger in size than 'TYPE'.
1265*e4b17023SJohn Marino   In those cases we perform a widen-shift that directly results in
1266*e4b17023SJohn Marino   'result_type', to avoid a possible over-widening situation:
1267*e4b17023SJohn Marino 
1268*e4b17023SJohn Marino   type a_t;
1269*e4b17023SJohn Marino   TYPE a_T, res_T;
1270*e4b17023SJohn Marino   result_type res_result;
1271*e4b17023SJohn Marino 
1272*e4b17023SJohn Marino   S1 a_t = ;
1273*e4b17023SJohn Marino   S2 a_T = (TYPE) a_t;
1274*e4b17023SJohn Marino   S3 res_T = a_T << CONST;
1275*e4b17023SJohn Marino   S4 res_result = (result_type) res_T;
1276*e4b17023SJohn Marino       '--> res_result' = a_t w<< CONST;
1277*e4b17023SJohn Marino 
1278*e4b17023SJohn Marino   And a case when 'TYPE' is 4 times bigger than 'type'.  In that case we
1279*e4b17023SJohn Marino   create an additional pattern stmt for S2 to create a variable of an
1280*e4b17023SJohn Marino   intermediate type, and perform widen-shift on the intermediate type:
1281*e4b17023SJohn Marino 
1282*e4b17023SJohn Marino   type a_t;
1283*e4b17023SJohn Marino   interm_type a_it;
1284*e4b17023SJohn Marino   TYPE a_T, res_T, res_T';
1285*e4b17023SJohn Marino 
1286*e4b17023SJohn Marino   S1 a_t = ;
1287*e4b17023SJohn Marino   S2 a_T = (TYPE) a_t;
1288*e4b17023SJohn Marino       '--> a_it = (interm_type) a_t;
1289*e4b17023SJohn Marino   S3 res_T = a_T << CONST;
1290*e4b17023SJohn Marino       '--> res_T' = a_it <<* CONST;
1291*e4b17023SJohn Marino 
1292*e4b17023SJohn Marino   Input/Output:
1293*e4b17023SJohn Marino 
1294*e4b17023SJohn Marino   * STMTS: Contains a stmt from which the pattern search begins.
1295*e4b17023SJohn Marino     In case of unsigned widen-shift, the original stmt (S3) is replaced with S4
1296*e4b17023SJohn Marino     in STMTS.  When an intermediate type is used and a pattern statement is
1297*e4b17023SJohn Marino     created for S2, we also put S2 here (before S3).
1298*e4b17023SJohn Marino 
1299*e4b17023SJohn Marino   Output:
1300*e4b17023SJohn Marino 
1301*e4b17023SJohn Marino   * TYPE_IN: The type of the input arguments to the pattern.
1302*e4b17023SJohn Marino 
1303*e4b17023SJohn Marino   * TYPE_OUT: The type of the output of this pattern.
1304*e4b17023SJohn Marino 
1305*e4b17023SJohn Marino   * Return value: A new stmt that will be used to replace the sequence of
1306*e4b17023SJohn Marino     stmts that constitute the pattern.  In this case it will be:
1307*e4b17023SJohn Marino     WIDEN_LSHIFT_EXPR <a_t, CONST>.  */
1308*e4b17023SJohn Marino 
1309*e4b17023SJohn Marino static gimple
vect_recog_widen_shift_pattern(VEC (gimple,heap)** stmts,tree * type_in,tree * type_out)1310*e4b17023SJohn Marino vect_recog_widen_shift_pattern (VEC (gimple, heap) **stmts,
1311*e4b17023SJohn Marino 				tree *type_in, tree *type_out)
1312*e4b17023SJohn Marino {
1313*e4b17023SJohn Marino   gimple last_stmt = VEC_pop (gimple, *stmts);
1314*e4b17023SJohn Marino   gimple def_stmt0;
1315*e4b17023SJohn Marino   tree oprnd0, oprnd1;
1316*e4b17023SJohn Marino   tree type, half_type0;
1317*e4b17023SJohn Marino   gimple pattern_stmt;
1318*e4b17023SJohn Marino   tree vectype, vectype_out = NULL_TREE;
1319*e4b17023SJohn Marino   tree dummy;
1320*e4b17023SJohn Marino   tree var;
1321*e4b17023SJohn Marino   enum tree_code dummy_code;
1322*e4b17023SJohn Marino   int dummy_int;
1323*e4b17023SJohn Marino   VEC (tree, heap) * dummy_vec;
1324*e4b17023SJohn Marino   gimple use_stmt;
1325*e4b17023SJohn Marino 
1326*e4b17023SJohn Marino   if (!is_gimple_assign (last_stmt) || !vinfo_for_stmt (last_stmt))
1327*e4b17023SJohn Marino     return NULL;
1328*e4b17023SJohn Marino 
1329*e4b17023SJohn Marino   if (STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (last_stmt)))
1330*e4b17023SJohn Marino     return NULL;
1331*e4b17023SJohn Marino 
1332*e4b17023SJohn Marino   if (gimple_assign_rhs_code (last_stmt) != LSHIFT_EXPR)
1333*e4b17023SJohn Marino     return NULL;
1334*e4b17023SJohn Marino 
1335*e4b17023SJohn Marino   oprnd0 = gimple_assign_rhs1 (last_stmt);
1336*e4b17023SJohn Marino   oprnd1 = gimple_assign_rhs2 (last_stmt);
1337*e4b17023SJohn Marino   if (TREE_CODE (oprnd0) != SSA_NAME || TREE_CODE (oprnd1) != INTEGER_CST)
1338*e4b17023SJohn Marino     return NULL;
1339*e4b17023SJohn Marino 
1340*e4b17023SJohn Marino   /* Check operand 0: it has to be defined by a type promotion.  */
1341*e4b17023SJohn Marino   if (!widened_name_p (oprnd0, last_stmt, &half_type0, &def_stmt0, false))
1342*e4b17023SJohn Marino     return NULL;
1343*e4b17023SJohn Marino 
1344*e4b17023SJohn Marino   /* Check operand 1: has to be positive.  We check that it fits the type
1345*e4b17023SJohn Marino      in vect_handle_widen_op_by_const ().  */
1346*e4b17023SJohn Marino   if (tree_int_cst_compare (oprnd1, size_zero_node) <= 0)
1347*e4b17023SJohn Marino     return NULL;
1348*e4b17023SJohn Marino 
1349*e4b17023SJohn Marino   oprnd0 = gimple_assign_rhs1 (def_stmt0);
1350*e4b17023SJohn Marino   type = gimple_expr_type (last_stmt);
1351*e4b17023SJohn Marino 
1352*e4b17023SJohn Marino   /* Check for subsequent conversion to another type.  */
1353*e4b17023SJohn Marino   use_stmt = vect_single_imm_use (last_stmt);
1354*e4b17023SJohn Marino   if (use_stmt && is_gimple_assign (use_stmt)
1355*e4b17023SJohn Marino       && CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (use_stmt))
1356*e4b17023SJohn Marino       && !STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (use_stmt)))
1357*e4b17023SJohn Marino     {
1358*e4b17023SJohn Marino       tree use_lhs = gimple_assign_lhs (use_stmt);
1359*e4b17023SJohn Marino       tree use_type = TREE_TYPE (use_lhs);
1360*e4b17023SJohn Marino 
1361*e4b17023SJohn Marino       if (INTEGRAL_TYPE_P (use_type)
1362*e4b17023SJohn Marino 	  && TYPE_PRECISION (use_type) <= TYPE_PRECISION (type))
1363*e4b17023SJohn Marino 	{
1364*e4b17023SJohn Marino 	  last_stmt = use_stmt;
1365*e4b17023SJohn Marino 	  type = use_type;
1366*e4b17023SJohn Marino 	}
1367*e4b17023SJohn Marino     }
1368*e4b17023SJohn Marino 
1369*e4b17023SJohn Marino   /* Check if this a widening operation.  */
1370*e4b17023SJohn Marino   if (!vect_handle_widen_op_by_const (last_stmt, LSHIFT_EXPR, oprnd1,
1371*e4b17023SJohn Marino        				      &oprnd0, stmts,
1372*e4b17023SJohn Marino 	                              type, &half_type0, def_stmt0))
1373*e4b17023SJohn Marino     return NULL;
1374*e4b17023SJohn Marino 
1375*e4b17023SJohn Marino   /* Pattern detected.  */
1376*e4b17023SJohn Marino   if (vect_print_dump_info (REPORT_DETAILS))
1377*e4b17023SJohn Marino     fprintf (vect_dump, "vect_recog_widen_shift_pattern: detected: ");
1378*e4b17023SJohn Marino 
1379*e4b17023SJohn Marino   /* Check target support.  */
1380*e4b17023SJohn Marino   vectype = get_vectype_for_scalar_type (half_type0);
1381*e4b17023SJohn Marino   vectype_out = get_vectype_for_scalar_type (type);
1382*e4b17023SJohn Marino 
1383*e4b17023SJohn Marino   if (!vectype
1384*e4b17023SJohn Marino       || !vectype_out
1385*e4b17023SJohn Marino       || !supportable_widening_operation (WIDEN_LSHIFT_EXPR, last_stmt,
1386*e4b17023SJohn Marino 					  vectype_out, vectype,
1387*e4b17023SJohn Marino 					  &dummy, &dummy, &dummy_code,
1388*e4b17023SJohn Marino 					  &dummy_code, &dummy_int,
1389*e4b17023SJohn Marino 					  &dummy_vec))
1390*e4b17023SJohn Marino     return NULL;
1391*e4b17023SJohn Marino 
1392*e4b17023SJohn Marino   *type_in = vectype;
1393*e4b17023SJohn Marino   *type_out = vectype_out;
1394*e4b17023SJohn Marino 
1395*e4b17023SJohn Marino   /* Pattern supported.  Create a stmt to be used to replace the pattern.  */
1396*e4b17023SJohn Marino   var = vect_recog_temp_ssa_var (type, NULL);
1397*e4b17023SJohn Marino   pattern_stmt =
1398*e4b17023SJohn Marino     gimple_build_assign_with_ops (WIDEN_LSHIFT_EXPR, var, oprnd0, oprnd1);
1399*e4b17023SJohn Marino 
1400*e4b17023SJohn Marino   if (vect_print_dump_info (REPORT_DETAILS))
1401*e4b17023SJohn Marino     print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM);
1402*e4b17023SJohn Marino 
1403*e4b17023SJohn Marino   VEC_safe_push (gimple, heap, *stmts, last_stmt);
1404*e4b17023SJohn Marino   return pattern_stmt;
1405*e4b17023SJohn Marino }
1406*e4b17023SJohn Marino 
1407*e4b17023SJohn Marino /* Detect a vector by vector shift pattern that wouldn't be otherwise
1408*e4b17023SJohn Marino    vectorized:
1409*e4b17023SJohn Marino 
1410*e4b17023SJohn Marino    type a_t;
1411*e4b17023SJohn Marino    TYPE b_T, res_T;
1412*e4b17023SJohn Marino 
1413*e4b17023SJohn Marino    S1 a_t = ;
1414*e4b17023SJohn Marino    S2 b_T = ;
1415*e4b17023SJohn Marino    S3 res_T = b_T op a_t;
1416*e4b17023SJohn Marino 
1417*e4b17023SJohn Marino   where type 'TYPE' is a type with different size than 'type',
1418*e4b17023SJohn Marino   and op is <<, >> or rotate.
1419*e4b17023SJohn Marino 
1420*e4b17023SJohn Marino   Also detect cases:
1421*e4b17023SJohn Marino 
1422*e4b17023SJohn Marino    type a_t;
1423*e4b17023SJohn Marino    TYPE b_T, c_T, res_T;
1424*e4b17023SJohn Marino 
1425*e4b17023SJohn Marino    S0 c_T = ;
1426*e4b17023SJohn Marino    S1 a_t = (type) c_T;
1427*e4b17023SJohn Marino    S2 b_T = ;
1428*e4b17023SJohn Marino    S3 res_T = b_T op a_t;
1429*e4b17023SJohn Marino 
1430*e4b17023SJohn Marino   Input/Output:
1431*e4b17023SJohn Marino 
1432*e4b17023SJohn Marino   * STMTS: Contains a stmt from which the pattern search begins,
1433*e4b17023SJohn Marino     i.e. the shift/rotate stmt.  The original stmt (S3) is replaced
1434*e4b17023SJohn Marino     with a shift/rotate which has same type on both operands, in the
1435*e4b17023SJohn Marino     second case just b_T op c_T, in the first case with added cast
1436*e4b17023SJohn Marino     from a_t to c_T in STMT_VINFO_PATTERN_DEF_SEQ.
1437*e4b17023SJohn Marino 
1438*e4b17023SJohn Marino   Output:
1439*e4b17023SJohn Marino 
1440*e4b17023SJohn Marino   * TYPE_IN: The type of the input arguments to the pattern.
1441*e4b17023SJohn Marino 
1442*e4b17023SJohn Marino   * TYPE_OUT: The type of the output of this pattern.
1443*e4b17023SJohn Marino 
1444*e4b17023SJohn Marino   * Return value: A new stmt that will be used to replace the shift/rotate
1445*e4b17023SJohn Marino     S3 stmt.  */
1446*e4b17023SJohn Marino 
1447*e4b17023SJohn Marino static gimple
vect_recog_vector_vector_shift_pattern(VEC (gimple,heap)** stmts,tree * type_in,tree * type_out)1448*e4b17023SJohn Marino vect_recog_vector_vector_shift_pattern (VEC (gimple, heap) **stmts,
1449*e4b17023SJohn Marino 					tree *type_in, tree *type_out)
1450*e4b17023SJohn Marino {
1451*e4b17023SJohn Marino   gimple last_stmt = VEC_pop (gimple, *stmts);
1452*e4b17023SJohn Marino   tree oprnd0, oprnd1, lhs, var;
1453*e4b17023SJohn Marino   gimple pattern_stmt, def_stmt;
1454*e4b17023SJohn Marino   enum tree_code rhs_code;
1455*e4b17023SJohn Marino   stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
1456*e4b17023SJohn Marino   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1457*e4b17023SJohn Marino   enum vect_def_type dt;
1458*e4b17023SJohn Marino   tree def;
1459*e4b17023SJohn Marino 
1460*e4b17023SJohn Marino   if (!is_gimple_assign (last_stmt))
1461*e4b17023SJohn Marino     return NULL;
1462*e4b17023SJohn Marino 
1463*e4b17023SJohn Marino   rhs_code = gimple_assign_rhs_code (last_stmt);
1464*e4b17023SJohn Marino   switch (rhs_code)
1465*e4b17023SJohn Marino     {
1466*e4b17023SJohn Marino     case LSHIFT_EXPR:
1467*e4b17023SJohn Marino     case RSHIFT_EXPR:
1468*e4b17023SJohn Marino     case LROTATE_EXPR:
1469*e4b17023SJohn Marino     case RROTATE_EXPR:
1470*e4b17023SJohn Marino       break;
1471*e4b17023SJohn Marino     default:
1472*e4b17023SJohn Marino       return NULL;
1473*e4b17023SJohn Marino     }
1474*e4b17023SJohn Marino 
1475*e4b17023SJohn Marino   if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
1476*e4b17023SJohn Marino     return NULL;
1477*e4b17023SJohn Marino 
1478*e4b17023SJohn Marino   lhs = gimple_assign_lhs (last_stmt);
1479*e4b17023SJohn Marino   oprnd0 = gimple_assign_rhs1 (last_stmt);
1480*e4b17023SJohn Marino   oprnd1 = gimple_assign_rhs2 (last_stmt);
1481*e4b17023SJohn Marino   if (TREE_CODE (oprnd0) != SSA_NAME
1482*e4b17023SJohn Marino       || TREE_CODE (oprnd1) != SSA_NAME
1483*e4b17023SJohn Marino       || TYPE_MODE (TREE_TYPE (oprnd0)) == TYPE_MODE (TREE_TYPE (oprnd1))
1484*e4b17023SJohn Marino       || TYPE_PRECISION (TREE_TYPE (oprnd1))
1485*e4b17023SJohn Marino 	 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (oprnd1)))
1486*e4b17023SJohn Marino       || TYPE_PRECISION (TREE_TYPE (lhs))
1487*e4b17023SJohn Marino 	 != TYPE_PRECISION (TREE_TYPE (oprnd0)))
1488*e4b17023SJohn Marino     return NULL;
1489*e4b17023SJohn Marino 
1490*e4b17023SJohn Marino   if (!vect_is_simple_use (oprnd1, last_stmt, loop_vinfo, NULL, &def_stmt,
1491*e4b17023SJohn Marino 			   &def, &dt))
1492*e4b17023SJohn Marino     return NULL;
1493*e4b17023SJohn Marino 
1494*e4b17023SJohn Marino   if (dt != vect_internal_def)
1495*e4b17023SJohn Marino     return NULL;
1496*e4b17023SJohn Marino 
1497*e4b17023SJohn Marino   *type_in = get_vectype_for_scalar_type (TREE_TYPE (oprnd0));
1498*e4b17023SJohn Marino   *type_out = *type_in;
1499*e4b17023SJohn Marino   if (*type_in == NULL_TREE)
1500*e4b17023SJohn Marino     return NULL;
1501*e4b17023SJohn Marino 
1502*e4b17023SJohn Marino   def = NULL_TREE;
1503*e4b17023SJohn Marino   if (gimple_assign_cast_p (def_stmt))
1504*e4b17023SJohn Marino     {
1505*e4b17023SJohn Marino       tree rhs1 = gimple_assign_rhs1 (def_stmt);
1506*e4b17023SJohn Marino       if (TYPE_MODE (TREE_TYPE (rhs1)) == TYPE_MODE (TREE_TYPE (oprnd0))
1507*e4b17023SJohn Marino 	  && TYPE_PRECISION (TREE_TYPE (rhs1))
1508*e4b17023SJohn Marino 	     == TYPE_PRECISION (TREE_TYPE (oprnd0)))
1509*e4b17023SJohn Marino 	def = rhs1;
1510*e4b17023SJohn Marino     }
1511*e4b17023SJohn Marino 
1512*e4b17023SJohn Marino   if (def == NULL_TREE)
1513*e4b17023SJohn Marino     {
1514*e4b17023SJohn Marino       def = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
1515*e4b17023SJohn Marino       def_stmt = gimple_build_assign_with_ops (NOP_EXPR, def, oprnd1,
1516*e4b17023SJohn Marino 					       NULL_TREE);
1517*e4b17023SJohn Marino       new_pattern_def_seq (stmt_vinfo, def_stmt);
1518*e4b17023SJohn Marino     }
1519*e4b17023SJohn Marino 
1520*e4b17023SJohn Marino   /* Pattern detected.  */
1521*e4b17023SJohn Marino   if (vect_print_dump_info (REPORT_DETAILS))
1522*e4b17023SJohn Marino     fprintf (vect_dump, "vect_recog_vector_vector_shift_pattern: detected: ");
1523*e4b17023SJohn Marino 
1524*e4b17023SJohn Marino   /* Pattern supported.  Create a stmt to be used to replace the pattern.  */
1525*e4b17023SJohn Marino   var = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
1526*e4b17023SJohn Marino   pattern_stmt = gimple_build_assign_with_ops (rhs_code, var, oprnd0, def);
1527*e4b17023SJohn Marino 
1528*e4b17023SJohn Marino   if (vect_print_dump_info (REPORT_DETAILS))
1529*e4b17023SJohn Marino     print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM);
1530*e4b17023SJohn Marino 
1531*e4b17023SJohn Marino   VEC_safe_push (gimple, heap, *stmts, last_stmt);
1532*e4b17023SJohn Marino   return pattern_stmt;
1533*e4b17023SJohn Marino }
1534*e4b17023SJohn Marino 
1535*e4b17023SJohn Marino /* Detect a signed division by power of two constant that wouldn't be
1536*e4b17023SJohn Marino    otherwise vectorized:
1537*e4b17023SJohn Marino 
1538*e4b17023SJohn Marino    type a_t, b_t;
1539*e4b17023SJohn Marino 
1540*e4b17023SJohn Marino    S1 a_t = b_t / N;
1541*e4b17023SJohn Marino 
1542*e4b17023SJohn Marino   where type 'type' is a signed integral type and N is a constant positive
1543*e4b17023SJohn Marino   power of two.
1544*e4b17023SJohn Marino 
1545*e4b17023SJohn Marino   Similarly handle signed modulo by power of two constant:
1546*e4b17023SJohn Marino 
1547*e4b17023SJohn Marino    S4 a_t = b_t % N;
1548*e4b17023SJohn Marino 
1549*e4b17023SJohn Marino   Input/Output:
1550*e4b17023SJohn Marino 
1551*e4b17023SJohn Marino   * STMTS: Contains a stmt from which the pattern search begins,
1552*e4b17023SJohn Marino     i.e. the division stmt.  S1 is replaced by:
1553*e4b17023SJohn Marino   S3  y_t = b_t < 0 ? N - 1 : 0;
1554*e4b17023SJohn Marino   S2  x_t = b_t + y_t;
1555*e4b17023SJohn Marino   S1' a_t = x_t >> log2 (N);
1556*e4b17023SJohn Marino 
1557*e4b17023SJohn Marino     S4 is replaced by (where *_T temporaries have unsigned type):
1558*e4b17023SJohn Marino   S9  y_T = b_t < 0 ? -1U : 0U;
1559*e4b17023SJohn Marino   S8  z_T = y_T >> (sizeof (type_t) * CHAR_BIT - log2 (N));
1560*e4b17023SJohn Marino   S7  z_t = (type) z_T;
1561*e4b17023SJohn Marino   S6  w_t = b_t + z_t;
1562*e4b17023SJohn Marino   S5  x_t = w_t & (N - 1);
1563*e4b17023SJohn Marino   S4' a_t = x_t - z_t;
1564*e4b17023SJohn Marino 
1565*e4b17023SJohn Marino   Output:
1566*e4b17023SJohn Marino 
1567*e4b17023SJohn Marino   * TYPE_IN: The type of the input arguments to the pattern.
1568*e4b17023SJohn Marino 
1569*e4b17023SJohn Marino   * TYPE_OUT: The type of the output of this pattern.
1570*e4b17023SJohn Marino 
1571*e4b17023SJohn Marino   * Return value: A new stmt that will be used to replace the division
1572*e4b17023SJohn Marino     S1 or modulo S4 stmt.  */
1573*e4b17023SJohn Marino 
1574*e4b17023SJohn Marino static gimple
vect_recog_sdivmod_pow2_pattern(VEC (gimple,heap)** stmts,tree * type_in,tree * type_out)1575*e4b17023SJohn Marino vect_recog_sdivmod_pow2_pattern (VEC (gimple, heap) **stmts,
1576*e4b17023SJohn Marino 				 tree *type_in, tree *type_out)
1577*e4b17023SJohn Marino {
1578*e4b17023SJohn Marino   gimple last_stmt = VEC_pop (gimple, *stmts);
1579*e4b17023SJohn Marino   tree oprnd0, oprnd1, vectype, itype, cond;
1580*e4b17023SJohn Marino   gimple pattern_stmt, def_stmt;
1581*e4b17023SJohn Marino   enum tree_code rhs_code;
1582*e4b17023SJohn Marino   stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
1583*e4b17023SJohn Marino   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1584*e4b17023SJohn Marino   optab optab;
1585*e4b17023SJohn Marino 
1586*e4b17023SJohn Marino   if (!is_gimple_assign (last_stmt))
1587*e4b17023SJohn Marino     return NULL;
1588*e4b17023SJohn Marino 
1589*e4b17023SJohn Marino   rhs_code = gimple_assign_rhs_code (last_stmt);
1590*e4b17023SJohn Marino   switch (rhs_code)
1591*e4b17023SJohn Marino     {
1592*e4b17023SJohn Marino     case TRUNC_DIV_EXPR:
1593*e4b17023SJohn Marino     case TRUNC_MOD_EXPR:
1594*e4b17023SJohn Marino       break;
1595*e4b17023SJohn Marino     default:
1596*e4b17023SJohn Marino       return NULL;
1597*e4b17023SJohn Marino     }
1598*e4b17023SJohn Marino 
1599*e4b17023SJohn Marino   if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
1600*e4b17023SJohn Marino     return NULL;
1601*e4b17023SJohn Marino 
1602*e4b17023SJohn Marino   oprnd0 = gimple_assign_rhs1 (last_stmt);
1603*e4b17023SJohn Marino   oprnd1 = gimple_assign_rhs2 (last_stmt);
1604*e4b17023SJohn Marino   itype = TREE_TYPE (oprnd0);
1605*e4b17023SJohn Marino   if (TREE_CODE (oprnd0) != SSA_NAME
1606*e4b17023SJohn Marino       || TREE_CODE (oprnd1) != INTEGER_CST
1607*e4b17023SJohn Marino       || TREE_CODE (itype) != INTEGER_TYPE
1608*e4b17023SJohn Marino       || TYPE_UNSIGNED (itype)
1609*e4b17023SJohn Marino       || TYPE_PRECISION (itype) != GET_MODE_PRECISION (TYPE_MODE (itype))
1610*e4b17023SJohn Marino       || !integer_pow2p (oprnd1)
1611*e4b17023SJohn Marino       || tree_int_cst_sgn (oprnd1) != 1)
1612*e4b17023SJohn Marino     return NULL;
1613*e4b17023SJohn Marino 
1614*e4b17023SJohn Marino   vectype = get_vectype_for_scalar_type (itype);
1615*e4b17023SJohn Marino   if (vectype == NULL_TREE)
1616*e4b17023SJohn Marino     return NULL;
1617*e4b17023SJohn Marino 
1618*e4b17023SJohn Marino   /* If the target can handle vectorized division or modulo natively,
1619*e4b17023SJohn Marino      don't attempt to optimize this.  */
1620*e4b17023SJohn Marino   optab = optab_for_tree_code (rhs_code, vectype, optab_default);
1621*e4b17023SJohn Marino   if (optab != NULL)
1622*e4b17023SJohn Marino     {
1623*e4b17023SJohn Marino       enum machine_mode vec_mode = TYPE_MODE (vectype);
1624*e4b17023SJohn Marino       int icode = (int) optab_handler (optab, vec_mode);
1625*e4b17023SJohn Marino       if (icode != CODE_FOR_nothing
1626*e4b17023SJohn Marino 	  || GET_MODE_SIZE (vec_mode) == UNITS_PER_WORD)
1627*e4b17023SJohn Marino 	return NULL;
1628*e4b17023SJohn Marino     }
1629*e4b17023SJohn Marino 
1630*e4b17023SJohn Marino   /* Pattern detected.  */
1631*e4b17023SJohn Marino   if (vect_print_dump_info (REPORT_DETAILS))
1632*e4b17023SJohn Marino     fprintf (vect_dump, "vect_recog_sdivmod_pow2_pattern: detected: ");
1633*e4b17023SJohn Marino 
1634*e4b17023SJohn Marino   cond = build2 (LT_EXPR, boolean_type_node, oprnd0, build_int_cst (itype, 0));
1635*e4b17023SJohn Marino   if (rhs_code == TRUNC_DIV_EXPR)
1636*e4b17023SJohn Marino     {
1637*e4b17023SJohn Marino       tree var = vect_recog_temp_ssa_var (itype, NULL);
1638*e4b17023SJohn Marino       def_stmt
1639*e4b17023SJohn Marino 	= gimple_build_assign_with_ops3 (COND_EXPR, var, cond,
1640*e4b17023SJohn Marino 					 fold_build2 (MINUS_EXPR, itype,
1641*e4b17023SJohn Marino 						      oprnd1,
1642*e4b17023SJohn Marino 						      build_int_cst (itype,
1643*e4b17023SJohn Marino 								     1)),
1644*e4b17023SJohn Marino 					 build_int_cst (itype, 0));
1645*e4b17023SJohn Marino       new_pattern_def_seq (stmt_vinfo, def_stmt);
1646*e4b17023SJohn Marino       var = vect_recog_temp_ssa_var (itype, NULL);
1647*e4b17023SJohn Marino       def_stmt
1648*e4b17023SJohn Marino 	= gimple_build_assign_with_ops (PLUS_EXPR, var, oprnd0,
1649*e4b17023SJohn Marino 					gimple_assign_lhs (def_stmt));
1650*e4b17023SJohn Marino       append_pattern_def_seq (stmt_vinfo, def_stmt);
1651*e4b17023SJohn Marino 
1652*e4b17023SJohn Marino       pattern_stmt
1653*e4b17023SJohn Marino 	= gimple_build_assign_with_ops (RSHIFT_EXPR,
1654*e4b17023SJohn Marino 					vect_recog_temp_ssa_var (itype, NULL),
1655*e4b17023SJohn Marino 					var,
1656*e4b17023SJohn Marino 					build_int_cst (itype,
1657*e4b17023SJohn Marino 						       tree_log2 (oprnd1)));
1658*e4b17023SJohn Marino     }
1659*e4b17023SJohn Marino   else
1660*e4b17023SJohn Marino     {
1661*e4b17023SJohn Marino       tree signmask;
1662*e4b17023SJohn Marino       STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;
1663*e4b17023SJohn Marino       if (compare_tree_int (oprnd1, 2) == 0)
1664*e4b17023SJohn Marino 	{
1665*e4b17023SJohn Marino 	  signmask = vect_recog_temp_ssa_var (itype, NULL);
1666*e4b17023SJohn Marino 	  def_stmt
1667*e4b17023SJohn Marino 	    = gimple_build_assign_with_ops3 (COND_EXPR, signmask, cond,
1668*e4b17023SJohn Marino 					     build_int_cst (itype, 1),
1669*e4b17023SJohn Marino 					     build_int_cst (itype, 0));
1670*e4b17023SJohn Marino 	  append_pattern_def_seq (stmt_vinfo, def_stmt);
1671*e4b17023SJohn Marino 	}
1672*e4b17023SJohn Marino       else
1673*e4b17023SJohn Marino 	{
1674*e4b17023SJohn Marino 	  tree utype
1675*e4b17023SJohn Marino 	    = build_nonstandard_integer_type (TYPE_PRECISION (itype), 1);
1676*e4b17023SJohn Marino 	  tree vecutype = get_vectype_for_scalar_type (utype);
1677*e4b17023SJohn Marino 	  tree shift
1678*e4b17023SJohn Marino 	    = build_int_cst (utype, GET_MODE_BITSIZE (TYPE_MODE (itype))
1679*e4b17023SJohn Marino 				    - tree_log2 (oprnd1));
1680*e4b17023SJohn Marino 	  tree var = vect_recog_temp_ssa_var (utype, NULL);
1681*e4b17023SJohn Marino 	  stmt_vec_info def_stmt_vinfo;
1682*e4b17023SJohn Marino 
1683*e4b17023SJohn Marino 	  def_stmt
1684*e4b17023SJohn Marino 	    = gimple_build_assign_with_ops3 (COND_EXPR, var, cond,
1685*e4b17023SJohn Marino 					     build_int_cst (utype, -1),
1686*e4b17023SJohn Marino 					     build_int_cst (utype, 0));
1687*e4b17023SJohn Marino 	  def_stmt_vinfo = new_stmt_vec_info (def_stmt, loop_vinfo, NULL);
1688*e4b17023SJohn Marino 	  set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
1689*e4b17023SJohn Marino 	  STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecutype;
1690*e4b17023SJohn Marino 	  append_pattern_def_seq (stmt_vinfo, def_stmt);
1691*e4b17023SJohn Marino 	  var = vect_recog_temp_ssa_var (utype, NULL);
1692*e4b17023SJohn Marino 	  def_stmt
1693*e4b17023SJohn Marino 	    = gimple_build_assign_with_ops (RSHIFT_EXPR, var,
1694*e4b17023SJohn Marino 					    gimple_assign_lhs (def_stmt),
1695*e4b17023SJohn Marino 					    shift);
1696*e4b17023SJohn Marino 	  def_stmt_vinfo = new_stmt_vec_info (def_stmt, loop_vinfo, NULL);
1697*e4b17023SJohn Marino 	  set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
1698*e4b17023SJohn Marino 	  STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecutype;
1699*e4b17023SJohn Marino 	  append_pattern_def_seq (stmt_vinfo, def_stmt);
1700*e4b17023SJohn Marino 	  signmask = vect_recog_temp_ssa_var (itype, NULL);
1701*e4b17023SJohn Marino 	  def_stmt
1702*e4b17023SJohn Marino 	    = gimple_build_assign_with_ops (NOP_EXPR, signmask, var,
1703*e4b17023SJohn Marino 					    NULL_TREE);
1704*e4b17023SJohn Marino 	  append_pattern_def_seq (stmt_vinfo, def_stmt);
1705*e4b17023SJohn Marino 	}
1706*e4b17023SJohn Marino       def_stmt
1707*e4b17023SJohn Marino 	= gimple_build_assign_with_ops (PLUS_EXPR,
1708*e4b17023SJohn Marino 					vect_recog_temp_ssa_var (itype, NULL),
1709*e4b17023SJohn Marino 					oprnd0, signmask);
1710*e4b17023SJohn Marino       append_pattern_def_seq (stmt_vinfo, def_stmt);
1711*e4b17023SJohn Marino       def_stmt
1712*e4b17023SJohn Marino 	= gimple_build_assign_with_ops (BIT_AND_EXPR,
1713*e4b17023SJohn Marino 					vect_recog_temp_ssa_var (itype, NULL),
1714*e4b17023SJohn Marino 					gimple_assign_lhs (def_stmt),
1715*e4b17023SJohn Marino 					fold_build2 (MINUS_EXPR, itype,
1716*e4b17023SJohn Marino 						     oprnd1,
1717*e4b17023SJohn Marino 						     build_int_cst (itype,
1718*e4b17023SJohn Marino 								    1)));
1719*e4b17023SJohn Marino       append_pattern_def_seq (stmt_vinfo, def_stmt);
1720*e4b17023SJohn Marino 
1721*e4b17023SJohn Marino       pattern_stmt
1722*e4b17023SJohn Marino 	= gimple_build_assign_with_ops (MINUS_EXPR,
1723*e4b17023SJohn Marino 					vect_recog_temp_ssa_var (itype, NULL),
1724*e4b17023SJohn Marino 					gimple_assign_lhs (def_stmt),
1725*e4b17023SJohn Marino 					signmask);
1726*e4b17023SJohn Marino     }
1727*e4b17023SJohn Marino 
1728*e4b17023SJohn Marino   if (vect_print_dump_info (REPORT_DETAILS))
1729*e4b17023SJohn Marino     print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM);
1730*e4b17023SJohn Marino 
1731*e4b17023SJohn Marino   VEC_safe_push (gimple, heap, *stmts, last_stmt);
1732*e4b17023SJohn Marino 
1733*e4b17023SJohn Marino   *type_in = vectype;
1734*e4b17023SJohn Marino   *type_out = vectype;
1735*e4b17023SJohn Marino   return pattern_stmt;
1736*e4b17023SJohn Marino }
1737*e4b17023SJohn Marino 
1738*e4b17023SJohn Marino /* Function vect_recog_mixed_size_cond_pattern
1739*e4b17023SJohn Marino 
1740*e4b17023SJohn Marino    Try to find the following pattern:
1741*e4b17023SJohn Marino 
1742*e4b17023SJohn Marino      type x_t, y_t;
1743*e4b17023SJohn Marino      TYPE a_T, b_T, c_T;
1744*e4b17023SJohn Marino    loop:
1745*e4b17023SJohn Marino      S1  a_T = x_t CMP y_t ? b_T : c_T;
1746*e4b17023SJohn Marino 
1747*e4b17023SJohn Marino    where type 'TYPE' is an integral type which has different size
1748*e4b17023SJohn Marino    from 'type'.  b_T and c_T are constants and if 'TYPE' is wider
1749*e4b17023SJohn Marino    than 'type', the constants need to fit into an integer type
1750*e4b17023SJohn Marino    with the same width as 'type'.
1751*e4b17023SJohn Marino 
1752*e4b17023SJohn Marino    Input:
1753*e4b17023SJohn Marino 
1754*e4b17023SJohn Marino    * LAST_STMT: A stmt from which the pattern search begins.
1755*e4b17023SJohn Marino 
1756*e4b17023SJohn Marino    Output:
1757*e4b17023SJohn Marino 
1758*e4b17023SJohn Marino    * TYPE_IN: The type of the input arguments to the pattern.
1759*e4b17023SJohn Marino 
1760*e4b17023SJohn Marino    * TYPE_OUT: The type of the output of this pattern.
1761*e4b17023SJohn Marino 
1762*e4b17023SJohn Marino    * Return value: A new stmt that will be used to replace the pattern.
1763*e4b17023SJohn Marino 	Additionally a def_stmt is added.
1764*e4b17023SJohn Marino 
1765*e4b17023SJohn Marino 	a_it = x_t CMP y_t ? b_it : c_it;
1766*e4b17023SJohn Marino 	a_T = (TYPE) a_it;  */
1767*e4b17023SJohn Marino 
1768*e4b17023SJohn Marino static gimple
vect_recog_mixed_size_cond_pattern(VEC (gimple,heap)** stmts,tree * type_in,tree * type_out)1769*e4b17023SJohn Marino vect_recog_mixed_size_cond_pattern (VEC (gimple, heap) **stmts, tree *type_in,
1770*e4b17023SJohn Marino 				    tree *type_out)
1771*e4b17023SJohn Marino {
1772*e4b17023SJohn Marino   gimple last_stmt = VEC_index (gimple, *stmts, 0);
1773*e4b17023SJohn Marino   tree cond_expr, then_clause, else_clause;
1774*e4b17023SJohn Marino   stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt), def_stmt_info;
1775*e4b17023SJohn Marino   tree type, vectype, comp_vectype, itype, vecitype;
1776*e4b17023SJohn Marino   enum machine_mode cmpmode;
1777*e4b17023SJohn Marino   gimple pattern_stmt, def_stmt;
1778*e4b17023SJohn Marino   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1779*e4b17023SJohn Marino 
1780*e4b17023SJohn Marino   if (!is_gimple_assign (last_stmt)
1781*e4b17023SJohn Marino       || gimple_assign_rhs_code (last_stmt) != COND_EXPR
1782*e4b17023SJohn Marino       || STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_internal_def)
1783*e4b17023SJohn Marino     return NULL;
1784*e4b17023SJohn Marino 
1785*e4b17023SJohn Marino   cond_expr = gimple_assign_rhs1 (last_stmt);
1786*e4b17023SJohn Marino   then_clause = gimple_assign_rhs2 (last_stmt);
1787*e4b17023SJohn Marino   else_clause = gimple_assign_rhs3 (last_stmt);
1788*e4b17023SJohn Marino 
1789*e4b17023SJohn Marino   if (TREE_CODE (then_clause) != INTEGER_CST
1790*e4b17023SJohn Marino       || TREE_CODE (else_clause) != INTEGER_CST)
1791*e4b17023SJohn Marino     return NULL;
1792*e4b17023SJohn Marino 
1793*e4b17023SJohn Marino   if (!COMPARISON_CLASS_P (cond_expr))
1794*e4b17023SJohn Marino     return NULL;
1795*e4b17023SJohn Marino 
1796*e4b17023SJohn Marino   comp_vectype
1797*e4b17023SJohn Marino     = get_vectype_for_scalar_type (TREE_TYPE (TREE_OPERAND (cond_expr, 0)));
1798*e4b17023SJohn Marino   if (comp_vectype == NULL_TREE)
1799*e4b17023SJohn Marino     return NULL;
1800*e4b17023SJohn Marino 
1801*e4b17023SJohn Marino   type = gimple_expr_type (last_stmt);
1802*e4b17023SJohn Marino   cmpmode = GET_MODE_INNER (TYPE_MODE (comp_vectype));
1803*e4b17023SJohn Marino 
1804*e4b17023SJohn Marino   if (GET_MODE_BITSIZE (TYPE_MODE (type)) == GET_MODE_BITSIZE (cmpmode))
1805*e4b17023SJohn Marino     return NULL;
1806*e4b17023SJohn Marino 
1807*e4b17023SJohn Marino   vectype = get_vectype_for_scalar_type (type);
1808*e4b17023SJohn Marino   if (vectype == NULL_TREE)
1809*e4b17023SJohn Marino     return NULL;
1810*e4b17023SJohn Marino 
1811*e4b17023SJohn Marino   if (expand_vec_cond_expr_p (vectype, comp_vectype))
1812*e4b17023SJohn Marino     return NULL;
1813*e4b17023SJohn Marino 
1814*e4b17023SJohn Marino   itype = build_nonstandard_integer_type (GET_MODE_BITSIZE (cmpmode),
1815*e4b17023SJohn Marino 					  TYPE_UNSIGNED (type));
1816*e4b17023SJohn Marino   if (itype == NULL_TREE
1817*e4b17023SJohn Marino       || GET_MODE_BITSIZE (TYPE_MODE (itype)) != GET_MODE_BITSIZE (cmpmode))
1818*e4b17023SJohn Marino     return NULL;
1819*e4b17023SJohn Marino 
1820*e4b17023SJohn Marino   vecitype = get_vectype_for_scalar_type (itype);
1821*e4b17023SJohn Marino   if (vecitype == NULL_TREE)
1822*e4b17023SJohn Marino     return NULL;
1823*e4b17023SJohn Marino 
1824*e4b17023SJohn Marino   if (!expand_vec_cond_expr_p (vecitype, comp_vectype))
1825*e4b17023SJohn Marino     return NULL;
1826*e4b17023SJohn Marino 
1827*e4b17023SJohn Marino   if (GET_MODE_BITSIZE (TYPE_MODE (type)) > GET_MODE_BITSIZE (cmpmode))
1828*e4b17023SJohn Marino     {
1829*e4b17023SJohn Marino       if (!int_fits_type_p (then_clause, itype)
1830*e4b17023SJohn Marino 	  || !int_fits_type_p (else_clause, itype))
1831*e4b17023SJohn Marino 	return NULL;
1832*e4b17023SJohn Marino     }
1833*e4b17023SJohn Marino 
1834*e4b17023SJohn Marino   def_stmt
1835*e4b17023SJohn Marino     = gimple_build_assign_with_ops3 (COND_EXPR,
1836*e4b17023SJohn Marino 				     vect_recog_temp_ssa_var (itype, NULL),
1837*e4b17023SJohn Marino 				     unshare_expr (cond_expr),
1838*e4b17023SJohn Marino 				     fold_convert (itype, then_clause),
1839*e4b17023SJohn Marino 				     fold_convert (itype, else_clause));
1840*e4b17023SJohn Marino   pattern_stmt
1841*e4b17023SJohn Marino     = gimple_build_assign_with_ops (NOP_EXPR,
1842*e4b17023SJohn Marino 				    vect_recog_temp_ssa_var (type, NULL),
1843*e4b17023SJohn Marino 				    gimple_assign_lhs (def_stmt), NULL_TREE);
1844*e4b17023SJohn Marino 
1845*e4b17023SJohn Marino   new_pattern_def_seq (stmt_vinfo, def_stmt);
1846*e4b17023SJohn Marino   def_stmt_info = new_stmt_vec_info (def_stmt, loop_vinfo, NULL);
1847*e4b17023SJohn Marino   set_vinfo_for_stmt (def_stmt, def_stmt_info);
1848*e4b17023SJohn Marino   STMT_VINFO_VECTYPE (def_stmt_info) = vecitype;
1849*e4b17023SJohn Marino   *type_in = vecitype;
1850*e4b17023SJohn Marino   *type_out = vectype;
1851*e4b17023SJohn Marino 
1852*e4b17023SJohn Marino   return pattern_stmt;
1853*e4b17023SJohn Marino }
1854*e4b17023SJohn Marino 
1855*e4b17023SJohn Marino 
1856*e4b17023SJohn Marino /* Helper function of vect_recog_bool_pattern.  Called recursively, return
1857*e4b17023SJohn Marino    true if bool VAR can be optimized that way.  */
1858*e4b17023SJohn Marino 
1859*e4b17023SJohn Marino static bool
check_bool_pattern(tree var,loop_vec_info loop_vinfo)1860*e4b17023SJohn Marino check_bool_pattern (tree var, loop_vec_info loop_vinfo)
1861*e4b17023SJohn Marino {
1862*e4b17023SJohn Marino   gimple def_stmt;
1863*e4b17023SJohn Marino   enum vect_def_type dt;
1864*e4b17023SJohn Marino   tree def, rhs1;
1865*e4b17023SJohn Marino   enum tree_code rhs_code;
1866*e4b17023SJohn Marino 
1867*e4b17023SJohn Marino   if (!vect_is_simple_use (var, NULL, loop_vinfo, NULL, &def_stmt, &def, &dt))
1868*e4b17023SJohn Marino     return false;
1869*e4b17023SJohn Marino 
1870*e4b17023SJohn Marino   if (dt != vect_internal_def)
1871*e4b17023SJohn Marino     return false;
1872*e4b17023SJohn Marino 
1873*e4b17023SJohn Marino   if (!is_gimple_assign (def_stmt))
1874*e4b17023SJohn Marino     return false;
1875*e4b17023SJohn Marino 
1876*e4b17023SJohn Marino   if (!has_single_use (def))
1877*e4b17023SJohn Marino     return false;
1878*e4b17023SJohn Marino 
1879*e4b17023SJohn Marino   rhs1 = gimple_assign_rhs1 (def_stmt);
1880*e4b17023SJohn Marino   rhs_code = gimple_assign_rhs_code (def_stmt);
1881*e4b17023SJohn Marino   switch (rhs_code)
1882*e4b17023SJohn Marino     {
1883*e4b17023SJohn Marino     case SSA_NAME:
1884*e4b17023SJohn Marino       return check_bool_pattern (rhs1, loop_vinfo);
1885*e4b17023SJohn Marino 
1886*e4b17023SJohn Marino     CASE_CONVERT:
1887*e4b17023SJohn Marino       if ((TYPE_PRECISION (TREE_TYPE (rhs1)) != 1
1888*e4b17023SJohn Marino 	   || !TYPE_UNSIGNED (TREE_TYPE (rhs1)))
1889*e4b17023SJohn Marino 	  && TREE_CODE (TREE_TYPE (rhs1)) != BOOLEAN_TYPE)
1890*e4b17023SJohn Marino 	return false;
1891*e4b17023SJohn Marino       return check_bool_pattern (rhs1, loop_vinfo);
1892*e4b17023SJohn Marino 
1893*e4b17023SJohn Marino     case BIT_NOT_EXPR:
1894*e4b17023SJohn Marino       return check_bool_pattern (rhs1, loop_vinfo);
1895*e4b17023SJohn Marino 
1896*e4b17023SJohn Marino     case BIT_AND_EXPR:
1897*e4b17023SJohn Marino     case BIT_IOR_EXPR:
1898*e4b17023SJohn Marino     case BIT_XOR_EXPR:
1899*e4b17023SJohn Marino       if (!check_bool_pattern (rhs1, loop_vinfo))
1900*e4b17023SJohn Marino 	return false;
1901*e4b17023SJohn Marino       return check_bool_pattern (gimple_assign_rhs2 (def_stmt), loop_vinfo);
1902*e4b17023SJohn Marino 
1903*e4b17023SJohn Marino     default:
1904*e4b17023SJohn Marino       if (TREE_CODE_CLASS (rhs_code) == tcc_comparison)
1905*e4b17023SJohn Marino 	{
1906*e4b17023SJohn Marino 	  tree vecitype, comp_vectype;
1907*e4b17023SJohn Marino 
1908*e4b17023SJohn Marino 	  /* If the comparison can throw, then is_gimple_condexpr will be
1909*e4b17023SJohn Marino 	     false and we can't make a COND_EXPR/VEC_COND_EXPR out of it.  */
1910*e4b17023SJohn Marino 	  if (stmt_could_throw_p (def_stmt))
1911*e4b17023SJohn Marino 	    return false;
1912*e4b17023SJohn Marino 
1913*e4b17023SJohn Marino 	  comp_vectype = get_vectype_for_scalar_type (TREE_TYPE (rhs1));
1914*e4b17023SJohn Marino 	  if (comp_vectype == NULL_TREE)
1915*e4b17023SJohn Marino 	    return false;
1916*e4b17023SJohn Marino 
1917*e4b17023SJohn Marino 	  if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE)
1918*e4b17023SJohn Marino 	    {
1919*e4b17023SJohn Marino 	      enum machine_mode mode = TYPE_MODE (TREE_TYPE (rhs1));
1920*e4b17023SJohn Marino 	      tree itype
1921*e4b17023SJohn Marino 		= build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
1922*e4b17023SJohn Marino 	      vecitype = get_vectype_for_scalar_type (itype);
1923*e4b17023SJohn Marino 	      if (vecitype == NULL_TREE)
1924*e4b17023SJohn Marino 		return false;
1925*e4b17023SJohn Marino 	    }
1926*e4b17023SJohn Marino 	  else
1927*e4b17023SJohn Marino 	    vecitype = comp_vectype;
1928*e4b17023SJohn Marino 	  return expand_vec_cond_expr_p (vecitype, comp_vectype);
1929*e4b17023SJohn Marino 	}
1930*e4b17023SJohn Marino       return false;
1931*e4b17023SJohn Marino     }
1932*e4b17023SJohn Marino }
1933*e4b17023SJohn Marino 
1934*e4b17023SJohn Marino 
1935*e4b17023SJohn Marino /* Helper function of adjust_bool_pattern.  Add a cast to TYPE to a previous
1936*e4b17023SJohn Marino    stmt (SSA_NAME_DEF_STMT of VAR) by moving the COND_EXPR from RELATED_STMT
1937*e4b17023SJohn Marino    to PATTERN_DEF_SEQ and adding a cast as RELATED_STMT.  */
1938*e4b17023SJohn Marino 
1939*e4b17023SJohn Marino static tree
adjust_bool_pattern_cast(tree type,tree var)1940*e4b17023SJohn Marino adjust_bool_pattern_cast (tree type, tree var)
1941*e4b17023SJohn Marino {
1942*e4b17023SJohn Marino   stmt_vec_info stmt_vinfo = vinfo_for_stmt (SSA_NAME_DEF_STMT (var));
1943*e4b17023SJohn Marino   gimple cast_stmt, pattern_stmt;
1944*e4b17023SJohn Marino 
1945*e4b17023SJohn Marino   gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo));
1946*e4b17023SJohn Marino   pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
1947*e4b17023SJohn Marino   new_pattern_def_seq (stmt_vinfo, pattern_stmt);
1948*e4b17023SJohn Marino   cast_stmt
1949*e4b17023SJohn Marino     = gimple_build_assign_with_ops (NOP_EXPR,
1950*e4b17023SJohn Marino 				    vect_recog_temp_ssa_var (type, NULL),
1951*e4b17023SJohn Marino 				    gimple_assign_lhs (pattern_stmt),
1952*e4b17023SJohn Marino 				    NULL_TREE);
1953*e4b17023SJohn Marino   STMT_VINFO_RELATED_STMT (stmt_vinfo) = cast_stmt;
1954*e4b17023SJohn Marino   return gimple_assign_lhs (cast_stmt);
1955*e4b17023SJohn Marino }
1956*e4b17023SJohn Marino 
1957*e4b17023SJohn Marino 
1958*e4b17023SJohn Marino /* Helper function of vect_recog_bool_pattern.  Do the actual transformations,
1959*e4b17023SJohn Marino    recursively.  VAR is an SSA_NAME that should be transformed from bool
1960*e4b17023SJohn Marino    to a wider integer type, OUT_TYPE is the desired final integer type of
1961*e4b17023SJohn Marino    the whole pattern, TRUEVAL should be NULL unless optimizing
1962*e4b17023SJohn Marino    BIT_AND_EXPR into a COND_EXPR with one integer from one of the operands
1963*e4b17023SJohn Marino    in the then_clause, STMTS is where statements with added pattern stmts
1964*e4b17023SJohn Marino    should be pushed to.  */
1965*e4b17023SJohn Marino 
1966*e4b17023SJohn Marino static tree
adjust_bool_pattern(tree var,tree out_type,tree trueval,VEC (gimple,heap)** stmts)1967*e4b17023SJohn Marino adjust_bool_pattern (tree var, tree out_type, tree trueval,
1968*e4b17023SJohn Marino 		     VEC (gimple, heap) **stmts)
1969*e4b17023SJohn Marino {
1970*e4b17023SJohn Marino   gimple stmt = SSA_NAME_DEF_STMT (var);
1971*e4b17023SJohn Marino   enum tree_code rhs_code, def_rhs_code;
1972*e4b17023SJohn Marino   tree itype, cond_expr, rhs1, rhs2, irhs1, irhs2;
1973*e4b17023SJohn Marino   location_t loc;
1974*e4b17023SJohn Marino   gimple pattern_stmt, def_stmt;
1975*e4b17023SJohn Marino 
1976*e4b17023SJohn Marino   rhs1 = gimple_assign_rhs1 (stmt);
1977*e4b17023SJohn Marino   rhs2 = gimple_assign_rhs2 (stmt);
1978*e4b17023SJohn Marino   rhs_code = gimple_assign_rhs_code (stmt);
1979*e4b17023SJohn Marino   loc = gimple_location (stmt);
1980*e4b17023SJohn Marino   switch (rhs_code)
1981*e4b17023SJohn Marino     {
1982*e4b17023SJohn Marino     case SSA_NAME:
1983*e4b17023SJohn Marino     CASE_CONVERT:
1984*e4b17023SJohn Marino       irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
1985*e4b17023SJohn Marino       itype = TREE_TYPE (irhs1);
1986*e4b17023SJohn Marino       pattern_stmt
1987*e4b17023SJohn Marino 	= gimple_build_assign_with_ops (SSA_NAME,
1988*e4b17023SJohn Marino 					vect_recog_temp_ssa_var (itype, NULL),
1989*e4b17023SJohn Marino 					irhs1, NULL_TREE);
1990*e4b17023SJohn Marino       break;
1991*e4b17023SJohn Marino 
1992*e4b17023SJohn Marino     case BIT_NOT_EXPR:
1993*e4b17023SJohn Marino       irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
1994*e4b17023SJohn Marino       itype = TREE_TYPE (irhs1);
1995*e4b17023SJohn Marino       pattern_stmt
1996*e4b17023SJohn Marino 	= gimple_build_assign_with_ops (BIT_XOR_EXPR,
1997*e4b17023SJohn Marino 					vect_recog_temp_ssa_var (itype, NULL),
1998*e4b17023SJohn Marino 					irhs1, build_int_cst (itype, 1));
1999*e4b17023SJohn Marino       break;
2000*e4b17023SJohn Marino 
2001*e4b17023SJohn Marino     case BIT_AND_EXPR:
2002*e4b17023SJohn Marino       /* Try to optimize x = y & (a < b ? 1 : 0); into
2003*e4b17023SJohn Marino 	 x = (a < b ? y : 0);
2004*e4b17023SJohn Marino 
2005*e4b17023SJohn Marino 	 E.g. for:
2006*e4b17023SJohn Marino 	   bool a_b, b_b, c_b;
2007*e4b17023SJohn Marino 	   TYPE d_T;
2008*e4b17023SJohn Marino 
2009*e4b17023SJohn Marino 	   S1  a_b = x1 CMP1 y1;
2010*e4b17023SJohn Marino 	   S2  b_b = x2 CMP2 y2;
2011*e4b17023SJohn Marino 	   S3  c_b = a_b & b_b;
2012*e4b17023SJohn Marino 	   S4  d_T = (TYPE) c_b;
2013*e4b17023SJohn Marino 
2014*e4b17023SJohn Marino 	 we would normally emit:
2015*e4b17023SJohn Marino 
2016*e4b17023SJohn Marino 	   S1'  a_T = x1 CMP1 y1 ? 1 : 0;
2017*e4b17023SJohn Marino 	   S2'  b_T = x2 CMP2 y2 ? 1 : 0;
2018*e4b17023SJohn Marino 	   S3'  c_T = a_T & b_T;
2019*e4b17023SJohn Marino 	   S4'  d_T = c_T;
2020*e4b17023SJohn Marino 
2021*e4b17023SJohn Marino 	 but we can save one stmt by using the
2022*e4b17023SJohn Marino 	 result of one of the COND_EXPRs in the other COND_EXPR and leave
2023*e4b17023SJohn Marino 	 BIT_AND_EXPR stmt out:
2024*e4b17023SJohn Marino 
2025*e4b17023SJohn Marino 	   S1'  a_T = x1 CMP1 y1 ? 1 : 0;
2026*e4b17023SJohn Marino 	   S3'  c_T = x2 CMP2 y2 ? a_T : 0;
2027*e4b17023SJohn Marino 	   S4'  f_T = c_T;
2028*e4b17023SJohn Marino 
2029*e4b17023SJohn Marino 	 At least when VEC_COND_EXPR is implemented using masks
2030*e4b17023SJohn Marino 	 cond ? 1 : 0 is as expensive as cond ? var : 0, in both cases it
2031*e4b17023SJohn Marino 	 computes the comparison masks and ands it, in one case with
2032*e4b17023SJohn Marino 	 all ones vector, in the other case with a vector register.
2033*e4b17023SJohn Marino 	 Don't do this for BIT_IOR_EXPR, because cond ? 1 : var; is
2034*e4b17023SJohn Marino 	 often more expensive.  */
2035*e4b17023SJohn Marino       def_stmt = SSA_NAME_DEF_STMT (rhs2);
2036*e4b17023SJohn Marino       def_rhs_code = gimple_assign_rhs_code (def_stmt);
2037*e4b17023SJohn Marino       if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
2038*e4b17023SJohn Marino 	{
2039*e4b17023SJohn Marino 	  tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
2040*e4b17023SJohn Marino 	  irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
2041*e4b17023SJohn Marino 	  if (TYPE_PRECISION (TREE_TYPE (irhs1))
2042*e4b17023SJohn Marino 	      == GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (def_rhs1))))
2043*e4b17023SJohn Marino 	    {
2044*e4b17023SJohn Marino 	      gimple tstmt;
2045*e4b17023SJohn Marino 	      stmt_vec_info stmt_def_vinfo = vinfo_for_stmt (def_stmt);
2046*e4b17023SJohn Marino 	      irhs2 = adjust_bool_pattern (rhs2, out_type, irhs1, stmts);
2047*e4b17023SJohn Marino 	      tstmt = VEC_pop (gimple, *stmts);
2048*e4b17023SJohn Marino 	      gcc_assert (tstmt == def_stmt);
2049*e4b17023SJohn Marino 	      VEC_quick_push (gimple, *stmts, stmt);
2050*e4b17023SJohn Marino 	      STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt))
2051*e4b17023SJohn Marino 		= STMT_VINFO_RELATED_STMT (stmt_def_vinfo);
2052*e4b17023SJohn Marino 	      gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_def_vinfo));
2053*e4b17023SJohn Marino 	      STMT_VINFO_RELATED_STMT (stmt_def_vinfo) = NULL;
2054*e4b17023SJohn Marino 	      return irhs2;
2055*e4b17023SJohn Marino 	    }
2056*e4b17023SJohn Marino 	  else
2057*e4b17023SJohn Marino 	    irhs2 = adjust_bool_pattern (rhs2, out_type, NULL_TREE, stmts);
2058*e4b17023SJohn Marino 	  goto and_ior_xor;
2059*e4b17023SJohn Marino 	}
2060*e4b17023SJohn Marino       def_stmt = SSA_NAME_DEF_STMT (rhs1);
2061*e4b17023SJohn Marino       def_rhs_code = gimple_assign_rhs_code (def_stmt);
2062*e4b17023SJohn Marino       if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
2063*e4b17023SJohn Marino 	{
2064*e4b17023SJohn Marino 	  tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
2065*e4b17023SJohn Marino 	  irhs2 = adjust_bool_pattern (rhs2, out_type, NULL_TREE, stmts);
2066*e4b17023SJohn Marino 	  if (TYPE_PRECISION (TREE_TYPE (irhs2))
2067*e4b17023SJohn Marino 	      == GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (def_rhs1))))
2068*e4b17023SJohn Marino 	    {
2069*e4b17023SJohn Marino 	      gimple tstmt;
2070*e4b17023SJohn Marino 	      stmt_vec_info stmt_def_vinfo = vinfo_for_stmt (def_stmt);
2071*e4b17023SJohn Marino 	      irhs1 = adjust_bool_pattern (rhs1, out_type, irhs2, stmts);
2072*e4b17023SJohn Marino 	      tstmt = VEC_pop (gimple, *stmts);
2073*e4b17023SJohn Marino 	      gcc_assert (tstmt == def_stmt);
2074*e4b17023SJohn Marino 	      VEC_quick_push (gimple, *stmts, stmt);
2075*e4b17023SJohn Marino 	      STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt))
2076*e4b17023SJohn Marino 		= STMT_VINFO_RELATED_STMT (stmt_def_vinfo);
2077*e4b17023SJohn Marino 	      gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_def_vinfo));
2078*e4b17023SJohn Marino 	      STMT_VINFO_RELATED_STMT (stmt_def_vinfo) = NULL;
2079*e4b17023SJohn Marino 	      return irhs1;
2080*e4b17023SJohn Marino 	    }
2081*e4b17023SJohn Marino 	  else
2082*e4b17023SJohn Marino 	    irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
2083*e4b17023SJohn Marino 	  goto and_ior_xor;
2084*e4b17023SJohn Marino 	}
2085*e4b17023SJohn Marino       /* FALLTHRU */
2086*e4b17023SJohn Marino     case BIT_IOR_EXPR:
2087*e4b17023SJohn Marino     case BIT_XOR_EXPR:
2088*e4b17023SJohn Marino       irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
2089*e4b17023SJohn Marino       irhs2 = adjust_bool_pattern (rhs2, out_type, NULL_TREE, stmts);
2090*e4b17023SJohn Marino     and_ior_xor:
2091*e4b17023SJohn Marino       if (TYPE_PRECISION (TREE_TYPE (irhs1))
2092*e4b17023SJohn Marino 	  != TYPE_PRECISION (TREE_TYPE (irhs2)))
2093*e4b17023SJohn Marino 	{
2094*e4b17023SJohn Marino 	  int prec1 = TYPE_PRECISION (TREE_TYPE (irhs1));
2095*e4b17023SJohn Marino 	  int prec2 = TYPE_PRECISION (TREE_TYPE (irhs2));
2096*e4b17023SJohn Marino 	  int out_prec = TYPE_PRECISION (out_type);
2097*e4b17023SJohn Marino 	  if (absu_hwi (out_prec - prec1) < absu_hwi (out_prec - prec2))
2098*e4b17023SJohn Marino 	    irhs2 = adjust_bool_pattern_cast (TREE_TYPE (irhs1), rhs2);
2099*e4b17023SJohn Marino 	  else if (absu_hwi (out_prec - prec1) > absu_hwi (out_prec - prec2))
2100*e4b17023SJohn Marino 	    irhs1 = adjust_bool_pattern_cast (TREE_TYPE (irhs2), rhs1);
2101*e4b17023SJohn Marino 	  else
2102*e4b17023SJohn Marino 	    {
2103*e4b17023SJohn Marino 	      irhs1 = adjust_bool_pattern_cast (out_type, rhs1);
2104*e4b17023SJohn Marino 	      irhs2 = adjust_bool_pattern_cast (out_type, rhs2);
2105*e4b17023SJohn Marino 	    }
2106*e4b17023SJohn Marino 	}
2107*e4b17023SJohn Marino       itype = TREE_TYPE (irhs1);
2108*e4b17023SJohn Marino       pattern_stmt
2109*e4b17023SJohn Marino 	= gimple_build_assign_with_ops (rhs_code,
2110*e4b17023SJohn Marino 					vect_recog_temp_ssa_var (itype, NULL),
2111*e4b17023SJohn Marino 					irhs1, irhs2);
2112*e4b17023SJohn Marino       break;
2113*e4b17023SJohn Marino 
2114*e4b17023SJohn Marino     default:
2115*e4b17023SJohn Marino       gcc_assert (TREE_CODE_CLASS (rhs_code) == tcc_comparison);
2116*e4b17023SJohn Marino       if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE
2117*e4b17023SJohn Marino 	  || !TYPE_UNSIGNED (TREE_TYPE (rhs1))
2118*e4b17023SJohn Marino 	  || (TYPE_PRECISION (TREE_TYPE (rhs1))
2119*e4b17023SJohn Marino 	      != GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (rhs1)))))
2120*e4b17023SJohn Marino 	{
2121*e4b17023SJohn Marino 	  enum machine_mode mode = TYPE_MODE (TREE_TYPE (rhs1));
2122*e4b17023SJohn Marino 	  itype
2123*e4b17023SJohn Marino 	    = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
2124*e4b17023SJohn Marino 	}
2125*e4b17023SJohn Marino       else
2126*e4b17023SJohn Marino 	itype = TREE_TYPE (rhs1);
2127*e4b17023SJohn Marino       cond_expr = build2_loc (loc, rhs_code, itype, rhs1, rhs2);
2128*e4b17023SJohn Marino       if (trueval == NULL_TREE)
2129*e4b17023SJohn Marino 	trueval = build_int_cst (itype, 1);
2130*e4b17023SJohn Marino       else
2131*e4b17023SJohn Marino 	gcc_checking_assert (useless_type_conversion_p (itype,
2132*e4b17023SJohn Marino 							TREE_TYPE (trueval)));
2133*e4b17023SJohn Marino       pattern_stmt
2134*e4b17023SJohn Marino 	= gimple_build_assign_with_ops3 (COND_EXPR,
2135*e4b17023SJohn Marino 					 vect_recog_temp_ssa_var (itype, NULL),
2136*e4b17023SJohn Marino 					 cond_expr, trueval,
2137*e4b17023SJohn Marino 					 build_int_cst (itype, 0));
2138*e4b17023SJohn Marino       break;
2139*e4b17023SJohn Marino     }
2140*e4b17023SJohn Marino 
2141*e4b17023SJohn Marino   VEC_safe_push (gimple, heap, *stmts, stmt);
2142*e4b17023SJohn Marino   gimple_set_location (pattern_stmt, loc);
2143*e4b17023SJohn Marino   STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt)) = pattern_stmt;
2144*e4b17023SJohn Marino   return gimple_assign_lhs (pattern_stmt);
2145*e4b17023SJohn Marino }
2146*e4b17023SJohn Marino 
2147*e4b17023SJohn Marino 
2148*e4b17023SJohn Marino /* Function vect_recog_bool_pattern
2149*e4b17023SJohn Marino 
2150*e4b17023SJohn Marino    Try to find pattern like following:
2151*e4b17023SJohn Marino 
2152*e4b17023SJohn Marino      bool a_b, b_b, c_b, d_b, e_b;
2153*e4b17023SJohn Marino      TYPE f_T;
2154*e4b17023SJohn Marino    loop:
2155*e4b17023SJohn Marino      S1  a_b = x1 CMP1 y1;
2156*e4b17023SJohn Marino      S2  b_b = x2 CMP2 y2;
2157*e4b17023SJohn Marino      S3  c_b = a_b & b_b;
2158*e4b17023SJohn Marino      S4  d_b = x3 CMP3 y3;
2159*e4b17023SJohn Marino      S5  e_b = c_b | d_b;
2160*e4b17023SJohn Marino      S6  f_T = (TYPE) e_b;
2161*e4b17023SJohn Marino 
2162*e4b17023SJohn Marino    where type 'TYPE' is an integral type.
2163*e4b17023SJohn Marino 
2164*e4b17023SJohn Marino    Input:
2165*e4b17023SJohn Marino 
2166*e4b17023SJohn Marino    * LAST_STMT: A stmt at the end from which the pattern
2167*e4b17023SJohn Marino 		search begins, i.e. cast of a bool to
2168*e4b17023SJohn Marino 		an integer type.
2169*e4b17023SJohn Marino 
2170*e4b17023SJohn Marino    Output:
2171*e4b17023SJohn Marino 
2172*e4b17023SJohn Marino    * TYPE_IN: The type of the input arguments to the pattern.
2173*e4b17023SJohn Marino 
2174*e4b17023SJohn Marino    * TYPE_OUT: The type of the output of this pattern.
2175*e4b17023SJohn Marino 
2176*e4b17023SJohn Marino    * Return value: A new stmt that will be used to replace the pattern.
2177*e4b17023SJohn Marino 
2178*e4b17023SJohn Marino 	Assuming size of TYPE is the same as size of all comparisons
2179*e4b17023SJohn Marino 	(otherwise some casts would be added where needed), the above
2180*e4b17023SJohn Marino 	sequence we create related pattern stmts:
2181*e4b17023SJohn Marino 	S1'  a_T = x1 CMP1 y1 ? 1 : 0;
2182*e4b17023SJohn Marino 	S3'  c_T = x2 CMP2 y2 ? a_T : 0;
2183*e4b17023SJohn Marino 	S4'  d_T = x3 CMP3 y3 ? 1 : 0;
2184*e4b17023SJohn Marino 	S5'  e_T = c_T | d_T;
2185*e4b17023SJohn Marino 	S6'  f_T = e_T;
2186*e4b17023SJohn Marino 
2187*e4b17023SJohn Marino 	Instead of the above S3' we could emit:
2188*e4b17023SJohn Marino 	S2'  b_T = x2 CMP2 y2 ? 1 : 0;
2189*e4b17023SJohn Marino 	S3'  c_T = a_T | b_T;
2190*e4b17023SJohn Marino 	but the above is more efficient.  */
2191*e4b17023SJohn Marino 
2192*e4b17023SJohn Marino static gimple
vect_recog_bool_pattern(VEC (gimple,heap)** stmts,tree * type_in,tree * type_out)2193*e4b17023SJohn Marino vect_recog_bool_pattern (VEC (gimple, heap) **stmts, tree *type_in,
2194*e4b17023SJohn Marino 			 tree *type_out)
2195*e4b17023SJohn Marino {
2196*e4b17023SJohn Marino   gimple last_stmt = VEC_pop (gimple, *stmts);
2197*e4b17023SJohn Marino   enum tree_code rhs_code;
2198*e4b17023SJohn Marino   tree var, lhs, rhs, vectype;
2199*e4b17023SJohn Marino   stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
2200*e4b17023SJohn Marino   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
2201*e4b17023SJohn Marino   gimple pattern_stmt;
2202*e4b17023SJohn Marino 
2203*e4b17023SJohn Marino   if (!is_gimple_assign (last_stmt))
2204*e4b17023SJohn Marino     return NULL;
2205*e4b17023SJohn Marino 
2206*e4b17023SJohn Marino   var = gimple_assign_rhs1 (last_stmt);
2207*e4b17023SJohn Marino   lhs = gimple_assign_lhs (last_stmt);
2208*e4b17023SJohn Marino 
2209*e4b17023SJohn Marino   if ((TYPE_PRECISION (TREE_TYPE (var)) != 1
2210*e4b17023SJohn Marino        || !TYPE_UNSIGNED (TREE_TYPE (var)))
2211*e4b17023SJohn Marino       && TREE_CODE (TREE_TYPE (var)) != BOOLEAN_TYPE)
2212*e4b17023SJohn Marino     return NULL;
2213*e4b17023SJohn Marino 
2214*e4b17023SJohn Marino   rhs_code = gimple_assign_rhs_code (last_stmt);
2215*e4b17023SJohn Marino   if (CONVERT_EXPR_CODE_P (rhs_code))
2216*e4b17023SJohn Marino     {
2217*e4b17023SJohn Marino       if (TREE_CODE (TREE_TYPE (lhs)) != INTEGER_TYPE
2218*e4b17023SJohn Marino 	  || TYPE_PRECISION (TREE_TYPE (lhs)) == 1)
2219*e4b17023SJohn Marino 	return NULL;
2220*e4b17023SJohn Marino       vectype = get_vectype_for_scalar_type (TREE_TYPE (lhs));
2221*e4b17023SJohn Marino       if (vectype == NULL_TREE)
2222*e4b17023SJohn Marino 	return NULL;
2223*e4b17023SJohn Marino 
2224*e4b17023SJohn Marino       if (!check_bool_pattern (var, loop_vinfo))
2225*e4b17023SJohn Marino 	return NULL;
2226*e4b17023SJohn Marino 
2227*e4b17023SJohn Marino       rhs = adjust_bool_pattern (var, TREE_TYPE (lhs), NULL_TREE, stmts);
2228*e4b17023SJohn Marino       lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
2229*e4b17023SJohn Marino       if (useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
2230*e4b17023SJohn Marino 	pattern_stmt
2231*e4b17023SJohn Marino 	  = gimple_build_assign_with_ops (SSA_NAME, lhs, rhs, NULL_TREE);
2232*e4b17023SJohn Marino       else
2233*e4b17023SJohn Marino 	pattern_stmt
2234*e4b17023SJohn Marino 	  = gimple_build_assign_with_ops (NOP_EXPR, lhs, rhs, NULL_TREE);
2235*e4b17023SJohn Marino       *type_out = vectype;
2236*e4b17023SJohn Marino       *type_in = vectype;
2237*e4b17023SJohn Marino       VEC_safe_push (gimple, heap, *stmts, last_stmt);
2238*e4b17023SJohn Marino       return pattern_stmt;
2239*e4b17023SJohn Marino     }
2240*e4b17023SJohn Marino   else if (rhs_code == SSA_NAME
2241*e4b17023SJohn Marino 	   && STMT_VINFO_DATA_REF (stmt_vinfo))
2242*e4b17023SJohn Marino     {
2243*e4b17023SJohn Marino       stmt_vec_info pattern_stmt_info;
2244*e4b17023SJohn Marino       vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
2245*e4b17023SJohn Marino       gcc_assert (vectype != NULL_TREE);
2246*e4b17023SJohn Marino       if (!VECTOR_MODE_P (TYPE_MODE (vectype)))
2247*e4b17023SJohn Marino 	return NULL;
2248*e4b17023SJohn Marino       if (!check_bool_pattern (var, loop_vinfo))
2249*e4b17023SJohn Marino 	return NULL;
2250*e4b17023SJohn Marino 
2251*e4b17023SJohn Marino       rhs = adjust_bool_pattern (var, TREE_TYPE (vectype), NULL_TREE, stmts);
2252*e4b17023SJohn Marino       lhs = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (vectype), lhs);
2253*e4b17023SJohn Marino       if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
2254*e4b17023SJohn Marino 	{
2255*e4b17023SJohn Marino 	  tree rhs2 = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
2256*e4b17023SJohn Marino 	  gimple cast_stmt
2257*e4b17023SJohn Marino 	    = gimple_build_assign_with_ops (NOP_EXPR, rhs2, rhs, NULL_TREE);
2258*e4b17023SJohn Marino 	  new_pattern_def_seq (stmt_vinfo, cast_stmt);
2259*e4b17023SJohn Marino 	  rhs = rhs2;
2260*e4b17023SJohn Marino 	}
2261*e4b17023SJohn Marino       pattern_stmt
2262*e4b17023SJohn Marino 	= gimple_build_assign_with_ops (SSA_NAME, lhs, rhs, NULL_TREE);
2263*e4b17023SJohn Marino       pattern_stmt_info = new_stmt_vec_info (pattern_stmt, loop_vinfo, NULL);
2264*e4b17023SJohn Marino       set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
2265*e4b17023SJohn Marino       STMT_VINFO_DATA_REF (pattern_stmt_info)
2266*e4b17023SJohn Marino 	= STMT_VINFO_DATA_REF (stmt_vinfo);
2267*e4b17023SJohn Marino       STMT_VINFO_DR_BASE_ADDRESS (pattern_stmt_info)
2268*e4b17023SJohn Marino 	= STMT_VINFO_DR_BASE_ADDRESS (stmt_vinfo);
2269*e4b17023SJohn Marino       STMT_VINFO_DR_INIT (pattern_stmt_info) = STMT_VINFO_DR_INIT (stmt_vinfo);
2270*e4b17023SJohn Marino       STMT_VINFO_DR_OFFSET (pattern_stmt_info)
2271*e4b17023SJohn Marino 	= STMT_VINFO_DR_OFFSET (stmt_vinfo);
2272*e4b17023SJohn Marino       STMT_VINFO_DR_STEP (pattern_stmt_info) = STMT_VINFO_DR_STEP (stmt_vinfo);
2273*e4b17023SJohn Marino       STMT_VINFO_DR_ALIGNED_TO (pattern_stmt_info)
2274*e4b17023SJohn Marino 	= STMT_VINFO_DR_ALIGNED_TO (stmt_vinfo);
2275*e4b17023SJohn Marino       DR_STMT (STMT_VINFO_DATA_REF (stmt_vinfo)) = pattern_stmt;
2276*e4b17023SJohn Marino       *type_out = vectype;
2277*e4b17023SJohn Marino       *type_in = vectype;
2278*e4b17023SJohn Marino       VEC_safe_push (gimple, heap, *stmts, last_stmt);
2279*e4b17023SJohn Marino       return pattern_stmt;
2280*e4b17023SJohn Marino     }
2281*e4b17023SJohn Marino   else
2282*e4b17023SJohn Marino     return NULL;
2283*e4b17023SJohn Marino }
2284*e4b17023SJohn Marino 
2285*e4b17023SJohn Marino 
2286*e4b17023SJohn Marino /* Mark statements that are involved in a pattern.  */
2287*e4b17023SJohn Marino 
2288*e4b17023SJohn Marino static inline void
vect_mark_pattern_stmts(gimple orig_stmt,gimple pattern_stmt,tree pattern_vectype)2289*e4b17023SJohn Marino vect_mark_pattern_stmts (gimple orig_stmt, gimple pattern_stmt,
2290*e4b17023SJohn Marino                          tree pattern_vectype)
2291*e4b17023SJohn Marino {
2292*e4b17023SJohn Marino   stmt_vec_info pattern_stmt_info, def_stmt_info;
2293*e4b17023SJohn Marino   stmt_vec_info orig_stmt_info = vinfo_for_stmt (orig_stmt);
2294*e4b17023SJohn Marino   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (orig_stmt_info);
2295*e4b17023SJohn Marino   gimple def_stmt;
2296*e4b17023SJohn Marino 
2297*e4b17023SJohn Marino   pattern_stmt_info = vinfo_for_stmt (pattern_stmt);
2298*e4b17023SJohn Marino   if (pattern_stmt_info == NULL)
2299*e4b17023SJohn Marino     {
2300*e4b17023SJohn Marino       pattern_stmt_info = new_stmt_vec_info (pattern_stmt, loop_vinfo, NULL);
2301*e4b17023SJohn Marino       set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
2302*e4b17023SJohn Marino     }
2303*e4b17023SJohn Marino   gimple_set_bb (pattern_stmt, gimple_bb (orig_stmt));
2304*e4b17023SJohn Marino 
2305*e4b17023SJohn Marino   STMT_VINFO_RELATED_STMT (pattern_stmt_info) = orig_stmt;
2306*e4b17023SJohn Marino   STMT_VINFO_DEF_TYPE (pattern_stmt_info)
2307*e4b17023SJohn Marino     = STMT_VINFO_DEF_TYPE (orig_stmt_info);
2308*e4b17023SJohn Marino   STMT_VINFO_VECTYPE (pattern_stmt_info) = pattern_vectype;
2309*e4b17023SJohn Marino   STMT_VINFO_IN_PATTERN_P (orig_stmt_info) = true;
2310*e4b17023SJohn Marino   STMT_VINFO_RELATED_STMT (orig_stmt_info) = pattern_stmt;
2311*e4b17023SJohn Marino   STMT_VINFO_PATTERN_DEF_SEQ (pattern_stmt_info)
2312*e4b17023SJohn Marino     = STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt_info);
2313*e4b17023SJohn Marino   if (STMT_VINFO_PATTERN_DEF_SEQ (pattern_stmt_info))
2314*e4b17023SJohn Marino     {
2315*e4b17023SJohn Marino       gimple_stmt_iterator si;
2316*e4b17023SJohn Marino       for (si = gsi_start (STMT_VINFO_PATTERN_DEF_SEQ (pattern_stmt_info));
2317*e4b17023SJohn Marino 	   !gsi_end_p (si); gsi_next (&si))
2318*e4b17023SJohn Marino 	{
2319*e4b17023SJohn Marino 	  def_stmt = gsi_stmt (si);
2320*e4b17023SJohn Marino 	  def_stmt_info = vinfo_for_stmt (def_stmt);
2321*e4b17023SJohn Marino 	  if (def_stmt_info == NULL)
2322*e4b17023SJohn Marino 	    {
2323*e4b17023SJohn Marino 	      def_stmt_info = new_stmt_vec_info (def_stmt, loop_vinfo, NULL);
2324*e4b17023SJohn Marino 	      set_vinfo_for_stmt (def_stmt, def_stmt_info);
2325*e4b17023SJohn Marino 	    }
2326*e4b17023SJohn Marino 	  gimple_set_bb (def_stmt, gimple_bb (orig_stmt));
2327*e4b17023SJohn Marino 	  STMT_VINFO_RELATED_STMT (def_stmt_info) = orig_stmt;
2328*e4b17023SJohn Marino 	  STMT_VINFO_DEF_TYPE (def_stmt_info)
2329*e4b17023SJohn Marino 	    = STMT_VINFO_DEF_TYPE (orig_stmt_info);
2330*e4b17023SJohn Marino 	  if (STMT_VINFO_VECTYPE (def_stmt_info) == NULL_TREE)
2331*e4b17023SJohn Marino 	    STMT_VINFO_VECTYPE (def_stmt_info) = pattern_vectype;
2332*e4b17023SJohn Marino 	}
2333*e4b17023SJohn Marino     }
2334*e4b17023SJohn Marino }
2335*e4b17023SJohn Marino 
2336*e4b17023SJohn Marino /* Function vect_pattern_recog_1
2337*e4b17023SJohn Marino 
2338*e4b17023SJohn Marino    Input:
2339*e4b17023SJohn Marino    PATTERN_RECOG_FUNC: A pointer to a function that detects a certain
2340*e4b17023SJohn Marino         computation pattern.
2341*e4b17023SJohn Marino    STMT: A stmt from which the pattern search should start.
2342*e4b17023SJohn Marino 
2343*e4b17023SJohn Marino    If PATTERN_RECOG_FUNC successfully detected the pattern, it creates an
2344*e4b17023SJohn Marino    expression that computes the same functionality and can be used to
2345*e4b17023SJohn Marino    replace the sequence of stmts that are involved in the pattern.
2346*e4b17023SJohn Marino 
2347*e4b17023SJohn Marino    Output:
2348*e4b17023SJohn Marino    This function checks if the expression returned by PATTERN_RECOG_FUNC is
2349*e4b17023SJohn Marino    supported in vector form by the target.  We use 'TYPE_IN' to obtain the
2350*e4b17023SJohn Marino    relevant vector type. If 'TYPE_IN' is already a vector type, then this
2351*e4b17023SJohn Marino    indicates that target support had already been checked by PATTERN_RECOG_FUNC.
2352*e4b17023SJohn Marino    If 'TYPE_OUT' is also returned by PATTERN_RECOG_FUNC, we check that it fits
2353*e4b17023SJohn Marino    to the available target pattern.
2354*e4b17023SJohn Marino 
2355*e4b17023SJohn Marino    This function also does some bookkeeping, as explained in the documentation
2356*e4b17023SJohn Marino    for vect_recog_pattern.  */
2357*e4b17023SJohn Marino 
2358*e4b17023SJohn Marino static void
vect_pattern_recog_1(vect_recog_func_ptr vect_recog_func,gimple_stmt_iterator si,VEC (gimple,heap)** stmts_to_replace)2359*e4b17023SJohn Marino vect_pattern_recog_1 (vect_recog_func_ptr vect_recog_func,
2360*e4b17023SJohn Marino 		      gimple_stmt_iterator si,
2361*e4b17023SJohn Marino 		      VEC (gimple, heap) **stmts_to_replace)
2362*e4b17023SJohn Marino {
2363*e4b17023SJohn Marino   gimple stmt = gsi_stmt (si), pattern_stmt;
2364*e4b17023SJohn Marino   stmt_vec_info stmt_info;
2365*e4b17023SJohn Marino   loop_vec_info loop_vinfo;
2366*e4b17023SJohn Marino   tree pattern_vectype;
2367*e4b17023SJohn Marino   tree type_in, type_out;
2368*e4b17023SJohn Marino   enum tree_code code;
2369*e4b17023SJohn Marino   int i;
2370*e4b17023SJohn Marino   gimple next;
2371*e4b17023SJohn Marino 
2372*e4b17023SJohn Marino   VEC_truncate (gimple, *stmts_to_replace, 0);
2373*e4b17023SJohn Marino   VEC_quick_push (gimple, *stmts_to_replace, stmt);
2374*e4b17023SJohn Marino   pattern_stmt = (* vect_recog_func) (stmts_to_replace, &type_in, &type_out);
2375*e4b17023SJohn Marino   if (!pattern_stmt)
2376*e4b17023SJohn Marino     return;
2377*e4b17023SJohn Marino 
2378*e4b17023SJohn Marino   stmt = VEC_last (gimple, *stmts_to_replace);
2379*e4b17023SJohn Marino   stmt_info = vinfo_for_stmt (stmt);
2380*e4b17023SJohn Marino   loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2381*e4b17023SJohn Marino 
2382*e4b17023SJohn Marino   if (VECTOR_MODE_P (TYPE_MODE (type_in)))
2383*e4b17023SJohn Marino     {
2384*e4b17023SJohn Marino       /* No need to check target support (already checked by the pattern
2385*e4b17023SJohn Marino          recognition function).  */
2386*e4b17023SJohn Marino       pattern_vectype = type_out ? type_out : type_in;
2387*e4b17023SJohn Marino     }
2388*e4b17023SJohn Marino   else
2389*e4b17023SJohn Marino     {
2390*e4b17023SJohn Marino       enum machine_mode vec_mode;
2391*e4b17023SJohn Marino       enum insn_code icode;
2392*e4b17023SJohn Marino       optab optab;
2393*e4b17023SJohn Marino 
2394*e4b17023SJohn Marino       /* Check target support  */
2395*e4b17023SJohn Marino       type_in = get_vectype_for_scalar_type (type_in);
2396*e4b17023SJohn Marino       if (!type_in)
2397*e4b17023SJohn Marino 	return;
2398*e4b17023SJohn Marino       if (type_out)
2399*e4b17023SJohn Marino 	type_out = get_vectype_for_scalar_type (type_out);
2400*e4b17023SJohn Marino       else
2401*e4b17023SJohn Marino 	type_out = type_in;
2402*e4b17023SJohn Marino       if (!type_out)
2403*e4b17023SJohn Marino 	return;
2404*e4b17023SJohn Marino       pattern_vectype = type_out;
2405*e4b17023SJohn Marino 
2406*e4b17023SJohn Marino       if (is_gimple_assign (pattern_stmt))
2407*e4b17023SJohn Marino 	code = gimple_assign_rhs_code (pattern_stmt);
2408*e4b17023SJohn Marino       else
2409*e4b17023SJohn Marino         {
2410*e4b17023SJohn Marino 	  gcc_assert (is_gimple_call (pattern_stmt));
2411*e4b17023SJohn Marino 	  code = CALL_EXPR;
2412*e4b17023SJohn Marino 	}
2413*e4b17023SJohn Marino 
2414*e4b17023SJohn Marino       optab = optab_for_tree_code (code, type_in, optab_default);
2415*e4b17023SJohn Marino       vec_mode = TYPE_MODE (type_in);
2416*e4b17023SJohn Marino       if (!optab
2417*e4b17023SJohn Marino           || (icode = optab_handler (optab, vec_mode)) == CODE_FOR_nothing
2418*e4b17023SJohn Marino           || (insn_data[icode].operand[0].mode != TYPE_MODE (type_out)))
2419*e4b17023SJohn Marino 	return;
2420*e4b17023SJohn Marino     }
2421*e4b17023SJohn Marino 
2422*e4b17023SJohn Marino   /* Found a vectorizable pattern.  */
2423*e4b17023SJohn Marino   if (vect_print_dump_info (REPORT_DETAILS))
2424*e4b17023SJohn Marino     {
2425*e4b17023SJohn Marino       fprintf (vect_dump, "pattern recognized: ");
2426*e4b17023SJohn Marino       print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM);
2427*e4b17023SJohn Marino     }
2428*e4b17023SJohn Marino 
2429*e4b17023SJohn Marino   /* Mark the stmts that are involved in the pattern. */
2430*e4b17023SJohn Marino   vect_mark_pattern_stmts (stmt, pattern_stmt, pattern_vectype);
2431*e4b17023SJohn Marino 
2432*e4b17023SJohn Marino   /* Patterns cannot be vectorized using SLP, because they change the order of
2433*e4b17023SJohn Marino      computation.  */
2434*e4b17023SJohn Marino   FOR_EACH_VEC_ELT (gimple, LOOP_VINFO_REDUCTIONS (loop_vinfo), i, next)
2435*e4b17023SJohn Marino     if (next == stmt)
2436*e4b17023SJohn Marino       VEC_ordered_remove (gimple, LOOP_VINFO_REDUCTIONS (loop_vinfo), i);
2437*e4b17023SJohn Marino 
2438*e4b17023SJohn Marino   /* It is possible that additional pattern stmts are created and inserted in
2439*e4b17023SJohn Marino      STMTS_TO_REPLACE.  We create a stmt_info for each of them, and mark the
2440*e4b17023SJohn Marino      relevant statements.  */
2441*e4b17023SJohn Marino   for (i = 0; VEC_iterate (gimple, *stmts_to_replace, i, stmt)
2442*e4b17023SJohn Marino 	      && (unsigned) i < (VEC_length (gimple, *stmts_to_replace) - 1);
2443*e4b17023SJohn Marino        i++)
2444*e4b17023SJohn Marino     {
2445*e4b17023SJohn Marino       stmt_info = vinfo_for_stmt (stmt);
2446*e4b17023SJohn Marino       pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
2447*e4b17023SJohn Marino       if (vect_print_dump_info (REPORT_DETAILS))
2448*e4b17023SJohn Marino         {
2449*e4b17023SJohn Marino           fprintf (vect_dump, "additional pattern stmt: ");
2450*e4b17023SJohn Marino           print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM);
2451*e4b17023SJohn Marino         }
2452*e4b17023SJohn Marino 
2453*e4b17023SJohn Marino       vect_mark_pattern_stmts (stmt, pattern_stmt, NULL_TREE);
2454*e4b17023SJohn Marino     }
2455*e4b17023SJohn Marino }
2456*e4b17023SJohn Marino 
2457*e4b17023SJohn Marino 
2458*e4b17023SJohn Marino /* Function vect_pattern_recog
2459*e4b17023SJohn Marino 
2460*e4b17023SJohn Marino    Input:
2461*e4b17023SJohn Marino    LOOP_VINFO - a struct_loop_info of a loop in which we want to look for
2462*e4b17023SJohn Marino         computation idioms.
2463*e4b17023SJohn Marino 
2464*e4b17023SJohn Marino    Output - for each computation idiom that is detected we create a new stmt
2465*e4b17023SJohn Marino         that provides the same functionality and that can be vectorized.  We
2466*e4b17023SJohn Marino         also record some information in the struct_stmt_info of the relevant
2467*e4b17023SJohn Marino         stmts, as explained below:
2468*e4b17023SJohn Marino 
2469*e4b17023SJohn Marino    At the entry to this function we have the following stmts, with the
2470*e4b17023SJohn Marino    following initial value in the STMT_VINFO fields:
2471*e4b17023SJohn Marino 
2472*e4b17023SJohn Marino          stmt                     in_pattern_p  related_stmt    vec_stmt
2473*e4b17023SJohn Marino          S1: a_i = ....                 -       -               -
2474*e4b17023SJohn Marino          S2: a_2 = ..use(a_i)..         -       -               -
2475*e4b17023SJohn Marino          S3: a_1 = ..use(a_2)..         -       -               -
2476*e4b17023SJohn Marino          S4: a_0 = ..use(a_1)..         -       -               -
2477*e4b17023SJohn Marino          S5: ... = ..use(a_0)..         -       -               -
2478*e4b17023SJohn Marino 
2479*e4b17023SJohn Marino    Say the sequence {S1,S2,S3,S4} was detected as a pattern that can be
2480*e4b17023SJohn Marino    represented by a single stmt.  We then:
2481*e4b17023SJohn Marino    - create a new stmt S6 equivalent to the pattern (the stmt is not
2482*e4b17023SJohn Marino      inserted into the code)
2483*e4b17023SJohn Marino    - fill in the STMT_VINFO fields as follows:
2484*e4b17023SJohn Marino 
2485*e4b17023SJohn Marino                                   in_pattern_p  related_stmt    vec_stmt
2486*e4b17023SJohn Marino          S1: a_i = ....                 -       -               -
2487*e4b17023SJohn Marino          S2: a_2 = ..use(a_i)..         -       -               -
2488*e4b17023SJohn Marino          S3: a_1 = ..use(a_2)..         -       -               -
2489*e4b17023SJohn Marino          S4: a_0 = ..use(a_1)..         true    S6              -
2490*e4b17023SJohn Marino           '---> S6: a_new = ....        -       S4              -
2491*e4b17023SJohn Marino          S5: ... = ..use(a_0)..         -       -               -
2492*e4b17023SJohn Marino 
2493*e4b17023SJohn Marino    (the last stmt in the pattern (S4) and the new pattern stmt (S6) point
2494*e4b17023SJohn Marino    to each other through the RELATED_STMT field).
2495*e4b17023SJohn Marino 
2496*e4b17023SJohn Marino    S6 will be marked as relevant in vect_mark_stmts_to_be_vectorized instead
2497*e4b17023SJohn Marino    of S4 because it will replace all its uses.  Stmts {S1,S2,S3} will
2498*e4b17023SJohn Marino    remain irrelevant unless used by stmts other than S4.
2499*e4b17023SJohn Marino 
2500*e4b17023SJohn Marino    If vectorization succeeds, vect_transform_stmt will skip over {S1,S2,S3}
2501*e4b17023SJohn Marino    (because they are marked as irrelevant).  It will vectorize S6, and record
2502*e4b17023SJohn Marino    a pointer to the new vector stmt VS6 from S6 (as usual).
2503*e4b17023SJohn Marino    S4 will be skipped, and S5 will be vectorized as usual:
2504*e4b17023SJohn Marino 
2505*e4b17023SJohn Marino                                   in_pattern_p  related_stmt    vec_stmt
2506*e4b17023SJohn Marino          S1: a_i = ....                 -       -               -
2507*e4b17023SJohn Marino          S2: a_2 = ..use(a_i)..         -       -               -
2508*e4b17023SJohn Marino          S3: a_1 = ..use(a_2)..         -       -               -
2509*e4b17023SJohn Marino        > VS6: va_new = ....             -       -               -
2510*e4b17023SJohn Marino          S4: a_0 = ..use(a_1)..         true    S6              VS6
2511*e4b17023SJohn Marino           '---> S6: a_new = ....        -       S4              VS6
2512*e4b17023SJohn Marino        > VS5: ... = ..vuse(va_new)..    -       -               -
2513*e4b17023SJohn Marino          S5: ... = ..use(a_0)..         -       -               -
2514*e4b17023SJohn Marino 
2515*e4b17023SJohn Marino    DCE could then get rid of {S1,S2,S3,S4,S5} (if their defs are not used
2516*e4b17023SJohn Marino    elsewhere), and we'll end up with:
2517*e4b17023SJohn Marino 
2518*e4b17023SJohn Marino         VS6: va_new = ....
2519*e4b17023SJohn Marino         VS5: ... = ..vuse(va_new)..
2520*e4b17023SJohn Marino 
2521*e4b17023SJohn Marino    In case of more than one pattern statements, e.g., widen-mult with
2522*e4b17023SJohn Marino    intermediate type:
2523*e4b17023SJohn Marino 
2524*e4b17023SJohn Marino      S1  a_t = ;
2525*e4b17023SJohn Marino      S2  a_T = (TYPE) a_t;
2526*e4b17023SJohn Marino            '--> S3: a_it = (interm_type) a_t;
2527*e4b17023SJohn Marino      S4  prod_T = a_T * CONST;
2528*e4b17023SJohn Marino            '--> S5: prod_T' = a_it w* CONST;
2529*e4b17023SJohn Marino 
2530*e4b17023SJohn Marino    there may be other users of a_T outside the pattern.  In that case S2 will
2531*e4b17023SJohn Marino    be marked as relevant (as well as S3), and both S2 and S3 will be analyzed
2532*e4b17023SJohn Marino    and vectorized.  The vector stmt VS2 will be recorded in S2, and VS3 will
2533*e4b17023SJohn Marino    be recorded in S3.  */
2534*e4b17023SJohn Marino 
2535*e4b17023SJohn Marino void
vect_pattern_recog(loop_vec_info loop_vinfo)2536*e4b17023SJohn Marino vect_pattern_recog (loop_vec_info loop_vinfo)
2537*e4b17023SJohn Marino {
2538*e4b17023SJohn Marino   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2539*e4b17023SJohn Marino   basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
2540*e4b17023SJohn Marino   unsigned int nbbs = loop->num_nodes;
2541*e4b17023SJohn Marino   gimple_stmt_iterator si;
2542*e4b17023SJohn Marino   unsigned int i, j;
2543*e4b17023SJohn Marino   vect_recog_func_ptr vect_recog_func;
2544*e4b17023SJohn Marino   VEC (gimple, heap) *stmts_to_replace = VEC_alloc (gimple, heap, 1);
2545*e4b17023SJohn Marino 
2546*e4b17023SJohn Marino   if (vect_print_dump_info (REPORT_DETAILS))
2547*e4b17023SJohn Marino     fprintf (vect_dump, "=== vect_pattern_recog ===");
2548*e4b17023SJohn Marino 
2549*e4b17023SJohn Marino   /* Scan through the loop stmts, applying the pattern recognition
2550*e4b17023SJohn Marino      functions starting at each stmt visited:  */
2551*e4b17023SJohn Marino   for (i = 0; i < nbbs; i++)
2552*e4b17023SJohn Marino     {
2553*e4b17023SJohn Marino       basic_block bb = bbs[i];
2554*e4b17023SJohn Marino       for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
2555*e4b17023SJohn Marino         {
2556*e4b17023SJohn Marino           /* Scan over all generic vect_recog_xxx_pattern functions.  */
2557*e4b17023SJohn Marino           for (j = 0; j < NUM_PATTERNS; j++)
2558*e4b17023SJohn Marino             {
2559*e4b17023SJohn Marino 	      vect_recog_func = vect_vect_recog_func_ptrs[j];
2560*e4b17023SJohn Marino 	      vect_pattern_recog_1 (vect_recog_func, si,
2561*e4b17023SJohn Marino 				    &stmts_to_replace);
2562*e4b17023SJohn Marino             }
2563*e4b17023SJohn Marino         }
2564*e4b17023SJohn Marino     }
2565*e4b17023SJohn Marino 
2566*e4b17023SJohn Marino   VEC_free (gimple, heap, stmts_to_replace);
2567*e4b17023SJohn Marino }
2568