/* Analysis Utilities for Loop Vectorization.
   Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012
   Free Software Foundation, Inc.
   Contributed by Dorit Nuzman <dorit@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "ggc.h"
#include "tree.h"
#include "target.h"
#include "basic-block.h"
#include "gimple-pretty-print.h"
#include "tree-flow.h"
#include "tree-dump.h"
#include "cfgloop.h"
#include "expr.h"
#include "optabs.h"
#include "params.h"
#include "tree-data-ref.h"
#include "tree-vectorizer.h"
#include "recog.h"
#include "diagnostic-core.h"

/* Pattern recognition functions  */
static gimple vect_recog_widen_sum_pattern (VEC (gimple, heap) **, tree *,
                                            tree *);
static gimple vect_recog_widen_mult_pattern (VEC (gimple, heap) **, tree *,
                                             tree *);
static gimple vect_recog_dot_prod_pattern (VEC (gimple, heap) **, tree *,
                                           tree *);
static gimple vect_recog_pow_pattern (VEC (gimple, heap) **, tree *, tree *);
static gimple vect_recog_over_widening_pattern (VEC (gimple, heap) **, tree *,
                                                tree *);
static gimple vect_recog_widen_shift_pattern (VEC (gimple, heap) **,
                                              tree *, tree *);
static gimple vect_recog_vector_vector_shift_pattern (VEC (gimple, heap) **,
                                                      tree *, tree *);
static gimple vect_recog_sdivmod_pow2_pattern (VEC (gimple, heap) **,
                                               tree *, tree *);
static gimple vect_recog_mixed_size_cond_pattern (VEC (gimple, heap) **,
                                                  tree *, tree *);
static gimple vect_recog_bool_pattern (VEC (gimple, heap) **, tree *, tree *);
static vect_recog_func_ptr vect_vect_recog_func_ptrs[NUM_PATTERNS] = {
        vect_recog_widen_mult_pattern,
        vect_recog_widen_sum_pattern,
        vect_recog_dot_prod_pattern,
        vect_recog_pow_pattern,
        vect_recog_widen_shift_pattern,
        vect_recog_over_widening_pattern,
        vect_recog_vector_vector_shift_pattern,
        vect_recog_sdivmod_pow2_pattern,
        vect_recog_mixed_size_cond_pattern,
        vect_recog_bool_pattern};

static inline void
append_pattern_def_seq (stmt_vec_info stmt_info, gimple stmt)
{
  gimple_seq_add_stmt_without_update (&STMT_VINFO_PATTERN_DEF_SEQ (stmt_info),
                                      stmt);
}

static inline void
new_pattern_def_seq (stmt_vec_info stmt_info, gimple stmt)
{
  STMT_VINFO_PATTERN_DEF_SEQ (stmt_info) = NULL;
  append_pattern_def_seq (stmt_info, stmt);
}

/* If the LHS of DEF_STMT has a single use, and that statement is
   in the same loop, return it.  */

static gimple
vect_single_imm_use (gimple def_stmt)
{
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (def_stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  tree lhs = gimple_assign_lhs (def_stmt);
  use_operand_p use_p;
  gimple use_stmt;

  if (!single_imm_use (lhs, &use_p, &use_stmt))
    return NULL;

  if (!gimple_bb (use_stmt))
    return NULL;

  if (!flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
    return NULL;

  gcc_assert (vinfo_for_stmt (use_stmt));
  return use_stmt;
}

/* Function widened_name_p

   Check whether NAME, an ssa-name used in USE_STMT,
   is a result of a type-promotion, such that:
     DEF_STMT: NAME = NOP (name0)
   where the type of name0 (HALF_TYPE) is smaller than the type of NAME.
   If CHECK_SIGN is TRUE, check that either both types are signed or both are
   unsigned.  */
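
/* For illustration (an expository sketch, not from the original sources):
   given C code such as

     short s = ...;
     int i = (int) s;
     ... use of i ...

   calling widened_name_p on the ssa-name for 'i' in its use statement
   returns TRUE with HALF_TYPE == short and *DEF_STMT set to the
   conversion 'i = (int) s'.  */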

static bool
widened_name_p (tree name, gimple use_stmt, tree *half_type, gimple *def_stmt,
                bool check_sign)
{
  tree dummy;
  gimple dummy_gimple;
  loop_vec_info loop_vinfo;
  stmt_vec_info stmt_vinfo;
  tree type = TREE_TYPE (name);
  tree oprnd0;
  enum vect_def_type dt;
  tree def;

  stmt_vinfo = vinfo_for_stmt (use_stmt);
  loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);

  if (!vect_is_simple_use (name, use_stmt, loop_vinfo, NULL, def_stmt, &def,
                           &dt))
    return false;

  if (dt != vect_internal_def
      && dt != vect_external_def && dt != vect_constant_def)
    return false;

  if (! *def_stmt)
    return false;

  if (!is_gimple_assign (*def_stmt))
    return false;

  if (gimple_assign_rhs_code (*def_stmt) != NOP_EXPR)
    return false;

  oprnd0 = gimple_assign_rhs1 (*def_stmt);

  *half_type = TREE_TYPE (oprnd0);
  if (!INTEGRAL_TYPE_P (type) || !INTEGRAL_TYPE_P (*half_type)
      || ((TYPE_UNSIGNED (type) != TYPE_UNSIGNED (*half_type)) && check_sign)
      || (TYPE_PRECISION (type) < (TYPE_PRECISION (*half_type) * 2)))
    return false;

  if (!vect_is_simple_use (oprnd0, *def_stmt, loop_vinfo,
                           NULL, &dummy_gimple, &dummy, &dt))
    return false;

  return true;
}

/* Helper to return a new temporary for pattern of TYPE for STMT.  If STMT
   is NULL, the caller must set SSA_NAME_DEF_STMT for the returned SSA var.  */

static tree
vect_recog_temp_ssa_var (tree type, gimple stmt)
{
  tree var = create_tmp_var (type, "patt");

  add_referenced_var (var);
  var = make_ssa_name (var, stmt);
  return var;
}

/* Function vect_recog_dot_prod_pattern

   Try to find the following pattern:

     type x_t, y_t;
     TYPE1 prod;
     TYPE2 sum = init;
   loop:
     sum_0 = phi <init, sum_1>
     S1  x_t = ...
     S2  y_t = ...
     S3  x_T = (TYPE1) x_t;
     S4  y_T = (TYPE1) y_t;
     S5  prod = x_T * y_T;
     [S6  prod = (TYPE2) prod;  #optional]
     S7  sum_1 = prod + sum_0;
   where 'TYPE1' is exactly double the size of type 'type', and 'TYPE2' is the
   same size as 'TYPE1' or bigger.  This is a special case of a reduction
   computation.

   Input:

   * STMTS: Contains a stmt from which the pattern search begins.  In the
   example, when this function is called with S7, the pattern {S3,S4,S5,S6,S7}
   will be detected.

   Output:

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  In this case it will be:
        WIDEN_DOT_PRODUCT <x_t, y_t, sum_0>

   Note: The dot-prod idiom is a widening reduction pattern that is
         vectorized without preserving all the intermediate results.  It
         produces only N/2 (widened) results (by summing up pairs of
         intermediate results) rather than all N results.  Therefore, we
         cannot allow this pattern when we want to get all the results and in
         the correct order (as is the case when this computation is in an
         inner-loop nested in an outer-loop that is being vectorized).  */
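
/* For illustration (an expository sketch, not from the original sources):
   a C loop that typically matches this pattern, assuming 16-bit shorts and
   32-bit ints:

     short a[N], b[N];
     int sum = 0;
     for (int i = 0; i < N; i++)
       sum += a[i] * b[i];

   Here 'type' is short, TYPE1 and TYPE2 are int, and the promotions,
   multiplication and accumulation collapse into one DOT_PROD_EXPR.  */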

static gimple
vect_recog_dot_prod_pattern (VEC (gimple, heap) **stmts, tree *type_in,
                             tree *type_out)
{
  gimple stmt, last_stmt = VEC_index (gimple, *stmts, 0);
  tree oprnd0, oprnd1;
  tree oprnd00, oprnd01;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
  tree type, half_type;
  gimple pattern_stmt;
  tree prod_type;
  loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  struct loop *loop = LOOP_VINFO_LOOP (loop_info);
  tree var;

  if (!is_gimple_assign (last_stmt))
    return NULL;

  type = gimple_expr_type (last_stmt);

  /* Look for the following pattern
          DX = (TYPE1) X;
          DY = (TYPE1) Y;
          DPROD = DX * DY;
          DDPROD = (TYPE2) DPROD;
          sum_1 = DDPROD + sum_0;
     In which
     - DX is double the size of X
     - DY is double the size of Y
     - DX, DY, DPROD all have the same type
     - sum is the same size as DPROD or bigger
     - sum has been recognized as a reduction variable.

     This is equivalent to:
       DPROD = X w* Y;          #widen mult
       sum_1 = DPROD w+ sum_0;  #widen summation
     or
       DPROD = X w* Y;          #widen mult
       sum_1 = DPROD + sum_0;   #summation
   */

  /* Starting from LAST_STMT, follow the defs of its uses in search
     of the above pattern.  */

  if (gimple_assign_rhs_code (last_stmt) != PLUS_EXPR)
    return NULL;

  if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
    {
      /* Has been detected as widening-summation?  */

      stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
      type = gimple_expr_type (stmt);
      if (gimple_assign_rhs_code (stmt) != WIDEN_SUM_EXPR)
        return NULL;
      oprnd0 = gimple_assign_rhs1 (stmt);
      oprnd1 = gimple_assign_rhs2 (stmt);
      half_type = TREE_TYPE (oprnd0);
    }
  else
    {
      gimple def_stmt;

      if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def)
        return NULL;
      oprnd0 = gimple_assign_rhs1 (last_stmt);
      oprnd1 = gimple_assign_rhs2 (last_stmt);
      if (!types_compatible_p (TREE_TYPE (oprnd0), type)
          || !types_compatible_p (TREE_TYPE (oprnd1), type))
        return NULL;
      stmt = last_stmt;

      if (widened_name_p (oprnd0, stmt, &half_type, &def_stmt, true))
        {
          stmt = def_stmt;
          oprnd0 = gimple_assign_rhs1 (stmt);
        }
      else
        half_type = type;
    }

  /* So far so good.  Since last_stmt was detected as a (summation) reduction,
     we know that oprnd1 is the reduction variable (defined by a loop-header
     phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
     It remains to check that oprnd0 is defined by a (widen_)mult_expr.  */
  if (TREE_CODE (oprnd0) != SSA_NAME)
    return NULL;

  prod_type = half_type;
  stmt = SSA_NAME_DEF_STMT (oprnd0);

  /* It could not be the dot_prod pattern if the stmt is outside the loop.  */
  if (!gimple_bb (stmt) || !flow_bb_inside_loop_p (loop, gimple_bb (stmt)))
    return NULL;

  /* FORNOW.  Can continue analyzing the def-use chain when this stmt is a phi
     inside the loop (in case we are analyzing an outer-loop).  */
  if (!is_gimple_assign (stmt))
    return NULL;
  stmt_vinfo = vinfo_for_stmt (stmt);
  gcc_assert (stmt_vinfo);
  if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_internal_def)
    return NULL;
  if (gimple_assign_rhs_code (stmt) != MULT_EXPR)
    return NULL;
  if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
    {
      /* Has been detected as a widening multiplication?  */

      stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
      if (gimple_assign_rhs_code (stmt) != WIDEN_MULT_EXPR)
        return NULL;
      stmt_vinfo = vinfo_for_stmt (stmt);
      gcc_assert (stmt_vinfo);
      gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_internal_def);
      oprnd00 = gimple_assign_rhs1 (stmt);
      oprnd01 = gimple_assign_rhs2 (stmt);
    }
  else
    {
      tree half_type0, half_type1;
      gimple def_stmt;
      tree oprnd0, oprnd1;

      oprnd0 = gimple_assign_rhs1 (stmt);
      oprnd1 = gimple_assign_rhs2 (stmt);
      if (!types_compatible_p (TREE_TYPE (oprnd0), prod_type)
          || !types_compatible_p (TREE_TYPE (oprnd1), prod_type))
        return NULL;
      if (!widened_name_p (oprnd0, stmt, &half_type0, &def_stmt, true))
        return NULL;
      oprnd00 = gimple_assign_rhs1 (def_stmt);
      if (!widened_name_p (oprnd1, stmt, &half_type1, &def_stmt, true))
        return NULL;
      oprnd01 = gimple_assign_rhs1 (def_stmt);
      if (!types_compatible_p (half_type0, half_type1))
        return NULL;
      if (TYPE_PRECISION (prod_type) != TYPE_PRECISION (half_type0) * 2)
        return NULL;
    }

  half_type = TREE_TYPE (oprnd00);
  *type_in = half_type;
  *type_out = type;

  /* Pattern detected.  Create a stmt to be used to replace the pattern: */
  var = vect_recog_temp_ssa_var (type, NULL);
  pattern_stmt = gimple_build_assign_with_ops3 (DOT_PROD_EXPR, var,
                                                oprnd00, oprnd01, oprnd1);

  if (vect_print_dump_info (REPORT_DETAILS))
    {
      fprintf (vect_dump, "vect_recog_dot_prod_pattern: detected: ");
      print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM);
    }

  /* We don't allow changing the order of the computation in the inner-loop
     when doing outer-loop vectorization.  */
  gcc_assert (!nested_in_vect_loop_p (loop, last_stmt));

  return pattern_stmt;
}


/* Handle widening operation by a constant.  At the moment we support MULT_EXPR
   and LSHIFT_EXPR.

   For MULT_EXPR we check that CONST_OPRND fits HALF_TYPE, and for LSHIFT_EXPR
   we check that CONST_OPRND is less than or equal to the size of HALF_TYPE.

   Otherwise, if the type of the result (TYPE) is at least 4 times bigger than
   HALF_TYPE, and there is an intermediate type (2 times smaller than TYPE)
   that satisfies the above restrictions, we can perform a widening operation
   from the intermediate type to TYPE and replace a_T = (TYPE) a_t;
   with a_it = (interm_type) a_t;  */
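
/* For illustration (an expository sketch, not from the original sources):
   with 8-bit chars and 32-bit ints, a statement sequence such as

     char a_t = ...;
     int a_T = (int) a_t;
     int prod_T = a_T * 1000;

   cannot use a char->int widening multiply directly because 1000 does not
   fit in char; since int is 4 times wider than char and 1000 fits in a
   16-bit intermediate type, the conversion is rewritten as
   a_it = (short) a_t and the multiply becomes a short->int widen-mult.  */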

static bool
vect_handle_widen_op_by_const (gimple stmt, enum tree_code code,
                               tree const_oprnd, tree *oprnd,
                               VEC (gimple, heap) **stmts, tree type,
                               tree *half_type, gimple def_stmt)
{
  tree new_type, new_oprnd, tmp;
  gimple new_stmt;
  loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (vinfo_for_stmt (stmt));
  struct loop *loop = LOOP_VINFO_LOOP (loop_info);

  if (code != MULT_EXPR && code != LSHIFT_EXPR)
    return false;

  if (((code == MULT_EXPR && int_fits_type_p (const_oprnd, *half_type))
        || (code == LSHIFT_EXPR
            && compare_tree_int (const_oprnd, TYPE_PRECISION (*half_type))
               != 1))
      && TYPE_PRECISION (type) == (TYPE_PRECISION (*half_type) * 2))
    {
      /* CONST_OPRND is a constant of HALF_TYPE.  */
      *oprnd = gimple_assign_rhs1 (def_stmt);
      return true;
    }

  if (TYPE_PRECISION (type) < (TYPE_PRECISION (*half_type) * 4)
      || !gimple_bb (def_stmt)
      || !flow_bb_inside_loop_p (loop, gimple_bb (def_stmt))
      || !vinfo_for_stmt (def_stmt))
    return false;

  /* TYPE is 4 times bigger than HALF_TYPE, try widening operation for
     a type 2 times bigger than HALF_TYPE.  */
  new_type = build_nonstandard_integer_type (TYPE_PRECISION (type) / 2,
                                             TYPE_UNSIGNED (type));
  if ((code == MULT_EXPR && !int_fits_type_p (const_oprnd, new_type))
      || (code == LSHIFT_EXPR
          && compare_tree_int (const_oprnd, TYPE_PRECISION (new_type)) == 1))
    return false;

  /* Use NEW_TYPE for widening operation.  */
  if (STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)))
    {
      new_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt));
      /* Check if the already created pattern stmt is what we need.  */
      if (!is_gimple_assign (new_stmt)
          || gimple_assign_rhs_code (new_stmt) != NOP_EXPR
          || TREE_TYPE (gimple_assign_lhs (new_stmt)) != new_type)
        return false;

      VEC_safe_push (gimple, heap, *stmts, def_stmt);
      *oprnd = gimple_assign_lhs (new_stmt);
    }
  else
    {
      /* Create a_T = (NEW_TYPE) a_t;  */
      *oprnd = gimple_assign_rhs1 (def_stmt);
      tmp = create_tmp_var (new_type, NULL);
      add_referenced_var (tmp);
      new_oprnd = make_ssa_name (tmp, NULL);
      new_stmt = gimple_build_assign_with_ops (NOP_EXPR, new_oprnd, *oprnd,
                                               NULL_TREE);
      STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)) = new_stmt;
      VEC_safe_push (gimple, heap, *stmts, def_stmt);
      *oprnd = new_oprnd;
    }

  *half_type = new_type;
  return true;
}


/* Function vect_recog_widen_mult_pattern

   Try to find the following pattern:

     type a_t, b_t;
     TYPE a_T, b_T, prod_T;

     S1  a_t = ;
     S2  b_t = ;
     S3  a_T = (TYPE) a_t;
     S4  b_T = (TYPE) b_t;
     S5  prod_T = a_T * b_T;

   where type 'TYPE' is at least double the size of type 'type'.

   Also detect unsigned cases:

     unsigned type a_t, b_t;
     unsigned TYPE u_prod_T;
     TYPE a_T, b_T, prod_T;

     S1  a_t = ;
     S2  b_t = ;
     S3  a_T = (TYPE) a_t;
     S4  b_T = (TYPE) b_t;
     S5  prod_T = a_T * b_T;
     S6  u_prod_T = (unsigned TYPE) prod_T;

   and multiplication by constants:

     type a_t;
     TYPE a_T, prod_T;

     S1  a_t = ;
     S3  a_T = (TYPE) a_t;
     S5  prod_T = a_T * CONST;

   A special case of multiplication by constants is when 'TYPE' is 4 times
   bigger than 'type', but CONST fits an intermediate type 2 times smaller
   than 'TYPE'.  In that case we create an additional pattern stmt for S3
   to create a variable of the intermediate type, and perform widen-mult
   on the intermediate type as well:

     type a_t;
     interm_type a_it;
     TYPE a_T, prod_T, prod_T';

     S1  a_t = ;
     S3  a_T = (TYPE) a_t;
           '--> a_it = (interm_type) a_t;
     S5  prod_T = a_T * CONST;
           '--> prod_T' = a_it w* CONST;

   Input/Output:

   * STMTS: Contains a stmt from which the pattern search begins.  In the
   example, when this function is called with S5, the pattern {S3,S4,S5,(S6)}
   is detected.  In case of unsigned widen-mult, the original stmt (S5) is
   replaced with S6 in STMTS.  In case of multiplication by a constant
   of an intermediate type (the last case above), STMTS also contains S3
   (inserted before S5).

   Output:

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  In this case it will be:
        WIDEN_MULT <a_t, b_t>
*/
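
/* For illustration (an expository sketch, not from the original sources):
   a C loop that typically matches this pattern, assuming 16-bit shorts and
   32-bit ints:

     short a[N], b[N];
     int c[N];
     for (int i = 0; i < N; i++)
       c[i] = a[i] * b[i];

   The integer promotions of a[i] and b[i] supply S3/S4, and the product
   becomes a single WIDEN_MULT_EXPR from short to int.  */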

static gimple
vect_recog_widen_mult_pattern (VEC (gimple, heap) **stmts,
                               tree *type_in, tree *type_out)
{
  gimple last_stmt = VEC_pop (gimple, *stmts);
  gimple def_stmt0, def_stmt1;
  tree oprnd0, oprnd1;
  tree type, half_type0, half_type1;
  gimple pattern_stmt;
  tree vectype, vectype_out = NULL_TREE;
  tree dummy;
  tree var;
  enum tree_code dummy_code;
  int dummy_int;
  VEC (tree, heap) *dummy_vec;
  bool op1_ok;

  if (!is_gimple_assign (last_stmt))
    return NULL;

  type = gimple_expr_type (last_stmt);

  /* Starting from LAST_STMT, follow the defs of its uses in search
     of the above pattern.  */

  if (gimple_assign_rhs_code (last_stmt) != MULT_EXPR)
    return NULL;

  oprnd0 = gimple_assign_rhs1 (last_stmt);
  oprnd1 = gimple_assign_rhs2 (last_stmt);
  if (!types_compatible_p (TREE_TYPE (oprnd0), type)
      || !types_compatible_p (TREE_TYPE (oprnd1), type))
    return NULL;

  /* Check argument 0.  */
  if (!widened_name_p (oprnd0, last_stmt, &half_type0, &def_stmt0, false))
    return NULL;
  /* Check argument 1.  */
  op1_ok = widened_name_p (oprnd1, last_stmt, &half_type1, &def_stmt1, false);

  if (op1_ok)
    {
      oprnd0 = gimple_assign_rhs1 (def_stmt0);
      oprnd1 = gimple_assign_rhs1 (def_stmt1);
    }
  else
    {
      if (TREE_CODE (oprnd1) == INTEGER_CST
          && TREE_CODE (half_type0) == INTEGER_TYPE
          && vect_handle_widen_op_by_const (last_stmt, MULT_EXPR, oprnd1,
                                            &oprnd0, stmts, type,
                                            &half_type0, def_stmt0))
        half_type1 = half_type0;
      else
        return NULL;
    }

  /* Handle unsigned case.  Look for
     S6  u_prod_T = (unsigned TYPE) prod_T;
     Use unsigned TYPE as the type for WIDEN_MULT_EXPR.  */
  if (TYPE_UNSIGNED (type) != TYPE_UNSIGNED (half_type0))
    {
      gimple use_stmt;
      tree use_lhs;
      tree use_type;

      if (TYPE_UNSIGNED (type) == TYPE_UNSIGNED (half_type1))
        return NULL;

      use_stmt = vect_single_imm_use (last_stmt);
      if (!use_stmt || !is_gimple_assign (use_stmt)
          || gimple_assign_rhs_code (use_stmt) != NOP_EXPR)
        return NULL;

      use_lhs = gimple_assign_lhs (use_stmt);
      use_type = TREE_TYPE (use_lhs);
      if (!INTEGRAL_TYPE_P (use_type)
          || (TYPE_UNSIGNED (type) == TYPE_UNSIGNED (use_type))
          || (TYPE_PRECISION (type) != TYPE_PRECISION (use_type)))
        return NULL;

      type = use_type;
      last_stmt = use_stmt;
    }

  if (!types_compatible_p (half_type0, half_type1))
    return NULL;

  /* Pattern detected.  */
  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "vect_recog_widen_mult_pattern: detected: ");

  /* Check target support  */
  vectype = get_vectype_for_scalar_type (half_type0);
  vectype_out = get_vectype_for_scalar_type (type);
  if (!vectype
      || !vectype_out
      || !supportable_widening_operation (WIDEN_MULT_EXPR, last_stmt,
                                          vectype_out, vectype,
                                          &dummy, &dummy, &dummy_code,
                                          &dummy_code, &dummy_int, &dummy_vec))
    return NULL;

  *type_in = vectype;
  *type_out = vectype_out;

  /* Pattern supported.  Create a stmt to be used to replace the pattern: */
  var = vect_recog_temp_ssa_var (type, NULL);
  pattern_stmt = gimple_build_assign_with_ops (WIDEN_MULT_EXPR, var, oprnd0,
                                               oprnd1);

  if (vect_print_dump_info (REPORT_DETAILS))
    print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM);

  VEC_safe_push (gimple, heap, *stmts, last_stmt);
  return pattern_stmt;
}


/* Function vect_recog_pow_pattern

   Try to find the following pattern:

     x = POW (y, N);

   with POW being one of pow, powf, powi, powif and N being
   either 2 or 0.5.

   Input:

   * LAST_STMT: A stmt from which the pattern search begins.

   Output:

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  In this case it will be:
        x = x * x
   or
        x = sqrt (x)
*/
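
/* For illustration (an expository sketch, not from the original sources):

     for (int i = 0; i < N; i++)
       x[i] = pow (y[i], 2.0);

   is rewritten as x[i] = y[i] * y[i], and pow (y[i], 0.5) becomes
   sqrt (y[i]) when a vectorizable sqrt is available.  */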

static gimple
vect_recog_pow_pattern (VEC (gimple, heap) **stmts, tree *type_in,
                        tree *type_out)
{
  gimple last_stmt = VEC_index (gimple, *stmts, 0);
  tree fn, base, exp = NULL;
  gimple stmt;
  tree var;

  if (!is_gimple_call (last_stmt) || gimple_call_lhs (last_stmt) == NULL)
    return NULL;

  fn = gimple_call_fndecl (last_stmt);
  if (fn == NULL_TREE || DECL_BUILT_IN_CLASS (fn) != BUILT_IN_NORMAL)
    return NULL;

  switch (DECL_FUNCTION_CODE (fn))
    {
    case BUILT_IN_POWIF:
    case BUILT_IN_POWI:
    case BUILT_IN_POWF:
    case BUILT_IN_POW:
      base = gimple_call_arg (last_stmt, 0);
      exp = gimple_call_arg (last_stmt, 1);
      if (TREE_CODE (exp) != REAL_CST
          && TREE_CODE (exp) != INTEGER_CST)
        return NULL;
      break;

    default:
      return NULL;
    }

  /* We now have a pow or powi builtin function call with a constant
     exponent.  */

  *type_out = NULL_TREE;

  /* Catch squaring.  */
  if ((host_integerp (exp, 0)
       && tree_low_cst (exp, 0) == 2)
      || (TREE_CODE (exp) == REAL_CST
          && REAL_VALUES_EQUAL (TREE_REAL_CST (exp), dconst2)))
    {
      *type_in = TREE_TYPE (base);

      var = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
      stmt = gimple_build_assign_with_ops (MULT_EXPR, var, base, base);
      return stmt;
    }

  /* Catch square root.  */
  if (TREE_CODE (exp) == REAL_CST
      && REAL_VALUES_EQUAL (TREE_REAL_CST (exp), dconsthalf))
    {
      tree newfn = mathfn_built_in (TREE_TYPE (base), BUILT_IN_SQRT);
      *type_in = get_vectype_for_scalar_type (TREE_TYPE (base));
      if (*type_in)
        {
          gimple stmt = gimple_build_call (newfn, 1, base);
          if (vectorizable_function (stmt, *type_in, *type_in)
              != NULL_TREE)
            {
              var = vect_recog_temp_ssa_var (TREE_TYPE (base), stmt);
              gimple_call_set_lhs (stmt, var);
              return stmt;
            }
        }
    }

  return NULL;
}


/* Function vect_recog_widen_sum_pattern

   Try to find the following pattern:

     type x_t;
     TYPE x_T, sum = init;
   loop:
     sum_0 = phi <init, sum_1>
     S1  x_t = *p;
     S2  x_T = (TYPE) x_t;
     S3  sum_1 = x_T + sum_0;

   where type 'TYPE' is at least double the size of type 'type', i.e., we're
   summing elements of type 'type' into an accumulator of type 'TYPE'.  This
   is a special case of a reduction computation.

   Input:

   * LAST_STMT: A stmt from which the pattern search begins.  In the example,
   when this function is called with S3, the pattern {S2,S3} will be detected.

   Output:

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  In this case it will be:
        WIDEN_SUM <x_t, sum_0>

   Note: The widening-sum idiom is a widening reduction pattern that is
         vectorized without preserving all the intermediate results.  It
         produces only N/2 (widened) results (by summing up pairs of
         intermediate results) rather than all N results.  Therefore, we
         cannot allow this pattern when we want to get all the results and in
         the correct order (as is the case when this computation is in an
         inner-loop nested in an outer-loop that is being vectorized).  */
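
/* For illustration (an expository sketch, not from the original sources):

     unsigned short x[N];
     unsigned int sum = 0;
     for (int i = 0; i < N; i++)
       sum += x[i];

   The promotion of x[i] to unsigned int and the accumulation are replaced
   by a single WIDEN_SUM_EXPR from unsigned short to unsigned int.  */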

static gimple
vect_recog_widen_sum_pattern (VEC (gimple, heap) **stmts, tree *type_in,
                              tree *type_out)
{
  gimple stmt, last_stmt = VEC_index (gimple, *stmts, 0);
  tree oprnd0, oprnd1;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
  tree type, half_type;
  gimple pattern_stmt;
  loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  struct loop *loop = LOOP_VINFO_LOOP (loop_info);
  tree var;

  if (!is_gimple_assign (last_stmt))
    return NULL;

  type = gimple_expr_type (last_stmt);

  /* Look for the following pattern
          DX = (TYPE) X;
          sum_1 = DX + sum_0;
     In which DX is at least double the size of X, and sum_1 has been
     recognized as a reduction variable.
   */

  /* Starting from LAST_STMT, follow the defs of its uses in search
     of the above pattern.  */

  if (gimple_assign_rhs_code (last_stmt) != PLUS_EXPR)
    return NULL;

  if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def)
    return NULL;

  oprnd0 = gimple_assign_rhs1 (last_stmt);
  oprnd1 = gimple_assign_rhs2 (last_stmt);
  if (!types_compatible_p (TREE_TYPE (oprnd0), type)
      || !types_compatible_p (TREE_TYPE (oprnd1), type))
    return NULL;

  /* So far so good.  Since last_stmt was detected as a (summation) reduction,
     we know that oprnd1 is the reduction variable (defined by a loop-header
     phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
     It remains to check that oprnd0 is defined by a cast from type 'type' to
     type 'TYPE'.  */

  if (!widened_name_p (oprnd0, last_stmt, &half_type, &stmt, true))
    return NULL;

  oprnd0 = gimple_assign_rhs1 (stmt);
  *type_in = half_type;
  *type_out = type;

  /* Pattern detected.  Create a stmt to be used to replace the pattern: */
  var = vect_recog_temp_ssa_var (type, NULL);
  pattern_stmt = gimple_build_assign_with_ops (WIDEN_SUM_EXPR, var,
                                               oprnd0, oprnd1);

  if (vect_print_dump_info (REPORT_DETAILS))
    {
      fprintf (vect_dump, "vect_recog_widen_sum_pattern: detected: ");
      print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM);
    }

  /* We don't allow changing the order of the computation in the inner-loop
     when doing outer-loop vectorization.  */
  gcc_assert (!nested_in_vect_loop_p (loop, last_stmt));

  return pattern_stmt;
}


/* Return TRUE if the operation in STMT can be performed on a smaller type.

   Input:
   STMT - a statement to check.
   DEF - we support operations with two operands, one of which is constant.
         The other operand can be defined by a demotion operation, or by a
         previous statement in a sequence of over-promoted operations.  In the
         latter case DEF is used to replace that operand.  (It is defined by a
         pattern statement we created for the previous statement in the
         sequence).

   Input/output:
   NEW_TYPE - Output: a smaller type that we are trying to use.  Input: if not
         NULL, it's the type of DEF.
   STMTS - additional pattern statements.  If a pattern statement (type
         conversion) is created in this function, its original statement is
         added to STMTS.

   Output:
   OP0, OP1 - if the operation fits a smaller type, OP0 and OP1 are the new
         operands to use in the new pattern statement for STMT (will be created
         in vect_recog_over_widening_pattern ()).
   NEW_DEF_STMT - in case DEF has to be promoted, we create two pattern
         statements for STMT: the first one is a type promotion and the second
         one is the operation itself.  We return the type promotion statement
         in NEW_DEF_STMT and further store it in STMT_VINFO_PATTERN_DEF_SEQ of
         the second pattern statement.  */

static bool
vect_operation_fits_smaller_type (gimple stmt, tree def, tree *new_type,
                                  tree *op0, tree *op1, gimple *new_def_stmt,
                                  VEC (gimple, heap) **stmts)
{
  enum tree_code code;
  tree const_oprnd, oprnd;
  tree interm_type = NULL_TREE, half_type, tmp, new_oprnd, type;
  gimple def_stmt, new_stmt;
  bool first = false;
  loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (vinfo_for_stmt (stmt));
  struct loop *loop = LOOP_VINFO_LOOP (loop_info);

  *op0 = NULL_TREE;
  *op1 = NULL_TREE;
  *new_def_stmt = NULL;

  if (!is_gimple_assign (stmt))
    return false;

  code = gimple_assign_rhs_code (stmt);
  if (code != LSHIFT_EXPR && code != RSHIFT_EXPR
      && code != BIT_IOR_EXPR && code != BIT_XOR_EXPR && code != BIT_AND_EXPR)
    return false;

  oprnd = gimple_assign_rhs1 (stmt);
  const_oprnd = gimple_assign_rhs2 (stmt);
  type = gimple_expr_type (stmt);

  if (TREE_CODE (oprnd) != SSA_NAME
      || TREE_CODE (const_oprnd) != INTEGER_CST)
    return false;

  /* If oprnd has other uses besides that in stmt we cannot mark it
     as being part of a pattern only.  */
  if (!has_single_use (oprnd))
    return false;

  /* If we are in the middle of a sequence, we use DEF from a previous
     statement.  Otherwise, OPRND has to be a result of type promotion.  */
  if (*new_type)
    {
      half_type = *new_type;
      oprnd = def;
    }
  else
    {
      first = true;
      if (!widened_name_p (oprnd, stmt, &half_type, &def_stmt, false)
          || !gimple_bb (def_stmt)
          || !flow_bb_inside_loop_p (loop, gimple_bb (def_stmt))
          || !vinfo_for_stmt (def_stmt))
        return false;
    }

  /* Can we perform the operation on a smaller type?  */
  switch (code)
    {
      case BIT_IOR_EXPR:
      case BIT_XOR_EXPR:
      case BIT_AND_EXPR:
        if (!int_fits_type_p (const_oprnd, half_type))
          {
            /* HALF_TYPE is not enough.  Try a bigger type if possible.  */
            if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4))
              return false;

            interm_type = build_nonstandard_integer_type (
                        TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type));
            if (!int_fits_type_p (const_oprnd, interm_type))
              return false;
          }

        break;

      case LSHIFT_EXPR:
        /* Try intermediate type - HALF_TYPE is not enough for sure.  */
        if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4))
          return false;

        /* Check that HALF_TYPE size + shift amount <= INTERM_TYPE size.
          (e.g., if the original value was char, the shift amount is at most 8
           if we want to use short).  */
        if (compare_tree_int (const_oprnd, TYPE_PRECISION (half_type)) == 1)
          return false;

        interm_type = build_nonstandard_integer_type (
                        TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type));

        if (!vect_supportable_shift (code, interm_type))
          return false;

        break;

      case RSHIFT_EXPR:
        if (vect_supportable_shift (code, half_type))
          break;

        /* Try intermediate type - HALF_TYPE is not supported.  */
        if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4))
          return false;

        interm_type = build_nonstandard_integer_type (
                        TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type));

        if (!vect_supportable_shift (code, interm_type))
          return false;

        break;

      default:
        gcc_unreachable ();
    }

  /* There are four possible cases:
     1. OPRND is defined by a type promotion (in that case FIRST is TRUE, it's
        the first statement in the sequence)
        a. The original, HALF_TYPE, is not enough - we replace the promotion
           from HALF_TYPE to TYPE with a promotion to INTERM_TYPE.
        b. HALF_TYPE is sufficient, OPRND is set as the RHS of the original
           promotion.
     2. OPRND is defined by a pattern statement we created.
        a. Its type is not sufficient for the operation, we create a new stmt:
           a type conversion for OPRND from HALF_TYPE to INTERM_TYPE.  We store
           this statement in NEW_DEF_STMT, and it is later put in
           STMT_VINFO_PATTERN_DEF_SEQ of the pattern statement for STMT.
        b. OPRND is good to use in the new statement.  */
  if (first)
    {
      if (interm_type)
        {
          /* Replace the original type conversion HALF_TYPE->TYPE with
             HALF_TYPE->INTERM_TYPE.  */
          if (STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)))
            {
              new_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt));
              /* Check if the already created pattern stmt is what we need.  */
              if (!is_gimple_assign (new_stmt)
                  || gimple_assign_rhs_code (new_stmt) != NOP_EXPR
                  || TREE_TYPE (gimple_assign_lhs (new_stmt)) != interm_type)
                return false;

              VEC_safe_push (gimple, heap, *stmts, def_stmt);
              oprnd = gimple_assign_lhs (new_stmt);
            }
          else
            {
              /* Create NEW_OPRND = (INTERM_TYPE) OPRND.  */
              oprnd = gimple_assign_rhs1 (def_stmt);
              tmp = create_tmp_reg (interm_type, NULL);
              add_referenced_var (tmp);
              new_oprnd = make_ssa_name (tmp, NULL);
              new_stmt = gimple_build_assign_with_ops (NOP_EXPR, new_oprnd,
                                                       oprnd, NULL_TREE);
              STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)) = new_stmt;
              VEC_safe_push (gimple, heap, *stmts, def_stmt);
              oprnd = new_oprnd;
            }
        }
      else
        {
          /* Retrieve the operand before the type promotion.  */
          oprnd = gimple_assign_rhs1 (def_stmt);
        }
    }
  else
    {
      if (interm_type)
        {
          /* Create a type conversion HALF_TYPE->INTERM_TYPE.  */
          tmp = create_tmp_reg (interm_type, NULL);
          add_referenced_var (tmp);
          new_oprnd = make_ssa_name (tmp, NULL);
          new_stmt = gimple_build_assign_with_ops (NOP_EXPR, new_oprnd,
                                                   oprnd, NULL_TREE);
          oprnd = new_oprnd;
          *new_def_stmt = new_stmt;
        }

      /* Otherwise, OPRND is already set.  */
    }

  if (interm_type)
    *new_type = interm_type;
  else
    *new_type = half_type;

  *op0 = oprnd;
  *op1 = fold_convert (*new_type, const_oprnd);

  return true;
}

/* Try to find a statement or a sequence of statements that can be performed
   on a smaller type:

     type x_t;
     TYPE x_T, res0_T, res1_T;
   loop:
     S1  x_t = *p;
     S2  x_T = (TYPE) x_t;
     S3  res0_T = op (x_T, C0);
     S4  res1_T = op (res0_T, C1);
     S5  ... = () res1_T;  - type demotion

   where type 'TYPE' is at least double the size of type 'type', and C0 and
   C1 are constants.
   Check if S3 and S4 can be done on a smaller type than 'TYPE'; it can either
   be 'type' or some intermediate type.  For now, we expect S5 to be a type
   demotion operation.  We also check that S3 and S4 have only one use.  */
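
/* For illustration (an expository sketch, not from the original sources):

     unsigned char in[N], out[N];
     for (int i = 0; i < N; i++)
       out[i] = (in[i] << 1) | 3;

   The C integer promotions perform the shift and OR in int, but the result
   is immediately truncated back to unsigned char, so the whole sequence can
   be carried out on a narrower type (checked per statement by
   vect_operation_fits_smaller_type above) and the int computation is never
   needed.  */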

static gimple
vect_recog_over_widening_pattern (VEC (gimple, heap) **stmts,
                                  tree *type_in, tree *type_out)
{
  gimple stmt = VEC_pop (gimple, *stmts);
  gimple pattern_stmt = NULL, new_def_stmt, prev_stmt = NULL, use_stmt = NULL;
  tree op0, op1, vectype = NULL_TREE, use_lhs, use_type;
  tree var = NULL_TREE, new_type = NULL_TREE, tmp, new_oprnd;
  bool first;
  tree type = NULL;

  first = true;
  while (1)
    {
      if (!vinfo_for_stmt (stmt)
          || STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (stmt)))
        return NULL;

      new_def_stmt = NULL;
      if (!vect_operation_fits_smaller_type (stmt, var, &new_type,
                                             &op0, &op1, &new_def_stmt,
                                             stmts))
        {
          if (first)
            return NULL;
          else
            break;
        }

      /* STMT can be performed on a smaller type.  Check its uses.  */
      use_stmt = vect_single_imm_use (stmt);
      if (!use_stmt || !is_gimple_assign (use_stmt))
        return NULL;

      /* Create pattern statement for STMT.  */
      vectype = get_vectype_for_scalar_type (new_type);
      if (!vectype)
        return NULL;

      /* We want to collect all the statements for which we create pattern
         statements, except for the case when the last statement in the
         sequence doesn't have a corresponding pattern statement.  In such
         case we associate the last pattern statement with the last statement
         in the sequence.  Therefore, we only add the original statement to
         the list if we know that it is not the last.  */
      if (prev_stmt)
        VEC_safe_push (gimple, heap, *stmts, prev_stmt);

      var = vect_recog_temp_ssa_var (new_type, NULL);
      pattern_stmt
        = gimple_build_assign_with_ops (gimple_assign_rhs_code (stmt), var,
                                        op0, op1);
      STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt)) = pattern_stmt;
      new_pattern_def_seq (vinfo_for_stmt (stmt), new_def_stmt);

      if (vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "created pattern stmt: ");
          print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM);
        }

      type = gimple_expr_type (stmt);
      prev_stmt = stmt;
      stmt = use_stmt;

      first = false;
    }

  /* We got a sequence.  We expect it to end with a type demotion operation.
     Otherwise, we quit (for now).  There are three possible cases: the
     conversion is to NEW_TYPE (we don't do anything), the conversion is to
     a type bigger than NEW_TYPE and/or the signedness of USE_TYPE and
     NEW_TYPE differs (we create a new conversion statement).  */
  if (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (use_stmt)))
    {
      use_lhs = gimple_assign_lhs (use_stmt);
      use_type = TREE_TYPE (use_lhs);
      /* Support only type demotion or signedness change.  */
      if (!INTEGRAL_TYPE_P (use_type)
          || TYPE_PRECISION (type) <= TYPE_PRECISION (use_type))
        return NULL;

      /* Check that NEW_TYPE is not bigger than the conversion result.  */
      if (TYPE_PRECISION (new_type) > TYPE_PRECISION (use_type))
        return NULL;

      if (TYPE_UNSIGNED (new_type) != TYPE_UNSIGNED (use_type)
          || TYPE_PRECISION (new_type) != TYPE_PRECISION (use_type))
        {
          /* Create NEW_TYPE->USE_TYPE conversion.  */
          tmp = create_tmp_reg (use_type, NULL);
          add_referenced_var (tmp);
          new_oprnd = make_ssa_name (tmp, NULL);
          pattern_stmt = gimple_build_assign_with_ops (NOP_EXPR, new_oprnd,
                                                       var, NULL_TREE);
          STMT_VINFO_RELATED_STMT (vinfo_for_stmt (use_stmt)) = pattern_stmt;

          *type_in = get_vectype_for_scalar_type (new_type);
          *type_out = get_vectype_for_scalar_type (use_type);

          /* We created a pattern statement for the last statement in the
             sequence, so we don't need to associate it with the pattern
             statement created for PREV_STMT.  Therefore, we add PREV_STMT
             to the list in order to mark it later in vect_pattern_recog_1.  */
          if (prev_stmt)
            VEC_safe_push (gimple, heap, *stmts, prev_stmt);
        }
      else
        {
          if (prev_stmt)
            STMT_VINFO_PATTERN_DEF_SEQ (vinfo_for_stmt (use_stmt))
               = STMT_VINFO_PATTERN_DEF_SEQ (vinfo_for_stmt (prev_stmt));

          *type_in = vectype;
          *type_out = NULL_TREE;
        }

      VEC_safe_push (gimple, heap, *stmts, use_stmt);
    }
  else
    /* TODO: support general case, create a conversion to the correct type.  */
    return NULL;

  /* Pattern detected.  */
  if (vect_print_dump_info (REPORT_DETAILS))
    {
      fprintf (vect_dump, "vect_recog_over_widening_pattern: detected: ");
      print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM);
    }

  return pattern_stmt;
}

/* Detect widening shift pattern:

   type a_t;
   TYPE a_T, res_T;

   S1 a_t = ;
   S2 a_T = (TYPE) a_t;
   S3 res_T = a_T << CONST;

  where type 'TYPE' is at least double the size of type 'type'.

  Also detect cases where the shift result is immediately converted
  to another type 'result_type' that is no larger in size than 'TYPE'.
  In those cases we perform a widen-shift that directly results in
  'result_type', to avoid a possible over-widening situation:

  type a_t;
  TYPE a_T, res_T;
  result_type res_result;

  S1 a_t = ;
  S2 a_T = (TYPE) a_t;
  S3 res_T = a_T << CONST;
  S4 res_result = (result_type) res_T;
      '--> res_result' = a_t w<< CONST;

  And a case when 'TYPE' is 4 times bigger than 'type'.  In that case we
  create an additional pattern stmt for S2 to create a variable of an
  intermediate type, and perform widen-shift on the intermediate type:

  type a_t;
  interm_type a_it;
  TYPE a_T, res_T, res_T';

  S1 a_t = ;
  S2 a_T = (TYPE) a_t;
      '--> a_it = (interm_type) a_t;
  S3 res_T = a_T << CONST;
      '--> res_T' = a_it <<* CONST;

  Input/Output:

  * STMTS: Contains a stmt from which the pattern search begins.
    In case of unsigned widen-shift, the original stmt (S3) is replaced with S4
    in STMTS.  When an intermediate type is used and a pattern statement is
    created for S2, we also put S2 here (before S3).

  Output:

  * TYPE_IN: The type of the input arguments to the pattern.

  * TYPE_OUT: The type of the output of this pattern.

  * Return value: A new stmt that will be used to replace the sequence of
    stmts that constitute the pattern.  In this case it will be:
    WIDEN_LSHIFT_EXPR <a_t, CONST>.  */
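
/* For illustration (an expository sketch, not from the original sources):

     unsigned char a[N];
     unsigned short b[N];
     for (int i = 0; i < N; i++)
       b[i] = a[i] << 3;

   a[i] is promoted before the shift and the result is immediately demoted
   to unsigned short, so the loop can use a widening shift from the char
   elements, matching the 'result_type' case above.  */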

static gimple
vect_recog_widen_shift_pattern (VEC (gimple, heap) **stmts,
                                tree *type_in, tree *type_out)
{
  gimple last_stmt = VEC_pop (gimple, *stmts);
  gimple def_stmt0;
  tree oprnd0, oprnd1;
  tree type, half_type0;
  gimple pattern_stmt;
  tree vectype, vectype_out = NULL_TREE;
  tree dummy;
  tree var;
  enum tree_code dummy_code;
  int dummy_int;
  VEC (tree, heap) * dummy_vec;
  gimple use_stmt;

  if (!is_gimple_assign (last_stmt) || !vinfo_for_stmt (last_stmt))
    return NULL;

  if (STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (last_stmt)))
    return NULL;

  if (gimple_assign_rhs_code (last_stmt) != LSHIFT_EXPR)
    return NULL;

  oprnd0 = gimple_assign_rhs1 (last_stmt);
  oprnd1 = gimple_assign_rhs2 (last_stmt);
  if (TREE_CODE (oprnd0) != SSA_NAME || TREE_CODE (oprnd1) != INTEGER_CST)
    return NULL;

  /* Check operand 0: it has to be defined by a type promotion.  */
  if (!widened_name_p (oprnd0, last_stmt, &half_type0, &def_stmt0, false))
    return NULL;

  /* Check operand 1: has to be positive.  We check that it fits the type
     in vect_handle_widen_op_by_const ().  */
  if (tree_int_cst_compare (oprnd1, size_zero_node) <= 0)
    return NULL;

  oprnd0 = gimple_assign_rhs1 (def_stmt0);
  type = gimple_expr_type (last_stmt);

  /* Check for subsequent conversion to another type.  */
  use_stmt = vect_single_imm_use (last_stmt);
  if (use_stmt && is_gimple_assign (use_stmt)
      && CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (use_stmt))
      && !STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (use_stmt)))
    {
      tree use_lhs = gimple_assign_lhs (use_stmt);
      tree use_type = TREE_TYPE (use_lhs);

      if (INTEGRAL_TYPE_P (use_type)
          && TYPE_PRECISION (use_type) <= TYPE_PRECISION (type))
        {
          last_stmt = use_stmt;
          type = use_type;
        }
    }

  /* Check if this is a widening operation.  */
  if (!vect_handle_widen_op_by_const (last_stmt, LSHIFT_EXPR, oprnd1,
                                      &oprnd0, stmts,
                                      type, &half_type0, def_stmt0))
    return NULL;

  /* Pattern detected.  */
  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "vect_recog_widen_shift_pattern: detected: ");

  /* Check target support.  */
  vectype = get_vectype_for_scalar_type (half_type0);
  vectype_out = get_vectype_for_scalar_type (type);

  if (!vectype
      || !vectype_out
      || !supportable_widening_operation (WIDEN_LSHIFT_EXPR, last_stmt,
                                          vectype_out, vectype,
                                          &dummy, &dummy, &dummy_code,
                                          &dummy_code, &dummy_int,
                                          &dummy_vec))
    return NULL;

  *type_in = vectype;
  *type_out = vectype_out;

  /* Pattern supported.  Create a stmt to be used to replace the pattern.  */
  var = vect_recog_temp_ssa_var (type, NULL);
  pattern_stmt =
    gimple_build_assign_with_ops (WIDEN_LSHIFT_EXPR, var, oprnd0, oprnd1);

  if (vect_print_dump_info (REPORT_DETAILS))
    print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM);

  VEC_safe_push (gimple, heap, *stmts, last_stmt);
  return pattern_stmt;
}

/* Detect a vector by vector shift pattern that wouldn't otherwise be
   vectorized:

   type a_t;
   TYPE b_T, res_T;

   S1 a_t = ;
   S2 b_T = ;
   S3 res_T = b_T op a_t;

  where type 'TYPE' is a type with a different size than 'type',
  and op is <<, >> or rotate.

  Also detect cases:

   type a_t;
   TYPE b_T, c_T, res_T;

   S0 c_T = ;
   S1 a_t = (type) c_T;
   S2 b_T = ;
   S3 res_T = b_T op a_t;

  Input/Output:

  * STMTS: Contains a stmt from which the pattern search begins,
    i.e. the shift/rotate stmt.  The original stmt (S3) is replaced
    with a shift/rotate which has the same type on both operands: in the
    second case just b_T op c_T, and in the first case with an added cast
    from a_t to c_T in STMT_VINFO_PATTERN_DEF_SEQ.

  Output:

  * TYPE_IN: The type of the input arguments to the pattern.

  * TYPE_OUT: The type of the output of this pattern.

  * Return value: A new stmt that will be used to replace the shift/rotate
    S3 stmt.  */
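
/* For illustration (an expository sketch, not from the original sources):

     long long b[N];
     int s[N];
     for (int i = 0; i < N; i++)
       b[i] = b[i] << s[i];

   The shift amount s[i] has a different mode than b[i]; the pattern casts
   it to the type of b[i] in the pattern def sequence so that a
   vector-by-vector shift can be used.  */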
1446 
1447 static gimple
1448 vect_recog_vector_vector_shift_pattern (VEC (gimple, heap) **stmts,
1449 					tree *type_in, tree *type_out)
1450 {
1451   gimple last_stmt = VEC_pop (gimple, *stmts);
1452   tree oprnd0, oprnd1, lhs, var;
1453   gimple pattern_stmt, def_stmt;
1454   enum tree_code rhs_code;
1455   stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
1456   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1457   enum vect_def_type dt;
1458   tree def;
1459 
1460   if (!is_gimple_assign (last_stmt))
1461     return NULL;
1462 
1463   rhs_code = gimple_assign_rhs_code (last_stmt);
1464   switch (rhs_code)
1465     {
1466     case LSHIFT_EXPR:
1467     case RSHIFT_EXPR:
1468     case LROTATE_EXPR:
1469     case RROTATE_EXPR:
1470       break;
1471     default:
1472       return NULL;
1473     }
1474 
1475   if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
1476     return NULL;
1477 
1478   lhs = gimple_assign_lhs (last_stmt);
1479   oprnd0 = gimple_assign_rhs1 (last_stmt);
1480   oprnd1 = gimple_assign_rhs2 (last_stmt);
1481   if (TREE_CODE (oprnd0) != SSA_NAME
1482       || TREE_CODE (oprnd1) != SSA_NAME
1483       || TYPE_MODE (TREE_TYPE (oprnd0)) == TYPE_MODE (TREE_TYPE (oprnd1))
1484       || TYPE_PRECISION (TREE_TYPE (oprnd1))
1485 	 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (oprnd1)))
1486       || TYPE_PRECISION (TREE_TYPE (lhs))
1487 	 != TYPE_PRECISION (TREE_TYPE (oprnd0)))
1488     return NULL;
1489 
1490   if (!vect_is_simple_use (oprnd1, last_stmt, loop_vinfo, NULL, &def_stmt,
1491 			   &def, &dt))
1492     return NULL;
1493 
1494   if (dt != vect_internal_def)
1495     return NULL;
1496 
1497   *type_in = get_vectype_for_scalar_type (TREE_TYPE (oprnd0));
1498   *type_out = *type_in;
1499   if (*type_in == NULL_TREE)
1500     return NULL;
1501 
1502   def = NULL_TREE;
1503   if (gimple_assign_cast_p (def_stmt))
1504     {
1505       tree rhs1 = gimple_assign_rhs1 (def_stmt);
1506       if (TYPE_MODE (TREE_TYPE (rhs1)) == TYPE_MODE (TREE_TYPE (oprnd0))
1507 	  && TYPE_PRECISION (TREE_TYPE (rhs1))
1508 	     == TYPE_PRECISION (TREE_TYPE (oprnd0)))
1509 	def = rhs1;
1510     }
1511 
1512   if (def == NULL_TREE)
1513     {
1514       def = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
1515       def_stmt = gimple_build_assign_with_ops (NOP_EXPR, def, oprnd1,
1516 					       NULL_TREE);
1517       new_pattern_def_seq (stmt_vinfo, def_stmt);
1518     }
1519 
1520   /* Pattern detected.  */
1521   if (vect_print_dump_info (REPORT_DETAILS))
1522     fprintf (vect_dump, "vect_recog_vector_vector_shift_pattern: detected: ");
1523 
1524   /* Pattern supported.  Create a stmt to be used to replace the pattern.  */
1525   var = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
1526   pattern_stmt = gimple_build_assign_with_ops (rhs_code, var, oprnd0, def);
1527 
1528   if (vect_print_dump_info (REPORT_DETAILS))
1529     print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM);
1530 
1531   VEC_safe_push (gimple, heap, *stmts, last_stmt);
1532   return pattern_stmt;
1533 }
1534 
/* Detect a signed division by a power of two constant that wouldn't
   otherwise be vectorized:

   type a_t, b_t;

   S1 a_t = b_t / N;

  where type 'type' is a signed integral type and N is a constant positive
  power of two.

  Similarly handle signed modulo by a power of two constant:

   S4 a_t = b_t % N;

  Input/Output:

  * STMTS: Contains a stmt from which the pattern search begins,
    i.e. the division stmt.  S1 is replaced by:
  S3  y_t = b_t < 0 ? N - 1 : 0;
  S2  x_t = b_t + y_t;
  S1' a_t = x_t >> log2 (N);

    S4 is replaced by (where *_T temporaries have unsigned type):
  S9  y_T = b_t < 0 ? -1U : 0U;
  S8  z_T = y_T >> (sizeof (type_t) * CHAR_BIT - log2 (N));
  S7  z_t = (type) z_T;
  S6  w_t = b_t + z_t;
  S5  x_t = w_t & (N - 1);
  S4' a_t = x_t - z_t;

  Output:

  * TYPE_IN: The type of the input arguments to the pattern.

  * TYPE_OUT: The type of the output of this pattern.

  * Return value: A new stmt that will be used to replace the division
    S1 or modulo S4 stmt.  */
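
/* For instance (an illustrative example only, assuming a 32-bit int),
   a_t = b_t / 8 would be replaced by

     y_t = b_t < 0 ? 7 : 0;
     x_t = b_t + y_t;
     a_t = x_t >> 3;

   and a_t = b_t % 8 by

     y_T = b_t < 0 ? -1U : 0U;
     z_T = y_T >> 29;
     z_t = (int) z_T;
     w_t = b_t + z_t;
     x_t = w_t & 7;
     a_t = x_t - z_t;

   where z_t evaluates to b_t < 0 ? 7 : 0.  */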

static gimple
vect_recog_sdivmod_pow2_pattern (VEC (gimple, heap) **stmts,
				 tree *type_in, tree *type_out)
{
  gimple last_stmt = VEC_pop (gimple, *stmts);
  tree oprnd0, oprnd1, vectype, itype, cond;
  gimple pattern_stmt, def_stmt;
  enum tree_code rhs_code;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  optab optab;

  if (!is_gimple_assign (last_stmt))
    return NULL;

  rhs_code = gimple_assign_rhs_code (last_stmt);
  switch (rhs_code)
    {
    case TRUNC_DIV_EXPR:
    case TRUNC_MOD_EXPR:
      break;
    default:
      return NULL;
    }

  if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
    return NULL;

  oprnd0 = gimple_assign_rhs1 (last_stmt);
  oprnd1 = gimple_assign_rhs2 (last_stmt);
  itype = TREE_TYPE (oprnd0);
  if (TREE_CODE (oprnd0) != SSA_NAME
      || TREE_CODE (oprnd1) != INTEGER_CST
      || TREE_CODE (itype) != INTEGER_TYPE
      || TYPE_UNSIGNED (itype)
      || TYPE_PRECISION (itype) != GET_MODE_PRECISION (TYPE_MODE (itype))
      || !integer_pow2p (oprnd1)
      || tree_int_cst_sgn (oprnd1) != 1)
    return NULL;

  vectype = get_vectype_for_scalar_type (itype);
  if (vectype == NULL_TREE)
    return NULL;

  /* If the target can handle vectorized division or modulo natively,
     don't attempt to optimize this.  */
  optab = optab_for_tree_code (rhs_code, vectype, optab_default);
  if (optab != NULL)
    {
      enum machine_mode vec_mode = TYPE_MODE (vectype);
      int icode = (int) optab_handler (optab, vec_mode);
      if (icode != CODE_FOR_nothing
	  || GET_MODE_SIZE (vec_mode) == UNITS_PER_WORD)
	return NULL;
    }

  /* Pattern detected.  */
  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "vect_recog_sdivmod_pow2_pattern: detected: ");

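  /* COND is the sign test b_t < 0 shared by both the division and the
     modulo expansions below.  */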
  cond = build2 (LT_EXPR, boolean_type_node, oprnd0, build_int_cst (itype, 0));
  if (rhs_code == TRUNC_DIV_EXPR)
    {
      tree var = vect_recog_temp_ssa_var (itype, NULL);
      def_stmt
	= gimple_build_assign_with_ops3 (COND_EXPR, var, cond,
					 fold_build2 (MINUS_EXPR, itype,
						      oprnd1,
						      build_int_cst (itype,
								     1)),
					 build_int_cst (itype, 0));
      new_pattern_def_seq (stmt_vinfo, def_stmt);
      var = vect_recog_temp_ssa_var (itype, NULL);
      def_stmt
	= gimple_build_assign_with_ops (PLUS_EXPR, var, oprnd0,
					gimple_assign_lhs (def_stmt));
      append_pattern_def_seq (stmt_vinfo, def_stmt);

      pattern_stmt
	= gimple_build_assign_with_ops (RSHIFT_EXPR,
					vect_recog_temp_ssa_var (itype, NULL),
					var,
					build_int_cst (itype,
						       tree_log2 (oprnd1)));
    }
  else
    {
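      /* Compute SIGNMASK = b_t < 0 ? N - 1 : 0.  For N == 2 a single
	 COND_EXPR suffices; otherwise it is derived through the unsigned
	 shift sequence S9-S7 from the comment above.  */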
      tree signmask;
      STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;
      if (compare_tree_int (oprnd1, 2) == 0)
	{
	  signmask = vect_recog_temp_ssa_var (itype, NULL);
	  def_stmt
	    = gimple_build_assign_with_ops3 (COND_EXPR, signmask, cond,
					     build_int_cst (itype, 1),
					     build_int_cst (itype, 0));
	  append_pattern_def_seq (stmt_vinfo, def_stmt);
	}
      else
	{
	  tree utype
	    = build_nonstandard_integer_type (TYPE_PRECISION (itype), 1);
	  tree vecutype = get_vectype_for_scalar_type (utype);
	  tree shift
	    = build_int_cst (utype, GET_MODE_BITSIZE (TYPE_MODE (itype))
				    - tree_log2 (oprnd1));
	  tree var = vect_recog_temp_ssa_var (utype, NULL);
	  stmt_vec_info def_stmt_vinfo;

	  def_stmt
	    = gimple_build_assign_with_ops3 (COND_EXPR, var, cond,
					     build_int_cst (utype, -1),
					     build_int_cst (utype, 0));
	  def_stmt_vinfo = new_stmt_vec_info (def_stmt, loop_vinfo, NULL);
	  set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
	  STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecutype;
	  append_pattern_def_seq (stmt_vinfo, def_stmt);
	  var = vect_recog_temp_ssa_var (utype, NULL);
	  def_stmt
	    = gimple_build_assign_with_ops (RSHIFT_EXPR, var,
					    gimple_assign_lhs (def_stmt),
					    shift);
	  def_stmt_vinfo = new_stmt_vec_info (def_stmt, loop_vinfo, NULL);
	  set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
	  STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecutype;
	  append_pattern_def_seq (stmt_vinfo, def_stmt);
	  signmask = vect_recog_temp_ssa_var (itype, NULL);
	  def_stmt
	    = gimple_build_assign_with_ops (NOP_EXPR, signmask, var,
					    NULL_TREE);
	  append_pattern_def_seq (stmt_vinfo, def_stmt);
	}
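      /* S6, S5 and S4': a_t = ((b_t + signmask) & (N - 1)) - signmask.  */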
      def_stmt
	= gimple_build_assign_with_ops (PLUS_EXPR,
					vect_recog_temp_ssa_var (itype, NULL),
					oprnd0, signmask);
      append_pattern_def_seq (stmt_vinfo, def_stmt);
      def_stmt
	= gimple_build_assign_with_ops (BIT_AND_EXPR,
					vect_recog_temp_ssa_var (itype, NULL),
					gimple_assign_lhs (def_stmt),
					fold_build2 (MINUS_EXPR, itype,
						     oprnd1,
						     build_int_cst (itype,
								    1)));
      append_pattern_def_seq (stmt_vinfo, def_stmt);

      pattern_stmt
	= gimple_build_assign_with_ops (MINUS_EXPR,
					vect_recog_temp_ssa_var (itype, NULL),
					gimple_assign_lhs (def_stmt),
					signmask);
    }

  if (vect_print_dump_info (REPORT_DETAILS))
    print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM);

  VEC_safe_push (gimple, heap, *stmts, last_stmt);

  *type_in = vectype;
  *type_out = vectype;
  return pattern_stmt;
}

/* Function vect_recog_mixed_size_cond_pattern

   Try to find the following pattern:

     type x_t, y_t;
     TYPE a_T, b_T, c_T;
   loop:
     S1  a_T = x_t CMP y_t ? b_T : c_T;

   where type 'TYPE' is an integral type that has a different size from
   'type'.  b_T and c_T are constants and if 'TYPE' is wider than 'type',
   the constants need to fit into an integer type with the same width as
   'type'.

   Input:

   * LAST_STMT: A stmt from which the pattern search begins.

   Output:

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the pattern.
	Additionally a def_stmt is added.

	a_it = x_t CMP y_t ? b_it : c_it;
	a_T = (TYPE) a_it;  */
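
/* For instance (an illustrative example only, assuming a 16-bit short
   and a 32-bit int), with short x_t, y_t and int a_T:

     S1  a_T = x_t < y_t ? 3 : 42;

   would produce

	a_it = x_t < y_t ? 3 : 42;   where a_it has a 16-bit integral type
	a_T = (int) a_it;  */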

static gimple
vect_recog_mixed_size_cond_pattern (VEC (gimple, heap) **stmts, tree *type_in,
				    tree *type_out)
{
  gimple last_stmt = VEC_index (gimple, *stmts, 0);
  tree cond_expr, then_clause, else_clause;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt), def_stmt_info;
  tree type, vectype, comp_vectype, itype, vecitype;
  enum machine_mode cmpmode;
  gimple pattern_stmt, def_stmt;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);

  if (!is_gimple_assign (last_stmt)
      || gimple_assign_rhs_code (last_stmt) != COND_EXPR
      || STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_internal_def)
    return NULL;

  cond_expr = gimple_assign_rhs1 (last_stmt);
  then_clause = gimple_assign_rhs2 (last_stmt);
  else_clause = gimple_assign_rhs3 (last_stmt);

  if (TREE_CODE (then_clause) != INTEGER_CST
      || TREE_CODE (else_clause) != INTEGER_CST)
    return NULL;

  if (!COMPARISON_CLASS_P (cond_expr))
    return NULL;

  comp_vectype
    = get_vectype_for_scalar_type (TREE_TYPE (TREE_OPERAND (cond_expr, 0)));
  if (comp_vectype == NULL_TREE)
    return NULL;

  type = gimple_expr_type (last_stmt);
  cmpmode = GET_MODE_INNER (TYPE_MODE (comp_vectype));

  if (GET_MODE_BITSIZE (TYPE_MODE (type)) == GET_MODE_BITSIZE (cmpmode))
    return NULL;

  vectype = get_vectype_for_scalar_type (type);
  if (vectype == NULL_TREE)
    return NULL;

  if (expand_vec_cond_expr_p (vectype, comp_vectype))
    return NULL;

  itype = build_nonstandard_integer_type (GET_MODE_BITSIZE (cmpmode),
					  TYPE_UNSIGNED (type));
  if (itype == NULL_TREE
      || GET_MODE_BITSIZE (TYPE_MODE (itype)) != GET_MODE_BITSIZE (cmpmode))
    return NULL;

  vecitype = get_vectype_for_scalar_type (itype);
  if (vecitype == NULL_TREE)
    return NULL;

  if (!expand_vec_cond_expr_p (vecitype, comp_vectype))
    return NULL;

  if (GET_MODE_BITSIZE (TYPE_MODE (type)) > GET_MODE_BITSIZE (cmpmode))
    {
      if (!int_fits_type_p (then_clause, itype)
	  || !int_fits_type_p (else_clause, itype))
	return NULL;
    }

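  /* Emit the narrower COND_EXPR in ITYPE as a pattern def stmt; the cast
     of its result to TYPE becomes the stmt that replaces S1.  */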
  def_stmt
    = gimple_build_assign_with_ops3 (COND_EXPR,
				     vect_recog_temp_ssa_var (itype, NULL),
				     unshare_expr (cond_expr),
				     fold_convert (itype, then_clause),
				     fold_convert (itype, else_clause));
  pattern_stmt
    = gimple_build_assign_with_ops (NOP_EXPR,
				    vect_recog_temp_ssa_var (type, NULL),
				    gimple_assign_lhs (def_stmt), NULL_TREE);

  new_pattern_def_seq (stmt_vinfo, def_stmt);
  def_stmt_info = new_stmt_vec_info (def_stmt, loop_vinfo, NULL);
  set_vinfo_for_stmt (def_stmt, def_stmt_info);
  STMT_VINFO_VECTYPE (def_stmt_info) = vecitype;
  *type_in = vecitype;
  *type_out = vectype;

  return pattern_stmt;
}


/* Helper function of vect_recog_bool_pattern.  Called recursively; returns
   true if bool VAR can be optimized that way.  */
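
/* E.g. a chain such as a_b = x1 CMP1 y1; b_b = x2 CMP2 y2; VAR = a_b & b_b
   qualifies, provided each comparison can be turned into a COND_EXPR whose
   vector form the target supports.  */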

static bool
check_bool_pattern (tree var, loop_vec_info loop_vinfo)
{
  gimple def_stmt;
  enum vect_def_type dt;
  tree def, rhs1;
  enum tree_code rhs_code;

  if (!vect_is_simple_use (var, NULL, loop_vinfo, NULL, &def_stmt, &def, &dt))
    return false;

  if (dt != vect_internal_def)
    return false;

  if (!is_gimple_assign (def_stmt))
    return false;

  if (!has_single_use (def))
    return false;

  rhs1 = gimple_assign_rhs1 (def_stmt);
  rhs_code = gimple_assign_rhs_code (def_stmt);
  switch (rhs_code)
    {
    case SSA_NAME:
      return check_bool_pattern (rhs1, loop_vinfo);

    CASE_CONVERT:
      if ((TYPE_PRECISION (TREE_TYPE (rhs1)) != 1
	   || !TYPE_UNSIGNED (TREE_TYPE (rhs1)))
	  && TREE_CODE (TREE_TYPE (rhs1)) != BOOLEAN_TYPE)
	return false;
      return check_bool_pattern (rhs1, loop_vinfo);

    case BIT_NOT_EXPR:
      return check_bool_pattern (rhs1, loop_vinfo);

    case BIT_AND_EXPR:
    case BIT_IOR_EXPR:
    case BIT_XOR_EXPR:
      if (!check_bool_pattern (rhs1, loop_vinfo))
	return false;
      return check_bool_pattern (gimple_assign_rhs2 (def_stmt), loop_vinfo);

    default:
      if (TREE_CODE_CLASS (rhs_code) == tcc_comparison)
	{
	  tree vecitype, comp_vectype;

	  /* If the comparison can throw, then is_gimple_condexpr will be
	     false and we can't make a COND_EXPR/VEC_COND_EXPR out of it.  */
	  if (stmt_could_throw_p (def_stmt))
	    return false;

	  comp_vectype = get_vectype_for_scalar_type (TREE_TYPE (rhs1));
	  if (comp_vectype == NULL_TREE)
	    return false;

	  if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE)
	    {
	      enum machine_mode mode = TYPE_MODE (TREE_TYPE (rhs1));
	      tree itype
		= build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
	      vecitype = get_vectype_for_scalar_type (itype);
	      if (vecitype == NULL_TREE)
		return false;
	    }
	  else
	    vecitype = comp_vectype;
	  return expand_vec_cond_expr_p (vecitype, comp_vectype);
	}
      return false;
    }
}


/* Helper function of adjust_bool_pattern.  Add a cast to TYPE of the result
   of the stmt defining VAR (its SSA_NAME_DEF_STMT): the COND_EXPR that was
   that stmt's RELATED_STMT is moved to its PATTERN_DEF_SEQ, and the new
   cast becomes the RELATED_STMT.  */

static tree
adjust_bool_pattern_cast (tree type, tree var)
{
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (SSA_NAME_DEF_STMT (var));
  gimple cast_stmt, pattern_stmt;

  gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo));
  pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
  new_pattern_def_seq (stmt_vinfo, pattern_stmt);
  cast_stmt
    = gimple_build_assign_with_ops (NOP_EXPR,
				    vect_recog_temp_ssa_var (type, NULL),
				    gimple_assign_lhs (pattern_stmt),
				    NULL_TREE);
  STMT_VINFO_RELATED_STMT (stmt_vinfo) = cast_stmt;
  return gimple_assign_lhs (cast_stmt);
}


/* Helper function of vect_recog_bool_pattern.  Do the actual transformations,
   recursively.  VAR is an SSA_NAME that should be transformed from bool
   to a wider integer type, and OUT_TYPE is the desired final integer type of
   the whole pattern.  TRUEVAL should be NULL unless we are optimizing a
   BIT_AND_EXPR into a COND_EXPR that uses an integer from one of the
   operands as the then_clause.  STMTS is where statements with added
   pattern stmts should be pushed.  */

static tree
adjust_bool_pattern (tree var, tree out_type, tree trueval,
		     VEC (gimple, heap) **stmts)
{
  gimple stmt = SSA_NAME_DEF_STMT (var);
  enum tree_code rhs_code, def_rhs_code;
  tree itype, cond_expr, rhs1, rhs2, irhs1, irhs2;
  location_t loc;
  gimple pattern_stmt, def_stmt;

  rhs1 = gimple_assign_rhs1 (stmt);
  rhs2 = gimple_assign_rhs2 (stmt);
  rhs_code = gimple_assign_rhs_code (stmt);
  loc = gimple_location (stmt);
  switch (rhs_code)
    {
    case SSA_NAME:
    CASE_CONVERT:
      irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
      itype = TREE_TYPE (irhs1);
      pattern_stmt
	= gimple_build_assign_with_ops (SSA_NAME,
					vect_recog_temp_ssa_var (itype, NULL),
					irhs1, NULL_TREE);
      break;

    case BIT_NOT_EXPR:
      irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
      itype = TREE_TYPE (irhs1);
      pattern_stmt
	= gimple_build_assign_with_ops (BIT_XOR_EXPR,
					vect_recog_temp_ssa_var (itype, NULL),
					irhs1, build_int_cst (itype, 1));
      break;

    case BIT_AND_EXPR:
      /* Try to optimize x = y & (a < b ? 1 : 0); into
	 x = (a < b ? y : 0);

	 E.g. for:
	   bool a_b, b_b, c_b;
	   TYPE d_T;

	   S1  a_b = x1 CMP1 y1;
	   S2  b_b = x2 CMP2 y2;
	   S3  c_b = a_b & b_b;
	   S4  d_T = (TYPE) c_b;

	 we would normally emit:

	   S1'  a_T = x1 CMP1 y1 ? 1 : 0;
	   S2'  b_T = x2 CMP2 y2 ? 1 : 0;
	   S3'  c_T = a_T & b_T;
	   S4'  d_T = c_T;

	 but we can save one stmt by using the
	 result of one of the COND_EXPRs in the other COND_EXPR and leave
	 BIT_AND_EXPR stmt out:

	   S1'  a_T = x1 CMP1 y1 ? 1 : 0;
	   S3'  c_T = x2 CMP2 y2 ? a_T : 0;
	   S4'  d_T = c_T;
	 At least when VEC_COND_EXPR is implemented using masks,
	 cond ? 1 : 0 is as expensive as cond ? var : 0: in both cases it
	 computes the comparison mask and ANDs it, in one case with an
	 all-ones vector, in the other case with a vector register.
	 Don't do this for BIT_IOR_EXPR, because cond ? 1 : var; is
	 often more expensive.  */
      def_stmt = SSA_NAME_DEF_STMT (rhs2);
      def_rhs_code = gimple_assign_rhs_code (def_stmt);
      if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
	{
	  tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
	  irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
	  if (TYPE_PRECISION (TREE_TYPE (irhs1))
	      == GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (def_rhs1))))
	    {
	      gimple tstmt;
	      stmt_vec_info stmt_def_vinfo = vinfo_for_stmt (def_stmt);
	      irhs2 = adjust_bool_pattern (rhs2, out_type, irhs1, stmts);
	      tstmt = VEC_pop (gimple, *stmts);
	      gcc_assert (tstmt == def_stmt);
	      VEC_quick_push (gimple, *stmts, stmt);
	      STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt))
		= STMT_VINFO_RELATED_STMT (stmt_def_vinfo);
	      gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_def_vinfo));
	      STMT_VINFO_RELATED_STMT (stmt_def_vinfo) = NULL;
	      return irhs2;
	    }
	  else
	    irhs2 = adjust_bool_pattern (rhs2, out_type, NULL_TREE, stmts);
	  goto and_ior_xor;
	}
      def_stmt = SSA_NAME_DEF_STMT (rhs1);
      def_rhs_code = gimple_assign_rhs_code (def_stmt);
      if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
	{
	  tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
	  irhs2 = adjust_bool_pattern (rhs2, out_type, NULL_TREE, stmts);
	  if (TYPE_PRECISION (TREE_TYPE (irhs2))
	      == GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (def_rhs1))))
	    {
	      gimple tstmt;
	      stmt_vec_info stmt_def_vinfo = vinfo_for_stmt (def_stmt);
	      irhs1 = adjust_bool_pattern (rhs1, out_type, irhs2, stmts);
	      tstmt = VEC_pop (gimple, *stmts);
	      gcc_assert (tstmt == def_stmt);
	      VEC_quick_push (gimple, *stmts, stmt);
	      STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt))
		= STMT_VINFO_RELATED_STMT (stmt_def_vinfo);
	      gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_def_vinfo));
	      STMT_VINFO_RELATED_STMT (stmt_def_vinfo) = NULL;
	      return irhs1;
	    }
	  else
	    irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
	  goto and_ior_xor;
	}
      /* FALLTHRU */
    case BIT_IOR_EXPR:
    case BIT_XOR_EXPR:
      irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
      irhs2 = adjust_bool_pattern (rhs2, out_type, NULL_TREE, stmts);
    and_ior_xor:
      if (TYPE_PRECISION (TREE_TYPE (irhs1))
	  != TYPE_PRECISION (TREE_TYPE (irhs2)))
	{
	  int prec1 = TYPE_PRECISION (TREE_TYPE (irhs1));
	  int prec2 = TYPE_PRECISION (TREE_TYPE (irhs2));
	  int out_prec = TYPE_PRECISION (out_type);
	  if (absu_hwi (out_prec - prec1) < absu_hwi (out_prec - prec2))
	    irhs2 = adjust_bool_pattern_cast (TREE_TYPE (irhs1), rhs2);
	  else if (absu_hwi (out_prec - prec1) > absu_hwi (out_prec - prec2))
	    irhs1 = adjust_bool_pattern_cast (TREE_TYPE (irhs2), rhs1);
	  else
	    {
	      irhs1 = adjust_bool_pattern_cast (out_type, rhs1);
	      irhs2 = adjust_bool_pattern_cast (out_type, rhs2);
	    }
	}
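      /* Both operands now have the same precision, so the bitwise
	 operation can be built directly in that type.  */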
      itype = TREE_TYPE (irhs1);
      pattern_stmt
	= gimple_build_assign_with_ops (rhs_code,
					vect_recog_temp_ssa_var (itype, NULL),
					irhs1, irhs2);
      break;

    default:
      gcc_assert (TREE_CODE_CLASS (rhs_code) == tcc_comparison);
      if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE
	  || !TYPE_UNSIGNED (TREE_TYPE (rhs1))
	  || (TYPE_PRECISION (TREE_TYPE (rhs1))
	      != GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (rhs1)))))
	{
	  enum machine_mode mode = TYPE_MODE (TREE_TYPE (rhs1));
	  itype
	    = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
	}
      else
	itype = TREE_TYPE (rhs1);
      cond_expr = build2_loc (loc, rhs_code, itype, rhs1, rhs2);
      if (trueval == NULL_TREE)
	trueval = build_int_cst (itype, 1);
      else
	gcc_checking_assert (useless_type_conversion_p (itype,
							TREE_TYPE (trueval)));
      pattern_stmt
	= gimple_build_assign_with_ops3 (COND_EXPR,
					 vect_recog_temp_ssa_var (itype, NULL),
					 cond_expr, trueval,
					 build_int_cst (itype, 0));
      break;
    }

  VEC_safe_push (gimple, heap, *stmts, stmt);
  gimple_set_location (pattern_stmt, loc);
  STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt)) = pattern_stmt;
  return gimple_assign_lhs (pattern_stmt);
}


/* Function vect_recog_bool_pattern

   Try to find a pattern like the following:

     bool a_b, b_b, c_b, d_b, e_b;
     TYPE f_T;
   loop:
     S1  a_b = x1 CMP1 y1;
     S2  b_b = x2 CMP2 y2;
     S3  c_b = a_b & b_b;
     S4  d_b = x3 CMP3 y3;
     S5  e_b = c_b | d_b;
     S6  f_T = (TYPE) e_b;

   where type 'TYPE' is an integral type.

   Input:

   * LAST_STMT: A stmt at the end from which the pattern
		search begins, i.e. a cast of a bool to
		an integer type.

   Output:

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the pattern.

	Assuming the size of TYPE is the same as the size of all comparisons
	(otherwise some casts would be added where needed), for the above
	sequence we create the related pattern stmts:
	S1'  a_T = x1 CMP1 y1 ? 1 : 0;
	S3'  c_T = x2 CMP2 y2 ? a_T : 0;
	S4'  d_T = x3 CMP3 y3 ? 1 : 0;
	S5'  e_T = c_T | d_T;
	S6'  f_T = e_T;

	Instead of the above S3' we could emit:
	S2'  b_T = x2 CMP2 y2 ? 1 : 0;
	S3'  c_T = a_T & b_T;
	but the above is more efficient.  */

static gimple
vect_recog_bool_pattern (VEC (gimple, heap) **stmts, tree *type_in,
			 tree *type_out)
{
  gimple last_stmt = VEC_pop (gimple, *stmts);
  enum tree_code rhs_code;
  tree var, lhs, rhs, vectype;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  gimple pattern_stmt;

  if (!is_gimple_assign (last_stmt))
    return NULL;

  var = gimple_assign_rhs1 (last_stmt);
  lhs = gimple_assign_lhs (last_stmt);

  if ((TYPE_PRECISION (TREE_TYPE (var)) != 1
       || !TYPE_UNSIGNED (TREE_TYPE (var)))
      && TREE_CODE (TREE_TYPE (var)) != BOOLEAN_TYPE)
    return NULL;

  rhs_code = gimple_assign_rhs_code (last_stmt);
  if (CONVERT_EXPR_CODE_P (rhs_code))
    {
      if (TREE_CODE (TREE_TYPE (lhs)) != INTEGER_TYPE
	  || TYPE_PRECISION (TREE_TYPE (lhs)) == 1)
	return NULL;
      vectype = get_vectype_for_scalar_type (TREE_TYPE (lhs));
      if (vectype == NULL_TREE)
	return NULL;

      if (!check_bool_pattern (var, loop_vinfo))
	return NULL;

      rhs = adjust_bool_pattern (var, TREE_TYPE (lhs), NULL_TREE, stmts);
      lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
      if (useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
	pattern_stmt
	  = gimple_build_assign_with_ops (SSA_NAME, lhs, rhs, NULL_TREE);
      else
	pattern_stmt
	  = gimple_build_assign_with_ops (NOP_EXPR, lhs, rhs, NULL_TREE);
      *type_out = vectype;
      *type_in = vectype;
      VEC_safe_push (gimple, heap, *stmts, last_stmt);
      return pattern_stmt;
    }
  else if (rhs_code == SSA_NAME
	   && STMT_VINFO_DATA_REF (stmt_vinfo))
    {
      stmt_vec_info pattern_stmt_info;
      vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
      gcc_assert (vectype != NULL_TREE);
      if (!VECTOR_MODE_P (TYPE_MODE (vectype)))
	return NULL;
      if (!check_bool_pattern (var, loop_vinfo))
	return NULL;

      rhs = adjust_bool_pattern (var, TREE_TYPE (vectype), NULL_TREE, stmts);
      lhs = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (vectype), lhs);
      if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
	{
	  tree rhs2 = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
	  gimple cast_stmt
	    = gimple_build_assign_with_ops (NOP_EXPR, rhs2, rhs, NULL_TREE);
	  new_pattern_def_seq (stmt_vinfo, cast_stmt);
	  rhs = rhs2;
	}
      pattern_stmt
	= gimple_build_assign_with_ops (SSA_NAME, lhs, rhs, NULL_TREE);
      pattern_stmt_info = new_stmt_vec_info (pattern_stmt, loop_vinfo, NULL);
      set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
      STMT_VINFO_DATA_REF (pattern_stmt_info)
	= STMT_VINFO_DATA_REF (stmt_vinfo);
      STMT_VINFO_DR_BASE_ADDRESS (pattern_stmt_info)
	= STMT_VINFO_DR_BASE_ADDRESS (stmt_vinfo);
      STMT_VINFO_DR_INIT (pattern_stmt_info) = STMT_VINFO_DR_INIT (stmt_vinfo);
      STMT_VINFO_DR_OFFSET (pattern_stmt_info)
	= STMT_VINFO_DR_OFFSET (stmt_vinfo);
      STMT_VINFO_DR_STEP (pattern_stmt_info) = STMT_VINFO_DR_STEP (stmt_vinfo);
      STMT_VINFO_DR_ALIGNED_TO (pattern_stmt_info)
	= STMT_VINFO_DR_ALIGNED_TO (stmt_vinfo);
      DR_STMT (STMT_VINFO_DATA_REF (stmt_vinfo)) = pattern_stmt;
      *type_out = vectype;
      *type_in = vectype;
      VEC_safe_push (gimple, heap, *stmts, last_stmt);
      return pattern_stmt;
    }
  else
    return NULL;
}


/* Mark statements that are involved in a pattern.  */

static inline void
vect_mark_pattern_stmts (gimple orig_stmt, gimple pattern_stmt,
                         tree pattern_vectype)
{
  stmt_vec_info pattern_stmt_info, def_stmt_info;
  stmt_vec_info orig_stmt_info = vinfo_for_stmt (orig_stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (orig_stmt_info);
  gimple def_stmt;

  pattern_stmt_info = vinfo_for_stmt (pattern_stmt);
  if (pattern_stmt_info == NULL)
    {
      pattern_stmt_info = new_stmt_vec_info (pattern_stmt, loop_vinfo, NULL);
      set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
    }
  gimple_set_bb (pattern_stmt, gimple_bb (orig_stmt));

  STMT_VINFO_RELATED_STMT (pattern_stmt_info) = orig_stmt;
  STMT_VINFO_DEF_TYPE (pattern_stmt_info)
    = STMT_VINFO_DEF_TYPE (orig_stmt_info);
  STMT_VINFO_VECTYPE (pattern_stmt_info) = pattern_vectype;
  STMT_VINFO_IN_PATTERN_P (orig_stmt_info) = true;
  STMT_VINFO_RELATED_STMT (orig_stmt_info) = pattern_stmt;
  STMT_VINFO_PATTERN_DEF_SEQ (pattern_stmt_info)
    = STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt_info);
  if (STMT_VINFO_PATTERN_DEF_SEQ (pattern_stmt_info))
    {
      gimple_stmt_iterator si;
      for (si = gsi_start (STMT_VINFO_PATTERN_DEF_SEQ (pattern_stmt_info));
	   !gsi_end_p (si); gsi_next (&si))
	{
	  def_stmt = gsi_stmt (si);
	  def_stmt_info = vinfo_for_stmt (def_stmt);
	  if (def_stmt_info == NULL)
	    {
	      def_stmt_info = new_stmt_vec_info (def_stmt, loop_vinfo, NULL);
	      set_vinfo_for_stmt (def_stmt, def_stmt_info);
	    }
	  gimple_set_bb (def_stmt, gimple_bb (orig_stmt));
	  STMT_VINFO_RELATED_STMT (def_stmt_info) = orig_stmt;
	  STMT_VINFO_DEF_TYPE (def_stmt_info)
	    = STMT_VINFO_DEF_TYPE (orig_stmt_info);
	  if (STMT_VINFO_VECTYPE (def_stmt_info) == NULL_TREE)
	    STMT_VINFO_VECTYPE (def_stmt_info) = pattern_vectype;
	}
    }
}

/* Function vect_pattern_recog_1

   Input:
   PATTERN_RECOG_FUNC: A pointer to a function that detects a certain
        computation pattern.
   STMT: A stmt from which the pattern search should start.

   If PATTERN_RECOG_FUNC successfully detected the pattern, it creates an
   expression that computes the same functionality and can be used to
   replace the sequence of stmts that are involved in the pattern.

   Output:
   This function checks if the expression returned by PATTERN_RECOG_FUNC is
   supported in vector form by the target.  We use 'TYPE_IN' to obtain the
   relevant vector type.  If 'TYPE_IN' is already a vector type, then this
   indicates that target support has already been checked by
   PATTERN_RECOG_FUNC.  If 'TYPE_OUT' is also returned by PATTERN_RECOG_FUNC,
   we check that it fits the available target pattern.

   This function also does some bookkeeping, as explained in the documentation
   for vect_pattern_recog.  */

static void
vect_pattern_recog_1 (vect_recog_func_ptr vect_recog_func,
		      gimple_stmt_iterator si,
		      VEC (gimple, heap) **stmts_to_replace)
{
  gimple stmt = gsi_stmt (si), pattern_stmt;
  stmt_vec_info stmt_info;
  loop_vec_info loop_vinfo;
  tree pattern_vectype;
  tree type_in, type_out;
  enum tree_code code;
  int i;
  gimple next;

  VEC_truncate (gimple, *stmts_to_replace, 0);
  VEC_quick_push (gimple, *stmts_to_replace, stmt);
  pattern_stmt = (* vect_recog_func) (stmts_to_replace, &type_in, &type_out);
  if (!pattern_stmt)
    return;

  stmt = VEC_last (gimple, *stmts_to_replace);
  stmt_info = vinfo_for_stmt (stmt);
  loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);

  if (VECTOR_MODE_P (TYPE_MODE (type_in)))
    {
      /* No need to check target support (already checked by the pattern
         recognition function).  */
      pattern_vectype = type_out ? type_out : type_in;
    }
  else
    {
      enum machine_mode vec_mode;
      enum insn_code icode;
      optab optab;

      /* Check target support.  */
      type_in = get_vectype_for_scalar_type (type_in);
      if (!type_in)
	return;
      if (type_out)
	type_out = get_vectype_for_scalar_type (type_out);
      else
	type_out = type_in;
      if (!type_out)
	return;
      pattern_vectype = type_out;

      if (is_gimple_assign (pattern_stmt))
	code = gimple_assign_rhs_code (pattern_stmt);
      else
        {
	  gcc_assert (is_gimple_call (pattern_stmt));
	  code = CALL_EXPR;
	}

      optab = optab_for_tree_code (code, type_in, optab_default);
      vec_mode = TYPE_MODE (type_in);
      if (!optab
          || (icode = optab_handler (optab, vec_mode)) == CODE_FOR_nothing
          || (insn_data[icode].operand[0].mode != TYPE_MODE (type_out)))
	return;
    }

  /* Found a vectorizable pattern.  */
  if (vect_print_dump_info (REPORT_DETAILS))
    {
      fprintf (vect_dump, "pattern recognized: ");
      print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM);
    }

  /* Mark the stmts that are involved in the pattern.  */
  vect_mark_pattern_stmts (stmt, pattern_stmt, pattern_vectype);

  /* Patterns cannot be vectorized using SLP, because they change the order of
     computation.  */
  FOR_EACH_VEC_ELT (gimple, LOOP_VINFO_REDUCTIONS (loop_vinfo), i, next)
    if (next == stmt)
      VEC_ordered_remove (gimple, LOOP_VINFO_REDUCTIONS (loop_vinfo), i);

  /* It is possible that additional pattern stmts are created and inserted in
     STMTS_TO_REPLACE.  We create a stmt_info for each of them, and mark the
     relevant statements.  */
  for (i = 0; VEC_iterate (gimple, *stmts_to_replace, i, stmt)
	      && (unsigned) i < (VEC_length (gimple, *stmts_to_replace) - 1);
       i++)
    {
      stmt_info = vinfo_for_stmt (stmt);
      pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
      if (vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "additional pattern stmt: ");
          print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM);
        }

      vect_mark_pattern_stmts (stmt, pattern_stmt, NULL_TREE);
    }
}


/* Function vect_pattern_recog

   Input:
   LOOP_VINFO - a loop_vec_info of a loop in which we want to look for
        computation idioms.

   Output - for each computation idiom that is detected we create a new stmt
        that provides the same functionality and that can be vectorized.  We
        also record some information in the stmt_vec_info of the relevant
        stmts, as explained below:

   At the entry to this function we have the following stmts, with the
   following initial value in the STMT_VINFO fields:

         stmt                     in_pattern_p  related_stmt    vec_stmt
         S1: a_i = ....                 -       -               -
         S2: a_2 = ..use(a_i)..         -       -               -
         S3: a_1 = ..use(a_2)..         -       -               -
         S4: a_0 = ..use(a_1)..         -       -               -
         S5: ... = ..use(a_0)..         -       -               -

   Say the sequence {S1,S2,S3,S4} was detected as a pattern that can be
   represented by a single stmt.  We then:
   - create a new stmt S6 equivalent to the pattern (the stmt is not
     inserted into the code)
   - fill in the STMT_VINFO fields as follows:

                                  in_pattern_p  related_stmt    vec_stmt
         S1: a_i = ....                 -       -               -
         S2: a_2 = ..use(a_i)..         -       -               -
         S3: a_1 = ..use(a_2)..         -       -               -
         S4: a_0 = ..use(a_1)..         true    S6              -
          '---> S6: a_new = ....        -       S4              -
         S5: ... = ..use(a_0)..         -       -               -

   (the last stmt in the pattern (S4) and the new pattern stmt (S6) point
   to each other through the RELATED_STMT field).

   S6 will be marked as relevant in vect_mark_stmts_to_be_vectorized instead
   of S4 because it will replace all its uses.  Stmts {S1,S2,S3} will
   remain irrelevant unless used by stmts other than S4.

   If vectorization succeeds, vect_transform_stmt will skip over {S1,S2,S3}
   (because they are marked as irrelevant).  It will vectorize S6, and record
   a pointer to the new vector stmt VS6 from S6 (as usual).
   S4 will be skipped, and S5 will be vectorized as usual:

                                  in_pattern_p  related_stmt    vec_stmt
         S1: a_i = ....                 -       -               -
         S2: a_2 = ..use(a_i)..         -       -               -
         S3: a_1 = ..use(a_2)..         -       -               -
       > VS6: va_new = ....             -       -               -
         S4: a_0 = ..use(a_1)..         true    S6              VS6
          '---> S6: a_new = ....        -       S4              VS6
       > VS5: ... = ..vuse(va_new)..    -       -               -
         S5: ... = ..use(a_0)..         -       -               -

   DCE could then get rid of {S1,S2,S3,S4,S5} (if their defs are not used
   elsewhere), and we'll end up with:

        VS6: va_new = ....
        VS5: ... = ..vuse(va_new)..

   In case of more than one pattern statement, e.g., widen-mult with
   intermediate type:

     S1  a_t = ;
     S2  a_T = (TYPE) a_t;
           '--> S3: a_it = (interm_type) a_t;
     S4  prod_T = a_T * CONST;
           '--> S5: prod_T' = a_it w* CONST;

   there may be other users of a_T outside the pattern.  In that case S2 will
   be marked as relevant (as well as S3), and both S2 and S3 will be analyzed
   and vectorized.  The vector stmt VS2 will be recorded in S2, and VS3 will
   be recorded in S3.  */

void
vect_pattern_recog (loop_vec_info loop_vinfo)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
  unsigned int nbbs = loop->num_nodes;
  gimple_stmt_iterator si;
  unsigned int i, j;
  vect_recog_func_ptr vect_recog_func;
  VEC (gimple, heap) *stmts_to_replace = VEC_alloc (gimple, heap, 1);

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "=== vect_pattern_recog ===");

  /* Scan through the loop stmts, applying the pattern recognition
     functions starting at each stmt visited:  */
  for (i = 0; i < nbbs; i++)
    {
      basic_block bb = bbs[i];
      for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
        {
          /* Scan over all generic vect_recog_xxx_pattern functions.  */
          for (j = 0; j < NUM_PATTERNS; j++)
            {
	      vect_recog_func = vect_vect_recog_func_ptrs[j];
	      vect_pattern_recog_1 (vect_recog_func, si,
				    &stmts_to_replace);
            }
        }
    }

  VEC_free (gimple, heap, stmts_to_replace);
}