1 /* Lower vector operations to scalar operations.
2    Copyright (C) 2004-2019 Free Software Foundation, Inc.
3 
4 This file is part of GCC.
5 
6 GCC is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 3, or (at your option) any
9 later version.
10 
11 GCC is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
14 for more details.
15 
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3.  If not see
18 <http://www.gnu.org/licenses/>.  */
19 
20 #include "config.h"
21 #include "system.h"
22 #include "coretypes.h"
23 #include "backend.h"
24 #include "rtl.h"
25 #include "tree.h"
26 #include "gimple.h"
27 #include "tree-pass.h"
28 #include "ssa.h"
29 #include "expmed.h"
30 #include "optabs-tree.h"
31 #include "diagnostic.h"
32 #include "fold-const.h"
33 #include "stor-layout.h"
34 #include "langhooks.h"
35 #include "tree-eh.h"
36 #include "gimple-iterator.h"
37 #include "gimplify-me.h"
38 #include "gimplify.h"
39 #include "tree-cfg.h"
40 #include "tree-vector-builder.h"
41 #include "vec-perm-indices.h"
42 #include "insn-config.h"
43 #include "recog.h"		/* FIXME: for insn_data */
44 
45 
46 static void expand_vector_operations_1 (gimple_stmt_iterator *);
47 
48 /* Return the number of elements in a vector type TYPE that we have
49    already decided needs to be expanded piecewise.  We don't support
50    this kind of expansion for variable-length vectors, since we should
51    always check for target support before introducing uses of those.  */
52 static unsigned int
53 nunits_for_known_piecewise_op (const_tree type)
54 {
55   return TYPE_VECTOR_SUBPARTS (type).to_constant ();
56 }
57 
58 /* Return true if TYPE1 has more elements than TYPE2, where either
59    type may be a vector or a scalar.  */
60 
61 static inline bool
62 subparts_gt (tree type1, tree type2)
63 {
64   poly_uint64 n1 = VECTOR_TYPE_P (type1) ? TYPE_VECTOR_SUBPARTS (type1) : 1;
65   poly_uint64 n2 = VECTOR_TYPE_P (type2) ? TYPE_VECTOR_SUBPARTS (type2) : 1;
66   return known_gt (n1, n2);
67 }
68 
69 /* Build a constant of type TYPE, made of VALUE's bits replicated
70    every TYPE_SIZE (INNER_TYPE) bits to fit TYPE's precision.  */
71 static tree
72 build_replicated_const (tree type, tree inner_type, HOST_WIDE_INT value)
73 {
74   int width = tree_to_uhwi (TYPE_SIZE (inner_type));
75   int n = (TYPE_PRECISION (type) + HOST_BITS_PER_WIDE_INT - 1)
76     / HOST_BITS_PER_WIDE_INT;
77   unsigned HOST_WIDE_INT low, mask;
78   HOST_WIDE_INT a[WIDE_INT_MAX_ELTS];
79   int i;
80 
81   gcc_assert (n && n <= WIDE_INT_MAX_ELTS);
82 
83   if (width == HOST_BITS_PER_WIDE_INT)
84     low = value;
85   else
86     {
87       mask = ((HOST_WIDE_INT)1 << width) - 1;
88       low = (unsigned HOST_WIDE_INT) ~0 / mask * (value & mask);
89     }
90 
91   for (i = 0; i < n; i++)
92     a[i] = low;
93 
94   gcc_assert (TYPE_PRECISION (type) <= MAX_BITSIZE_MODE_ANY_INT);
95   return wide_int_to_tree
96     (type, wide_int::from_array (a, n, TYPE_PRECISION (type)));
97 }
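
/* Illustrative worked example (not part of the original sources): with a
   32-bit word TYPE and an 8-bit INNER_TYPE, a VALUE of 0x7f is replicated
   every 8 bits, so this function returns the constant 0x7f7f7f7f; a VALUE
   of 0x80 would give 0x80808080.  These are exactly the low-bit and
   high-bit masks consumed by do_plus_minus and do_negate below.  */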
98 
99 static GTY(()) tree vector_inner_type;
100 static GTY(()) tree vector_last_type;
101 static GTY(()) int vector_last_nunits;
102 
103 /* Return a suitable vector type made of NUNITS units each of mode
104    "word_mode" (the global variable).  */
105 static tree
106 build_word_mode_vector_type (int nunits)
107 {
108   if (!vector_inner_type)
109     vector_inner_type = lang_hooks.types.type_for_mode (word_mode, 1);
110   else if (vector_last_nunits == nunits)
111     {
112       gcc_assert (TREE_CODE (vector_last_type) == VECTOR_TYPE);
113       return vector_last_type;
114     }
115 
116   vector_last_nunits = nunits;
117   vector_last_type = build_vector_type (vector_inner_type, nunits);
118   return vector_last_type;
119 }
120 
121 typedef tree (*elem_op_func) (gimple_stmt_iterator *,
122 			      tree, tree, tree, tree, tree, enum tree_code,
123 			      tree);
124 
125 tree
126 tree_vec_extract (gimple_stmt_iterator *gsi, tree type,
127 		  tree t, tree bitsize, tree bitpos)
128 {
129   if (TREE_CODE (t) == SSA_NAME)
130     {
131       gimple *def_stmt = SSA_NAME_DEF_STMT (t);
132       if (is_gimple_assign (def_stmt)
133 	  && (gimple_assign_rhs_code (def_stmt) == VECTOR_CST
134 	      || (bitpos
135 		  && gimple_assign_rhs_code (def_stmt) == CONSTRUCTOR)))
136 	t = gimple_assign_rhs1 (def_stmt);
137     }
138   if (bitpos)
139     {
140       if (TREE_CODE (type) == BOOLEAN_TYPE)
141 	{
142 	  tree itype
143 	    = build_nonstandard_integer_type (tree_to_uhwi (bitsize), 0);
144 	  tree field = gimplify_build3 (gsi, BIT_FIELD_REF, itype, t,
145 					bitsize, bitpos);
146 	  return gimplify_build2 (gsi, NE_EXPR, type, field,
147 				  build_zero_cst (itype));
148 	}
149       else
150 	return gimplify_build3 (gsi, BIT_FIELD_REF, type, t, bitsize, bitpos);
151     }
152   else
153     return gimplify_build1 (gsi, VIEW_CONVERT_EXPR, type, t);
154 }
155 
156 static tree
157 do_unop (gimple_stmt_iterator *gsi, tree inner_type, tree a,
158 	 tree b ATTRIBUTE_UNUSED, tree bitpos, tree bitsize,
159 	 enum tree_code code, tree type ATTRIBUTE_UNUSED)
160 {
161   a = tree_vec_extract (gsi, inner_type, a, bitsize, bitpos);
162   return gimplify_build1 (gsi, code, inner_type, a);
163 }
164 
165 static tree
166 do_binop (gimple_stmt_iterator *gsi, tree inner_type, tree a, tree b,
167 	  tree bitpos, tree bitsize, enum tree_code code,
168 	  tree type ATTRIBUTE_UNUSED)
169 {
170   if (TREE_CODE (TREE_TYPE (a)) == VECTOR_TYPE)
171     a = tree_vec_extract (gsi, inner_type, a, bitsize, bitpos);
172   if (TREE_CODE (TREE_TYPE (b)) == VECTOR_TYPE)
173     b = tree_vec_extract (gsi, inner_type, b, bitsize, bitpos);
174   return gimplify_build2 (gsi, code, inner_type, a, b);
175 }
176 
177 /* Construct the expression (A[BITPOS] code B[BITPOS]) ? -1 : 0.
178 
179    INNER_TYPE is the type of A's and B's elements.
180 
181    The returned expression has a signed integer type whose size
182    equals the size of INNER_TYPE.  */
183 static tree
184 do_compare (gimple_stmt_iterator *gsi, tree inner_type, tree a, tree b,
185 	    tree bitpos, tree bitsize, enum tree_code code, tree type)
186 {
187   tree stype = TREE_TYPE (type);
188   tree cst_false = build_zero_cst (stype);
189   tree cst_true = build_all_ones_cst (stype);
190   tree cmp;
191 
192   a = tree_vec_extract (gsi, inner_type, a, bitsize, bitpos);
193   b = tree_vec_extract (gsi, inner_type, b, bitsize, bitpos);
194 
195   cmp = build2 (code, boolean_type_node, a, b);
196   return gimplify_build3 (gsi, COND_EXPR, stype, cmp, cst_true, cst_false);
197 }
198 
199 /* Expand vector addition to scalars.  This does bit twiddling
200    in order to increase parallelism:
201 
202    a + b = (((int) a & 0x7f7f7f7f) + ((int) b & 0x7f7f7f7f)) ^
203            (a ^ b) & 0x80808080
204 
205    a - b =  (((int) a | 0x80808080) - ((int) b & 0x7f7f7f7f)) ^
206             (a ^ ~b) & 0x80808080
207 
208    -b = (0x80808080 - ((int) b & 0x7f7f7f7f)) ^ (~b & 0x80808080)
209 
210    This optimization should be done only if 4 vector items or more
211    fit into a word.  */
212 static tree
213 do_plus_minus (gimple_stmt_iterator *gsi, tree word_type, tree a, tree b,
214 	       tree bitpos ATTRIBUTE_UNUSED, tree bitsize ATTRIBUTE_UNUSED,
215 	       enum tree_code code, tree type ATTRIBUTE_UNUSED)
216 {
217   tree inner_type = TREE_TYPE (TREE_TYPE (a));
218   unsigned HOST_WIDE_INT max;
219   tree low_bits, high_bits, a_low, b_low, result_low, signs;
220 
221   max = GET_MODE_MASK (TYPE_MODE (inner_type));
222   low_bits = build_replicated_const (word_type, inner_type, max >> 1);
223   high_bits = build_replicated_const (word_type, inner_type, max & ~(max >> 1));
224 
225   a = tree_vec_extract (gsi, word_type, a, bitsize, bitpos);
226   b = tree_vec_extract (gsi, word_type, b, bitsize, bitpos);
227 
228   signs = gimplify_build2 (gsi, BIT_XOR_EXPR, word_type, a, b);
229   b_low = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, b, low_bits);
230   if (code == PLUS_EXPR)
231     a_low = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, a, low_bits);
232   else
233     {
234       a_low = gimplify_build2 (gsi, BIT_IOR_EXPR, word_type, a, high_bits);
235       signs = gimplify_build1 (gsi, BIT_NOT_EXPR, word_type, signs);
236     }
237 
238   signs = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, signs, high_bits);
239   result_low = gimplify_build2 (gsi, code, word_type, a_low, b_low);
240   return gimplify_build2 (gsi, BIT_XOR_EXPR, word_type, result_low, signs);
241 }
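
/* Worked example for the trick above (illustrative only, assuming 8-bit
   elements packed in a word): adding a = 0x90 and b = 0xa0 in one lane
   proceeds as
     (0x90 & 0x7f) + (0xa0 & 0x7f) = 0x10 + 0x20 = 0x30
     (0x90 ^ 0xa0) & 0x80          = 0x00
     0x30 ^ 0x00                   = 0x30,
   which equals (0x90 + 0xa0) modulo 0x100.  Clearing the top bit of each
   lane before the word-wide addition keeps carries from leaking into the
   neighbouring element; the final XOR restores the sign bits.  */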
242 
243 static tree
244 do_negate (gimple_stmt_iterator *gsi, tree word_type, tree b,
245 	   tree unused ATTRIBUTE_UNUSED, tree bitpos ATTRIBUTE_UNUSED,
246 	   tree bitsize ATTRIBUTE_UNUSED,
247 	   enum tree_code code ATTRIBUTE_UNUSED,
248 	   tree type ATTRIBUTE_UNUSED)
249 {
250   tree inner_type = TREE_TYPE (TREE_TYPE (b));
251   HOST_WIDE_INT max;
252   tree low_bits, high_bits, b_low, result_low, signs;
253 
254   max = GET_MODE_MASK (TYPE_MODE (inner_type));
255   low_bits = build_replicated_const (word_type, inner_type, max >> 1);
256   high_bits = build_replicated_const (word_type, inner_type, max & ~(max >> 1));
257 
258   b = tree_vec_extract (gsi, word_type, b, bitsize, bitpos);
259 
260   b_low = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, b, low_bits);
261   signs = gimplify_build1 (gsi, BIT_NOT_EXPR, word_type, b);
262   signs = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, signs, high_bits);
263   result_low = gimplify_build2 (gsi, MINUS_EXPR, word_type, high_bits, b_low);
264   return gimplify_build2 (gsi, BIT_XOR_EXPR, word_type, result_low, signs);
265 }
266 
267 /* Expand a vector operation to scalars, by using many operations
268    whose type is the vector type's inner type.  */
269 static tree
270 expand_vector_piecewise (gimple_stmt_iterator *gsi, elem_op_func f,
271 			 tree type, tree inner_type,
272 			 tree a, tree b, enum tree_code code,
273 			 tree ret_type = NULL_TREE)
274 {
275   vec<constructor_elt, va_gc> *v;
276   tree part_width = TYPE_SIZE (inner_type);
277   tree index = bitsize_int (0);
278   int nunits = nunits_for_known_piecewise_op (type);
279   int delta = tree_to_uhwi (part_width)
280 	      / tree_to_uhwi (TYPE_SIZE (TREE_TYPE (type)));
281   int i;
282   location_t loc = gimple_location (gsi_stmt (*gsi));
283 
284   if (ret_type
285       || types_compatible_p (gimple_expr_type (gsi_stmt (*gsi)), type))
286     warning_at (loc, OPT_Wvector_operation_performance,
287 		"vector operation will be expanded piecewise");
288   else
289     warning_at (loc, OPT_Wvector_operation_performance,
290 		"vector operation will be expanded in parallel");
291 
292   if (!ret_type)
293     ret_type = type;
294   vec_alloc (v, (nunits + delta - 1) / delta);
295   for (i = 0; i < nunits;
296        i += delta, index = int_const_binop (PLUS_EXPR, index, part_width))
297     {
298       tree result = f (gsi, inner_type, a, b, index, part_width, code,
299 		       ret_type);
300       constructor_elt ce = {NULL_TREE, result};
301       v->quick_push (ce);
302     }
303 
304   return build_constructor (ret_type, v);
305 }
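
/* Rough sketch of the output (hypothetical lowering of a V4SI addition
   with no vector add instruction): each element is extracted with a
   BIT_FIELD_REF, operated on as a scalar, and the results are collected
   into a CONSTRUCTOR, approximately
     _1 = BIT_FIELD_REF <a_2, 32, 0>;
     _3 = BIT_FIELD_REF <b_4, 32, 0>;
     _5 = _1 + _3;
     ... likewise for bit positions 32, 64 and 96 ...
     lhs_6 = {_5, ...};  */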
306 
307 /* Expand a vector operation to scalars with the freedom to use
308    a scalar integer type, or to use a different size for the items
309    in the vector type.  */
310 static tree
311 expand_vector_parallel (gimple_stmt_iterator *gsi, elem_op_func f, tree type,
312 			tree a, tree b, enum tree_code code)
313 {
314   tree result, compute_type;
315   int n_words = tree_to_uhwi (TYPE_SIZE_UNIT (type)) / UNITS_PER_WORD;
316   location_t loc = gimple_location (gsi_stmt (*gsi));
317 
318   /* We have three strategies.  If the type is already correct, just do
319      the operation an element at a time.  Else, if the vector is wider than
320      one word, do it a word at a time; finally, if the vector is smaller
321      than one word, do it as a scalar.  */
322   if (TYPE_MODE (TREE_TYPE (type)) == word_mode)
323      return expand_vector_piecewise (gsi, f,
324 				     type, TREE_TYPE (type),
325 				     a, b, code);
326   else if (n_words > 1)
327     {
328       tree word_type = build_word_mode_vector_type (n_words);
329       result = expand_vector_piecewise (gsi, f,
330 				        word_type, TREE_TYPE (word_type),
331 					a, b, code);
332       result = force_gimple_operand_gsi (gsi, result, true, NULL, true,
333                                          GSI_SAME_STMT);
334     }
335   else
336     {
337       /* Use a single scalar operation with a mode no wider than word_mode.  */
338       scalar_int_mode mode
339 	= int_mode_for_size (tree_to_uhwi (TYPE_SIZE (type)), 0).require ();
340       compute_type = lang_hooks.types.type_for_mode (mode, 1);
341       result = f (gsi, compute_type, a, b, NULL_TREE, NULL_TREE, code, type);
342       warning_at (loc, OPT_Wvector_operation_performance,
343 	          "vector operation will be expanded with a "
344 		  "single scalar operation");
345     }
346 
347   return result;
348 }
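
/* Example of the single-scalar strategy (illustrative, assuming a 32-bit
   word): a bitwise AND of two V4QI values fits in one SImode word, so it
   is performed as a single scalar SImode AND on VIEW_CONVERT_EXPRs of the
   operands instead of four QImode operations.  */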
349 
350 /* Expand a vector operation to scalars; for integer types we can use
351    special bit twiddling tricks to do the sums a word at a time, using
352    function F_PARALLEL instead of F.  These tricks are done only if
353    they can process at least four items, that is, only if the vector
354    holds at least four items and if a word can hold four items.  */
355 static tree
356 expand_vector_addition (gimple_stmt_iterator *gsi,
357 			elem_op_func f, elem_op_func f_parallel,
358 			tree type, tree a, tree b, enum tree_code code)
359 {
360   int parts_per_word = UNITS_PER_WORD
361 	  	       / tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (type)));
362 
363   if (INTEGRAL_TYPE_P (TREE_TYPE (type))
364       && parts_per_word >= 4
365       && nunits_for_known_piecewise_op (type) >= 4)
366     return expand_vector_parallel (gsi, f_parallel,
367 				   type, a, b, code);
368   else
369     return expand_vector_piecewise (gsi, f,
370 				    type, TREE_TYPE (type),
371 				    a, b, code);
372 }
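
/* Example of the heuristic above (illustrative, assuming a 64-bit word):
   for V8QI both parts_per_word and the element count are 8, so the
   word-at-a-time path using do_plus_minus/do_negate is taken; for V2SI
   both counts are below 4 and the plain piecewise expansion is used
   instead.  */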
373 
374 /* Try to expand the vector comparison expression OP0 CODE OP1 by
375    querying the optab to see whether the expression
376 	VEC_COND_EXPR< OP0 CODE OP1, {-1,...}, {0,...}>
377    can be expanded.  */
378 static tree
379 expand_vector_comparison (gimple_stmt_iterator *gsi, tree type, tree op0,
380                           tree op1, enum tree_code code)
381 {
382   tree t;
383   if (!expand_vec_cmp_expr_p (TREE_TYPE (op0), type, code)
384       && !expand_vec_cond_expr_p (type, TREE_TYPE (op0), code))
385     {
386       if (VECTOR_BOOLEAN_TYPE_P (type)
387 	  && SCALAR_INT_MODE_P (TYPE_MODE (type))
388 	  && known_lt (GET_MODE_BITSIZE (TYPE_MODE (type)),
389 		       TYPE_VECTOR_SUBPARTS (type)
390 		       * GET_MODE_BITSIZE (SCALAR_TYPE_MODE
391 						(TREE_TYPE (type)))))
392 	{
393 	  tree inner_type = TREE_TYPE (TREE_TYPE (op0));
394 	  tree part_width = TYPE_SIZE (inner_type);
395 	  tree index = bitsize_int (0);
396 	  int nunits = nunits_for_known_piecewise_op (TREE_TYPE (op0));
397 	  int prec = GET_MODE_PRECISION (SCALAR_TYPE_MODE (type));
398 	  tree ret_type = build_nonstandard_integer_type (prec, 1);
399 	  tree ret_inner_type = boolean_type_node;
400 	  int i;
401 	  location_t loc = gimple_location (gsi_stmt (*gsi));
402 	  t = build_zero_cst (ret_type);
403 
404 	  if (TYPE_PRECISION (ret_inner_type) != 1)
405 	    ret_inner_type = build_nonstandard_integer_type (1, 1);
406 	  warning_at (loc, OPT_Wvector_operation_performance,
407 		      "vector operation will be expanded piecewise");
408 	  for (i = 0; i < nunits;
409 	       i++, index = int_const_binop (PLUS_EXPR, index, part_width))
410 	    {
411 	      tree a = tree_vec_extract (gsi, inner_type, op0, part_width,
412 					 index);
413 	      tree b = tree_vec_extract (gsi, inner_type, op1, part_width,
414 					 index);
415 	      tree result = gimplify_build2 (gsi, code, ret_inner_type, a, b);
416 	      t = gimplify_build3 (gsi, BIT_INSERT_EXPR, ret_type, t, result,
417 				   bitsize_int (i));
418 	    }
419 	  t = gimplify_build1 (gsi, VIEW_CONVERT_EXPR, type, t);
420 	}
421       else
422 	t = expand_vector_piecewise (gsi, do_compare, type,
423 				     TREE_TYPE (TREE_TYPE (op0)), op0, op1,
424 				     code);
425     }
426   else
427     t = NULL_TREE;
428 
429   return t;
430 }
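
/* Sketch of the scalar-bitmask branch above (illustrative): when the
   result type is an integer-mode vector boolean (an AVX-512-style mask,
   for instance), the comparison is done lane by lane -- each pair of
   elements is extracted, compared, and the one-bit result inserted into
   an integer accumulator with BIT_INSERT_EXPR at bit position I -- and
   the accumulator is finally VIEW_CONVERT_EXPRed to the mask type.  */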
431 
432 /* Helper function of expand_vector_divmod.  Gimplify an RSHIFT_EXPR in the
433    type of OP0 with the shift counts in the SHIFTCNTS array, and return the
434    temporary holding the result if successful, otherwise return NULL_TREE.  */
435 static tree
436 add_rshift (gimple_stmt_iterator *gsi, tree type, tree op0, int *shiftcnts)
437 {
438   optab op;
439   unsigned int i, nunits = nunits_for_known_piecewise_op (type);
440   bool scalar_shift = true;
441 
442   for (i = 1; i < nunits; i++)
443     {
444       if (shiftcnts[i] != shiftcnts[0])
445 	scalar_shift = false;
446     }
447 
448   if (scalar_shift && shiftcnts[0] == 0)
449     return op0;
450 
451   if (scalar_shift)
452     {
453       op = optab_for_tree_code (RSHIFT_EXPR, type, optab_scalar);
454       if (op != unknown_optab
455 	  && optab_handler (op, TYPE_MODE (type)) != CODE_FOR_nothing)
456 	return gimplify_build2 (gsi, RSHIFT_EXPR, type, op0,
457 				build_int_cst (NULL_TREE, shiftcnts[0]));
458     }
459 
460   op = optab_for_tree_code (RSHIFT_EXPR, type, optab_vector);
461   if (op != unknown_optab
462       && optab_handler (op, TYPE_MODE (type)) != CODE_FOR_nothing)
463     {
464       tree_vector_builder vec (type, nunits, 1);
465       for (i = 0; i < nunits; i++)
466 	vec.quick_push (build_int_cst (TREE_TYPE (type), shiftcnts[i]));
467       return gimplify_build2 (gsi, RSHIFT_EXPR, type, op0, vec.build ());
468     }
469 
470   return NULL_TREE;
471 }
472 
473 /* Try to expand integer vector division by constant using
474    widening multiply, shifts and additions.  */
475 static tree
476 expand_vector_divmod (gimple_stmt_iterator *gsi, tree type, tree op0,
477 		      tree op1, enum tree_code code)
478 {
479   bool use_pow2 = true;
480   bool has_vector_shift = true;
481   bool use_abs_op1 = false;
482   int mode = -1, this_mode;
483   int pre_shift = -1, post_shift;
484   unsigned int nunits = nunits_for_known_piecewise_op (type);
485   int *shifts = XALLOCAVEC (int, nunits * 4);
486   int *pre_shifts = shifts + nunits;
487   int *post_shifts = pre_shifts + nunits;
488   int *shift_temps = post_shifts + nunits;
489   unsigned HOST_WIDE_INT *mulc = XALLOCAVEC (unsigned HOST_WIDE_INT, nunits);
490   int prec = TYPE_PRECISION (TREE_TYPE (type));
491   int dummy_int;
492   unsigned int i;
493   signop sign_p = TYPE_SIGN (TREE_TYPE (type));
494   unsigned HOST_WIDE_INT mask = GET_MODE_MASK (TYPE_MODE (TREE_TYPE (type)));
495   tree cur_op, mulcst, tem;
496   optab op;
497 
498   if (prec > HOST_BITS_PER_WIDE_INT)
499     return NULL_TREE;
500 
501   op = optab_for_tree_code (RSHIFT_EXPR, type, optab_vector);
502   if (op == unknown_optab
503       || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
504     has_vector_shift = false;
505 
506   /* Analysis phase.  Determine whether all op1 elements are powers
507      of two, in which case the operation can be expanded using shifts (or,
508      for remainder, using masking).  Additionally compute the multiplicative
509      constants and pre and post shifts if the division is to be expanded
510      using widening or high part multiplication plus shifts.  */
511   for (i = 0; i < nunits; i++)
512     {
513       tree cst = VECTOR_CST_ELT (op1, i);
514       unsigned HOST_WIDE_INT ml;
515 
516       if (TREE_CODE (cst) != INTEGER_CST || integer_zerop (cst))
517 	return NULL_TREE;
518       pre_shifts[i] = 0;
519       post_shifts[i] = 0;
520       mulc[i] = 0;
521       if (use_pow2
522 	  && (!integer_pow2p (cst) || tree_int_cst_sgn (cst) != 1))
523 	use_pow2 = false;
524       if (use_pow2)
525 	{
526 	  shifts[i] = tree_log2 (cst);
527 	  if (shifts[i] != shifts[0]
528 	      && code == TRUNC_DIV_EXPR
529 	      && !has_vector_shift)
530 	    use_pow2 = false;
531 	}
532       if (mode == -2)
533 	continue;
534       if (sign_p == UNSIGNED)
535 	{
536 	  unsigned HOST_WIDE_INT mh;
537 	  unsigned HOST_WIDE_INT d = TREE_INT_CST_LOW (cst) & mask;
538 
539 	  if (d >= (HOST_WIDE_INT_1U << (prec - 1)))
540 	    /* FIXME: Can transform this into op0 >= op1 ? 1 : 0.  */
541 	    return NULL_TREE;
542 
543 	  if (d <= 1)
544 	    {
545 	      mode = -2;
546 	      continue;
547 	    }
548 
549 	  /* Find a suitable multiplier and right shift count
550 	     instead of multiplying with D.  */
551 	  mh = choose_multiplier (d, prec, prec, &ml, &post_shift, &dummy_int);
552 
553 	  /* If the suggested multiplier is more than SIZE bits, we can
554 	     do better for even divisors, using an initial right shift.  */
555 	  if ((mh != 0 && (d & 1) == 0)
556 	      || (!has_vector_shift && pre_shift != -1))
557 	    {
558 	      if (has_vector_shift)
559 		pre_shift = ctz_or_zero (d);
560 	      else if (pre_shift == -1)
561 		{
562 		  unsigned int j;
563 		  for (j = 0; j < nunits; j++)
564 		    {
565 		      tree cst2 = VECTOR_CST_ELT (op1, j);
566 		      unsigned HOST_WIDE_INT d2;
567 		      int this_pre_shift;
568 
569 		      if (!tree_fits_uhwi_p (cst2))
570 			return NULL_TREE;
571 		      d2 = tree_to_uhwi (cst2) & mask;
572 		      if (d2 == 0)
573 			return NULL_TREE;
574 		      this_pre_shift = floor_log2 (d2 & -d2);
575 		      if (pre_shift == -1 || this_pre_shift < pre_shift)
576 			pre_shift = this_pre_shift;
577 		    }
578 		  if (i != 0 && pre_shift != 0)
579 		    {
580 		      /* Restart.  */
581 		      i = -1U;
582 		      mode = -1;
583 		      continue;
584 		    }
585 		}
586 	      if (pre_shift != 0)
587 		{
588 		  if ((d >> pre_shift) <= 1)
589 		    {
590 		      mode = -2;
591 		      continue;
592 		    }
593 		  mh = choose_multiplier (d >> pre_shift, prec,
594 					  prec - pre_shift,
595 					  &ml, &post_shift, &dummy_int);
596 		  gcc_assert (!mh);
597 		  pre_shifts[i] = pre_shift;
598 		}
599 	    }
600 	  if (!mh)
601 	    this_mode = 0;
602 	  else
603 	    this_mode = 1;
604 	}
605       else
606 	{
607 	  HOST_WIDE_INT d = TREE_INT_CST_LOW (cst);
608 	  unsigned HOST_WIDE_INT abs_d;
609 
610 	  if (d == -1)
611 	    return NULL_TREE;
612 
613 	  /* Since d might be INT_MIN, we have to cast to
614 	     unsigned HOST_WIDE_INT before negating to avoid
615 	     undefined signed overflow.  */
616 	  abs_d = (d >= 0
617 		  ? (unsigned HOST_WIDE_INT) d
618 		  : - (unsigned HOST_WIDE_INT) d);
619 
620 	  /* n rem d = n rem -d */
621 	  if (code == TRUNC_MOD_EXPR && d < 0)
622 	    {
623 	      d = abs_d;
624 	      use_abs_op1 = true;
625 	    }
626 	  if (abs_d == HOST_WIDE_INT_1U << (prec - 1))
627 	    {
628 	      /* This case is not handled correctly below.  */
629 	      mode = -2;
630 	      continue;
631 	    }
632 	  if (abs_d <= 1)
633 	    {
634 	      mode = -2;
635 	      continue;
636 	    }
637 
638 	  choose_multiplier (abs_d, prec, prec - 1, &ml,
639 			     &post_shift, &dummy_int);
640 	  if (ml >= HOST_WIDE_INT_1U << (prec - 1))
641 	    {
642 	      this_mode = 4 + (d < 0);
643 	      ml |= HOST_WIDE_INT_M1U << (prec - 1);
644 	    }
645 	  else
646 	    this_mode = 2 + (d < 0);
647 	}
648       mulc[i] = ml;
649       post_shifts[i] = post_shift;
650       if ((i && !has_vector_shift && post_shifts[0] != post_shift)
651 	  || post_shift >= prec
652 	  || pre_shifts[i] >= prec)
653 	this_mode = -2;
654 
655       if (i == 0)
656 	mode = this_mode;
657       else if (mode != this_mode)
658 	mode = -2;
659     }
660 
661   if (use_pow2)
662     {
663       tree addend = NULL_TREE;
664       if (sign_p == SIGNED)
665 	{
666 	  tree uns_type;
667 
668 	  /* Both division and remainder sequences need
669 	     op0 < 0 ? mask : 0 computed.  It can be either computed as
670 	     (type) (((uns_type) (op0 >> (prec - 1))) >> (prec - shifts[i]))
671 	     if none of the shifts is 0, or as the conditional.  */
672 	  for (i = 0; i < nunits; i++)
673 	    if (shifts[i] == 0)
674 	      break;
675 	  uns_type
676 	    = build_vector_type (build_nonstandard_integer_type (prec, 1),
677 				 nunits);
678 	  if (i == nunits && TYPE_MODE (uns_type) == TYPE_MODE (type))
679 	    {
680 	      for (i = 0; i < nunits; i++)
681 		shift_temps[i] = prec - 1;
682 	      cur_op = add_rshift (gsi, type, op0, shift_temps);
683 	      if (cur_op != NULL_TREE)
684 		{
685 		  cur_op = gimplify_build1 (gsi, VIEW_CONVERT_EXPR,
686 					    uns_type, cur_op);
687 		  for (i = 0; i < nunits; i++)
688 		    shift_temps[i] = prec - shifts[i];
689 		  cur_op = add_rshift (gsi, uns_type, cur_op, shift_temps);
690 		  if (cur_op != NULL_TREE)
691 		    addend = gimplify_build1 (gsi, VIEW_CONVERT_EXPR,
692 					      type, cur_op);
693 		}
694 	    }
695 	  if (addend == NULL_TREE
696 	      && expand_vec_cond_expr_p (type, type, LT_EXPR))
697 	    {
698 	      tree zero, cst, cond, mask_type;
699 	      gimple *stmt;
700 
701 	      mask_type = build_same_sized_truth_vector_type (type);
702 	      zero = build_zero_cst (type);
703 	      cond = build2 (LT_EXPR, mask_type, op0, zero);
704 	      tree_vector_builder vec (type, nunits, 1);
705 	      for (i = 0; i < nunits; i++)
706 		vec.quick_push (build_int_cst (TREE_TYPE (type),
707 					       (HOST_WIDE_INT_1U
708 						<< shifts[i]) - 1));
709 	      cst = vec.build ();
710 	      addend = make_ssa_name (type);
711 	      stmt = gimple_build_assign (addend, VEC_COND_EXPR, cond,
712 					  cst, zero);
713 	      gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
714 	    }
715 	}
716       if (code == TRUNC_DIV_EXPR)
717 	{
718 	  if (sign_p == UNSIGNED)
719 	    {
720 	      /* q = op0 >> shift;  */
721 	      cur_op = add_rshift (gsi, type, op0, shifts);
722 	      if (cur_op != NULL_TREE)
723 		return cur_op;
724 	    }
725 	  else if (addend != NULL_TREE)
726 	    {
727 	      /* t1 = op0 + addend;
728 		 q = t1 >> shift;  */
729 	      op = optab_for_tree_code (PLUS_EXPR, type, optab_default);
730 	      if (op != unknown_optab
731 		  && optab_handler (op, TYPE_MODE (type)) != CODE_FOR_nothing)
732 		{
733 		  cur_op = gimplify_build2 (gsi, PLUS_EXPR, type, op0, addend);
734 		  cur_op = add_rshift (gsi, type, cur_op, shifts);
735 		  if (cur_op != NULL_TREE)
736 		    return cur_op;
737 		}
738 	    }
739 	}
740       else
741 	{
742 	  tree mask;
743 	  tree_vector_builder vec (type, nunits, 1);
744 	  for (i = 0; i < nunits; i++)
745 	    vec.quick_push (build_int_cst (TREE_TYPE (type),
746 					   (HOST_WIDE_INT_1U
747 					    << shifts[i]) - 1));
748 	  mask = vec.build ();
749 	  op = optab_for_tree_code (BIT_AND_EXPR, type, optab_default);
750 	  if (op != unknown_optab
751 	      && optab_handler (op, TYPE_MODE (type)) != CODE_FOR_nothing)
752 	    {
753 	      if (sign_p == UNSIGNED)
754 		/* r = op0 & mask;  */
755 		return gimplify_build2 (gsi, BIT_AND_EXPR, type, op0, mask);
756 	      else if (addend != NULL_TREE)
757 		{
758 		  /* t1 = op0 + addend;
759 		     t2 = t1 & mask;
760 		     r = t2 - addend;  */
761 		  op = optab_for_tree_code (PLUS_EXPR, type, optab_default);
762 		  if (op != unknown_optab
763 		      && optab_handler (op, TYPE_MODE (type))
764 			 != CODE_FOR_nothing)
765 		    {
766 		      cur_op = gimplify_build2 (gsi, PLUS_EXPR, type, op0,
767 						addend);
768 		      cur_op = gimplify_build2 (gsi, BIT_AND_EXPR, type,
769 						cur_op, mask);
770 		      op = optab_for_tree_code (MINUS_EXPR, type,
771 						optab_default);
772 		      if (op != unknown_optab
773 			  && optab_handler (op, TYPE_MODE (type))
774 			     != CODE_FOR_nothing)
775 			return gimplify_build2 (gsi, MINUS_EXPR, type,
776 						cur_op, addend);
777 		    }
778 		}
779 	    }
780 	}
781     }
782 
783   if (mode == -2 || BYTES_BIG_ENDIAN != WORDS_BIG_ENDIAN)
784     return NULL_TREE;
785 
786   if (!can_mult_highpart_p (TYPE_MODE (type), TYPE_UNSIGNED (type)))
787     return NULL_TREE;
788 
789   cur_op = op0;
790 
791   switch (mode)
792     {
793     case 0:
794       gcc_assert (sign_p == UNSIGNED);
795       /* t1 = oprnd0 >> pre_shift;
796 	 t2 = t1 h* ml;
797 	 q = t2 >> post_shift;  */
798       cur_op = add_rshift (gsi, type, cur_op, pre_shifts);
799       if (cur_op == NULL_TREE)
800 	return NULL_TREE;
801       break;
802     case 1:
803       gcc_assert (sign_p == UNSIGNED);
804       for (i = 0; i < nunits; i++)
805 	{
806 	  shift_temps[i] = 1;
807 	  post_shifts[i]--;
808 	}
809       break;
810     case 2:
811     case 3:
812     case 4:
813     case 5:
814       gcc_assert (sign_p == SIGNED);
815       for (i = 0; i < nunits; i++)
816 	shift_temps[i] = prec - 1;
817       break;
818     default:
819       return NULL_TREE;
820     }
821 
822   tree_vector_builder vec (type, nunits, 1);
823   for (i = 0; i < nunits; i++)
824     vec.quick_push (build_int_cst (TREE_TYPE (type), mulc[i]));
825   mulcst = vec.build ();
826 
827   cur_op = gimplify_build2 (gsi, MULT_HIGHPART_EXPR, type, cur_op, mulcst);
828 
829   switch (mode)
830     {
831     case 0:
832       /* t1 = oprnd0 >> pre_shift;
833 	 t2 = t1 h* ml;
834 	 q = t2 >> post_shift;  */
835       cur_op = add_rshift (gsi, type, cur_op, post_shifts);
836       break;
837     case 1:
838       /* t1 = oprnd0 h* ml;
839 	 t2 = oprnd0 - t1;
840 	 t3 = t2 >> 1;
841 	 t4 = t1 + t3;
842 	 q = t4 >> (post_shift - 1);  */
843       op = optab_for_tree_code (MINUS_EXPR, type, optab_default);
844       if (op == unknown_optab
845 	  || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
846 	return NULL_TREE;
847       tem = gimplify_build2 (gsi, MINUS_EXPR, type, op0, cur_op);
848       tem = add_rshift (gsi, type, tem, shift_temps);
849       op = optab_for_tree_code (PLUS_EXPR, type, optab_default);
850       if (op == unknown_optab
851 	  || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
852 	return NULL_TREE;
853       tem = gimplify_build2 (gsi, PLUS_EXPR, type, cur_op, tem);
854       cur_op = add_rshift (gsi, type, tem, post_shifts);
855       if (cur_op == NULL_TREE)
856 	return NULL_TREE;
857       break;
858     case 2:
859     case 3:
860     case 4:
861     case 5:
862       /* t1 = oprnd0 h* ml;
863 	 t2 = t1; [ iff (mode & 2) != 0 ]
864 	 t2 = t1 + oprnd0; [ iff (mode & 2) == 0 ]
865 	 t3 = t2 >> post_shift;
866 	 t4 = oprnd0 >> (prec - 1);
867 	 q = t3 - t4; [ iff (mode & 1) == 0 ]
868 	 q = t4 - t3; [ iff (mode & 1) != 0 ]  */
869       if ((mode & 2) == 0)
870 	{
871 	  op = optab_for_tree_code (PLUS_EXPR, type, optab_default);
872 	  if (op == unknown_optab
873 	      || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
874 	    return NULL_TREE;
875 	  cur_op = gimplify_build2 (gsi, PLUS_EXPR, type, cur_op, op0);
876 	}
877       cur_op = add_rshift (gsi, type, cur_op, post_shifts);
878       if (cur_op == NULL_TREE)
879 	return NULL_TREE;
880       tem = add_rshift (gsi, type, op0, shift_temps);
881       if (tem == NULL_TREE)
882 	return NULL_TREE;
883       op = optab_for_tree_code (MINUS_EXPR, type, optab_default);
884       if (op == unknown_optab
885 	  || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
886 	return NULL_TREE;
887       if ((mode & 1) == 0)
888 	cur_op = gimplify_build2 (gsi, MINUS_EXPR, type, cur_op, tem);
889       else
890 	cur_op = gimplify_build2 (gsi, MINUS_EXPR, type, tem, cur_op);
891       break;
892     default:
893       gcc_unreachable ();
894     }
895 
896   if (code == TRUNC_DIV_EXPR)
897     return cur_op;
898 
899   /* We divided.  Now finish by:
900      t1 = q * oprnd1;
901      r = oprnd0 - t1;  */
902   op = optab_for_tree_code (MULT_EXPR, type, optab_default);
903   if (op == unknown_optab
904       || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
905     return NULL_TREE;
906   if (use_abs_op1)
907     {
908       tree_vector_builder elts;
909       if (!elts.new_unary_operation (type, op1, false))
910 	return NULL_TREE;
911       unsigned int count = elts.encoded_nelts ();
912       for (unsigned int i = 0; i < count; ++i)
913 	{
914 	  tree elem1 = VECTOR_CST_ELT (op1, i);
915 
916 	  tree elt = const_unop (ABS_EXPR, TREE_TYPE (elem1), elem1);
917 	  if (elt == NULL_TREE)
918 	    return NULL_TREE;
919 	  elts.quick_push (elt);
920 	}
921       op1 = elts.build ();
922     }
923   tem = gimplify_build2 (gsi, MULT_EXPR, type, cur_op, op1);
924   op = optab_for_tree_code (MINUS_EXPR, type, optab_default);
925   if (op == unknown_optab
926       || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
927     return NULL_TREE;
928   return gimplify_build2 (gsi, MINUS_EXPR, type, op0, tem);
929 }
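
/* Worked example (illustrative, assuming 32-bit unsigned elements and a
   target with a vector highpart multiply): for division by the constant 3,
   choose_multiplier gives ml = 0xaaaaaaab and post_shift = 1, so the
   lowering is roughly
     t1 = oprnd0 h* { 0xaaaaaaab, ... };
     q  = t1 >> 1;
   i.e. q = (oprnd0 * 0xaaaaaaab) >> 33, the usual reciprocal sequence.
   For TRUNC_MOD_EXPR the quotient is multiplied back and subtracted:
   r = oprnd0 - q * 3.  */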
930 
931 /* Expand a vector condition to scalars, by using many conditions
932    on the vector's elements.  */
933 static void
934 expand_vector_condition (gimple_stmt_iterator *gsi)
935 {
936   gassign *stmt = as_a <gassign *> (gsi_stmt (*gsi));
937   tree type = gimple_expr_type (stmt);
938   tree a = gimple_assign_rhs1 (stmt);
939   tree a1 = a;
940   tree a2 = NULL_TREE;
941   bool a_is_comparison = false;
942   bool a_is_scalar_bitmask = false;
943   tree b = gimple_assign_rhs2 (stmt);
944   tree c = gimple_assign_rhs3 (stmt);
945   vec<constructor_elt, va_gc> *v;
946   tree constr;
947   tree inner_type = TREE_TYPE (type);
948   tree cond_type = TREE_TYPE (TREE_TYPE (a));
949   tree comp_inner_type = cond_type;
950   tree width = TYPE_SIZE (inner_type);
951   tree index = bitsize_int (0);
952   tree comp_width = width;
953   tree comp_index = index;
954   int i;
955   location_t loc = gimple_location (gsi_stmt (*gsi));
956 
957   if (!is_gimple_val (a))
958     {
959       gcc_assert (COMPARISON_CLASS_P (a));
960       a_is_comparison = true;
961       a1 = TREE_OPERAND (a, 0);
962       a2 = TREE_OPERAND (a, 1);
963       comp_inner_type = TREE_TYPE (TREE_TYPE (a1));
964       comp_width = TYPE_SIZE (comp_inner_type);
965     }
966 
967   if (expand_vec_cond_expr_p (type, TREE_TYPE (a1), TREE_CODE (a)))
968     return;
969 
970   /* Handle vector boolean types with bitmasks.  If there is a comparison
971      and we can expand the comparison into the vector boolean bitmask,
972      or otherwise if it is compatible with type, we can transform
973       vbfld_1 = x_2 < y_3 ? vbfld_4 : vbfld_5;
974      into
975       tmp_6 = x_2 < y_3;
976       tmp_7 = tmp_6 & vbfld_4;
977       tmp_8 = ~tmp_6;
978       tmp_9 = tmp_8 & vbfld_5;
979       vbfld_1 = tmp_7 | tmp_9;
980      Similarly for vbfld_10 instead of x_2 < y_3.  */
981   if (VECTOR_BOOLEAN_TYPE_P (type)
982       && SCALAR_INT_MODE_P (TYPE_MODE (type))
983       && known_lt (GET_MODE_BITSIZE (TYPE_MODE (type)),
984 		   TYPE_VECTOR_SUBPARTS (type)
985 		   * GET_MODE_BITSIZE (SCALAR_TYPE_MODE (TREE_TYPE (type))))
986       && (a_is_comparison
987 	  ? useless_type_conversion_p (type, TREE_TYPE (a))
988 	  : expand_vec_cmp_expr_p (TREE_TYPE (a1), type, TREE_CODE (a))))
989     {
990       if (a_is_comparison)
991 	a = gimplify_build2 (gsi, TREE_CODE (a), type, a1, a2);
992       a1 = gimplify_build2 (gsi, BIT_AND_EXPR, type, a, b);
993       a2 = gimplify_build1 (gsi, BIT_NOT_EXPR, type, a);
994       a2 = gimplify_build2 (gsi, BIT_AND_EXPR, type, a2, c);
995       a = gimplify_build2 (gsi, BIT_IOR_EXPR, type, a1, a2);
996       gimple_assign_set_rhs_from_tree (gsi, a);
997       update_stmt (gsi_stmt (*gsi));
998       return;
999     }
1000 
1001   /* TODO: try and find a smaller vector type.  */
1002 
1003   warning_at (loc, OPT_Wvector_operation_performance,
1004 	      "vector condition will be expanded piecewise");
1005 
1006   if (!a_is_comparison
1007       && VECTOR_BOOLEAN_TYPE_P (TREE_TYPE (a))
1008       && SCALAR_INT_MODE_P (TYPE_MODE (TREE_TYPE (a)))
1009       && known_lt (GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (a))),
1010 		   TYPE_VECTOR_SUBPARTS (TREE_TYPE (a))
1011 		   * GET_MODE_BITSIZE (SCALAR_TYPE_MODE
1012 						(TREE_TYPE (TREE_TYPE (a))))))
1013     {
1014       a_is_scalar_bitmask = true;
1015       int prec = GET_MODE_PRECISION (SCALAR_TYPE_MODE (TREE_TYPE (a)));
1016       tree atype = build_nonstandard_integer_type (prec, 1);
1017       a = gimplify_build1 (gsi, VIEW_CONVERT_EXPR, atype, a);
1018     }
1019 
1020   int nunits = nunits_for_known_piecewise_op (type);
1021   vec_alloc (v, nunits);
1022   for (i = 0; i < nunits; i++)
1023     {
1024       tree aa, result;
1025       tree bb = tree_vec_extract (gsi, inner_type, b, width, index);
1026       tree cc = tree_vec_extract (gsi, inner_type, c, width, index);
1027       if (a_is_comparison)
1028 	{
1029 	  tree aa1 = tree_vec_extract (gsi, comp_inner_type, a1,
1030 				       comp_width, comp_index);
1031 	  tree aa2 = tree_vec_extract (gsi, comp_inner_type, a2,
1032 				       comp_width, comp_index);
1033 	  aa = fold_build2 (TREE_CODE (a), cond_type, aa1, aa2);
1034 	}
1035       else if (a_is_scalar_bitmask)
1036 	{
1037 	  wide_int w = wi::set_bit_in_zero (i, TYPE_PRECISION (TREE_TYPE (a)));
1038 	  result = gimplify_build2 (gsi, BIT_AND_EXPR, TREE_TYPE (a),
1039 				    a, wide_int_to_tree (TREE_TYPE (a), w));
1040 	  aa = fold_build2 (NE_EXPR, boolean_type_node, result,
1041 			    build_zero_cst (TREE_TYPE (a)));
1042 	}
1043       else
1044 	aa = tree_vec_extract (gsi, cond_type, a, width, index);
1045       result = gimplify_build3 (gsi, COND_EXPR, inner_type, aa, bb, cc);
1046       constructor_elt ce = {NULL_TREE, result};
1047       v->quick_push (ce);
1048       index = int_const_binop (PLUS_EXPR, index, width);
1049       if (width == comp_width)
1050 	comp_index = index;
1051       else
1052 	comp_index = int_const_binop (PLUS_EXPR, comp_index, comp_width);
1053     }
1054 
1055   constr = build_constructor (type, v);
1056   gimple_assign_set_rhs_from_tree (gsi, constr);
1057   update_stmt (gsi_stmt (*gsi));
1058 }
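
/* Sketch of the piecewise fallback above (illustrative, not lifted from
   the sources): for  lhs_1 = mask_2 ? b_3 : c_4  on a V4SI type with no
   usable vcond pattern, each of the four lanes is handled by extracting
   the corresponding elements of mask_2, b_3 and c_4 with BIT_FIELD_REFs,
   building a scalar COND_EXPR on them, and collecting the results into a
   CONSTRUCTOR that replaces the original right-hand side.  */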
1059 
1060 static tree
1061 expand_vector_operation (gimple_stmt_iterator *gsi, tree type, tree compute_type,
1062 			 gassign *assign, enum tree_code code)
1063 {
1064   machine_mode compute_mode = TYPE_MODE (compute_type);
1065 
1066   /* If the compute mode is not a vector mode (hence we are not decomposing
1067      a BLKmode vector to smaller, hardware-supported vectors), we may want
1068      to expand the operations in parallel.  */
1069   if (!VECTOR_MODE_P (compute_mode))
1070     switch (code)
1071       {
1072       case PLUS_EXPR:
1073       case MINUS_EXPR:
1074         if (ANY_INTEGRAL_TYPE_P (type) && !TYPE_OVERFLOW_TRAPS (type))
1075 	  return expand_vector_addition (gsi, do_binop, do_plus_minus, type,
1076 					 gimple_assign_rhs1 (assign),
1077 					 gimple_assign_rhs2 (assign), code);
1078 	break;
1079 
1080       case NEGATE_EXPR:
1081         if (ANY_INTEGRAL_TYPE_P (type) && !TYPE_OVERFLOW_TRAPS (type))
1082           return expand_vector_addition (gsi, do_unop, do_negate, type,
1083 		      		         gimple_assign_rhs1 (assign),
1084 					 NULL_TREE, code);
1085 	break;
1086 
1087       case BIT_AND_EXPR:
1088       case BIT_IOR_EXPR:
1089       case BIT_XOR_EXPR:
1090         return expand_vector_parallel (gsi, do_binop, type,
1091 		      		       gimple_assign_rhs1 (assign),
1092 				       gimple_assign_rhs2 (assign), code);
1093 
1094       case BIT_NOT_EXPR:
1095         return expand_vector_parallel (gsi, do_unop, type,
1096 		      		       gimple_assign_rhs1 (assign),
1097         			       NULL_TREE, code);
1098       case EQ_EXPR:
1099       case NE_EXPR:
1100       case GT_EXPR:
1101       case LT_EXPR:
1102       case GE_EXPR:
1103       case LE_EXPR:
1104       case UNEQ_EXPR:
1105       case UNGT_EXPR:
1106       case UNLT_EXPR:
1107       case UNGE_EXPR:
1108       case UNLE_EXPR:
1109       case LTGT_EXPR:
1110       case ORDERED_EXPR:
1111       case UNORDERED_EXPR:
1112 	{
1113 	  tree rhs1 = gimple_assign_rhs1 (assign);
1114 	  tree rhs2 = gimple_assign_rhs2 (assign);
1115 
1116 	  return expand_vector_comparison (gsi, type, rhs1, rhs2, code);
1117 	}
1118 
1119       case TRUNC_DIV_EXPR:
1120       case TRUNC_MOD_EXPR:
1121 	{
1122 	  tree rhs1 = gimple_assign_rhs1 (assign);
1123 	  tree rhs2 = gimple_assign_rhs2 (assign);
1124 	  tree ret;
1125 
1126 	  if (!optimize
1127 	      || !VECTOR_INTEGER_TYPE_P (type)
1128 	      || TREE_CODE (rhs2) != VECTOR_CST
1129 	      || !VECTOR_MODE_P (TYPE_MODE (type)))
1130 	    break;
1131 
1132 	  ret = expand_vector_divmod (gsi, type, rhs1, rhs2, code);
1133 	  if (ret != NULL_TREE)
1134 	    return ret;
1135 	  break;
1136 	}
1137 
1138       default:
1139 	break;
1140       }
1141 
1142   if (TREE_CODE_CLASS (code) == tcc_unary)
1143     return expand_vector_piecewise (gsi, do_unop, type, compute_type,
1144 				    gimple_assign_rhs1 (assign),
1145 				    NULL_TREE, code);
1146   else
1147     return expand_vector_piecewise (gsi, do_binop, type, compute_type,
1148 				    gimple_assign_rhs1 (assign),
1149 				    gimple_assign_rhs2 (assign), code);
1150 }
1151 
1152 /* Try to optimize
1153    a_5 = { b_7, b_7 + 3, b_7 + 6, b_7 + 9 };
1154    style stmts into:
1155    _9 = { b_7, b_7, b_7, b_7 };
1156    a_5 = _9 + { 0, 3, 6, 9 };
1157    because a vector splat operation is usually more efficient
1158    than piecewise initialization of the vector.  */
1159 
1160 static void
1161 optimize_vector_constructor (gimple_stmt_iterator *gsi)
1162 {
1163   gassign *stmt = as_a <gassign *> (gsi_stmt (*gsi));
1164   tree lhs = gimple_assign_lhs (stmt);
1165   tree rhs = gimple_assign_rhs1 (stmt);
1166   tree type = TREE_TYPE (rhs);
1167   unsigned int i, j;
1168   unsigned HOST_WIDE_INT nelts;
1169   bool all_same = true;
1170   constructor_elt *elt;
1171   gimple *g;
1172   tree base = NULL_TREE;
1173   optab op;
1174 
1175   if (!TYPE_VECTOR_SUBPARTS (type).is_constant (&nelts)
1176       || nelts <= 2
1177       || CONSTRUCTOR_NELTS (rhs) != nelts)
1178     return;
1179   op = optab_for_tree_code (PLUS_EXPR, type, optab_default);
1180   if (op == unknown_optab
1181       || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
1182     return;
1183   FOR_EACH_VEC_SAFE_ELT (CONSTRUCTOR_ELTS (rhs), i, elt)
1184     if (TREE_CODE (elt->value) != SSA_NAME
1185 	|| TREE_CODE (TREE_TYPE (elt->value)) == VECTOR_TYPE)
1186       return;
1187     else
1188       {
1189 	tree this_base = elt->value;
1190 	if (this_base != CONSTRUCTOR_ELT (rhs, 0)->value)
1191 	  all_same = false;
1192 	for (j = 0; j < nelts + 1; j++)
1193 	  {
1194 	    g = SSA_NAME_DEF_STMT (this_base);
1195 	    if (is_gimple_assign (g)
1196 		&& gimple_assign_rhs_code (g) == PLUS_EXPR
1197 		&& TREE_CODE (gimple_assign_rhs2 (g)) == INTEGER_CST
1198 		&& TREE_CODE (gimple_assign_rhs1 (g)) == SSA_NAME
1199 		&& !SSA_NAME_OCCURS_IN_ABNORMAL_PHI (gimple_assign_rhs1 (g)))
1200 	      this_base = gimple_assign_rhs1 (g);
1201 	    else
1202 	      break;
1203 	  }
1204 	if (i == 0)
1205 	  base = this_base;
1206 	else if (this_base != base)
1207 	  return;
1208       }
1209   if (all_same)
1210     return;
1211   tree_vector_builder cst (type, nelts, 1);
1212   for (i = 0; i < nelts; i++)
1213     {
1214       tree this_base = CONSTRUCTOR_ELT (rhs, i)->value;
1215       tree elt = build_zero_cst (TREE_TYPE (base));
1216       while (this_base != base)
1217 	{
1218 	  g = SSA_NAME_DEF_STMT (this_base);
1219 	  elt = fold_binary (PLUS_EXPR, TREE_TYPE (base),
1220 			     elt, gimple_assign_rhs2 (g));
1221 	  if (elt == NULL_TREE
1222 	      || TREE_CODE (elt) != INTEGER_CST
1223 	      || TREE_OVERFLOW (elt))
1224 	    return;
1225 	  this_base = gimple_assign_rhs1 (g);
1226 	}
1227       cst.quick_push (elt);
1228     }
1229   for (i = 0; i < nelts; i++)
1230     CONSTRUCTOR_ELT (rhs, i)->value = base;
1231   g = gimple_build_assign (make_ssa_name (type), rhs);
1232   gsi_insert_before (gsi, g, GSI_SAME_STMT);
1233   g = gimple_build_assign (lhs, PLUS_EXPR, gimple_assign_lhs (g),
1234 			   cst.build ());
1235   gsi_replace (gsi, g, false);
1236 }
1237 
1238 /* Return a type for the widest vector mode whose components are of type
1239    TYPE, or NULL_TREE if none is found.  */
1240 
1241 static tree
1242 type_for_widest_vector_mode (tree type, optab op)
1243 {
1244   machine_mode inner_mode = TYPE_MODE (type);
1245   machine_mode best_mode = VOIDmode, mode;
1246   poly_int64 best_nunits = 0;
1247 
1248   if (SCALAR_FLOAT_MODE_P (inner_mode))
1249     mode = MIN_MODE_VECTOR_FLOAT;
1250   else if (SCALAR_FRACT_MODE_P (inner_mode))
1251     mode = MIN_MODE_VECTOR_FRACT;
1252   else if (SCALAR_UFRACT_MODE_P (inner_mode))
1253     mode = MIN_MODE_VECTOR_UFRACT;
1254   else if (SCALAR_ACCUM_MODE_P (inner_mode))
1255     mode = MIN_MODE_VECTOR_ACCUM;
1256   else if (SCALAR_UACCUM_MODE_P (inner_mode))
1257     mode = MIN_MODE_VECTOR_UACCUM;
1258   else if (inner_mode == BImode)
1259     mode = MIN_MODE_VECTOR_BOOL;
1260   else
1261     mode = MIN_MODE_VECTOR_INT;
1262 
1263   FOR_EACH_MODE_FROM (mode, mode)
1264     if (GET_MODE_INNER (mode) == inner_mode
1265 	&& maybe_gt (GET_MODE_NUNITS (mode), best_nunits)
1266 	&& optab_handler (op, mode) != CODE_FOR_nothing)
1267       best_mode = mode, best_nunits = GET_MODE_NUNITS (mode);
1268 
1269   if (best_mode == VOIDmode)
1270     return NULL_TREE;
1271   else
1272     return build_vector_type_for_mode (type, best_mode);
1273 }
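
/* Example (hypothetical target): when lowering a V8SI operation on a
   target whose widest supported integer vectors are V4SI, this function
   returns a V4SI type, provided OP has a handler for V4SImode; the caller
   can then split the V8SI operation into two V4SI pieces.  */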
1274 
1275 
1276 /* Build a reference to the element of the vector VECT.  The function
1277    returns either the element itself, a BIT_FIELD_REF, or an
1278    ARRAY_REF expression.
1279 
1280    GSI is required to insert temporary variables while building a
1281    reference to the element of the vector VECT.
1282 
1283    PTMPVEC is a pointer to a temporary variable used for caching
1284    purposes.  If PTMPVEC is NULL, a new temporary variable
1285    will be created.  */
1286 static tree
1287 vector_element (gimple_stmt_iterator *gsi, tree vect, tree idx, tree *ptmpvec)
1288 {
1289   tree vect_type, vect_elt_type;
1290   gimple *asgn;
1291   tree tmpvec;
1292   tree arraytype;
1293   bool need_asgn = true;
1294   unsigned int elements;
1295 
1296   vect_type = TREE_TYPE (vect);
1297   vect_elt_type = TREE_TYPE (vect_type);
1298   elements = nunits_for_known_piecewise_op (vect_type);
1299 
1300   if (TREE_CODE (idx) == INTEGER_CST)
1301     {
1302       unsigned HOST_WIDE_INT index;
1303 
1304       /* Given that we're about to compute a binary modulus,
1305 	 we don't care about the high bits of the value.  */
1306       index = TREE_INT_CST_LOW (idx);
1307       if (!tree_fits_uhwi_p (idx) || index >= elements)
1308 	{
1309 	  index &= elements - 1;
1310 	  idx = build_int_cst (TREE_TYPE (idx), index);
1311 	}
1312 
1313       /* When lowering a vector statement sequence do some easy
1314          simplification by looking through intermediate vector results.  */
1315       if (TREE_CODE (vect) == SSA_NAME)
1316 	{
1317 	  gimple *def_stmt = SSA_NAME_DEF_STMT (vect);
1318 	  if (is_gimple_assign (def_stmt)
1319 	      && (gimple_assign_rhs_code (def_stmt) == VECTOR_CST
1320 		  || gimple_assign_rhs_code (def_stmt) == CONSTRUCTOR))
1321 	    vect = gimple_assign_rhs1 (def_stmt);
1322 	}
1323 
1324       if (TREE_CODE (vect) == VECTOR_CST)
1325 	return VECTOR_CST_ELT (vect, index);
1326       else if (TREE_CODE (vect) == CONSTRUCTOR
1327 	       && (CONSTRUCTOR_NELTS (vect) == 0
1328 		   || TREE_CODE (TREE_TYPE (CONSTRUCTOR_ELT (vect, 0)->value))
1329 		      != VECTOR_TYPE))
1330         {
1331 	  if (index < CONSTRUCTOR_NELTS (vect))
1332 	    return CONSTRUCTOR_ELT (vect, index)->value;
1333           return build_zero_cst (vect_elt_type);
1334         }
1335       else
1336         {
1337 	  tree size = TYPE_SIZE (vect_elt_type);
1338 	  tree pos = fold_build2 (MULT_EXPR, bitsizetype, bitsize_int (index),
1339 				  size);
1340 	  return fold_build3 (BIT_FIELD_REF, vect_elt_type, vect, size, pos);
1341         }
1342     }
1343 
1344   if (!ptmpvec)
1345     tmpvec = create_tmp_var (vect_type, "vectmp");
1346   else if (!*ptmpvec)
1347     tmpvec = *ptmpvec = create_tmp_var (vect_type, "vectmp");
1348   else
1349     {
1350       tmpvec = *ptmpvec;
1351       need_asgn = false;
1352     }
1353 
1354   if (need_asgn)
1355     {
1356       TREE_ADDRESSABLE (tmpvec) = 1;
1357       asgn = gimple_build_assign (tmpvec, vect);
1358       gsi_insert_before (gsi, asgn, GSI_SAME_STMT);
1359     }
1360 
1361   arraytype = build_array_type_nelts (vect_elt_type, elements);
1362   return build4 (ARRAY_REF, vect_elt_type,
1363                  build1 (VIEW_CONVERT_EXPR, arraytype, tmpvec),
1364                  idx, NULL_TREE, NULL_TREE);
1365 }
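
/* Usage note (illustrative): for a constant IDX the function folds to the
   matching VECTOR_CST or CONSTRUCTOR element where possible, or else to
   BIT_FIELD_REF <VECT, elt_size, IDX * elt_size>; for a variable IDX it
   spills VECT to an addressable temporary and returns an ARRAY_REF of a
   VIEW_CONVERT_EXPR of that temporary, so the element can be indexed at
   run time.  */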
1366 
1367 /* Check if VEC_PERM_EXPR within the given setting is supported
1368    by hardware, or lower it piecewise.
1369 
1370    When VEC_PERM_EXPR has the same first and second operands:
1371    VEC_PERM_EXPR <v0, v0, mask> the lowered version would be
1372    {v0[mask[0]], v0[mask[1]], ...}
1373    MASK and V0 must have the same number of elements.
1374 
1375    Otherwise VEC_PERM_EXPR <v0, v1, mask> is lowered to
1376    {mask[0] < len(v0) ? v0[mask[0]] : v1[mask[0]], ...}
1377    V0 and V1 must have the same type.  MASK, V0 and V1 must have the
1378    same number of elements.  */
1379 
1380 static void
1381 lower_vec_perm (gimple_stmt_iterator *gsi)
1382 {
1383   gassign *stmt = as_a <gassign *> (gsi_stmt (*gsi));
1384   tree mask = gimple_assign_rhs3 (stmt);
1385   tree vec0 = gimple_assign_rhs1 (stmt);
1386   tree vec1 = gimple_assign_rhs2 (stmt);
1387   tree vect_type = TREE_TYPE (vec0);
1388   tree mask_type = TREE_TYPE (mask);
1389   tree vect_elt_type = TREE_TYPE (vect_type);
1390   tree mask_elt_type = TREE_TYPE (mask_type);
1391   unsigned HOST_WIDE_INT elements;
1392   vec<constructor_elt, va_gc> *v;
1393   tree constr, t, si, i_val;
1394   tree vec0tmp = NULL_TREE, vec1tmp = NULL_TREE, masktmp = NULL_TREE;
1395   bool two_operand_p = !operand_equal_p (vec0, vec1, 0);
1396   location_t loc = gimple_location (gsi_stmt (*gsi));
1397   unsigned i;
1398 
1399   if (!TYPE_VECTOR_SUBPARTS (vect_type).is_constant (&elements))
1400     return;
1401 
1402   if (TREE_CODE (mask) == SSA_NAME)
1403     {
1404       gimple *def_stmt = SSA_NAME_DEF_STMT (mask);
1405       if (is_gimple_assign (def_stmt)
1406 	  && gimple_assign_rhs_code (def_stmt) == VECTOR_CST)
1407 	mask = gimple_assign_rhs1 (def_stmt);
1408     }
1409 
1410   vec_perm_builder sel_int;
1411 
1412   if (TREE_CODE (mask) == VECTOR_CST
1413       && tree_to_vec_perm_builder (&sel_int, mask))
1414     {
1415       vec_perm_indices indices (sel_int, 2, elements);
1416       if (can_vec_perm_const_p (TYPE_MODE (vect_type), indices))
1417 	{
1418 	  gimple_assign_set_rhs3 (stmt, mask);
1419 	  update_stmt (stmt);
1420 	  return;
1421 	}
1422       /* Also detect vec_shr pattern - VEC_PERM_EXPR with zero
1423 	 vector as VEC1 and a right element shift MASK.  */
1424       if (optab_handler (vec_shr_optab, TYPE_MODE (vect_type))
1425 	  != CODE_FOR_nothing
1426 	  && TREE_CODE (vec1) == VECTOR_CST
1427 	  && initializer_zerop (vec1)
1428 	  && maybe_ne (indices[0], 0)
1429 	  && known_lt (poly_uint64 (indices[0]), elements))
1430 	{
1431 	  bool ok_p = indices.series_p (0, 1, indices[0], 1);
1432 	  if (!ok_p)
1433 	    {
1434 	      for (i = 1; i < elements; ++i)
1435 		{
1436 		  poly_uint64 actual = indices[i];
1437 		  poly_uint64 expected = i + indices[0];
1438 		  /* Indices into the second vector are all equivalent.  */
1439 		  if (maybe_lt (actual, elements)
1440 		      ? maybe_ne (actual, expected)
1441 		      : maybe_lt (expected, elements))
1442 		    break;
1443 		}
1444 	      ok_p = i == elements;
1445 	    }
1446 	  if (ok_p)
1447 	    {
1448 	      gimple_assign_set_rhs3 (stmt, mask);
1449 	      update_stmt (stmt);
1450 	      return;
1451 	    }
1452 	}
1453     }
1454   else if (can_vec_perm_var_p (TYPE_MODE (vect_type)))
1455     return;
1456 
1457   warning_at (loc, OPT_Wvector_operation_performance,
1458               "vector shuffling operation will be expanded piecewise");
1459 
1460   vec_alloc (v, elements);
1461   for (i = 0; i < elements; i++)
1462     {
1463       si = size_int (i);
1464       i_val = vector_element (gsi, mask, si, &masktmp);
1465 
1466       if (TREE_CODE (i_val) == INTEGER_CST)
1467         {
1468 	  unsigned HOST_WIDE_INT index;
1469 
1470 	  index = TREE_INT_CST_LOW (i_val);
1471 	  if (!tree_fits_uhwi_p (i_val) || index >= elements)
1472 	    i_val = build_int_cst (mask_elt_type, index & (elements - 1));
1473 
1474           if (two_operand_p && (index & elements) != 0)
1475 	    t = vector_element (gsi, vec1, i_val, &vec1tmp);
1476 	  else
1477 	    t = vector_element (gsi, vec0, i_val, &vec0tmp);
1478 
1479           t = force_gimple_operand_gsi (gsi, t, true, NULL_TREE,
1480 					true, GSI_SAME_STMT);
1481         }
1482       else
1483         {
1484 	  tree cond = NULL_TREE, v0_val;
1485 
1486 	  if (two_operand_p)
1487 	    {
1488 	      cond = fold_build2 (BIT_AND_EXPR, mask_elt_type, i_val,
1489 			          build_int_cst (mask_elt_type, elements));
1490 	      cond = force_gimple_operand_gsi (gsi, cond, true, NULL_TREE,
1491 					       true, GSI_SAME_STMT);
1492 	    }
1493 
1494 	  i_val = fold_build2 (BIT_AND_EXPR, mask_elt_type, i_val,
1495 			       build_int_cst (mask_elt_type, elements - 1));
1496 	  i_val = force_gimple_operand_gsi (gsi, i_val, true, NULL_TREE,
1497 					    true, GSI_SAME_STMT);
1498 
1499 	  v0_val = vector_element (gsi, vec0, i_val, &vec0tmp);
1500 	  v0_val = force_gimple_operand_gsi (gsi, v0_val, true, NULL_TREE,
1501 					     true, GSI_SAME_STMT);
1502 
1503 	  if (two_operand_p)
1504 	    {
1505 	      tree v1_val;
1506 
1507 	      v1_val = vector_element (gsi, vec1, i_val, &vec1tmp);
1508 	      v1_val = force_gimple_operand_gsi (gsi, v1_val, true, NULL_TREE,
1509 						 true, GSI_SAME_STMT);
1510 
1511 	      cond = fold_build2 (EQ_EXPR, boolean_type_node,
1512 				  cond, build_zero_cst (mask_elt_type));
1513 	      cond = fold_build3 (COND_EXPR, vect_elt_type,
1514 				  cond, v0_val, v1_val);
1515               t = force_gimple_operand_gsi (gsi, cond, true, NULL_TREE,
1516 					    true, GSI_SAME_STMT);
1517             }
1518 	  else
1519 	    t = v0_val;
1520         }
1521 
1522       CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, t);
1523     }
1524 
1525   constr = build_constructor (vect_type, v);
1526   gimple_assign_set_rhs_from_tree (gsi, constr);
1527   update_stmt (gsi_stmt (*gsi));
1528 }
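
/* Example of the fallback (illustrative): lowering
     lhs_1 = VEC_PERM_EXPR <v0_2, v1_3, { 0, 5, 2, 7 }>
   when the target cannot perform the permutation directly selects each
   output element with vector_element -- mask values below 4 read from
   v0_2, values 4..7 read from v1_3 -- and the selected values are put
   back together with a CONSTRUCTOR.  */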
1529 
1530 /* If OP is a uniform vector return the element it is a splat from.  */
1531 
1532 static tree
1533 ssa_uniform_vector_p (tree op)
1534 {
1535   if (TREE_CODE (op) == VECTOR_CST
1536       || TREE_CODE (op) == VEC_DUPLICATE_EXPR
1537       || TREE_CODE (op) == CONSTRUCTOR)
1538     return uniform_vector_p (op);
1539   if (TREE_CODE (op) == SSA_NAME)
1540     {
1541       gimple *def_stmt = SSA_NAME_DEF_STMT (op);
1542       if (gimple_assign_single_p (def_stmt))
1543 	return uniform_vector_p (gimple_assign_rhs1 (def_stmt));
1544     }
1545   return NULL_TREE;
1546 }
1547 
1548 /* Return the type in which the operation CODE with optab OP can be
1549    computed when applied to the vector type TYPE.  */
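/* For instance, on a hypothetical target that only supports V4SI
   arithmetic, an operation on eight 32-bit integers would get V4SI as
   its compute type and be carried out in two halves by the callers; if
   no suitable vector mode exists at all, the element type is returned
   and the operation is scalarized.  */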
1550 
1551 static tree
1552 get_compute_type (enum tree_code code, optab op, tree type)
1553 {
1554   /* For very wide vectors, try using a smaller vector mode.  */
1555   tree compute_type = type;
1556   if (op
1557       && (!VECTOR_MODE_P (TYPE_MODE (type))
1558 	  || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing))
1559     {
1560       tree vector_compute_type
1561 	= type_for_widest_vector_mode (TREE_TYPE (type), op);
1562       if (vector_compute_type != NULL_TREE
1563 	  && subparts_gt (compute_type, vector_compute_type)
1564 	  && maybe_ne (TYPE_VECTOR_SUBPARTS (vector_compute_type), 1U)
1565 	  && (optab_handler (op, TYPE_MODE (vector_compute_type))
1566 	      != CODE_FOR_nothing))
1567 	compute_type = vector_compute_type;
1568     }
1569 
1570   /* If we are breaking a BLKmode vector into smaller pieces,
1571      type_for_widest_vector_mode has already looked into the optab,
1572      so skip these checks.  */
1573   if (compute_type == type)
1574     {
1575       machine_mode compute_mode = TYPE_MODE (compute_type);
1576       if (VECTOR_MODE_P (compute_mode))
1577 	{
1578 	  if (op && optab_handler (op, compute_mode) != CODE_FOR_nothing)
1579 	    return compute_type;
1580 	  if (code == MULT_HIGHPART_EXPR
1581 	      && can_mult_highpart_p (compute_mode,
1582 				      TYPE_UNSIGNED (compute_type)))
1583 	    return compute_type;
1584 	}
1585       /* There is no operation in hardware, so fall back to scalars.  */
1586       compute_type = TREE_TYPE (type);
1587     }
1588 
1589   return compute_type;
1590 }
1591 
1592 static tree
1593 do_cond (gimple_stmt_iterator *gsi, tree inner_type, tree a, tree b,
1594 	 tree bitpos, tree bitsize, enum tree_code code,
1595 	 tree type ATTRIBUTE_UNUSED)
1596 {
1597   if (TREE_CODE (TREE_TYPE (a)) == VECTOR_TYPE)
1598     a = tree_vec_extract (gsi, inner_type, a, bitsize, bitpos);
1599   if (TREE_CODE (TREE_TYPE (b)) == VECTOR_TYPE)
1600     b = tree_vec_extract (gsi, inner_type, b, bitsize, bitpos);
1601   tree cond = gimple_assign_rhs1 (gsi_stmt (*gsi));
1602   return gimplify_build3 (gsi, code, inner_type, unshare_expr (cond), a, b);
1603 }
1604 
1605 /* Expand a vector COND_EXPR to scalars, piecewise.  */
1606 static void
1607 expand_vector_scalar_condition (gimple_stmt_iterator *gsi)
1608 {
1609   gassign *stmt = as_a <gassign *> (gsi_stmt (*gsi));
1610   tree type = gimple_expr_type (stmt);
1611   tree compute_type = get_compute_type (COND_EXPR, mov_optab, type);
1612   machine_mode compute_mode = TYPE_MODE (compute_type);
1613   gcc_assert (compute_mode != BLKmode);
1614   tree lhs = gimple_assign_lhs (stmt);
1615   tree rhs2 = gimple_assign_rhs2 (stmt);
1616   tree rhs3 = gimple_assign_rhs3 (stmt);
1617   tree new_rhs;
1618 
1619   /* If the compute mode is not a vector mode (hence we are not decomposing
1620      a BLKmode vector to smaller, hardware-supported vectors), we may want
1621      to expand the operations in parallel.  */
1622   if (!VECTOR_MODE_P (compute_mode))
1623     new_rhs = expand_vector_parallel (gsi, do_cond, type, rhs2, rhs3,
1624 				      COND_EXPR);
1625   else
1626     new_rhs = expand_vector_piecewise (gsi, do_cond, type, compute_type,
1627 				       rhs2, rhs3, COND_EXPR);
1628   if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (new_rhs)))
1629     new_rhs = gimplify_build1 (gsi, VIEW_CONVERT_EXPR, TREE_TYPE (lhs),
1630 			       new_rhs);
1631 
1632   /* NOTE: We should avoid using gimple_assign_set_rhs_from_tree.  One
1633      way to do that is to change expand_vector_operation and its callees
1634      to return a tree_code, RHS1 and RHS2 instead of a tree. */
1635   gimple_assign_set_rhs_from_tree (gsi, new_rhs);
1636   update_stmt (gsi_stmt (*gsi));
1637 }
1638 
1639 /* Callback for expand_vector_piecewise to do VEC_CONVERT ifn call
1640    lowering.  If INNER_TYPE is not a vector type, this is a scalar
1641    fallback.  */
1642 
1643 static tree
1644 do_vec_conversion (gimple_stmt_iterator *gsi, tree inner_type, tree a,
1645 		   tree decl, tree bitpos, tree bitsize,
1646 		   enum tree_code code, tree type)
1647 {
1648   a = tree_vec_extract (gsi, inner_type, a, bitsize, bitpos);
1649   if (!VECTOR_TYPE_P (inner_type))
1650     return gimplify_build1 (gsi, code, TREE_TYPE (type), a);
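  /* A vector-to-vector piece conversion is represented either as a call
     to the target builtin DECL or as a conversion tree code applied to
     a vector of the piece's width.  */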
1651   if (code == CALL_EXPR)
1652     {
1653       gimple *g = gimple_build_call (decl, 1, a);
1654       tree lhs = make_ssa_name (TREE_TYPE (TREE_TYPE (decl)));
1655       gimple_call_set_lhs (g, lhs);
1656       gsi_insert_before (gsi, g, GSI_SAME_STMT);
1657       return lhs;
1658     }
1659   else
1660     {
1661       tree outer_type = build_vector_type (TREE_TYPE (type),
1662 					   TYPE_VECTOR_SUBPARTS (inner_type));
1663       return gimplify_build1 (gsi, code, outer_type, a);
1664     }
1665 }
1666 
1667 /* Similarly, but for a narrowing conversion.  */
1668 
1669 static tree
1670 do_vec_narrow_conversion (gimple_stmt_iterator *gsi, tree inner_type, tree a,
1671 			  tree, tree bitpos, tree, enum tree_code code,
1672 			  tree type)
1673 {
1674   tree itype = build_vector_type (TREE_TYPE (inner_type),
1675 				  exact_div (TYPE_VECTOR_SUBPARTS (inner_type),
1676 					     2));
1677   tree b = tree_vec_extract (gsi, itype, a, TYPE_SIZE (itype), bitpos);
1678   tree c = tree_vec_extract (gsi, itype, a, TYPE_SIZE (itype),
1679 			     int_const_binop (PLUS_EXPR, bitpos,
1680 					      TYPE_SIZE (itype)));
1681   tree outer_type = build_vector_type (TREE_TYPE (type),
1682 				       TYPE_VECTOR_SUBPARTS (inner_type));
1683   return gimplify_build2 (gsi, code, outer_type, b, c);
1684 }
1685 
1686 /* Expand VEC_CONVERT ifn call.  */
1687 
1688 static void
1689 expand_vector_conversion (gimple_stmt_iterator *gsi)
1690 {
1691   gimple *stmt = gsi_stmt (*gsi);
1692   gimple *g;
1693   tree lhs = gimple_call_lhs (stmt);
1694   if (lhs == NULL_TREE)
1695     {
1696       g = gimple_build_nop ();
1697       gsi_replace (gsi, g, false);
1698       return;
1699     }
1700   tree arg = gimple_call_arg (stmt, 0);
1701   tree decl = NULL_TREE;
1702   tree ret_type = TREE_TYPE (lhs);
1703   tree arg_type = TREE_TYPE (arg);
1704   tree new_rhs, compute_type = TREE_TYPE (arg_type);
1705   enum tree_code code = NOP_EXPR;
1706   enum tree_code code1 = ERROR_MARK;
1707   enum { NARROW, NONE, WIDEN } modifier = NONE;
1708   optab optab1 = unknown_optab;
1709 
1710   gcc_checking_assert (VECTOR_TYPE_P (ret_type) && VECTOR_TYPE_P (arg_type));
1711   gcc_checking_assert (tree_fits_uhwi_p (TYPE_SIZE (TREE_TYPE (ret_type))));
1712   gcc_checking_assert (tree_fits_uhwi_p (TYPE_SIZE (TREE_TYPE (arg_type))));
1713   if (INTEGRAL_TYPE_P (TREE_TYPE (ret_type))
1714       && SCALAR_FLOAT_TYPE_P (TREE_TYPE (arg_type)))
1715     code = FIX_TRUNC_EXPR;
1716   else if (INTEGRAL_TYPE_P (TREE_TYPE (arg_type))
1717 	   && SCALAR_FLOAT_TYPE_P (TREE_TYPE (ret_type)))
1718     code = FLOAT_EXPR;
1719   if (tree_to_uhwi (TYPE_SIZE (TREE_TYPE (ret_type)))
1720       < tree_to_uhwi (TYPE_SIZE (TREE_TYPE (arg_type))))
1721     modifier = NARROW;
1722   else if (tree_to_uhwi (TYPE_SIZE (TREE_TYPE (ret_type)))
1723 	   > tree_to_uhwi (TYPE_SIZE (TREE_TYPE (arg_type))))
1724     modifier = WIDEN;
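  /* MODIFIER now says whether the conversion keeps the element size
     (NONE), shrinks it (NARROW, lowered via VEC_PACK_* codes) or grows
     it (WIDEN, lowered via VEC_UNPACK_*_LO/HI codes).  */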
1725 
1726   if (modifier == NONE && (code == FIX_TRUNC_EXPR || code == FLOAT_EXPR))
1727     {
1728       if (supportable_convert_operation (code, ret_type, arg_type, &decl,
1729 					 &code1))
1730 	{
1731 	  if (code1 == CALL_EXPR)
1732 	    {
1733 	      g = gimple_build_call (decl, 1, arg);
1734 	      gimple_call_set_lhs (g, lhs);
1735 	    }
1736 	  else
1737 	    g = gimple_build_assign (lhs, code1, arg);
1738 	  gsi_replace (gsi, g, false);
1739 	  return;
1740 	}
1741       /* Can't use get_compute_type here, as supportable_convert_operation
1742 	 doesn't necessarily use an optab and needs two arguments.  */
1743       tree vec_compute_type
1744 	= type_for_widest_vector_mode (TREE_TYPE (arg_type), mov_optab);
1745       if (vec_compute_type
1746 	  && VECTOR_MODE_P (TYPE_MODE (vec_compute_type))
1747 	  && subparts_gt (arg_type, vec_compute_type))
1748 	{
1749 	  unsigned HOST_WIDE_INT nelts
1750 	    = constant_lower_bound (TYPE_VECTOR_SUBPARTS (vec_compute_type));
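	  /* Try successively narrower vector types until the target can
	     convert that width directly, then expand the whole conversion
	     piecewise at that width.  */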
1751 	  while (nelts > 1)
1752 	    {
1753 	      tree ret1_type = build_vector_type (TREE_TYPE (ret_type), nelts);
1754 	      tree arg1_type = build_vector_type (TREE_TYPE (arg_type), nelts);
1755 	      if (supportable_convert_operation (code, ret1_type, arg1_type,
1756 						 &decl, &code1))
1757 		{
1758 		  new_rhs = expand_vector_piecewise (gsi, do_vec_conversion,
1759 						     ret_type, arg1_type, arg,
1760 						     decl, code1);
1761 		  g = gimple_build_assign (lhs, new_rhs);
1762 		  gsi_replace (gsi, g, false);
1763 		  return;
1764 		}
1765 	      nelts = nelts / 2;
1766 	    }
1767 	}
1768     }
1769   else if (modifier == NARROW)
1770     {
1771       switch (code)
1772 	{
1773 	CASE_CONVERT:
1774 	  code1 = VEC_PACK_TRUNC_EXPR;
1775 	  optab1 = optab_for_tree_code (code1, arg_type, optab_default);
1776 	  break;
1777 	case FIX_TRUNC_EXPR:
1778 	  code1 = VEC_PACK_FIX_TRUNC_EXPR;
1779 	  /* The signedness is determined from the output operand.  */
1780 	  optab1 = optab_for_tree_code (code1, ret_type, optab_default);
1781 	  break;
1782 	case FLOAT_EXPR:
1783 	  code1 = VEC_PACK_FLOAT_EXPR;
1784 	  optab1 = optab_for_tree_code (code1, arg_type, optab_default);
1785 	  break;
1786 	default:
1787 	  gcc_unreachable ();
1788 	}
1789 
1790       if (optab1)
1791 	compute_type = get_compute_type (code1, optab1, arg_type);
1792       enum insn_code icode1;
1793       if (VECTOR_TYPE_P (compute_type)
1794 	  && ((icode1 = optab_handler (optab1, TYPE_MODE (compute_type)))
1795 	      != CODE_FOR_nothing)
1796 	  && VECTOR_MODE_P (insn_data[icode1].operand[0].mode))
1797 	{
1798 	  tree cretd_type
1799 	    = build_vector_type (TREE_TYPE (ret_type),
1800 				 TYPE_VECTOR_SUBPARTS (compute_type) * 2);
1801 	  if (insn_data[icode1].operand[0].mode == TYPE_MODE (cretd_type))
1802 	    {
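	      /* If the whole argument can be packed in one operation,
		 pair it with a zero vector as the second operand and
		 extract the RET_TYPE-sized part holding the converted
		 elements.  */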
1803 	      if (compute_type == arg_type)
1804 		{
1805 		  new_rhs = gimplify_build2 (gsi, code1, cretd_type,
1806 					     arg, build_zero_cst (arg_type));
1807 		  new_rhs = tree_vec_extract (gsi, ret_type, new_rhs,
1808 					      TYPE_SIZE (ret_type),
1809 					      bitsize_int (0));
1810 		  g = gimple_build_assign (lhs, new_rhs);
1811 		  gsi_replace (gsi, g, false);
1812 		  return;
1813 		}
1814 	      tree dcompute_type
1815 		= build_vector_type (TREE_TYPE (compute_type),
1816 				     TYPE_VECTOR_SUBPARTS (compute_type) * 2);
1817 	      if (TYPE_MAIN_VARIANT (dcompute_type)
1818 		  == TYPE_MAIN_VARIANT (arg_type))
1819 		new_rhs = do_vec_narrow_conversion (gsi, dcompute_type, arg,
1820 						    NULL_TREE, bitsize_int (0),
1821 						    NULL_TREE, code1,
1822 						    ret_type);
1823 	      else
1824 		new_rhs = expand_vector_piecewise (gsi,
1825 						   do_vec_narrow_conversion,
1826 						   arg_type, dcompute_type,
1827 						   arg, NULL_TREE, code1,
1828 						   ret_type);
1829 	      g = gimple_build_assign (lhs, new_rhs);
1830 	      gsi_replace (gsi, g, false);
1831 	      return;
1832 	    }
1833 	}
1834     }
1835   else if (modifier == WIDEN)
1836     {
1837       enum tree_code code2 = ERROR_MARK;
1838       optab optab2 = unknown_optab;
1839       switch (code)
1840 	{
1841 	CASE_CONVERT:
1842 	  code1 = VEC_UNPACK_LO_EXPR;
1843           code2 = VEC_UNPACK_HI_EXPR;
1844 	  break;
1845 	case FIX_TRUNC_EXPR:
1846 	  code1 = VEC_UNPACK_FIX_TRUNC_LO_EXPR;
1847 	  code2 = VEC_UNPACK_FIX_TRUNC_HI_EXPR;
1848 	  break;
1849 	case FLOAT_EXPR:
1850 	  code1 = VEC_UNPACK_FLOAT_LO_EXPR;
1851 	  code2 = VEC_UNPACK_FLOAT_HI_EXPR;
1852 	  break;
1853 	default:
1854 	  gcc_unreachable ();
1855 	}
1856       if (BYTES_BIG_ENDIAN)
1857 	std::swap (code1, code2);
1858 
1859       if (code == FIX_TRUNC_EXPR)
1860 	{
1861 	  /* The signedness is determined from the output operand.  */
1862 	  optab1 = optab_for_tree_code (code1, ret_type, optab_default);
1863 	  optab2 = optab_for_tree_code (code2, ret_type, optab_default);
1864 	}
1865       else
1866 	{
1867 	  optab1 = optab_for_tree_code (code1, arg_type, optab_default);
1868 	  optab2 = optab_for_tree_code (code2, arg_type, optab_default);
1869 	}
1870 
1871       if (optab1 && optab2)
1872 	compute_type = get_compute_type (code1, optab1, arg_type);
1873 
1874       enum insn_code icode1, icode2;
1875       if (VECTOR_TYPE_P (compute_type)
1876 	  && ((icode1 = optab_handler (optab1, TYPE_MODE (compute_type)))
1877 	      != CODE_FOR_nothing)
1878 	  && ((icode2 = optab_handler (optab2, TYPE_MODE (compute_type)))
1879 	      != CODE_FOR_nothing)
1880 	  && VECTOR_MODE_P (insn_data[icode1].operand[0].mode)
1881 	  && (insn_data[icode1].operand[0].mode
1882 	      == insn_data[icode2].operand[0].mode))
1883 	{
1884 	  poly_uint64 nunits
1885 	    = exact_div (TYPE_VECTOR_SUBPARTS (compute_type), 2);
1886 	  tree cretd_type = build_vector_type (TREE_TYPE (ret_type), nunits);
1887 	  if (insn_data[icode1].operand[0].mode == TYPE_MODE (cretd_type))
1888 	    {
1889 	      vec<constructor_elt, va_gc> *v;
1890 	      tree part_width = TYPE_SIZE (compute_type);
1891 	      tree index = bitsize_int (0);
1892 	      int nunits = nunits_for_known_piecewise_op (arg_type);
1893 	      int delta = tree_to_uhwi (part_width)
1894 			  / tree_to_uhwi (TYPE_SIZE (TREE_TYPE (arg_type)));
1895 	      int i;
1896 	      location_t loc = gimple_location (gsi_stmt (*gsi));
1897 
1898 	      if (compute_type != arg_type)
1899 		warning_at (loc, OPT_Wvector_operation_performance,
1900 			    "vector operation will be expanded piecewise");
1901 	      else
1902 		{
1903 		  nunits = 1;
1904 		  delta = 1;
1905 		}
1906 
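	      /* Each piece of the argument yields two pieces of the
		 result: the unpacked low half and the unpacked high
		 half.  */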
1907 	      vec_alloc (v, (nunits + delta - 1) / delta * 2);
1908 	      for (i = 0; i < nunits;
1909 		   i += delta, index = int_const_binop (PLUS_EXPR, index,
1910 							part_width))
1911 		{
1912 		  tree a = arg;
1913 		  if (compute_type != arg_type)
1914 		    a = tree_vec_extract (gsi, compute_type, a, part_width,
1915 					  index);
1916 		  tree result = gimplify_build1 (gsi, code1, cretd_type, a);
1917 		  constructor_elt ce = { NULL_TREE, result };
1918 		  v->quick_push (ce);
1919 		  ce.value = gimplify_build1 (gsi, code2, cretd_type, a);
1920 		  v->quick_push (ce);
1921 		}
1922 
1923 	      new_rhs = build_constructor (ret_type, v);
1924 	      g = gimple_build_assign (lhs, new_rhs);
1925 	      gsi_replace (gsi, g, false);
1926 	      return;
1927 	    }
1928 	}
1929     }
1930 
1931   new_rhs = expand_vector_piecewise (gsi, do_vec_conversion, arg_type,
1932 				     TREE_TYPE (arg_type), arg,
1933 				     NULL_TREE, code, ret_type);
1934   g = gimple_build_assign (lhs, new_rhs);
1935   gsi_replace (gsi, g, false);
1936 }
1937 
1938 /* Process one statement.  If we identify a vector operation, expand it.  */
1939 
1940 static void
1941 expand_vector_operations_1 (gimple_stmt_iterator *gsi)
1942 {
1943   tree lhs, rhs1, rhs2 = NULL, type, compute_type = NULL_TREE;
1944   enum tree_code code;
1945   optab op = unknown_optab;
1946   enum gimple_rhs_class rhs_class;
1947   tree new_rhs;
1948 
1949   /* Only consider GIMPLE_ASSIGN statements.  */
1950   gassign *stmt = dyn_cast <gassign *> (gsi_stmt (*gsi));
1951   if (!stmt)
1952     {
1953       if (gimple_call_internal_p (gsi_stmt (*gsi), IFN_VEC_CONVERT))
1954 	expand_vector_conversion (gsi);
1955       return;
1956     }
1957 
1958   code = gimple_assign_rhs_code (stmt);
1959   rhs_class = get_gimple_rhs_class (code);
1960   lhs = gimple_assign_lhs (stmt);
1961 
1962   if (code == VEC_PERM_EXPR)
1963     {
1964       lower_vec_perm (gsi);
1965       return;
1966     }
1967 
1968   if (code == VEC_COND_EXPR)
1969     {
1970       expand_vector_condition (gsi);
1971       return;
1972     }
1973 
1974   if (code == COND_EXPR
1975       && TREE_CODE (TREE_TYPE (gimple_assign_lhs (stmt))) == VECTOR_TYPE
1976       && TYPE_MODE (TREE_TYPE (gimple_assign_lhs (stmt))) == BLKmode)
1977     {
1978       expand_vector_scalar_condition (gsi);
1979       return;
1980     }
1981 
1982   if (code == CONSTRUCTOR
1983       && TREE_CODE (lhs) == SSA_NAME
1984       && VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (lhs)))
1985       && !gimple_clobber_p (stmt)
1986       && optimize)
1987     {
1988       optimize_vector_constructor (gsi);
1989       return;
1990     }
1991 
1992   if (rhs_class != GIMPLE_UNARY_RHS && rhs_class != GIMPLE_BINARY_RHS)
1993     return;
1994 
1995   rhs1 = gimple_assign_rhs1 (stmt);
1996   type = gimple_expr_type (stmt);
1997   if (rhs_class == GIMPLE_BINARY_RHS)
1998     rhs2 = gimple_assign_rhs2 (stmt);
1999 
2000   if (!VECTOR_TYPE_P (type)
2001       || !VECTOR_TYPE_P (TREE_TYPE (rhs1)))
2002     return;
2003 
2004   /* A scalar operation pretending to be a vector one.  */
2005   if (VECTOR_BOOLEAN_TYPE_P (type)
2006       && !VECTOR_MODE_P (TYPE_MODE (type))
2007       && TYPE_MODE (type) != BLKmode
2008       && (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt)) != tcc_comparison
2009 	  || (VECTOR_BOOLEAN_TYPE_P (TREE_TYPE (rhs1))
2010 	      && !VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (rhs1)))
2011 	      && TYPE_MODE (TREE_TYPE (rhs1)) != BLKmode)))
2012     return;
2013 
2014   /* If the vector operation operates on vectors whose elements are all
2015      the same, implement it with a scalar operation and a splat, provided
2016      the target supports the scalar operation.  */
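  /* For instance, {x,x,x,x} + {y,y,y,y} can be done as
     tem = x + y; {tem,tem,tem,tem}, provided the target supports the
     scalar operation for the element mode.  */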
2017   tree srhs1, srhs2 = NULL_TREE;
2018   if ((srhs1 = ssa_uniform_vector_p (rhs1)) != NULL_TREE
2019       && (rhs2 == NULL_TREE
2020 	  || (! VECTOR_TYPE_P (TREE_TYPE (rhs2))
2021 	      && (srhs2 = rhs2))
2022 	  || (srhs2 = ssa_uniform_vector_p (rhs2)) != NULL_TREE)
2023       /* As we query direct optabs, restrict this to non-conversion operations.  */
2024       && TYPE_MODE (TREE_TYPE (type)) == TYPE_MODE (TREE_TYPE (srhs1)))
2025     {
2026       op = optab_for_tree_code (code, TREE_TYPE (type), optab_scalar);
2027       if (op >= FIRST_NORM_OPTAB && op <= LAST_NORM_OPTAB
2028 	  && optab_handler (op, TYPE_MODE (TREE_TYPE (type))) != CODE_FOR_nothing)
2029 	{
2030 	  tree slhs = make_ssa_name (TREE_TYPE (srhs1));
2031 	  gimple *repl = gimple_build_assign (slhs, code, srhs1, srhs2);
2032 	  gsi_insert_before (gsi, repl, GSI_SAME_STMT);
2033 	  gimple_assign_set_rhs_from_tree (gsi,
2034 					   build_vector_from_val (type, slhs));
2035 	  update_stmt (stmt);
2036 	  return;
2037 	}
2038     }
2039 
2040   if (CONVERT_EXPR_CODE_P (code)
2041       || code == FLOAT_EXPR
2042       || code == FIX_TRUNC_EXPR
2043       || code == VIEW_CONVERT_EXPR)
2044     return;
2045 
2046   /* The signedness is determined from the input argument.  */
2047   if (code == VEC_UNPACK_FLOAT_HI_EXPR
2048       || code == VEC_UNPACK_FLOAT_LO_EXPR
2049       || code == VEC_PACK_FLOAT_EXPR)
2050     {
2051       type = TREE_TYPE (rhs1);
2052       /* We do not know how to scalarize those.  */
2053       return;
2054     }
2055 
2056   /* For widening/narrowing vector operations, the relevant type is that of
2057      the arguments, not of the widened result.  VEC_UNPACK_FLOAT_*_EXPR is
2058      handled the same way above.  */
2059   if (code == WIDEN_SUM_EXPR
2060       || code == VEC_WIDEN_MULT_HI_EXPR
2061       || code == VEC_WIDEN_MULT_LO_EXPR
2062       || code == VEC_WIDEN_MULT_EVEN_EXPR
2063       || code == VEC_WIDEN_MULT_ODD_EXPR
2064       || code == VEC_UNPACK_HI_EXPR
2065       || code == VEC_UNPACK_LO_EXPR
2066       || code == VEC_UNPACK_FIX_TRUNC_HI_EXPR
2067       || code == VEC_UNPACK_FIX_TRUNC_LO_EXPR
2068       || code == VEC_PACK_TRUNC_EXPR
2069       || code == VEC_PACK_SAT_EXPR
2070       || code == VEC_PACK_FIX_TRUNC_EXPR
2071       || code == VEC_WIDEN_LSHIFT_HI_EXPR
2072       || code == VEC_WIDEN_LSHIFT_LO_EXPR)
2073     {
2074       type = TREE_TYPE (rhs1);
2075       /* We do not know how to scalarize those.  */
2076       return;
2077     }
2078 
2079   /* Choose between a vector shift/rotate by vector and a vector shift/rotate
2080      by scalar.  */
2081   if (code == LSHIFT_EXPR
2082       || code == RSHIFT_EXPR
2083       || code == LROTATE_EXPR
2084       || code == RROTATE_EXPR)
2085     {
2086       optab opv;
2087 
2088       /* Check whether we have vector <op> {x,x,x,x} where x
2089          could be a scalar variable or a constant.  Transform
2090          vector <op> {x,x,x,x} ==> vector <op> scalar.  */
2091       if (VECTOR_INTEGER_TYPE_P (TREE_TYPE (rhs2)))
2092         {
2093           tree first;
2094 
2095           if ((first = ssa_uniform_vector_p (rhs2)) != NULL_TREE)
2096             {
2097               gimple_assign_set_rhs2 (stmt, first);
2098               update_stmt (stmt);
2099               rhs2 = first;
2100             }
2101         }
2102 
2103       opv = optab_for_tree_code (code, type, optab_vector);
2104       if (VECTOR_INTEGER_TYPE_P (TREE_TYPE (rhs2)))
2105 	op = opv;
2106       else
2107 	{
2108           op = optab_for_tree_code (code, type, optab_scalar);
2109 
2110 	  compute_type = get_compute_type (code, op, type);
2111 	  if (compute_type == type)
2112 	    return;
2113 	  /* The rtl expander will expand vector/scalar as vector/vector
2114 	     if necessary.  Pick the one with the wider vector type.  */
2115 	  tree compute_vtype = get_compute_type (code, opv, type);
2116 	  if (subparts_gt (compute_vtype, compute_type))
2117 	    {
2118 	      compute_type = compute_vtype;
2119 	      op = opv;
2120 	    }
2121 	}
2122 
2123       if (code == LROTATE_EXPR || code == RROTATE_EXPR)
2124 	{
2125 	  if (compute_type == NULL_TREE)
2126 	    compute_type = get_compute_type (code, op, type);
2127 	  if (compute_type == type)
2128 	    return;
2129 	  /* Before splitting vector rotates into scalar rotates,
2130 	     see whether we can use vector shifts and BIT_IOR_EXPR
2131 	     instead.  For vector-by-vector rotates we'd also
2132 	     need to check BIT_AND_EXPR and NEGATE_EXPR; punt on those
2133 	     for now, since fold doesn't seem to create such rotates anyway.  */
2134 	  if (compute_type == TREE_TYPE (type)
2135 	      && !VECTOR_INTEGER_TYPE_P (TREE_TYPE (rhs2)))
2136 	    {
2137 	      optab oplv = vashl_optab, opl = ashl_optab;
2138 	      optab oprv = vlshr_optab, opr = lshr_optab, opo = ior_optab;
2139 	      tree compute_lvtype = get_compute_type (LSHIFT_EXPR, oplv, type);
2140 	      tree compute_rvtype = get_compute_type (RSHIFT_EXPR, oprv, type);
2141 	      tree compute_otype = get_compute_type (BIT_IOR_EXPR, opo, type);
2142 	      tree compute_ltype = get_compute_type (LSHIFT_EXPR, opl, type);
2143 	      tree compute_rtype = get_compute_type (RSHIFT_EXPR, opr, type);
2144 	      /* The rtl expander will expand vector/scalar as vector/vector
2145 		 if necessary.  Pick the one with the wider vector type.  */
2146 	      if (subparts_gt (compute_lvtype, compute_ltype))
2147 		{
2148 		  compute_ltype = compute_lvtype;
2149 		  opl = oplv;
2150 		}
2151 	      if (subparts_gt (compute_rvtype, compute_rtype))
2152 		{
2153 		  compute_rtype = compute_rvtype;
2154 		  opr = oprv;
2155 		}
2156 	      /* Pick the narrowest type from LSHIFT_EXPR, RSHIFT_EXPR and
2157 		 BIT_IOR_EXPR.  */
2158 	      compute_type = compute_ltype;
2159 	      if (subparts_gt (compute_type, compute_rtype))
2160 		compute_type = compute_rtype;
2161 	      if (subparts_gt (compute_type, compute_otype))
2162 		compute_type = compute_otype;
2163 	      /* Verify that all three operations can be performed in that type.  */
2164 	      if (compute_type != TREE_TYPE (type))
2165 		{
2166 		  if (optab_handler (opl, TYPE_MODE (compute_type))
2167 		      == CODE_FOR_nothing
2168 		      || optab_handler (opr, TYPE_MODE (compute_type))
2169 			 == CODE_FOR_nothing
2170 		      || optab_handler (opo, TYPE_MODE (compute_type))
2171 			 == CODE_FOR_nothing)
2172 		    compute_type = TREE_TYPE (type);
2173 		}
2174 	    }
2175 	}
2176     }
2177   else
2178     op = optab_for_tree_code (code, type, optab_default);
2179 
2180   /* Optabs will try converting a negation into a subtraction, so
2181      look for it as well.  TODO: negation of floating-point vectors
2182      might be turned into an exclusive OR toggling the sign bit.  */
2183   if (op == unknown_optab
2184       && code == NEGATE_EXPR
2185       && INTEGRAL_TYPE_P (TREE_TYPE (type)))
2186     op = optab_for_tree_code (MINUS_EXPR, type, optab_default);
2187 
2188   if (compute_type == NULL_TREE)
2189     compute_type = get_compute_type (code, op, type);
2190   if (compute_type == type)
2191     return;
2192 
2193   new_rhs = expand_vector_operation (gsi, type, compute_type, stmt, code);
2194 
2195   /* Leave the expression untouched for later expansion.  */
2196   if (new_rhs == NULL_TREE)
2197     return;
2198 
2199   if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (new_rhs)))
2200     new_rhs = gimplify_build1 (gsi, VIEW_CONVERT_EXPR, TREE_TYPE (lhs),
2201                                new_rhs);
2202 
2203   /* NOTE: We should avoid using gimple_assign_set_rhs_from_tree.  One
2204      way to do that is to change expand_vector_operation and its callees
2205      to return a tree_code, RHS1 and RHS2 instead of a tree. */
2206   gimple_assign_set_rhs_from_tree (gsi, new_rhs);
2207   update_stmt (gsi_stmt (*gsi));
2208 }
2209 
2210 /* Use this to lower vector operations introduced by the vectorizer,
2211    if it may need the bit-twiddling tricks implemented in this file.  */
2212 
2213 static unsigned int
2214 expand_vector_operations (void)
2215 {
2216   gimple_stmt_iterator gsi;
2217   basic_block bb;
2218   bool cfg_changed = false;
2219 
2220   FOR_EACH_BB_FN (bb, cfun)
2221     {
2222       for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2223 	{
2224 	  expand_vector_operations_1 (&gsi);
2225 	  /* ???  If we do not clean up EH then we will ICE in
2226 	     verification.  But in reality we have created wrong code,
2227 	     as we did not properly transition EH info and edges to
2228 	     the piecewise computations.  */
2229 	  if (maybe_clean_eh_stmt (gsi_stmt (gsi))
2230 	      && gimple_purge_dead_eh_edges (bb))
2231 	    cfg_changed = true;
2232 	}
2233     }
2234 
2235   return cfg_changed ? TODO_cleanup_cfg : 0;
2236 }
2237 
2238 namespace {
2239 
2240 const pass_data pass_data_lower_vector =
2241 {
2242   GIMPLE_PASS, /* type */
2243   "veclower", /* name */
2244   OPTGROUP_VEC, /* optinfo_flags */
2245   TV_NONE, /* tv_id */
2246   PROP_cfg, /* properties_required */
2247   PROP_gimple_lvec, /* properties_provided */
2248   0, /* properties_destroyed */
2249   0, /* todo_flags_start */
2250   TODO_update_ssa, /* todo_flags_finish */
2251 };
2252 
2253 class pass_lower_vector : public gimple_opt_pass
2254 {
2255 public:
2256   pass_lower_vector (gcc::context *ctxt)
2257     : gimple_opt_pass (pass_data_lower_vector, ctxt)
2258   {}
2259 
2260   /* opt_pass methods: */
2261   virtual bool gate (function *fun)
2262     {
2263       return !(fun->curr_properties & PROP_gimple_lvec);
2264     }
2265 
2266   virtual unsigned int execute (function *)
2267     {
2268       return expand_vector_operations ();
2269     }
2270 
2271 }; // class pass_lower_vector
2272 
2273 } // anon namespace
2274 
2275 gimple_opt_pass *
2276 make_pass_lower_vector (gcc::context *ctxt)
2277 {
2278   return new pass_lower_vector (ctxt);
2279 }
2280 
2281 namespace {
2282 
2283 const pass_data pass_data_lower_vector_ssa =
2284 {
2285   GIMPLE_PASS, /* type */
2286   "veclower2", /* name */
2287   OPTGROUP_VEC, /* optinfo_flags */
2288   TV_NONE, /* tv_id */
2289   PROP_cfg, /* properties_required */
2290   PROP_gimple_lvec, /* properties_provided */
2291   0, /* properties_destroyed */
2292   0, /* todo_flags_start */
2293   ( TODO_update_ssa
2294     | TODO_cleanup_cfg ), /* todo_flags_finish */
2295 };
2296 
2297 class pass_lower_vector_ssa : public gimple_opt_pass
2298 {
2299 public:
2300   pass_lower_vector_ssa (gcc::context *ctxt)
2301     : gimple_opt_pass (pass_data_lower_vector_ssa, ctxt)
2302   {}
2303 
2304   /* opt_pass methods: */
2305   opt_pass * clone () { return new pass_lower_vector_ssa (m_ctxt); }
2306   virtual unsigned int execute (function *)
2307     {
2308       return expand_vector_operations ();
2309     }
2310 
2311 }; // class pass_lower_vector_ssa
2312 
2313 } // anon namespace
2314 
2315 gimple_opt_pass *
2316 make_pass_lower_vector_ssa (gcc::context *ctxt)
2317 {
2318   return new pass_lower_vector_ssa (ctxt);
2319 }
2320 
2321 #include "gt-tree-vect-generic.h"
2322