1 /* Lower vector operations to scalar operations.
2    Copyright (C) 2004-2020 Free Software Foundation, Inc.
3 
4 This file is part of GCC.
5 
6 GCC is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 3, or (at your option) any
9 later version.
10 
11 GCC is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
14 for more details.
15 
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3.  If not see
18 <http://www.gnu.org/licenses/>.  */
19 
20 #include "config.h"
21 #include "system.h"
22 #include "coretypes.h"
23 #include "backend.h"
24 #include "rtl.h"
25 #include "tree.h"
26 #include "gimple.h"
27 #include "tree-pass.h"
28 #include "ssa.h"
29 #include "expmed.h"
30 #include "optabs-tree.h"
31 #include "diagnostic.h"
32 #include "fold-const.h"
33 #include "stor-layout.h"
34 #include "langhooks.h"
35 #include "tree-eh.h"
36 #include "gimple-iterator.h"
37 #include "gimplify-me.h"
38 #include "gimplify.h"
39 #include "tree-cfg.h"
40 #include "tree-vector-builder.h"
41 #include "vec-perm-indices.h"
42 #include "insn-config.h"
43 #include "recog.h"		/* FIXME: for insn_data */
44 
45 
46 static void expand_vector_operations_1 (gimple_stmt_iterator *);
47 
48 /* Return the number of elements in a vector type TYPE that we have
49    already decided needs to be expanded piecewise.  We don't support
50    this kind of expansion for variable-length vectors, since we should
51    always check for target support before introducing uses of those.  */
52 static unsigned int
53 nunits_for_known_piecewise_op (const_tree type)
54 {
55   return TYPE_VECTOR_SUBPARTS (type).to_constant ();
56 }
57 
58 /* Return true if TYPE1 has more elements than TYPE2, where either
59    type may be a vector or a scalar.  */
60 
61 static inline bool
62 subparts_gt (tree type1, tree type2)
63 {
64   poly_uint64 n1 = VECTOR_TYPE_P (type1) ? TYPE_VECTOR_SUBPARTS (type1) : 1;
65   poly_uint64 n2 = VECTOR_TYPE_P (type2) ? TYPE_VECTOR_SUBPARTS (type2) : 1;
66   return known_gt (n1, n2);
67 }
68 
69 /* Build a constant of type TYPE, made of VALUE's bits replicated
70    every TYPE_SIZE (INNER_TYPE) bits to fit TYPE's precision.  */
71 static tree
72 build_replicated_const (tree type, tree inner_type, HOST_WIDE_INT value)
73 {
74   int width = tree_to_uhwi (TYPE_SIZE (inner_type));
75   int n = (TYPE_PRECISION (type) + HOST_BITS_PER_WIDE_INT - 1)
76     / HOST_BITS_PER_WIDE_INT;
77   unsigned HOST_WIDE_INT low, mask;
78   HOST_WIDE_INT a[WIDE_INT_MAX_ELTS];
79   int i;
80 
81   gcc_assert (n && n <= WIDE_INT_MAX_ELTS);
82 
83   if (width == HOST_BITS_PER_WIDE_INT)
84     low = value;
85   else
86     {
87       mask = ((HOST_WIDE_INT)1 << width) - 1;
88       low = (unsigned HOST_WIDE_INT) ~0 / mask * (value & mask);
89     }
90 
91   for (i = 0; i < n; i++)
92     a[i] = low;
93 
94   gcc_assert (TYPE_PRECISION (type) <= MAX_BITSIZE_MODE_ANY_INT);
95   return wide_int_to_tree
96     (type, wide_int::from_array (a, n, TYPE_PRECISION (type)));
97 }
98 
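/* Cache used by build_word_mode_vector_type: the element type for
   word_mode, plus the most recently built vector type and its number
   of units.  */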
99 static GTY(()) tree vector_inner_type;
100 static GTY(()) tree vector_last_type;
101 static GTY(()) int vector_last_nunits;
102 
103 /* Return a suitable vector type made of NUNITS units, each of mode
104    "word_mode" (the global variable).  */
105 static tree
106 build_word_mode_vector_type (int nunits)
107 {
108   if (!vector_inner_type)
109     vector_inner_type = lang_hooks.types.type_for_mode (word_mode, 1);
110   else if (vector_last_nunits == nunits)
111     {
112       gcc_assert (TREE_CODE (vector_last_type) == VECTOR_TYPE);
113       return vector_last_type;
114     }
115 
116   vector_last_nunits = nunits;
117   vector_last_type = build_vector_type (vector_inner_type, nunits);
118   return vector_last_type;
119 }
120 
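/* Type of the callbacks used by the expand_vector_* routines below to
   expand one piece of a vector operation.  The arguments are the
   statement iterator, the computation type, the two operands, the bit
   position and width of the piece, the operation code, and the full
   result type.  */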
121 typedef tree (*elem_op_func) (gimple_stmt_iterator *,
122 			      tree, tree, tree, tree, tree, enum tree_code,
123 			      tree);
124 
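/* Extract the piece of the vector value T that has type TYPE and lies
   at BITPOS with width BITSIZE, looking through a defining VECTOR_CST
   or CONSTRUCTOR where possible.  If BITPOS is NULL_TREE, reinterpret
   the whole of T as TYPE instead.  A BOOLEAN_TYPE result is built by
   comparing the extracted bits against zero.  */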
125 tree
126 tree_vec_extract (gimple_stmt_iterator *gsi, tree type,
127 		  tree t, tree bitsize, tree bitpos)
128 {
129   if (TREE_CODE (t) == SSA_NAME)
130     {
131       gimple *def_stmt = SSA_NAME_DEF_STMT (t);
132       if (is_gimple_assign (def_stmt)
133 	  && (gimple_assign_rhs_code (def_stmt) == VECTOR_CST
134 	      || (bitpos
135 		  && gimple_assign_rhs_code (def_stmt) == CONSTRUCTOR)))
136 	t = gimple_assign_rhs1 (def_stmt);
137     }
138   if (bitpos)
139     {
140       if (TREE_CODE (type) == BOOLEAN_TYPE)
141 	{
142 	  tree itype
143 	    = build_nonstandard_integer_type (tree_to_uhwi (bitsize), 0);
144 	  tree field = gimplify_build3 (gsi, BIT_FIELD_REF, itype, t,
145 					bitsize, bitpos);
146 	  return gimplify_build2 (gsi, NE_EXPR, type, field,
147 				  build_zero_cst (itype));
148 	}
149       else
150 	return gimplify_build3 (gsi, BIT_FIELD_REF, type, t, bitsize, bitpos);
151     }
152   else
153     return gimplify_build1 (gsi, VIEW_CONVERT_EXPR, type, t);
154 }
155 
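/* Callback for expand_vector_piecewise: extract the piece of A at
   BITPOS with width BITSIZE and apply the unary operation CODE to it
   in INNER_TYPE.  */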
156 static tree
157 do_unop (gimple_stmt_iterator *gsi, tree inner_type, tree a,
158 	 tree b ATTRIBUTE_UNUSED, tree bitpos, tree bitsize,
159 	 enum tree_code code, tree type ATTRIBUTE_UNUSED)
160 {
161   a = tree_vec_extract (gsi, inner_type, a, bitsize, bitpos);
162   return gimplify_build1 (gsi, code, inner_type, a);
163 }
164 
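/* Callback for expand_vector_piecewise: extract the pieces of A and B
   at BITPOS/BITSIZE (for the operands that are vectors) and combine
   them with the binary operation CODE in INNER_TYPE.  */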
165 static tree
166 do_binop (gimple_stmt_iterator *gsi, tree inner_type, tree a, tree b,
167 	  tree bitpos, tree bitsize, enum tree_code code,
168 	  tree type ATTRIBUTE_UNUSED)
169 {
170   if (TREE_CODE (TREE_TYPE (a)) == VECTOR_TYPE)
171     a = tree_vec_extract (gsi, inner_type, a, bitsize, bitpos);
172   if (TREE_CODE (TREE_TYPE (b)) == VECTOR_TYPE)
173     b = tree_vec_extract (gsi, inner_type, b, bitsize, bitpos);
174   return gimplify_build2 (gsi, code, inner_type, a, b);
175 }
176 
177 /* Construct expression (A[BITPOS] code B[BITPOS]) ? -1 : 0
178 
179    INNER_TYPE is the type of the elements of A and B.
180 
181    The returned expression is of a signed integer type whose size
182    equals the size of INNER_TYPE.  */
183 static tree
184 do_compare (gimple_stmt_iterator *gsi, tree inner_type, tree a, tree b,
185 	    tree bitpos, tree bitsize, enum tree_code code, tree type)
186 {
187   tree stype = TREE_TYPE (type);
188   tree cst_false = build_zero_cst (stype);
189   tree cst_true = build_all_ones_cst (stype);
190   tree cmp;
191 
192   a = tree_vec_extract (gsi, inner_type, a, bitsize, bitpos);
193   b = tree_vec_extract (gsi, inner_type, b, bitsize, bitpos);
194 
195   cmp = build2 (code, boolean_type_node, a, b);
196   return gimplify_build3 (gsi, COND_EXPR, stype, cmp, cst_true, cst_false);
197 }
198 
199 /* Expand vector addition to scalars.  This does bit twiddling
200    in order to increase parallelism:
201 
202    a + b = (((int) a & 0x7f7f7f7f) + ((int) b & 0x7f7f7f7f)) ^
203            (a ^ b) & 0x80808080
204 
205    a - b =  (((int) a | 0x80808080) - ((int) b & 0x7f7f7f7f)) ^
206             (a ^ ~b) & 0x80808080
207 
208    -b = (0x80808080 - ((int) b & 0x7f7f7f7f)) ^ (~b & 0x80808080)
209 
210    This optimization should be done only if 4 vector items or more
211    fit into a word.  */
212 static tree
213 do_plus_minus (gimple_stmt_iterator *gsi, tree word_type, tree a, tree b,
214 	       tree bitpos ATTRIBUTE_UNUSED, tree bitsize ATTRIBUTE_UNUSED,
215 	       enum tree_code code, tree type ATTRIBUTE_UNUSED)
216 {
217   tree inner_type = TREE_TYPE (TREE_TYPE (a));
218   unsigned HOST_WIDE_INT max;
219   tree low_bits, high_bits, a_low, b_low, result_low, signs;
220 
221   max = GET_MODE_MASK (TYPE_MODE (inner_type));
222   low_bits = build_replicated_const (word_type, inner_type, max >> 1);
223   high_bits = build_replicated_const (word_type, inner_type, max & ~(max >> 1));
224 
225   a = tree_vec_extract (gsi, word_type, a, bitsize, bitpos);
226   b = tree_vec_extract (gsi, word_type, b, bitsize, bitpos);
227 
228   signs = gimplify_build2 (gsi, BIT_XOR_EXPR, word_type, a, b);
229   b_low = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, b, low_bits);
230   if (code == PLUS_EXPR)
231     a_low = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, a, low_bits);
232   else
233     {
234       a_low = gimplify_build2 (gsi, BIT_IOR_EXPR, word_type, a, high_bits);
235       signs = gimplify_build1 (gsi, BIT_NOT_EXPR, word_type, signs);
236     }
237 
238   signs = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, signs, high_bits);
239   result_low = gimplify_build2 (gsi, code, word_type, a_low, b_low);
240   return gimplify_build2 (gsi, BIT_XOR_EXPR, word_type, result_low, signs);
241 }
242 
243 static tree
244 do_negate (gimple_stmt_iterator *gsi, tree word_type, tree b,
245 	   tree unused ATTRIBUTE_UNUSED, tree bitpos ATTRIBUTE_UNUSED,
246 	   tree bitsize ATTRIBUTE_UNUSED,
247 	   enum tree_code code ATTRIBUTE_UNUSED,
248 	   tree type ATTRIBUTE_UNUSED)
249 {
250   tree inner_type = TREE_TYPE (TREE_TYPE (b));
251   HOST_WIDE_INT max;
252   tree low_bits, high_bits, b_low, result_low, signs;
253 
254   max = GET_MODE_MASK (TYPE_MODE (inner_type));
255   low_bits = build_replicated_const (word_type, inner_type, max >> 1);
256   high_bits = build_replicated_const (word_type, inner_type, max & ~(max >> 1));
257 
258   b = tree_vec_extract (gsi, word_type, b, bitsize, bitpos);
259 
260   b_low = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, b, low_bits);
261   signs = gimplify_build1 (gsi, BIT_NOT_EXPR, word_type, b);
262   signs = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, signs, high_bits);
263   result_low = gimplify_build2 (gsi, MINUS_EXPR, word_type, high_bits, b_low);
264   return gimplify_build2 (gsi, BIT_XOR_EXPR, word_type, result_low, signs);
265 }
266 
267 /* Expand a vector operation to scalars, by using many operations
268    whose type is the vector type's inner type.  */
269 static tree
270 expand_vector_piecewise (gimple_stmt_iterator *gsi, elem_op_func f,
271 			 tree type, tree inner_type,
272 			 tree a, tree b, enum tree_code code,
273 			 tree ret_type = NULL_TREE)
274 {
275   vec<constructor_elt, va_gc> *v;
276   tree part_width = TYPE_SIZE (inner_type);
277   tree index = bitsize_int (0);
278   int nunits = nunits_for_known_piecewise_op (type);
279   int delta = tree_to_uhwi (part_width)
280 	      / tree_to_uhwi (TYPE_SIZE (TREE_TYPE (type)));
281   int i;
282   location_t loc = gimple_location (gsi_stmt (*gsi));
283 
284   if (ret_type
285       || types_compatible_p (gimple_expr_type (gsi_stmt (*gsi)), type))
286     warning_at (loc, OPT_Wvector_operation_performance,
287 		"vector operation will be expanded piecewise");
288   else
289     warning_at (loc, OPT_Wvector_operation_performance,
290 		"vector operation will be expanded in parallel");
291 
292   if (!ret_type)
293     ret_type = type;
294   vec_alloc (v, (nunits + delta - 1) / delta);
295   for (i = 0; i < nunits;
296        i += delta, index = int_const_binop (PLUS_EXPR, index, part_width))
297     {
298       tree result = f (gsi, inner_type, a, b, index, part_width, code,
299 		       ret_type);
300       constructor_elt ce = {NULL_TREE, result};
301       v->quick_push (ce);
302     }
303 
304   return build_constructor (ret_type, v);
305 }
306 
307 /* Expand a vector operation to scalars with the freedom to use
308    a scalar integer type, or to use a different size for the items
309    in the vector type.  */
310 static tree
311 expand_vector_parallel (gimple_stmt_iterator *gsi, elem_op_func f, tree type,
312 			tree a, tree b, enum tree_code code)
313 {
314   tree result, compute_type;
315   int n_words = tree_to_uhwi (TYPE_SIZE_UNIT (type)) / UNITS_PER_WORD;
316   location_t loc = gimple_location (gsi_stmt (*gsi));
317 
318   /* We have three strategies.  If the type is already correct, just do
319      the operation an element at a time.  Else, if the vector is wider than
320      one word, do it a word at a time; finally, if the vector is smaller
321      than one word, do it as a scalar.  */
322   if (TYPE_MODE (TREE_TYPE (type)) == word_mode)
323      return expand_vector_piecewise (gsi, f,
324 				     type, TREE_TYPE (type),
325 				     a, b, code);
326   else if (n_words > 1)
327     {
328       tree word_type = build_word_mode_vector_type (n_words);
329       result = expand_vector_piecewise (gsi, f,
330 				        word_type, TREE_TYPE (word_type),
331 					a, b, code);
332       result = force_gimple_operand_gsi (gsi, result, true, NULL, true,
333                                          GSI_SAME_STMT);
334     }
335   else
336     {
337       /* Use a single scalar operation with a mode no wider than word_mode.  */
338       scalar_int_mode mode
339 	= int_mode_for_size (tree_to_uhwi (TYPE_SIZE (type)), 0).require ();
340       compute_type = lang_hooks.types.type_for_mode (mode, 1);
341       result = f (gsi, compute_type, a, b, NULL_TREE, NULL_TREE, code, type);
342       warning_at (loc, OPT_Wvector_operation_performance,
343 	          "vector operation will be expanded with a "
344 		  "single scalar operation");
345     }
346 
347   return result;
348 }
349 
350 /* Expand a vector operation to scalars; for integer types we can use
351    special bit twiddling tricks to do the sums a word at a time, using
352    function F_PARALLEL instead of F.  These tricks are done only if
353    they can process at least four items, that is, only if the vector
354    holds at least four items and if a word can hold four items.  */
355 static tree
356 expand_vector_addition (gimple_stmt_iterator *gsi,
357 			elem_op_func f, elem_op_func f_parallel,
358 			tree type, tree a, tree b, enum tree_code code)
359 {
360   int parts_per_word = UNITS_PER_WORD
361 	  	       / tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (type)));
362 
363   if (INTEGRAL_TYPE_P (TREE_TYPE (type))
364       && parts_per_word >= 4
365       && nunits_for_known_piecewise_op (type) >= 4)
366     return expand_vector_parallel (gsi, f_parallel,
367 				   type, a, b, code);
368   else
369     return expand_vector_piecewise (gsi, f,
370 				    type, TREE_TYPE (type),
371 				    a, b, code);
372 }
373 
374 /* Try to expand the vector comparison expression OP0 CODE OP1 by
375    querying the optab to check whether the following expression:
376 	VEC_COND_EXPR< OP0 CODE OP1, {-1,...}, {0,...}>
377    can be expanded.  */
378 static tree
379 expand_vector_comparison (gimple_stmt_iterator *gsi, tree type, tree op0,
380                           tree op1, enum tree_code code)
381 {
382   tree t;
383   if (!expand_vec_cmp_expr_p (TREE_TYPE (op0), type, code)
384       && !expand_vec_cond_expr_p (type, TREE_TYPE (op0), code))
385     {
386       if (VECTOR_BOOLEAN_TYPE_P (type)
387 	  && SCALAR_INT_MODE_P (TYPE_MODE (type))
388 	  && known_lt (GET_MODE_BITSIZE (TYPE_MODE (type)),
389 		       TYPE_VECTOR_SUBPARTS (type)
390 		       * GET_MODE_BITSIZE (SCALAR_TYPE_MODE
391 						(TREE_TYPE (type)))))
392 	{
393 	  tree inner_type = TREE_TYPE (TREE_TYPE (op0));
394 	  tree part_width = TYPE_SIZE (inner_type);
395 	  tree index = bitsize_int (0);
396 	  int nunits = nunits_for_known_piecewise_op (TREE_TYPE (op0));
397 	  int prec = GET_MODE_PRECISION (SCALAR_TYPE_MODE (type));
398 	  tree ret_type = build_nonstandard_integer_type (prec, 1);
399 	  tree ret_inner_type = boolean_type_node;
400 	  int i;
401 	  location_t loc = gimple_location (gsi_stmt (*gsi));
402 	  t = build_zero_cst (ret_type);
403 
404 	  if (TYPE_PRECISION (ret_inner_type) != 1)
405 	    ret_inner_type = build_nonstandard_integer_type (1, 1);
406 	  warning_at (loc, OPT_Wvector_operation_performance,
407 		      "vector operation will be expanded piecewise");
408 	  for (i = 0; i < nunits;
409 	       i++, index = int_const_binop (PLUS_EXPR, index, part_width))
410 	    {
411 	      tree a = tree_vec_extract (gsi, inner_type, op0, part_width,
412 					 index);
413 	      tree b = tree_vec_extract (gsi, inner_type, op1, part_width,
414 					 index);
415 	      tree result = gimplify_build2 (gsi, code, ret_inner_type, a, b);
416 	      t = gimplify_build3 (gsi, BIT_INSERT_EXPR, ret_type, t, result,
417 				   bitsize_int (i));
418 	    }
419 	  t = gimplify_build1 (gsi, VIEW_CONVERT_EXPR, type, t);
420 	}
421       else
422 	t = expand_vector_piecewise (gsi, do_compare, type,
423 				     TREE_TYPE (TREE_TYPE (op0)), op0, op1,
424 				     code);
425     }
426   else
427     t = NULL_TREE;
428 
429   return t;
430 }
431 
432 /* Helper function of expand_vector_divmod.  Gimplify a RSHIFT_EXPR in the
433    type of OP0 with the shift counts in the SHIFTCNTS array and return the
434    temporary holding the result if successful, otherwise return NULL_TREE.  */
435 static tree
436 add_rshift (gimple_stmt_iterator *gsi, tree type, tree op0, int *shiftcnts)
437 {
438   optab op;
439   unsigned int i, nunits = nunits_for_known_piecewise_op (type);
440   bool scalar_shift = true;
441 
442   for (i = 1; i < nunits; i++)
443     {
444       if (shiftcnts[i] != shiftcnts[0])
445 	scalar_shift = false;
446     }
447 
448   if (scalar_shift && shiftcnts[0] == 0)
449     return op0;
450 
451   if (scalar_shift)
452     {
453       op = optab_for_tree_code (RSHIFT_EXPR, type, optab_scalar);
454       if (op != unknown_optab
455 	  && optab_handler (op, TYPE_MODE (type)) != CODE_FOR_nothing)
456 	return gimplify_build2 (gsi, RSHIFT_EXPR, type, op0,
457 				build_int_cst (NULL_TREE, shiftcnts[0]));
458     }
459 
460   op = optab_for_tree_code (RSHIFT_EXPR, type, optab_vector);
461   if (op != unknown_optab
462       && optab_handler (op, TYPE_MODE (type)) != CODE_FOR_nothing)
463     {
464       tree_vector_builder vec (type, nunits, 1);
465       for (i = 0; i < nunits; i++)
466 	vec.quick_push (build_int_cst (TREE_TYPE (type), shiftcnts[i]));
467       return gimplify_build2 (gsi, RSHIFT_EXPR, type, op0, vec.build ());
468     }
469 
470   return NULL_TREE;
471 }
472 
473 /* Try to expand integer vector division by constant using
474    widening multiply, shifts and additions.  */
475 static tree
476 expand_vector_divmod (gimple_stmt_iterator *gsi, tree type, tree op0,
477 		      tree op1, enum tree_code code)
478 {
479   bool use_pow2 = true;
480   bool has_vector_shift = true;
481   bool use_abs_op1 = false;
482   int mode = -1, this_mode;
483   int pre_shift = -1, post_shift;
484   unsigned int nunits = nunits_for_known_piecewise_op (type);
485   int *shifts = XALLOCAVEC (int, nunits * 4);
486   int *pre_shifts = shifts + nunits;
487   int *post_shifts = pre_shifts + nunits;
488   int *shift_temps = post_shifts + nunits;
489   unsigned HOST_WIDE_INT *mulc = XALLOCAVEC (unsigned HOST_WIDE_INT, nunits);
490   int prec = TYPE_PRECISION (TREE_TYPE (type));
491   int dummy_int;
492   unsigned int i;
493   signop sign_p = TYPE_SIGN (TREE_TYPE (type));
494   unsigned HOST_WIDE_INT mask = GET_MODE_MASK (TYPE_MODE (TREE_TYPE (type)));
495   tree cur_op, mulcst, tem;
496   optab op;
497 
498   if (prec > HOST_BITS_PER_WIDE_INT)
499     return NULL_TREE;
500 
501   op = optab_for_tree_code (RSHIFT_EXPR, type, optab_vector);
502   if (op == unknown_optab
503       || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
504     has_vector_shift = false;
505 
506   /* Analysis phase.  Determine whether all op1 elements are powers of
507      two and whether it is possible to expand the operation using shifts
508      (or, for remainder, using masking).  Additionally compute the
509      multiplicative constants and pre- and post-shifts if the division is
510      to be expanded using widening or high-part multiplication plus shifts.  */
511   for (i = 0; i < nunits; i++)
512     {
513       tree cst = VECTOR_CST_ELT (op1, i);
514       unsigned HOST_WIDE_INT ml;
515 
516       if (TREE_CODE (cst) != INTEGER_CST || integer_zerop (cst))
517 	return NULL_TREE;
518       pre_shifts[i] = 0;
519       post_shifts[i] = 0;
520       mulc[i] = 0;
521       if (use_pow2
522 	  && (!integer_pow2p (cst) || tree_int_cst_sgn (cst) != 1))
523 	use_pow2 = false;
524       if (use_pow2)
525 	{
526 	  shifts[i] = tree_log2 (cst);
527 	  if (shifts[i] != shifts[0]
528 	      && code == TRUNC_DIV_EXPR
529 	      && !has_vector_shift)
530 	    use_pow2 = false;
531 	}
532       if (mode == -2)
533 	continue;
534       if (sign_p == UNSIGNED)
535 	{
536 	  unsigned HOST_WIDE_INT mh;
537 	  unsigned HOST_WIDE_INT d = TREE_INT_CST_LOW (cst) & mask;
538 
539 	  if (d >= (HOST_WIDE_INT_1U << (prec - 1)))
540 	    /* FIXME: Can transform this into op0 >= op1 ? 1 : 0.  */
541 	    return NULL_TREE;
542 
543 	  if (d <= 1)
544 	    {
545 	      mode = -2;
546 	      continue;
547 	    }
548 
549 	  /* Find a suitable multiplier and right shift count
550 	     instead of dividing by D.  */
551 	  mh = choose_multiplier (d, prec, prec, &ml, &post_shift, &dummy_int);
552 
553 	  /* If the suggested multiplier is more than PREC bits, we can
554 	     do better for even divisors, using an initial right shift.  */
555 	  if ((mh != 0 && (d & 1) == 0)
556 	      || (!has_vector_shift && pre_shift != -1))
557 	    {
558 	      if (has_vector_shift)
559 		pre_shift = ctz_or_zero (d);
560 	      else if (pre_shift == -1)
561 		{
562 		  unsigned int j;
563 		  for (j = 0; j < nunits; j++)
564 		    {
565 		      tree cst2 = VECTOR_CST_ELT (op1, j);
566 		      unsigned HOST_WIDE_INT d2;
567 		      int this_pre_shift;
568 
569 		      if (!tree_fits_uhwi_p (cst2))
570 			return NULL_TREE;
571 		      d2 = tree_to_uhwi (cst2) & mask;
572 		      if (d2 == 0)
573 			return NULL_TREE;
574 		      this_pre_shift = floor_log2 (d2 & -d2);
575 		      if (pre_shift == -1 || this_pre_shift < pre_shift)
576 			pre_shift = this_pre_shift;
577 		    }
578 		  if (i != 0 && pre_shift != 0)
579 		    {
580 		      /* Restart.  */
581 		      i = -1U;
582 		      mode = -1;
583 		      continue;
584 		    }
585 		}
586 	      if (pre_shift != 0)
587 		{
588 		  if ((d >> pre_shift) <= 1)
589 		    {
590 		      mode = -2;
591 		      continue;
592 		    }
593 		  mh = choose_multiplier (d >> pre_shift, prec,
594 					  prec - pre_shift,
595 					  &ml, &post_shift, &dummy_int);
596 		  gcc_assert (!mh);
597 		  pre_shifts[i] = pre_shift;
598 		}
599 	    }
600 	  if (!mh)
601 	    this_mode = 0;
602 	  else
603 	    this_mode = 1;
604 	}
605       else
606 	{
607 	  HOST_WIDE_INT d = TREE_INT_CST_LOW (cst);
608 	  unsigned HOST_WIDE_INT abs_d;
609 
610 	  if (d == -1)
611 	    return NULL_TREE;
612 
613 	  /* Since d might be INT_MIN, we have to cast to
614 	     unsigned HOST_WIDE_INT before negating to avoid
615 	     undefined signed overflow.  */
616 	  abs_d = (d >= 0
617 		  ? (unsigned HOST_WIDE_INT) d
618 		  : - (unsigned HOST_WIDE_INT) d);
619 
620 	  /* n rem d = n rem -d */
621 	  if (code == TRUNC_MOD_EXPR && d < 0)
622 	    {
623 	      d = abs_d;
624 	      use_abs_op1 = true;
625 	    }
626 	  if (abs_d == HOST_WIDE_INT_1U << (prec - 1))
627 	    {
628 	      /* This case is not handled correctly below.  */
629 	      mode = -2;
630 	      continue;
631 	    }
632 	  if (abs_d <= 1)
633 	    {
634 	      mode = -2;
635 	      continue;
636 	    }
637 
638 	  choose_multiplier (abs_d, prec, prec - 1, &ml,
639 			     &post_shift, &dummy_int);
640 	  if (ml >= HOST_WIDE_INT_1U << (prec - 1))
641 	    {
642 	      this_mode = 4 + (d < 0);
643 	      ml |= HOST_WIDE_INT_M1U << (prec - 1);
644 	    }
645 	  else
646 	    this_mode = 2 + (d < 0);
647 	}
648       mulc[i] = ml;
649       post_shifts[i] = post_shift;
650       if ((i && !has_vector_shift && post_shifts[0] != post_shift)
651 	  || post_shift >= prec
652 	  || pre_shifts[i] >= prec)
653 	this_mode = -2;
654 
655       if (i == 0)
656 	mode = this_mode;
657       else if (mode != this_mode)
658 	mode = -2;
659     }
660 
661   if (use_pow2)
662     {
663       tree addend = NULL_TREE;
664       if (sign_p == SIGNED)
665 	{
666 	  tree uns_type;
667 
668 	  /* Both division and remainder sequences need
669 	     op0 < 0 ? mask : 0 computed.  It can be computed either as
670 	     (type) (((uns_type) (op0 >> (prec - 1))) >> (prec - shifts[i]))
671 	     (if none of the shifts is 0) or as the conditional.  */
672 	  for (i = 0; i < nunits; i++)
673 	    if (shifts[i] == 0)
674 	      break;
675 	  uns_type
676 	    = build_vector_type (build_nonstandard_integer_type (prec, 1),
677 				 nunits);
678 	  if (i == nunits && TYPE_MODE (uns_type) == TYPE_MODE (type))
679 	    {
680 	      for (i = 0; i < nunits; i++)
681 		shift_temps[i] = prec - 1;
682 	      cur_op = add_rshift (gsi, type, op0, shift_temps);
683 	      if (cur_op != NULL_TREE)
684 		{
685 		  cur_op = gimplify_build1 (gsi, VIEW_CONVERT_EXPR,
686 					    uns_type, cur_op);
687 		  for (i = 0; i < nunits; i++)
688 		    shift_temps[i] = prec - shifts[i];
689 		  cur_op = add_rshift (gsi, uns_type, cur_op, shift_temps);
690 		  if (cur_op != NULL_TREE)
691 		    addend = gimplify_build1 (gsi, VIEW_CONVERT_EXPR,
692 					      type, cur_op);
693 		}
694 	    }
695 	  if (addend == NULL_TREE
696 	      && expand_vec_cond_expr_p (type, type, LT_EXPR))
697 	    {
698 	      tree zero, cst, cond, mask_type;
699 	      gimple *stmt;
700 
701 	      mask_type = truth_type_for (type);
702 	      zero = build_zero_cst (type);
703 	      cond = build2 (LT_EXPR, mask_type, op0, zero);
704 	      tree_vector_builder vec (type, nunits, 1);
705 	      for (i = 0; i < nunits; i++)
706 		vec.quick_push (build_int_cst (TREE_TYPE (type),
707 					       (HOST_WIDE_INT_1U
708 						<< shifts[i]) - 1));
709 	      cst = vec.build ();
710 	      addend = make_ssa_name (type);
711 	      stmt = gimple_build_assign (addend, VEC_COND_EXPR, cond,
712 					  cst, zero);
713 	      gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
714 	    }
715 	}
716       if (code == TRUNC_DIV_EXPR)
717 	{
718 	  if (sign_p == UNSIGNED)
719 	    {
720 	      /* q = op0 >> shift;  */
721 	      cur_op = add_rshift (gsi, type, op0, shifts);
722 	      if (cur_op != NULL_TREE)
723 		return cur_op;
724 	    }
725 	  else if (addend != NULL_TREE)
726 	    {
727 	      /* t1 = op0 + addend;
728 		 q = t1 >> shift;  */
729 	      op = optab_for_tree_code (PLUS_EXPR, type, optab_default);
730 	      if (op != unknown_optab
731 		  && optab_handler (op, TYPE_MODE (type)) != CODE_FOR_nothing)
732 		{
733 		  cur_op = gimplify_build2 (gsi, PLUS_EXPR, type, op0, addend);
734 		  cur_op = add_rshift (gsi, type, cur_op, shifts);
735 		  if (cur_op != NULL_TREE)
736 		    return cur_op;
737 		}
738 	    }
739 	}
740       else
741 	{
742 	  tree mask;
743 	  tree_vector_builder vec (type, nunits, 1);
744 	  for (i = 0; i < nunits; i++)
745 	    vec.quick_push (build_int_cst (TREE_TYPE (type),
746 					   (HOST_WIDE_INT_1U
747 					    << shifts[i]) - 1));
748 	  mask = vec.build ();
749 	  op = optab_for_tree_code (BIT_AND_EXPR, type, optab_default);
750 	  if (op != unknown_optab
751 	      && optab_handler (op, TYPE_MODE (type)) != CODE_FOR_nothing)
752 	    {
753 	      if (sign_p == UNSIGNED)
754 		/* r = op0 & mask;  */
755 		return gimplify_build2 (gsi, BIT_AND_EXPR, type, op0, mask);
756 	      else if (addend != NULL_TREE)
757 		{
758 		  /* t1 = op0 + addend;
759 		     t2 = t1 & mask;
760 		     r = t2 - addend;  */
761 		  op = optab_for_tree_code (PLUS_EXPR, type, optab_default);
762 		  if (op != unknown_optab
763 		      && optab_handler (op, TYPE_MODE (type))
764 			 != CODE_FOR_nothing)
765 		    {
766 		      cur_op = gimplify_build2 (gsi, PLUS_EXPR, type, op0,
767 						addend);
768 		      cur_op = gimplify_build2 (gsi, BIT_AND_EXPR, type,
769 						cur_op, mask);
770 		      op = optab_for_tree_code (MINUS_EXPR, type,
771 						optab_default);
772 		      if (op != unknown_optab
773 			  && optab_handler (op, TYPE_MODE (type))
774 			     != CODE_FOR_nothing)
775 			return gimplify_build2 (gsi, MINUS_EXPR, type,
776 						cur_op, addend);
777 		    }
778 		}
779 	    }
780 	}
781     }
782 
783   if (mode == -2 || BYTES_BIG_ENDIAN != WORDS_BIG_ENDIAN)
784     return NULL_TREE;
785 
786   if (!can_mult_highpart_p (TYPE_MODE (type), TYPE_UNSIGNED (type)))
787     return NULL_TREE;
788 
789   cur_op = op0;
790 
791   switch (mode)
792     {
793     case 0:
794       gcc_assert (sign_p == UNSIGNED);
795       /* t1 = oprnd0 >> pre_shift;
796 	 t2 = t1 h* ml;
797 	 q = t2 >> post_shift;  */
798       cur_op = add_rshift (gsi, type, cur_op, pre_shifts);
799       if (cur_op == NULL_TREE)
800 	return NULL_TREE;
801       break;
802     case 1:
803       gcc_assert (sign_p == UNSIGNED);
804       for (i = 0; i < nunits; i++)
805 	{
806 	  shift_temps[i] = 1;
807 	  post_shifts[i]--;
808 	}
809       break;
810     case 2:
811     case 3:
812     case 4:
813     case 5:
814       gcc_assert (sign_p == SIGNED);
815       for (i = 0; i < nunits; i++)
816 	shift_temps[i] = prec - 1;
817       break;
818     default:
819       return NULL_TREE;
820     }
821 
822   tree_vector_builder vec (type, nunits, 1);
823   for (i = 0; i < nunits; i++)
824     vec.quick_push (build_int_cst (TREE_TYPE (type), mulc[i]));
825   mulcst = vec.build ();
826 
827   cur_op = gimplify_build2 (gsi, MULT_HIGHPART_EXPR, type, cur_op, mulcst);
828 
829   switch (mode)
830     {
831     case 0:
832       /* t1 = oprnd0 >> pre_shift;
833 	 t2 = t1 h* ml;
834 	 q = t2 >> post_shift;  */
835       cur_op = add_rshift (gsi, type, cur_op, post_shifts);
836       break;
837     case 1:
838       /* t1 = oprnd0 h* ml;
839 	 t2 = oprnd0 - t1;
840 	 t3 = t2 >> 1;
841 	 t4 = t1 + t3;
842 	 q = t4 >> (post_shift - 1);  */
843       op = optab_for_tree_code (MINUS_EXPR, type, optab_default);
844       if (op == unknown_optab
845 	  || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
846 	return NULL_TREE;
847       tem = gimplify_build2 (gsi, MINUS_EXPR, type, op0, cur_op);
848       tem = add_rshift (gsi, type, tem, shift_temps);
849       op = optab_for_tree_code (PLUS_EXPR, type, optab_default);
850       if (op == unknown_optab
851 	  || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
852 	return NULL_TREE;
853       tem = gimplify_build2 (gsi, PLUS_EXPR, type, cur_op, tem);
854       cur_op = add_rshift (gsi, type, tem, post_shifts);
855       if (cur_op == NULL_TREE)
856 	return NULL_TREE;
857       break;
858     case 2:
859     case 3:
860     case 4:
861     case 5:
862       /* t1 = oprnd0 h* ml;
863 	 t2 = t1; [ iff (mode & 2) != 0 ]
864 	 t2 = t1 + oprnd0; [ iff (mode & 2) == 0 ]
865 	 t3 = t2 >> post_shift;
866 	 t4 = oprnd0 >> (prec - 1);
867 	 q = t3 - t4; [ iff (mode & 1) == 0 ]
868 	 q = t4 - t3; [ iff (mode & 1) != 0 ]  */
869       if ((mode & 2) == 0)
870 	{
871 	  op = optab_for_tree_code (PLUS_EXPR, type, optab_default);
872 	  if (op == unknown_optab
873 	      || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
874 	    return NULL_TREE;
875 	  cur_op = gimplify_build2 (gsi, PLUS_EXPR, type, cur_op, op0);
876 	}
877       cur_op = add_rshift (gsi, type, cur_op, post_shifts);
878       if (cur_op == NULL_TREE)
879 	return NULL_TREE;
880       tem = add_rshift (gsi, type, op0, shift_temps);
881       if (tem == NULL_TREE)
882 	return NULL_TREE;
883       op = optab_for_tree_code (MINUS_EXPR, type, optab_default);
884       if (op == unknown_optab
885 	  || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
886 	return NULL_TREE;
887       if ((mode & 1) == 0)
888 	cur_op = gimplify_build2 (gsi, MINUS_EXPR, type, cur_op, tem);
889       else
890 	cur_op = gimplify_build2 (gsi, MINUS_EXPR, type, tem, cur_op);
891       break;
892     default:
893       gcc_unreachable ();
894     }
895 
896   if (code == TRUNC_DIV_EXPR)
897     return cur_op;
898 
899   /* We divided.  Now finish by:
900      t1 = q * oprnd1;
901      r = oprnd0 - t1;  */
902   op = optab_for_tree_code (MULT_EXPR, type, optab_default);
903   if (op == unknown_optab
904       || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
905     return NULL_TREE;
906   if (use_abs_op1)
907     {
908       tree_vector_builder elts;
909       if (!elts.new_unary_operation (type, op1, false))
910 	return NULL_TREE;
911       unsigned int count = elts.encoded_nelts ();
912       for (unsigned int i = 0; i < count; ++i)
913 	{
914 	  tree elem1 = VECTOR_CST_ELT (op1, i);
915 
916 	  tree elt = const_unop (ABS_EXPR, TREE_TYPE (elem1), elem1);
917 	  if (elt == NULL_TREE)
918 	    return NULL_TREE;
919 	  elts.quick_push (elt);
920 	}
921       op1 = elts.build ();
922     }
923   tem = gimplify_build2 (gsi, MULT_EXPR, type, cur_op, op1);
924   op = optab_for_tree_code (MINUS_EXPR, type, optab_default);
925   if (op == unknown_optab
926       || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
927     return NULL_TREE;
928   return gimplify_build2 (gsi, MINUS_EXPR, type, op0, tem);
929 }
930 
931 /* Expand a vector condition to scalars, by using many conditions
932    on the vector's elements.  */
933 static void
934 expand_vector_condition (gimple_stmt_iterator *gsi)
935 {
936   gassign *stmt = as_a <gassign *> (gsi_stmt (*gsi));
937   tree type = gimple_expr_type (stmt);
938   tree a = gimple_assign_rhs1 (stmt);
939   tree a1 = a;
940   tree a2 = NULL_TREE;
941   bool a_is_comparison = false;
942   bool a_is_scalar_bitmask = false;
943   tree b = gimple_assign_rhs2 (stmt);
944   tree c = gimple_assign_rhs3 (stmt);
945   vec<constructor_elt, va_gc> *v;
946   tree constr;
947   tree inner_type = TREE_TYPE (type);
948   tree cond_type = TREE_TYPE (TREE_TYPE (a));
949   tree comp_inner_type = cond_type;
950   tree width = TYPE_SIZE (inner_type);
951   tree index = bitsize_int (0);
952   tree comp_width = width;
953   tree comp_index = index;
954   int i;
955   location_t loc = gimple_location (gsi_stmt (*gsi));
956 
957   if (!is_gimple_val (a))
958     {
959       gcc_assert (COMPARISON_CLASS_P (a));
960       a_is_comparison = true;
961       a1 = TREE_OPERAND (a, 0);
962       a2 = TREE_OPERAND (a, 1);
963       comp_inner_type = TREE_TYPE (TREE_TYPE (a1));
964       comp_width = TYPE_SIZE (comp_inner_type);
965     }
966 
967   if (expand_vec_cond_expr_p (type, TREE_TYPE (a1), TREE_CODE (a)))
968     return;
969 
970   /* Handle vector boolean types with bitmasks.  If there is a comparison
971      and we can expand the comparison into the vector boolean bitmask,
972      or otherwise if it is compatible with type, we can transform
973       vbfld_1 = x_2 < y_3 ? vbfld_4 : vbfld_5;
974      into
975       tmp_6 = x_2 < y_3;
976       tmp_7 = tmp_6 & vbfld_4;
977       tmp_8 = ~tmp_6;
978       tmp_9 = tmp_8 & vbfld_5;
979       vbfld_1 = tmp_7 | tmp_9;
980      Similarly for vbfld_10 instead of x_2 < y_3.  */
981   if (VECTOR_BOOLEAN_TYPE_P (type)
982       && SCALAR_INT_MODE_P (TYPE_MODE (type))
983       && known_lt (GET_MODE_BITSIZE (TYPE_MODE (type)),
984 		   TYPE_VECTOR_SUBPARTS (type)
985 		   * GET_MODE_BITSIZE (SCALAR_TYPE_MODE (TREE_TYPE (type))))
986       && (a_is_comparison
987 	  ? useless_type_conversion_p (type, TREE_TYPE (a))
988 	  : expand_vec_cmp_expr_p (TREE_TYPE (a1), type, TREE_CODE (a))))
989     {
990       if (a_is_comparison)
991 	a = gimplify_build2 (gsi, TREE_CODE (a), type, a1, a2);
992       a1 = gimplify_build2 (gsi, BIT_AND_EXPR, type, a, b);
993       a2 = gimplify_build1 (gsi, BIT_NOT_EXPR, type, a);
994       a2 = gimplify_build2 (gsi, BIT_AND_EXPR, type, a2, c);
995       a = gimplify_build2 (gsi, BIT_IOR_EXPR, type, a1, a2);
996       gimple_assign_set_rhs_from_tree (gsi, a);
997       update_stmt (gsi_stmt (*gsi));
998       return;
999     }
1000 
1001   /* TODO: try and find a smaller vector type.  */
1002 
1003   warning_at (loc, OPT_Wvector_operation_performance,
1004 	      "vector condition will be expanded piecewise");
1005 
1006   if (!a_is_comparison
1007       && VECTOR_BOOLEAN_TYPE_P (TREE_TYPE (a))
1008       && SCALAR_INT_MODE_P (TYPE_MODE (TREE_TYPE (a)))
1009       && known_lt (GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (a))),
1010 		   TYPE_VECTOR_SUBPARTS (TREE_TYPE (a))
1011 		   * GET_MODE_BITSIZE (SCALAR_TYPE_MODE
1012 						(TREE_TYPE (TREE_TYPE (a))))))
1013     {
1014       a_is_scalar_bitmask = true;
1015       int prec = GET_MODE_PRECISION (SCALAR_TYPE_MODE (TREE_TYPE (a)));
1016       tree atype = build_nonstandard_integer_type (prec, 1);
1017       a = gimplify_build1 (gsi, VIEW_CONVERT_EXPR, atype, a);
1018     }
1019 
1020   int nunits = nunits_for_known_piecewise_op (type);
1021   vec_alloc (v, nunits);
1022   for (i = 0; i < nunits; i++)
1023     {
1024       tree aa, result;
1025       tree bb = tree_vec_extract (gsi, inner_type, b, width, index);
1026       tree cc = tree_vec_extract (gsi, inner_type, c, width, index);
1027       if (a_is_comparison)
1028 	{
1029 	  tree aa1 = tree_vec_extract (gsi, comp_inner_type, a1,
1030 				       comp_width, comp_index);
1031 	  tree aa2 = tree_vec_extract (gsi, comp_inner_type, a2,
1032 				       comp_width, comp_index);
1033 	  aa = fold_build2 (TREE_CODE (a), cond_type, aa1, aa2);
1034 	}
1035       else if (a_is_scalar_bitmask)
1036 	{
1037 	  wide_int w = wi::set_bit_in_zero (i, TYPE_PRECISION (TREE_TYPE (a)));
1038 	  result = gimplify_build2 (gsi, BIT_AND_EXPR, TREE_TYPE (a),
1039 				    a, wide_int_to_tree (TREE_TYPE (a), w));
1040 	  aa = fold_build2 (NE_EXPR, boolean_type_node, result,
1041 			    build_zero_cst (TREE_TYPE (a)));
1042 	}
1043       else
1044 	aa = tree_vec_extract (gsi, cond_type, a, width, index);
1045       result = gimplify_build3 (gsi, COND_EXPR, inner_type, aa, bb, cc);
1046       constructor_elt ce = {NULL_TREE, result};
1047       v->quick_push (ce);
1048       index = int_const_binop (PLUS_EXPR, index, width);
1049       if (width == comp_width)
1050 	comp_index = index;
1051       else
1052 	comp_index = int_const_binop (PLUS_EXPR, comp_index, comp_width);
1053     }
1054 
1055   constr = build_constructor (type, v);
1056   gimple_assign_set_rhs_from_tree (gsi, constr);
1057   update_stmt (gsi_stmt (*gsi));
1058 }
1059 
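/* Expand the vector operation CODE performed by ASSIGN, whose result has
   vector type TYPE, in terms of operations on COMPUTE_TYPE.  When the
   compute mode is not a vector mode, try the word-parallel and other
   special-case expansions first, falling back to a piecewise expansion.  */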
1060 static tree
1061 expand_vector_operation (gimple_stmt_iterator *gsi, tree type, tree compute_type,
1062 			 gassign *assign, enum tree_code code)
1063 {
1064   machine_mode compute_mode = TYPE_MODE (compute_type);
1065 
1066   /* If the compute mode is not a vector mode (hence we are not decomposing
1067      a BLKmode vector to smaller, hardware-supported vectors), we may want
1068      to expand the operations in parallel.  */
1069   if (!VECTOR_MODE_P (compute_mode))
1070     switch (code)
1071       {
1072       case PLUS_EXPR:
1073       case MINUS_EXPR:
1074         if (ANY_INTEGRAL_TYPE_P (type) && !TYPE_OVERFLOW_TRAPS (type))
1075 	  return expand_vector_addition (gsi, do_binop, do_plus_minus, type,
1076 					 gimple_assign_rhs1 (assign),
1077 					 gimple_assign_rhs2 (assign), code);
1078 	break;
1079 
1080       case NEGATE_EXPR:
1081         if (ANY_INTEGRAL_TYPE_P (type) && !TYPE_OVERFLOW_TRAPS (type))
1082           return expand_vector_addition (gsi, do_unop, do_negate, type,
1083 		      		         gimple_assign_rhs1 (assign),
1084 					 NULL_TREE, code);
1085 	break;
1086 
1087       case BIT_AND_EXPR:
1088       case BIT_IOR_EXPR:
1089       case BIT_XOR_EXPR:
1090         return expand_vector_parallel (gsi, do_binop, type,
1091 		      		       gimple_assign_rhs1 (assign),
1092 				       gimple_assign_rhs2 (assign), code);
1093 
1094       case BIT_NOT_EXPR:
1095         return expand_vector_parallel (gsi, do_unop, type,
1096 		      		       gimple_assign_rhs1 (assign),
1097         			       NULL_TREE, code);
1098       case EQ_EXPR:
1099       case NE_EXPR:
1100       case GT_EXPR:
1101       case LT_EXPR:
1102       case GE_EXPR:
1103       case LE_EXPR:
1104       case UNEQ_EXPR:
1105       case UNGT_EXPR:
1106       case UNLT_EXPR:
1107       case UNGE_EXPR:
1108       case UNLE_EXPR:
1109       case LTGT_EXPR:
1110       case ORDERED_EXPR:
1111       case UNORDERED_EXPR:
1112 	{
1113 	  tree rhs1 = gimple_assign_rhs1 (assign);
1114 	  tree rhs2 = gimple_assign_rhs2 (assign);
1115 
1116 	  return expand_vector_comparison (gsi, type, rhs1, rhs2, code);
1117 	}
1118 
1119       case TRUNC_DIV_EXPR:
1120       case TRUNC_MOD_EXPR:
1121 	{
1122 	  tree rhs1 = gimple_assign_rhs1 (assign);
1123 	  tree rhs2 = gimple_assign_rhs2 (assign);
1124 	  tree ret;
1125 
1126 	  if (!optimize
1127 	      || !VECTOR_INTEGER_TYPE_P (type)
1128 	      || TREE_CODE (rhs2) != VECTOR_CST
1129 	      || !VECTOR_MODE_P (TYPE_MODE (type)))
1130 	    break;
1131 
1132 	  ret = expand_vector_divmod (gsi, type, rhs1, rhs2, code);
1133 	  if (ret != NULL_TREE)
1134 	    return ret;
1135 	  break;
1136 	}
1137 
1138       default:
1139 	break;
1140       }
1141 
1142   if (TREE_CODE_CLASS (code) == tcc_unary)
1143     return expand_vector_piecewise (gsi, do_unop, type, compute_type,
1144 				    gimple_assign_rhs1 (assign),
1145 				    NULL_TREE, code);
1146   else
1147     return expand_vector_piecewise (gsi, do_binop, type, compute_type,
1148 				    gimple_assign_rhs1 (assign),
1149 				    gimple_assign_rhs2 (assign), code);
1150 }
1151 
1152 /* Try to optimize
1153    a_5 = { b_7, b_7 + 3, b_7 + 6, b_7 + 9 };
1154    style stmts into:
1155    _9 = { b_7, b_7, b_7, b_7 };
1156    a_5 = _9 + { 0, 3, 6, 9 };
1157    because a vector splat operation is usually more efficient
1158    than piecewise initialization of the vector.  */
1159 
1160 static void
1161 optimize_vector_constructor (gimple_stmt_iterator *gsi)
1162 {
1163   gassign *stmt = as_a <gassign *> (gsi_stmt (*gsi));
1164   tree lhs = gimple_assign_lhs (stmt);
1165   tree rhs = gimple_assign_rhs1 (stmt);
1166   tree type = TREE_TYPE (rhs);
1167   unsigned int i, j;
1168   unsigned HOST_WIDE_INT nelts;
1169   bool all_same = true;
1170   constructor_elt *elt;
1171   gimple *g;
1172   tree base = NULL_TREE;
1173   optab op;
1174 
1175   if (!TYPE_VECTOR_SUBPARTS (type).is_constant (&nelts)
1176       || nelts <= 2
1177       || CONSTRUCTOR_NELTS (rhs) != nelts)
1178     return;
1179   op = optab_for_tree_code (PLUS_EXPR, type, optab_default);
1180   if (op == unknown_optab
1181       || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
1182     return;
1183   FOR_EACH_VEC_SAFE_ELT (CONSTRUCTOR_ELTS (rhs), i, elt)
1184     if (TREE_CODE (elt->value) != SSA_NAME
1185 	|| TREE_CODE (TREE_TYPE (elt->value)) == VECTOR_TYPE)
1186       return;
1187     else
1188       {
1189 	tree this_base = elt->value;
1190 	if (this_base != CONSTRUCTOR_ELT (rhs, 0)->value)
1191 	  all_same = false;
1192 	for (j = 0; j < nelts + 1; j++)
1193 	  {
1194 	    g = SSA_NAME_DEF_STMT (this_base);
1195 	    if (is_gimple_assign (g)
1196 		&& gimple_assign_rhs_code (g) == PLUS_EXPR
1197 		&& TREE_CODE (gimple_assign_rhs2 (g)) == INTEGER_CST
1198 		&& TREE_CODE (gimple_assign_rhs1 (g)) == SSA_NAME
1199 		&& !SSA_NAME_OCCURS_IN_ABNORMAL_PHI (gimple_assign_rhs1 (g)))
1200 	      this_base = gimple_assign_rhs1 (g);
1201 	    else
1202 	      break;
1203 	  }
1204 	if (i == 0)
1205 	  base = this_base;
1206 	else if (this_base != base)
1207 	  return;
1208       }
1209   if (all_same)
1210     return;
1211   tree_vector_builder cst (type, nelts, 1);
1212   for (i = 0; i < nelts; i++)
1213     {
1214       tree this_base = CONSTRUCTOR_ELT (rhs, i)->value;
1215       tree elt = build_zero_cst (TREE_TYPE (base));
1216       while (this_base != base)
1217 	{
1218 	  g = SSA_NAME_DEF_STMT (this_base);
1219 	  elt = fold_binary (PLUS_EXPR, TREE_TYPE (base),
1220 			     elt, gimple_assign_rhs2 (g));
1221 	  if (elt == NULL_TREE
1222 	      || TREE_CODE (elt) != INTEGER_CST
1223 	      || TREE_OVERFLOW (elt))
1224 	    return;
1225 	  this_base = gimple_assign_rhs1 (g);
1226 	}
1227       cst.quick_push (elt);
1228     }
1229   for (i = 0; i < nelts; i++)
1230     CONSTRUCTOR_ELT (rhs, i)->value = base;
1231   g = gimple_build_assign (make_ssa_name (type), rhs);
1232   gsi_insert_before (gsi, g, GSI_SAME_STMT);
1233   g = gimple_build_assign (lhs, PLUS_EXPR, gimple_assign_lhs (g),
1234 			   cst.build ());
1235   gsi_replace (gsi, g, false);
1236 }
1237 
1238 /* Return a type for the widest vector mode whose components are of type
1239    TYPE, or NULL_TREE if none is found.  */
1240 
1241 static tree
1242 type_for_widest_vector_mode (tree type, optab op)
1243 {
1244   machine_mode inner_mode = TYPE_MODE (type);
1245   machine_mode best_mode = VOIDmode, mode;
1246   poly_int64 best_nunits = 0;
1247 
1248   if (SCALAR_FLOAT_MODE_P (inner_mode))
1249     mode = MIN_MODE_VECTOR_FLOAT;
1250   else if (SCALAR_FRACT_MODE_P (inner_mode))
1251     mode = MIN_MODE_VECTOR_FRACT;
1252   else if (SCALAR_UFRACT_MODE_P (inner_mode))
1253     mode = MIN_MODE_VECTOR_UFRACT;
1254   else if (SCALAR_ACCUM_MODE_P (inner_mode))
1255     mode = MIN_MODE_VECTOR_ACCUM;
1256   else if (SCALAR_UACCUM_MODE_P (inner_mode))
1257     mode = MIN_MODE_VECTOR_UACCUM;
1258   else if (inner_mode == BImode)
1259     mode = MIN_MODE_VECTOR_BOOL;
1260   else
1261     mode = MIN_MODE_VECTOR_INT;
1262 
1263   FOR_EACH_MODE_FROM (mode, mode)
1264     if (GET_MODE_INNER (mode) == inner_mode
1265 	&& maybe_gt (GET_MODE_NUNITS (mode), best_nunits)
1266 	&& optab_handler (op, mode) != CODE_FOR_nothing)
1267       best_mode = mode, best_nunits = GET_MODE_NUNITS (mode);
1268 
1269   if (best_mode == VOIDmode)
1270     return NULL_TREE;
1271   else
1272     return build_vector_type_for_mode (type, best_mode);
1273 }
1274 
1275 
1276 /* Build a reference to the element of the vector VECT.  The function
1277    returns either the element itself, a BIT_FIELD_REF, or an
1278    ARRAY_REF expression.
1279 
1280    GSI is required to insert temporary variables while building a
1281    reference to the element of the vector VECT.
1282 
1283    PTMPVEC is a pointer to the temporary variable for caching
1284    purposes.  If PTMPVEC is NULL, a new temporary variable
1285    will be created.  */
1286 static tree
1287 vector_element (gimple_stmt_iterator *gsi, tree vect, tree idx, tree *ptmpvec)
1288 {
1289   tree vect_type, vect_elt_type;
1290   gimple *asgn;
1291   tree tmpvec;
1292   tree arraytype;
1293   bool need_asgn = true;
1294   unsigned int elements;
1295 
1296   vect_type = TREE_TYPE (vect);
1297   vect_elt_type = TREE_TYPE (vect_type);
1298   elements = nunits_for_known_piecewise_op (vect_type);
1299 
1300   if (TREE_CODE (idx) == INTEGER_CST)
1301     {
1302       unsigned HOST_WIDE_INT index;
1303 
1304       /* Given that we're about to compute a binary modulus,
1305 	 we don't care about the high bits of the value.  */
1306       index = TREE_INT_CST_LOW (idx);
1307       if (!tree_fits_uhwi_p (idx) || index >= elements)
1308 	{
1309 	  index &= elements - 1;
1310 	  idx = build_int_cst (TREE_TYPE (idx), index);
1311 	}
1312 
1313       /* When lowering a vector statement sequence do some easy
1314          simplification by looking through intermediate vector results.  */
1315       if (TREE_CODE (vect) == SSA_NAME)
1316 	{
1317 	  gimple *def_stmt = SSA_NAME_DEF_STMT (vect);
1318 	  if (is_gimple_assign (def_stmt)
1319 	      && (gimple_assign_rhs_code (def_stmt) == VECTOR_CST
1320 		  || gimple_assign_rhs_code (def_stmt) == CONSTRUCTOR))
1321 	    vect = gimple_assign_rhs1 (def_stmt);
1322 	}
1323 
1324       if (TREE_CODE (vect) == VECTOR_CST)
1325 	return VECTOR_CST_ELT (vect, index);
1326       else if (TREE_CODE (vect) == CONSTRUCTOR
1327 	       && (CONSTRUCTOR_NELTS (vect) == 0
1328 		   || TREE_CODE (TREE_TYPE (CONSTRUCTOR_ELT (vect, 0)->value))
1329 		      != VECTOR_TYPE))
1330         {
1331 	  if (index < CONSTRUCTOR_NELTS (vect))
1332 	    return CONSTRUCTOR_ELT (vect, index)->value;
1333           return build_zero_cst (vect_elt_type);
1334         }
1335       else
1336         {
1337 	  tree size = TYPE_SIZE (vect_elt_type);
1338 	  tree pos = fold_build2 (MULT_EXPR, bitsizetype, bitsize_int (index),
1339 				  size);
1340 	  return fold_build3 (BIT_FIELD_REF, vect_elt_type, vect, size, pos);
1341         }
1342     }
1343 
1344   if (!ptmpvec)
1345     tmpvec = create_tmp_var (vect_type, "vectmp");
1346   else if (!*ptmpvec)
1347     tmpvec = *ptmpvec = create_tmp_var (vect_type, "vectmp");
1348   else
1349     {
1350       tmpvec = *ptmpvec;
1351       need_asgn = false;
1352     }
1353 
1354   if (need_asgn)
1355     {
1356       TREE_ADDRESSABLE (tmpvec) = 1;
1357       asgn = gimple_build_assign (tmpvec, vect);
1358       gsi_insert_before (gsi, asgn, GSI_SAME_STMT);
1359     }
1360 
1361   arraytype = build_array_type_nelts (vect_elt_type, elements);
1362   return build4 (ARRAY_REF, vect_elt_type,
1363                  build1 (VIEW_CONVERT_EXPR, arraytype, tmpvec),
1364                  idx, NULL_TREE, NULL_TREE);
1365 }
1366 
1367 /* Check whether the VEC_PERM_EXPR in the given setting is supported
1368    by hardware, or lower it piecewise.
1369 
1370    When VEC_PERM_EXPR has the same first and second operands:
1371    VEC_PERM_EXPR <v0, v0, mask> the lowered version would be
1372    {v0[mask[0]], v0[mask[1]], ...}
1373    MASK and V0 must have the same number of elements.
1374 
1375    Otherwise VEC_PERM_EXPR <v0, v1, mask> is lowered to
1376    {mask[0] < len(v0) ? v0[mask[0]] : v1[mask[0]], ...}
1377    V0 and V1 must have the same type.  MASK, V0, V1 must have the
1378    same number of elements.  */
1379 
1380 static void
1381 lower_vec_perm (gimple_stmt_iterator *gsi)
1382 {
1383   gassign *stmt = as_a <gassign *> (gsi_stmt (*gsi));
1384   tree mask = gimple_assign_rhs3 (stmt);
1385   tree vec0 = gimple_assign_rhs1 (stmt);
1386   tree vec1 = gimple_assign_rhs2 (stmt);
1387   tree vect_type = TREE_TYPE (vec0);
1388   tree mask_type = TREE_TYPE (mask);
1389   tree vect_elt_type = TREE_TYPE (vect_type);
1390   tree mask_elt_type = TREE_TYPE (mask_type);
1391   unsigned HOST_WIDE_INT elements;
1392   vec<constructor_elt, va_gc> *v;
1393   tree constr, t, si, i_val;
1394   tree vec0tmp = NULL_TREE, vec1tmp = NULL_TREE, masktmp = NULL_TREE;
1395   bool two_operand_p = !operand_equal_p (vec0, vec1, 0);
1396   location_t loc = gimple_location (gsi_stmt (*gsi));
1397   unsigned i;
1398 
1399   if (!TYPE_VECTOR_SUBPARTS (vect_type).is_constant (&elements))
1400     return;
1401 
1402   if (TREE_CODE (mask) == SSA_NAME)
1403     {
1404       gimple *def_stmt = SSA_NAME_DEF_STMT (mask);
1405       if (is_gimple_assign (def_stmt)
1406 	  && gimple_assign_rhs_code (def_stmt) == VECTOR_CST)
1407 	mask = gimple_assign_rhs1 (def_stmt);
1408     }
1409 
1410   vec_perm_builder sel_int;
1411 
1412   if (TREE_CODE (mask) == VECTOR_CST
1413       && tree_to_vec_perm_builder (&sel_int, mask))
1414     {
1415       vec_perm_indices indices (sel_int, 2, elements);
1416       if (can_vec_perm_const_p (TYPE_MODE (vect_type), indices))
1417 	{
1418 	  gimple_assign_set_rhs3 (stmt, mask);
1419 	  update_stmt (stmt);
1420 	  return;
1421 	}
1422       /* Also detect vec_shr pattern - VEC_PERM_EXPR with zero
1423 	 vector as VEC1 and a right element shift MASK.  */
1424       if (optab_handler (vec_shr_optab, TYPE_MODE (vect_type))
1425 	  != CODE_FOR_nothing
1426 	  && TREE_CODE (vec1) == VECTOR_CST
1427 	  && initializer_zerop (vec1)
1428 	  && maybe_ne (indices[0], 0)
1429 	  && known_lt (poly_uint64 (indices[0]), elements))
1430 	{
1431 	  bool ok_p = indices.series_p (0, 1, indices[0], 1);
1432 	  if (!ok_p)
1433 	    {
1434 	      for (i = 1; i < elements; ++i)
1435 		{
1436 		  poly_uint64 actual = indices[i];
1437 		  poly_uint64 expected = i + indices[0];
1438 		  /* Indices into the second vector are all equivalent.  */
1439 		  if (maybe_lt (actual, elements)
1440 		      ? maybe_ne (actual, expected)
1441 		      : maybe_lt (expected, elements))
1442 		    break;
1443 		}
1444 	      ok_p = i == elements;
1445 	    }
1446 	  if (ok_p)
1447 	    {
1448 	      gimple_assign_set_rhs3 (stmt, mask);
1449 	      update_stmt (stmt);
1450 	      return;
1451 	    }
1452 	}
1453       /* And similarly vec_shl pattern.  */
1454       if (optab_handler (vec_shl_optab, TYPE_MODE (vect_type))
1455 	  != CODE_FOR_nothing
1456 	  && TREE_CODE (vec0) == VECTOR_CST
1457 	  && initializer_zerop (vec0))
1458 	{
1459 	  unsigned int first = 0;
1460 	  for (i = 0; i < elements; ++i)
1461 	    if (known_eq (poly_uint64 (indices[i]), elements))
1462 	      {
1463 		if (i == 0 || first)
1464 		  break;
1465 		first = i;
1466 	      }
1467 	    else if (first
1468 		     ? maybe_ne (poly_uint64 (indices[i]),
1469 					      elements + i - first)
1470 		     : maybe_ge (poly_uint64 (indices[i]), elements))
1471 	      break;
1472 	  if (i == elements)
1473 	    {
1474 	      gimple_assign_set_rhs3 (stmt, mask);
1475 	      update_stmt (stmt);
1476 	      return;
1477 	    }
1478 	}
1479     }
1480   else if (can_vec_perm_var_p (TYPE_MODE (vect_type)))
1481     return;
1482 
1483   warning_at (loc, OPT_Wvector_operation_performance,
1484               "vector shuffling operation will be expanded piecewise");
1485 
1486   vec_alloc (v, elements);
1487   for (i = 0; i < elements; i++)
1488     {
1489       si = size_int (i);
1490       i_val = vector_element (gsi, mask, si, &masktmp);
1491 
1492       if (TREE_CODE (i_val) == INTEGER_CST)
1493         {
1494 	  unsigned HOST_WIDE_INT index;
1495 
1496 	  index = TREE_INT_CST_LOW (i_val);
1497 	  if (!tree_fits_uhwi_p (i_val) || index >= elements)
1498 	    i_val = build_int_cst (mask_elt_type, index & (elements - 1));
1499 
1500           if (two_operand_p && (index & elements) != 0)
1501 	    t = vector_element (gsi, vec1, i_val, &vec1tmp);
1502 	  else
1503 	    t = vector_element (gsi, vec0, i_val, &vec0tmp);
1504 
1505           t = force_gimple_operand_gsi (gsi, t, true, NULL_TREE,
1506 					true, GSI_SAME_STMT);
1507         }
1508       else
1509         {
1510 	  tree cond = NULL_TREE, v0_val;
1511 
1512 	  if (two_operand_p)
1513 	    {
1514 	      cond = fold_build2 (BIT_AND_EXPR, mask_elt_type, i_val,
1515 			          build_int_cst (mask_elt_type, elements));
1516 	      cond = force_gimple_operand_gsi (gsi, cond, true, NULL_TREE,
1517 					       true, GSI_SAME_STMT);
1518 	    }
1519 
1520 	  i_val = fold_build2 (BIT_AND_EXPR, mask_elt_type, i_val,
1521 			       build_int_cst (mask_elt_type, elements - 1));
1522 	  i_val = force_gimple_operand_gsi (gsi, i_val, true, NULL_TREE,
1523 					    true, GSI_SAME_STMT);
1524 
1525 	  v0_val = vector_element (gsi, vec0, i_val, &vec0tmp);
1526 	  v0_val = force_gimple_operand_gsi (gsi, v0_val, true, NULL_TREE,
1527 					     true, GSI_SAME_STMT);
1528 
1529 	  if (two_operand_p)
1530 	    {
1531 	      tree v1_val;
1532 
1533 	      v1_val = vector_element (gsi, vec1, i_val, &vec1tmp);
1534 	      v1_val = force_gimple_operand_gsi (gsi, v1_val, true, NULL_TREE,
1535 						 true, GSI_SAME_STMT);
1536 
1537 	      cond = fold_build2 (EQ_EXPR, boolean_type_node,
1538 				  cond, build_zero_cst (mask_elt_type));
1539 	      cond = fold_build3 (COND_EXPR, vect_elt_type,
1540 				  cond, v0_val, v1_val);
1541               t = force_gimple_operand_gsi (gsi, cond, true, NULL_TREE,
1542 					    true, GSI_SAME_STMT);
1543             }
1544 	  else
1545 	    t = v0_val;
1546         }
1547 
1548       CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, t);
1549     }
1550 
1551   constr = build_constructor (vect_type, v);
1552   gimple_assign_set_rhs_from_tree (gsi, constr);
1553   update_stmt (gsi_stmt (*gsi));
1554 }
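
/* For example, when neither a constant-mask nor a variable permutation is
   supported for the vector mode, a shuffle such as (a sketch; the types are
   illustrative)

     r = __builtin_shuffle (a, b, (v4si) { 0, 5, 2, 7 });

   is expanded by the code above into a CONSTRUCTOR of scalar element reads,
   conceptually r = { a[0], b[1], a[2], b[3] }.  */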
1555 
1556 /* If OP is a uniform vector, return the element it is a splat from.  */
1557 
1558 static tree
1559 ssa_uniform_vector_p (tree op)
1560 {
1561   if (TREE_CODE (op) == VECTOR_CST
1562       || TREE_CODE (op) == VEC_DUPLICATE_EXPR
1563       || TREE_CODE (op) == CONSTRUCTOR)
1564     return uniform_vector_p (op);
1565   if (TREE_CODE (op) == SSA_NAME)
1566     {
1567       gimple *def_stmt = SSA_NAME_DEF_STMT (op);
1568       if (gimple_assign_single_p (def_stmt))
1569 	return uniform_vector_p (gimple_assign_rhs1 (def_stmt));
1570     }
1571   return NULL_TREE;
1572 }
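
/* For instance, if OP is an SSA name defined by x_1 = { y_2, y_2, y_2, y_2 },
   this returns y_2; for a non-uniform vector it returns NULL_TREE.  */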
1573 
1574 /* Return the type in which the operation CODE with optab OP can be
1575    computed.  */
1576 
1577 static tree
1578 get_compute_type (enum tree_code code, optab op, tree type)
1579 {
1580   /* For very wide vectors, try using a smaller vector mode.  */
1581   tree compute_type = type;
1582   if (op
1583       && (!VECTOR_MODE_P (TYPE_MODE (type))
1584 	  || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing))
1585     {
1586       tree vector_compute_type
1587 	= type_for_widest_vector_mode (TREE_TYPE (type), op);
1588       if (vector_compute_type != NULL_TREE
1589 	  && subparts_gt (compute_type, vector_compute_type)
1590 	  && maybe_ne (TYPE_VECTOR_SUBPARTS (vector_compute_type), 1U)
1591 	  && (optab_handler (op, TYPE_MODE (vector_compute_type))
1592 	      != CODE_FOR_nothing))
1593 	compute_type = vector_compute_type;
1594     }
1595 
1596   /* If we are breaking a BLKmode vector into smaller pieces,
1597      type_for_widest_vector_mode has already looked into the optab,
1598      so skip these checks.  */
1599   if (compute_type == type)
1600     {
1601       machine_mode compute_mode = TYPE_MODE (compute_type);
1602       if (VECTOR_MODE_P (compute_mode))
1603 	{
1604 	  if (op && optab_handler (op, compute_mode) != CODE_FOR_nothing)
1605 	    return compute_type;
1606 	  if (code == MULT_HIGHPART_EXPR
1607 	      && can_mult_highpart_p (compute_mode,
1608 				      TYPE_UNSIGNED (compute_type)))
1609 	    return compute_type;
1610 	}
1611       /* There is no operation in hardware, so fall back to scalars.  */
1612       compute_type = TREE_TYPE (type);
1613     }
1614 
1615   return compute_type;
1616 }
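
/* As an illustration (the target capabilities here are hypothetical): for a
   V8SI addition on a target that only provides a V4SI add, the returned
   compute type is the V4SI vector type; if no vector add is available at
   all, the element type is returned and the operation is expanded fully
   piecewise.  */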
1617 
1618 static tree
1619 do_cond (gimple_stmt_iterator *gsi, tree inner_type, tree a, tree b,
1620 	 tree bitpos, tree bitsize, enum tree_code code,
1621 	 tree type ATTRIBUTE_UNUSED)
1622 {
1623   if (TREE_CODE (TREE_TYPE (a)) == VECTOR_TYPE)
1624     a = tree_vec_extract (gsi, inner_type, a, bitsize, bitpos);
1625   if (TREE_CODE (TREE_TYPE (b)) == VECTOR_TYPE)
1626     b = tree_vec_extract (gsi, inner_type, b, bitsize, bitpos);
1627   tree cond = gimple_assign_rhs1 (gsi_stmt (*gsi));
1628   return gimplify_build3 (gsi, code, inner_type, unshare_expr (cond), a, b);
1629 }
1630 
1631 /* Expand a vector COND_EXPR to scalars, piecewise.  */
1632 static void
1633 expand_vector_scalar_condition (gimple_stmt_iterator *gsi)
1634 {
1635   gassign *stmt = as_a <gassign *> (gsi_stmt (*gsi));
1636   tree type = gimple_expr_type (stmt);
1637   tree compute_type = get_compute_type (COND_EXPR, mov_optab, type);
1638   machine_mode compute_mode = TYPE_MODE (compute_type);
1639   gcc_assert (compute_mode != BLKmode);
1640   tree lhs = gimple_assign_lhs (stmt);
1641   tree rhs2 = gimple_assign_rhs2 (stmt);
1642   tree rhs3 = gimple_assign_rhs3 (stmt);
1643   tree new_rhs;
1644 
1645   /* If the compute mode is not a vector mode (hence we are not decomposing
1646      a BLKmode vector to smaller, hardware-supported vectors), we may want
1647      to expand the operations in parallel.  */
1648   if (!VECTOR_MODE_P (compute_mode))
1649     new_rhs = expand_vector_parallel (gsi, do_cond, type, rhs2, rhs3,
1650 				      COND_EXPR);
1651   else
1652     new_rhs = expand_vector_piecewise (gsi, do_cond, type, compute_type,
1653 				       rhs2, rhs3, COND_EXPR);
1654   if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (new_rhs)))
1655     new_rhs = gimplify_build1 (gsi, VIEW_CONVERT_EXPR, TREE_TYPE (lhs),
1656 			       new_rhs);
1657 
1658   /* NOTE:  We should avoid using gimple_assign_set_rhs_from_tree.  One
1659      way to do it is to change expand_vector_operation and its callees to
1660      return a tree_code, RHS1 and RHS2 instead of a tree. */
1661   gimple_assign_set_rhs_from_tree (gsi, new_rhs);
1662   update_stmt (gsi_stmt (*gsi));
1663 }
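
/* For example (a sketch), a statement like

     vec_lhs = scalar_cond ? vec_a : vec_b;

   on a BLKmode vector type is lowered so that each element (or word-sized
   chunk, in the parallel case) is selected by its own COND_EXPR reusing
   the same scalar condition.  */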
1664 
1665 /* Callback for expand_vector_piecewise to do VEC_CONVERT ifn call
1666    lowering.  If INNER_TYPE is not a vector type, this is a scalar
1667    fallback.  */
1668 
1669 static tree
1670 do_vec_conversion (gimple_stmt_iterator *gsi, tree inner_type, tree a,
1671 		   tree decl, tree bitpos, tree bitsize,
1672 		   enum tree_code code, tree type)
1673 {
1674   a = tree_vec_extract (gsi, inner_type, a, bitsize, bitpos);
1675   if (!VECTOR_TYPE_P (inner_type))
1676     return gimplify_build1 (gsi, code, TREE_TYPE (type), a);
1677   if (code == CALL_EXPR)
1678     {
1679       gimple *g = gimple_build_call (decl, 1, a);
1680       tree lhs = make_ssa_name (TREE_TYPE (TREE_TYPE (decl)));
1681       gimple_call_set_lhs (g, lhs);
1682       gsi_insert_before (gsi, g, GSI_SAME_STMT);
1683       return lhs;
1684     }
1685   else
1686     {
1687       tree outer_type = build_vector_type (TREE_TYPE (type),
1688 					   TYPE_VECTOR_SUBPARTS (inner_type));
1689       return gimplify_build1 (gsi, code, outer_type, a);
1690     }
1691 }
1692 
1693 /* Similarly, but for a narrowing conversion.  */
1694 
1695 static tree
1696 do_vec_narrow_conversion (gimple_stmt_iterator *gsi, tree inner_type, tree a,
1697 			  tree, tree bitpos, tree, enum tree_code code,
1698 			  tree type)
1699 {
1700   tree itype = build_vector_type (TREE_TYPE (inner_type),
1701 				  exact_div (TYPE_VECTOR_SUBPARTS (inner_type),
1702 					     2));
1703   tree b = tree_vec_extract (gsi, itype, a, TYPE_SIZE (itype), bitpos);
1704   tree c = tree_vec_extract (gsi, itype, a, TYPE_SIZE (itype),
1705 			     int_const_binop (PLUS_EXPR, bitpos,
1706 					      TYPE_SIZE (itype)));
1707   tree outer_type = build_vector_type (TREE_TYPE (type),
1708 				       TYPE_VECTOR_SUBPARTS (inner_type));
1709   return gimplify_build2 (gsi, code, outer_type, b, c);
1710 }
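
/* For instance (a sketch): when narrowing V8SI to V8HI and the target
   handles VEC_PACK_TRUNC_EXPR on V4SI inputs, each V8SI piece A is split
   into its low and high V4SI halves B and C, and the result piece is the
   V8HI value VEC_PACK_TRUNC_EXPR <B, C>.  */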
1711 
1712 /* Expand VEC_CONVERT ifn call.  */
1713 
1714 static void
1715 expand_vector_conversion (gimple_stmt_iterator *gsi)
1716 {
1717   gimple *stmt = gsi_stmt (*gsi);
1718   gimple *g;
1719   tree lhs = gimple_call_lhs (stmt);
1720   if (lhs == NULL_TREE)
1721     {
1722       g = gimple_build_nop ();
1723       gsi_replace (gsi, g, false);
1724       return;
1725     }
1726   tree arg = gimple_call_arg (stmt, 0);
1727   tree ret_type = TREE_TYPE (lhs);
1728   tree arg_type = TREE_TYPE (arg);
1729   tree new_rhs, compute_type = TREE_TYPE (arg_type);
1730   enum tree_code code = NOP_EXPR;
1731   enum tree_code code1 = ERROR_MARK;
1732   enum { NARROW, NONE, WIDEN } modifier = NONE;
1733   optab optab1 = unknown_optab;
1734 
1735   gcc_checking_assert (VECTOR_TYPE_P (ret_type) && VECTOR_TYPE_P (arg_type));
1736   gcc_checking_assert (tree_fits_uhwi_p (TYPE_SIZE (TREE_TYPE (ret_type))));
1737   gcc_checking_assert (tree_fits_uhwi_p (TYPE_SIZE (TREE_TYPE (arg_type))));
1738   if (INTEGRAL_TYPE_P (TREE_TYPE (ret_type))
1739       && SCALAR_FLOAT_TYPE_P (TREE_TYPE (arg_type)))
1740     code = FIX_TRUNC_EXPR;
1741   else if (INTEGRAL_TYPE_P (TREE_TYPE (arg_type))
1742 	   && SCALAR_FLOAT_TYPE_P (TREE_TYPE (ret_type)))
1743     code = FLOAT_EXPR;
1744   if (tree_to_uhwi (TYPE_SIZE (TREE_TYPE (ret_type)))
1745       < tree_to_uhwi (TYPE_SIZE (TREE_TYPE (arg_type))))
1746     modifier = NARROW;
1747   else if (tree_to_uhwi (TYPE_SIZE (TREE_TYPE (ret_type)))
1748 	   > tree_to_uhwi (TYPE_SIZE (TREE_TYPE (arg_type))))
1749     modifier = WIDEN;
1750 
1751   if (modifier == NONE && (code == FIX_TRUNC_EXPR || code == FLOAT_EXPR))
1752     {
1753       if (supportable_convert_operation (code, ret_type, arg_type, &code1))
1754 	{
1755 	  g = gimple_build_assign (lhs, code1, arg);
1756 	  gsi_replace (gsi, g, false);
1757 	  return;
1758 	}
1759       /* Can't use get_compute_type here, as supportable_convert_operation
1760 	 doesn't necessarily use an optab and needs two arguments.  */
1761       tree vec_compute_type
1762 	= type_for_widest_vector_mode (TREE_TYPE (arg_type), mov_optab);
1763       if (vec_compute_type
1764 	  && VECTOR_MODE_P (TYPE_MODE (vec_compute_type))
1765 	  && subparts_gt (arg_type, vec_compute_type))
1766 	{
1767 	  unsigned HOST_WIDE_INT nelts
1768 	    = constant_lower_bound (TYPE_VECTOR_SUBPARTS (vec_compute_type));
1769 	  while (nelts > 1)
1770 	    {
1771 	      tree ret1_type = build_vector_type (TREE_TYPE (ret_type), nelts);
1772 	      tree arg1_type = build_vector_type (TREE_TYPE (arg_type), nelts);
1773 	      if (supportable_convert_operation (code, ret1_type, arg1_type,
1774 						 &code1))
1775 		{
1776 		  new_rhs = expand_vector_piecewise (gsi, do_vec_conversion,
1777 						     ret_type, arg1_type, arg,
1778 						     NULL_TREE, code1);
1779 		  g = gimple_build_assign (lhs, new_rhs);
1780 		  gsi_replace (gsi, g, false);
1781 		  return;
1782 		}
1783 	      nelts = nelts / 2;
1784 	    }
1785 	}
1786     }
1787   else if (modifier == NARROW)
1788     {
1789       switch (code)
1790 	{
1791 	CASE_CONVERT:
1792 	  code1 = VEC_PACK_TRUNC_EXPR;
1793 	  optab1 = optab_for_tree_code (code1, arg_type, optab_default);
1794 	  break;
1795 	case FIX_TRUNC_EXPR:
1796 	  code1 = VEC_PACK_FIX_TRUNC_EXPR;
1797 	  /* The signedness is determined from the output operand.  */
1798 	  optab1 = optab_for_tree_code (code1, ret_type, optab_default);
1799 	  break;
1800 	case FLOAT_EXPR:
1801 	  code1 = VEC_PACK_FLOAT_EXPR;
1802 	  optab1 = optab_for_tree_code (code1, arg_type, optab_default);
1803 	  break;
1804 	default:
1805 	  gcc_unreachable ();
1806 	}
1807 
1808       if (optab1)
1809 	compute_type = get_compute_type (code1, optab1, arg_type);
1810       enum insn_code icode1;
1811       if (VECTOR_TYPE_P (compute_type)
1812 	  && ((icode1 = optab_handler (optab1, TYPE_MODE (compute_type)))
1813 	      != CODE_FOR_nothing)
1814 	  && VECTOR_MODE_P (insn_data[icode1].operand[0].mode))
1815 	{
1816 	  tree cretd_type
1817 	    = build_vector_type (TREE_TYPE (ret_type),
1818 				 TYPE_VECTOR_SUBPARTS (compute_type) * 2);
1819 	  if (insn_data[icode1].operand[0].mode == TYPE_MODE (cretd_type))
1820 	    {
1821 	      if (compute_type == arg_type)
1822 		{
1823 		  new_rhs = gimplify_build2 (gsi, code1, cretd_type,
1824 					     arg, build_zero_cst (arg_type));
1825 		  new_rhs = tree_vec_extract (gsi, ret_type, new_rhs,
1826 					      TYPE_SIZE (ret_type),
1827 					      bitsize_int (0));
1828 		  g = gimple_build_assign (lhs, new_rhs);
1829 		  gsi_replace (gsi, g, false);
1830 		  return;
1831 		}
1832 	      tree dcompute_type
1833 		= build_vector_type (TREE_TYPE (compute_type),
1834 				     TYPE_VECTOR_SUBPARTS (compute_type) * 2);
1835 	      if (TYPE_MAIN_VARIANT (dcompute_type)
1836 		  == TYPE_MAIN_VARIANT (arg_type))
1837 		new_rhs = do_vec_narrow_conversion (gsi, dcompute_type, arg,
1838 						    NULL_TREE, bitsize_int (0),
1839 						    NULL_TREE, code1,
1840 						    ret_type);
1841 	      else
1842 		new_rhs = expand_vector_piecewise (gsi,
1843 						   do_vec_narrow_conversion,
1844 						   arg_type, dcompute_type,
1845 						   arg, NULL_TREE, code1,
1846 						   ret_type);
1847 	      g = gimple_build_assign (lhs, new_rhs);
1848 	      gsi_replace (gsi, g, false);
1849 	      return;
1850 	    }
1851 	}
1852     }
1853   else if (modifier == WIDEN)
1854     {
1855       enum tree_code code2 = ERROR_MARK;
1856       optab optab2 = unknown_optab;
1857       switch (code)
1858 	{
1859 	CASE_CONVERT:
1860 	  code1 = VEC_UNPACK_LO_EXPR;
1861           code2 = VEC_UNPACK_HI_EXPR;
1862 	  break;
1863 	case FIX_TRUNC_EXPR:
1864 	  code1 = VEC_UNPACK_FIX_TRUNC_LO_EXPR;
1865 	  code2 = VEC_UNPACK_FIX_TRUNC_HI_EXPR;
1866 	  break;
1867 	case FLOAT_EXPR:
1868 	  code1 = VEC_UNPACK_FLOAT_LO_EXPR;
1869 	  code2 = VEC_UNPACK_FLOAT_HI_EXPR;
1870 	  break;
1871 	default:
1872 	  gcc_unreachable ();
1873 	}
1874       if (BYTES_BIG_ENDIAN)
1875 	std::swap (code1, code2);
1876 
1877       if (code == FIX_TRUNC_EXPR)
1878 	{
1879 	  /* The signedness is determined from the output operand.  */
1880 	  optab1 = optab_for_tree_code (code1, ret_type, optab_default);
1881 	  optab2 = optab_for_tree_code (code2, ret_type, optab_default);
1882 	}
1883       else
1884 	{
1885 	  optab1 = optab_for_tree_code (code1, arg_type, optab_default);
1886 	  optab2 = optab_for_tree_code (code2, arg_type, optab_default);
1887 	}
1888 
1889       if (optab1 && optab2)
1890 	compute_type = get_compute_type (code1, optab1, arg_type);
1891 
1892       enum insn_code icode1, icode2;
1893       if (VECTOR_TYPE_P (compute_type)
1894 	  && ((icode1 = optab_handler (optab1, TYPE_MODE (compute_type)))
1895 	      != CODE_FOR_nothing)
1896 	  && ((icode2 = optab_handler (optab2, TYPE_MODE (compute_type)))
1897 	      != CODE_FOR_nothing)
1898 	  && VECTOR_MODE_P (insn_data[icode1].operand[0].mode)
1899 	  && (insn_data[icode1].operand[0].mode
1900 	      == insn_data[icode2].operand[0].mode))
1901 	{
1902 	  poly_uint64 nunits
1903 	    = exact_div (TYPE_VECTOR_SUBPARTS (compute_type), 2);
1904 	  tree cretd_type = build_vector_type (TREE_TYPE (ret_type), nunits);
1905 	  if (insn_data[icode1].operand[0].mode == TYPE_MODE (cretd_type))
1906 	    {
1907 	      vec<constructor_elt, va_gc> *v;
1908 	      tree part_width = TYPE_SIZE (compute_type);
1909 	      tree index = bitsize_int (0);
1910 	      int nunits = nunits_for_known_piecewise_op (arg_type);
1911 	      int delta = tree_to_uhwi (part_width)
1912 			  / tree_to_uhwi (TYPE_SIZE (TREE_TYPE (arg_type)));
1913 	      int i;
1914 	      location_t loc = gimple_location (gsi_stmt (*gsi));
1915 
1916 	      if (compute_type != arg_type)
1917 		warning_at (loc, OPT_Wvector_operation_performance,
1918 			    "vector operation will be expanded piecewise");
1919 	      else
1920 		{
1921 		  nunits = 1;
1922 		  delta = 1;
1923 		}
1924 
1925 	      vec_alloc (v, (nunits + delta - 1) / delta * 2);
1926 	      for (i = 0; i < nunits;
1927 		   i += delta, index = int_const_binop (PLUS_EXPR, index,
1928 							part_width))
1929 		{
1930 		  tree a = arg;
1931 		  if (compute_type != arg_type)
1932 		    a = tree_vec_extract (gsi, compute_type, a, part_width,
1933 					  index);
1934 		  tree result = gimplify_build1 (gsi, code1, cretd_type, a);
1935 		  constructor_elt ce = { NULL_TREE, result };
1936 		  v->quick_push (ce);
1937 		  ce.value = gimplify_build1 (gsi, code2, cretd_type, a);
1938 		  v->quick_push (ce);
1939 		}
1940 
1941 	      new_rhs = build_constructor (ret_type, v);
1942 	      g = gimple_build_assign (lhs, new_rhs);
1943 	      gsi_replace (gsi, g, false);
1944 	      return;
1945 	    }
1946 	}
1947     }
1948 
1949   new_rhs = expand_vector_piecewise (gsi, do_vec_conversion, arg_type,
1950 				     TREE_TYPE (arg_type), arg,
1951 				     NULL_TREE, code, ret_type);
1952   g = gimple_build_assign (lhs, new_rhs);
1953   gsi_replace (gsi, g, false);
1954 }
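
/* As an example (a sketch; the types are illustrative): a call generated
   for __builtin_convertvector from a v4sf value to a v4si type becomes a
   single FIX_TRUNC_EXPR assignment when the target supports the conversion
   directly; otherwise the helpers above split it into narrower vector
   conversions or, as a last resort, into one scalar conversion per
   element.  */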
1955 
1956 /* Process one statement.  If we identify a vector operation, expand it.  */
1957 
1958 static void
1959 expand_vector_operations_1 (gimple_stmt_iterator *gsi)
1960 {
1961   tree lhs, rhs1, rhs2 = NULL, type, compute_type = NULL_TREE;
1962   enum tree_code code;
1963   optab op = unknown_optab;
1964   enum gimple_rhs_class rhs_class;
1965   tree new_rhs;
1966 
1967   /* Only consider GIMPLE_ASSIGNs, plus the IFN_VEC_CONVERT internal call.  */
1968   gassign *stmt = dyn_cast <gassign *> (gsi_stmt (*gsi));
1969   if (!stmt)
1970     {
1971       if (gimple_call_internal_p (gsi_stmt (*gsi), IFN_VEC_CONVERT))
1972 	expand_vector_conversion (gsi);
1973       return;
1974     }
1975 
1976   code = gimple_assign_rhs_code (stmt);
1977   rhs_class = get_gimple_rhs_class (code);
1978   lhs = gimple_assign_lhs (stmt);
1979 
1980   if (code == VEC_PERM_EXPR)
1981     {
1982       lower_vec_perm (gsi);
1983       return;
1984     }
1985 
1986   if (code == VEC_COND_EXPR)
1987     {
1988       expand_vector_condition (gsi);
1989       return;
1990     }
1991 
1992   if (code == COND_EXPR
1993       && TREE_CODE (TREE_TYPE (gimple_assign_lhs (stmt))) == VECTOR_TYPE
1994       && TYPE_MODE (TREE_TYPE (gimple_assign_lhs (stmt))) == BLKmode)
1995     {
1996       expand_vector_scalar_condition (gsi);
1997       return;
1998     }
1999 
2000   if (code == CONSTRUCTOR
2001       && TREE_CODE (lhs) == SSA_NAME
2002       && VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (lhs)))
2003       && !gimple_clobber_p (stmt)
2004       && optimize)
2005     {
2006       optimize_vector_constructor (gsi);
2007       return;
2008     }
2009 
2010   if (rhs_class != GIMPLE_UNARY_RHS && rhs_class != GIMPLE_BINARY_RHS)
2011     return;
2012 
2013   rhs1 = gimple_assign_rhs1 (stmt);
2014   type = gimple_expr_type (stmt);
2015   if (rhs_class == GIMPLE_BINARY_RHS)
2016     rhs2 = gimple_assign_rhs2 (stmt);
2017 
2018   if (!VECTOR_TYPE_P (type)
2019       || !VECTOR_TYPE_P (TREE_TYPE (rhs1)))
2020     return;
2021 
2022   /* A scalar operation pretending to be a vector one.  */
2023   if (VECTOR_BOOLEAN_TYPE_P (type)
2024       && !VECTOR_MODE_P (TYPE_MODE (type))
2025       && TYPE_MODE (type) != BLKmode
2026       && (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt)) != tcc_comparison
2027 	  || (VECTOR_BOOLEAN_TYPE_P (TREE_TYPE (rhs1))
2028 	      && !VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (rhs1)))
2029 	      && TYPE_MODE (TREE_TYPE (rhs1)) != BLKmode)))
2030     return;
2031 
2032   /* If the vector operation operates on vectors whose elements are all
2033      the same, implement it as a scalar operation followed by a splat,
2034      provided the target supports the scalar operation.  */
2035   tree srhs1, srhs2 = NULL_TREE;
2036   if ((srhs1 = ssa_uniform_vector_p (rhs1)) != NULL_TREE
2037       && (rhs2 == NULL_TREE
2038 	  || (! VECTOR_TYPE_P (TREE_TYPE (rhs2))
2039 	      && (srhs2 = rhs2))
2040 	  || (srhs2 = ssa_uniform_vector_p (rhs2)) != NULL_TREE)
2041       /* As we query direct optabs restrict to non-convert operations.  */
2042       && TYPE_MODE (TREE_TYPE (type)) == TYPE_MODE (TREE_TYPE (srhs1)))
2043     {
2044       op = optab_for_tree_code (code, TREE_TYPE (type), optab_scalar);
2045       if (op >= FIRST_NORM_OPTAB && op <= LAST_NORM_OPTAB
2046 	  && optab_handler (op, TYPE_MODE (TREE_TYPE (type))) != CODE_FOR_nothing)
2047 	{
2048 	  tree slhs = make_ssa_name (TREE_TYPE (srhs1));
2049 	  gimple *repl = gimple_build_assign (slhs, code, srhs1, srhs2);
2050 	  gsi_insert_before (gsi, repl, GSI_SAME_STMT);
2051 	  gimple_assign_set_rhs_from_tree (gsi,
2052 					   build_vector_from_val (type, slhs));
2053 	  update_stmt (stmt);
2054 	  return;
2055 	}
2056     }
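
  /* For example (a sketch): with scalar addition available,

       { x, x, x, x } + { y, y, y, y }

     is rewritten above as a single scalar addition s = x + y followed by
     a splat of s via build_vector_from_val.  */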
2057 
2058   if (CONVERT_EXPR_CODE_P (code)
2059       || code == FLOAT_EXPR
2060       || code == FIX_TRUNC_EXPR
2061       || code == VIEW_CONVERT_EXPR)
2062     return;
2063 
2064   /* The signedness is determined from the input argument.  */
2065   if (code == VEC_UNPACK_FLOAT_HI_EXPR
2066       || code == VEC_UNPACK_FLOAT_LO_EXPR
2067       || code == VEC_PACK_FLOAT_EXPR)
2068     {
2069       /* We do not know how to scalarize those.  */
2070       return;
2071     }
2072 
2073   /* For widening/narrowing vector operations, the relevant type is that
2074      of the arguments, not of the widened result.  VEC_UNPACK_FLOAT_*_EXPR
2075      is computed from the argument type in the same way (see above).  */
2076   if (code == WIDEN_SUM_EXPR
2077       || code == VEC_WIDEN_MULT_HI_EXPR
2078       || code == VEC_WIDEN_MULT_LO_EXPR
2079       || code == VEC_WIDEN_MULT_EVEN_EXPR
2080       || code == VEC_WIDEN_MULT_ODD_EXPR
2081       || code == VEC_UNPACK_HI_EXPR
2082       || code == VEC_UNPACK_LO_EXPR
2083       || code == VEC_UNPACK_FIX_TRUNC_HI_EXPR
2084       || code == VEC_UNPACK_FIX_TRUNC_LO_EXPR
2085       || code == VEC_PACK_TRUNC_EXPR
2086       || code == VEC_PACK_SAT_EXPR
2087       || code == VEC_PACK_FIX_TRUNC_EXPR
2088       || code == VEC_WIDEN_LSHIFT_HI_EXPR
2089       || code == VEC_WIDEN_LSHIFT_LO_EXPR)
2090     {
2091       /* We do not know how to scalarize those.  */
2092       return;
2093     }
2094 
2095   /* Choose between vector shift/rotate by vector and vector shift/rotate
2096      by scalar.  */
2097   if (code == LSHIFT_EXPR
2098       || code == RSHIFT_EXPR
2099       || code == LROTATE_EXPR
2100       || code == RROTATE_EXPR)
2101     {
2102       optab opv;
2103 
2104       /* Check whether we have vector <op> {x,x,x,x} where x
2105          could be a scalar variable or a constant.  Transform
2106          vector <op> {x,x,x,x} ==> vector <op> scalar.  */
2107       if (VECTOR_INTEGER_TYPE_P (TREE_TYPE (rhs2)))
2108         {
2109           tree first;
2110 
2111           if ((first = ssa_uniform_vector_p (rhs2)) != NULL_TREE)
2112             {
2113               gimple_assign_set_rhs2 (stmt, first);
2114               update_stmt (stmt);
2115               rhs2 = first;
2116             }
2117         }
2118 
2119       opv = optab_for_tree_code (code, type, optab_vector);
2120       if (VECTOR_INTEGER_TYPE_P (TREE_TYPE (rhs2)))
2121 	op = opv;
2122       else
2123 	{
2124           op = optab_for_tree_code (code, type, optab_scalar);
2125 
2126 	  compute_type = get_compute_type (code, op, type);
2127 	  if (compute_type == type)
2128 	    return;
2129 	  /* The rtl expander will expand vector/scalar as vector/vector
2130 	     if necessary.  Pick the one with the wider vector type.  */
2131 	  tree compute_vtype = get_compute_type (code, opv, type);
2132 	  if (subparts_gt (compute_vtype, compute_type))
2133 	    {
2134 	      compute_type = compute_vtype;
2135 	      op = opv;
2136 	    }
2137 	}
2138 
2139       if (code == LROTATE_EXPR || code == RROTATE_EXPR)
2140 	{
2141 	  if (compute_type == NULL_TREE)
2142 	    compute_type = get_compute_type (code, op, type);
2143 	  if (compute_type == type)
2144 	    return;
2145 	  /* Before splitting vector rotates into scalar rotates,
2146 	     see if we can't use vector shifts and BIT_IOR_EXPR
2147 	     instead.  For vector-by-vector rotates we'd also
2148 	     need to check BIT_AND_EXPR and NEGATE_EXPR; punt on those
2149 	     for now, as fold doesn't seem to create such rotates anyway.  */
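	  /* E.g. a V4SI rotate by 5 can stay in vector form as long as
	     V4SI shifts and IOR are supported, since it is equivalent to
	     (x << 5) | ((unsigned) x >> 27); the synthesis from shifts is
	     left to later expansion (a sketch assuming 32-bit elements).  */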
2150 	  if (compute_type == TREE_TYPE (type)
2151 	      && !VECTOR_INTEGER_TYPE_P (TREE_TYPE (rhs2)))
2152 	    {
2153 	      optab oplv = vashl_optab, opl = ashl_optab;
2154 	      optab oprv = vlshr_optab, opr = lshr_optab, opo = ior_optab;
2155 	      tree compute_lvtype = get_compute_type (LSHIFT_EXPR, oplv, type);
2156 	      tree compute_rvtype = get_compute_type (RSHIFT_EXPR, oprv, type);
2157 	      tree compute_otype = get_compute_type (BIT_IOR_EXPR, opo, type);
2158 	      tree compute_ltype = get_compute_type (LSHIFT_EXPR, opl, type);
2159 	      tree compute_rtype = get_compute_type (RSHIFT_EXPR, opr, type);
2160 	      /* The rtl expander will expand vector/scalar as vector/vector
2161 		 if necessary.  Pick the one with the wider vector type.  */
2162 	      if (subparts_gt (compute_lvtype, compute_ltype))
2163 		{
2164 		  compute_ltype = compute_lvtype;
2165 		  opl = oplv;
2166 		}
2167 	      if (subparts_gt (compute_rvtype, compute_rtype))
2168 		{
2169 		  compute_rtype = compute_rvtype;
2170 		  opr = oprv;
2171 		}
2172 	      /* Pick the narrowest type from LSHIFT_EXPR, RSHIFT_EXPR and
2173 		 BIT_IOR_EXPR.  */
2174 	      compute_type = compute_ltype;
2175 	      if (subparts_gt (compute_type, compute_rtype))
2176 		compute_type = compute_rtype;
2177 	      if (subparts_gt (compute_type, compute_otype))
2178 		compute_type = compute_otype;
2179 	      /* Verify all 3 operations can be performed in that type.  */
2180 	      if (compute_type != TREE_TYPE (type))
2181 		{
2182 		  if (optab_handler (opl, TYPE_MODE (compute_type))
2183 		      == CODE_FOR_nothing
2184 		      || optab_handler (opr, TYPE_MODE (compute_type))
2185 			 == CODE_FOR_nothing
2186 		      || optab_handler (opo, TYPE_MODE (compute_type))
2187 			 == CODE_FOR_nothing)
2188 		    compute_type = TREE_TYPE (type);
2189 		}
2190 	    }
2191 	}
2192     }
2193   else
2194     op = optab_for_tree_code (code, type, optab_default);
2195 
2196   /* Optabs will try converting a negation into a subtraction, so
2197      look for it as well.  TODO: negation of floating-point vectors
2198      might be turned into an exclusive OR toggling the sign bit.  */
2199   if (op == unknown_optab
2200       && code == NEGATE_EXPR
2201       && INTEGRAL_TYPE_P (TREE_TYPE (type)))
2202     op = optab_for_tree_code (MINUS_EXPR, type, optab_default);
2203 
2204   if (compute_type == NULL_TREE)
2205     compute_type = get_compute_type (code, op, type);
2206   if (compute_type == type)
2207     return;
2208 
2209   new_rhs = expand_vector_operation (gsi, type, compute_type, stmt, code);
2210 
2211   /* Leave the expression untouched for later expansion.  */
2212   if (new_rhs == NULL_TREE)
2213     return;
2214 
2215   if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (new_rhs)))
2216     new_rhs = gimplify_build1 (gsi, VIEW_CONVERT_EXPR, TREE_TYPE (lhs),
2217                                new_rhs);
2218 
2219   /* NOTE:  We should avoid using gimple_assign_set_rhs_from_tree.  One
2220      way to do it is to change expand_vector_operation and its callees to
2221      return a tree_code, RHS1 and RHS2 instead of a tree. */
2222   gimple_assign_set_rhs_from_tree (gsi, new_rhs);
2223   update_stmt (gsi_stmt (*gsi));
2224 }
2225 
2226 /* Use this to lower vector operations introduced by the vectorizer,
2227    if it may need the bit-twiddling tricks implemented in this file.  */
2228 
2229 static unsigned int
2230 expand_vector_operations (void)
2231 {
2232   gimple_stmt_iterator gsi;
2233   basic_block bb;
2234   bool cfg_changed = false;
2235 
2236   FOR_EACH_BB_FN (bb, cfun)
2237     {
2238       for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2239 	{
2240 	  expand_vector_operations_1 (&gsi);
2241 	  /* ???  If we do not clean up EH then we will ICE during
2242 	     verification.  But in reality we have created wrong code,
2243 	     as we did not properly transition EH info and edges to
2244 	     the piecewise computations.  */
2245 	  if (maybe_clean_eh_stmt (gsi_stmt (gsi))
2246 	      && gimple_purge_dead_eh_edges (bb))
2247 	    cfg_changed = true;
2248 	}
2249     }
2250 
2251   return cfg_changed ? TODO_cleanup_cfg : 0;
2252 }
2253 
2254 namespace {
2255 
2256 const pass_data pass_data_lower_vector =
2257 {
2258   GIMPLE_PASS, /* type */
2259   "veclower", /* name */
2260   OPTGROUP_VEC, /* optinfo_flags */
2261   TV_NONE, /* tv_id */
2262   PROP_cfg, /* properties_required */
2263   PROP_gimple_lvec, /* properties_provided */
2264   0, /* properties_destroyed */
2265   0, /* todo_flags_start */
2266   TODO_update_ssa, /* todo_flags_finish */
2267 };
2268 
2269 class pass_lower_vector : public gimple_opt_pass
2270 {
2271 public:
2272   pass_lower_vector (gcc::context *ctxt)
2273     : gimple_opt_pass (pass_data_lower_vector, ctxt)
2274   {}
2275 
2276   /* opt_pass methods: */
2277   virtual bool gate (function *fun)
2278     {
2279       return !(fun->curr_properties & PROP_gimple_lvec);
2280     }
2281 
2282   virtual unsigned int execute (function *)
2283     {
2284       return expand_vector_operations ();
2285     }
2286 
2287 }; // class pass_lower_vector
2288 
2289 } // anon namespace
2290 
2291 gimple_opt_pass *
2292 make_pass_lower_vector (gcc::context *ctxt)
2293 {
2294   return new pass_lower_vector (ctxt);
2295 }
2296 
2297 namespace {
2298 
2299 const pass_data pass_data_lower_vector_ssa =
2300 {
2301   GIMPLE_PASS, /* type */
2302   "veclower2", /* name */
2303   OPTGROUP_VEC, /* optinfo_flags */
2304   TV_NONE, /* tv_id */
2305   PROP_cfg, /* properties_required */
2306   PROP_gimple_lvec, /* properties_provided */
2307   0, /* properties_destroyed */
2308   0, /* todo_flags_start */
2309   ( TODO_update_ssa
2310     | TODO_cleanup_cfg ), /* todo_flags_finish */
2311 };
2312 
2313 class pass_lower_vector_ssa : public gimple_opt_pass
2314 {
2315 public:
2316   pass_lower_vector_ssa (gcc::context *ctxt)
2317     : gimple_opt_pass (pass_data_lower_vector_ssa, ctxt)
2318   {}
2319 
2320   /* opt_pass methods: */
2321   opt_pass * clone () { return new pass_lower_vector_ssa (m_ctxt); }
2322   virtual unsigned int execute (function *)
2323     {
2324       return expand_vector_operations ();
2325     }
2326 
2327 }; // class pass_lower_vector_ssa
2328 
2329 } // anon namespace
2330 
2331 gimple_opt_pass *
2332 make_pass_lower_vector_ssa (gcc::context *ctxt)
2333 {
2334   return new pass_lower_vector_ssa (ctxt);
2335 }
2336 
2337 #include "gt-tree-vect-generic.h"
2338