1 /* Lower vector operations to scalar operations.
2    Copyright (C) 2004-2016 Free Software Foundation, Inc.
3 
4 This file is part of GCC.
5 
6 GCC is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 3, or (at your option) any
9 later version.
10 
11 GCC is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
14 for more details.
15 
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3.  If not see
18 <http://www.gnu.org/licenses/>.  */
19 
20 #include "config.h"
21 #include "system.h"
22 #include "coretypes.h"
23 #include "backend.h"
24 #include "rtl.h"
25 #include "tree.h"
26 #include "gimple.h"
27 #include "tree-pass.h"
28 #include "ssa.h"
29 #include "expmed.h"
30 #include "optabs-tree.h"
31 #include "diagnostic.h"
32 #include "fold-const.h"
33 #include "stor-layout.h"
34 #include "langhooks.h"
35 #include "tree-eh.h"
36 #include "gimple-iterator.h"
37 #include "gimplify-me.h"
38 #include "gimplify.h"
39 #include "tree-cfg.h"
40 
41 
42 static void expand_vector_operations_1 (gimple_stmt_iterator *);
43 
44 
45 /* Build a constant of type TYPE, made of VALUE's bits replicated
46    every TYPE_SIZE (INNER_TYPE) bits to fit TYPE's precision.  */
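/* For example, with a 32-bit TYPE and an 8-bit INNER_TYPE, VALUE == 0x7f
   yields the constant 0x7f7f7f7f: the low 8 bits of VALUE repeated in
   every 8-bit slot of TYPE.  */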
47 static tree
build_replicated_const (tree type, tree inner_type, HOST_WIDE_INT value)
49 {
50   int width = tree_to_uhwi (TYPE_SIZE (inner_type));
51   int n = (TYPE_PRECISION (type) + HOST_BITS_PER_WIDE_INT - 1)
52     / HOST_BITS_PER_WIDE_INT;
53   unsigned HOST_WIDE_INT low, mask;
54   HOST_WIDE_INT a[WIDE_INT_MAX_ELTS];
55   int i;
56 
57   gcc_assert (n && n <= WIDE_INT_MAX_ELTS);
58 
59   if (width == HOST_BITS_PER_WIDE_INT)
60     low = value;
61   else
62     {
63       mask = ((HOST_WIDE_INT)1 << width) - 1;
64       low = (unsigned HOST_WIDE_INT) ~0 / mask * (value & mask);
65     }
66 
67   for (i = 0; i < n; i++)
68     a[i] = low;
69 
70   gcc_assert (TYPE_PRECISION (type) <= MAX_BITSIZE_MODE_ANY_INT);
71   return wide_int_to_tree
72     (type, wide_int::from_array (a, n, TYPE_PRECISION (type)));
73 }
74 
75 static GTY(()) tree vector_inner_type;
76 static GTY(()) tree vector_last_type;
77 static GTY(()) int vector_last_nunits;
78 
/* Return a suitable vector type made of SUBPARTS units each of mode
   "word_mode" (the global variable).  */
81 static tree
build_word_mode_vector_type (int nunits)
83 {
84   if (!vector_inner_type)
85     vector_inner_type = lang_hooks.types.type_for_mode (word_mode, 1);
86   else if (vector_last_nunits == nunits)
87     {
88       gcc_assert (TREE_CODE (vector_last_type) == VECTOR_TYPE);
89       return vector_last_type;
90     }
91 
92   /* We build a new type, but we canonicalize it nevertheless,
93      because it still saves some memory.  */
94   vector_last_nunits = nunits;
95   vector_last_type = type_hash_canon (nunits,
96 				      build_vector_type (vector_inner_type,
97 							 nunits));
98   return vector_last_type;
99 }
100 
101 typedef tree (*elem_op_func) (gimple_stmt_iterator *,
102 			      tree, tree, tree, tree, tree, enum tree_code,
103 			      tree);
104 
105 static inline tree
tree_vec_extract (gimple_stmt_iterator *gsi, tree type,
107 		  tree t, tree bitsize, tree bitpos)
108 {
109   if (TREE_CODE (t) == SSA_NAME)
110     {
111       gimple *def_stmt = SSA_NAME_DEF_STMT (t);
112       if (is_gimple_assign (def_stmt)
113 	  && (gimple_assign_rhs_code (def_stmt) == VECTOR_CST
114 	      || (bitpos
115 		  && gimple_assign_rhs_code (def_stmt) == CONSTRUCTOR)))
116 	t = gimple_assign_rhs1 (def_stmt);
117     }
118   if (bitpos)
119     {
120       if (TREE_CODE (type) == BOOLEAN_TYPE)
121 	{
122 	  tree itype
123 	    = build_nonstandard_integer_type (tree_to_uhwi (bitsize), 0);
124 	  tree field = gimplify_build3 (gsi, BIT_FIELD_REF, itype, t,
125 					bitsize, bitpos);
126 	  return gimplify_build2 (gsi, NE_EXPR, type, field,
127 				  build_zero_cst (itype));
128 	}
129       else
130 	return gimplify_build3 (gsi, BIT_FIELD_REF, type, t, bitsize, bitpos);
131     }
132   else
133     return gimplify_build1 (gsi, VIEW_CONVERT_EXPR, type, t);
134 }
135 
136 static tree
do_unop (gimple_stmt_iterator *gsi, tree inner_type, tree a,
138 	 tree b ATTRIBUTE_UNUSED, tree bitpos, tree bitsize,
139 	 enum tree_code code, tree type ATTRIBUTE_UNUSED)
140 {
141   a = tree_vec_extract (gsi, inner_type, a, bitsize, bitpos);
142   return gimplify_build1 (gsi, code, inner_type, a);
143 }
144 
145 static tree
do_binop (gimple_stmt_iterator *gsi, tree inner_type, tree a, tree b,
147 	  tree bitpos, tree bitsize, enum tree_code code,
148 	  tree type ATTRIBUTE_UNUSED)
149 {
150   if (TREE_CODE (TREE_TYPE (a)) == VECTOR_TYPE)
151     a = tree_vec_extract (gsi, inner_type, a, bitsize, bitpos);
152   if (TREE_CODE (TREE_TYPE (b)) == VECTOR_TYPE)
153     b = tree_vec_extract (gsi, inner_type, b, bitsize, bitpos);
154   return gimplify_build2 (gsi, code, inner_type, a, b);
155 }
156 
/* Construct the expression (A[BITPOS] code B[BITPOS]) ? -1 : 0.

   INNER_TYPE is the type of the elements of A and B.

   The returned expression is of a signed integer type whose size
   equals the size of INNER_TYPE.  */
163 static tree
do_compare (gimple_stmt_iterator *gsi, tree inner_type, tree a, tree b,
165 	    tree bitpos, tree bitsize, enum tree_code code, tree type)
166 {
167   tree stype = TREE_TYPE (type);
168   tree cst_false = build_zero_cst (stype);
169   tree cst_true = build_all_ones_cst (stype);
170   tree cmp;
171 
172   a = tree_vec_extract (gsi, inner_type, a, bitsize, bitpos);
173   b = tree_vec_extract (gsi, inner_type, b, bitsize, bitpos);
174 
175   cmp = build2 (code, boolean_type_node, a, b);
176   return gimplify_build3 (gsi, COND_EXPR, stype, cmp, cst_true, cst_false);
177 }
178 
179 /* Expand vector addition to scalars.  This does bit twiddling
180    in order to increase parallelism:
181 
182    a + b = (((int) a & 0x7f7f7f7f) + ((int) b & 0x7f7f7f7f)) ^
183            (a ^ b) & 0x80808080
184 
185    a - b =  (((int) a | 0x80808080) - ((int) b & 0x7f7f7f7f)) ^
186             (a ^ ~b) & 0x80808080
187 
188    -b = (0x80808080 - ((int) b & 0x7f7f7f7f)) ^ (~b & 0x80808080)
189 
190    This optimization should be done only if 4 vector items or more
191    fit into a word.  */
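/* As a worked example of the PLUS_EXPR identity above, take two 8-bit
   lanes a = 0x85 and b = 0x93 packed somewhere in a word:
     (a & 0x7f) + (b & 0x7f) = 0x05 + 0x13 = 0x18, and
     (a ^ b) & 0x80          = 0x16 & 0x80 = 0x00,
   so the XOR of the two gives 0x18, which is indeed (0x85 + 0x93) & 0xff.
   Because the top bit of every lane is cleared before the addition, no
   carry can leak into the neighbouring lane; the XOR then restores the
   lane's own top bit.  */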
192 static tree
do_plus_minus (gimple_stmt_iterator *gsi, tree word_type, tree a, tree b,
194 	       tree bitpos ATTRIBUTE_UNUSED, tree bitsize ATTRIBUTE_UNUSED,
195 	       enum tree_code code, tree type ATTRIBUTE_UNUSED)
196 {
197   tree inner_type = TREE_TYPE (TREE_TYPE (a));
198   unsigned HOST_WIDE_INT max;
199   tree low_bits, high_bits, a_low, b_low, result_low, signs;
200 
201   max = GET_MODE_MASK (TYPE_MODE (inner_type));
202   low_bits = build_replicated_const (word_type, inner_type, max >> 1);
203   high_bits = build_replicated_const (word_type, inner_type, max & ~(max >> 1));
204 
205   a = tree_vec_extract (gsi, word_type, a, bitsize, bitpos);
206   b = tree_vec_extract (gsi, word_type, b, bitsize, bitpos);
207 
208   signs = gimplify_build2 (gsi, BIT_XOR_EXPR, word_type, a, b);
209   b_low = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, b, low_bits);
210   if (code == PLUS_EXPR)
211     a_low = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, a, low_bits);
212   else
213     {
214       a_low = gimplify_build2 (gsi, BIT_IOR_EXPR, word_type, a, high_bits);
215       signs = gimplify_build1 (gsi, BIT_NOT_EXPR, word_type, signs);
216     }
217 
218   signs = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, signs, high_bits);
219   result_low = gimplify_build2 (gsi, code, word_type, a_low, b_low);
220   return gimplify_build2 (gsi, BIT_XOR_EXPR, word_type, result_low, signs);
221 }
222 
223 static tree
do_negate (gimple_stmt_iterator *gsi, tree word_type, tree b,
225 	   tree unused ATTRIBUTE_UNUSED, tree bitpos ATTRIBUTE_UNUSED,
226 	   tree bitsize ATTRIBUTE_UNUSED,
227 	   enum tree_code code ATTRIBUTE_UNUSED,
228 	   tree type ATTRIBUTE_UNUSED)
229 {
230   tree inner_type = TREE_TYPE (TREE_TYPE (b));
231   HOST_WIDE_INT max;
232   tree low_bits, high_bits, b_low, result_low, signs;
233 
234   max = GET_MODE_MASK (TYPE_MODE (inner_type));
235   low_bits = build_replicated_const (word_type, inner_type, max >> 1);
236   high_bits = build_replicated_const (word_type, inner_type, max & ~(max >> 1));
237 
238   b = tree_vec_extract (gsi, word_type, b, bitsize, bitpos);
239 
240   b_low = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, b, low_bits);
241   signs = gimplify_build1 (gsi, BIT_NOT_EXPR, word_type, b);
242   signs = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, signs, high_bits);
243   result_low = gimplify_build2 (gsi, MINUS_EXPR, word_type, high_bits, b_low);
244   return gimplify_build2 (gsi, BIT_XOR_EXPR, word_type, result_low, signs);
245 }
246 
247 /* Expand a vector operation to scalars, by using many operations
248    whose type is the vector type's inner type.  */
249 static tree
expand_vector_piecewise (gimple_stmt_iterator *gsi, elem_op_func f,
251 			 tree type, tree inner_type,
252 			 tree a, tree b, enum tree_code code)
253 {
254   vec<constructor_elt, va_gc> *v;
255   tree part_width = TYPE_SIZE (inner_type);
256   tree index = bitsize_int (0);
257   int nunits = TYPE_VECTOR_SUBPARTS (type);
258   int delta = tree_to_uhwi (part_width)
259 	      / tree_to_uhwi (TYPE_SIZE (TREE_TYPE (type)));
260   int i;
261   location_t loc = gimple_location (gsi_stmt (*gsi));
262 
263   if (types_compatible_p (gimple_expr_type (gsi_stmt (*gsi)), type))
264     warning_at (loc, OPT_Wvector_operation_performance,
265 		"vector operation will be expanded piecewise");
266   else
267     warning_at (loc, OPT_Wvector_operation_performance,
268 		"vector operation will be expanded in parallel");
269 
270   vec_alloc (v, (nunits + delta - 1) / delta);
271   for (i = 0; i < nunits;
272        i += delta, index = int_const_binop (PLUS_EXPR, index, part_width))
273     {
274       tree result = f (gsi, inner_type, a, b, index, part_width, code, type);
275       constructor_elt ce = {NULL_TREE, result};
276       v->quick_push (ce);
277     }
278 
279   return build_constructor (type, v);
280 }
281 
282 /* Expand a vector operation to scalars with the freedom to use
283    a scalar integer type, or to use a different size for the items
284    in the vector type.  */
285 static tree
expand_vector_parallel (gimple_stmt_iterator *gsi, elem_op_func f, tree type,
287 			tree a, tree b,
288 			enum tree_code code)
289 {
290   tree result, compute_type;
291   machine_mode mode;
292   int n_words = tree_to_uhwi (TYPE_SIZE_UNIT (type)) / UNITS_PER_WORD;
293   location_t loc = gimple_location (gsi_stmt (*gsi));
294 
295   /* We have three strategies.  If the type is already correct, just do
296      the operation an element at a time.  Else, if the vector is wider than
297      one word, do it a word at a time; finally, if the vector is smaller
298      than one word, do it as a scalar.  */
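  /* For instance, on a hypothetical 64-bit target without vector
     instructions, a bitwise AND of two 16-byte vectors takes the second
     branch and is done as two DImode word operations (n_words == 2),
     while a 4-byte vector takes the third branch and becomes a single
     SImode scalar operation.  */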
299   if (TYPE_MODE (TREE_TYPE (type)) == word_mode)
300      return expand_vector_piecewise (gsi, f,
301 				     type, TREE_TYPE (type),
302 				     a, b, code);
303   else if (n_words > 1)
304     {
305       tree word_type = build_word_mode_vector_type (n_words);
306       result = expand_vector_piecewise (gsi, f,
307 				        word_type, TREE_TYPE (word_type),
308 					a, b, code);
309       result = force_gimple_operand_gsi (gsi, result, true, NULL, true,
310                                          GSI_SAME_STMT);
311     }
312   else
313     {
314       /* Use a single scalar operation with a mode no wider than word_mode.  */
315       mode = mode_for_size (tree_to_uhwi (TYPE_SIZE (type)), MODE_INT, 0);
316       compute_type = lang_hooks.types.type_for_mode (mode, 1);
317       result = f (gsi, compute_type, a, b, NULL_TREE, NULL_TREE, code, type);
318       warning_at (loc, OPT_Wvector_operation_performance,
319 	          "vector operation will be expanded with a "
320 		  "single scalar operation");
321     }
322 
323   return result;
324 }
325 
326 /* Expand a vector operation to scalars; for integer types we can use
327    special bit twiddling tricks to do the sums a word at a time, using
328    function F_PARALLEL instead of F.  These tricks are done only if
329    they can process at least four items, that is, only if the vector
330    holds at least four items and if a word can hold four items.  */
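/* For example, with 64-bit words a V8QI addition satisfies both conditions
   (eight parts per word and eight subparts) and takes the parallel
   word-at-a-time path, whereas a V2SI addition has only two parts per word
   and falls back to the piecewise expansion.  */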
331 static tree
expand_vector_addition (gimple_stmt_iterator *gsi,
333 			elem_op_func f, elem_op_func f_parallel,
334 			tree type, tree a, tree b, enum tree_code code)
335 {
336   int parts_per_word = UNITS_PER_WORD
337 	  	       / tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (type)));
338 
339   if (INTEGRAL_TYPE_P (TREE_TYPE (type))
340       && parts_per_word >= 4
341       && TYPE_VECTOR_SUBPARTS (type) >= 4)
342     return expand_vector_parallel (gsi, f_parallel,
343 				   type, a, b, code);
344   else
345     return expand_vector_piecewise (gsi, f,
346 				    type, TREE_TYPE (type),
347 				    a, b, code);
348 }
349 
/* Try to expand the vector comparison expression OP0 CODE OP1 by
   querying the optab to see whether the expression
	VEC_COND_EXPR< OP0 CODE OP1, {-1,...}, {0,...}>
   can be expanded.  */
354 static tree
expand_vector_comparison (gimple_stmt_iterator *gsi, tree type, tree op0,
356                           tree op1, enum tree_code code)
357 {
358   tree t;
359   if (!expand_vec_cmp_expr_p (TREE_TYPE (op0), type)
360       && !expand_vec_cond_expr_p (type, TREE_TYPE (op0)))
361     t = expand_vector_piecewise (gsi, do_compare, type,
362 				 TREE_TYPE (TREE_TYPE (op0)), op0, op1, code);
363   else
364     t = NULL_TREE;
365 
366   return t;
367 }
368 
/* Helper function of expand_vector_divmod.  Gimplify a RSHIFT_EXPR in the
   type of OP0 with the shift counts from the SHIFTCNTS array, and return
   the temporary holding the result if successful, otherwise return
   NULL_TREE.  */
372 static tree
add_rshift (gimple_stmt_iterator *gsi, tree type, tree op0, int *shiftcnts)
374 {
375   optab op;
376   unsigned int i, nunits = TYPE_VECTOR_SUBPARTS (type);
377   bool scalar_shift = true;
378 
379   for (i = 1; i < nunits; i++)
380     {
381       if (shiftcnts[i] != shiftcnts[0])
382 	scalar_shift = false;
383     }
384 
385   if (scalar_shift && shiftcnts[0] == 0)
386     return op0;
387 
388   if (scalar_shift)
389     {
390       op = optab_for_tree_code (RSHIFT_EXPR, type, optab_scalar);
391       if (op != unknown_optab
392 	  && optab_handler (op, TYPE_MODE (type)) != CODE_FOR_nothing)
393 	return gimplify_build2 (gsi, RSHIFT_EXPR, type, op0,
394 				build_int_cst (NULL_TREE, shiftcnts[0]));
395     }
396 
397   op = optab_for_tree_code (RSHIFT_EXPR, type, optab_vector);
398   if (op != unknown_optab
399       && optab_handler (op, TYPE_MODE (type)) != CODE_FOR_nothing)
400     {
401       tree *vec = XALLOCAVEC (tree, nunits);
402       for (i = 0; i < nunits; i++)
403 	vec[i] = build_int_cst (TREE_TYPE (type), shiftcnts[i]);
404       return gimplify_build2 (gsi, RSHIFT_EXPR, type, op0,
405 			      build_vector (type, vec));
406     }
407 
408   return NULL_TREE;
409 }
410 
/* Try to expand an integer vector division by a constant using
   widening multiplication, shifts and additions.  */
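/* For instance, power-of-two divisors become plain shifts (and masks for
   the remainder), while an unsigned 32-bit division by 3 can be expanded
   as q = (x h* 0xAAAAAAAB) >> 1, where h* denotes the high-part multiply
   provided by MULT_HIGHPART_EXPR.  The analysis below chooses between
   these strategies per divisor element.  */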
413 static tree
expand_vector_divmod (gimple_stmt_iterator *gsi, tree type, tree op0,
415 		      tree op1, enum tree_code code)
416 {
417   bool use_pow2 = true;
418   bool has_vector_shift = true;
419   int mode = -1, this_mode;
420   int pre_shift = -1, post_shift;
421   unsigned int nunits = TYPE_VECTOR_SUBPARTS (type);
422   int *shifts = XALLOCAVEC (int, nunits * 4);
423   int *pre_shifts = shifts + nunits;
424   int *post_shifts = pre_shifts + nunits;
425   int *shift_temps = post_shifts + nunits;
426   unsigned HOST_WIDE_INT *mulc = XALLOCAVEC (unsigned HOST_WIDE_INT, nunits);
427   int prec = TYPE_PRECISION (TREE_TYPE (type));
428   int dummy_int;
429   unsigned int i;
430   signop sign_p = TYPE_SIGN (TREE_TYPE (type));
431   unsigned HOST_WIDE_INT mask = GET_MODE_MASK (TYPE_MODE (TREE_TYPE (type)));
432   tree *vec;
433   tree cur_op, mulcst, tem;
434   optab op;
435 
436   if (prec > HOST_BITS_PER_WIDE_INT)
437     return NULL_TREE;
438 
439   op = optab_for_tree_code (RSHIFT_EXPR, type, optab_vector);
440   if (op == unknown_optab
441       || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
442     has_vector_shift = false;
443 
  /* Analysis phase.  Determine whether all op1 elements are powers of
     two and whether the operation can be expanded using shifts (or, for
     the remainder, using masking).  Additionally compute the
     multiplicative constants and the pre- and post-shifts if the division
     is to be expanded using a widening or high-part multiplication plus
     shifts.  */
449   for (i = 0; i < nunits; i++)
450     {
451       tree cst = VECTOR_CST_ELT (op1, i);
452       unsigned HOST_WIDE_INT ml;
453 
454       if (TREE_CODE (cst) != INTEGER_CST || integer_zerop (cst))
455 	return NULL_TREE;
456       pre_shifts[i] = 0;
457       post_shifts[i] = 0;
458       mulc[i] = 0;
459       if (use_pow2
460 	  && (!integer_pow2p (cst) || tree_int_cst_sgn (cst) != 1))
461 	use_pow2 = false;
462       if (use_pow2)
463 	{
464 	  shifts[i] = tree_log2 (cst);
465 	  if (shifts[i] != shifts[0]
466 	      && code == TRUNC_DIV_EXPR
467 	      && !has_vector_shift)
468 	    use_pow2 = false;
469 	}
470       if (mode == -2)
471 	continue;
472       if (sign_p == UNSIGNED)
473 	{
474 	  unsigned HOST_WIDE_INT mh;
475 	  unsigned HOST_WIDE_INT d = TREE_INT_CST_LOW (cst) & mask;
476 
477 	  if (d >= ((unsigned HOST_WIDE_INT) 1 << (prec - 1)))
478 	    /* FIXME: Can transform this into op0 >= op1 ? 1 : 0.  */
479 	    return NULL_TREE;
480 
481 	  if (d <= 1)
482 	    {
483 	      mode = -2;
484 	      continue;
485 	    }
486 
487 	  /* Find a suitable multiplier and right shift count
488 	     instead of multiplying with D.  */
489 	  mh = choose_multiplier (d, prec, prec, &ml, &post_shift, &dummy_int);
490 
491 	  /* If the suggested multiplier is more than SIZE bits, we can
492 	     do better for even divisors, using an initial right shift.  */
493 	  if ((mh != 0 && (d & 1) == 0)
494 	      || (!has_vector_shift && pre_shift != -1))
495 	    {
496 	      if (has_vector_shift)
497 		pre_shift = floor_log2 (d & -d);
498 	      else if (pre_shift == -1)
499 		{
500 		  unsigned int j;
501 		  for (j = 0; j < nunits; j++)
502 		    {
503 		      tree cst2 = VECTOR_CST_ELT (op1, j);
504 		      unsigned HOST_WIDE_INT d2;
505 		      int this_pre_shift;
506 
507 		      if (!tree_fits_uhwi_p (cst2))
508 			return NULL_TREE;
509 		      d2 = tree_to_uhwi (cst2) & mask;
510 		      if (d2 == 0)
511 			return NULL_TREE;
512 		      this_pre_shift = floor_log2 (d2 & -d2);
513 		      if (pre_shift == -1 || this_pre_shift < pre_shift)
514 			pre_shift = this_pre_shift;
515 		    }
516 		  if (i != 0 && pre_shift != 0)
517 		    {
518 		      /* Restart.  */
519 		      i = -1U;
520 		      mode = -1;
521 		      continue;
522 		    }
523 		}
524 	      if (pre_shift != 0)
525 		{
526 		  if ((d >> pre_shift) <= 1)
527 		    {
528 		      mode = -2;
529 		      continue;
530 		    }
531 		  mh = choose_multiplier (d >> pre_shift, prec,
532 					  prec - pre_shift,
533 					  &ml, &post_shift, &dummy_int);
534 		  gcc_assert (!mh);
535 		  pre_shifts[i] = pre_shift;
536 		}
537 	    }
538 	  if (!mh)
539 	    this_mode = 0;
540 	  else
541 	    this_mode = 1;
542 	}
543       else
544 	{
545 	  HOST_WIDE_INT d = TREE_INT_CST_LOW (cst);
546 	  unsigned HOST_WIDE_INT abs_d;
547 
548 	  if (d == -1)
549 	    return NULL_TREE;
550 
551 	  /* Since d might be INT_MIN, we have to cast to
552 	     unsigned HOST_WIDE_INT before negating to avoid
553 	     undefined signed overflow.  */
554 	  abs_d = (d >= 0
555 		  ? (unsigned HOST_WIDE_INT) d
556 		  : - (unsigned HOST_WIDE_INT) d);
557 
558 	  /* n rem d = n rem -d */
559 	  if (code == TRUNC_MOD_EXPR && d < 0)
560 	    d = abs_d;
561 	  else if (abs_d == (unsigned HOST_WIDE_INT) 1 << (prec - 1))
562 	    {
563 	      /* This case is not handled correctly below.  */
564 	      mode = -2;
565 	      continue;
566 	    }
567 	  if (abs_d <= 1)
568 	    {
569 	      mode = -2;
570 	      continue;
571 	    }
572 
573 	  choose_multiplier (abs_d, prec, prec - 1, &ml,
574 			     &post_shift, &dummy_int);
575 	  if (ml >= (unsigned HOST_WIDE_INT) 1 << (prec - 1))
576 	    {
577 	      this_mode = 4 + (d < 0);
578 	      ml |= (~(unsigned HOST_WIDE_INT) 0) << (prec - 1);
579 	    }
580 	  else
581 	    this_mode = 2 + (d < 0);
582 	}
583       mulc[i] = ml;
584       post_shifts[i] = post_shift;
585       if ((i && !has_vector_shift && post_shifts[0] != post_shift)
586 	  || post_shift >= prec
587 	  || pre_shifts[i] >= prec)
588 	this_mode = -2;
589 
590       if (i == 0)
591 	mode = this_mode;
592       else if (mode != this_mode)
593 	mode = -2;
594     }
595 
596   vec = XALLOCAVEC (tree, nunits);
597 
598   if (use_pow2)
599     {
600       tree addend = NULL_TREE;
601       if (sign_p == SIGNED)
602 	{
603 	  tree uns_type;
604 
605 	  /* Both division and remainder sequences need
606 	     op0 < 0 ? mask : 0 computed.  It can be either computed as
607 	     (type) (((uns_type) (op0 >> (prec - 1))) >> (prec - shifts[i]))
608 	     if none of the shifts is 0, or as the conditional.  */
609 	  for (i = 0; i < nunits; i++)
610 	    if (shifts[i] == 0)
611 	      break;
612 	  uns_type
613 	    = build_vector_type (build_nonstandard_integer_type (prec, 1),
614 				 nunits);
615 	  if (i == nunits && TYPE_MODE (uns_type) == TYPE_MODE (type))
616 	    {
617 	      for (i = 0; i < nunits; i++)
618 		shift_temps[i] = prec - 1;
619 	      cur_op = add_rshift (gsi, type, op0, shift_temps);
620 	      if (cur_op != NULL_TREE)
621 		{
622 		  cur_op = gimplify_build1 (gsi, VIEW_CONVERT_EXPR,
623 					    uns_type, cur_op);
624 		  for (i = 0; i < nunits; i++)
625 		    shift_temps[i] = prec - shifts[i];
626 		  cur_op = add_rshift (gsi, uns_type, cur_op, shift_temps);
627 		  if (cur_op != NULL_TREE)
628 		    addend = gimplify_build1 (gsi, VIEW_CONVERT_EXPR,
629 					      type, cur_op);
630 		}
631 	    }
632 	  if (addend == NULL_TREE
633 	      && expand_vec_cond_expr_p (type, type))
634 	    {
635 	      tree zero, cst, cond, mask_type;
636 	      gimple *stmt;
637 
638 	      mask_type = build_same_sized_truth_vector_type (type);
639 	      zero = build_zero_cst (type);
640 	      cond = build2 (LT_EXPR, mask_type, op0, zero);
641 	      for (i = 0; i < nunits; i++)
642 		vec[i] = build_int_cst (TREE_TYPE (type),
643 					((unsigned HOST_WIDE_INT) 1
644 					 << shifts[i]) - 1);
645 	      cst = build_vector (type, vec);
646 	      addend = make_ssa_name (type);
647 	      stmt = gimple_build_assign (addend, VEC_COND_EXPR, cond,
648 					  cst, zero);
649 	      gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
650 	    }
651 	}
652       if (code == TRUNC_DIV_EXPR)
653 	{
654 	  if (sign_p == UNSIGNED)
655 	    {
656 	      /* q = op0 >> shift;  */
657 	      cur_op = add_rshift (gsi, type, op0, shifts);
658 	      if (cur_op != NULL_TREE)
659 		return cur_op;
660 	    }
661 	  else if (addend != NULL_TREE)
662 	    {
663 	      /* t1 = op0 + addend;
664 		 q = t1 >> shift;  */
665 	      op = optab_for_tree_code (PLUS_EXPR, type, optab_default);
666 	      if (op != unknown_optab
667 		  && optab_handler (op, TYPE_MODE (type)) != CODE_FOR_nothing)
668 		{
669 		  cur_op = gimplify_build2 (gsi, PLUS_EXPR, type, op0, addend);
670 		  cur_op = add_rshift (gsi, type, cur_op, shifts);
671 		  if (cur_op != NULL_TREE)
672 		    return cur_op;
673 		}
674 	    }
675 	}
676       else
677 	{
678 	  tree mask;
679 	  for (i = 0; i < nunits; i++)
680 	    vec[i] = build_int_cst (TREE_TYPE (type),
681 				    ((unsigned HOST_WIDE_INT) 1
682 				     << shifts[i]) - 1);
683 	  mask = build_vector (type, vec);
684 	  op = optab_for_tree_code (BIT_AND_EXPR, type, optab_default);
685 	  if (op != unknown_optab
686 	      && optab_handler (op, TYPE_MODE (type)) != CODE_FOR_nothing)
687 	    {
688 	      if (sign_p == UNSIGNED)
689 		/* r = op0 & mask;  */
690 		return gimplify_build2 (gsi, BIT_AND_EXPR, type, op0, mask);
691 	      else if (addend != NULL_TREE)
692 		{
693 		  /* t1 = op0 + addend;
694 		     t2 = t1 & mask;
695 		     r = t2 - addend;  */
696 		  op = optab_for_tree_code (PLUS_EXPR, type, optab_default);
697 		  if (op != unknown_optab
698 		      && optab_handler (op, TYPE_MODE (type))
699 			 != CODE_FOR_nothing)
700 		    {
701 		      cur_op = gimplify_build2 (gsi, PLUS_EXPR, type, op0,
702 						addend);
703 		      cur_op = gimplify_build2 (gsi, BIT_AND_EXPR, type,
704 						cur_op, mask);
705 		      op = optab_for_tree_code (MINUS_EXPR, type,
706 						optab_default);
707 		      if (op != unknown_optab
708 			  && optab_handler (op, TYPE_MODE (type))
709 			     != CODE_FOR_nothing)
710 			return gimplify_build2 (gsi, MINUS_EXPR, type,
711 						cur_op, addend);
712 		    }
713 		}
714 	    }
715 	}
716     }
717 
718   if (mode == -2 || BYTES_BIG_ENDIAN != WORDS_BIG_ENDIAN)
719     return NULL_TREE;
720 
721   if (!can_mult_highpart_p (TYPE_MODE (type), TYPE_UNSIGNED (type)))
722     return NULL_TREE;
723 
724   cur_op = op0;
725 
726   switch (mode)
727     {
728     case 0:
729       gcc_assert (sign_p == UNSIGNED);
730       /* t1 = oprnd0 >> pre_shift;
731 	 t2 = t1 h* ml;
732 	 q = t2 >> post_shift;  */
733       cur_op = add_rshift (gsi, type, cur_op, pre_shifts);
734       if (cur_op == NULL_TREE)
735 	return NULL_TREE;
736       break;
737     case 1:
738       gcc_assert (sign_p == UNSIGNED);
739       for (i = 0; i < nunits; i++)
740 	{
741 	  shift_temps[i] = 1;
742 	  post_shifts[i]--;
743 	}
744       break;
745     case 2:
746     case 3:
747     case 4:
748     case 5:
749       gcc_assert (sign_p == SIGNED);
750       for (i = 0; i < nunits; i++)
751 	shift_temps[i] = prec - 1;
752       break;
753     default:
754       return NULL_TREE;
755     }
756 
757   for (i = 0; i < nunits; i++)
758     vec[i] = build_int_cst (TREE_TYPE (type), mulc[i]);
759   mulcst = build_vector (type, vec);
760 
761   cur_op = gimplify_build2 (gsi, MULT_HIGHPART_EXPR, type, cur_op, mulcst);
762 
763   switch (mode)
764     {
765     case 0:
766       /* t1 = oprnd0 >> pre_shift;
767 	 t2 = t1 h* ml;
768 	 q = t2 >> post_shift;  */
769       cur_op = add_rshift (gsi, type, cur_op, post_shifts);
770       break;
771     case 1:
772       /* t1 = oprnd0 h* ml;
773 	 t2 = oprnd0 - t1;
774 	 t3 = t2 >> 1;
775 	 t4 = t1 + t3;
776 	 q = t4 >> (post_shift - 1);  */
777       op = optab_for_tree_code (MINUS_EXPR, type, optab_default);
778       if (op == unknown_optab
779 	  || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
780 	return NULL_TREE;
781       tem = gimplify_build2 (gsi, MINUS_EXPR, type, op0, cur_op);
782       tem = add_rshift (gsi, type, tem, shift_temps);
783       op = optab_for_tree_code (PLUS_EXPR, type, optab_default);
784       if (op == unknown_optab
785 	  || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
786 	return NULL_TREE;
787       tem = gimplify_build2 (gsi, PLUS_EXPR, type, cur_op, tem);
788       cur_op = add_rshift (gsi, type, tem, post_shifts);
789       if (cur_op == NULL_TREE)
790 	return NULL_TREE;
791       break;
792     case 2:
793     case 3:
794     case 4:
795     case 5:
796       /* t1 = oprnd0 h* ml;
797 	 t2 = t1; [ iff (mode & 2) != 0 ]
798 	 t2 = t1 + oprnd0; [ iff (mode & 2) == 0 ]
799 	 t3 = t2 >> post_shift;
800 	 t4 = oprnd0 >> (prec - 1);
801 	 q = t3 - t4; [ iff (mode & 1) == 0 ]
802 	 q = t4 - t3; [ iff (mode & 1) != 0 ]  */
803       if ((mode & 2) == 0)
804 	{
805 	  op = optab_for_tree_code (PLUS_EXPR, type, optab_default);
806 	  if (op == unknown_optab
807 	      || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
808 	    return NULL_TREE;
809 	  cur_op = gimplify_build2 (gsi, PLUS_EXPR, type, cur_op, op0);
810 	}
811       cur_op = add_rshift (gsi, type, cur_op, post_shifts);
812       if (cur_op == NULL_TREE)
813 	return NULL_TREE;
814       tem = add_rshift (gsi, type, op0, shift_temps);
815       if (tem == NULL_TREE)
816 	return NULL_TREE;
817       op = optab_for_tree_code (MINUS_EXPR, type, optab_default);
818       if (op == unknown_optab
819 	  || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
820 	return NULL_TREE;
821       if ((mode & 1) == 0)
822 	cur_op = gimplify_build2 (gsi, MINUS_EXPR, type, cur_op, tem);
823       else
824 	cur_op = gimplify_build2 (gsi, MINUS_EXPR, type, tem, cur_op);
825       break;
826     default:
827       gcc_unreachable ();
828     }
829 
830   if (code == TRUNC_DIV_EXPR)
831     return cur_op;
832 
833   /* We divided.  Now finish by:
834      t1 = q * oprnd1;
835      r = oprnd0 - t1;  */
836   op = optab_for_tree_code (MULT_EXPR, type, optab_default);
837   if (op == unknown_optab
838       || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
839     return NULL_TREE;
840   tem = gimplify_build2 (gsi, MULT_EXPR, type, cur_op, op1);
841   op = optab_for_tree_code (MINUS_EXPR, type, optab_default);
842   if (op == unknown_optab
843       || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
844     return NULL_TREE;
845   return gimplify_build2 (gsi, MINUS_EXPR, type, op0, tem);
846 }
847 
848 /* Expand a vector condition to scalars, by using many conditions
849    on the vector's elements.  */
850 static void
expand_vector_condition (gimple_stmt_iterator *gsi)
852 {
853   gassign *stmt = as_a <gassign *> (gsi_stmt (*gsi));
854   tree type = gimple_expr_type (stmt);
855   tree a = gimple_assign_rhs1 (stmt);
856   tree a1 = a;
857   tree a2 = NULL_TREE;
858   bool a_is_comparison = false;
859   tree b = gimple_assign_rhs2 (stmt);
860   tree c = gimple_assign_rhs3 (stmt);
861   vec<constructor_elt, va_gc> *v;
862   tree constr;
863   tree inner_type = TREE_TYPE (type);
864   tree cond_type = TREE_TYPE (TREE_TYPE (a));
865   tree comp_inner_type = cond_type;
866   tree width = TYPE_SIZE (inner_type);
867   tree index = bitsize_int (0);
868   int nunits = TYPE_VECTOR_SUBPARTS (type);
869   int i;
870   location_t loc = gimple_location (gsi_stmt (*gsi));
871 
872   if (!is_gimple_val (a))
873     {
874       gcc_assert (COMPARISON_CLASS_P (a));
875       a_is_comparison = true;
876       a1 = TREE_OPERAND (a, 0);
877       a2 = TREE_OPERAND (a, 1);
878       comp_inner_type = TREE_TYPE (TREE_TYPE (a1));
879     }
880 
881   if (expand_vec_cond_expr_p (type, TREE_TYPE (a1)))
882     return;
883 
884   /* TODO: try and find a smaller vector type.  */
885 
886   warning_at (loc, OPT_Wvector_operation_performance,
887 	      "vector condition will be expanded piecewise");
888 
889   vec_alloc (v, nunits);
890   for (i = 0; i < nunits;
891        i++, index = int_const_binop (PLUS_EXPR, index, width))
892     {
893       tree aa, result;
894       tree bb = tree_vec_extract (gsi, inner_type, b, width, index);
895       tree cc = tree_vec_extract (gsi, inner_type, c, width, index);
896       if (a_is_comparison)
897 	{
898 	  tree aa1 = tree_vec_extract (gsi, comp_inner_type, a1, width, index);
899 	  tree aa2 = tree_vec_extract (gsi, comp_inner_type, a2, width, index);
900 	  aa = fold_build2 (TREE_CODE (a), cond_type, aa1, aa2);
901 	}
902       else
903 	aa = tree_vec_extract (gsi, cond_type, a, width, index);
904       result = gimplify_build3 (gsi, COND_EXPR, inner_type, aa, bb, cc);
905       constructor_elt ce = {NULL_TREE, result};
906       v->quick_push (ce);
907     }
908 
909   constr = build_constructor (type, v);
910   gimple_assign_set_rhs_from_tree (gsi, constr);
911   update_stmt (gsi_stmt (*gsi));
912 }
913 
914 static tree
expand_vector_operation (gimple_stmt_iterator *gsi, tree type, tree compute_type,
916 			 gassign *assign, enum tree_code code)
917 {
918   machine_mode compute_mode = TYPE_MODE (compute_type);
919 
920   /* If the compute mode is not a vector mode (hence we are not decomposing
921      a BLKmode vector to smaller, hardware-supported vectors), we may want
922      to expand the operations in parallel.  */
923   if (GET_MODE_CLASS (compute_mode) != MODE_VECTOR_INT
924       && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_FLOAT
925       && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_FRACT
926       && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_UFRACT
927       && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_ACCUM
928       && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_UACCUM)
929     switch (code)
930       {
931       case PLUS_EXPR:
932       case MINUS_EXPR:
933         if (ANY_INTEGRAL_TYPE_P (type) && !TYPE_OVERFLOW_TRAPS (type))
934 	  return expand_vector_addition (gsi, do_binop, do_plus_minus, type,
935 					 gimple_assign_rhs1 (assign),
936 					 gimple_assign_rhs2 (assign), code);
937 	break;
938 
939       case NEGATE_EXPR:
940         if (ANY_INTEGRAL_TYPE_P (type) && !TYPE_OVERFLOW_TRAPS (type))
941           return expand_vector_addition (gsi, do_unop, do_negate, type,
942 		      		         gimple_assign_rhs1 (assign),
943 					 NULL_TREE, code);
944 	break;
945 
946       case BIT_AND_EXPR:
947       case BIT_IOR_EXPR:
948       case BIT_XOR_EXPR:
949         return expand_vector_parallel (gsi, do_binop, type,
950 		      		       gimple_assign_rhs1 (assign),
951 				       gimple_assign_rhs2 (assign), code);
952 
953       case BIT_NOT_EXPR:
954         return expand_vector_parallel (gsi, do_unop, type,
955 		      		       gimple_assign_rhs1 (assign),
956         			       NULL_TREE, code);
957       case EQ_EXPR:
958       case NE_EXPR:
959       case GT_EXPR:
960       case LT_EXPR:
961       case GE_EXPR:
962       case LE_EXPR:
963       case UNEQ_EXPR:
964       case UNGT_EXPR:
965       case UNLT_EXPR:
966       case UNGE_EXPR:
967       case UNLE_EXPR:
968       case LTGT_EXPR:
969       case ORDERED_EXPR:
970       case UNORDERED_EXPR:
971 	{
972 	  tree rhs1 = gimple_assign_rhs1 (assign);
973 	  tree rhs2 = gimple_assign_rhs2 (assign);
974 
975 	  return expand_vector_comparison (gsi, type, rhs1, rhs2, code);
976 	}
977 
978       case TRUNC_DIV_EXPR:
979       case TRUNC_MOD_EXPR:
980 	{
981 	  tree rhs1 = gimple_assign_rhs1 (assign);
982 	  tree rhs2 = gimple_assign_rhs2 (assign);
983 	  tree ret;
984 
985 	  if (!optimize
986 	      || !VECTOR_INTEGER_TYPE_P (type)
987 	      || TREE_CODE (rhs2) != VECTOR_CST
988 	      || !VECTOR_MODE_P (TYPE_MODE (type)))
989 	    break;
990 
991 	  ret = expand_vector_divmod (gsi, type, rhs1, rhs2, code);
992 	  if (ret != NULL_TREE)
993 	    return ret;
994 	  break;
995 	}
996 
997       default:
998 	break;
999       }
1000 
1001   if (TREE_CODE_CLASS (code) == tcc_unary)
1002     return expand_vector_piecewise (gsi, do_unop, type, compute_type,
1003 				    gimple_assign_rhs1 (assign),
1004 				    NULL_TREE, code);
1005   else
1006     return expand_vector_piecewise (gsi, do_binop, type, compute_type,
1007 				    gimple_assign_rhs1 (assign),
1008 				    gimple_assign_rhs2 (assign), code);
1009 }
1010 
1011 /* Try to optimize
1012    a_5 = { b_7, b_7 + 3, b_7 + 6, b_7 + 9 };
1013    style stmts into:
1014    _9 = { b_7, b_7, b_7, b_7 };
1015    a_5 = _9 + { 0, 3, 6, 9 };
   because a vector splat operation is usually more efficient
   than piecewise initialization of the vector.  */
1018 
1019 static void
optimize_vector_constructor (gimple_stmt_iterator *gsi)
1021 {
1022   gassign *stmt = as_a <gassign *> (gsi_stmt (*gsi));
1023   tree lhs = gimple_assign_lhs (stmt);
1024   tree rhs = gimple_assign_rhs1 (stmt);
1025   tree type = TREE_TYPE (rhs);
1026   unsigned int i, j, nelts = TYPE_VECTOR_SUBPARTS (type);
1027   bool all_same = true;
1028   constructor_elt *elt;
1029   tree *cst;
1030   gimple *g;
1031   tree base = NULL_TREE;
1032   optab op;
1033 
1034   if (nelts <= 2 || CONSTRUCTOR_NELTS (rhs) != nelts)
1035     return;
1036   op = optab_for_tree_code (PLUS_EXPR, type, optab_default);
1037   if (op == unknown_optab
1038       || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
1039     return;
1040   FOR_EACH_VEC_SAFE_ELT (CONSTRUCTOR_ELTS (rhs), i, elt)
1041     if (TREE_CODE (elt->value) != SSA_NAME
1042 	|| TREE_CODE (TREE_TYPE (elt->value)) == VECTOR_TYPE)
1043       return;
1044     else
1045       {
1046 	tree this_base = elt->value;
1047 	if (this_base != CONSTRUCTOR_ELT (rhs, 0)->value)
1048 	  all_same = false;
1049 	for (j = 0; j < nelts + 1; j++)
1050 	  {
1051 	    g = SSA_NAME_DEF_STMT (this_base);
1052 	    if (is_gimple_assign (g)
1053 		&& gimple_assign_rhs_code (g) == PLUS_EXPR
1054 		&& TREE_CODE (gimple_assign_rhs2 (g)) == INTEGER_CST
1055 		&& TREE_CODE (gimple_assign_rhs1 (g)) == SSA_NAME
1056 		&& !SSA_NAME_OCCURS_IN_ABNORMAL_PHI (gimple_assign_rhs1 (g)))
1057 	      this_base = gimple_assign_rhs1 (g);
1058 	    else
1059 	      break;
1060 	  }
1061 	if (i == 0)
1062 	  base = this_base;
1063 	else if (this_base != base)
1064 	  return;
1065       }
1066   if (all_same)
1067     return;
1068   cst = XALLOCAVEC (tree, nelts);
1069   for (i = 0; i < nelts; i++)
1070     {
      tree this_base = CONSTRUCTOR_ELT (rhs, i)->value;
1072       cst[i] = build_zero_cst (TREE_TYPE (base));
1073       while (this_base != base)
1074 	{
1075 	  g = SSA_NAME_DEF_STMT (this_base);
1076 	  cst[i] = fold_binary (PLUS_EXPR, TREE_TYPE (base),
1077 				cst[i], gimple_assign_rhs2 (g));
1078 	  if (cst[i] == NULL_TREE
1079 	      || TREE_CODE (cst[i]) != INTEGER_CST
1080 	      || TREE_OVERFLOW (cst[i]))
1081 	    return;
1082 	  this_base = gimple_assign_rhs1 (g);
1083 	}
1084     }
1085   for (i = 0; i < nelts; i++)
1086     CONSTRUCTOR_ELT (rhs, i)->value = base;
1087   g = gimple_build_assign (make_ssa_name (type), rhs);
1088   gsi_insert_before (gsi, g, GSI_SAME_STMT);
1089   g = gimple_build_assign (lhs, PLUS_EXPR, gimple_assign_lhs (g),
1090 			   build_vector (type, cst));
1091   gsi_replace (gsi, g, false);
1092 }
1093 
1094 /* Return a type for the widest vector mode whose components are of type
1095    TYPE, or NULL_TREE if none is found.  */
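/* For example, if TYPE is a 32-bit integer type and the target implements
   OP for both V4SI and V8SI, the V8SI variant is preferred because it has
   more subparts; if no vector mode supports OP, NULL_TREE is returned.  */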
1096 
1097 static tree
type_for_widest_vector_mode (tree type, optab op)
1099 {
1100   machine_mode inner_mode = TYPE_MODE (type);
1101   machine_mode best_mode = VOIDmode, mode;
1102   int best_nunits = 0;
1103 
1104   if (SCALAR_FLOAT_MODE_P (inner_mode))
1105     mode = MIN_MODE_VECTOR_FLOAT;
1106   else if (SCALAR_FRACT_MODE_P (inner_mode))
1107     mode = MIN_MODE_VECTOR_FRACT;
1108   else if (SCALAR_UFRACT_MODE_P (inner_mode))
1109     mode = MIN_MODE_VECTOR_UFRACT;
1110   else if (SCALAR_ACCUM_MODE_P (inner_mode))
1111     mode = MIN_MODE_VECTOR_ACCUM;
1112   else if (SCALAR_UACCUM_MODE_P (inner_mode))
1113     mode = MIN_MODE_VECTOR_UACCUM;
1114   else
1115     mode = MIN_MODE_VECTOR_INT;
1116 
1117   for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
1118     if (GET_MODE_INNER (mode) == inner_mode
1119         && GET_MODE_NUNITS (mode) > best_nunits
1120 	&& optab_handler (op, mode) != CODE_FOR_nothing)
1121       best_mode = mode, best_nunits = GET_MODE_NUNITS (mode);
1122 
1123   if (best_mode == VOIDmode)
1124     return NULL_TREE;
1125   else
1126     return build_vector_type_for_mode (type, best_mode);
1127 }
1128 
1129 
/* Build a reference to the element of the vector VECT.  The function
   returns either the element itself, a BIT_FIELD_REF, or an
   ARRAY_REF expression.

   GSI is required to insert temporary variables while building a
   reference to the element of the vector VECT.

   PTMPVEC is a pointer to a temporary variable used for caching
   purposes.  If PTMPVEC is NULL, a new temporary variable
   will be created.  */
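/* For instance, extracting element 2 of a V4SI value with a constant IDX
   results in a BIT_FIELD_REF of 32 bits at bit position 64, whereas a
   non-constant IDX forces the vector into an addressable temporary and
   produces an ARRAY_REF into it.  */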
1140 static tree
vector_element (gimple_stmt_iterator *gsi, tree vect, tree idx, tree *ptmpvec)
1142 {
1143   tree vect_type, vect_elt_type;
1144   gimple *asgn;
1145   tree tmpvec;
1146   tree arraytype;
1147   bool need_asgn = true;
1148   unsigned int elements;
1149 
1150   vect_type = TREE_TYPE (vect);
1151   vect_elt_type = TREE_TYPE (vect_type);
1152   elements = TYPE_VECTOR_SUBPARTS (vect_type);
1153 
1154   if (TREE_CODE (idx) == INTEGER_CST)
1155     {
1156       unsigned HOST_WIDE_INT index;
1157 
1158       /* Given that we're about to compute a binary modulus,
1159 	 we don't care about the high bits of the value.  */
1160       index = TREE_INT_CST_LOW (idx);
1161       if (!tree_fits_uhwi_p (idx) || index >= elements)
1162 	{
1163 	  index &= elements - 1;
1164 	  idx = build_int_cst (TREE_TYPE (idx), index);
1165 	}
1166 
1167       /* When lowering a vector statement sequence do some easy
1168          simplification by looking through intermediate vector results.  */
1169       if (TREE_CODE (vect) == SSA_NAME)
1170 	{
1171 	  gimple *def_stmt = SSA_NAME_DEF_STMT (vect);
1172 	  if (is_gimple_assign (def_stmt)
1173 	      && (gimple_assign_rhs_code (def_stmt) == VECTOR_CST
1174 		  || gimple_assign_rhs_code (def_stmt) == CONSTRUCTOR))
1175 	    vect = gimple_assign_rhs1 (def_stmt);
1176 	}
1177 
1178       if (TREE_CODE (vect) == VECTOR_CST)
1179 	return VECTOR_CST_ELT (vect, index);
1180       else if (TREE_CODE (vect) == CONSTRUCTOR
1181 	       && (CONSTRUCTOR_NELTS (vect) == 0
1182 		   || TREE_CODE (TREE_TYPE (CONSTRUCTOR_ELT (vect, 0)->value))
1183 		      != VECTOR_TYPE))
1184         {
1185 	  if (index < CONSTRUCTOR_NELTS (vect))
1186 	    return CONSTRUCTOR_ELT (vect, index)->value;
1187           return build_zero_cst (vect_elt_type);
1188         }
1189       else
1190         {
1191 	  tree size = TYPE_SIZE (vect_elt_type);
1192 	  tree pos = fold_build2 (MULT_EXPR, bitsizetype, bitsize_int (index),
1193 				  size);
1194 	  return fold_build3 (BIT_FIELD_REF, vect_elt_type, vect, size, pos);
1195         }
1196     }
1197 
1198   if (!ptmpvec)
1199     tmpvec = create_tmp_var (vect_type, "vectmp");
1200   else if (!*ptmpvec)
1201     tmpvec = *ptmpvec = create_tmp_var (vect_type, "vectmp");
1202   else
1203     {
1204       tmpvec = *ptmpvec;
1205       need_asgn = false;
1206     }
1207 
1208   if (need_asgn)
1209     {
1210       TREE_ADDRESSABLE (tmpvec) = 1;
1211       asgn = gimple_build_assign (tmpvec, vect);
1212       gsi_insert_before (gsi, asgn, GSI_SAME_STMT);
1213     }
1214 
1215   arraytype = build_array_type_nelts (vect_elt_type, elements);
1216   return build4 (ARRAY_REF, vect_elt_type,
1217                  build1 (VIEW_CONVERT_EXPR, arraytype, tmpvec),
1218                  idx, NULL_TREE, NULL_TREE);
1219 }
1220 
/* Check whether a VEC_PERM_EXPR in the given setting is supported
   by hardware, or else lower it piecewise.

   When VEC_PERM_EXPR has the same first and second operands,
   VEC_PERM_EXPR <v0, v0, mask>, the lowered version is
   {v0[mask[0]], v0[mask[1]], ...}.
   MASK and V0 must have the same number of elements.

   Otherwise VEC_PERM_EXPR <v0, v1, mask> is lowered to
   {mask[0] < len(v0) ? v0[mask[0]] : v1[mask[0] - len(v0)], ...}.
   V0 and V1 must have the same type.  MASK, V0 and V1 must have the
   same number of elements.  */
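/* For example, VEC_PERM_EXPR <{a,b,c,d}, {e,f,g,h}, {0,4,1,5}> selects
   {a, e, b, f}: mask indices below four pick from the first vector and
   the remaining indices pick the corresponding element of the second.  */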
1233 
1234 static void
lower_vec_perm (gimple_stmt_iterator *gsi)
1236 {
1237   gassign *stmt = as_a <gassign *> (gsi_stmt (*gsi));
1238   tree mask = gimple_assign_rhs3 (stmt);
1239   tree vec0 = gimple_assign_rhs1 (stmt);
1240   tree vec1 = gimple_assign_rhs2 (stmt);
1241   tree vect_type = TREE_TYPE (vec0);
1242   tree mask_type = TREE_TYPE (mask);
1243   tree vect_elt_type = TREE_TYPE (vect_type);
1244   tree mask_elt_type = TREE_TYPE (mask_type);
1245   unsigned int elements = TYPE_VECTOR_SUBPARTS (vect_type);
1246   vec<constructor_elt, va_gc> *v;
1247   tree constr, t, si, i_val;
1248   tree vec0tmp = NULL_TREE, vec1tmp = NULL_TREE, masktmp = NULL_TREE;
1249   bool two_operand_p = !operand_equal_p (vec0, vec1, 0);
1250   location_t loc = gimple_location (gsi_stmt (*gsi));
1251   unsigned i;
1252 
1253   if (TREE_CODE (mask) == SSA_NAME)
1254     {
1255       gimple *def_stmt = SSA_NAME_DEF_STMT (mask);
1256       if (is_gimple_assign (def_stmt)
1257 	  && gimple_assign_rhs_code (def_stmt) == VECTOR_CST)
1258 	mask = gimple_assign_rhs1 (def_stmt);
1259     }
1260 
1261   if (TREE_CODE (mask) == VECTOR_CST)
1262     {
1263       unsigned char *sel_int = XALLOCAVEC (unsigned char, elements);
1264 
1265       for (i = 0; i < elements; ++i)
1266 	sel_int[i] = (TREE_INT_CST_LOW (VECTOR_CST_ELT (mask, i))
1267 		      & (2 * elements - 1));
1268 
1269       if (can_vec_perm_p (TYPE_MODE (vect_type), false, sel_int))
1270 	{
1271 	  gimple_assign_set_rhs3 (stmt, mask);
1272 	  update_stmt (stmt);
1273 	  return;
1274 	}
1275       /* Also detect vec_shr pattern - VEC_PERM_EXPR with zero
1276 	 vector as VEC1 and a right element shift MASK.  */
1277       if (optab_handler (vec_shr_optab, TYPE_MODE (vect_type))
1278 	  != CODE_FOR_nothing
1279 	  && TREE_CODE (vec1) == VECTOR_CST
1280 	  && initializer_zerop (vec1)
1281 	  && sel_int[0]
1282 	  && sel_int[0] < elements)
1283 	{
1284 	  for (i = 1; i < elements; ++i)
1285 	    {
1286 	      unsigned int expected = i + sel_int[0];
1287 	      /* Indices into the second vector are all equivalent.  */
1288 	      if (MIN (elements, (unsigned) sel_int[i])
1289 		  != MIN (elements, expected))
1290  		break;
1291 	    }
1292 	  if (i == elements)
1293 	    {
1294 	      gimple_assign_set_rhs3 (stmt, mask);
1295 	      update_stmt (stmt);
1296 	      return;
1297 	    }
1298 	}
1299     }
1300   else if (can_vec_perm_p (TYPE_MODE (vect_type), true, NULL))
1301     return;
1302 
1303   warning_at (loc, OPT_Wvector_operation_performance,
1304               "vector shuffling operation will be expanded piecewise");
1305 
1306   vec_alloc (v, elements);
1307   for (i = 0; i < elements; i++)
1308     {
1309       si = size_int (i);
1310       i_val = vector_element (gsi, mask, si, &masktmp);
1311 
1312       if (TREE_CODE (i_val) == INTEGER_CST)
1313         {
1314 	  unsigned HOST_WIDE_INT index;
1315 
1316 	  index = TREE_INT_CST_LOW (i_val);
1317 	  if (!tree_fits_uhwi_p (i_val) || index >= elements)
1318 	    i_val = build_int_cst (mask_elt_type, index & (elements - 1));
1319 
1320           if (two_operand_p && (index & elements) != 0)
1321 	    t = vector_element (gsi, vec1, i_val, &vec1tmp);
1322 	  else
1323 	    t = vector_element (gsi, vec0, i_val, &vec0tmp);
1324 
1325           t = force_gimple_operand_gsi (gsi, t, true, NULL_TREE,
1326 					true, GSI_SAME_STMT);
1327         }
1328       else
1329         {
1330 	  tree cond = NULL_TREE, v0_val;
1331 
1332 	  if (two_operand_p)
1333 	    {
1334 	      cond = fold_build2 (BIT_AND_EXPR, mask_elt_type, i_val,
1335 			          build_int_cst (mask_elt_type, elements));
1336 	      cond = force_gimple_operand_gsi (gsi, cond, true, NULL_TREE,
1337 					       true, GSI_SAME_STMT);
1338 	    }
1339 
1340 	  i_val = fold_build2 (BIT_AND_EXPR, mask_elt_type, i_val,
1341 			       build_int_cst (mask_elt_type, elements - 1));
1342 	  i_val = force_gimple_operand_gsi (gsi, i_val, true, NULL_TREE,
1343 					    true, GSI_SAME_STMT);
1344 
1345 	  v0_val = vector_element (gsi, vec0, i_val, &vec0tmp);
1346 	  v0_val = force_gimple_operand_gsi (gsi, v0_val, true, NULL_TREE,
1347 					     true, GSI_SAME_STMT);
1348 
1349 	  if (two_operand_p)
1350 	    {
1351 	      tree v1_val;
1352 
1353 	      v1_val = vector_element (gsi, vec1, i_val, &vec1tmp);
1354 	      v1_val = force_gimple_operand_gsi (gsi, v1_val, true, NULL_TREE,
1355 						 true, GSI_SAME_STMT);
1356 
1357 	      cond = fold_build2 (EQ_EXPR, boolean_type_node,
1358 				  cond, build_zero_cst (mask_elt_type));
1359 	      cond = fold_build3 (COND_EXPR, vect_elt_type,
1360 				  cond, v0_val, v1_val);
1361               t = force_gimple_operand_gsi (gsi, cond, true, NULL_TREE,
1362 					    true, GSI_SAME_STMT);
1363             }
1364 	  else
1365 	    t = v0_val;
1366         }
1367 
1368       CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, t);
1369     }
1370 
1371   constr = build_constructor (vect_type, v);
1372   gimple_assign_set_rhs_from_tree (gsi, constr);
1373   update_stmt (gsi_stmt (*gsi));
1374 }
1375 
/* If OP is a uniform vector, return the element it is a splat from.  */
1377 
1378 static tree
ssa_uniform_vector_p (tree op)
1380 {
1381   if (TREE_CODE (op) == VECTOR_CST
1382       || TREE_CODE (op) == CONSTRUCTOR)
1383     return uniform_vector_p (op);
1384   if (TREE_CODE (op) == SSA_NAME)
1385     {
1386       gimple *def_stmt = SSA_NAME_DEF_STMT (op);
1387       if (gimple_assign_single_p (def_stmt))
1388 	return uniform_vector_p (gimple_assign_rhs1 (def_stmt));
1389     }
1390   return NULL_TREE;
1391 }
1392 
/* Return the type in which the CODE operation with optab OP can be
   computed.  */
1395 
1396 static tree
get_compute_type (enum tree_code code, optab op, tree type)
1398 {
1399   /* For very wide vectors, try using a smaller vector mode.  */
1400   tree compute_type = type;
1401   if (op
1402       && (!VECTOR_MODE_P (TYPE_MODE (type))
1403 	  || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing))
1404     {
1405       tree vector_compute_type
1406 	= type_for_widest_vector_mode (TREE_TYPE (type), op);
1407       if (vector_compute_type != NULL_TREE
1408 	  && (TYPE_VECTOR_SUBPARTS (vector_compute_type)
1409 	      < TYPE_VECTOR_SUBPARTS (compute_type))
1410 	  && TYPE_VECTOR_SUBPARTS (vector_compute_type) > 1
1411 	  && (optab_handler (op, TYPE_MODE (vector_compute_type))
1412 	      != CODE_FOR_nothing))
1413 	compute_type = vector_compute_type;
1414     }
1415 
1416   /* If we are breaking a BLKmode vector into smaller pieces,
1417      type_for_widest_vector_mode has already looked into the optab,
1418      so skip these checks.  */
1419   if (compute_type == type)
1420     {
1421       machine_mode compute_mode = TYPE_MODE (compute_type);
1422       if (VECTOR_MODE_P (compute_mode))
1423 	{
1424 	  if (op && optab_handler (op, compute_mode) != CODE_FOR_nothing)
1425 	    return compute_type;
1426 	  if (code == MULT_HIGHPART_EXPR
1427 	      && can_mult_highpart_p (compute_mode,
1428 				      TYPE_UNSIGNED (compute_type)))
1429 	    return compute_type;
1430 	}
1431       /* There is no operation in hardware, so fall back to scalars.  */
1432       compute_type = TREE_TYPE (type);
1433     }
1434 
1435   return compute_type;
1436 }
1437 
/* Helper function of expand_vector_operations_1.  Return the number of
   vector elements for vector types, or 1 for other types.  */
1440 
1441 static inline int
count_type_subparts (tree type)
1443 {
1444   return VECTOR_TYPE_P (type) ? TYPE_VECTOR_SUBPARTS (type) : 1;
1445 }
1446 
1447 static tree
do_cond (gimple_stmt_iterator *gsi, tree inner_type, tree a, tree b,
1449 	 tree bitpos, tree bitsize, enum tree_code code,
1450 	 tree type ATTRIBUTE_UNUSED)
1451 {
1452   if (TREE_CODE (TREE_TYPE (a)) == VECTOR_TYPE)
1453     a = tree_vec_extract (gsi, inner_type, a, bitsize, bitpos);
1454   if (TREE_CODE (TREE_TYPE (b)) == VECTOR_TYPE)
1455     b = tree_vec_extract (gsi, inner_type, b, bitsize, bitpos);
1456   tree cond = gimple_assign_rhs1 (gsi_stmt (*gsi));
1457   return gimplify_build3 (gsi, code, inner_type, unshare_expr (cond), a, b);
1458 }
1459 
1460 /* Expand a vector COND_EXPR to scalars, piecewise.  */
1461 static void
expand_vector_scalar_condition (gimple_stmt_iterator *gsi)
1463 {
1464   gassign *stmt = as_a <gassign *> (gsi_stmt (*gsi));
1465   tree type = gimple_expr_type (stmt);
1466   tree compute_type = get_compute_type (COND_EXPR, mov_optab, type);
1467   machine_mode compute_mode = TYPE_MODE (compute_type);
1468   gcc_assert (compute_mode != BLKmode);
1469   tree lhs = gimple_assign_lhs (stmt);
1470   tree rhs2 = gimple_assign_rhs2 (stmt);
1471   tree rhs3 = gimple_assign_rhs3 (stmt);
1472   tree new_rhs;
1473 
1474   /* If the compute mode is not a vector mode (hence we are not decomposing
1475      a BLKmode vector to smaller, hardware-supported vectors), we may want
1476      to expand the operations in parallel.  */
1477   if (GET_MODE_CLASS (compute_mode) != MODE_VECTOR_INT
1478       && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_FLOAT
1479       && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_FRACT
1480       && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_UFRACT
1481       && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_ACCUM
1482       && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_UACCUM)
1483     new_rhs = expand_vector_parallel (gsi, do_cond, type, rhs2, rhs3,
1484 				      COND_EXPR);
1485   else
1486     new_rhs = expand_vector_piecewise (gsi, do_cond, type, compute_type,
1487 				       rhs2, rhs3, COND_EXPR);
1488   if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (new_rhs)))
1489     new_rhs = gimplify_build1 (gsi, VIEW_CONVERT_EXPR, TREE_TYPE (lhs),
1490 			       new_rhs);
1491 
  /* NOTE:  We should avoid using gimple_assign_set_rhs_from_tree.  One
     way to do this is to change expand_vector_operation and its callees
     to return a tree_code, RHS1 and RHS2 instead of a tree.  */
1495   gimple_assign_set_rhs_from_tree (gsi, new_rhs);
1496   update_stmt (gsi_stmt (*gsi));
1497 }
1498 
1499 /* Process one statement.  If we identify a vector operation, expand it.  */
1500 
1501 static void
expand_vector_operations_1 (gimple_stmt_iterator *gsi)
1503 {
1504   tree lhs, rhs1, rhs2 = NULL, type, compute_type = NULL_TREE;
1505   enum tree_code code;
1506   optab op = unknown_optab;
1507   enum gimple_rhs_class rhs_class;
1508   tree new_rhs;
1509 
1510   /* Only consider code == GIMPLE_ASSIGN. */
  gassign *stmt = dyn_cast <gassign *> (gsi_stmt (*gsi));
  if (!stmt)
    return;

  code = gimple_assign_rhs_code (stmt);
  rhs_class = get_gimple_rhs_class (code);
  lhs = gimple_assign_lhs (stmt);

  if (code == VEC_PERM_EXPR)
    {
      lower_vec_perm (gsi);
      return;
    }

  if (code == VEC_COND_EXPR)
    {
      expand_vector_condition (gsi);
      return;
    }

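  /* A COND_EXPR with a scalar condition but a vector result whose type
     has no vector mode (BLKmode) must be lowered piecewise as well.  */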
  if (code == COND_EXPR
      && TREE_CODE (TREE_TYPE (gimple_assign_lhs (stmt))) == VECTOR_TYPE
      && TYPE_MODE (TREE_TYPE (gimple_assign_lhs (stmt))) == BLKmode)
    {
      expand_vector_scalar_condition (gsi);
      return;
    }

  if (code == CONSTRUCTOR
      && TREE_CODE (lhs) == SSA_NAME
      && VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (lhs)))
      && !gimple_clobber_p (stmt)
      && optimize)
    {
      optimize_vector_constructor (gsi);
      return;
    }

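  /* Beyond this point only unary and binary operations are lowered.  */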
  if (rhs_class != GIMPLE_UNARY_RHS && rhs_class != GIMPLE_BINARY_RHS)
    return;

  rhs1 = gimple_assign_rhs1 (stmt);
  type = gimple_expr_type (stmt);
  if (rhs_class == GIMPLE_BINARY_RHS)
    rhs2 = gimple_assign_rhs2 (stmt);

  if (TREE_CODE (type) != VECTOR_TYPE)
    return;

  /* If the vector operation's operands are uniform (the same value in
     every element), implement it with a scalar operation and a splat,
     provided the target supports the scalar operation.  */
  tree srhs1, srhs2 = NULL_TREE;
  if ((srhs1 = ssa_uniform_vector_p (rhs1)) != NULL_TREE
      && (rhs2 == NULL_TREE
	  || (! VECTOR_TYPE_P (TREE_TYPE (rhs2))
	      && (srhs2 = rhs2))
	  || (srhs2 = ssa_uniform_vector_p (rhs2)) != NULL_TREE)
      /* As we query direct optabs, restrict this to non-conversion
	 operations.  */
      && TYPE_MODE (TREE_TYPE (type)) == TYPE_MODE (TREE_TYPE (srhs1)))
    {
      op = optab_for_tree_code (code, TREE_TYPE (type), optab_scalar);
      if (op >= FIRST_NORM_OPTAB && op <= LAST_NORM_OPTAB
	  && optab_handler (op, TYPE_MODE (TREE_TYPE (type))) != CODE_FOR_nothing)
	{
	  tree slhs = make_ssa_name (TREE_TYPE (srhs1));
	  gimple *repl = gimple_build_assign (slhs, code, srhs1, srhs2);
	  gsi_insert_before (gsi, repl, GSI_SAME_STMT);
	  gimple_assign_set_rhs_from_tree (gsi,
					   build_vector_from_val (type, slhs));
	  update_stmt (stmt);
	  return;
	}
    }

  /* A scalar operation pretending to be a vector one: a boolean vector
     whose mode is a scalar integer (mask) mode is handled directly during
     expansion, so there is nothing to lower here.  */
  if (VECTOR_BOOLEAN_TYPE_P (type)
      && !VECTOR_MODE_P (TYPE_MODE (type))
      && TYPE_MODE (type) != BLKmode)
    return;

  if (CONVERT_EXPR_CODE_P (code)
      || code == FLOAT_EXPR
      || code == FIX_TRUNC_EXPR
      || code == VIEW_CONVERT_EXPR)
    return;

  /* The signedness is determined from the input argument.  */
  if (code == VEC_UNPACK_FLOAT_HI_EXPR
      || code == VEC_UNPACK_FLOAT_LO_EXPR)
    type = TREE_TYPE (rhs1);

  /* For widening/narrowing vector operations, the relevant type is that of
     the arguments, not of the widened result.  VEC_UNPACK_FLOAT_*_EXPR is
     handled the same way above.  */
  if (code == WIDEN_SUM_EXPR
      || code == VEC_WIDEN_MULT_HI_EXPR
      || code == VEC_WIDEN_MULT_LO_EXPR
      || code == VEC_WIDEN_MULT_EVEN_EXPR
      || code == VEC_WIDEN_MULT_ODD_EXPR
      || code == VEC_UNPACK_HI_EXPR
      || code == VEC_UNPACK_LO_EXPR
      || code == VEC_PACK_TRUNC_EXPR
      || code == VEC_PACK_SAT_EXPR
      || code == VEC_PACK_FIX_TRUNC_EXPR
      || code == VEC_WIDEN_LSHIFT_HI_EXPR
      || code == VEC_WIDEN_LSHIFT_LO_EXPR)
    type = TREE_TYPE (rhs1);


  /* Choose between vector shift/rotate by vector and vector shift/rotate
     by scalar.  */
  if (code == LSHIFT_EXPR
      || code == RSHIFT_EXPR
      || code == LROTATE_EXPR
      || code == RROTATE_EXPR)
    {
      optab opv;

      /* Check whether we have vector <op> {x,x,x,x} where x
         could be a scalar variable or a constant.  Transform
         vector <op> {x,x,x,x} ==> vector <op> scalar.  */
      if (VECTOR_INTEGER_TYPE_P (TREE_TYPE (rhs2)))
        {
          tree first;

          if ((first = ssa_uniform_vector_p (rhs2)) != NULL_TREE)
            {
              gimple_assign_set_rhs2 (stmt, first);
              update_stmt (stmt);
              rhs2 = first;
            }
        }

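      /* If the shift amount is still a vector, only the vector-by-vector
	 optab can be used.  Otherwise compare the vector-by-scalar and
	 vector-by-vector variants and pick whichever handles the wider
	 vector mode.  */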
      opv = optab_for_tree_code (code, type, optab_vector);
      if (VECTOR_INTEGER_TYPE_P (TREE_TYPE (rhs2)))
	op = opv;
      else
	{
          op = optab_for_tree_code (code, type, optab_scalar);

	  compute_type = get_compute_type (code, op, type);
	  if (compute_type == type)
	    return;
	  /* The rtl expander will expand vector/scalar as vector/vector
	     if necessary.  Pick the one with the wider vector type.  */
	  tree compute_vtype = get_compute_type (code, opv, type);
	  if (count_type_subparts (compute_vtype)
	      > count_type_subparts (compute_type))
	    {
	      compute_type = compute_vtype;
	      op = opv;
	    }
	}

      if (code == LROTATE_EXPR || code == RROTATE_EXPR)
	{
	  if (compute_type == NULL_TREE)
	    compute_type = get_compute_type (code, op, type);
	  if (compute_type == type)
	    return;
	  /* Before splitting vector rotates into scalar rotates, see
	     whether we can use vector shifts and BIT_IOR_EXPR instead.
	     For vector-by-vector rotates we would also need to check
	     BIT_AND_EXPR and NEGATE_EXPR; punt on those for now, as fold
	     does not seem to create such rotates anyway.  */
	  if (compute_type == TREE_TYPE (type)
	      && !VECTOR_INTEGER_TYPE_P (TREE_TYPE (rhs2)))
	    {
	      optab oplv = vashl_optab, opl = ashl_optab;
	      optab oprv = vlshr_optab, opr = lshr_optab, opo = ior_optab;
	      tree compute_lvtype = get_compute_type (LSHIFT_EXPR, oplv, type);
	      tree compute_rvtype = get_compute_type (RSHIFT_EXPR, oprv, type);
	      tree compute_otype = get_compute_type (BIT_IOR_EXPR, opo, type);
	      tree compute_ltype = get_compute_type (LSHIFT_EXPR, opl, type);
	      tree compute_rtype = get_compute_type (RSHIFT_EXPR, opr, type);
	      /* The rtl expander will expand vector/scalar as vector/vector
		 if necessary.  Pick the one with the wider vector type.  */
	      if (count_type_subparts (compute_lvtype)
		  > count_type_subparts (compute_ltype))
		{
		  compute_ltype = compute_lvtype;
		  opl = oplv;
		}
	      if (count_type_subparts (compute_rvtype)
		  > count_type_subparts (compute_rtype))
		{
		  compute_rtype = compute_rvtype;
		  opr = oprv;
		}
	      /* Pick the narrowest type from LSHIFT_EXPR, RSHIFT_EXPR and
		 BIT_IOR_EXPR.  */
	      compute_type = compute_ltype;
	      if (count_type_subparts (compute_type)
		  > count_type_subparts (compute_rtype))
		compute_type = compute_rtype;
	      if (count_type_subparts (compute_type)
		  > count_type_subparts (compute_otype))
		compute_type = compute_otype;
	      /* Verify all 3 operations can be performed in that type.  */
	      if (compute_type != TREE_TYPE (type))
		{
		  if (optab_handler (opl, TYPE_MODE (compute_type))
		      == CODE_FOR_nothing
		      || optab_handler (opr, TYPE_MODE (compute_type))
			 == CODE_FOR_nothing
		      || optab_handler (opo, TYPE_MODE (compute_type))
			 == CODE_FOR_nothing)
		    compute_type = TREE_TYPE (type);
		}
	    }
	}
    }
  else
    op = optab_for_tree_code (code, type, optab_default);

  /* Optabs will try converting a negation into a subtraction, so
     look for it as well.  TODO: negation of floating-point vectors
     might be turned into an exclusive OR toggling the sign bit.  */
  if (op == unknown_optab
      && code == NEGATE_EXPR
      && INTEGRAL_TYPE_P (TREE_TYPE (type)))
    op = optab_for_tree_code (MINUS_EXPR, type, optab_default);

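  /* If the target supports the operation directly on the full vector
     type, there is nothing to lower.  */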
  if (compute_type == NULL_TREE)
    compute_type = get_compute_type (code, op, type);
  if (compute_type == type)
    return;

  new_rhs = expand_vector_operation (gsi, type, compute_type, stmt, code);

  /* Leave the expression untouched for later expansion.  */
  if (new_rhs == NULL_TREE)
    return;

  if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (new_rhs)))
    new_rhs = gimplify_build1 (gsi, VIEW_CONVERT_EXPR, TREE_TYPE (lhs),
                               new_rhs);

  /* NOTE:  We should avoid using gimple_assign_set_rhs_from_tree.  One
     way to do it is to change expand_vector_operation and its callees to
     return a tree_code, RHS1 and RHS2 instead of a tree.  */
  gimple_assign_set_rhs_from_tree (gsi, new_rhs);
  update_stmt (gsi_stmt (*gsi));
}

/* Use this to lower vector operations introduced by the vectorizer,
   if it may need the bit-twiddling tricks implemented in this file.  */

static unsigned int
expand_vector_operations (void)
{
  gimple_stmt_iterator gsi;
  basic_block bb;
  bool cfg_changed = false;

  FOR_EACH_BB_FN (bb, cfun)
    {
      for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
	{
	  expand_vector_operations_1 (&gsi);
	  /* ???  If we do not clean up EH then we will ICE in
	     verification.  But in reality we have created wrong code,
	     as we did not properly transition EH info and edges to
	     the piecewise computations.  */
	  if (maybe_clean_eh_stmt (gsi_stmt (gsi))
	      && gimple_purge_dead_eh_edges (bb))
	    cfg_changed = true;
	}
    }

  return cfg_changed ? TODO_cleanup_cfg : 0;
}

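/* The "veclower" pass lowers generic vector operations the target cannot
   handle directly; it only runs if vector lowering has not been done yet
   (PROP_gimple_lvec).  */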
namespace {

const pass_data pass_data_lower_vector =
{
  GIMPLE_PASS, /* type */
  "veclower", /* name */
  OPTGROUP_VEC, /* optinfo_flags */
  TV_NONE, /* tv_id */
  PROP_cfg, /* properties_required */
  PROP_gimple_lvec, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  TODO_update_ssa, /* todo_flags_finish */
};

class pass_lower_vector : public gimple_opt_pass
{
public:
  pass_lower_vector (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_lower_vector, ctxt)
  {}

  /* opt_pass methods: */
  virtual bool gate (function *fun)
    {
      return !(fun->curr_properties & PROP_gimple_lvec);
    }

  virtual unsigned int execute (function *)
    {
      return expand_vector_operations ();
    }

}; // class pass_lower_vector

} // anon namespace

gimple_opt_pass *
make_pass_lower_vector (gcc::context *ctxt)
{
  return new pass_lower_vector (ctxt);
}

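/* The "veclower2" variant repeats the lowering later in the pipeline,
   e.g. for vector operations introduced by the vectorizer, and may also
   need to clean up the CFG afterwards.  */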
namespace {

const pass_data pass_data_lower_vector_ssa =
{
  GIMPLE_PASS, /* type */
  "veclower2", /* name */
  OPTGROUP_VEC, /* optinfo_flags */
  TV_NONE, /* tv_id */
  PROP_cfg, /* properties_required */
  PROP_gimple_lvec, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  ( TODO_update_ssa
    | TODO_cleanup_cfg ), /* todo_flags_finish */
};

class pass_lower_vector_ssa : public gimple_opt_pass
{
public:
  pass_lower_vector_ssa (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_lower_vector_ssa, ctxt)
  {}

  /* opt_pass methods: */
  opt_pass * clone () { return new pass_lower_vector_ssa (m_ctxt); }
  virtual unsigned int execute (function *)
    {
      return expand_vector_operations ();
    }

}; // class pass_lower_vector_ssa

} // anon namespace

gimple_opt_pass *
make_pass_lower_vector_ssa (gcc::context *ctxt)
{
  return new pass_lower_vector_ssa (ctxt);
}

#include "gt-tree-vect-generic.h"