1 /* Lower vector operations to scalar operations.
2    Copyright (C) 2004-2018 Free Software Foundation, Inc.
3 
4 This file is part of GCC.
5 
6 GCC is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 3, or (at your option) any
9 later version.
10 
11 GCC is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
14 for more details.
15 
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3.  If not see
18 <http://www.gnu.org/licenses/>.  */
19 
20 #include "config.h"
21 #include "system.h"
22 #include "coretypes.h"
23 #include "backend.h"
24 #include "rtl.h"
25 #include "tree.h"
26 #include "gimple.h"
27 #include "tree-pass.h"
28 #include "ssa.h"
29 #include "expmed.h"
30 #include "optabs-tree.h"
31 #include "diagnostic.h"
32 #include "fold-const.h"
33 #include "stor-layout.h"
34 #include "langhooks.h"
35 #include "tree-eh.h"
36 #include "gimple-iterator.h"
37 #include "gimplify-me.h"
38 #include "gimplify.h"
39 #include "tree-cfg.h"
40 #include "tree-vector-builder.h"
41 #include "vec-perm-indices.h"
42 
43 
44 static void expand_vector_operations_1 (gimple_stmt_iterator *);
45 
46 /* Return the number of elements in a vector type TYPE that we have
47    already decided needs to be expanded piecewise.  We don't support
48    this kind of expansion for variable-length vectors, since we should
49    always check for target support before introducing uses of those.  */
50 static unsigned int
51 nunits_for_known_piecewise_op (const_tree type)
52 {
53   return TYPE_VECTOR_SUBPARTS (type).to_constant ();
54 }
55 
56 /* Return true if TYPE1 has more elements than TYPE2, where either
57    type may be a vector or a scalar.  */
58 
59 static inline bool
60 subparts_gt (tree type1, tree type2)
61 {
62   poly_uint64 n1 = VECTOR_TYPE_P (type1) ? TYPE_VECTOR_SUBPARTS (type1) : 1;
63   poly_uint64 n2 = VECTOR_TYPE_P (type2) ? TYPE_VECTOR_SUBPARTS (type2) : 1;
64   return known_gt (n1, n2);
65 }
66 
67 /* Build a constant of type TYPE, made of VALUE's bits replicated
68    every TYPE_SIZE (INNER_TYPE) bits to fit TYPE's precision.  */
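/* For instance (illustrative values): replicating VALUE 0x7f with an
   8-bit INNER_TYPE into a 32-bit TYPE yields 0x7f7f7f7f.  This is how
   the low_bits and high_bits masks used by do_plus_minus and do_negate
   below are built.  */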
69 static tree
70 build_replicated_const (tree type, tree inner_type, HOST_WIDE_INT value)
71 {
72   int width = tree_to_uhwi (TYPE_SIZE (inner_type));
73   int n = (TYPE_PRECISION (type) + HOST_BITS_PER_WIDE_INT - 1)
74     / HOST_BITS_PER_WIDE_INT;
75   unsigned HOST_WIDE_INT low, mask;
76   HOST_WIDE_INT a[WIDE_INT_MAX_ELTS];
77   int i;
78 
79   gcc_assert (n && n <= WIDE_INT_MAX_ELTS);
80 
81   if (width == HOST_BITS_PER_WIDE_INT)
82     low = value;
83   else
84     {
85       mask = ((HOST_WIDE_INT)1 << width) - 1;
86       low = (unsigned HOST_WIDE_INT) ~0 / mask * (value & mask);
87     }
88 
89   for (i = 0; i < n; i++)
90     a[i] = low;
91 
92   gcc_assert (TYPE_PRECISION (type) <= MAX_BITSIZE_MODE_ANY_INT);
93   return wide_int_to_tree
94     (type, wide_int::from_array (a, n, TYPE_PRECISION (type)));
95 }
96 
97 static GTY(()) tree vector_inner_type;
98 static GTY(()) tree vector_last_type;
99 static GTY(()) int vector_last_nunits;
100 
/* Return a suitable vector type made of NUNITS units, each of mode
   "word_mode" (the global variable).  */
103 static tree
104 build_word_mode_vector_type (int nunits)
105 {
106   if (!vector_inner_type)
107     vector_inner_type = lang_hooks.types.type_for_mode (word_mode, 1);
108   else if (vector_last_nunits == nunits)
109     {
110       gcc_assert (TREE_CODE (vector_last_type) == VECTOR_TYPE);
111       return vector_last_type;
112     }
113 
114   vector_last_nunits = nunits;
115   vector_last_type = build_vector_type (vector_inner_type, nunits);
116   return vector_last_type;
117 }
118 
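/* Type of the element-wise expansion callbacks used by
   expand_vector_piecewise and friends.  Such a callback is given the
   statement iterator, the type of the piece being computed, the two
   operands (the second may be unused for unary operations), the bit
   position and bit size of the piece within the operands, the operation
   code and the type of the whole vector, and it returns the computed
   piece.  */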
119 typedef tree (*elem_op_func) (gimple_stmt_iterator *,
120 			      tree, tree, tree, tree, tree, enum tree_code,
121 			      tree);
122 
123 static inline tree
124 tree_vec_extract (gimple_stmt_iterator *gsi, tree type,
125 		  tree t, tree bitsize, tree bitpos)
126 {
127   if (TREE_CODE (t) == SSA_NAME)
128     {
129       gimple *def_stmt = SSA_NAME_DEF_STMT (t);
130       if (is_gimple_assign (def_stmt)
131 	  && (gimple_assign_rhs_code (def_stmt) == VECTOR_CST
132 	      || (bitpos
133 		  && gimple_assign_rhs_code (def_stmt) == CONSTRUCTOR)))
134 	t = gimple_assign_rhs1 (def_stmt);
135     }
136   if (bitpos)
137     {
138       if (TREE_CODE (type) == BOOLEAN_TYPE)
139 	{
140 	  tree itype
141 	    = build_nonstandard_integer_type (tree_to_uhwi (bitsize), 0);
142 	  tree field = gimplify_build3 (gsi, BIT_FIELD_REF, itype, t,
143 					bitsize, bitpos);
144 	  return gimplify_build2 (gsi, NE_EXPR, type, field,
145 				  build_zero_cst (itype));
146 	}
147       else
148 	return gimplify_build3 (gsi, BIT_FIELD_REF, type, t, bitsize, bitpos);
149     }
150   else
151     return gimplify_build1 (gsi, VIEW_CONVERT_EXPR, type, t);
152 }
153 
154 static tree
155 do_unop (gimple_stmt_iterator *gsi, tree inner_type, tree a,
156 	 tree b ATTRIBUTE_UNUSED, tree bitpos, tree bitsize,
157 	 enum tree_code code, tree type ATTRIBUTE_UNUSED)
158 {
159   a = tree_vec_extract (gsi, inner_type, a, bitsize, bitpos);
160   return gimplify_build1 (gsi, code, inner_type, a);
161 }
162 
163 static tree
164 do_binop (gimple_stmt_iterator *gsi, tree inner_type, tree a, tree b,
165 	  tree bitpos, tree bitsize, enum tree_code code,
166 	  tree type ATTRIBUTE_UNUSED)
167 {
168   if (TREE_CODE (TREE_TYPE (a)) == VECTOR_TYPE)
169     a = tree_vec_extract (gsi, inner_type, a, bitsize, bitpos);
170   if (TREE_CODE (TREE_TYPE (b)) == VECTOR_TYPE)
171     b = tree_vec_extract (gsi, inner_type, b, bitsize, bitpos);
172   return gimplify_build2 (gsi, code, inner_type, a, b);
173 }
174 
/* Construct the expression (A[BITPOS] code B[BITPOS]) ? -1 : 0.

   INNER_TYPE is the type of A's and B's elements.

   The returned expression is of signed integer type with a size
   equal to the size of INNER_TYPE.  */
181 static tree
182 do_compare (gimple_stmt_iterator *gsi, tree inner_type, tree a, tree b,
183 	    tree bitpos, tree bitsize, enum tree_code code, tree type)
184 {
185   tree stype = TREE_TYPE (type);
186   tree cst_false = build_zero_cst (stype);
187   tree cst_true = build_all_ones_cst (stype);
188   tree cmp;
189 
190   a = tree_vec_extract (gsi, inner_type, a, bitsize, bitpos);
191   b = tree_vec_extract (gsi, inner_type, b, bitsize, bitpos);
192 
193   cmp = build2 (code, boolean_type_node, a, b);
194   return gimplify_build3 (gsi, COND_EXPR, stype, cmp, cst_true, cst_false);
195 }
196 
197 /* Expand vector addition to scalars.  This does bit twiddling
198    in order to increase parallelism:
199 
200    a + b = (((int) a & 0x7f7f7f7f) + ((int) b & 0x7f7f7f7f)) ^
201            (a ^ b) & 0x80808080
202 
203    a - b =  (((int) a | 0x80808080) - ((int) b & 0x7f7f7f7f)) ^
204             (a ^ ~b) & 0x80808080
205 
206    -b = (0x80808080 - ((int) b & 0x7f7f7f7f)) ^ (~b & 0x80808080)
207 
208    This optimization should be done only if 4 vector items or more
209    fit into a word.  */
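/* As an illustrative sketch, take four 8-bit elements packed into a
   32-bit word, with a = 0x01020304 and b = 0x01010101: the masked sum
   (a & 0x7f7f7f7f) + (b & 0x7f7f7f7f) is 0x02030405 and
   (a ^ b) & 0x80808080 is 0, so the final XOR yields 0x02030405, i.e.
   the four lane-wise sums {0x02, 0x03, 0x04, 0x05} with no carry
   crossing a lane boundary.  */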
210 static tree
211 do_plus_minus (gimple_stmt_iterator *gsi, tree word_type, tree a, tree b,
212 	       tree bitpos ATTRIBUTE_UNUSED, tree bitsize ATTRIBUTE_UNUSED,
213 	       enum tree_code code, tree type ATTRIBUTE_UNUSED)
214 {
215   tree inner_type = TREE_TYPE (TREE_TYPE (a));
216   unsigned HOST_WIDE_INT max;
217   tree low_bits, high_bits, a_low, b_low, result_low, signs;
218 
219   max = GET_MODE_MASK (TYPE_MODE (inner_type));
220   low_bits = build_replicated_const (word_type, inner_type, max >> 1);
221   high_bits = build_replicated_const (word_type, inner_type, max & ~(max >> 1));
222 
223   a = tree_vec_extract (gsi, word_type, a, bitsize, bitpos);
224   b = tree_vec_extract (gsi, word_type, b, bitsize, bitpos);
225 
226   signs = gimplify_build2 (gsi, BIT_XOR_EXPR, word_type, a, b);
227   b_low = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, b, low_bits);
228   if (code == PLUS_EXPR)
229     a_low = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, a, low_bits);
230   else
231     {
232       a_low = gimplify_build2 (gsi, BIT_IOR_EXPR, word_type, a, high_bits);
233       signs = gimplify_build1 (gsi, BIT_NOT_EXPR, word_type, signs);
234     }
235 
236   signs = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, signs, high_bits);
237   result_low = gimplify_build2 (gsi, code, word_type, a_low, b_low);
238   return gimplify_build2 (gsi, BIT_XOR_EXPR, word_type, result_low, signs);
239 }
240 
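/* Expand vector negation a word at a time, using the identity
   -b = (0x80808080 - (b & 0x7f7f7f7f)) ^ (~b & 0x80808080)
   described in the comment before do_plus_minus.  */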
241 static tree
242 do_negate (gimple_stmt_iterator *gsi, tree word_type, tree b,
243 	   tree unused ATTRIBUTE_UNUSED, tree bitpos ATTRIBUTE_UNUSED,
244 	   tree bitsize ATTRIBUTE_UNUSED,
245 	   enum tree_code code ATTRIBUTE_UNUSED,
246 	   tree type ATTRIBUTE_UNUSED)
247 {
248   tree inner_type = TREE_TYPE (TREE_TYPE (b));
249   HOST_WIDE_INT max;
250   tree low_bits, high_bits, b_low, result_low, signs;
251 
252   max = GET_MODE_MASK (TYPE_MODE (inner_type));
253   low_bits = build_replicated_const (word_type, inner_type, max >> 1);
254   high_bits = build_replicated_const (word_type, inner_type, max & ~(max >> 1));
255 
256   b = tree_vec_extract (gsi, word_type, b, bitsize, bitpos);
257 
258   b_low = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, b, low_bits);
259   signs = gimplify_build1 (gsi, BIT_NOT_EXPR, word_type, b);
260   signs = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, signs, high_bits);
261   result_low = gimplify_build2 (gsi, MINUS_EXPR, word_type, high_bits, b_low);
262   return gimplify_build2 (gsi, BIT_XOR_EXPR, word_type, result_low, signs);
263 }
264 
265 /* Expand a vector operation to scalars, by using many operations
266    whose type is the vector type's inner type.  */
267 static tree
268 expand_vector_piecewise (gimple_stmt_iterator *gsi, elem_op_func f,
269 			 tree type, tree inner_type,
270 			 tree a, tree b, enum tree_code code)
271 {
272   vec<constructor_elt, va_gc> *v;
273   tree part_width = TYPE_SIZE (inner_type);
274   tree index = bitsize_int (0);
275   int nunits = nunits_for_known_piecewise_op (type);
276   int delta = tree_to_uhwi (part_width)
277 	      / tree_to_uhwi (TYPE_SIZE (TREE_TYPE (type)));
278   int i;
279   location_t loc = gimple_location (gsi_stmt (*gsi));
280 
281   if (types_compatible_p (gimple_expr_type (gsi_stmt (*gsi)), type))
282     warning_at (loc, OPT_Wvector_operation_performance,
283 		"vector operation will be expanded piecewise");
284   else
285     warning_at (loc, OPT_Wvector_operation_performance,
286 		"vector operation will be expanded in parallel");
287 
288   vec_alloc (v, (nunits + delta - 1) / delta);
289   for (i = 0; i < nunits;
290        i += delta, index = int_const_binop (PLUS_EXPR, index, part_width))
291     {
292       tree result = f (gsi, inner_type, a, b, index, part_width, code, type);
293       constructor_elt ce = {NULL_TREE, result};
294       v->quick_push (ce);
295     }
296 
297   return build_constructor (type, v);
298 }
299 
300 /* Expand a vector operation to scalars with the freedom to use
301    a scalar integer type, or to use a different size for the items
302    in the vector type.  */
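/* For example (illustrative), a bitwise AND of two 8-byte vectors on a
   64-bit target can be carried out as a single DImode AND of the whole
   vectors, whatever the element size.  */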
303 static tree
304 expand_vector_parallel (gimple_stmt_iterator *gsi, elem_op_func f, tree type,
305 			tree a, tree b,
306 			enum tree_code code)
307 {
308   tree result, compute_type;
309   int n_words = tree_to_uhwi (TYPE_SIZE_UNIT (type)) / UNITS_PER_WORD;
310   location_t loc = gimple_location (gsi_stmt (*gsi));
311 
  /* We have three strategies.  If the vector's element mode is already
     word_mode, just do the operation one element at a time.  Otherwise,
     if the vector is wider than one word, do it a word at a time;
     finally, if the vector is smaller than one word, do it as a single
     scalar.  */
316   if (TYPE_MODE (TREE_TYPE (type)) == word_mode)
317      return expand_vector_piecewise (gsi, f,
318 				     type, TREE_TYPE (type),
319 				     a, b, code);
320   else if (n_words > 1)
321     {
322       tree word_type = build_word_mode_vector_type (n_words);
323       result = expand_vector_piecewise (gsi, f,
324 				        word_type, TREE_TYPE (word_type),
325 					a, b, code);
326       result = force_gimple_operand_gsi (gsi, result, true, NULL, true,
327                                          GSI_SAME_STMT);
328     }
329   else
330     {
331       /* Use a single scalar operation with a mode no wider than word_mode.  */
332       scalar_int_mode mode
333 	= int_mode_for_size (tree_to_uhwi (TYPE_SIZE (type)), 0).require ();
334       compute_type = lang_hooks.types.type_for_mode (mode, 1);
335       result = f (gsi, compute_type, a, b, NULL_TREE, NULL_TREE, code, type);
336       warning_at (loc, OPT_Wvector_operation_performance,
337 	          "vector operation will be expanded with a "
338 		  "single scalar operation");
339     }
340 
341   return result;
342 }
343 
344 /* Expand a vector operation to scalars; for integer types we can use
345    special bit twiddling tricks to do the sums a word at a time, using
346    function F_PARALLEL instead of F.  These tricks are done only if
347    they can process at least four items, that is, only if the vector
348    holds at least four items and if a word can hold four items.  */
349 static tree
350 expand_vector_addition (gimple_stmt_iterator *gsi,
351 			elem_op_func f, elem_op_func f_parallel,
352 			tree type, tree a, tree b, enum tree_code code)
353 {
354   int parts_per_word = UNITS_PER_WORD
355 	  	       / tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (type)));
356 
357   if (INTEGRAL_TYPE_P (TREE_TYPE (type))
358       && parts_per_word >= 4
359       && nunits_for_known_piecewise_op (type) >= 4)
360     return expand_vector_parallel (gsi, f_parallel,
361 				   type, a, b, code);
362   else
363     return expand_vector_piecewise (gsi, f,
364 				    type, TREE_TYPE (type),
365 				    a, b, code);
366 }
367 
/* Try to expand vector comparison expression OP0 CODE OP1.  Return
   NULL_TREE if the target can expand it directly or as the equivalent
	VEC_COND_EXPR <OP0 CODE OP1, {-1,...}, {0,...}>,
   otherwise expand it piecewise and return the result.  */
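/* For example (illustrative), if the target supports neither form for a
   V4SI comparison, x < y is lowered via do_compare to
   { x[0] < y[0] ? -1 : 0, ..., x[3] < y[3] ? -1 : 0 }.  */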
372 static tree
373 expand_vector_comparison (gimple_stmt_iterator *gsi, tree type, tree op0,
374                           tree op1, enum tree_code code)
375 {
376   tree t;
377   if (!expand_vec_cmp_expr_p (TREE_TYPE (op0), type, code)
378       && !expand_vec_cond_expr_p (type, TREE_TYPE (op0), code))
379     t = expand_vector_piecewise (gsi, do_compare, type,
380 				 TREE_TYPE (TREE_TYPE (op0)), op0, op1, code);
381   else
382     t = NULL_TREE;
383 
384   return t;
385 }
386 
/* Helper function of expand_vector_divmod.  Gimplify an RSHIFT_EXPR of
   OP0, whose type is TYPE, by the shift counts in the SHIFTCNTS array,
   and return the temporary holding the result if successful; otherwise
   return NULL_TREE.  */
390 static tree
391 add_rshift (gimple_stmt_iterator *gsi, tree type, tree op0, int *shiftcnts)
392 {
393   optab op;
394   unsigned int i, nunits = nunits_for_known_piecewise_op (type);
395   bool scalar_shift = true;
396 
397   for (i = 1; i < nunits; i++)
398     {
399       if (shiftcnts[i] != shiftcnts[0])
400 	scalar_shift = false;
401     }
402 
403   if (scalar_shift && shiftcnts[0] == 0)
404     return op0;
405 
406   if (scalar_shift)
407     {
408       op = optab_for_tree_code (RSHIFT_EXPR, type, optab_scalar);
409       if (op != unknown_optab
410 	  && optab_handler (op, TYPE_MODE (type)) != CODE_FOR_nothing)
411 	return gimplify_build2 (gsi, RSHIFT_EXPR, type, op0,
412 				build_int_cst (NULL_TREE, shiftcnts[0]));
413     }
414 
415   op = optab_for_tree_code (RSHIFT_EXPR, type, optab_vector);
416   if (op != unknown_optab
417       && optab_handler (op, TYPE_MODE (type)) != CODE_FOR_nothing)
418     {
419       tree_vector_builder vec (type, nunits, 1);
420       for (i = 0; i < nunits; i++)
421 	vec.quick_push (build_int_cst (TREE_TYPE (type), shiftcnts[i]));
422       return gimplify_build2 (gsi, RSHIFT_EXPR, type, op0, vec.build ());
423     }
424 
425   return NULL_TREE;
426 }
427 
428 /* Try to expand integer vector division by constant using
429    widening multiply, shifts and additions.  */
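/* For instance (an illustrative sketch; the exact constants come from
   choose_multiplier): an unsigned V4SI division by {4,4,4,4} becomes a
   vector right shift by 2, while an unsigned division by {3,3,3,3}
   becomes a highpart multiply by {0xaaaaaaab,...} followed by a right
   shift by 1.  */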
430 static tree
431 expand_vector_divmod (gimple_stmt_iterator *gsi, tree type, tree op0,
432 		      tree op1, enum tree_code code)
433 {
434   bool use_pow2 = true;
435   bool has_vector_shift = true;
436   int mode = -1, this_mode;
437   int pre_shift = -1, post_shift;
438   unsigned int nunits = nunits_for_known_piecewise_op (type);
439   int *shifts = XALLOCAVEC (int, nunits * 4);
440   int *pre_shifts = shifts + nunits;
441   int *post_shifts = pre_shifts + nunits;
442   int *shift_temps = post_shifts + nunits;
443   unsigned HOST_WIDE_INT *mulc = XALLOCAVEC (unsigned HOST_WIDE_INT, nunits);
444   int prec = TYPE_PRECISION (TREE_TYPE (type));
445   int dummy_int;
446   unsigned int i;
447   signop sign_p = TYPE_SIGN (TREE_TYPE (type));
448   unsigned HOST_WIDE_INT mask = GET_MODE_MASK (TYPE_MODE (TREE_TYPE (type)));
449   tree cur_op, mulcst, tem;
450   optab op;
451 
452   if (prec > HOST_BITS_PER_WIDE_INT)
453     return NULL_TREE;
454 
455   op = optab_for_tree_code (RSHIFT_EXPR, type, optab_vector);
456   if (op == unknown_optab
457       || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
458     has_vector_shift = false;
459 
  /* Analysis phase.  Determine whether all op1 elements are powers of
     two and whether it is possible to expand the operation using shifts
     (or, for the remainder, using masking).  Additionally compute the
     multiplicative constants and pre and post shifts if the division is
     to be expanded using widening or high part multiplication plus
     shifts.  */
465   for (i = 0; i < nunits; i++)
466     {
467       tree cst = VECTOR_CST_ELT (op1, i);
468       unsigned HOST_WIDE_INT ml;
469 
470       if (TREE_CODE (cst) != INTEGER_CST || integer_zerop (cst))
471 	return NULL_TREE;
472       pre_shifts[i] = 0;
473       post_shifts[i] = 0;
474       mulc[i] = 0;
475       if (use_pow2
476 	  && (!integer_pow2p (cst) || tree_int_cst_sgn (cst) != 1))
477 	use_pow2 = false;
478       if (use_pow2)
479 	{
480 	  shifts[i] = tree_log2 (cst);
481 	  if (shifts[i] != shifts[0]
482 	      && code == TRUNC_DIV_EXPR
483 	      && !has_vector_shift)
484 	    use_pow2 = false;
485 	}
486       if (mode == -2)
487 	continue;
488       if (sign_p == UNSIGNED)
489 	{
490 	  unsigned HOST_WIDE_INT mh;
491 	  unsigned HOST_WIDE_INT d = TREE_INT_CST_LOW (cst) & mask;
492 
493 	  if (d >= (HOST_WIDE_INT_1U << (prec - 1)))
494 	    /* FIXME: Can transform this into op0 >= op1 ? 1 : 0.  */
495 	    return NULL_TREE;
496 
497 	  if (d <= 1)
498 	    {
499 	      mode = -2;
500 	      continue;
501 	    }
502 
503 	  /* Find a suitable multiplier and right shift count
504 	     instead of multiplying with D.  */
505 	  mh = choose_multiplier (d, prec, prec, &ml, &post_shift, &dummy_int);
506 
	  /* If the suggested multiplier is more than PREC bits, we can
	     do better for even divisors, using an initial right shift.  */
509 	  if ((mh != 0 && (d & 1) == 0)
510 	      || (!has_vector_shift && pre_shift != -1))
511 	    {
512 	      if (has_vector_shift)
513 		pre_shift = ctz_or_zero (d);
514 	      else if (pre_shift == -1)
515 		{
516 		  unsigned int j;
517 		  for (j = 0; j < nunits; j++)
518 		    {
519 		      tree cst2 = VECTOR_CST_ELT (op1, j);
520 		      unsigned HOST_WIDE_INT d2;
521 		      int this_pre_shift;
522 
523 		      if (!tree_fits_uhwi_p (cst2))
524 			return NULL_TREE;
525 		      d2 = tree_to_uhwi (cst2) & mask;
526 		      if (d2 == 0)
527 			return NULL_TREE;
528 		      this_pre_shift = floor_log2 (d2 & -d2);
529 		      if (pre_shift == -1 || this_pre_shift < pre_shift)
530 			pre_shift = this_pre_shift;
531 		    }
532 		  if (i != 0 && pre_shift != 0)
533 		    {
534 		      /* Restart.  */
535 		      i = -1U;
536 		      mode = -1;
537 		      continue;
538 		    }
539 		}
540 	      if (pre_shift != 0)
541 		{
542 		  if ((d >> pre_shift) <= 1)
543 		    {
544 		      mode = -2;
545 		      continue;
546 		    }
547 		  mh = choose_multiplier (d >> pre_shift, prec,
548 					  prec - pre_shift,
549 					  &ml, &post_shift, &dummy_int);
550 		  gcc_assert (!mh);
551 		  pre_shifts[i] = pre_shift;
552 		}
553 	    }
554 	  if (!mh)
555 	    this_mode = 0;
556 	  else
557 	    this_mode = 1;
558 	}
559       else
560 	{
561 	  HOST_WIDE_INT d = TREE_INT_CST_LOW (cst);
562 	  unsigned HOST_WIDE_INT abs_d;
563 
564 	  if (d == -1)
565 	    return NULL_TREE;
566 
567 	  /* Since d might be INT_MIN, we have to cast to
568 	     unsigned HOST_WIDE_INT before negating to avoid
569 	     undefined signed overflow.  */
570 	  abs_d = (d >= 0
571 		  ? (unsigned HOST_WIDE_INT) d
572 		  : - (unsigned HOST_WIDE_INT) d);
573 
574 	  /* n rem d = n rem -d */
575 	  if (code == TRUNC_MOD_EXPR && d < 0)
576 	    d = abs_d;
577 	  else if (abs_d == HOST_WIDE_INT_1U << (prec - 1))
578 	    {
579 	      /* This case is not handled correctly below.  */
580 	      mode = -2;
581 	      continue;
582 	    }
583 	  if (abs_d <= 1)
584 	    {
585 	      mode = -2;
586 	      continue;
587 	    }
588 
589 	  choose_multiplier (abs_d, prec, prec - 1, &ml,
590 			     &post_shift, &dummy_int);
591 	  if (ml >= HOST_WIDE_INT_1U << (prec - 1))
592 	    {
593 	      this_mode = 4 + (d < 0);
594 	      ml |= HOST_WIDE_INT_M1U << (prec - 1);
595 	    }
596 	  else
597 	    this_mode = 2 + (d < 0);
598 	}
599       mulc[i] = ml;
600       post_shifts[i] = post_shift;
601       if ((i && !has_vector_shift && post_shifts[0] != post_shift)
602 	  || post_shift >= prec
603 	  || pre_shifts[i] >= prec)
604 	this_mode = -2;
605 
606       if (i == 0)
607 	mode = this_mode;
608       else if (mode != this_mode)
609 	mode = -2;
610     }
611 
612   if (use_pow2)
613     {
614       tree addend = NULL_TREE;
615       if (sign_p == SIGNED)
616 	{
617 	  tree uns_type;
618 
619 	  /* Both division and remainder sequences need
620 	     op0 < 0 ? mask : 0 computed.  It can be either computed as
621 	     (type) (((uns_type) (op0 >> (prec - 1))) >> (prec - shifts[i]))
622 	     if none of the shifts is 0, or as the conditional.  */
623 	  for (i = 0; i < nunits; i++)
624 	    if (shifts[i] == 0)
625 	      break;
626 	  uns_type
627 	    = build_vector_type (build_nonstandard_integer_type (prec, 1),
628 				 nunits);
629 	  if (i == nunits && TYPE_MODE (uns_type) == TYPE_MODE (type))
630 	    {
631 	      for (i = 0; i < nunits; i++)
632 		shift_temps[i] = prec - 1;
633 	      cur_op = add_rshift (gsi, type, op0, shift_temps);
634 	      if (cur_op != NULL_TREE)
635 		{
636 		  cur_op = gimplify_build1 (gsi, VIEW_CONVERT_EXPR,
637 					    uns_type, cur_op);
638 		  for (i = 0; i < nunits; i++)
639 		    shift_temps[i] = prec - shifts[i];
640 		  cur_op = add_rshift (gsi, uns_type, cur_op, shift_temps);
641 		  if (cur_op != NULL_TREE)
642 		    addend = gimplify_build1 (gsi, VIEW_CONVERT_EXPR,
643 					      type, cur_op);
644 		}
645 	    }
646 	  if (addend == NULL_TREE
647 	      && expand_vec_cond_expr_p (type, type, LT_EXPR))
648 	    {
649 	      tree zero, cst, cond, mask_type;
650 	      gimple *stmt;
651 
652 	      mask_type = build_same_sized_truth_vector_type (type);
653 	      zero = build_zero_cst (type);
654 	      cond = build2 (LT_EXPR, mask_type, op0, zero);
655 	      tree_vector_builder vec (type, nunits, 1);
656 	      for (i = 0; i < nunits; i++)
657 		vec.quick_push (build_int_cst (TREE_TYPE (type),
658 					       (HOST_WIDE_INT_1U
659 						<< shifts[i]) - 1));
660 	      cst = vec.build ();
661 	      addend = make_ssa_name (type);
662 	      stmt = gimple_build_assign (addend, VEC_COND_EXPR, cond,
663 					  cst, zero);
664 	      gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
665 	    }
666 	}
667       if (code == TRUNC_DIV_EXPR)
668 	{
669 	  if (sign_p == UNSIGNED)
670 	    {
671 	      /* q = op0 >> shift;  */
672 	      cur_op = add_rshift (gsi, type, op0, shifts);
673 	      if (cur_op != NULL_TREE)
674 		return cur_op;
675 	    }
676 	  else if (addend != NULL_TREE)
677 	    {
678 	      /* t1 = op0 + addend;
679 		 q = t1 >> shift;  */
680 	      op = optab_for_tree_code (PLUS_EXPR, type, optab_default);
681 	      if (op != unknown_optab
682 		  && optab_handler (op, TYPE_MODE (type)) != CODE_FOR_nothing)
683 		{
684 		  cur_op = gimplify_build2 (gsi, PLUS_EXPR, type, op0, addend);
685 		  cur_op = add_rshift (gsi, type, cur_op, shifts);
686 		  if (cur_op != NULL_TREE)
687 		    return cur_op;
688 		}
689 	    }
690 	}
691       else
692 	{
693 	  tree mask;
694 	  tree_vector_builder vec (type, nunits, 1);
695 	  for (i = 0; i < nunits; i++)
696 	    vec.quick_push (build_int_cst (TREE_TYPE (type),
697 					   (HOST_WIDE_INT_1U
698 					    << shifts[i]) - 1));
699 	  mask = vec.build ();
700 	  op = optab_for_tree_code (BIT_AND_EXPR, type, optab_default);
701 	  if (op != unknown_optab
702 	      && optab_handler (op, TYPE_MODE (type)) != CODE_FOR_nothing)
703 	    {
704 	      if (sign_p == UNSIGNED)
705 		/* r = op0 & mask;  */
706 		return gimplify_build2 (gsi, BIT_AND_EXPR, type, op0, mask);
707 	      else if (addend != NULL_TREE)
708 		{
709 		  /* t1 = op0 + addend;
710 		     t2 = t1 & mask;
711 		     r = t2 - addend;  */
712 		  op = optab_for_tree_code (PLUS_EXPR, type, optab_default);
713 		  if (op != unknown_optab
714 		      && optab_handler (op, TYPE_MODE (type))
715 			 != CODE_FOR_nothing)
716 		    {
717 		      cur_op = gimplify_build2 (gsi, PLUS_EXPR, type, op0,
718 						addend);
719 		      cur_op = gimplify_build2 (gsi, BIT_AND_EXPR, type,
720 						cur_op, mask);
721 		      op = optab_for_tree_code (MINUS_EXPR, type,
722 						optab_default);
723 		      if (op != unknown_optab
724 			  && optab_handler (op, TYPE_MODE (type))
725 			     != CODE_FOR_nothing)
726 			return gimplify_build2 (gsi, MINUS_EXPR, type,
727 						cur_op, addend);
728 		    }
729 		}
730 	    }
731 	}
732     }
733 
734   if (mode == -2 || BYTES_BIG_ENDIAN != WORDS_BIG_ENDIAN)
735     return NULL_TREE;
736 
737   if (!can_mult_highpart_p (TYPE_MODE (type), TYPE_UNSIGNED (type)))
738     return NULL_TREE;
739 
740   cur_op = op0;
741 
742   switch (mode)
743     {
744     case 0:
745       gcc_assert (sign_p == UNSIGNED);
746       /* t1 = oprnd0 >> pre_shift;
747 	 t2 = t1 h* ml;
748 	 q = t2 >> post_shift;  */
749       cur_op = add_rshift (gsi, type, cur_op, pre_shifts);
750       if (cur_op == NULL_TREE)
751 	return NULL_TREE;
752       break;
753     case 1:
754       gcc_assert (sign_p == UNSIGNED);
755       for (i = 0; i < nunits; i++)
756 	{
757 	  shift_temps[i] = 1;
758 	  post_shifts[i]--;
759 	}
760       break;
761     case 2:
762     case 3:
763     case 4:
764     case 5:
765       gcc_assert (sign_p == SIGNED);
766       for (i = 0; i < nunits; i++)
767 	shift_temps[i] = prec - 1;
768       break;
769     default:
770       return NULL_TREE;
771     }
772 
773   tree_vector_builder vec (type, nunits, 1);
774   for (i = 0; i < nunits; i++)
775     vec.quick_push (build_int_cst (TREE_TYPE (type), mulc[i]));
776   mulcst = vec.build ();
777 
778   cur_op = gimplify_build2 (gsi, MULT_HIGHPART_EXPR, type, cur_op, mulcst);
779 
780   switch (mode)
781     {
782     case 0:
783       /* t1 = oprnd0 >> pre_shift;
784 	 t2 = t1 h* ml;
785 	 q = t2 >> post_shift;  */
786       cur_op = add_rshift (gsi, type, cur_op, post_shifts);
787       break;
788     case 1:
789       /* t1 = oprnd0 h* ml;
790 	 t2 = oprnd0 - t1;
791 	 t3 = t2 >> 1;
792 	 t4 = t1 + t3;
793 	 q = t4 >> (post_shift - 1);  */
794       op = optab_for_tree_code (MINUS_EXPR, type, optab_default);
795       if (op == unknown_optab
796 	  || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
797 	return NULL_TREE;
798       tem = gimplify_build2 (gsi, MINUS_EXPR, type, op0, cur_op);
799       tem = add_rshift (gsi, type, tem, shift_temps);
800       op = optab_for_tree_code (PLUS_EXPR, type, optab_default);
801       if (op == unknown_optab
802 	  || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
803 	return NULL_TREE;
804       tem = gimplify_build2 (gsi, PLUS_EXPR, type, cur_op, tem);
805       cur_op = add_rshift (gsi, type, tem, post_shifts);
806       if (cur_op == NULL_TREE)
807 	return NULL_TREE;
808       break;
809     case 2:
810     case 3:
811     case 4:
812     case 5:
813       /* t1 = oprnd0 h* ml;
814 	 t2 = t1; [ iff (mode & 2) != 0 ]
815 	 t2 = t1 + oprnd0; [ iff (mode & 2) == 0 ]
816 	 t3 = t2 >> post_shift;
817 	 t4 = oprnd0 >> (prec - 1);
818 	 q = t3 - t4; [ iff (mode & 1) == 0 ]
819 	 q = t4 - t3; [ iff (mode & 1) != 0 ]  */
820       if ((mode & 2) == 0)
821 	{
822 	  op = optab_for_tree_code (PLUS_EXPR, type, optab_default);
823 	  if (op == unknown_optab
824 	      || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
825 	    return NULL_TREE;
826 	  cur_op = gimplify_build2 (gsi, PLUS_EXPR, type, cur_op, op0);
827 	}
828       cur_op = add_rshift (gsi, type, cur_op, post_shifts);
829       if (cur_op == NULL_TREE)
830 	return NULL_TREE;
831       tem = add_rshift (gsi, type, op0, shift_temps);
832       if (tem == NULL_TREE)
833 	return NULL_TREE;
834       op = optab_for_tree_code (MINUS_EXPR, type, optab_default);
835       if (op == unknown_optab
836 	  || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
837 	return NULL_TREE;
838       if ((mode & 1) == 0)
839 	cur_op = gimplify_build2 (gsi, MINUS_EXPR, type, cur_op, tem);
840       else
841 	cur_op = gimplify_build2 (gsi, MINUS_EXPR, type, tem, cur_op);
842       break;
843     default:
844       gcc_unreachable ();
845     }
846 
847   if (code == TRUNC_DIV_EXPR)
848     return cur_op;
849 
850   /* We divided.  Now finish by:
851      t1 = q * oprnd1;
852      r = oprnd0 - t1;  */
853   op = optab_for_tree_code (MULT_EXPR, type, optab_default);
854   if (op == unknown_optab
855       || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
856     return NULL_TREE;
857   tem = gimplify_build2 (gsi, MULT_EXPR, type, cur_op, op1);
858   op = optab_for_tree_code (MINUS_EXPR, type, optab_default);
859   if (op == unknown_optab
860       || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
861     return NULL_TREE;
862   return gimplify_build2 (gsi, MINUS_EXPR, type, op0, tem);
863 }
864 
865 /* Expand a vector condition to scalars, by using many conditions
866    on the vector's elements.  */
867 static void
868 expand_vector_condition (gimple_stmt_iterator *gsi)
869 {
870   gassign *stmt = as_a <gassign *> (gsi_stmt (*gsi));
871   tree type = gimple_expr_type (stmt);
872   tree a = gimple_assign_rhs1 (stmt);
873   tree a1 = a;
874   tree a2 = NULL_TREE;
875   bool a_is_comparison = false;
876   tree b = gimple_assign_rhs2 (stmt);
877   tree c = gimple_assign_rhs3 (stmt);
878   vec<constructor_elt, va_gc> *v;
879   tree constr;
880   tree inner_type = TREE_TYPE (type);
881   tree cond_type = TREE_TYPE (TREE_TYPE (a));
882   tree comp_inner_type = cond_type;
883   tree width = TYPE_SIZE (inner_type);
884   tree index = bitsize_int (0);
885   tree comp_width = width;
886   tree comp_index = index;
887   int i;
888   location_t loc = gimple_location (gsi_stmt (*gsi));
889 
890   if (!is_gimple_val (a))
891     {
892       gcc_assert (COMPARISON_CLASS_P (a));
893       a_is_comparison = true;
894       a1 = TREE_OPERAND (a, 0);
895       a2 = TREE_OPERAND (a, 1);
896       comp_inner_type = TREE_TYPE (TREE_TYPE (a1));
897       comp_width = TYPE_SIZE (comp_inner_type);
898     }
899 
900   if (expand_vec_cond_expr_p (type, TREE_TYPE (a1), TREE_CODE (a)))
901     return;
902 
903   /* Handle vector boolean types with bitmasks.  If there is a comparison
904      and we can expand the comparison into the vector boolean bitmask,
905      or otherwise if it is compatible with type, we can transform
906       vbfld_1 = x_2 < y_3 ? vbfld_4 : vbfld_5;
907      into
908       tmp_6 = x_2 < y_3;
909       tmp_7 = tmp_6 & vbfld_4;
910       tmp_8 = ~tmp_6;
911       tmp_9 = tmp_8 & vbfld_5;
912       vbfld_1 = tmp_7 | tmp_9;
913      Similarly for vbfld_10 instead of x_2 < y_3.  */
914   if (VECTOR_BOOLEAN_TYPE_P (type)
915       && SCALAR_INT_MODE_P (TYPE_MODE (type))
916       && known_lt (GET_MODE_BITSIZE (TYPE_MODE (type)),
917 		   TYPE_VECTOR_SUBPARTS (type)
918 		   * GET_MODE_BITSIZE (SCALAR_TYPE_MODE (TREE_TYPE (type))))
919       && (a_is_comparison
920 	  ? useless_type_conversion_p (type, TREE_TYPE (a))
921 	  : expand_vec_cmp_expr_p (TREE_TYPE (a1), type, TREE_CODE (a))))
922     {
923       if (a_is_comparison)
924 	a = gimplify_build2 (gsi, TREE_CODE (a), type, a1, a2);
925       a1 = gimplify_build2 (gsi, BIT_AND_EXPR, type, a, b);
926       a2 = gimplify_build1 (gsi, BIT_NOT_EXPR, type, a);
927       a2 = gimplify_build2 (gsi, BIT_AND_EXPR, type, a2, c);
928       a = gimplify_build2 (gsi, BIT_IOR_EXPR, type, a1, a2);
929       gimple_assign_set_rhs_from_tree (gsi, a);
930       update_stmt (gsi_stmt (*gsi));
931       return;
932     }
933 
934   /* TODO: try and find a smaller vector type.  */
935 
936   warning_at (loc, OPT_Wvector_operation_performance,
937 	      "vector condition will be expanded piecewise");
938 
939   int nunits = nunits_for_known_piecewise_op (type);
940   vec_alloc (v, nunits);
941   for (i = 0; i < nunits; i++)
942     {
943       tree aa, result;
944       tree bb = tree_vec_extract (gsi, inner_type, b, width, index);
945       tree cc = tree_vec_extract (gsi, inner_type, c, width, index);
946       if (a_is_comparison)
947 	{
948 	  tree aa1 = tree_vec_extract (gsi, comp_inner_type, a1,
949 				       comp_width, comp_index);
950 	  tree aa2 = tree_vec_extract (gsi, comp_inner_type, a2,
951 				       comp_width, comp_index);
952 	  aa = fold_build2 (TREE_CODE (a), cond_type, aa1, aa2);
953 	}
954       else
955 	aa = tree_vec_extract (gsi, cond_type, a, width, index);
956       result = gimplify_build3 (gsi, COND_EXPR, inner_type, aa, bb, cc);
957       constructor_elt ce = {NULL_TREE, result};
958       v->quick_push (ce);
959       index = int_const_binop (PLUS_EXPR, index, width);
960       if (width == comp_width)
961 	comp_index = index;
962       else
963 	comp_index = int_const_binop (PLUS_EXPR, comp_index, comp_width);
964     }
965 
966   constr = build_constructor (type, v);
967   gimple_assign_set_rhs_from_tree (gsi, constr);
968   update_stmt (gsi_stmt (*gsi));
969 }
970 
971 static tree
972 expand_vector_operation (gimple_stmt_iterator *gsi, tree type, tree compute_type,
973 			 gassign *assign, enum tree_code code)
974 {
975   machine_mode compute_mode = TYPE_MODE (compute_type);
976 
977   /* If the compute mode is not a vector mode (hence we are not decomposing
978      a BLKmode vector to smaller, hardware-supported vectors), we may want
979      to expand the operations in parallel.  */
980   if (!VECTOR_MODE_P (compute_mode))
981     switch (code)
982       {
983       case PLUS_EXPR:
984       case MINUS_EXPR:
985         if (ANY_INTEGRAL_TYPE_P (type) && !TYPE_OVERFLOW_TRAPS (type))
986 	  return expand_vector_addition (gsi, do_binop, do_plus_minus, type,
987 					 gimple_assign_rhs1 (assign),
988 					 gimple_assign_rhs2 (assign), code);
989 	break;
990 
991       case NEGATE_EXPR:
992         if (ANY_INTEGRAL_TYPE_P (type) && !TYPE_OVERFLOW_TRAPS (type))
993           return expand_vector_addition (gsi, do_unop, do_negate, type,
994 		      		         gimple_assign_rhs1 (assign),
995 					 NULL_TREE, code);
996 	break;
997 
998       case BIT_AND_EXPR:
999       case BIT_IOR_EXPR:
1000       case BIT_XOR_EXPR:
1001         return expand_vector_parallel (gsi, do_binop, type,
1002 		      		       gimple_assign_rhs1 (assign),
1003 				       gimple_assign_rhs2 (assign), code);
1004 
1005       case BIT_NOT_EXPR:
1006         return expand_vector_parallel (gsi, do_unop, type,
1007 		      		       gimple_assign_rhs1 (assign),
1008         			       NULL_TREE, code);
1009       case EQ_EXPR:
1010       case NE_EXPR:
1011       case GT_EXPR:
1012       case LT_EXPR:
1013       case GE_EXPR:
1014       case LE_EXPR:
1015       case UNEQ_EXPR:
1016       case UNGT_EXPR:
1017       case UNLT_EXPR:
1018       case UNGE_EXPR:
1019       case UNLE_EXPR:
1020       case LTGT_EXPR:
1021       case ORDERED_EXPR:
1022       case UNORDERED_EXPR:
1023 	{
1024 	  tree rhs1 = gimple_assign_rhs1 (assign);
1025 	  tree rhs2 = gimple_assign_rhs2 (assign);
1026 
1027 	  return expand_vector_comparison (gsi, type, rhs1, rhs2, code);
1028 	}
1029 
1030       case TRUNC_DIV_EXPR:
1031       case TRUNC_MOD_EXPR:
1032 	{
1033 	  tree rhs1 = gimple_assign_rhs1 (assign);
1034 	  tree rhs2 = gimple_assign_rhs2 (assign);
1035 	  tree ret;
1036 
1037 	  if (!optimize
1038 	      || !VECTOR_INTEGER_TYPE_P (type)
1039 	      || TREE_CODE (rhs2) != VECTOR_CST
1040 	      || !VECTOR_MODE_P (TYPE_MODE (type)))
1041 	    break;
1042 
1043 	  ret = expand_vector_divmod (gsi, type, rhs1, rhs2, code);
1044 	  if (ret != NULL_TREE)
1045 	    return ret;
1046 	  break;
1047 	}
1048 
1049       default:
1050 	break;
1051       }
1052 
1053   if (TREE_CODE_CLASS (code) == tcc_unary)
1054     return expand_vector_piecewise (gsi, do_unop, type, compute_type,
1055 				    gimple_assign_rhs1 (assign),
1056 				    NULL_TREE, code);
1057   else
1058     return expand_vector_piecewise (gsi, do_binop, type, compute_type,
1059 				    gimple_assign_rhs1 (assign),
1060 				    gimple_assign_rhs2 (assign), code);
1061 }
1062 
1063 /* Try to optimize
1064    a_5 = { b_7, b_7 + 3, b_7 + 6, b_7 + 9 };
1065    style stmts into:
1066    _9 = { b_7, b_7, b_7, b_7 };
1067    a_5 = _9 + { 0, 3, 6, 9 };
1068    because vector splat operation is usually more efficient
1069    than piecewise initialization of the vector.  */
1070 
1071 static void
1072 optimize_vector_constructor (gimple_stmt_iterator *gsi)
1073 {
1074   gassign *stmt = as_a <gassign *> (gsi_stmt (*gsi));
1075   tree lhs = gimple_assign_lhs (stmt);
1076   tree rhs = gimple_assign_rhs1 (stmt);
1077   tree type = TREE_TYPE (rhs);
1078   unsigned int i, j;
1079   unsigned HOST_WIDE_INT nelts;
1080   bool all_same = true;
1081   constructor_elt *elt;
1082   gimple *g;
1083   tree base = NULL_TREE;
1084   optab op;
1085 
1086   if (!TYPE_VECTOR_SUBPARTS (type).is_constant (&nelts)
1087       || nelts <= 2
1088       || CONSTRUCTOR_NELTS (rhs) != nelts)
1089     return;
1090   op = optab_for_tree_code (PLUS_EXPR, type, optab_default);
1091   if (op == unknown_optab
1092       || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
1093     return;
1094   FOR_EACH_VEC_SAFE_ELT (CONSTRUCTOR_ELTS (rhs), i, elt)
1095     if (TREE_CODE (elt->value) != SSA_NAME
1096 	|| TREE_CODE (TREE_TYPE (elt->value)) == VECTOR_TYPE)
1097       return;
1098     else
1099       {
1100 	tree this_base = elt->value;
1101 	if (this_base != CONSTRUCTOR_ELT (rhs, 0)->value)
1102 	  all_same = false;
1103 	for (j = 0; j < nelts + 1; j++)
1104 	  {
1105 	    g = SSA_NAME_DEF_STMT (this_base);
1106 	    if (is_gimple_assign (g)
1107 		&& gimple_assign_rhs_code (g) == PLUS_EXPR
1108 		&& TREE_CODE (gimple_assign_rhs2 (g)) == INTEGER_CST
1109 		&& TREE_CODE (gimple_assign_rhs1 (g)) == SSA_NAME
1110 		&& !SSA_NAME_OCCURS_IN_ABNORMAL_PHI (gimple_assign_rhs1 (g)))
1111 	      this_base = gimple_assign_rhs1 (g);
1112 	    else
1113 	      break;
1114 	  }
1115 	if (i == 0)
1116 	  base = this_base;
1117 	else if (this_base != base)
1118 	  return;
1119       }
1120   if (all_same)
1121     return;
1122   tree_vector_builder cst (type, nelts, 1);
1123   for (i = 0; i < nelts; i++)
1124     {
1125       tree this_base = CONSTRUCTOR_ELT (rhs, i)->value;
1126       tree elt = build_zero_cst (TREE_TYPE (base));
1127       while (this_base != base)
1128 	{
1129 	  g = SSA_NAME_DEF_STMT (this_base);
1130 	  elt = fold_binary (PLUS_EXPR, TREE_TYPE (base),
1131 			     elt, gimple_assign_rhs2 (g));
1132 	  if (elt == NULL_TREE
1133 	      || TREE_CODE (elt) != INTEGER_CST
1134 	      || TREE_OVERFLOW (elt))
1135 	    return;
1136 	  this_base = gimple_assign_rhs1 (g);
1137 	}
1138       cst.quick_push (elt);
1139     }
1140   for (i = 0; i < nelts; i++)
1141     CONSTRUCTOR_ELT (rhs, i)->value = base;
1142   g = gimple_build_assign (make_ssa_name (type), rhs);
1143   gsi_insert_before (gsi, g, GSI_SAME_STMT);
1144   g = gimple_build_assign (lhs, PLUS_EXPR, gimple_assign_lhs (g),
1145 			   cst.build ());
1146   gsi_replace (gsi, g, false);
1147 }
1148 
/* Return a type for the widest vector mode whose components are of type
   TYPE and which is supported by optab OP, or NULL_TREE if none is
   found.  */
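/* For example (illustrative), for a 32-bit float element type on a
   target providing both V4SF and V8SF support for optab OP, the V8SF
   vector type is returned.  */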
1151 
1152 static tree
1153 type_for_widest_vector_mode (tree type, optab op)
1154 {
1155   machine_mode inner_mode = TYPE_MODE (type);
1156   machine_mode best_mode = VOIDmode, mode;
1157   poly_int64 best_nunits = 0;
1158 
1159   if (SCALAR_FLOAT_MODE_P (inner_mode))
1160     mode = MIN_MODE_VECTOR_FLOAT;
1161   else if (SCALAR_FRACT_MODE_P (inner_mode))
1162     mode = MIN_MODE_VECTOR_FRACT;
1163   else if (SCALAR_UFRACT_MODE_P (inner_mode))
1164     mode = MIN_MODE_VECTOR_UFRACT;
1165   else if (SCALAR_ACCUM_MODE_P (inner_mode))
1166     mode = MIN_MODE_VECTOR_ACCUM;
1167   else if (SCALAR_UACCUM_MODE_P (inner_mode))
1168     mode = MIN_MODE_VECTOR_UACCUM;
1169   else if (inner_mode == BImode)
1170     mode = MIN_MODE_VECTOR_BOOL;
1171   else
1172     mode = MIN_MODE_VECTOR_INT;
1173 
1174   FOR_EACH_MODE_FROM (mode, mode)
1175     if (GET_MODE_INNER (mode) == inner_mode
1176 	&& maybe_gt (GET_MODE_NUNITS (mode), best_nunits)
1177 	&& optab_handler (op, mode) != CODE_FOR_nothing)
1178       best_mode = mode, best_nunits = GET_MODE_NUNITS (mode);
1179 
1180   if (best_mode == VOIDmode)
1181     return NULL_TREE;
1182   else
1183     return build_vector_type_for_mode (type, best_mode);
1184 }
1185 
1186 
/* Build a reference to the element of the vector VECT.  The function
   returns either the element itself, a BIT_FIELD_REF, or an ARRAY_REF
   expression.

   GSI is required to insert temporary variables while building a
   reference to the element of the vector VECT.

   PTMPVEC is a pointer to a temporary variable used for caching
   purposes.  If PTMPVEC is NULL, a new temporary variable will be
   created.  */
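/* For example, a constant index into a VECTOR_CST or a flat CONSTRUCTOR
   folds to the element itself, a constant index into any other vector
   becomes a BIT_FIELD_REF, and a variable index IDX is rewritten as an
   ARRAY_REF of a temporary copy of the vector, conceptually
   VIEW_CONVERT_EXPR<elt[N]>(tmpvec)[IDX].  */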
1197 static tree
1198 vector_element (gimple_stmt_iterator *gsi, tree vect, tree idx, tree *ptmpvec)
1199 {
1200   tree vect_type, vect_elt_type;
1201   gimple *asgn;
1202   tree tmpvec;
1203   tree arraytype;
1204   bool need_asgn = true;
1205   unsigned int elements;
1206 
1207   vect_type = TREE_TYPE (vect);
1208   vect_elt_type = TREE_TYPE (vect_type);
1209   elements = nunits_for_known_piecewise_op (vect_type);
1210 
1211   if (TREE_CODE (idx) == INTEGER_CST)
1212     {
1213       unsigned HOST_WIDE_INT index;
1214 
1215       /* Given that we're about to compute a binary modulus,
1216 	 we don't care about the high bits of the value.  */
1217       index = TREE_INT_CST_LOW (idx);
1218       if (!tree_fits_uhwi_p (idx) || index >= elements)
1219 	{
1220 	  index &= elements - 1;
1221 	  idx = build_int_cst (TREE_TYPE (idx), index);
1222 	}
1223 
1224       /* When lowering a vector statement sequence do some easy
1225          simplification by looking through intermediate vector results.  */
1226       if (TREE_CODE (vect) == SSA_NAME)
1227 	{
1228 	  gimple *def_stmt = SSA_NAME_DEF_STMT (vect);
1229 	  if (is_gimple_assign (def_stmt)
1230 	      && (gimple_assign_rhs_code (def_stmt) == VECTOR_CST
1231 		  || gimple_assign_rhs_code (def_stmt) == CONSTRUCTOR))
1232 	    vect = gimple_assign_rhs1 (def_stmt);
1233 	}
1234 
1235       if (TREE_CODE (vect) == VECTOR_CST)
1236 	return VECTOR_CST_ELT (vect, index);
1237       else if (TREE_CODE (vect) == CONSTRUCTOR
1238 	       && (CONSTRUCTOR_NELTS (vect) == 0
1239 		   || TREE_CODE (TREE_TYPE (CONSTRUCTOR_ELT (vect, 0)->value))
1240 		      != VECTOR_TYPE))
1241         {
1242 	  if (index < CONSTRUCTOR_NELTS (vect))
1243 	    return CONSTRUCTOR_ELT (vect, index)->value;
1244           return build_zero_cst (vect_elt_type);
1245         }
1246       else
1247         {
1248 	  tree size = TYPE_SIZE (vect_elt_type);
1249 	  tree pos = fold_build2 (MULT_EXPR, bitsizetype, bitsize_int (index),
1250 				  size);
1251 	  return fold_build3 (BIT_FIELD_REF, vect_elt_type, vect, size, pos);
1252         }
1253     }
1254 
1255   if (!ptmpvec)
1256     tmpvec = create_tmp_var (vect_type, "vectmp");
1257   else if (!*ptmpvec)
1258     tmpvec = *ptmpvec = create_tmp_var (vect_type, "vectmp");
1259   else
1260     {
1261       tmpvec = *ptmpvec;
1262       need_asgn = false;
1263     }
1264 
1265   if (need_asgn)
1266     {
1267       TREE_ADDRESSABLE (tmpvec) = 1;
1268       asgn = gimple_build_assign (tmpvec, vect);
1269       gsi_insert_before (gsi, asgn, GSI_SAME_STMT);
1270     }
1271 
1272   arraytype = build_array_type_nelts (vect_elt_type, elements);
1273   return build4 (ARRAY_REF, vect_elt_type,
1274                  build1 (VIEW_CONVERT_EXPR, arraytype, tmpvec),
1275                  idx, NULL_TREE, NULL_TREE);
1276 }
1277 
/* Check if VEC_PERM_EXPR within the given setting is supported
   by hardware, or lower it piecewise.

   When VEC_PERM_EXPR has the same first and second operands:
   VEC_PERM_EXPR <v0, v0, mask> the lowered version would be
   {v0[mask[0]], v0[mask[1]], ...}
   MASK and V0 must have the same number of elements.

   Otherwise VEC_PERM_EXPR <v0, v1, mask> is lowered to
   {mask[0] < len(v0) ? v0[mask[0]] : v1[mask[0]], ...}
   V0 and V1 must have the same type.  MASK, V0 and V1 must have the
   same number of elements.  */
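/* For example, VEC_PERM_EXPR <{1,2,3,4}, {5,6,7,8}, {0,4,1,5}> selects
   elements 0 and 1 of each input and interleaves them, giving {1,5,2,6};
   when the mask is not constant, each output element is computed with
   the conditional shown above.  */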
1290 
1291 static void
1292 lower_vec_perm (gimple_stmt_iterator *gsi)
1293 {
1294   gassign *stmt = as_a <gassign *> (gsi_stmt (*gsi));
1295   tree mask = gimple_assign_rhs3 (stmt);
1296   tree vec0 = gimple_assign_rhs1 (stmt);
1297   tree vec1 = gimple_assign_rhs2 (stmt);
1298   tree vect_type = TREE_TYPE (vec0);
1299   tree mask_type = TREE_TYPE (mask);
1300   tree vect_elt_type = TREE_TYPE (vect_type);
1301   tree mask_elt_type = TREE_TYPE (mask_type);
1302   unsigned HOST_WIDE_INT elements;
1303   vec<constructor_elt, va_gc> *v;
1304   tree constr, t, si, i_val;
1305   tree vec0tmp = NULL_TREE, vec1tmp = NULL_TREE, masktmp = NULL_TREE;
1306   bool two_operand_p = !operand_equal_p (vec0, vec1, 0);
1307   location_t loc = gimple_location (gsi_stmt (*gsi));
1308   unsigned i;
1309 
1310   if (!TYPE_VECTOR_SUBPARTS (vect_type).is_constant (&elements))
1311     return;
1312 
1313   if (TREE_CODE (mask) == SSA_NAME)
1314     {
1315       gimple *def_stmt = SSA_NAME_DEF_STMT (mask);
1316       if (is_gimple_assign (def_stmt)
1317 	  && gimple_assign_rhs_code (def_stmt) == VECTOR_CST)
1318 	mask = gimple_assign_rhs1 (def_stmt);
1319     }
1320 
1321   vec_perm_builder sel_int;
1322 
1323   if (TREE_CODE (mask) == VECTOR_CST
1324       && tree_to_vec_perm_builder (&sel_int, mask))
1325     {
1326       vec_perm_indices indices (sel_int, 2, elements);
1327       if (can_vec_perm_const_p (TYPE_MODE (vect_type), indices))
1328 	{
1329 	  gimple_assign_set_rhs3 (stmt, mask);
1330 	  update_stmt (stmt);
1331 	  return;
1332 	}
1333       /* Also detect vec_shr pattern - VEC_PERM_EXPR with zero
1334 	 vector as VEC1 and a right element shift MASK.  */
1335       if (optab_handler (vec_shr_optab, TYPE_MODE (vect_type))
1336 	  != CODE_FOR_nothing
1337 	  && TREE_CODE (vec1) == VECTOR_CST
1338 	  && initializer_zerop (vec1)
1339 	  && maybe_ne (indices[0], 0)
1340 	  && known_lt (poly_uint64 (indices[0]), elements))
1341 	{
1342 	  bool ok_p = indices.series_p (0, 1, indices[0], 1);
1343 	  if (!ok_p)
1344 	    {
1345 	      for (i = 1; i < elements; ++i)
1346 		{
1347 		  poly_uint64 actual = indices[i];
1348 		  poly_uint64 expected = i + indices[0];
1349 		  /* Indices into the second vector are all equivalent.  */
1350 		  if (maybe_lt (actual, elements)
1351 		      ? maybe_ne (actual, expected)
1352 		      : maybe_lt (expected, elements))
1353 		    break;
1354 		}
1355 	      ok_p = i == elements;
1356 	    }
1357 	  if (ok_p)
1358 	    {
1359 	      gimple_assign_set_rhs3 (stmt, mask);
1360 	      update_stmt (stmt);
1361 	      return;
1362 	    }
1363 	}
1364     }
1365   else if (can_vec_perm_var_p (TYPE_MODE (vect_type)))
1366     return;
1367 
1368   warning_at (loc, OPT_Wvector_operation_performance,
1369               "vector shuffling operation will be expanded piecewise");
1370 
1371   vec_alloc (v, elements);
1372   for (i = 0; i < elements; i++)
1373     {
1374       si = size_int (i);
1375       i_val = vector_element (gsi, mask, si, &masktmp);
1376 
1377       if (TREE_CODE (i_val) == INTEGER_CST)
1378         {
1379 	  unsigned HOST_WIDE_INT index;
1380 
1381 	  index = TREE_INT_CST_LOW (i_val);
1382 	  if (!tree_fits_uhwi_p (i_val) || index >= elements)
1383 	    i_val = build_int_cst (mask_elt_type, index & (elements - 1));
1384 
1385           if (two_operand_p && (index & elements) != 0)
1386 	    t = vector_element (gsi, vec1, i_val, &vec1tmp);
1387 	  else
1388 	    t = vector_element (gsi, vec0, i_val, &vec0tmp);
1389 
1390           t = force_gimple_operand_gsi (gsi, t, true, NULL_TREE,
1391 					true, GSI_SAME_STMT);
1392         }
1393       else
1394         {
1395 	  tree cond = NULL_TREE, v0_val;
1396 
1397 	  if (two_operand_p)
1398 	    {
1399 	      cond = fold_build2 (BIT_AND_EXPR, mask_elt_type, i_val,
1400 			          build_int_cst (mask_elt_type, elements));
1401 	      cond = force_gimple_operand_gsi (gsi, cond, true, NULL_TREE,
1402 					       true, GSI_SAME_STMT);
1403 	    }
1404 
1405 	  i_val = fold_build2 (BIT_AND_EXPR, mask_elt_type, i_val,
1406 			       build_int_cst (mask_elt_type, elements - 1));
1407 	  i_val = force_gimple_operand_gsi (gsi, i_val, true, NULL_TREE,
1408 					    true, GSI_SAME_STMT);
1409 
1410 	  v0_val = vector_element (gsi, vec0, i_val, &vec0tmp);
1411 	  v0_val = force_gimple_operand_gsi (gsi, v0_val, true, NULL_TREE,
1412 					     true, GSI_SAME_STMT);
1413 
1414 	  if (two_operand_p)
1415 	    {
1416 	      tree v1_val;
1417 
1418 	      v1_val = vector_element (gsi, vec1, i_val, &vec1tmp);
1419 	      v1_val = force_gimple_operand_gsi (gsi, v1_val, true, NULL_TREE,
1420 						 true, GSI_SAME_STMT);
1421 
1422 	      cond = fold_build2 (EQ_EXPR, boolean_type_node,
1423 				  cond, build_zero_cst (mask_elt_type));
1424 	      cond = fold_build3 (COND_EXPR, vect_elt_type,
1425 				  cond, v0_val, v1_val);
1426               t = force_gimple_operand_gsi (gsi, cond, true, NULL_TREE,
1427 					    true, GSI_SAME_STMT);
1428             }
1429 	  else
1430 	    t = v0_val;
1431         }
1432 
1433       CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, t);
1434     }
1435 
1436   constr = build_constructor (vect_type, v);
1437   gimple_assign_set_rhs_from_tree (gsi, constr);
1438   update_stmt (gsi_stmt (*gsi));
1439 }
1440 
1441 /* If OP is a uniform vector return the element it is a splat from.  */
1442 
1443 static tree
1444 ssa_uniform_vector_p (tree op)
1445 {
1446   if (TREE_CODE (op) == VECTOR_CST
1447       || TREE_CODE (op) == VEC_DUPLICATE_EXPR
1448       || TREE_CODE (op) == CONSTRUCTOR)
1449     return uniform_vector_p (op);
1450   if (TREE_CODE (op) == SSA_NAME)
1451     {
1452       gimple *def_stmt = SSA_NAME_DEF_STMT (op);
1453       if (gimple_assign_single_p (def_stmt))
1454 	return uniform_vector_p (gimple_assign_rhs1 (def_stmt));
1455     }
1456   return NULL_TREE;
1457 }
1458 
/* Return the type in which the operation CODE, with optab OP, can be
   computed when applied to vector type TYPE: either TYPE itself, a
   narrower vector type, or TYPE's element type as a fallback.  */
1461 
1462 static tree
1463 get_compute_type (enum tree_code code, optab op, tree type)
1464 {
1465   /* For very wide vectors, try using a smaller vector mode.  */
1466   tree compute_type = type;
1467   if (op
1468       && (!VECTOR_MODE_P (TYPE_MODE (type))
1469 	  || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing))
1470     {
1471       tree vector_compute_type
1472 	= type_for_widest_vector_mode (TREE_TYPE (type), op);
1473       if (vector_compute_type != NULL_TREE
1474 	  && subparts_gt (compute_type, vector_compute_type)
1475 	  && maybe_ne (TYPE_VECTOR_SUBPARTS (vector_compute_type), 1U)
1476 	  && (optab_handler (op, TYPE_MODE (vector_compute_type))
1477 	      != CODE_FOR_nothing))
1478 	compute_type = vector_compute_type;
1479     }
1480 
1481   /* If we are breaking a BLKmode vector into smaller pieces,
1482      type_for_widest_vector_mode has already looked into the optab,
1483      so skip these checks.  */
1484   if (compute_type == type)
1485     {
1486       machine_mode compute_mode = TYPE_MODE (compute_type);
1487       if (VECTOR_MODE_P (compute_mode))
1488 	{
1489 	  if (op && optab_handler (op, compute_mode) != CODE_FOR_nothing)
1490 	    return compute_type;
1491 	  if (code == MULT_HIGHPART_EXPR
1492 	      && can_mult_highpart_p (compute_mode,
1493 				      TYPE_UNSIGNED (compute_type)))
1494 	    return compute_type;
1495 	}
1496       /* There is no operation in hardware, so fall back to scalars.  */
1497       compute_type = TREE_TYPE (type);
1498     }
1499 
1500   return compute_type;
1501 }
1502 
1503 static tree
1504 do_cond (gimple_stmt_iterator *gsi, tree inner_type, tree a, tree b,
1505 	 tree bitpos, tree bitsize, enum tree_code code,
1506 	 tree type ATTRIBUTE_UNUSED)
1507 {
1508   if (TREE_CODE (TREE_TYPE (a)) == VECTOR_TYPE)
1509     a = tree_vec_extract (gsi, inner_type, a, bitsize, bitpos);
1510   if (TREE_CODE (TREE_TYPE (b)) == VECTOR_TYPE)
1511     b = tree_vec_extract (gsi, inner_type, b, bitsize, bitpos);
1512   tree cond = gimple_assign_rhs1 (gsi_stmt (*gsi));
1513   return gimplify_build3 (gsi, code, inner_type, unshare_expr (cond), a, b);
1514 }
1515 
1516 /* Expand a vector COND_EXPR to scalars, piecewise.  */
1517 static void
1518 expand_vector_scalar_condition (gimple_stmt_iterator *gsi)
1519 {
1520   gassign *stmt = as_a <gassign *> (gsi_stmt (*gsi));
1521   tree type = gimple_expr_type (stmt);
1522   tree compute_type = get_compute_type (COND_EXPR, mov_optab, type);
1523   machine_mode compute_mode = TYPE_MODE (compute_type);
1524   gcc_assert (compute_mode != BLKmode);
1525   tree lhs = gimple_assign_lhs (stmt);
1526   tree rhs2 = gimple_assign_rhs2 (stmt);
1527   tree rhs3 = gimple_assign_rhs3 (stmt);
1528   tree new_rhs;
1529 
1530   /* If the compute mode is not a vector mode (hence we are not decomposing
1531      a BLKmode vector to smaller, hardware-supported vectors), we may want
1532      to expand the operations in parallel.  */
1533   if (!VECTOR_MODE_P (compute_mode))
1534     new_rhs = expand_vector_parallel (gsi, do_cond, type, rhs2, rhs3,
1535 				      COND_EXPR);
1536   else
1537     new_rhs = expand_vector_piecewise (gsi, do_cond, type, compute_type,
1538 				       rhs2, rhs3, COND_EXPR);
1539   if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (new_rhs)))
1540     new_rhs = gimplify_build1 (gsi, VIEW_CONVERT_EXPR, TREE_TYPE (lhs),
1541 			       new_rhs);
1542 
  /* NOTE:  We should avoid using gimple_assign_set_rhs_from_tree.  One
     way to do that is to change expand_vector_operation and its callees
     to return a tree_code, RHS1 and RHS2 instead of a tree.  */
1546   gimple_assign_set_rhs_from_tree (gsi, new_rhs);
1547   update_stmt (gsi_stmt (*gsi));
1548 }
1549 
1550 /* Process one statement.  If we identify a vector operation, expand it.  */
1551 
1552 static void
1553 expand_vector_operations_1 (gimple_stmt_iterator *gsi)
1554 {
1555   tree lhs, rhs1, rhs2 = NULL, type, compute_type = NULL_TREE;
1556   enum tree_code code;
1557   optab op = unknown_optab;
1558   enum gimple_rhs_class rhs_class;
1559   tree new_rhs;
1560 
  /* Only consider GIMPLE_ASSIGN statements.  */
1562   gassign *stmt = dyn_cast <gassign *> (gsi_stmt (*gsi));
1563   if (!stmt)
1564     return;
1565 
1566   code = gimple_assign_rhs_code (stmt);
1567   rhs_class = get_gimple_rhs_class (code);
1568   lhs = gimple_assign_lhs (stmt);
1569 
1570   if (code == VEC_PERM_EXPR)
1571     {
1572       lower_vec_perm (gsi);
1573       return;
1574     }
1575 
1576   if (code == VEC_COND_EXPR)
1577     {
1578       expand_vector_condition (gsi);
1579       return;
1580     }
1581 
1582   if (code == COND_EXPR
1583       && TREE_CODE (TREE_TYPE (gimple_assign_lhs (stmt))) == VECTOR_TYPE
1584       && TYPE_MODE (TREE_TYPE (gimple_assign_lhs (stmt))) == BLKmode)
1585     {
1586       expand_vector_scalar_condition (gsi);
1587       return;
1588     }
1589 
1590   if (code == CONSTRUCTOR
1591       && TREE_CODE (lhs) == SSA_NAME
1592       && VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (lhs)))
1593       && !gimple_clobber_p (stmt)
1594       && optimize)
1595     {
1596       optimize_vector_constructor (gsi);
1597       return;
1598     }
1599 
1600   if (rhs_class != GIMPLE_UNARY_RHS && rhs_class != GIMPLE_BINARY_RHS)
1601     return;
1602 
1603   rhs1 = gimple_assign_rhs1 (stmt);
1604   type = gimple_expr_type (stmt);
1605   if (rhs_class == GIMPLE_BINARY_RHS)
1606     rhs2 = gimple_assign_rhs2 (stmt);
1607 
1608   if (!VECTOR_TYPE_P (type)
1609       || !VECTOR_TYPE_P (TREE_TYPE (rhs1)))
1610     return;
1611 
  /* A scalar operation pretending to be a vector one: a vector boolean
     (mask) type whose mode is a scalar integer mode is really operated on
     as a scalar and needs no lowering here.  */
1613   if (VECTOR_BOOLEAN_TYPE_P (type)
1614       && !VECTOR_MODE_P (TYPE_MODE (type))
1615       && TYPE_MODE (type) != BLKmode)
1616     return;
1617 
  /* If the vector operation operates on uniform vectors (all elements
     the same), implement it with a single scalar operation and a splat
     if the target supports the scalar operation.  */
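  /* For instance (illustrative), {a,a,a,a} + {b,b,b,b} becomes
       tem = a + b;
       lhs = {tem,tem,tem,tem};
     when the scalar addition is available for the element mode.  */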
1621   tree srhs1, srhs2 = NULL_TREE;
1622   if ((srhs1 = ssa_uniform_vector_p (rhs1)) != NULL_TREE
1623       && (rhs2 == NULL_TREE
1624 	  || (! VECTOR_TYPE_P (TREE_TYPE (rhs2))
1625 	      && (srhs2 = rhs2))
1626 	  || (srhs2 = ssa_uniform_vector_p (rhs2)) != NULL_TREE)
1627       /* As we query direct optabs restrict to non-convert operations.  */
1628       && TYPE_MODE (TREE_TYPE (type)) == TYPE_MODE (TREE_TYPE (srhs1)))
1629     {
1630       op = optab_for_tree_code (code, TREE_TYPE (type), optab_scalar);
1631       if (op >= FIRST_NORM_OPTAB && op <= LAST_NORM_OPTAB
1632 	  && optab_handler (op, TYPE_MODE (TREE_TYPE (type))) != CODE_FOR_nothing)
1633 	{
1634 	  tree slhs = make_ssa_name (TREE_TYPE (srhs1));
1635 	  gimple *repl = gimple_build_assign (slhs, code, srhs1, srhs2);
1636 	  gsi_insert_before (gsi, repl, GSI_SAME_STMT);
1637 	  gimple_assign_set_rhs_from_tree (gsi,
1638 					   build_vector_from_val (type, slhs));
1639 	  update_stmt (stmt);
1640 	  return;
1641 	}
1642     }
1643 
1644   if (CONVERT_EXPR_CODE_P (code)
1645       || code == FLOAT_EXPR
1646       || code == FIX_TRUNC_EXPR
1647       || code == VIEW_CONVERT_EXPR)
1648     return;
1649 
  /* The signedness is determined from the input argument.  */
1651   if (code == VEC_UNPACK_FLOAT_HI_EXPR
1652       || code == VEC_UNPACK_FLOAT_LO_EXPR)
1653     {
1654       type = TREE_TYPE (rhs1);
1655       /* We do not know how to scalarize those.  */
1656       return;
1657     }
1658 
  /* For widening/narrowing vector operations, the relevant type is that
     of the arguments, not of the widened result.  VEC_UNPACK_FLOAT_*_EXPR
     is handled in the same way above.  */
1662   if (code == WIDEN_SUM_EXPR
1663       || code == VEC_WIDEN_MULT_HI_EXPR
1664       || code == VEC_WIDEN_MULT_LO_EXPR
1665       || code == VEC_WIDEN_MULT_EVEN_EXPR
1666       || code == VEC_WIDEN_MULT_ODD_EXPR
1667       || code == VEC_UNPACK_HI_EXPR
1668       || code == VEC_UNPACK_LO_EXPR
1669       || code == VEC_PACK_TRUNC_EXPR
1670       || code == VEC_PACK_SAT_EXPR
1671       || code == VEC_PACK_FIX_TRUNC_EXPR
1672       || code == VEC_WIDEN_LSHIFT_HI_EXPR
1673       || code == VEC_WIDEN_LSHIFT_LO_EXPR)
1674     {
1675       type = TREE_TYPE (rhs1);
1676       /* We do not know how to scalarize those.  */
1677       return;
1678     }
1679 
  /* Choose between vector shift/rotate by vector and vector shift/rotate
     by scalar.  */
1682   if (code == LSHIFT_EXPR
1683       || code == RSHIFT_EXPR
1684       || code == LROTATE_EXPR
1685       || code == RROTATE_EXPR)
1686     {
1687       optab opv;
1688 
1689       /* Check whether we have vector <op> {x,x,x,x} where x
1690          could be a scalar variable or a constant.  Transform
1691          vector <op> {x,x,x,x} ==> vector <op> scalar.  */
1692       if (VECTOR_INTEGER_TYPE_P (TREE_TYPE (rhs2)))
1693         {
1694           tree first;
1695 
1696           if ((first = ssa_uniform_vector_p (rhs2)) != NULL_TREE)
1697             {
1698               gimple_assign_set_rhs2 (stmt, first);
1699               update_stmt (stmt);
1700               rhs2 = first;
1701             }
1702         }
1703 
1704       opv = optab_for_tree_code (code, type, optab_vector);
1705       if (VECTOR_INTEGER_TYPE_P (TREE_TYPE (rhs2)))
1706 	op = opv;
1707       else
1708 	{
1709           op = optab_for_tree_code (code, type, optab_scalar);
1710 
1711 	  compute_type = get_compute_type (code, op, type);
1712 	  if (compute_type == type)
1713 	    return;
	  /* The rtl expander will expand vector/scalar as vector/vector
	     if necessary.  Pick the one with the wider vector type.  */
1716 	  tree compute_vtype = get_compute_type (code, opv, type);
1717 	  if (subparts_gt (compute_vtype, compute_type))
1718 	    {
1719 	      compute_type = compute_vtype;
1720 	      op = opv;
1721 	    }
1722 	}
1723 
1724       if (code == LROTATE_EXPR || code == RROTATE_EXPR)
1725 	{
1726 	  if (compute_type == NULL_TREE)
1727 	    compute_type = get_compute_type (code, op, type);
1728 	  if (compute_type == type)
1729 	    return;
	  /* Before splitting vector rotates into scalar rotates, see if we
	     can use vector shifts and BIT_IOR_EXPR instead.  For vector-by-
	     vector rotates we would also need to check BIT_AND_EXPR and
	     NEGATE_EXPR; punt on those for now, as fold does not seem to
	     create such rotates anyway.  */
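	  /* Illustrative sketch: for elements of precision N and a shift
	     amount S, the rtl expander can open-code v r<< S as
	     (v << S) | (v >> (N - S)) with logical shifts, so all that is
	     needed here is a compute type in which the two shifts and the
	     IOR are supported.  */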
1735 	  if (compute_type == TREE_TYPE (type)
1736 	      && !VECTOR_INTEGER_TYPE_P (TREE_TYPE (rhs2)))
1737 	    {
1738 	      optab oplv = vashl_optab, opl = ashl_optab;
1739 	      optab oprv = vlshr_optab, opr = lshr_optab, opo = ior_optab;
1740 	      tree compute_lvtype = get_compute_type (LSHIFT_EXPR, oplv, type);
1741 	      tree compute_rvtype = get_compute_type (RSHIFT_EXPR, oprv, type);
1742 	      tree compute_otype = get_compute_type (BIT_IOR_EXPR, opo, type);
1743 	      tree compute_ltype = get_compute_type (LSHIFT_EXPR, opl, type);
1744 	      tree compute_rtype = get_compute_type (RSHIFT_EXPR, opr, type);
	      /* The rtl expander will expand vector/scalar as vector/vector
		 if necessary.  Pick the one with the wider vector type.  */
1747 	      if (subparts_gt (compute_lvtype, compute_ltype))
1748 		{
1749 		  compute_ltype = compute_lvtype;
1750 		  opl = oplv;
1751 		}
1752 	      if (subparts_gt (compute_rvtype, compute_rtype))
1753 		{
1754 		  compute_rtype = compute_rvtype;
1755 		  opr = oprv;
1756 		}
1757 	      /* Pick the narrowest type from LSHIFT_EXPR, RSHIFT_EXPR and
1758 		 BIT_IOR_EXPR.  */
1759 	      compute_type = compute_ltype;
1760 	      if (subparts_gt (compute_type, compute_rtype))
1761 		compute_type = compute_rtype;
1762 	      if (subparts_gt (compute_type, compute_otype))
1763 		compute_type = compute_otype;
	      /* Verify that all three operations can be performed in that
		 type.  */
1765 	      if (compute_type != TREE_TYPE (type))
1766 		{
1767 		  if (optab_handler (opl, TYPE_MODE (compute_type))
1768 		      == CODE_FOR_nothing
1769 		      || optab_handler (opr, TYPE_MODE (compute_type))
1770 			 == CODE_FOR_nothing
1771 		      || optab_handler (opo, TYPE_MODE (compute_type))
1772 			 == CODE_FOR_nothing)
1773 		    compute_type = TREE_TYPE (type);
1774 		}
1775 	    }
1776 	}
1777     }
1778   else
1779     op = optab_for_tree_code (code, type, optab_default);
1780 
1781   /* Optabs will try converting a negation into a subtraction, so
1782      look for it as well.  TODO: negation of floating-point vectors
1783      might be turned into an exclusive OR toggling the sign bit.  */
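  /* For instance (illustrative), a vector negation can be emitted as a
     subtraction from the zero vector when only the subtraction pattern
     exists.  */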
1784   if (op == unknown_optab
1785       && code == NEGATE_EXPR
1786       && INTEGRAL_TYPE_P (TREE_TYPE (type)))
1787     op = optab_for_tree_code (MINUS_EXPR, type, optab_default);
1788 
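  /* If the operation is supported directly on the full vector type,
     there is nothing to lower.  */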
1789   if (compute_type == NULL_TREE)
1790     compute_type = get_compute_type (code, op, type);
1791   if (compute_type == type)
1792     return;
1793 
1794   new_rhs = expand_vector_operation (gsi, type, compute_type, stmt, code);
1795 
  /* Leave the expression untouched for later expansion.  */
1797   if (new_rhs == NULL_TREE)
1798     return;
1799 
1800   if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (new_rhs)))
1801     new_rhs = gimplify_build1 (gsi, VIEW_CONVERT_EXPR, TREE_TYPE (lhs),
1802                                new_rhs);
1803 
  /* NOTE:  We should avoid using gimple_assign_set_rhs_from_tree.  One
     way to do it is to change expand_vector_operation and its callees to
     return a tree_code, RHS1 and RHS2 instead of a tree.  */
1807   gimple_assign_set_rhs_from_tree (gsi, new_rhs);
1808   update_stmt (gsi_stmt (*gsi));
1809 }
1810 
/* Use this to lower vector operations introduced by the vectorizer,
   if they may need the bit-twiddling tricks implemented in this file.  */
1813 
1814 static unsigned int
1815 expand_vector_operations (void)
1816 {
1817   gimple_stmt_iterator gsi;
1818   basic_block bb;
1819   bool cfg_changed = false;
1820 
1821   FOR_EACH_BB_FN (bb, cfun)
1822     {
1823       for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1824 	{
1825 	  expand_vector_operations_1 (&gsi);
	  /* ???  If we do not clean up EH then we will ICE in
	     verification.  But in reality we have created wrong code,
	     as we did not properly transfer EH info and edges to
	     the piecewise computations.  */
1830 	  if (maybe_clean_eh_stmt (gsi_stmt (gsi))
1831 	      && gimple_purge_dead_eh_edges (bb))
1832 	    cfg_changed = true;
1833 	}
1834     }
1835 
1836   return cfg_changed ? TODO_cleanup_cfg : 0;
1837 }
1838 
1839 namespace {
1840 
1841 const pass_data pass_data_lower_vector =
1842 {
1843   GIMPLE_PASS, /* type */
1844   "veclower", /* name */
1845   OPTGROUP_VEC, /* optinfo_flags */
1846   TV_NONE, /* tv_id */
1847   PROP_cfg, /* properties_required */
1848   PROP_gimple_lvec, /* properties_provided */
1849   0, /* properties_destroyed */
1850   0, /* todo_flags_start */
1851   TODO_update_ssa, /* todo_flags_finish */
1852 };
1853 
1854 class pass_lower_vector : public gimple_opt_pass
1855 {
1856 public:
1857   pass_lower_vector (gcc::context *ctxt)
1858     : gimple_opt_pass (pass_data_lower_vector, ctxt)
1859   {}
1860 
1861   /* opt_pass methods: */
1862   virtual bool gate (function *fun)
1863     {
1864       return !(fun->curr_properties & PROP_gimple_lvec);
1865     }
1866 
1867   virtual unsigned int execute (function *)
1868     {
1869       return expand_vector_operations ();
1870     }
1871 
1872 }; // class pass_lower_vector
1873 
1874 } // anon namespace
1875 
1876 gimple_opt_pass *
1877 make_pass_lower_vector (gcc::context *ctxt)
1878 {
1879   return new pass_lower_vector (ctxt);
1880 }
1881 
1882 namespace {
1883 
1884 const pass_data pass_data_lower_vector_ssa =
1885 {
1886   GIMPLE_PASS, /* type */
1887   "veclower2", /* name */
1888   OPTGROUP_VEC, /* optinfo_flags */
1889   TV_NONE, /* tv_id */
1890   PROP_cfg, /* properties_required */
1891   PROP_gimple_lvec, /* properties_provided */
1892   0, /* properties_destroyed */
1893   0, /* todo_flags_start */
1894   ( TODO_update_ssa
1895     | TODO_cleanup_cfg ), /* todo_flags_finish */
1896 };
1897 
1898 class pass_lower_vector_ssa : public gimple_opt_pass
1899 {
1900 public:
1901   pass_lower_vector_ssa (gcc::context *ctxt)
1902     : gimple_opt_pass (pass_data_lower_vector_ssa, ctxt)
1903   {}
1904 
1905   /* opt_pass methods: */
1906   opt_pass * clone () { return new pass_lower_vector_ssa (m_ctxt); }
1907   virtual unsigned int execute (function *)
1908     {
1909       return expand_vector_operations ();
1910     }
1911 
1912 }; // class pass_lower_vector_ssa
1913 
1914 } // anon namespace
1915 
1916 gimple_opt_pass *
1917 make_pass_lower_vector_ssa (gcc::context *ctxt)
1918 {
1919   return new pass_lower_vector_ssa (ctxt);
1920 }
1921 
1922 #include "gt-tree-vect-generic.h"
1923