1 /* Lower vector operations to scalar operations.
2    Copyright (C) 2004-2018 Free Software Foundation, Inc.
3 
4 This file is part of GCC.
5 
6 GCC is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 3, or (at your option) any
9 later version.
10 
11 GCC is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
14 for more details.
15 
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3.  If not see
18 <http://www.gnu.org/licenses/>.  */
19 
20 #include "config.h"
21 #include "system.h"
22 #include "coretypes.h"
23 #include "backend.h"
24 #include "rtl.h"
25 #include "tree.h"
26 #include "gimple.h"
27 #include "tree-pass.h"
28 #include "ssa.h"
29 #include "expmed.h"
30 #include "optabs-tree.h"
31 #include "diagnostic.h"
32 #include "fold-const.h"
33 #include "stor-layout.h"
34 #include "langhooks.h"
35 #include "tree-eh.h"
36 #include "gimple-iterator.h"
37 #include "gimplify-me.h"
38 #include "gimplify.h"
39 #include "tree-cfg.h"
40 #include "tree-vector-builder.h"
41 #include "vec-perm-indices.h"
42 
43 
44 static void expand_vector_operations_1 (gimple_stmt_iterator *);
45 
46 /* Return the number of elements in a vector type TYPE that we have
47    already decided needs to be expanded piecewise.  We don't support
48    this kind of expansion for variable-length vectors, since we should
49    always check for target support before introducing uses of those.  */
50 static unsigned int
51 nunits_for_known_piecewise_op (const_tree type)
52 {
53   return TYPE_VECTOR_SUBPARTS (type).to_constant ();
54 }
55 
56 /* Return true if TYPE1 has more elements than TYPE2, where either
57    type may be a vector or a scalar.  */
58 
59 static inline bool
60 subparts_gt (tree type1, tree type2)
61 {
62   poly_uint64 n1 = VECTOR_TYPE_P (type1) ? TYPE_VECTOR_SUBPARTS (type1) : 1;
63   poly_uint64 n2 = VECTOR_TYPE_P (type2) ? TYPE_VECTOR_SUBPARTS (type2) : 1;
64   return known_gt (n1, n2);
65 }
66 
67 /* Build a constant of type TYPE, made of VALUE's bits replicated
68    every TYPE_SIZE (INNER_TYPE) bits to fit TYPE's precision.  */
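/* For example, with a 32-bit TYPE, an 8-bit INNER_TYPE and VALUE 0x7f
   this builds the integer constant 0x7f7f7f7f.  */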
69 static tree
70 build_replicated_const (tree type, tree inner_type, HOST_WIDE_INT value)
71 {
72   int width = tree_to_uhwi (TYPE_SIZE (inner_type));
73   int n = (TYPE_PRECISION (type) + HOST_BITS_PER_WIDE_INT - 1)
74     / HOST_BITS_PER_WIDE_INT;
75   unsigned HOST_WIDE_INT low, mask;
76   HOST_WIDE_INT a[WIDE_INT_MAX_ELTS];
77   int i;
78 
79   gcc_assert (n && n <= WIDE_INT_MAX_ELTS);
80 
81   if (width == HOST_BITS_PER_WIDE_INT)
82     low = value;
83   else
84     {
85       mask = ((HOST_WIDE_INT)1 << width) - 1;
86       low = (unsigned HOST_WIDE_INT) ~0 / mask * (value & mask);
87     }
88 
89   for (i = 0; i < n; i++)
90     a[i] = low;
91 
92   gcc_assert (TYPE_PRECISION (type) <= MAX_BITSIZE_MODE_ANY_INT);
93   return wide_int_to_tree
94     (type, wide_int::from_array (a, n, TYPE_PRECISION (type)));
95 }
96 
97 static GTY(()) tree vector_inner_type;
98 static GTY(()) tree vector_last_type;
99 static GTY(()) int vector_last_nunits;
100 
/* Return a suitable vector type made of NUNITS units, each of mode
   "word_mode" (the global variable).  */
103 static tree
104 build_word_mode_vector_type (int nunits)
105 {
106   if (!vector_inner_type)
107     vector_inner_type = lang_hooks.types.type_for_mode (word_mode, 1);
108   else if (vector_last_nunits == nunits)
109     {
110       gcc_assert (TREE_CODE (vector_last_type) == VECTOR_TYPE);
111       return vector_last_type;
112     }
113 
114   /* We build a new type, but we canonicalize it nevertheless,
115      because it still saves some memory.  */
116   vector_last_nunits = nunits;
117   vector_last_type = type_hash_canon (nunits,
118 				      build_vector_type (vector_inner_type,
119 							 nunits));
120   return vector_last_type;
121 }
122 
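/* Type of the per-piece expansion callbacks below: they receive the
   iterator, the computation type, the two operands (the second may be
   unused), the bit position and bit size of the piece being processed
   (both NULL when the whole value is handled at once), the operation
   code and the full vector type.  */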
123 typedef tree (*elem_op_func) (gimple_stmt_iterator *,
124 			      tree, tree, tree, tree, tree, enum tree_code,
125 			      tree);
126 
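/* Extract from vector value T the piece of type TYPE that lives at bit
   position BITPOS and has width BITSIZE.  If BITPOS is NULL, simply
   view-convert the whole value T to TYPE.  SSA names defined as VECTOR_CSTs
   (or, when a position is given, as CONSTRUCTORs) are looked through so
   that the resulting BIT_FIELD_REF can be folded.  For a boolean TYPE the
   extracted bits are tested against zero.  */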
127 static inline tree
128 tree_vec_extract (gimple_stmt_iterator *gsi, tree type,
129 		  tree t, tree bitsize, tree bitpos)
130 {
131   if (TREE_CODE (t) == SSA_NAME)
132     {
133       gimple *def_stmt = SSA_NAME_DEF_STMT (t);
134       if (is_gimple_assign (def_stmt)
135 	  && (gimple_assign_rhs_code (def_stmt) == VECTOR_CST
136 	      || (bitpos
137 		  && gimple_assign_rhs_code (def_stmt) == CONSTRUCTOR)))
138 	t = gimple_assign_rhs1 (def_stmt);
139     }
140   if (bitpos)
141     {
142       if (TREE_CODE (type) == BOOLEAN_TYPE)
143 	{
144 	  tree itype
145 	    = build_nonstandard_integer_type (tree_to_uhwi (bitsize), 0);
146 	  tree field = gimplify_build3 (gsi, BIT_FIELD_REF, itype, t,
147 					bitsize, bitpos);
148 	  return gimplify_build2 (gsi, NE_EXPR, type, field,
149 				  build_zero_cst (itype));
150 	}
151       else
152 	return gimplify_build3 (gsi, BIT_FIELD_REF, type, t, bitsize, bitpos);
153     }
154   else
155     return gimplify_build1 (gsi, VIEW_CONVERT_EXPR, type, t);
156 }
157 
158 static tree
159 do_unop (gimple_stmt_iterator *gsi, tree inner_type, tree a,
160 	 tree b ATTRIBUTE_UNUSED, tree bitpos, tree bitsize,
161 	 enum tree_code code, tree type ATTRIBUTE_UNUSED)
162 {
163   a = tree_vec_extract (gsi, inner_type, a, bitsize, bitpos);
164   return gimplify_build1 (gsi, code, inner_type, a);
165 }
166 
167 static tree
168 do_binop (gimple_stmt_iterator *gsi, tree inner_type, tree a, tree b,
169 	  tree bitpos, tree bitsize, enum tree_code code,
170 	  tree type ATTRIBUTE_UNUSED)
171 {
172   if (TREE_CODE (TREE_TYPE (a)) == VECTOR_TYPE)
173     a = tree_vec_extract (gsi, inner_type, a, bitsize, bitpos);
174   if (TREE_CODE (TREE_TYPE (b)) == VECTOR_TYPE)
175     b = tree_vec_extract (gsi, inner_type, b, bitsize, bitpos);
176   return gimplify_build2 (gsi, code, inner_type, a, b);
177 }
178 
/* Construct the expression (A[BITPOS] code B[BITPOS]) ? -1 : 0.

   INNER_TYPE is the type of A's and B's elements.

   The returned expression is of signed integer type with the
   size equal to the size of INNER_TYPE.  */
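/* For instance, comparing two V4SI operands element by element yields,
   for each element, the 32-bit value -1 when the comparison holds and 0
   otherwise.  */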
185 static tree
186 do_compare (gimple_stmt_iterator *gsi, tree inner_type, tree a, tree b,
187 	    tree bitpos, tree bitsize, enum tree_code code, tree type)
188 {
189   tree stype = TREE_TYPE (type);
190   tree cst_false = build_zero_cst (stype);
191   tree cst_true = build_all_ones_cst (stype);
192   tree cmp;
193 
194   a = tree_vec_extract (gsi, inner_type, a, bitsize, bitpos);
195   b = tree_vec_extract (gsi, inner_type, b, bitsize, bitpos);
196 
197   cmp = build2 (code, boolean_type_node, a, b);
198   return gimplify_build3 (gsi, COND_EXPR, stype, cmp, cst_true, cst_false);
199 }
200 
201 /* Expand vector addition to scalars.  This does bit twiddling
202    in order to increase parallelism:
203 
204    a + b = (((int) a & 0x7f7f7f7f) + ((int) b & 0x7f7f7f7f)) ^
205            (a ^ b) & 0x80808080
206 
207    a - b =  (((int) a | 0x80808080) - ((int) b & 0x7f7f7f7f)) ^
208             (a ^ ~b) & 0x80808080
209 
210    -b = (0x80808080 - ((int) b & 0x7f7f7f7f)) ^ (~b & 0x80808080)
211 
212    This optimization should be done only if 4 vector items or more
213    fit into a word.  */
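/* As a rough illustration with 8-bit elements packed into a 32-bit word:
   masking each element with 0x7f clears its most significant bit, so the
   sums of the low seven bits can never carry into the neighbouring
   element; the dropped sign bits are then recovered by the final XOR.
   E.g. for single bytes, 0x80 + 0x80 gives (0x00 + 0x00) = 0x00 from the
   low bits and (0x80 ^ 0x80) & 0x80 = 0x00 from the sign fixup, i.e. 0x00,
   the correctly wrapped 8-bit result.  */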
214 static tree
215 do_plus_minus (gimple_stmt_iterator *gsi, tree word_type, tree a, tree b,
	       tree bitpos, tree bitsize,
217 	       enum tree_code code, tree type ATTRIBUTE_UNUSED)
218 {
219   tree inner_type = TREE_TYPE (TREE_TYPE (a));
220   unsigned HOST_WIDE_INT max;
221   tree low_bits, high_bits, a_low, b_low, result_low, signs;
222 
223   max = GET_MODE_MASK (TYPE_MODE (inner_type));
224   low_bits = build_replicated_const (word_type, inner_type, max >> 1);
225   high_bits = build_replicated_const (word_type, inner_type, max & ~(max >> 1));
226 
227   a = tree_vec_extract (gsi, word_type, a, bitsize, bitpos);
228   b = tree_vec_extract (gsi, word_type, b, bitsize, bitpos);
229 
230   signs = gimplify_build2 (gsi, BIT_XOR_EXPR, word_type, a, b);
231   b_low = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, b, low_bits);
232   if (code == PLUS_EXPR)
233     a_low = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, a, low_bits);
234   else
235     {
236       a_low = gimplify_build2 (gsi, BIT_IOR_EXPR, word_type, a, high_bits);
237       signs = gimplify_build1 (gsi, BIT_NOT_EXPR, word_type, signs);
238     }
239 
240   signs = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, signs, high_bits);
241   result_low = gimplify_build2 (gsi, code, word_type, a_low, b_low);
242   return gimplify_build2 (gsi, BIT_XOR_EXPR, word_type, result_low, signs);
243 }
244 
245 static tree
246 do_negate (gimple_stmt_iterator *gsi, tree word_type, tree b,
	   tree unused ATTRIBUTE_UNUSED, tree bitpos,
	   tree bitsize,
249 	   enum tree_code code ATTRIBUTE_UNUSED,
250 	   tree type ATTRIBUTE_UNUSED)
251 {
252   tree inner_type = TREE_TYPE (TREE_TYPE (b));
253   HOST_WIDE_INT max;
254   tree low_bits, high_bits, b_low, result_low, signs;
255 
256   max = GET_MODE_MASK (TYPE_MODE (inner_type));
257   low_bits = build_replicated_const (word_type, inner_type, max >> 1);
258   high_bits = build_replicated_const (word_type, inner_type, max & ~(max >> 1));
259 
260   b = tree_vec_extract (gsi, word_type, b, bitsize, bitpos);
261 
262   b_low = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, b, low_bits);
263   signs = gimplify_build1 (gsi, BIT_NOT_EXPR, word_type, b);
264   signs = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, signs, high_bits);
265   result_low = gimplify_build2 (gsi, MINUS_EXPR, word_type, high_bits, b_low);
266   return gimplify_build2 (gsi, BIT_XOR_EXPR, word_type, result_low, signs);
267 }
268 
269 /* Expand a vector operation to scalars, by using many operations
270    whose type is the vector type's inner type.  */
271 static tree
272 expand_vector_piecewise (gimple_stmt_iterator *gsi, elem_op_func f,
273 			 tree type, tree inner_type,
274 			 tree a, tree b, enum tree_code code)
275 {
276   vec<constructor_elt, va_gc> *v;
277   tree part_width = TYPE_SIZE (inner_type);
278   tree index = bitsize_int (0);
279   int nunits = nunits_for_known_piecewise_op (type);
280   int delta = tree_to_uhwi (part_width)
281 	      / tree_to_uhwi (TYPE_SIZE (TREE_TYPE (type)));
282   int i;
283   location_t loc = gimple_location (gsi_stmt (*gsi));
284 
285   if (types_compatible_p (gimple_expr_type (gsi_stmt (*gsi)), type))
286     warning_at (loc, OPT_Wvector_operation_performance,
287 		"vector operation will be expanded piecewise");
288   else
289     warning_at (loc, OPT_Wvector_operation_performance,
290 		"vector operation will be expanded in parallel");
291 
292   vec_alloc (v, (nunits + delta - 1) / delta);
293   for (i = 0; i < nunits;
294        i += delta, index = int_const_binop (PLUS_EXPR, index, part_width))
295     {
296       tree result = f (gsi, inner_type, a, b, index, part_width, code, type);
297       constructor_elt ce = {NULL_TREE, result};
298       v->quick_push (ce);
299     }
300 
301   return build_constructor (type, v);
302 }
303 
304 /* Expand a vector operation to scalars with the freedom to use
305    a scalar integer type, or to use a different size for the items
306    in the vector type.  */
307 static tree
308 expand_vector_parallel (gimple_stmt_iterator *gsi, elem_op_func f, tree type,
309 			tree a, tree b,
310 			enum tree_code code)
311 {
312   tree result, compute_type;
313   int n_words = tree_to_uhwi (TYPE_SIZE_UNIT (type)) / UNITS_PER_WORD;
314   location_t loc = gimple_location (gsi_stmt (*gsi));
315 
316   /* We have three strategies.  If the type is already correct, just do
317      the operation an element at a time.  Else, if the vector is wider than
318      one word, do it a word at a time; finally, if the vector is smaller
319      than one word, do it as a scalar.  */
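  /* For instance, a bitwise AND on a 16-byte vector with a 64-bit word_mode
     takes the second path (n_words is 2, so two word-size ANDs are emitted),
     while a 4-byte vector takes the third path and is handled as a single
     SImode operation.  */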
320   if (TYPE_MODE (TREE_TYPE (type)) == word_mode)
321      return expand_vector_piecewise (gsi, f,
322 				     type, TREE_TYPE (type),
323 				     a, b, code);
324   else if (n_words > 1)
325     {
326       tree word_type = build_word_mode_vector_type (n_words);
327       result = expand_vector_piecewise (gsi, f,
328 				        word_type, TREE_TYPE (word_type),
329 					a, b, code);
330       result = force_gimple_operand_gsi (gsi, result, true, NULL, true,
331                                          GSI_SAME_STMT);
332     }
333   else
334     {
335       /* Use a single scalar operation with a mode no wider than word_mode.  */
336       scalar_int_mode mode
337 	= int_mode_for_size (tree_to_uhwi (TYPE_SIZE (type)), 0).require ();
338       compute_type = lang_hooks.types.type_for_mode (mode, 1);
339       result = f (gsi, compute_type, a, b, NULL_TREE, NULL_TREE, code, type);
340       warning_at (loc, OPT_Wvector_operation_performance,
341 	          "vector operation will be expanded with a "
342 		  "single scalar operation");
343     }
344 
345   return result;
346 }
347 
348 /* Expand a vector operation to scalars; for integer types we can use
349    special bit twiddling tricks to do the sums a word at a time, using
350    function F_PARALLEL instead of F.  These tricks are done only if
351    they can process at least four items, that is, only if the vector
352    holds at least four items and if a word can hold four items.  */
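/* For example, with a 64-bit word a V16QI addition satisfies both
   conditions (8 parts per word, 16 elements) and takes the word-at-a-time
   bit-twiddling path, whereas a V2DI addition falls back to the plain
   piecewise expansion.  */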
353 static tree
354 expand_vector_addition (gimple_stmt_iterator *gsi,
355 			elem_op_func f, elem_op_func f_parallel,
356 			tree type, tree a, tree b, enum tree_code code)
357 {
358   int parts_per_word = UNITS_PER_WORD
359 	  	       / tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (type)));
360 
361   if (INTEGRAL_TYPE_P (TREE_TYPE (type))
362       && parts_per_word >= 4
363       && nunits_for_known_piecewise_op (type) >= 4)
364     return expand_vector_parallel (gsi, f_parallel,
365 				   type, a, b, code);
366   else
367     return expand_vector_piecewise (gsi, f,
368 				    type, TREE_TYPE (type),
369 				    a, b, code);
370 }
371 
372 /* Try to expand vector comparison expression OP0 CODE OP1 by
373    querying optab if the following expression:
374 	VEC_COND_EXPR< OP0 CODE OP1, {-1,...}, {0,...}>
375    can be expanded.  */
376 static tree
377 expand_vector_comparison (gimple_stmt_iterator *gsi, tree type, tree op0,
378                           tree op1, enum tree_code code)
379 {
380   tree t;
381   if (!expand_vec_cmp_expr_p (TREE_TYPE (op0), type, code)
382       && !expand_vec_cond_expr_p (type, TREE_TYPE (op0), code))
383     t = expand_vector_piecewise (gsi, do_compare, type,
384 				 TREE_TYPE (TREE_TYPE (op0)), op0, op1, code);
385   else
386     t = NULL_TREE;
387 
388   return t;
389 }
390 
/* Helper function of expand_vector_divmod.  Gimplify a RSHIFT_EXPR of OP0
   in type TYPE, with per-element shift counts in the SHIFTCNTS array, and
   return the temporary holding the result if successful, otherwise return
   NULL_TREE.  */
394 static tree
395 add_rshift (gimple_stmt_iterator *gsi, tree type, tree op0, int *shiftcnts)
396 {
397   optab op;
398   unsigned int i, nunits = nunits_for_known_piecewise_op (type);
399   bool scalar_shift = true;
400 
401   for (i = 1; i < nunits; i++)
402     {
403       if (shiftcnts[i] != shiftcnts[0])
404 	scalar_shift = false;
405     }
406 
407   if (scalar_shift && shiftcnts[0] == 0)
408     return op0;
409 
410   if (scalar_shift)
411     {
412       op = optab_for_tree_code (RSHIFT_EXPR, type, optab_scalar);
413       if (op != unknown_optab
414 	  && optab_handler (op, TYPE_MODE (type)) != CODE_FOR_nothing)
415 	return gimplify_build2 (gsi, RSHIFT_EXPR, type, op0,
416 				build_int_cst (NULL_TREE, shiftcnts[0]));
417     }
418 
419   op = optab_for_tree_code (RSHIFT_EXPR, type, optab_vector);
420   if (op != unknown_optab
421       && optab_handler (op, TYPE_MODE (type)) != CODE_FOR_nothing)
422     {
423       tree_vector_builder vec (type, nunits, 1);
424       for (i = 0; i < nunits; i++)
425 	vec.quick_push (build_int_cst (TREE_TYPE (type), shiftcnts[i]));
426       return gimplify_build2 (gsi, RSHIFT_EXPR, type, op0, vec.build ());
427     }
428 
429   return NULL_TREE;
430 }
431 
432 /* Try to expand integer vector division by constant using
433    widening multiply, shifts and additions.  */
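/* For example, an unsigned V4SI division by { 3, 3, 3, 3 } can, when the
   target provides a V4SI high-part multiply, be expanded as
     t1 = oprnd0 h* 0xaaaaaaab;
     q  = t1 >> 1;
   0xaaaaaaab being the multiplier chosen by choose_multiplier for an
   unsigned 32-bit division by 3.  */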
434 static tree
435 expand_vector_divmod (gimple_stmt_iterator *gsi, tree type, tree op0,
436 		      tree op1, enum tree_code code)
437 {
438   bool use_pow2 = true;
439   bool has_vector_shift = true;
440   int mode = -1, this_mode;
441   int pre_shift = -1, post_shift;
442   unsigned int nunits = nunits_for_known_piecewise_op (type);
443   int *shifts = XALLOCAVEC (int, nunits * 4);
444   int *pre_shifts = shifts + nunits;
445   int *post_shifts = pre_shifts + nunits;
446   int *shift_temps = post_shifts + nunits;
447   unsigned HOST_WIDE_INT *mulc = XALLOCAVEC (unsigned HOST_WIDE_INT, nunits);
448   int prec = TYPE_PRECISION (TREE_TYPE (type));
449   int dummy_int;
450   unsigned int i;
451   signop sign_p = TYPE_SIGN (TREE_TYPE (type));
452   unsigned HOST_WIDE_INT mask = GET_MODE_MASK (TYPE_MODE (TREE_TYPE (type)));
453   tree cur_op, mulcst, tem;
454   optab op;
455 
456   if (prec > HOST_BITS_PER_WIDE_INT)
457     return NULL_TREE;
458 
459   op = optab_for_tree_code (RSHIFT_EXPR, type, optab_vector);
460   if (op == unknown_optab
461       || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
462     has_vector_shift = false;
463 
  /* Analysis phase.  Determine whether all op1 elements are powers of two,
     in which case the division can be expanded using shifts (or, for
     remainder, using masking).  Additionally compute the multiplicative
     constants and pre- and post-shifts if the division is to be expanded
     using widening or high-part multiplication plus shifts.  */
469   for (i = 0; i < nunits; i++)
470     {
471       tree cst = VECTOR_CST_ELT (op1, i);
472       unsigned HOST_WIDE_INT ml;
473 
474       if (TREE_CODE (cst) != INTEGER_CST || integer_zerop (cst))
475 	return NULL_TREE;
476       pre_shifts[i] = 0;
477       post_shifts[i] = 0;
478       mulc[i] = 0;
479       if (use_pow2
480 	  && (!integer_pow2p (cst) || tree_int_cst_sgn (cst) != 1))
481 	use_pow2 = false;
482       if (use_pow2)
483 	{
484 	  shifts[i] = tree_log2 (cst);
485 	  if (shifts[i] != shifts[0]
486 	      && code == TRUNC_DIV_EXPR
487 	      && !has_vector_shift)
488 	    use_pow2 = false;
489 	}
490       if (mode == -2)
491 	continue;
492       if (sign_p == UNSIGNED)
493 	{
494 	  unsigned HOST_WIDE_INT mh;
495 	  unsigned HOST_WIDE_INT d = TREE_INT_CST_LOW (cst) & mask;
496 
497 	  if (d >= (HOST_WIDE_INT_1U << (prec - 1)))
498 	    /* FIXME: Can transform this into op0 >= op1 ? 1 : 0.  */
499 	    return NULL_TREE;
500 
501 	  if (d <= 1)
502 	    {
503 	      mode = -2;
504 	      continue;
505 	    }
506 
507 	  /* Find a suitable multiplier and right shift count
508 	     instead of multiplying with D.  */
509 	  mh = choose_multiplier (d, prec, prec, &ml, &post_shift, &dummy_int);
510 
	  /* If the suggested multiplier is more than PREC bits, we can
	     do better for even divisors, using an initial right shift.  */
513 	  if ((mh != 0 && (d & 1) == 0)
514 	      || (!has_vector_shift && pre_shift != -1))
515 	    {
516 	      if (has_vector_shift)
517 		pre_shift = ctz_or_zero (d);
518 	      else if (pre_shift == -1)
519 		{
520 		  unsigned int j;
521 		  for (j = 0; j < nunits; j++)
522 		    {
523 		      tree cst2 = VECTOR_CST_ELT (op1, j);
524 		      unsigned HOST_WIDE_INT d2;
525 		      int this_pre_shift;
526 
527 		      if (!tree_fits_uhwi_p (cst2))
528 			return NULL_TREE;
529 		      d2 = tree_to_uhwi (cst2) & mask;
530 		      if (d2 == 0)
531 			return NULL_TREE;
532 		      this_pre_shift = floor_log2 (d2 & -d2);
533 		      if (pre_shift == -1 || this_pre_shift < pre_shift)
534 			pre_shift = this_pre_shift;
535 		    }
536 		  if (i != 0 && pre_shift != 0)
537 		    {
538 		      /* Restart.  */
539 		      i = -1U;
540 		      mode = -1;
541 		      continue;
542 		    }
543 		}
544 	      if (pre_shift != 0)
545 		{
546 		  if ((d >> pre_shift) <= 1)
547 		    {
548 		      mode = -2;
549 		      continue;
550 		    }
551 		  mh = choose_multiplier (d >> pre_shift, prec,
552 					  prec - pre_shift,
553 					  &ml, &post_shift, &dummy_int);
554 		  gcc_assert (!mh);
555 		  pre_shifts[i] = pre_shift;
556 		}
557 	    }
558 	  if (!mh)
559 	    this_mode = 0;
560 	  else
561 	    this_mode = 1;
562 	}
563       else
564 	{
565 	  HOST_WIDE_INT d = TREE_INT_CST_LOW (cst);
566 	  unsigned HOST_WIDE_INT abs_d;
567 
568 	  if (d == -1)
569 	    return NULL_TREE;
570 
571 	  /* Since d might be INT_MIN, we have to cast to
572 	     unsigned HOST_WIDE_INT before negating to avoid
573 	     undefined signed overflow.  */
574 	  abs_d = (d >= 0
575 		  ? (unsigned HOST_WIDE_INT) d
576 		  : - (unsigned HOST_WIDE_INT) d);
577 
578 	  /* n rem d = n rem -d */
579 	  if (code == TRUNC_MOD_EXPR && d < 0)
580 	    d = abs_d;
581 	  else if (abs_d == HOST_WIDE_INT_1U << (prec - 1))
582 	    {
583 	      /* This case is not handled correctly below.  */
584 	      mode = -2;
585 	      continue;
586 	    }
587 	  if (abs_d <= 1)
588 	    {
589 	      mode = -2;
590 	      continue;
591 	    }
592 
593 	  choose_multiplier (abs_d, prec, prec - 1, &ml,
594 			     &post_shift, &dummy_int);
595 	  if (ml >= HOST_WIDE_INT_1U << (prec - 1))
596 	    {
597 	      this_mode = 4 + (d < 0);
598 	      ml |= HOST_WIDE_INT_M1U << (prec - 1);
599 	    }
600 	  else
601 	    this_mode = 2 + (d < 0);
602 	}
603       mulc[i] = ml;
604       post_shifts[i] = post_shift;
605       if ((i && !has_vector_shift && post_shifts[0] != post_shift)
606 	  || post_shift >= prec
607 	  || pre_shifts[i] >= prec)
608 	this_mode = -2;
609 
610       if (i == 0)
611 	mode = this_mode;
612       else if (mode != this_mode)
613 	mode = -2;
614     }
615 
616   if (use_pow2)
617     {
618       tree addend = NULL_TREE;
619       if (sign_p == SIGNED)
620 	{
621 	  tree uns_type;
622 
	  /* Both division and remainder sequences need
	     op0 < 0 ? mask : 0 computed.  It can be computed either as
	     (type) (((uns_type) (op0 >> (prec - 1))) >> (prec - shifts[i]))
	     if none of the shifts is 0, or as the conditional.  */
627 	  for (i = 0; i < nunits; i++)
628 	    if (shifts[i] == 0)
629 	      break;
630 	  uns_type
631 	    = build_vector_type (build_nonstandard_integer_type (prec, 1),
632 				 nunits);
633 	  if (i == nunits && TYPE_MODE (uns_type) == TYPE_MODE (type))
634 	    {
635 	      for (i = 0; i < nunits; i++)
636 		shift_temps[i] = prec - 1;
637 	      cur_op = add_rshift (gsi, type, op0, shift_temps);
638 	      if (cur_op != NULL_TREE)
639 		{
640 		  cur_op = gimplify_build1 (gsi, VIEW_CONVERT_EXPR,
641 					    uns_type, cur_op);
642 		  for (i = 0; i < nunits; i++)
643 		    shift_temps[i] = prec - shifts[i];
644 		  cur_op = add_rshift (gsi, uns_type, cur_op, shift_temps);
645 		  if (cur_op != NULL_TREE)
646 		    addend = gimplify_build1 (gsi, VIEW_CONVERT_EXPR,
647 					      type, cur_op);
648 		}
649 	    }
650 	  if (addend == NULL_TREE
651 	      && expand_vec_cond_expr_p (type, type, LT_EXPR))
652 	    {
653 	      tree zero, cst, cond, mask_type;
654 	      gimple *stmt;
655 
656 	      mask_type = build_same_sized_truth_vector_type (type);
657 	      zero = build_zero_cst (type);
658 	      cond = build2 (LT_EXPR, mask_type, op0, zero);
659 	      tree_vector_builder vec (type, nunits, 1);
660 	      for (i = 0; i < nunits; i++)
661 		vec.quick_push (build_int_cst (TREE_TYPE (type),
662 					       (HOST_WIDE_INT_1U
663 						<< shifts[i]) - 1));
664 	      cst = vec.build ();
665 	      addend = make_ssa_name (type);
666 	      stmt = gimple_build_assign (addend, VEC_COND_EXPR, cond,
667 					  cst, zero);
668 	      gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
669 	    }
670 	}
671       if (code == TRUNC_DIV_EXPR)
672 	{
673 	  if (sign_p == UNSIGNED)
674 	    {
675 	      /* q = op0 >> shift;  */
676 	      cur_op = add_rshift (gsi, type, op0, shifts);
677 	      if (cur_op != NULL_TREE)
678 		return cur_op;
679 	    }
680 	  else if (addend != NULL_TREE)
681 	    {
682 	      /* t1 = op0 + addend;
683 		 q = t1 >> shift;  */
684 	      op = optab_for_tree_code (PLUS_EXPR, type, optab_default);
685 	      if (op != unknown_optab
686 		  && optab_handler (op, TYPE_MODE (type)) != CODE_FOR_nothing)
687 		{
688 		  cur_op = gimplify_build2 (gsi, PLUS_EXPR, type, op0, addend);
689 		  cur_op = add_rshift (gsi, type, cur_op, shifts);
690 		  if (cur_op != NULL_TREE)
691 		    return cur_op;
692 		}
693 	    }
694 	}
695       else
696 	{
697 	  tree mask;
698 	  tree_vector_builder vec (type, nunits, 1);
699 	  for (i = 0; i < nunits; i++)
700 	    vec.quick_push (build_int_cst (TREE_TYPE (type),
701 					   (HOST_WIDE_INT_1U
702 					    << shifts[i]) - 1));
703 	  mask = vec.build ();
704 	  op = optab_for_tree_code (BIT_AND_EXPR, type, optab_default);
705 	  if (op != unknown_optab
706 	      && optab_handler (op, TYPE_MODE (type)) != CODE_FOR_nothing)
707 	    {
708 	      if (sign_p == UNSIGNED)
709 		/* r = op0 & mask;  */
710 		return gimplify_build2 (gsi, BIT_AND_EXPR, type, op0, mask);
711 	      else if (addend != NULL_TREE)
712 		{
713 		  /* t1 = op0 + addend;
714 		     t2 = t1 & mask;
715 		     r = t2 - addend;  */
716 		  op = optab_for_tree_code (PLUS_EXPR, type, optab_default);
717 		  if (op != unknown_optab
718 		      && optab_handler (op, TYPE_MODE (type))
719 			 != CODE_FOR_nothing)
720 		    {
721 		      cur_op = gimplify_build2 (gsi, PLUS_EXPR, type, op0,
722 						addend);
723 		      cur_op = gimplify_build2 (gsi, BIT_AND_EXPR, type,
724 						cur_op, mask);
725 		      op = optab_for_tree_code (MINUS_EXPR, type,
726 						optab_default);
727 		      if (op != unknown_optab
728 			  && optab_handler (op, TYPE_MODE (type))
729 			     != CODE_FOR_nothing)
730 			return gimplify_build2 (gsi, MINUS_EXPR, type,
731 						cur_op, addend);
732 		    }
733 		}
734 	    }
735 	}
736     }
737 
738   if (mode == -2 || BYTES_BIG_ENDIAN != WORDS_BIG_ENDIAN)
739     return NULL_TREE;
740 
741   if (!can_mult_highpart_p (TYPE_MODE (type), TYPE_UNSIGNED (type)))
742     return NULL_TREE;
743 
744   cur_op = op0;
745 
746   switch (mode)
747     {
748     case 0:
749       gcc_assert (sign_p == UNSIGNED);
750       /* t1 = oprnd0 >> pre_shift;
751 	 t2 = t1 h* ml;
752 	 q = t2 >> post_shift;  */
753       cur_op = add_rshift (gsi, type, cur_op, pre_shifts);
754       if (cur_op == NULL_TREE)
755 	return NULL_TREE;
756       break;
757     case 1:
758       gcc_assert (sign_p == UNSIGNED);
759       for (i = 0; i < nunits; i++)
760 	{
761 	  shift_temps[i] = 1;
762 	  post_shifts[i]--;
763 	}
764       break;
765     case 2:
766     case 3:
767     case 4:
768     case 5:
769       gcc_assert (sign_p == SIGNED);
770       for (i = 0; i < nunits; i++)
771 	shift_temps[i] = prec - 1;
772       break;
773     default:
774       return NULL_TREE;
775     }
776 
777   tree_vector_builder vec (type, nunits, 1);
778   for (i = 0; i < nunits; i++)
779     vec.quick_push (build_int_cst (TREE_TYPE (type), mulc[i]));
780   mulcst = vec.build ();
781 
782   cur_op = gimplify_build2 (gsi, MULT_HIGHPART_EXPR, type, cur_op, mulcst);
783 
784   switch (mode)
785     {
786     case 0:
787       /* t1 = oprnd0 >> pre_shift;
788 	 t2 = t1 h* ml;
789 	 q = t2 >> post_shift;  */
790       cur_op = add_rshift (gsi, type, cur_op, post_shifts);
791       break;
792     case 1:
793       /* t1 = oprnd0 h* ml;
794 	 t2 = oprnd0 - t1;
795 	 t3 = t2 >> 1;
796 	 t4 = t1 + t3;
797 	 q = t4 >> (post_shift - 1);  */
798       op = optab_for_tree_code (MINUS_EXPR, type, optab_default);
799       if (op == unknown_optab
800 	  || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
801 	return NULL_TREE;
802       tem = gimplify_build2 (gsi, MINUS_EXPR, type, op0, cur_op);
803       tem = add_rshift (gsi, type, tem, shift_temps);
804       op = optab_for_tree_code (PLUS_EXPR, type, optab_default);
805       if (op == unknown_optab
806 	  || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
807 	return NULL_TREE;
808       tem = gimplify_build2 (gsi, PLUS_EXPR, type, cur_op, tem);
809       cur_op = add_rshift (gsi, type, tem, post_shifts);
810       if (cur_op == NULL_TREE)
811 	return NULL_TREE;
812       break;
813     case 2:
814     case 3:
815     case 4:
816     case 5:
817       /* t1 = oprnd0 h* ml;
818 	 t2 = t1; [ iff (mode & 2) != 0 ]
819 	 t2 = t1 + oprnd0; [ iff (mode & 2) == 0 ]
820 	 t3 = t2 >> post_shift;
821 	 t4 = oprnd0 >> (prec - 1);
822 	 q = t3 - t4; [ iff (mode & 1) == 0 ]
823 	 q = t4 - t3; [ iff (mode & 1) != 0 ]  */
824       if ((mode & 2) == 0)
825 	{
826 	  op = optab_for_tree_code (PLUS_EXPR, type, optab_default);
827 	  if (op == unknown_optab
828 	      || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
829 	    return NULL_TREE;
830 	  cur_op = gimplify_build2 (gsi, PLUS_EXPR, type, cur_op, op0);
831 	}
832       cur_op = add_rshift (gsi, type, cur_op, post_shifts);
833       if (cur_op == NULL_TREE)
834 	return NULL_TREE;
835       tem = add_rshift (gsi, type, op0, shift_temps);
836       if (tem == NULL_TREE)
837 	return NULL_TREE;
838       op = optab_for_tree_code (MINUS_EXPR, type, optab_default);
839       if (op == unknown_optab
840 	  || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
841 	return NULL_TREE;
842       if ((mode & 1) == 0)
843 	cur_op = gimplify_build2 (gsi, MINUS_EXPR, type, cur_op, tem);
844       else
845 	cur_op = gimplify_build2 (gsi, MINUS_EXPR, type, tem, cur_op);
846       break;
847     default:
848       gcc_unreachable ();
849     }
850 
851   if (code == TRUNC_DIV_EXPR)
852     return cur_op;
853 
854   /* We divided.  Now finish by:
855      t1 = q * oprnd1;
856      r = oprnd0 - t1;  */
857   op = optab_for_tree_code (MULT_EXPR, type, optab_default);
858   if (op == unknown_optab
859       || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
860     return NULL_TREE;
861   tem = gimplify_build2 (gsi, MULT_EXPR, type, cur_op, op1);
862   op = optab_for_tree_code (MINUS_EXPR, type, optab_default);
863   if (op == unknown_optab
864       || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
865     return NULL_TREE;
866   return gimplify_build2 (gsi, MINUS_EXPR, type, op0, tem);
867 }
868 
869 /* Expand a vector condition to scalars, by using many conditions
870    on the vector's elements.  */
871 static void
872 expand_vector_condition (gimple_stmt_iterator *gsi)
873 {
874   gassign *stmt = as_a <gassign *> (gsi_stmt (*gsi));
875   tree type = gimple_expr_type (stmt);
876   tree a = gimple_assign_rhs1 (stmt);
877   tree a1 = a;
878   tree a2 = NULL_TREE;
879   bool a_is_comparison = false;
880   tree b = gimple_assign_rhs2 (stmt);
881   tree c = gimple_assign_rhs3 (stmt);
882   vec<constructor_elt, va_gc> *v;
883   tree constr;
884   tree inner_type = TREE_TYPE (type);
885   tree cond_type = TREE_TYPE (TREE_TYPE (a));
886   tree comp_inner_type = cond_type;
887   tree width = TYPE_SIZE (inner_type);
888   tree index = bitsize_int (0);
889   tree comp_width = width;
890   tree comp_index = index;
891   int i;
892   location_t loc = gimple_location (gsi_stmt (*gsi));
893 
894   if (!is_gimple_val (a))
895     {
896       gcc_assert (COMPARISON_CLASS_P (a));
897       a_is_comparison = true;
898       a1 = TREE_OPERAND (a, 0);
899       a2 = TREE_OPERAND (a, 1);
900       comp_inner_type = TREE_TYPE (TREE_TYPE (a1));
901       comp_width = TYPE_SIZE (comp_inner_type);
902     }
903 
904   if (expand_vec_cond_expr_p (type, TREE_TYPE (a1), TREE_CODE (a)))
905     return;
906 
907   /* Handle vector boolean types with bitmasks.  If there is a comparison
908      and we can expand the comparison into the vector boolean bitmask,
909      or otherwise if it is compatible with type, we can transform
910       vbfld_1 = x_2 < y_3 ? vbfld_4 : vbfld_5;
911      into
912       tmp_6 = x_2 < y_3;
913       tmp_7 = tmp_6 & vbfld_4;
914       tmp_8 = ~tmp_6;
915       tmp_9 = tmp_8 & vbfld_5;
916       vbfld_1 = tmp_7 | tmp_9;
917      Similarly for vbfld_10 instead of x_2 < y_3.  */
918   if (VECTOR_BOOLEAN_TYPE_P (type)
919       && SCALAR_INT_MODE_P (TYPE_MODE (type))
920       && known_lt (GET_MODE_BITSIZE (TYPE_MODE (type)),
921 		   TYPE_VECTOR_SUBPARTS (type)
922 		   * GET_MODE_BITSIZE (SCALAR_TYPE_MODE (TREE_TYPE (type))))
923       && (a_is_comparison
924 	  ? useless_type_conversion_p (type, TREE_TYPE (a))
925 	  : expand_vec_cmp_expr_p (TREE_TYPE (a1), type, TREE_CODE (a))))
926     {
927       if (a_is_comparison)
928 	a = gimplify_build2 (gsi, TREE_CODE (a), type, a1, a2);
929       a1 = gimplify_build2 (gsi, BIT_AND_EXPR, type, a, b);
930       a2 = gimplify_build1 (gsi, BIT_NOT_EXPR, type, a);
931       a2 = gimplify_build2 (gsi, BIT_AND_EXPR, type, a2, c);
932       a = gimplify_build2 (gsi, BIT_IOR_EXPR, type, a1, a2);
933       gimple_assign_set_rhs_from_tree (gsi, a);
934       update_stmt (gsi_stmt (*gsi));
935       return;
936     }
937 
938   /* TODO: try and find a smaller vector type.  */
939 
940   warning_at (loc, OPT_Wvector_operation_performance,
941 	      "vector condition will be expanded piecewise");
942 
943   int nunits = nunits_for_known_piecewise_op (type);
944   vec_alloc (v, nunits);
945   for (i = 0; i < nunits; i++)
946     {
947       tree aa, result;
948       tree bb = tree_vec_extract (gsi, inner_type, b, width, index);
949       tree cc = tree_vec_extract (gsi, inner_type, c, width, index);
950       if (a_is_comparison)
951 	{
952 	  tree aa1 = tree_vec_extract (gsi, comp_inner_type, a1,
953 				       comp_width, comp_index);
954 	  tree aa2 = tree_vec_extract (gsi, comp_inner_type, a2,
955 				       comp_width, comp_index);
956 	  aa = fold_build2 (TREE_CODE (a), cond_type, aa1, aa2);
957 	}
958       else
959 	aa = tree_vec_extract (gsi, cond_type, a, width, index);
960       result = gimplify_build3 (gsi, COND_EXPR, inner_type, aa, bb, cc);
961       constructor_elt ce = {NULL_TREE, result};
962       v->quick_push (ce);
963       index = int_const_binop (PLUS_EXPR, index, width);
964       if (width == comp_width)
965 	comp_index = index;
966       else
967 	comp_index = int_const_binop (PLUS_EXPR, comp_index, comp_width);
968     }
969 
970   constr = build_constructor (type, v);
971   gimple_assign_set_rhs_from_tree (gsi, constr);
972   update_stmt (gsi_stmt (*gsi));
973 }
974 
975 static tree
976 expand_vector_operation (gimple_stmt_iterator *gsi, tree type, tree compute_type,
977 			 gassign *assign, enum tree_code code)
978 {
979   machine_mode compute_mode = TYPE_MODE (compute_type);
980 
981   /* If the compute mode is not a vector mode (hence we are not decomposing
982      a BLKmode vector to smaller, hardware-supported vectors), we may want
983      to expand the operations in parallel.  */
984   if (!VECTOR_MODE_P (compute_mode))
985     switch (code)
986       {
987       case PLUS_EXPR:
988       case MINUS_EXPR:
989         if (ANY_INTEGRAL_TYPE_P (type) && !TYPE_OVERFLOW_TRAPS (type))
990 	  return expand_vector_addition (gsi, do_binop, do_plus_minus, type,
991 					 gimple_assign_rhs1 (assign),
992 					 gimple_assign_rhs2 (assign), code);
993 	break;
994 
995       case NEGATE_EXPR:
996         if (ANY_INTEGRAL_TYPE_P (type) && !TYPE_OVERFLOW_TRAPS (type))
997           return expand_vector_addition (gsi, do_unop, do_negate, type,
998 		      		         gimple_assign_rhs1 (assign),
999 					 NULL_TREE, code);
1000 	break;
1001 
1002       case BIT_AND_EXPR:
1003       case BIT_IOR_EXPR:
1004       case BIT_XOR_EXPR:
1005         return expand_vector_parallel (gsi, do_binop, type,
1006 		      		       gimple_assign_rhs1 (assign),
1007 				       gimple_assign_rhs2 (assign), code);
1008 
1009       case BIT_NOT_EXPR:
1010         return expand_vector_parallel (gsi, do_unop, type,
1011 		      		       gimple_assign_rhs1 (assign),
1012         			       NULL_TREE, code);
1013       case EQ_EXPR:
1014       case NE_EXPR:
1015       case GT_EXPR:
1016       case LT_EXPR:
1017       case GE_EXPR:
1018       case LE_EXPR:
1019       case UNEQ_EXPR:
1020       case UNGT_EXPR:
1021       case UNLT_EXPR:
1022       case UNGE_EXPR:
1023       case UNLE_EXPR:
1024       case LTGT_EXPR:
1025       case ORDERED_EXPR:
1026       case UNORDERED_EXPR:
1027 	{
1028 	  tree rhs1 = gimple_assign_rhs1 (assign);
1029 	  tree rhs2 = gimple_assign_rhs2 (assign);
1030 
1031 	  return expand_vector_comparison (gsi, type, rhs1, rhs2, code);
1032 	}
1033 
1034       case TRUNC_DIV_EXPR:
1035       case TRUNC_MOD_EXPR:
1036 	{
1037 	  tree rhs1 = gimple_assign_rhs1 (assign);
1038 	  tree rhs2 = gimple_assign_rhs2 (assign);
1039 	  tree ret;
1040 
1041 	  if (!optimize
1042 	      || !VECTOR_INTEGER_TYPE_P (type)
1043 	      || TREE_CODE (rhs2) != VECTOR_CST
1044 	      || !VECTOR_MODE_P (TYPE_MODE (type)))
1045 	    break;
1046 
1047 	  ret = expand_vector_divmod (gsi, type, rhs1, rhs2, code);
1048 	  if (ret != NULL_TREE)
1049 	    return ret;
1050 	  break;
1051 	}
1052 
1053       default:
1054 	break;
1055       }
1056 
1057   if (TREE_CODE_CLASS (code) == tcc_unary)
1058     return expand_vector_piecewise (gsi, do_unop, type, compute_type,
1059 				    gimple_assign_rhs1 (assign),
1060 				    NULL_TREE, code);
1061   else
1062     return expand_vector_piecewise (gsi, do_binop, type, compute_type,
1063 				    gimple_assign_rhs1 (assign),
1064 				    gimple_assign_rhs2 (assign), code);
1065 }
1066 
1067 /* Try to optimize
1068    a_5 = { b_7, b_7 + 3, b_7 + 6, b_7 + 9 };
1069    style stmts into:
1070    _9 = { b_7, b_7, b_7, b_7 };
1071    a_5 = _9 + { 0, 3, 6, 9 };
   because a vector splat operation is usually more efficient
   than piecewise initialization of the vector.  */
1074 
1075 static void
1076 optimize_vector_constructor (gimple_stmt_iterator *gsi)
1077 {
1078   gassign *stmt = as_a <gassign *> (gsi_stmt (*gsi));
1079   tree lhs = gimple_assign_lhs (stmt);
1080   tree rhs = gimple_assign_rhs1 (stmt);
1081   tree type = TREE_TYPE (rhs);
1082   unsigned int i, j;
1083   unsigned HOST_WIDE_INT nelts;
1084   bool all_same = true;
1085   constructor_elt *elt;
1086   gimple *g;
1087   tree base = NULL_TREE;
1088   optab op;
1089 
1090   if (!TYPE_VECTOR_SUBPARTS (type).is_constant (&nelts)
1091       || nelts <= 2
1092       || CONSTRUCTOR_NELTS (rhs) != nelts)
1093     return;
1094   op = optab_for_tree_code (PLUS_EXPR, type, optab_default);
1095   if (op == unknown_optab
1096       || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
1097     return;
1098   FOR_EACH_VEC_SAFE_ELT (CONSTRUCTOR_ELTS (rhs), i, elt)
1099     if (TREE_CODE (elt->value) != SSA_NAME
1100 	|| TREE_CODE (TREE_TYPE (elt->value)) == VECTOR_TYPE)
1101       return;
1102     else
1103       {
1104 	tree this_base = elt->value;
1105 	if (this_base != CONSTRUCTOR_ELT (rhs, 0)->value)
1106 	  all_same = false;
1107 	for (j = 0; j < nelts + 1; j++)
1108 	  {
1109 	    g = SSA_NAME_DEF_STMT (this_base);
1110 	    if (is_gimple_assign (g)
1111 		&& gimple_assign_rhs_code (g) == PLUS_EXPR
1112 		&& TREE_CODE (gimple_assign_rhs2 (g)) == INTEGER_CST
1113 		&& TREE_CODE (gimple_assign_rhs1 (g)) == SSA_NAME
1114 		&& !SSA_NAME_OCCURS_IN_ABNORMAL_PHI (gimple_assign_rhs1 (g)))
1115 	      this_base = gimple_assign_rhs1 (g);
1116 	    else
1117 	      break;
1118 	  }
1119 	if (i == 0)
1120 	  base = this_base;
1121 	else if (this_base != base)
1122 	  return;
1123       }
1124   if (all_same)
1125     return;
1126   tree_vector_builder cst (type, nelts, 1);
1127   for (i = 0; i < nelts; i++)
1128     {
1129       tree this_base = CONSTRUCTOR_ELT (rhs, i)->value;
1130       tree elt = build_zero_cst (TREE_TYPE (base));
1131       while (this_base != base)
1132 	{
1133 	  g = SSA_NAME_DEF_STMT (this_base);
1134 	  elt = fold_binary (PLUS_EXPR, TREE_TYPE (base),
1135 			     elt, gimple_assign_rhs2 (g));
1136 	  if (elt == NULL_TREE
1137 	      || TREE_CODE (elt) != INTEGER_CST
1138 	      || TREE_OVERFLOW (elt))
1139 	    return;
1140 	  this_base = gimple_assign_rhs1 (g);
1141 	}
1142       cst.quick_push (elt);
1143     }
1144   for (i = 0; i < nelts; i++)
1145     CONSTRUCTOR_ELT (rhs, i)->value = base;
1146   g = gimple_build_assign (make_ssa_name (type), rhs);
1147   gsi_insert_before (gsi, g, GSI_SAME_STMT);
1148   g = gimple_build_assign (lhs, PLUS_EXPR, gimple_assign_lhs (g),
1149 			   cst.build ());
1150   gsi_replace (gsi, g, false);
1151 }
1152 
1153 /* Return a type for the widest vector mode whose components are of type
1154    TYPE, or NULL_TREE if none is found.  */
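/* For instance, if TYPE is a 32-bit integer type and the widest vector
   mode for which the target implements OP holds 256 bits, the returned
   type has eight 32-bit elements.  */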
1155 
1156 static tree
1157 type_for_widest_vector_mode (tree type, optab op)
1158 {
1159   machine_mode inner_mode = TYPE_MODE (type);
1160   machine_mode best_mode = VOIDmode, mode;
1161   poly_int64 best_nunits = 0;
1162 
1163   if (SCALAR_FLOAT_MODE_P (inner_mode))
1164     mode = MIN_MODE_VECTOR_FLOAT;
1165   else if (SCALAR_FRACT_MODE_P (inner_mode))
1166     mode = MIN_MODE_VECTOR_FRACT;
1167   else if (SCALAR_UFRACT_MODE_P (inner_mode))
1168     mode = MIN_MODE_VECTOR_UFRACT;
1169   else if (SCALAR_ACCUM_MODE_P (inner_mode))
1170     mode = MIN_MODE_VECTOR_ACCUM;
1171   else if (SCALAR_UACCUM_MODE_P (inner_mode))
1172     mode = MIN_MODE_VECTOR_UACCUM;
1173   else if (inner_mode == BImode)
1174     mode = MIN_MODE_VECTOR_BOOL;
1175   else
1176     mode = MIN_MODE_VECTOR_INT;
1177 
1178   FOR_EACH_MODE_FROM (mode, mode)
1179     if (GET_MODE_INNER (mode) == inner_mode
1180 	&& maybe_gt (GET_MODE_NUNITS (mode), best_nunits)
1181 	&& optab_handler (op, mode) != CODE_FOR_nothing)
1182       best_mode = mode, best_nunits = GET_MODE_NUNITS (mode);
1183 
1184   if (best_mode == VOIDmode)
1185     return NULL_TREE;
1186   else
1187     return build_vector_type_for_mode (type, best_mode);
1188 }
1189 
1190 
/* Build a reference to the element of the vector VECT.  The function
   returns either the element itself, a BIT_FIELD_REF, or an
   ARRAY_REF expression.

   GSI is required to insert temporary variables while building a
   reference to the element of the vector VECT.

   PTMPVEC is a pointer to the temporary variable for caching
   purposes.  If PTMPVEC is NULL, a new temporary variable
   will be created.  */
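/* For a constant IDX the element can typically be returned directly (from
   a VECTOR_CST or CONSTRUCTOR) or as a BIT_FIELD_REF; for a variable IDX
   the vector is spilled to the temporary and the result is an ARRAY_REF of
   the form VIEW_CONVERT_EXPR<elttype[nelts]>(vectmp)[idx].  */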
1201 static tree
1202 vector_element (gimple_stmt_iterator *gsi, tree vect, tree idx, tree *ptmpvec)
1203 {
1204   tree vect_type, vect_elt_type;
1205   gimple *asgn;
1206   tree tmpvec;
1207   tree arraytype;
1208   bool need_asgn = true;
1209   unsigned int elements;
1210 
1211   vect_type = TREE_TYPE (vect);
1212   vect_elt_type = TREE_TYPE (vect_type);
1213   elements = nunits_for_known_piecewise_op (vect_type);
1214 
1215   if (TREE_CODE (idx) == INTEGER_CST)
1216     {
1217       unsigned HOST_WIDE_INT index;
1218 
1219       /* Given that we're about to compute a binary modulus,
1220 	 we don't care about the high bits of the value.  */
1221       index = TREE_INT_CST_LOW (idx);
1222       if (!tree_fits_uhwi_p (idx) || index >= elements)
1223 	{
1224 	  index &= elements - 1;
1225 	  idx = build_int_cst (TREE_TYPE (idx), index);
1226 	}
1227 
1228       /* When lowering a vector statement sequence do some easy
1229          simplification by looking through intermediate vector results.  */
1230       if (TREE_CODE (vect) == SSA_NAME)
1231 	{
1232 	  gimple *def_stmt = SSA_NAME_DEF_STMT (vect);
1233 	  if (is_gimple_assign (def_stmt)
1234 	      && (gimple_assign_rhs_code (def_stmt) == VECTOR_CST
1235 		  || gimple_assign_rhs_code (def_stmt) == CONSTRUCTOR))
1236 	    vect = gimple_assign_rhs1 (def_stmt);
1237 	}
1238 
1239       if (TREE_CODE (vect) == VECTOR_CST)
1240 	return VECTOR_CST_ELT (vect, index);
1241       else if (TREE_CODE (vect) == CONSTRUCTOR
1242 	       && (CONSTRUCTOR_NELTS (vect) == 0
1243 		   || TREE_CODE (TREE_TYPE (CONSTRUCTOR_ELT (vect, 0)->value))
1244 		      != VECTOR_TYPE))
1245         {
1246 	  if (index < CONSTRUCTOR_NELTS (vect))
1247 	    return CONSTRUCTOR_ELT (vect, index)->value;
1248           return build_zero_cst (vect_elt_type);
1249         }
1250       else
1251         {
1252 	  tree size = TYPE_SIZE (vect_elt_type);
1253 	  tree pos = fold_build2 (MULT_EXPR, bitsizetype, bitsize_int (index),
1254 				  size);
1255 	  return fold_build3 (BIT_FIELD_REF, vect_elt_type, vect, size, pos);
1256         }
1257     }
1258 
1259   if (!ptmpvec)
1260     tmpvec = create_tmp_var (vect_type, "vectmp");
1261   else if (!*ptmpvec)
1262     tmpvec = *ptmpvec = create_tmp_var (vect_type, "vectmp");
1263   else
1264     {
1265       tmpvec = *ptmpvec;
1266       need_asgn = false;
1267     }
1268 
1269   if (need_asgn)
1270     {
1271       TREE_ADDRESSABLE (tmpvec) = 1;
1272       asgn = gimple_build_assign (tmpvec, vect);
1273       gsi_insert_before (gsi, asgn, GSI_SAME_STMT);
1274     }
1275 
1276   arraytype = build_array_type_nelts (vect_elt_type, elements);
1277   return build4 (ARRAY_REF, vect_elt_type,
1278                  build1 (VIEW_CONVERT_EXPR, arraytype, tmpvec),
1279                  idx, NULL_TREE, NULL_TREE);
1280 }
1281 
1282 /* Check if VEC_PERM_EXPR within the given setting is supported
1283    by hardware, or lower it piecewise.
1284 
   When VEC_PERM_EXPR has the same first and second operands,
   VEC_PERM_EXPR <v0, v0, mask>, the lowered version would be
   {v0[mask[0]], v0[mask[1]], ...}
   MASK and V0 must have the same number of elements.

   Otherwise VEC_PERM_EXPR <v0, v1, mask> is lowered to
   {mask[0] < len(v0) ? v0[mask[0]] : v1[mask[0] - len(v0)], ...}
   V0 and V1 must have the same type.  MASK, V0 and V1 must have the
   same number of elements.  */
1294 
1295 static void
1296 lower_vec_perm (gimple_stmt_iterator *gsi)
1297 {
1298   gassign *stmt = as_a <gassign *> (gsi_stmt (*gsi));
1299   tree mask = gimple_assign_rhs3 (stmt);
1300   tree vec0 = gimple_assign_rhs1 (stmt);
1301   tree vec1 = gimple_assign_rhs2 (stmt);
1302   tree vect_type = TREE_TYPE (vec0);
1303   tree mask_type = TREE_TYPE (mask);
1304   tree vect_elt_type = TREE_TYPE (vect_type);
1305   tree mask_elt_type = TREE_TYPE (mask_type);
1306   unsigned HOST_WIDE_INT elements;
1307   vec<constructor_elt, va_gc> *v;
1308   tree constr, t, si, i_val;
1309   tree vec0tmp = NULL_TREE, vec1tmp = NULL_TREE, masktmp = NULL_TREE;
1310   bool two_operand_p = !operand_equal_p (vec0, vec1, 0);
1311   location_t loc = gimple_location (gsi_stmt (*gsi));
1312   unsigned i;
1313 
1314   if (!TYPE_VECTOR_SUBPARTS (vect_type).is_constant (&elements))
1315     return;
1316 
1317   if (TREE_CODE (mask) == SSA_NAME)
1318     {
1319       gimple *def_stmt = SSA_NAME_DEF_STMT (mask);
1320       if (is_gimple_assign (def_stmt)
1321 	  && gimple_assign_rhs_code (def_stmt) == VECTOR_CST)
1322 	mask = gimple_assign_rhs1 (def_stmt);
1323     }
1324 
1325   vec_perm_builder sel_int;
1326 
1327   if (TREE_CODE (mask) == VECTOR_CST
1328       && tree_to_vec_perm_builder (&sel_int, mask))
1329     {
1330       vec_perm_indices indices (sel_int, 2, elements);
1331       if (can_vec_perm_const_p (TYPE_MODE (vect_type), indices))
1332 	{
1333 	  gimple_assign_set_rhs3 (stmt, mask);
1334 	  update_stmt (stmt);
1335 	  return;
1336 	}
1337       /* Also detect vec_shr pattern - VEC_PERM_EXPR with zero
1338 	 vector as VEC1 and a right element shift MASK.  */
1339       if (optab_handler (vec_shr_optab, TYPE_MODE (vect_type))
1340 	  != CODE_FOR_nothing
1341 	  && TREE_CODE (vec1) == VECTOR_CST
1342 	  && initializer_zerop (vec1)
1343 	  && maybe_ne (indices[0], 0)
1344 	  && known_lt (poly_uint64 (indices[0]), elements))
1345 	{
1346 	  bool ok_p = indices.series_p (0, 1, indices[0], 1);
1347 	  if (!ok_p)
1348 	    {
1349 	      for (i = 1; i < elements; ++i)
1350 		{
1351 		  poly_uint64 actual = indices[i];
1352 		  poly_uint64 expected = i + indices[0];
1353 		  /* Indices into the second vector are all equivalent.  */
1354 		  if (maybe_lt (actual, elements)
1355 		      ? maybe_ne (actual, expected)
1356 		      : maybe_lt (expected, elements))
1357 		    break;
1358 		}
1359 	      ok_p = i == elements;
1360 	    }
1361 	  if (ok_p)
1362 	    {
1363 	      gimple_assign_set_rhs3 (stmt, mask);
1364 	      update_stmt (stmt);
1365 	      return;
1366 	    }
1367 	}
1368     }
1369   else if (can_vec_perm_var_p (TYPE_MODE (vect_type)))
1370     return;
1371 
1372   warning_at (loc, OPT_Wvector_operation_performance,
1373               "vector shuffling operation will be expanded piecewise");
1374 
1375   vec_alloc (v, elements);
1376   for (i = 0; i < elements; i++)
1377     {
1378       si = size_int (i);
1379       i_val = vector_element (gsi, mask, si, &masktmp);
1380 
1381       if (TREE_CODE (i_val) == INTEGER_CST)
1382         {
1383 	  unsigned HOST_WIDE_INT index;
1384 
1385 	  index = TREE_INT_CST_LOW (i_val);
1386 	  if (!tree_fits_uhwi_p (i_val) || index >= elements)
1387 	    i_val = build_int_cst (mask_elt_type, index & (elements - 1));
1388 
1389           if (two_operand_p && (index & elements) != 0)
1390 	    t = vector_element (gsi, vec1, i_val, &vec1tmp);
1391 	  else
1392 	    t = vector_element (gsi, vec0, i_val, &vec0tmp);
1393 
1394           t = force_gimple_operand_gsi (gsi, t, true, NULL_TREE,
1395 					true, GSI_SAME_STMT);
1396         }
1397       else
1398         {
1399 	  tree cond = NULL_TREE, v0_val;
1400 
1401 	  if (two_operand_p)
1402 	    {
1403 	      cond = fold_build2 (BIT_AND_EXPR, mask_elt_type, i_val,
1404 			          build_int_cst (mask_elt_type, elements));
1405 	      cond = force_gimple_operand_gsi (gsi, cond, true, NULL_TREE,
1406 					       true, GSI_SAME_STMT);
1407 	    }
1408 
1409 	  i_val = fold_build2 (BIT_AND_EXPR, mask_elt_type, i_val,
1410 			       build_int_cst (mask_elt_type, elements - 1));
1411 	  i_val = force_gimple_operand_gsi (gsi, i_val, true, NULL_TREE,
1412 					    true, GSI_SAME_STMT);
1413 
1414 	  v0_val = vector_element (gsi, vec0, i_val, &vec0tmp);
1415 	  v0_val = force_gimple_operand_gsi (gsi, v0_val, true, NULL_TREE,
1416 					     true, GSI_SAME_STMT);
1417 
1418 	  if (two_operand_p)
1419 	    {
1420 	      tree v1_val;
1421 
1422 	      v1_val = vector_element (gsi, vec1, i_val, &vec1tmp);
1423 	      v1_val = force_gimple_operand_gsi (gsi, v1_val, true, NULL_TREE,
1424 						 true, GSI_SAME_STMT);
1425 
1426 	      cond = fold_build2 (EQ_EXPR, boolean_type_node,
1427 				  cond, build_zero_cst (mask_elt_type));
1428 	      cond = fold_build3 (COND_EXPR, vect_elt_type,
1429 				  cond, v0_val, v1_val);
1430               t = force_gimple_operand_gsi (gsi, cond, true, NULL_TREE,
1431 					    true, GSI_SAME_STMT);
1432             }
1433 	  else
1434 	    t = v0_val;
1435         }
1436 
1437       CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, t);
1438     }
1439 
1440   constr = build_constructor (vect_type, v);
1441   gimple_assign_set_rhs_from_tree (gsi, constr);
1442   update_stmt (gsi_stmt (*gsi));
1443 }
1444 
1445 /* If OP is a uniform vector return the element it is a splat from.  */
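/* E.g. for the constant { 3, 3, 3, 3 } this returns 3, and for an SSA name
   defined as x_1 = { y_2, y_2, y_2, y_2 } it returns y_2; otherwise
   NULL_TREE is returned.  */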
1446 
1447 static tree
1448 ssa_uniform_vector_p (tree op)
1449 {
1450   if (TREE_CODE (op) == VECTOR_CST
1451       || TREE_CODE (op) == VEC_DUPLICATE_EXPR
1452       || TREE_CODE (op) == CONSTRUCTOR)
1453     return uniform_vector_p (op);
1454   if (TREE_CODE (op) == SSA_NAME)
1455     {
1456       gimple *def_stmt = SSA_NAME_DEF_STMT (op);
1457       if (gimple_assign_single_p (def_stmt))
1458 	return uniform_vector_p (gimple_assign_rhs1 (def_stmt));
1459     }
1460   return NULL_TREE;
1461 }
1462 
/* Return the type in which the operation CODE with optab OP can be
   computed for vectors of type TYPE.  */
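/* For example, a 64-element char vector on a target whose widest vector
   mode supporting OP holds 16 chars is computed 16 elements at a time;
   if no vector mode supports OP at all, the scalar char type is returned
   and the operation is expanded fully piecewise.  */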
1465 
1466 static tree
1467 get_compute_type (enum tree_code code, optab op, tree type)
1468 {
1469   /* For very wide vectors, try using a smaller vector mode.  */
1470   tree compute_type = type;
1471   if (op
1472       && (!VECTOR_MODE_P (TYPE_MODE (type))
1473 	  || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing))
1474     {
1475       tree vector_compute_type
1476 	= type_for_widest_vector_mode (TREE_TYPE (type), op);
1477       if (vector_compute_type != NULL_TREE
1478 	  && subparts_gt (compute_type, vector_compute_type)
1479 	  && maybe_ne (TYPE_VECTOR_SUBPARTS (vector_compute_type), 1U)
1480 	  && (optab_handler (op, TYPE_MODE (vector_compute_type))
1481 	      != CODE_FOR_nothing))
1482 	compute_type = vector_compute_type;
1483     }
1484 
1485   /* If we are breaking a BLKmode vector into smaller pieces,
1486      type_for_widest_vector_mode has already looked into the optab,
1487      so skip these checks.  */
1488   if (compute_type == type)
1489     {
1490       machine_mode compute_mode = TYPE_MODE (compute_type);
1491       if (VECTOR_MODE_P (compute_mode))
1492 	{
1493 	  if (op && optab_handler (op, compute_mode) != CODE_FOR_nothing)
1494 	    return compute_type;
1495 	  if (code == MULT_HIGHPART_EXPR
1496 	      && can_mult_highpart_p (compute_mode,
1497 				      TYPE_UNSIGNED (compute_type)))
1498 	    return compute_type;
1499 	}
1500       /* There is no operation in hardware, so fall back to scalars.  */
1501       compute_type = TREE_TYPE (type);
1502     }
1503 
1504   return compute_type;
1505 }
1506 
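/* Expand one piece of a vector COND_EXPR: extract the relevant pieces of
   the selected operands A and B and build a COND_EXPR on them, using an
   unshared copy of the scalar condition of the statement being lowered.  */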
1507 static tree
1508 do_cond (gimple_stmt_iterator *gsi, tree inner_type, tree a, tree b,
1509 	 tree bitpos, tree bitsize, enum tree_code code,
1510 	 tree type ATTRIBUTE_UNUSED)
1511 {
1512   if (TREE_CODE (TREE_TYPE (a)) == VECTOR_TYPE)
1513     a = tree_vec_extract (gsi, inner_type, a, bitsize, bitpos);
1514   if (TREE_CODE (TREE_TYPE (b)) == VECTOR_TYPE)
1515     b = tree_vec_extract (gsi, inner_type, b, bitsize, bitpos);
1516   tree cond = gimple_assign_rhs1 (gsi_stmt (*gsi));
1517   return gimplify_build3 (gsi, code, inner_type, unshare_expr (cond), a, b);
1518 }
1519 
1520 /* Expand a vector COND_EXPR to scalars, piecewise.  */
1521 static void
1522 expand_vector_scalar_condition (gimple_stmt_iterator *gsi)
1523 {
1524   gassign *stmt = as_a <gassign *> (gsi_stmt (*gsi));
1525   tree type = gimple_expr_type (stmt);
1526   tree compute_type = get_compute_type (COND_EXPR, mov_optab, type);
1527   machine_mode compute_mode = TYPE_MODE (compute_type);
1528   gcc_assert (compute_mode != BLKmode);
1529   tree lhs = gimple_assign_lhs (stmt);
1530   tree rhs2 = gimple_assign_rhs2 (stmt);
1531   tree rhs3 = gimple_assign_rhs3 (stmt);
1532   tree new_rhs;
1533 
1534   /* If the compute mode is not a vector mode (hence we are not decomposing
1535      a BLKmode vector to smaller, hardware-supported vectors), we may want
1536      to expand the operations in parallel.  */
1537   if (!VECTOR_MODE_P (compute_mode))
1538     new_rhs = expand_vector_parallel (gsi, do_cond, type, rhs2, rhs3,
1539 				      COND_EXPR);
1540   else
1541     new_rhs = expand_vector_piecewise (gsi, do_cond, type, compute_type,
1542 				       rhs2, rhs3, COND_EXPR);
1543   if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (new_rhs)))
1544     new_rhs = gimplify_build1 (gsi, VIEW_CONVERT_EXPR, TREE_TYPE (lhs),
1545 			       new_rhs);
1546 
  /* NOTE:  We should avoid using gimple_assign_set_rhs_from_tree.  One
     way to do it is to change expand_vector_operation and its callees to
     return a tree_code, RHS1 and RHS2 instead of a tree.  */
1550   gimple_assign_set_rhs_from_tree (gsi, new_rhs);
1551   update_stmt (gsi_stmt (*gsi));
1552 }
1553 
1554 /* Process one statement.  If we identify a vector operation, expand it.  */
1555 
1556 static void
1557 expand_vector_operations_1 (gimple_stmt_iterator *gsi)
1558 {
1559   tree lhs, rhs1, rhs2 = NULL, type, compute_type = NULL_TREE;
1560   enum tree_code code;
1561   optab op = unknown_optab;
1562   enum gimple_rhs_class rhs_class;
1563   tree new_rhs;
1564 
  /* Only consider GIMPLE_ASSIGN statements.  */
1566   gassign *stmt = dyn_cast <gassign *> (gsi_stmt (*gsi));
1567   if (!stmt)
1568     return;
1569 
1570   code = gimple_assign_rhs_code (stmt);
1571   rhs_class = get_gimple_rhs_class (code);
1572   lhs = gimple_assign_lhs (stmt);
1573 
1574   if (code == VEC_PERM_EXPR)
1575     {
1576       lower_vec_perm (gsi);
1577       return;
1578     }
1579 
1580   if (code == VEC_COND_EXPR)
1581     {
1582       expand_vector_condition (gsi);
1583       return;
1584     }
1585 
1586   if (code == COND_EXPR
1587       && TREE_CODE (TREE_TYPE (gimple_assign_lhs (stmt))) == VECTOR_TYPE
1588       && TYPE_MODE (TREE_TYPE (gimple_assign_lhs (stmt))) == BLKmode)
1589     {
1590       expand_vector_scalar_condition (gsi);
1591       return;
1592     }
1593 
1594   if (code == CONSTRUCTOR
1595       && TREE_CODE (lhs) == SSA_NAME
1596       && VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (lhs)))
1597       && !gimple_clobber_p (stmt)
1598       && optimize)
1599     {
1600       optimize_vector_constructor (gsi);
1601       return;
1602     }
1603 
1604   if (rhs_class != GIMPLE_UNARY_RHS && rhs_class != GIMPLE_BINARY_RHS)
1605     return;
1606 
1607   rhs1 = gimple_assign_rhs1 (stmt);
1608   type = gimple_expr_type (stmt);
1609   if (rhs_class == GIMPLE_BINARY_RHS)
1610     rhs2 = gimple_assign_rhs2 (stmt);
1611 
1612   if (!VECTOR_TYPE_P (type)
1613       || !VECTOR_TYPE_P (TREE_TYPE (rhs1)))
1614     return;
1615 
  /* If the vector operation operates on uniform vectors (all elements
     the same), implement it as a scalar operation followed by a splat,
     provided the target supports the scalar operation.  */
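  /* For example (an illustrative sketch of the resulting GIMPLE):

       v1 = {a, a, a, a};  v2 = {b, b, b, b};  lhs = v1 + v2;

     becomes

       t = a + b;  lhs = {t, t, t, t};  */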
1619   tree srhs1, srhs2 = NULL_TREE;
1620   if ((srhs1 = ssa_uniform_vector_p (rhs1)) != NULL_TREE
1621       && (rhs2 == NULL_TREE
1622 	  || (! VECTOR_TYPE_P (TREE_TYPE (rhs2))
1623 	      && (srhs2 = rhs2))
1624 	  || (srhs2 = ssa_uniform_vector_p (rhs2)) != NULL_TREE)
      /* As we query direct optabs, restrict to non-conversion operations.  */
1626       && TYPE_MODE (TREE_TYPE (type)) == TYPE_MODE (TREE_TYPE (srhs1)))
1627     {
1628       op = optab_for_tree_code (code, TREE_TYPE (type), optab_scalar);
1629       if (op >= FIRST_NORM_OPTAB && op <= LAST_NORM_OPTAB
1630 	  && optab_handler (op, TYPE_MODE (TREE_TYPE (type))) != CODE_FOR_nothing)
1631 	{
1632 	  tree slhs = make_ssa_name (TREE_TYPE (srhs1));
1633 	  gimple *repl = gimple_build_assign (slhs, code, srhs1, srhs2);
1634 	  gsi_insert_before (gsi, repl, GSI_SAME_STMT);
1635 	  gimple_assign_set_rhs_from_tree (gsi,
1636 					   build_vector_from_val (type, slhs));
1637 	  update_stmt (stmt);
1638 	  return;
1639 	}
1640     }
1641 
1642   /* A scalar operation pretending to be a vector one.  */
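  /* (Typically a VECTOR_BOOLEAN_TYPE_P value carried in a scalar integer
     mask mode; the target handles such masks directly, so leave them
     alone.)  */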
1643   if (VECTOR_BOOLEAN_TYPE_P (type)
1644       && !VECTOR_MODE_P (TYPE_MODE (type))
1645       && TYPE_MODE (type) != BLKmode)
1646     return;
1647 
1648   if (CONVERT_EXPR_CODE_P (code)
1649       || code == FLOAT_EXPR
1650       || code == FIX_TRUNC_EXPR
1651       || code == VIEW_CONVERT_EXPR)
1652     return;
1653 
  /* The signedness is determined from the input argument.  */
1655   if (code == VEC_UNPACK_FLOAT_HI_EXPR
1656       || code == VEC_UNPACK_FLOAT_LO_EXPR)
1657     {
1658       type = TREE_TYPE (rhs1);
1659       /* We do not know how to scalarize those.  */
1660       return;
1661     }
1662 
  /* For widening/narrowing vector operations, the relevant type is that
     of the arguments, not of the widened result.  VEC_UNPACK_FLOAT_*_EXPR
     is handled the same way above.  */
1666   if (code == WIDEN_SUM_EXPR
1667       || code == VEC_WIDEN_MULT_HI_EXPR
1668       || code == VEC_WIDEN_MULT_LO_EXPR
1669       || code == VEC_WIDEN_MULT_EVEN_EXPR
1670       || code == VEC_WIDEN_MULT_ODD_EXPR
1671       || code == VEC_UNPACK_HI_EXPR
1672       || code == VEC_UNPACK_LO_EXPR
1673       || code == VEC_PACK_TRUNC_EXPR
1674       || code == VEC_PACK_SAT_EXPR
1675       || code == VEC_PACK_FIX_TRUNC_EXPR
1676       || code == VEC_WIDEN_LSHIFT_HI_EXPR
1677       || code == VEC_WIDEN_LSHIFT_LO_EXPR)
1678     {
1679       type = TREE_TYPE (rhs1);
1680       /* We do not know how to scalarize those.  */
1681       return;
1682     }
1683 
  /* Choose between vector shift/rotate by vector and vector shift/rotate
     by scalar.  */
1686   if (code == LSHIFT_EXPR
1687       || code == RSHIFT_EXPR
1688       || code == LROTATE_EXPR
1689       || code == RROTATE_EXPR)
1690     {
1691       optab opv;
1692 
1693       /* Check whether we have vector <op> {x,x,x,x} where x
1694          could be a scalar variable or a constant.  Transform
1695          vector <op> {x,x,x,x} ==> vector <op> scalar.  */
1696       if (VECTOR_INTEGER_TYPE_P (TREE_TYPE (rhs2)))
1697         {
1698           tree first;
1699 
1700           if ((first = ssa_uniform_vector_p (rhs2)) != NULL_TREE)
1701             {
1702               gimple_assign_set_rhs2 (stmt, first);
1703               update_stmt (stmt);
1704               rhs2 = first;
1705             }
1706         }
1707 
1708       opv = optab_for_tree_code (code, type, optab_vector);
1709       if (VECTOR_INTEGER_TYPE_P (TREE_TYPE (rhs2)))
1710 	op = opv;
1711       else
1712 	{
1713           op = optab_for_tree_code (code, type, optab_scalar);
1714 
1715 	  compute_type = get_compute_type (code, op, type);
1716 	  if (compute_type == type)
1717 	    return;
	  /* The rtl expander will expand vector/scalar as vector/vector
	     if necessary.  Pick the one with the wider vector type.  */
1720 	  tree compute_vtype = get_compute_type (code, opv, type);
1721 	  if (subparts_gt (compute_vtype, compute_type))
1722 	    {
1723 	      compute_type = compute_vtype;
1724 	      op = opv;
1725 	    }
1726 	}
1727 
1728       if (code == LROTATE_EXPR || code == RROTATE_EXPR)
1729 	{
1730 	  if (compute_type == NULL_TREE)
1731 	    compute_type = get_compute_type (code, op, type);
1732 	  if (compute_type == type)
1733 	    return;
	  /* Before splitting vector rotates into scalar rotates,
	     see if we can use vector shifts and BIT_IOR_EXPR instead.
	     For vector-by-vector rotates we'd also need to check
	     BIT_AND_EXPR and NEGATE_EXPR; punt on those for now, as
	     fold doesn't seem to create such rotates anyway.  */
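	  /* The underlying identity, for element precision PREC and a
	     rotate amount N with 0 < N < PREC (an illustrative sketch
	     only; the expansion itself takes care of boundary cases):

	       x r<< N  ==  (x << N) | (x >> (PREC - N))  */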
1739 	  if (compute_type == TREE_TYPE (type)
1740 	      && !VECTOR_INTEGER_TYPE_P (TREE_TYPE (rhs2)))
1741 	    {
1742 	      optab oplv = vashl_optab, opl = ashl_optab;
1743 	      optab oprv = vlshr_optab, opr = lshr_optab, opo = ior_optab;
1744 	      tree compute_lvtype = get_compute_type (LSHIFT_EXPR, oplv, type);
1745 	      tree compute_rvtype = get_compute_type (RSHIFT_EXPR, oprv, type);
1746 	      tree compute_otype = get_compute_type (BIT_IOR_EXPR, opo, type);
1747 	      tree compute_ltype = get_compute_type (LSHIFT_EXPR, opl, type);
1748 	      tree compute_rtype = get_compute_type (RSHIFT_EXPR, opr, type);
	      /* The rtl expander will expand vector/scalar as vector/vector
		 if necessary.  Pick the one with the wider vector type.  */
1751 	      if (subparts_gt (compute_lvtype, compute_ltype))
1752 		{
1753 		  compute_ltype = compute_lvtype;
1754 		  opl = oplv;
1755 		}
1756 	      if (subparts_gt (compute_rvtype, compute_rtype))
1757 		{
1758 		  compute_rtype = compute_rvtype;
1759 		  opr = oprv;
1760 		}
1761 	      /* Pick the narrowest type from LSHIFT_EXPR, RSHIFT_EXPR and
1762 		 BIT_IOR_EXPR.  */
1763 	      compute_type = compute_ltype;
1764 	      if (subparts_gt (compute_type, compute_rtype))
1765 		compute_type = compute_rtype;
1766 	      if (subparts_gt (compute_type, compute_otype))
1767 		compute_type = compute_otype;
1768 	      /* Verify all 3 operations can be performed in that type.  */
1769 	      if (compute_type != TREE_TYPE (type))
1770 		{
1771 		  if (optab_handler (opl, TYPE_MODE (compute_type))
1772 		      == CODE_FOR_nothing
1773 		      || optab_handler (opr, TYPE_MODE (compute_type))
1774 			 == CODE_FOR_nothing
1775 		      || optab_handler (opo, TYPE_MODE (compute_type))
1776 			 == CODE_FOR_nothing)
1777 		    compute_type = TREE_TYPE (type);
1778 		}
1779 	    }
1780 	}
1781     }
1782   else
1783     op = optab_for_tree_code (code, type, optab_default);
1784 
1785   /* Optabs will try converting a negation into a subtraction, so
1786      look for it as well.  TODO: negation of floating-point vectors
1787      might be turned into an exclusive OR toggling the sign bit.  */
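  /* For the integral element types checked below, -x is exactly 0 - x,
     which is why the subtraction optab is a valid fallback (a clarifying
     note, not an additional transformation).  */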
1788   if (op == unknown_optab
1789       && code == NEGATE_EXPR
1790       && INTEGRAL_TYPE_P (TREE_TYPE (type)))
1791     op = optab_for_tree_code (MINUS_EXPR, type, optab_default);
1792 
1793   if (compute_type == NULL_TREE)
1794     compute_type = get_compute_type (code, op, type);
1795   if (compute_type == type)
1796     return;
1797 
1798   new_rhs = expand_vector_operation (gsi, type, compute_type, stmt, code);
1799 
  /* Leave the expression untouched for later expansion.  */
1801   if (new_rhs == NULL_TREE)
1802     return;
1803 
1804   if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (new_rhs)))
1805     new_rhs = gimplify_build1 (gsi, VIEW_CONVERT_EXPR, TREE_TYPE (lhs),
1806                                new_rhs);
1807 
  /* NOTE:  We should avoid using gimple_assign_set_rhs_from_tree.  One
     way to do it is to change expand_vector_operation and its callees to
     return a tree_code, RHS1 and RHS2 instead of a tree.  */
1811   gimple_assign_set_rhs_from_tree (gsi, new_rhs);
1812   update_stmt (gsi_stmt (*gsi));
1813 }
1814 
/* Use this to lower vector operations introduced by the vectorizer,
   if they may need the bit-twiddling tricks implemented in this file.  */
1817 
1818 static unsigned int
1819 expand_vector_operations (void)
1820 {
1821   gimple_stmt_iterator gsi;
1822   basic_block bb;
1823   bool cfg_changed = false;
1824 
1825   FOR_EACH_BB_FN (bb, cfun)
1826     {
1827       for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1828 	{
1829 	  expand_vector_operations_1 (&gsi);
1830 	  /* ???  If we do not cleanup EH then we will ICE in
1831 	     verification.  But in reality we have created wrong-code
1832 	     as we did not properly transition EH info and edges to
1833 	     the piecewise computations.  */
1834 	  if (maybe_clean_eh_stmt (gsi_stmt (gsi))
1835 	      && gimple_purge_dead_eh_edges (bb))
1836 	    cfg_changed = true;
1837 	}
1838     }
1839 
1840   return cfg_changed ? TODO_cleanup_cfg : 0;
1841 }
1842 
1843 namespace {
1844 
1845 const pass_data pass_data_lower_vector =
1846 {
1847   GIMPLE_PASS, /* type */
1848   "veclower", /* name */
1849   OPTGROUP_VEC, /* optinfo_flags */
1850   TV_NONE, /* tv_id */
1851   PROP_cfg, /* properties_required */
1852   PROP_gimple_lvec, /* properties_provided */
1853   0, /* properties_destroyed */
1854   0, /* todo_flags_start */
1855   TODO_update_ssa, /* todo_flags_finish */
1856 };
1857 
1858 class pass_lower_vector : public gimple_opt_pass
1859 {
1860 public:
1861   pass_lower_vector (gcc::context *ctxt)
1862     : gimple_opt_pass (pass_data_lower_vector, ctxt)
1863   {}
1864 
1865   /* opt_pass methods: */
1866   virtual bool gate (function *fun)
1867     {
1868       return !(fun->curr_properties & PROP_gimple_lvec);
1869     }
1870 
1871   virtual unsigned int execute (function *)
1872     {
1873       return expand_vector_operations ();
1874     }
1875 
1876 }; // class pass_lower_vector
1877 
1878 } // anon namespace
1879 
1880 gimple_opt_pass *
1881 make_pass_lower_vector (gcc::context *ctxt)
1882 {
1883   return new pass_lower_vector (ctxt);
1884 }
1885 
1886 namespace {
1887 
1888 const pass_data pass_data_lower_vector_ssa =
1889 {
1890   GIMPLE_PASS, /* type */
1891   "veclower2", /* name */
1892   OPTGROUP_VEC, /* optinfo_flags */
1893   TV_NONE, /* tv_id */
1894   PROP_cfg, /* properties_required */
1895   PROP_gimple_lvec, /* properties_provided */
1896   0, /* properties_destroyed */
1897   0, /* todo_flags_start */
1898   ( TODO_update_ssa
1899     | TODO_cleanup_cfg ), /* todo_flags_finish */
1900 };
1901 
1902 class pass_lower_vector_ssa : public gimple_opt_pass
1903 {
1904 public:
1905   pass_lower_vector_ssa (gcc::context *ctxt)
1906     : gimple_opt_pass (pass_data_lower_vector_ssa, ctxt)
1907   {}
1908 
1909   /* opt_pass methods: */
1910   opt_pass * clone () { return new pass_lower_vector_ssa (m_ctxt); }
1911   virtual unsigned int execute (function *)
1912     {
1913       return expand_vector_operations ();
1914     }
1915 
1916 }; // class pass_lower_vector_ssa
1917 
1918 } // anon namespace
1919 
1920 gimple_opt_pass *
1921 make_pass_lower_vector_ssa (gcc::context *ctxt)
1922 {
1923   return new pass_lower_vector_ssa (ctxt);
1924 }
1925 
1926 #include "gt-tree-vect-generic.h"
1927