/* Lower vector operations to scalar operations.
   Copyright (C) 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012
   Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3, or (at your option) any
later version.

GCC is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tree.h"
#include "tm.h"
#include "langhooks.h"
#include "tree-flow.h"
#include "gimple.h"
#include "tree-iterator.h"
#include "tree-pass.h"
#include "flags.h"
#include "ggc.h"
#include "diagnostic.h"

/* Need to include rtl.h, expr.h, etc. for optabs.  */
#include "expr.h"
#include "optabs.h"


static void expand_vector_operations_1 (gimple_stmt_iterator *);


/* Build a constant of type TYPE, made of VALUE's bits replicated
   every TYPE_SIZE (INNER_TYPE) bits to fit TYPE's precision.  */
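/* For instance, with a 64-bit HOST_WIDE_INT, replicating the 8-bit value
   0x7f into a 32-bit TYPE yields the constant 0x7f7f7f7f.  */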
static tree
build_replicated_const (tree type, tree inner_type, HOST_WIDE_INT value)
{
  int width = tree_low_cst (TYPE_SIZE (inner_type), 1);
  int n = HOST_BITS_PER_WIDE_INT / width;
  unsigned HOST_WIDE_INT low, high, mask;
  tree ret;

  gcc_assert (n);

  if (width == HOST_BITS_PER_WIDE_INT)
    low = value;
  else
    {
      mask = ((HOST_WIDE_INT)1 << width) - 1;
      low = (unsigned HOST_WIDE_INT) ~0 / mask * (value & mask);
    }

  if (TYPE_PRECISION (type) < HOST_BITS_PER_WIDE_INT)
    low &= ((HOST_WIDE_INT)1 << TYPE_PRECISION (type)) - 1, high = 0;
  else if (TYPE_PRECISION (type) == HOST_BITS_PER_WIDE_INT)
    high = 0;
  else if (TYPE_PRECISION (type) == 2 * HOST_BITS_PER_WIDE_INT)
    high = low;
  else
    gcc_unreachable ();

  ret = build_int_cst_wide (type, low, high);
  return ret;
}

static GTY(()) tree vector_inner_type;
static GTY(()) tree vector_last_type;
static GTY(()) int vector_last_nunits;

/* Return a suitable vector type made of NUNITS units each of mode
   "word_mode" (the global variable).  */
static tree
build_word_mode_vector_type (int nunits)
{
  if (!vector_inner_type)
    vector_inner_type = lang_hooks.types.type_for_mode (word_mode, 1);
  else if (vector_last_nunits == nunits)
    {
      gcc_assert (TREE_CODE (vector_last_type) == VECTOR_TYPE);
      return vector_last_type;
    }

  /* We build a new type, but we canonicalize it nevertheless,
     because it still saves some memory.  */
  vector_last_nunits = nunits;
  vector_last_type = type_hash_canon (nunits,
				      build_vector_type (vector_inner_type,
							 nunits));
  return vector_last_type;
}

typedef tree (*elem_op_func) (gimple_stmt_iterator *,
			      tree, tree, tree, tree, tree, enum tree_code);

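/* Extract the piece of type TYPE located at BITPOS (of width BITSIZE)
   within T.  If BITPOS is NULL, instead reinterpret the whole of T as
   TYPE via a VIEW_CONVERT_EXPR.  */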
static inline tree
tree_vec_extract (gimple_stmt_iterator *gsi, tree type,
		  tree t, tree bitsize, tree bitpos)
{
  if (bitpos)
    return gimplify_build3 (gsi, BIT_FIELD_REF, type, t, bitsize, bitpos);
  else
    return gimplify_build1 (gsi, VIEW_CONVERT_EXPR, type, t);
}

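/* Extract the piece of A at BITPOS (of width BITSIZE), convert it to
   INNER_TYPE, and apply the unary operation CODE to it.  B is unused.  */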
static tree
do_unop (gimple_stmt_iterator *gsi, tree inner_type, tree a,
	 tree b ATTRIBUTE_UNUSED, tree bitpos, tree bitsize,
	 enum tree_code code)
{
  a = tree_vec_extract (gsi, inner_type, a, bitsize, bitpos);
  return gimplify_build1 (gsi, code, inner_type, a);
}

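/* Extract the corresponding pieces of A and B at BITPOS/BITSIZE (when they
   are vectors) and apply the binary operation CODE to them in INNER_TYPE.  */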
static tree
do_binop (gimple_stmt_iterator *gsi, tree inner_type, tree a, tree b,
	  tree bitpos, tree bitsize, enum tree_code code)
{
  if (TREE_CODE (TREE_TYPE (a)) == VECTOR_TYPE)
    a = tree_vec_extract (gsi, inner_type, a, bitsize, bitpos);
  if (TREE_CODE (TREE_TYPE (b)) == VECTOR_TYPE)
    b = tree_vec_extract (gsi, inner_type, b, bitsize, bitpos);
  return gimplify_build2 (gsi, code, inner_type, a, b);
}

/* Construct expression (A[BITPOS] code B[BITPOS]) ? -1 : 0.

   INNER_TYPE is the type of A's and B's elements.  The returned
   expression is of signed integer type with a size equal to the size
   of INNER_TYPE.  */
static tree
do_compare (gimple_stmt_iterator *gsi, tree inner_type, tree a, tree b,
	    tree bitpos, tree bitsize, enum tree_code code)
{
  tree comp_type;

  a = tree_vec_extract (gsi, inner_type, a, bitsize, bitpos);
  b = tree_vec_extract (gsi, inner_type, b, bitsize, bitpos);

  comp_type = build_nonstandard_integer_type
		      (GET_MODE_BITSIZE (TYPE_MODE (inner_type)), 0);

  return gimplify_build3 (gsi, COND_EXPR, comp_type,
			  fold_build2 (code, boolean_type_node, a, b),
			  build_int_cst (comp_type, -1),
			  build_int_cst (comp_type, 0));
}

/* Expand vector addition to scalars.  This does bit twiddling
   in order to increase parallelism:

   a + b = (((int) a & 0x7f7f7f7f) + ((int) b & 0x7f7f7f7f)) ^
           (a ^ b) & 0x80808080

   a - b =  (((int) a | 0x80808080) - ((int) b & 0x7f7f7f7f)) ^
            (a ^ ~b) & 0x80808080

   -b = (0x80808080 - ((int) b & 0x7f7f7f7f)) ^ (~b & 0x80808080)

   This optimization should be done only if 4 vector items or more
   fit into a word.  */
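/* As an illustration with four 8-bit elements packed into a 32-bit word,
   for a = 0x01020304 and b = 0x10203040:
     a_low  = a & 0x7f7f7f7f          = 0x01020304
     b_low  = b & 0x7f7f7f7f          = 0x10203040
     signs  = (a ^ b) & 0x80808080    = 0x00000000
     a + b  = (a_low + b_low) ^ signs = 0x11223344
   i.e. the element-wise sums; clearing the top bit of each element before
   the word-wide addition keeps carries from crossing element boundaries.  */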
static tree
do_plus_minus (gimple_stmt_iterator *gsi, tree word_type, tree a, tree b,
	       tree bitpos ATTRIBUTE_UNUSED, tree bitsize ATTRIBUTE_UNUSED,
	       enum tree_code code)
{
  tree inner_type = TREE_TYPE (TREE_TYPE (a));
  unsigned HOST_WIDE_INT max;
  tree low_bits, high_bits, a_low, b_low, result_low, signs;

  max = GET_MODE_MASK (TYPE_MODE (inner_type));
  low_bits = build_replicated_const (word_type, inner_type, max >> 1);
  high_bits = build_replicated_const (word_type, inner_type, max & ~(max >> 1));

  a = tree_vec_extract (gsi, word_type, a, bitsize, bitpos);
  b = tree_vec_extract (gsi, word_type, b, bitsize, bitpos);

  signs = gimplify_build2 (gsi, BIT_XOR_EXPR, word_type, a, b);
  b_low = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, b, low_bits);
  if (code == PLUS_EXPR)
    a_low = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, a, low_bits);
  else
    {
      a_low = gimplify_build2 (gsi, BIT_IOR_EXPR, word_type, a, high_bits);
      signs = gimplify_build1 (gsi, BIT_NOT_EXPR, word_type, signs);
    }

  signs = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, signs, high_bits);
  result_low = gimplify_build2 (gsi, code, word_type, a_low, b_low);
  return gimplify_build2 (gsi, BIT_XOR_EXPR, word_type, result_low, signs);
}

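/* Expand vector negation a word at a time, using the identity
   -b = (0x80808080 - (b & 0x7f7f7f7f)) ^ (~b & 0x80808080) shown in the
   comment before do_plus_minus.  */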
static tree
do_negate (gimple_stmt_iterator *gsi, tree word_type, tree b,
	   tree unused ATTRIBUTE_UNUSED, tree bitpos ATTRIBUTE_UNUSED,
	   tree bitsize ATTRIBUTE_UNUSED,
	   enum tree_code code ATTRIBUTE_UNUSED)
{
  tree inner_type = TREE_TYPE (TREE_TYPE (b));
  HOST_WIDE_INT max;
  tree low_bits, high_bits, b_low, result_low, signs;

  max = GET_MODE_MASK (TYPE_MODE (inner_type));
  low_bits = build_replicated_const (word_type, inner_type, max >> 1);
  high_bits = build_replicated_const (word_type, inner_type, max & ~(max >> 1));

  b = tree_vec_extract (gsi, word_type, b, bitsize, bitpos);

  b_low = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, b, low_bits);
  signs = gimplify_build1 (gsi, BIT_NOT_EXPR, word_type, b);
  signs = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, signs, high_bits);
  result_low = gimplify_build2 (gsi, MINUS_EXPR, word_type, high_bits, b_low);
  return gimplify_build2 (gsi, BIT_XOR_EXPR, word_type, result_low, signs);
}

/* Expand a vector operation to scalars, by using many operations
   whose type is the vector type's inner type.  */
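/* For instance, an addition on a four-element vector becomes a CONSTRUCTOR
   of four scalar additions, each operating on BIT_FIELD_REFs of the
   original operands.  */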
static tree
expand_vector_piecewise (gimple_stmt_iterator *gsi, elem_op_func f,
			 tree type, tree inner_type,
			 tree a, tree b, enum tree_code code)
{
  VEC(constructor_elt,gc) *v;
  tree part_width = TYPE_SIZE (inner_type);
  tree index = bitsize_int (0);
  int nunits = TYPE_VECTOR_SUBPARTS (type);
  int delta = tree_low_cst (part_width, 1)
	      / tree_low_cst (TYPE_SIZE (TREE_TYPE (type)), 1);
  int i;
  location_t loc = gimple_location (gsi_stmt (*gsi));

  if (types_compatible_p (gimple_expr_type (gsi_stmt (*gsi)), type))
    warning_at (loc, OPT_Wvector_operation_performance,
		"vector operation will be expanded piecewise");
  else
    warning_at (loc, OPT_Wvector_operation_performance,
		"vector operation will be expanded in parallel");

  v = VEC_alloc (constructor_elt, gc, (nunits + delta - 1) / delta);
  for (i = 0; i < nunits;
       i += delta, index = int_const_binop (PLUS_EXPR, index, part_width))
    {
      tree result = f (gsi, inner_type, a, b, index, part_width, code);
      constructor_elt *ce = VEC_quick_push (constructor_elt, v, NULL);
      ce->index = NULL_TREE;
      ce->value = result;
    }

  return build_constructor (type, v);
}

/* Expand a vector operation to scalars with the freedom to use
   a scalar integer type, or to use a different size for the items
   in the vector type.  */
static tree
expand_vector_parallel (gimple_stmt_iterator *gsi, elem_op_func f, tree type,
			tree a, tree b,
			enum tree_code code)
{
  tree result, compute_type;
  enum machine_mode mode;
  int n_words = tree_low_cst (TYPE_SIZE_UNIT (type), 1) / UNITS_PER_WORD;
  location_t loc = gimple_location (gsi_stmt (*gsi));

  /* We have three strategies.  If the type is already correct, just do
     the operation an element at a time.  Else, if the vector is wider than
     one word, do it a word at a time; finally, if the vector is smaller
     than one word, do it as a scalar.  */
  if (TYPE_MODE (TREE_TYPE (type)) == word_mode)
     return expand_vector_piecewise (gsi, f,
				     type, TREE_TYPE (type),
				     a, b, code);
  else if (n_words > 1)
    {
      tree word_type = build_word_mode_vector_type (n_words);
      result = expand_vector_piecewise (gsi, f,
				        word_type, TREE_TYPE (word_type),
					a, b, code);
      result = force_gimple_operand_gsi (gsi, result, true, NULL, true,
                                         GSI_SAME_STMT);
    }
  else
    {
      /* Use a single scalar operation with a mode no wider than word_mode.  */
      mode = mode_for_size (tree_low_cst (TYPE_SIZE (type), 1), MODE_INT, 0);
      compute_type = lang_hooks.types.type_for_mode (mode, 1);
      result = f (gsi, compute_type, a, b, NULL_TREE, NULL_TREE, code);
      warning_at (loc, OPT_Wvector_operation_performance,
	          "vector operation will be expanded with a "
		  "single scalar operation");
    }

  return result;
}

/* Expand a vector operation to scalars; for integer types we can use
   special bit twiddling tricks to do the sums a word at a time, using
   function F_PARALLEL instead of F.  These tricks are done only if
   they can process at least four items, that is, only if the vector
   holds at least four items and if a word can hold four items.  */
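/* For example, on a target with 64-bit words, the addition of two V8QI
   vectors (eight one-byte parts per word) takes the parallel bit-twiddling
   path, while a V2DI addition falls back to the piecewise expansion.  */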
static tree
expand_vector_addition (gimple_stmt_iterator *gsi,
			elem_op_func f, elem_op_func f_parallel,
			tree type, tree a, tree b, enum tree_code code)
{
  int parts_per_word = UNITS_PER_WORD
		       / tree_low_cst (TYPE_SIZE_UNIT (TREE_TYPE (type)), 1);

  if (INTEGRAL_TYPE_P (TREE_TYPE (type))
      && parts_per_word >= 4
      && TYPE_VECTOR_SUBPARTS (type) >= 4)
    return expand_vector_parallel (gsi, f_parallel,
				   type, a, b, code);
  else
    return expand_vector_piecewise (gsi, f,
				    type, TREE_TYPE (type),
				    a, b, code);
}

/* Check if vector VEC consists of all equal elements and that the
   number of elements corresponds to the type of VEC.  The function
   returns the first element of the vector, or NULL_TREE if the vector
   is not uniform.  */
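/* For example, the constant vector {3, 3, 3, 3} yields the element 3,
   while {3, 1, 3, 3} yields NULL_TREE.  */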
static tree
uniform_vector_p (tree vec)
{
  tree first, t, els;
  unsigned i;

  if (vec == NULL_TREE)
    return NULL_TREE;

  if (TREE_CODE (vec) == VECTOR_CST)
    {
      els = TREE_VECTOR_CST_ELTS (vec);
      first = TREE_VALUE (els);
      els = TREE_CHAIN (els);

      for (t = els; t; t = TREE_CHAIN (t))
	if (!operand_equal_p (first, TREE_VALUE (t), 0))
	  return NULL_TREE;

      return first;
    }

  else if (TREE_CODE (vec) == CONSTRUCTOR)
    {
      first = error_mark_node;

      FOR_EACH_CONSTRUCTOR_VALUE (CONSTRUCTOR_ELTS (vec), i, t)
        {
          if (i == 0)
            {
              first = t;
              continue;
            }
	  if (!operand_equal_p (first, t, 0))
	    return NULL_TREE;
        }
      if (i != TYPE_VECTOR_SUBPARTS (TREE_TYPE (vec)))
	return NULL_TREE;

      return first;
    }

  return NULL_TREE;
}

/* Try to expand the vector comparison expression OP0 CODE OP1 by querying
   the optab to check whether the expression
	VEC_COND_EXPR <OP0 CODE OP1, {-1,...}, {0,...}>
   can be expanded; if it can, leave the statement alone for later
   expansion, otherwise expand the comparison piecewise.  */
static tree
expand_vector_comparison (gimple_stmt_iterator *gsi, tree type, tree op0,
                          tree op1, enum tree_code code)
{
  tree t;
  if (! expand_vec_cond_expr_p (type, TREE_TYPE (op0)))
    t = expand_vector_piecewise (gsi, do_compare, type,
				 TREE_TYPE (TREE_TYPE (op0)), op0, op1, code);
  else
    t = NULL_TREE;

  return t;
}

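/* Expand the vector operation CODE of statement ASSIGN, whose result has
   vector type TYPE, using the helpers above.  When the operation cannot be
   handled a word at a time, it is expanded piecewise, with each piece
   computed in COMPUTE_TYPE.  */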
static tree
expand_vector_operation (gimple_stmt_iterator *gsi, tree type, tree compute_type,
			 gimple assign, enum tree_code code)
{
  enum machine_mode compute_mode = TYPE_MODE (compute_type);

  /* If the compute mode is not a vector mode (hence we are not decomposing
     a BLKmode vector to smaller, hardware-supported vectors), we may want
     to expand the operations in parallel.  */
  if (GET_MODE_CLASS (compute_mode) != MODE_VECTOR_INT
      && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_FLOAT
      && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_FRACT
      && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_UFRACT
      && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_ACCUM
      && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_UACCUM)
    switch (code)
      {
      case PLUS_EXPR:
      case MINUS_EXPR:
        if (!TYPE_OVERFLOW_TRAPS (type))
	  return expand_vector_addition (gsi, do_binop, do_plus_minus, type,
					 gimple_assign_rhs1 (assign),
					 gimple_assign_rhs2 (assign), code);
	break;

      case NEGATE_EXPR:
        if (!TYPE_OVERFLOW_TRAPS (type))
          return expand_vector_addition (gsi, do_unop, do_negate, type,
					 gimple_assign_rhs1 (assign),
					 NULL_TREE, code);
	break;

      case BIT_AND_EXPR:
      case BIT_IOR_EXPR:
      case BIT_XOR_EXPR:
        return expand_vector_parallel (gsi, do_binop, type,
				       gimple_assign_rhs1 (assign),
				       gimple_assign_rhs2 (assign), code);

      case BIT_NOT_EXPR:
        return expand_vector_parallel (gsi, do_unop, type,
				       gimple_assign_rhs1 (assign),
				       NULL_TREE, code);
      case EQ_EXPR:
      case NE_EXPR:
      case GT_EXPR:
      case LT_EXPR:
      case GE_EXPR:
      case LE_EXPR:
      case UNEQ_EXPR:
      case UNGT_EXPR:
      case UNLT_EXPR:
      case UNGE_EXPR:
      case UNLE_EXPR:
      case LTGT_EXPR:
      case ORDERED_EXPR:
      case UNORDERED_EXPR:
	{
	  tree rhs1 = gimple_assign_rhs1 (assign);
	  tree rhs2 = gimple_assign_rhs2 (assign);

	  return expand_vector_comparison (gsi, type, rhs1, rhs2, code);
	}
      default:
	break;
      }

  if (TREE_CODE_CLASS (code) == tcc_unary)
    return expand_vector_piecewise (gsi, do_unop, type, compute_type,
				    gimple_assign_rhs1 (assign),
				    NULL_TREE, code);
  else
    return expand_vector_piecewise (gsi, do_binop, type, compute_type,
				    gimple_assign_rhs1 (assign),
				    gimple_assign_rhs2 (assign), code);
}

/* Return a type for the widest vector mode whose components are of type
   TYPE and which is supported by optab OP, or NULL_TREE if none is
   found.  */

static tree
type_for_widest_vector_mode (tree type, optab op)
{
  enum machine_mode inner_mode = TYPE_MODE (type);
  enum machine_mode best_mode = VOIDmode, mode;
  int best_nunits = 0;

  if (SCALAR_FLOAT_MODE_P (inner_mode))
    mode = MIN_MODE_VECTOR_FLOAT;
  else if (SCALAR_FRACT_MODE_P (inner_mode))
    mode = MIN_MODE_VECTOR_FRACT;
  else if (SCALAR_UFRACT_MODE_P (inner_mode))
    mode = MIN_MODE_VECTOR_UFRACT;
  else if (SCALAR_ACCUM_MODE_P (inner_mode))
    mode = MIN_MODE_VECTOR_ACCUM;
  else if (SCALAR_UACCUM_MODE_P (inner_mode))
    mode = MIN_MODE_VECTOR_UACCUM;
  else
    mode = MIN_MODE_VECTOR_INT;

  for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
    if (GET_MODE_INNER (mode) == inner_mode
        && GET_MODE_NUNITS (mode) > best_nunits
	&& optab_handler (op, mode) != CODE_FOR_nothing)
      best_mode = mode, best_nunits = GET_MODE_NUNITS (mode);

  if (best_mode == VOIDmode)
    return NULL_TREE;
  else
    return build_vector_type_for_mode (type, best_mode);
}


/* Build a reference to the element of the vector VECT at index IDX.
   The function returns either the element itself, a BIT_FIELD_REF, or
   an ARRAY_REF expression.

   GSI is required to insert temporary variables while building a
   reference to the element of the vector VECT.

   PTMPVEC is a pointer to the temporary variable for caching
   purposes.  If PTMPVEC is NULL, a new temporary variable will be
   created.  */
static tree
vector_element (gimple_stmt_iterator *gsi, tree vect, tree idx, tree *ptmpvec)
{
  tree vect_type, vect_elt_type;
  gimple asgn;
  tree tmpvec;
  tree arraytype;
  bool need_asgn = true;
  unsigned int elements;

  vect_type = TREE_TYPE (vect);
  vect_elt_type = TREE_TYPE (vect_type);
  elements = TYPE_VECTOR_SUBPARTS (vect_type);

  if (TREE_CODE (idx) == INTEGER_CST)
    {
      unsigned HOST_WIDE_INT index;

      /* Given that we're about to compute a binary modulus,
	 we don't care about the high bits of the value.  */
      index = TREE_INT_CST_LOW (idx);
      if (!host_integerp (idx, 1) || index >= elements)
	{
	  index &= elements - 1;
	  idx = build_int_cst (TREE_TYPE (idx), index);
	}

      /* When lowering a vector statement sequence do some easy
         simplification by looking through intermediate vector results.  */
      if (TREE_CODE (vect) == SSA_NAME)
	{
	  gimple def_stmt = SSA_NAME_DEF_STMT (vect);
	  if (is_gimple_assign (def_stmt)
	      && (gimple_assign_rhs_code (def_stmt) == VECTOR_CST
		  || gimple_assign_rhs_code (def_stmt) == CONSTRUCTOR))
	    vect = gimple_assign_rhs1 (def_stmt);
	}

      if (TREE_CODE (vect) == VECTOR_CST)
        {
	  unsigned i;
	  tree vals = TREE_VECTOR_CST_ELTS (vect);
	  for (i = 0; vals; vals = TREE_CHAIN (vals), ++i)
	    if (i == index)
	       return TREE_VALUE (vals);
	  return build_zero_cst (vect_elt_type);
        }
      else if (TREE_CODE (vect) == CONSTRUCTOR)
        {
          unsigned i;
          tree elt_i, elt_v;

	  FOR_EACH_CONSTRUCTOR_ELT (CONSTRUCTOR_ELTS (vect), i, elt_i, elt_v)
            if (operand_equal_p (elt_i, idx, 0))
              return elt_v;
          return build_zero_cst (vect_elt_type);
        }
      else
        {
	  tree size = TYPE_SIZE (vect_elt_type);
	  tree pos = fold_build2 (MULT_EXPR, bitsizetype, bitsize_int (index),
				  size);
	  return fold_build3 (BIT_FIELD_REF, vect_elt_type, vect, size, pos);
        }
    }

  if (!ptmpvec)
    tmpvec = create_tmp_var (vect_type, "vectmp");
  else if (!*ptmpvec)
    tmpvec = *ptmpvec = create_tmp_var (vect_type, "vectmp");
  else
    {
      tmpvec = *ptmpvec;
      need_asgn = false;
    }

  if (need_asgn)
    {
      TREE_ADDRESSABLE (tmpvec) = 1;
      asgn = gimple_build_assign (tmpvec, vect);
      gsi_insert_before (gsi, asgn, GSI_SAME_STMT);
    }

  arraytype = build_array_type_nelts (vect_elt_type, elements);
  return build4 (ARRAY_REF, vect_elt_type,
                 build1 (VIEW_CONVERT_EXPR, arraytype, tmpvec),
                 idx, NULL_TREE, NULL_TREE);
}

/* Check if VEC_PERM_EXPR within the given setting is supported
   by hardware, or lower it piecewise.

   When VEC_PERM_EXPR has the same first and second operands:
   VEC_PERM_EXPR <v0, v0, mask> the lowered version would be
   {v0[mask[0]], v0[mask[1]], ...}
   MASK and V0 must have the same number of elements.

   Otherwise VEC_PERM_EXPR <v0, v1, mask> is lowered to
   {mask[0] < len(v0) ? v0[mask[0]] : v1[mask[0]], ...}
   V0 and V1 must have the same type.  MASK, V0 and V1 must have the
   same number of elements.  */
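/* For example, VEC_PERM_EXPR <{1,2,3,4}, {5,6,7,8}, {0,5,2,7}> selects
   {v0[0], v1[1], v0[2], v1[3]} and yields {1, 6, 3, 8}.  */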

static void
lower_vec_perm (gimple_stmt_iterator *gsi)
{
  gimple stmt = gsi_stmt (*gsi);
  tree mask = gimple_assign_rhs3 (stmt);
  tree vec0 = gimple_assign_rhs1 (stmt);
  tree vec1 = gimple_assign_rhs2 (stmt);
  tree vect_type = TREE_TYPE (vec0);
  tree mask_type = TREE_TYPE (mask);
  tree vect_elt_type = TREE_TYPE (vect_type);
  tree mask_elt_type = TREE_TYPE (mask_type);
  unsigned int elements = TYPE_VECTOR_SUBPARTS (vect_type);
  VEC(constructor_elt,gc) *v;
  tree constr, t, si, i_val;
  tree vec0tmp = NULL_TREE, vec1tmp = NULL_TREE, masktmp = NULL_TREE;
  bool two_operand_p = !operand_equal_p (vec0, vec1, 0);
  location_t loc = gimple_location (gsi_stmt (*gsi));
  unsigned i;

  if (TREE_CODE (mask) == VECTOR_CST)
    {
      unsigned char *sel_int = XALLOCAVEC (unsigned char, elements);
      tree vals = TREE_VECTOR_CST_ELTS (mask);

      for (i = 0; i < elements; ++i, vals = TREE_CHAIN (vals))
	sel_int[i] = TREE_INT_CST_LOW (TREE_VALUE (vals)) & (2 * elements - 1);

      if (can_vec_perm_p (TYPE_MODE (vect_type), false, sel_int))
	return;
    }
  else if (can_vec_perm_p (TYPE_MODE (vect_type), true, NULL))
    return;

  warning_at (loc, OPT_Wvector_operation_performance,
              "vector shuffling operation will be expanded piecewise");

  v = VEC_alloc (constructor_elt, gc, elements);
  for (i = 0; i < elements; i++)
    {
      si = size_int (i);
      i_val = vector_element (gsi, mask, si, &masktmp);

      if (TREE_CODE (i_val) == INTEGER_CST)
        {
	  unsigned HOST_WIDE_INT index;

	  index = TREE_INT_CST_LOW (i_val);
	  if (!host_integerp (i_val, 1) || index >= elements)
	    i_val = build_int_cst (mask_elt_type, index & (elements - 1));

          if (two_operand_p && (index & elements) != 0)
	    t = vector_element (gsi, vec1, i_val, &vec1tmp);
	  else
	    t = vector_element (gsi, vec0, i_val, &vec0tmp);

          t = force_gimple_operand_gsi (gsi, t, true, NULL_TREE,
					true, GSI_SAME_STMT);
        }
      else
        {
	  tree cond = NULL_TREE, v0_val;

	  if (two_operand_p)
	    {
	      cond = fold_build2 (BIT_AND_EXPR, mask_elt_type, i_val,
			          build_int_cst (mask_elt_type, elements));
	      cond = force_gimple_operand_gsi (gsi, cond, true, NULL_TREE,
					       true, GSI_SAME_STMT);
	    }

	  i_val = fold_build2 (BIT_AND_EXPR, mask_elt_type, i_val,
			       build_int_cst (mask_elt_type, elements - 1));
	  i_val = force_gimple_operand_gsi (gsi, i_val, true, NULL_TREE,
					    true, GSI_SAME_STMT);

	  v0_val = vector_element (gsi, vec0, i_val, &vec0tmp);
	  v0_val = force_gimple_operand_gsi (gsi, v0_val, true, NULL_TREE,
					     true, GSI_SAME_STMT);

	  if (two_operand_p)
	    {
	      tree v1_val;

	      v1_val = vector_element (gsi, vec1, i_val, &vec1tmp);
	      v1_val = force_gimple_operand_gsi (gsi, v1_val, true, NULL_TREE,
						 true, GSI_SAME_STMT);

	      cond = fold_build2 (EQ_EXPR, boolean_type_node,
				  cond, build_zero_cst (mask_elt_type));
	      cond = fold_build3 (COND_EXPR, vect_elt_type,
				  cond, v0_val, v1_val);
              t = force_gimple_operand_gsi (gsi, cond, true, NULL_TREE,
					    true, GSI_SAME_STMT);
            }
	  else
	    t = v0_val;
        }

      CONSTRUCTOR_APPEND_ELT (v, si, t);
    }

  constr = build_constructor (vect_type, v);
  gimple_assign_set_rhs_from_tree (gsi, constr);
  update_stmt (gsi_stmt (*gsi));
}

/* Process one statement.  If we identify a vector operation, expand it.  */

static void
expand_vector_operations_1 (gimple_stmt_iterator *gsi)
{
  gimple stmt = gsi_stmt (*gsi);
  tree lhs, rhs1, rhs2 = NULL, type, compute_type;
  enum tree_code code;
  enum machine_mode compute_mode;
  optab op = NULL;
  enum gimple_rhs_class rhs_class;
  tree new_rhs;

  if (gimple_code (stmt) != GIMPLE_ASSIGN)
    return;

  code = gimple_assign_rhs_code (stmt);
  rhs_class = get_gimple_rhs_class (code);
  lhs = gimple_assign_lhs (stmt);

  if (code == VEC_PERM_EXPR)
    {
      lower_vec_perm (gsi);
      return;
    }

  if (rhs_class != GIMPLE_UNARY_RHS && rhs_class != GIMPLE_BINARY_RHS)
    return;

  rhs1 = gimple_assign_rhs1 (stmt);
  type = gimple_expr_type (stmt);
  if (rhs_class == GIMPLE_BINARY_RHS)
    rhs2 = gimple_assign_rhs2 (stmt);

  if (TREE_CODE (type) != VECTOR_TYPE)
    return;

  if (code == NOP_EXPR
      || code == FLOAT_EXPR
      || code == FIX_TRUNC_EXPR
      || code == VIEW_CONVERT_EXPR)
    return;

  gcc_assert (code != CONVERT_EXPR);

  /* The signedness is determined from the input argument.  */
  if (code == VEC_UNPACK_FLOAT_HI_EXPR
      || code == VEC_UNPACK_FLOAT_LO_EXPR)
    type = TREE_TYPE (rhs1);

  /* Choose between vector shift/rotate by vector and vector shift/rotate
     by scalar.  */
  if (code == LSHIFT_EXPR
      || code == RSHIFT_EXPR
      || code == LROTATE_EXPR
      || code == RROTATE_EXPR)
    {
      optab opv;

      /* Check whether we have vector <op> {x,x,x,x} where x
         could be a scalar variable or a constant.  Transform
         vector <op> {x,x,x,x} ==> vector <op> scalar.  */
      if (VECTOR_INTEGER_TYPE_P (TREE_TYPE (rhs2)))
        {
          tree first;
          gimple def_stmt;

          if ((TREE_CODE (rhs2) == VECTOR_CST
	       && (first = uniform_vector_p (rhs2)) != NULL_TREE)
	      || (TREE_CODE (rhs2) == SSA_NAME
		  && (def_stmt = SSA_NAME_DEF_STMT (rhs2))
		  && gimple_assign_single_p (def_stmt)
		  && (first = uniform_vector_p
		      (gimple_assign_rhs1 (def_stmt))) != NULL_TREE))
            {
              gimple_assign_set_rhs2 (stmt, first);
              update_stmt (stmt);
              rhs2 = first;
            }
        }

      opv = optab_for_tree_code (code, type, optab_vector);
      if (VECTOR_INTEGER_TYPE_P (TREE_TYPE (rhs2)))
	op = opv;
      else
	{
          op = optab_for_tree_code (code, type, optab_scalar);

	  /* The rtl expander will expand vector/scalar as vector/vector
	     if necessary.  Don't bother converting the stmt here.  */
	  if (optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing
	      && optab_handler (opv, TYPE_MODE (type)) != CODE_FOR_nothing)
	    return;
	}
    }
  else
    op = optab_for_tree_code (code, type, optab_default);

  /* For widening/narrowing vector operations, the relevant type is that of
     the arguments, not the widened result.  VEC_UNPACK_FLOAT_*_EXPR is
     handled in the same way above.  */
  if (code == WIDEN_SUM_EXPR
      || code == VEC_WIDEN_MULT_HI_EXPR
      || code == VEC_WIDEN_MULT_LO_EXPR
      || code == VEC_UNPACK_HI_EXPR
      || code == VEC_UNPACK_LO_EXPR
      || code == VEC_PACK_TRUNC_EXPR
      || code == VEC_PACK_SAT_EXPR
      || code == VEC_PACK_FIX_TRUNC_EXPR
      || code == VEC_WIDEN_LSHIFT_HI_EXPR
      || code == VEC_WIDEN_LSHIFT_LO_EXPR)
    type = TREE_TYPE (rhs1);

  /* Optabs will try converting a negation into a subtraction, so
     look for it as well.  TODO: negation of floating-point vectors
     might be turned into an exclusive OR toggling the sign bit.  */
  if (op == NULL
      && code == NEGATE_EXPR
      && INTEGRAL_TYPE_P (TREE_TYPE (type)))
    op = optab_for_tree_code (MINUS_EXPR, type, optab_default);

  /* For very wide vectors, try using a smaller vector mode.  */
  compute_type = type;
  if (!VECTOR_MODE_P (TYPE_MODE (type)) && op)
    {
      tree vector_compute_type
        = type_for_widest_vector_mode (TREE_TYPE (type), op);
      if (vector_compute_type != NULL_TREE
	  && (TYPE_VECTOR_SUBPARTS (vector_compute_type)
	      < TYPE_VECTOR_SUBPARTS (compute_type))
	  && (optab_handler (op, TYPE_MODE (vector_compute_type))
	      != CODE_FOR_nothing))
	compute_type = vector_compute_type;
    }

  /* If we are breaking a BLKmode vector into smaller pieces,
     type_for_widest_vector_mode has already looked into the optab,
     so skip these checks.  */
  if (compute_type == type)
    {
      compute_mode = TYPE_MODE (compute_type);
      if (VECTOR_MODE_P (compute_mode)
          && op != NULL
	  && optab_handler (op, compute_mode) != CODE_FOR_nothing)
	return;
      else
	/* There is no operation in hardware, so fall back to scalars.  */
	compute_type = TREE_TYPE (type);
    }

  gcc_assert (code != VEC_LSHIFT_EXPR && code != VEC_RSHIFT_EXPR);
  new_rhs = expand_vector_operation (gsi, type, compute_type, stmt, code);

  /* Leave expression untouched for later expansion.  */
  if (new_rhs == NULL_TREE)
    return;

  if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (new_rhs)))
    new_rhs = gimplify_build1 (gsi, VIEW_CONVERT_EXPR, TREE_TYPE (lhs),
                               new_rhs);

  /* NOTE:  We should avoid using gimple_assign_set_rhs_from_tree.  One
     way to do that is to change expand_vector_operation and its callees
     to return a tree_code, RHS1 and RHS2 instead of a tree.  */
  gimple_assign_set_rhs_from_tree (gsi, new_rhs);
  update_stmt (gsi_stmt (*gsi));
}

/* Use this to lower vector operations introduced by the vectorizer,
   if it may need the bit-twiddling tricks implemented in this file.  */

static bool
gate_expand_vector_operations_ssa (void)
{
  return optimize == 0;
}

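/* Walk all the statements of the current function and lower the vector
   operations that the target cannot handle directly.  Returns
   TODO_cleanup_cfg if dead EH edges had to be purged.  */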
static unsigned int
expand_vector_operations (void)
{
  gimple_stmt_iterator gsi;
  basic_block bb;
  bool cfg_changed = false;

  FOR_EACH_BB (bb)
    {
      for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
	{
	  expand_vector_operations_1 (&gsi);
	  /* ???  If we do not cleanup EH then we will ICE in
	     verification.  But in reality we have created wrong-code
	     as we did not properly transition EH info and edges to
	     the piecewise computations.  */
	  if (maybe_clean_eh_stmt (gsi_stmt (gsi))
	      && gimple_purge_dead_eh_edges (bb))
	    cfg_changed = true;
	}
    }

  return cfg_changed ? TODO_cleanup_cfg : 0;
}

struct gimple_opt_pass pass_lower_vector =
{
 {
  GIMPLE_PASS,
  "veclower",				/* name */
  gate_expand_vector_operations_ssa,    /* gate */
  expand_vector_operations,		/* execute */
  NULL,					/* sub */
  NULL,					/* next */
  0,					/* static_pass_number */
  TV_NONE,				/* tv_id */
  PROP_cfg,				/* properties_required */
  0,					/* properties_provided */
  0,					/* properties_destroyed */
  0,					/* todo_flags_start */
  TODO_update_ssa	                /* todo_flags_finish */
    | TODO_verify_ssa
    | TODO_verify_stmts | TODO_verify_flow
    | TODO_cleanup_cfg
 }
};

struct gimple_opt_pass pass_lower_vector_ssa =
{
 {
  GIMPLE_PASS,
  "veclower2",				/* name */
  0,	                                /* gate */
  expand_vector_operations,		/* execute */
  NULL,					/* sub */
  NULL,					/* next */
  0,					/* static_pass_number */
  TV_NONE,				/* tv_id */
  PROP_cfg,				/* properties_required */
  0,					/* properties_provided */
  0,					/* properties_destroyed */
  0,					/* todo_flags_start */
  TODO_update_ssa	                /* todo_flags_finish */
    | TODO_verify_ssa
    | TODO_verify_stmts | TODO_verify_flow
    | TODO_cleanup_cfg
 }
};

#include "gt-tree-vect-generic.h"