1 /* Lower vector operations to scalar operations.
2    Copyright (C) 2004-2021 Free Software Foundation, Inc.
3 
4 This file is part of GCC.
5 
6 GCC is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 3, or (at your option) any
9 later version.
10 
11 GCC is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
14 for more details.
15 
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3.  If not see
18 <http://www.gnu.org/licenses/>.  */
19 
20 #include "config.h"
21 #include "system.h"
22 #include "coretypes.h"
23 #include "backend.h"
24 #include "rtl.h"
25 #include "tree.h"
26 #include "gimple.h"
27 #include "tree-pass.h"
28 #include "ssa.h"
29 #include "expmed.h"
30 #include "optabs-tree.h"
31 #include "diagnostic.h"
32 #include "fold-const.h"
33 #include "stor-layout.h"
34 #include "langhooks.h"
35 #include "tree-eh.h"
36 #include "gimple-iterator.h"
37 #include "gimplify-me.h"
38 #include "gimplify.h"
39 #include "tree-cfg.h"
40 #include "tree-vector-builder.h"
41 #include "vec-perm-indices.h"
42 #include "insn-config.h"
43 #include "tree-ssa-dce.h"
44 #include "recog.h"		/* FIXME: for insn_data */
45 
46 
47 static void expand_vector_operations_1 (gimple_stmt_iterator *, bitmap);
48 
49 /* Return the number of elements in a vector type TYPE that we have
50    already decided needs to be expanded piecewise.  We don't support
51    this kind of expansion for variable-length vectors, since we should
52    always check for target support before introducing uses of those.  */
53 static unsigned int
54 nunits_for_known_piecewise_op (const_tree type)
55 {
56   return TYPE_VECTOR_SUBPARTS (type).to_constant ();
57 }
58 
59 /* Return true if TYPE1 has more elements than TYPE2, where either
60    type may be a vector or a scalar.  */
61 
62 static inline bool
63 subparts_gt (tree type1, tree type2)
64 {
65   poly_uint64 n1 = VECTOR_TYPE_P (type1) ? TYPE_VECTOR_SUBPARTS (type1) : 1;
66   poly_uint64 n2 = VECTOR_TYPE_P (type2) ? TYPE_VECTOR_SUBPARTS (type2) : 1;
67   return known_gt (n1, n2);
68 }
69 
70 /* Build a constant of type TYPE, made of VALUE's bits replicated
71    every WIDTH bits to fit TYPE's precision.  */
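/* Illustrative example (not part of the lowering itself): for a 32-bit
   integer TYPE, WIDTH == 8 and VALUE == 0x7f, the constant built below is
   0x7f7f7f7f, i.e. the low WIDTH bits of VALUE repeated across the whole
   precision of TYPE.  */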
72 static tree
73 build_replicated_const (tree type, unsigned int width, HOST_WIDE_INT value)
74 {
75   int n = (TYPE_PRECISION (type) + HOST_BITS_PER_WIDE_INT - 1)
76     / HOST_BITS_PER_WIDE_INT;
77   unsigned HOST_WIDE_INT low, mask;
78   HOST_WIDE_INT a[WIDE_INT_MAX_ELTS];
79   int i;
80 
81   gcc_assert (n && n <= WIDE_INT_MAX_ELTS);
82 
83   if (width == HOST_BITS_PER_WIDE_INT)
84     low = value;
85   else
86     {
87       mask = ((HOST_WIDE_INT)1 << width) - 1;
88       low = (unsigned HOST_WIDE_INT) ~0 / mask * (value & mask);
89     }
90 
91   for (i = 0; i < n; i++)
92     a[i] = low;
93 
94   gcc_assert (TYPE_PRECISION (type) <= MAX_BITSIZE_MODE_ANY_INT);
95   return wide_int_to_tree
96     (type, wide_int::from_array (a, n, TYPE_PRECISION (type)));
97 }
98 
99 static GTY(()) tree vector_inner_type;
100 static GTY(()) tree vector_last_type;
101 static GTY(()) int vector_last_nunits;
102 
103 /* Return a suitable vector type made of NUNITS units each of mode
104    "word_mode" (the global variable).  */
105 static tree
106 build_word_mode_vector_type (int nunits)
107 {
108   if (!vector_inner_type)
109     vector_inner_type = lang_hooks.types.type_for_mode (word_mode, 1);
110   else if (vector_last_nunits == nunits)
111     {
112       gcc_assert (TREE_CODE (vector_last_type) == VECTOR_TYPE);
113       return vector_last_type;
114     }
115 
116   vector_last_nunits = nunits;
117   vector_last_type = build_vector_type (vector_inner_type, nunits);
118   return vector_last_type;
119 }
120 
121 typedef tree (*elem_op_func) (gimple_stmt_iterator *,
122 			      tree, tree, tree, tree, tree, enum tree_code,
123 			      tree);
124 
125 tree
126 tree_vec_extract (gimple_stmt_iterator *gsi, tree type,
127 		  tree t, tree bitsize, tree bitpos)
128 {
129   if (TREE_CODE (t) == SSA_NAME)
130     {
131       gimple *def_stmt = SSA_NAME_DEF_STMT (t);
132       if (is_gimple_assign (def_stmt)
133 	  && (gimple_assign_rhs_code (def_stmt) == VECTOR_CST
134 	      || (bitpos
135 		  && gimple_assign_rhs_code (def_stmt) == CONSTRUCTOR)))
136 	t = gimple_assign_rhs1 (def_stmt);
137     }
138   if (bitpos)
139     return gimplify_build3 (gsi, BIT_FIELD_REF, type, t, bitsize, bitpos);
140   else
141     return gimplify_build1 (gsi, VIEW_CONVERT_EXPR, type, t);
142 }
143 
144 static tree
145 do_unop (gimple_stmt_iterator *gsi, tree inner_type, tree a,
146 	 tree b ATTRIBUTE_UNUSED, tree bitpos, tree bitsize,
147 	 enum tree_code code, tree type ATTRIBUTE_UNUSED)
148 {
149   a = tree_vec_extract (gsi, inner_type, a, bitsize, bitpos);
150   return gimplify_build1 (gsi, code, inner_type, a);
151 }
152 
153 static tree
154 do_binop (gimple_stmt_iterator *gsi, tree inner_type, tree a, tree b,
155 	  tree bitpos, tree bitsize, enum tree_code code,
156 	  tree type ATTRIBUTE_UNUSED)
157 {
158   if (TREE_CODE (TREE_TYPE (a)) == VECTOR_TYPE)
159     a = tree_vec_extract (gsi, inner_type, a, bitsize, bitpos);
160   if (TREE_CODE (TREE_TYPE (b)) == VECTOR_TYPE)
161     b = tree_vec_extract (gsi, inner_type, b, bitsize, bitpos);
162   return gimplify_build2 (gsi, code, inner_type, a, b);
163 }
164 
165 /* Construct the expression (A[BITPOS] code B[BITPOS]) ? -1 : 0.
166 
167    INNER_TYPE is the type of the elements of A and B.
168 
169    The returned expression is of signed integer type with the
170    same size as INNER_TYPE.  */
171 static tree
172 do_compare (gimple_stmt_iterator *gsi, tree inner_type, tree a, tree b,
173 	    tree bitpos, tree bitsize, enum tree_code code, tree type)
174 {
175   tree stype = TREE_TYPE (type);
176   tree cst_false = build_zero_cst (stype);
177   tree cst_true = build_all_ones_cst (stype);
178   tree cmp;
179 
180   a = tree_vec_extract (gsi, inner_type, a, bitsize, bitpos);
181   b = tree_vec_extract (gsi, inner_type, b, bitsize, bitpos);
182 
183   cmp = build2 (code, boolean_type_node, a, b);
184   return gimplify_build3 (gsi, COND_EXPR, stype, cmp, cst_true, cst_false);
185 }
186 
187 /* Expand vector addition to scalars.  This does bit twiddling
188    in order to increase parallelism:
189 
190    a + b = (((int) a & 0x7f7f7f7f) + ((int) b & 0x7f7f7f7f)) ^
191            (a ^ b) & 0x80808080
192 
193    a - b =  (((int) a | 0x80808080) - ((int) b & 0x7f7f7f7f)) ^
194             (a ^ ~b) & 0x80808080
195 
196    -b = (0x80808080 - ((int) b & 0x7f7f7f7f)) ^ (~b & 0x80808080)
197 
198    This optimization should be done only if 4 vector items or more
199    fit into a word.  */
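/* Worked example (illustration only), with 8-bit elements packed into a
   16-bit word: for a = 0xC3F0 and b = 0x52B0 the per-byte sums are
   0xC3 + 0x52 = 0x15 and 0xF0 + 0xB0 = 0xA0 (carries discarded), i.e.
   0x15A0.  The formula gives (0x4370 + 0x5230) ^ ((0xC3F0 ^ 0x52B0)
   & 0x8080) = 0x95A0 ^ 0x8000 = 0x15A0, with no carry leaking from the
   low byte into the high byte.  */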
200 static tree
201 do_plus_minus (gimple_stmt_iterator *gsi, tree word_type, tree a, tree b,
202 	       tree bitpos ATTRIBUTE_UNUSED, tree bitsize ATTRIBUTE_UNUSED,
203 	       enum tree_code code, tree type ATTRIBUTE_UNUSED)
204 {
205   unsigned int width = vector_element_bits (TREE_TYPE (a));
206   tree inner_type = TREE_TYPE (TREE_TYPE (a));
207   unsigned HOST_WIDE_INT max;
208   tree low_bits, high_bits, a_low, b_low, result_low, signs;
209 
210   max = GET_MODE_MASK (TYPE_MODE (inner_type));
211   low_bits = build_replicated_const (word_type, width, max >> 1);
212   high_bits = build_replicated_const (word_type, width, max & ~(max >> 1));
213 
214   a = tree_vec_extract (gsi, word_type, a, bitsize, bitpos);
215   b = tree_vec_extract (gsi, word_type, b, bitsize, bitpos);
216 
217   signs = gimplify_build2 (gsi, BIT_XOR_EXPR, word_type, a, b);
218   b_low = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, b, low_bits);
219   if (code == PLUS_EXPR)
220     a_low = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, a, low_bits);
221   else
222     {
223       a_low = gimplify_build2 (gsi, BIT_IOR_EXPR, word_type, a, high_bits);
224       signs = gimplify_build1 (gsi, BIT_NOT_EXPR, word_type, signs);
225     }
226 
227   signs = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, signs, high_bits);
228   result_low = gimplify_build2 (gsi, code, word_type, a_low, b_low);
229   return gimplify_build2 (gsi, BIT_XOR_EXPR, word_type, result_low, signs);
230 }
231 
232 static tree
233 do_negate (gimple_stmt_iterator *gsi, tree word_type, tree b,
234 	   tree unused ATTRIBUTE_UNUSED, tree bitpos ATTRIBUTE_UNUSED,
235 	   tree bitsize ATTRIBUTE_UNUSED,
236 	   enum tree_code code ATTRIBUTE_UNUSED,
237 	   tree type ATTRIBUTE_UNUSED)
238 {
239   unsigned int width = vector_element_bits (TREE_TYPE (b));
240   tree inner_type = TREE_TYPE (TREE_TYPE (b));
241   HOST_WIDE_INT max;
242   tree low_bits, high_bits, b_low, result_low, signs;
243 
244   max = GET_MODE_MASK (TYPE_MODE (inner_type));
245   low_bits = build_replicated_const (word_type, width, max >> 1);
246   high_bits = build_replicated_const (word_type, width, max & ~(max >> 1));
247 
248   b = tree_vec_extract (gsi, word_type, b, bitsize, bitpos);
249 
250   b_low = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, b, low_bits);
251   signs = gimplify_build1 (gsi, BIT_NOT_EXPR, word_type, b);
252   signs = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, signs, high_bits);
253   result_low = gimplify_build2 (gsi, MINUS_EXPR, word_type, high_bits, b_low);
254   return gimplify_build2 (gsi, BIT_XOR_EXPR, word_type, result_low, signs);
255 }
256 
257 /* Expand a vector operation to scalars, by using many operations
258    whose type is the vector type's inner type.  */
259 static tree
260 expand_vector_piecewise (gimple_stmt_iterator *gsi, elem_op_func f,
261 			 tree type, tree inner_type,
262 			 tree a, tree b, enum tree_code code,
263 			 tree ret_type = NULL_TREE)
264 {
265   vec<constructor_elt, va_gc> *v;
266   tree part_width = TYPE_SIZE (inner_type);
267   tree index = bitsize_int (0);
268   int nunits = nunits_for_known_piecewise_op (type);
269   int delta = tree_to_uhwi (part_width) / vector_element_bits (type);
270   int i;
271   location_t loc = gimple_location (gsi_stmt (*gsi));
272 
273   if (ret_type
274       || types_compatible_p (gimple_expr_type (gsi_stmt (*gsi)), type))
275     warning_at (loc, OPT_Wvector_operation_performance,
276 		"vector operation will be expanded piecewise");
277   else
278     warning_at (loc, OPT_Wvector_operation_performance,
279 		"vector operation will be expanded in parallel");
280 
281   if (!ret_type)
282     ret_type = type;
283   vec_alloc (v, (nunits + delta - 1) / delta);
284   for (i = 0; i < nunits;
285        i += delta, index = int_const_binop (PLUS_EXPR, index, part_width))
286     {
287       tree result = f (gsi, inner_type, a, b, index, part_width, code,
288 		       ret_type);
289       constructor_elt ce = {NULL_TREE, result};
290       v->quick_push (ce);
291     }
292 
293   return build_constructor (ret_type, v);
294 }
295 
296 /* Expand a vector operation to scalars with the freedom to use
297    a scalar integer type, or to use a different size for the items
298    in the vector type.  */
299 static tree
300 expand_vector_parallel (gimple_stmt_iterator *gsi, elem_op_func f, tree type,
301 			tree a, tree b, enum tree_code code)
302 {
303   tree result, compute_type;
304   int n_words = tree_to_uhwi (TYPE_SIZE_UNIT (type)) / UNITS_PER_WORD;
305   location_t loc = gimple_location (gsi_stmt (*gsi));
306 
307   /* We have three strategies.  If the element mode is already word_mode,
308      just do the operation an element at a time.  Otherwise, if the vector
309      is wider than one word, do it a word at a time; finally, if the vector
310      is smaller than one word, do it as a single scalar.  */
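  /* As a rough illustration: with 64-bit word_mode, a V16QI operation falls
     in the second case (two words, handled a word at a time through
     build_word_mode_vector_type (2)), while a V2QI operation falls in the
     third case and is carried out as a single HImode scalar operation.  */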
311   if (TYPE_MODE (TREE_TYPE (type)) == word_mode)
312      return expand_vector_piecewise (gsi, f,
313 				     type, TREE_TYPE (type),
314 				     a, b, code);
315   else if (n_words > 1)
316     {
317       tree word_type = build_word_mode_vector_type (n_words);
318       result = expand_vector_piecewise (gsi, f,
319 				        word_type, TREE_TYPE (word_type),
320 					a, b, code);
321       result = force_gimple_operand_gsi (gsi, result, true, NULL, true,
322                                          GSI_SAME_STMT);
323     }
324   else
325     {
326       /* Use a single scalar operation with a mode no wider than word_mode.  */
327       scalar_int_mode mode
328 	= int_mode_for_size (tree_to_uhwi (TYPE_SIZE (type)), 0).require ();
329       compute_type = lang_hooks.types.type_for_mode (mode, 1);
330       result = f (gsi, compute_type, a, b, NULL_TREE, NULL_TREE, code, type);
331       warning_at (loc, OPT_Wvector_operation_performance,
332 	          "vector operation will be expanded with a "
333 		  "single scalar operation");
334     }
335 
336   return result;
337 }
338 
339 /* Expand a vector operation to scalars; for integer types we can use
340    special bit twiddling tricks to do the sums a word at a time, using
341    function F_PARALLEL instead of F.  These tricks are done only if
342    they can process at least four items, that is, only if the vector
343    holds at least four items and if a word can hold four items.  */
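/* For example, with BITS_PER_WORD == 64 a V16QI addition satisfies both
   conditions (eight parts per word and sixteen elements), so it is expanded
   with the word-parallel F_PARALLEL trick; a V2DI addition does not (only
   one part per word), so it falls back to the purely piecewise expansion.
   This is just an illustration of the heuristic.  */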
344 static tree
345 expand_vector_addition (gimple_stmt_iterator *gsi,
346 			elem_op_func f, elem_op_func f_parallel,
347 			tree type, tree a, tree b, enum tree_code code)
348 {
349   int parts_per_word = BITS_PER_WORD / vector_element_bits (type);
350 
351   if (INTEGRAL_TYPE_P (TREE_TYPE (type))
352       && parts_per_word >= 4
353       && nunits_for_known_piecewise_op (type) >= 4)
354     return expand_vector_parallel (gsi, f_parallel,
355 				   type, a, b, code);
356   else
357     return expand_vector_piecewise (gsi, f,
358 				    type, TREE_TYPE (type),
359 				    a, b, code);
360 }
361 
362 static bool
363 expand_vector_condition (gimple_stmt_iterator *gsi, bitmap dce_ssa_names);
364 
365 /* Try to expand the vector comparison expression OP0 CODE OP1 by
366    querying the optab to see whether the expression
367 	VEC_COND_EXPR< OP0 CODE OP1, {-1,...}, {0,...}>
368    can be expanded.  */
369 static tree
370 expand_vector_comparison (gimple_stmt_iterator *gsi, tree type, tree op0,
371 			  tree op1, enum tree_code code,
372 			  bitmap dce_ssa_names)
373 {
374   tree lhs = gimple_assign_lhs (gsi_stmt (*gsi));
375   use_operand_p use_p;
376   imm_use_iterator iterator;
377   bool vec_cond_expr_only = true;
378 
379   /* As seen in PR95830, we should not expand comparisons that are only
380      feeding a VEC_COND_EXPR statement.  */
381   auto_vec<gimple *> uses;
382   FOR_EACH_IMM_USE_FAST (use_p, iterator, lhs)
383     uses.safe_push (USE_STMT (use_p));
384 
385   for (unsigned i = 0; i < uses.length (); i ++)
386     {
387       gassign *use = dyn_cast<gassign *> (uses[i]);
388       if (use != NULL
389 	  && gimple_assign_rhs_code (use) == VEC_COND_EXPR
390 	  && gimple_assign_rhs1 (use) == lhs)
391 	{
392 	  gimple_stmt_iterator it = gsi_for_stmt (use);
393 	  if (!expand_vector_condition (&it, dce_ssa_names))
394 	    {
395 	      vec_cond_expr_only = false;
396 	      break;
397 	    }
398 	}
399       else
400 	{
401 	  vec_cond_expr_only = false;
402 	  break;
403 	}
404     }
405 
406   if (!uses.is_empty () && vec_cond_expr_only)
407     return NULL_TREE;
408 
409   tree t;
410   if (!expand_vec_cmp_expr_p (TREE_TYPE (op0), type, code))
411     {
412       if (VECTOR_BOOLEAN_TYPE_P (type)
413 	  && SCALAR_INT_MODE_P (TYPE_MODE (type))
414 	  && known_lt (GET_MODE_BITSIZE (TYPE_MODE (type)),
415 		       TYPE_VECTOR_SUBPARTS (type)
416 		       * GET_MODE_BITSIZE (SCALAR_TYPE_MODE
417 						(TREE_TYPE (type)))))
418 	{
419 	  tree inner_type = TREE_TYPE (TREE_TYPE (op0));
420 	  tree part_width = vector_element_bits_tree (TREE_TYPE (op0));
421 	  tree index = bitsize_int (0);
422 	  int nunits = nunits_for_known_piecewise_op (TREE_TYPE (op0));
423 	  int prec = GET_MODE_PRECISION (SCALAR_TYPE_MODE (type));
424 	  tree ret_type = build_nonstandard_integer_type (prec, 1);
425 	  tree ret_inner_type = boolean_type_node;
426 	  int i;
427 	  location_t loc = gimple_location (gsi_stmt (*gsi));
428 	  t = build_zero_cst (ret_type);
429 
430 	  if (TYPE_PRECISION (ret_inner_type) != 1)
431 	    ret_inner_type = build_nonstandard_integer_type (1, 1);
432 	  warning_at (loc, OPT_Wvector_operation_performance,
433 		      "vector operation will be expanded piecewise");
434 	  for (i = 0; i < nunits;
435 	       i++, index = int_const_binop (PLUS_EXPR, index, part_width))
436 	    {
437 	      tree a = tree_vec_extract (gsi, inner_type, op0, part_width,
438 					 index);
439 	      tree b = tree_vec_extract (gsi, inner_type, op1, part_width,
440 					 index);
441 	      tree result = gimplify_build2 (gsi, code, ret_inner_type, a, b);
442 	      t = gimplify_build3 (gsi, BIT_INSERT_EXPR, ret_type, t, result,
443 				   bitsize_int (i));
444 	    }
445 	  t = gimplify_build1 (gsi, VIEW_CONVERT_EXPR, type, t);
446 	}
447       else
448 	t = expand_vector_piecewise (gsi, do_compare, type,
449 				     TREE_TYPE (TREE_TYPE (op0)), op0, op1,
450 				     code);
451     }
452   else
453     t = NULL_TREE;
454 
455   return t;
456 }
457 
458 /* Helper function of expand_vector_divmod.  Gimplify a RSHIFT_EXPR in type
459    of OP0 with shift counts in SHIFTCNTS array and return the temporary holding
460    the result if successful, otherwise return NULL_TREE.  */
461 static tree
462 add_rshift (gimple_stmt_iterator *gsi, tree type, tree op0, int *shiftcnts)
463 {
464   optab op;
465   unsigned int i, nunits = nunits_for_known_piecewise_op (type);
466   bool scalar_shift = true;
467 
468   for (i = 1; i < nunits; i++)
469     {
470       if (shiftcnts[i] != shiftcnts[0])
471 	scalar_shift = false;
472     }
473 
474   if (scalar_shift && shiftcnts[0] == 0)
475     return op0;
476 
477   if (scalar_shift)
478     {
479       op = optab_for_tree_code (RSHIFT_EXPR, type, optab_scalar);
480       if (op != unknown_optab
481 	  && optab_handler (op, TYPE_MODE (type)) != CODE_FOR_nothing)
482 	return gimplify_build2 (gsi, RSHIFT_EXPR, type, op0,
483 				build_int_cst (NULL_TREE, shiftcnts[0]));
484     }
485 
486   op = optab_for_tree_code (RSHIFT_EXPR, type, optab_vector);
487   if (op != unknown_optab
488       && optab_handler (op, TYPE_MODE (type)) != CODE_FOR_nothing)
489     {
490       tree_vector_builder vec (type, nunits, 1);
491       for (i = 0; i < nunits; i++)
492 	vec.quick_push (build_int_cst (TREE_TYPE (type), shiftcnts[i]));
493       return gimplify_build2 (gsi, RSHIFT_EXPR, type, op0, vec.build ());
494     }
495 
496   return NULL_TREE;
497 }
498 
499 /* Try to expand integer vector division by constant using
500    widening multiply, shifts and additions.  */
501 static tree
502 expand_vector_divmod (gimple_stmt_iterator *gsi, tree type, tree op0,
503 		      tree op1, enum tree_code code)
504 {
505   bool use_pow2 = true;
506   bool has_vector_shift = true;
507   bool use_abs_op1 = false;
508   int mode = -1, this_mode;
509   int pre_shift = -1, post_shift;
510   unsigned int nunits = nunits_for_known_piecewise_op (type);
511   int *shifts = XALLOCAVEC (int, nunits * 4);
512   int *pre_shifts = shifts + nunits;
513   int *post_shifts = pre_shifts + nunits;
514   int *shift_temps = post_shifts + nunits;
515   unsigned HOST_WIDE_INT *mulc = XALLOCAVEC (unsigned HOST_WIDE_INT, nunits);
516   int prec = TYPE_PRECISION (TREE_TYPE (type));
517   int dummy_int;
518   unsigned int i;
519   signop sign_p = TYPE_SIGN (TREE_TYPE (type));
520   unsigned HOST_WIDE_INT mask = GET_MODE_MASK (TYPE_MODE (TREE_TYPE (type)));
521   tree cur_op, mulcst, tem;
522   optab op;
523 
524   if (prec > HOST_BITS_PER_WIDE_INT)
525     return NULL_TREE;
526 
527   op = optab_for_tree_code (RSHIFT_EXPR, type, optab_vector);
528   if (op == unknown_optab
529       || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
530     has_vector_shift = false;
531 
532   /* Analysis phase.  Determine whether all op1 elements are powers of two
533      and whether it is possible to expand the operation using shifts (or,
534      for remainder, using masking).  Additionally compute the multiplicative
535      constants and pre and post shifts if the division is to be expanded
536      using widening or high part multiplication plus shifts.  */
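  /* A hedged illustration of the non-power-of-two path: for an unsigned
     8-bit division by 3, choose_multiplier yields the constant 171 with a
     post shift of 1, so each element becomes q = (x h* 171) >> 1 (a
     highpart multiply followed by a shift), which is exact for all
     x < 256 because 512 <= 171 * 3 <= 514.  */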
537   for (i = 0; i < nunits; i++)
538     {
539       tree cst = VECTOR_CST_ELT (op1, i);
540       unsigned HOST_WIDE_INT ml;
541 
542       if (TREE_CODE (cst) != INTEGER_CST || integer_zerop (cst))
543 	return NULL_TREE;
544       pre_shifts[i] = 0;
545       post_shifts[i] = 0;
546       mulc[i] = 0;
547       if (use_pow2
548 	  && (!integer_pow2p (cst) || tree_int_cst_sgn (cst) != 1))
549 	use_pow2 = false;
550       if (use_pow2)
551 	{
552 	  shifts[i] = tree_log2 (cst);
553 	  if (shifts[i] != shifts[0]
554 	      && code == TRUNC_DIV_EXPR
555 	      && !has_vector_shift)
556 	    use_pow2 = false;
557 	}
558       if (mode == -2)
559 	continue;
560       if (sign_p == UNSIGNED)
561 	{
562 	  unsigned HOST_WIDE_INT mh;
563 	  unsigned HOST_WIDE_INT d = TREE_INT_CST_LOW (cst) & mask;
564 
565 	  if (d >= (HOST_WIDE_INT_1U << (prec - 1)))
566 	    /* FIXME: Can transform this into op0 >= op1 ? 1 : 0.  */
567 	    return NULL_TREE;
568 
569 	  if (d <= 1)
570 	    {
571 	      mode = -2;
572 	      continue;
573 	    }
574 
575 	  /* Find a suitable multiplier and right shift count
576 	     instead of multiplying with D.  */
577 	  mh = choose_multiplier (d, prec, prec, &ml, &post_shift, &dummy_int);
578 
579 	  /* If the suggested multiplier is more than SIZE bits, we can
580 	     do better for even divisors, using an initial right shift.  */
581 	  if ((mh != 0 && (d & 1) == 0)
582 	      || (!has_vector_shift && pre_shift != -1))
583 	    {
584 	      if (has_vector_shift)
585 		pre_shift = ctz_or_zero (d);
586 	      else if (pre_shift == -1)
587 		{
588 		  unsigned int j;
589 		  for (j = 0; j < nunits; j++)
590 		    {
591 		      tree cst2 = VECTOR_CST_ELT (op1, j);
592 		      unsigned HOST_WIDE_INT d2;
593 		      int this_pre_shift;
594 
595 		      if (!tree_fits_uhwi_p (cst2))
596 			return NULL_TREE;
597 		      d2 = tree_to_uhwi (cst2) & mask;
598 		      if (d2 == 0)
599 			return NULL_TREE;
600 		      this_pre_shift = floor_log2 (d2 & -d2);
601 		      if (pre_shift == -1 || this_pre_shift < pre_shift)
602 			pre_shift = this_pre_shift;
603 		    }
604 		  if (i != 0 && pre_shift != 0)
605 		    {
606 		      /* Restart.  */
607 		      i = -1U;
608 		      mode = -1;
609 		      continue;
610 		    }
611 		}
612 	      if (pre_shift != 0)
613 		{
614 		  if ((d >> pre_shift) <= 1)
615 		    {
616 		      mode = -2;
617 		      continue;
618 		    }
619 		  mh = choose_multiplier (d >> pre_shift, prec,
620 					  prec - pre_shift,
621 					  &ml, &post_shift, &dummy_int);
622 		  gcc_assert (!mh);
623 		  pre_shifts[i] = pre_shift;
624 		}
625 	    }
626 	  if (!mh)
627 	    this_mode = 0;
628 	  else
629 	    this_mode = 1;
630 	}
631       else
632 	{
633 	  HOST_WIDE_INT d = TREE_INT_CST_LOW (cst);
634 	  unsigned HOST_WIDE_INT abs_d;
635 
636 	  if (d == -1)
637 	    return NULL_TREE;
638 
639 	  /* Since d might be INT_MIN, we have to cast to
640 	     unsigned HOST_WIDE_INT before negating to avoid
641 	     undefined signed overflow.  */
642 	  abs_d = (d >= 0
643 		  ? (unsigned HOST_WIDE_INT) d
644 		  : - (unsigned HOST_WIDE_INT) d);
645 
646 	  /* n rem d = n rem -d */
647 	  if (code == TRUNC_MOD_EXPR && d < 0)
648 	    {
649 	      d = abs_d;
650 	      use_abs_op1 = true;
651 	    }
652 	  if (abs_d == HOST_WIDE_INT_1U << (prec - 1))
653 	    {
654 	      /* This case is not handled correctly below.  */
655 	      mode = -2;
656 	      continue;
657 	    }
658 	  if (abs_d <= 1)
659 	    {
660 	      mode = -2;
661 	      continue;
662 	    }
663 
664 	  choose_multiplier (abs_d, prec, prec - 1, &ml,
665 			     &post_shift, &dummy_int);
666 	  if (ml >= HOST_WIDE_INT_1U << (prec - 1))
667 	    {
668 	      this_mode = 4 + (d < 0);
669 	      ml |= HOST_WIDE_INT_M1U << (prec - 1);
670 	    }
671 	  else
672 	    this_mode = 2 + (d < 0);
673 	}
674       mulc[i] = ml;
675       post_shifts[i] = post_shift;
676       if ((i && !has_vector_shift && post_shifts[0] != post_shift)
677 	  || post_shift >= prec
678 	  || pre_shifts[i] >= prec)
679 	this_mode = -2;
680 
681       if (i == 0)
682 	mode = this_mode;
683       else if (mode != this_mode)
684 	mode = -2;
685     }
686 
687   if (use_pow2)
688     {
689       tree addend = NULL_TREE;
690       if (sign_p == SIGNED)
691 	{
692 	  tree uns_type;
693 
694 	  /* Both division and remainder sequences need
695 	     op0 < 0 ? mask : 0 computed.  It can be either computed as
696 	     (type) (((uns_type) (op0 >> (prec - 1))) >> (prec - shifts[i]))
697 	     if none of the shifts is 0, or as the conditional.  */
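	  /* Illustration (not part of the transformation): with prec == 8
	     and a divisor of 4 (shifts[i] == 2), op0 == -5 gives
	     (unsigned char) (-5 >> 7) == 0xff, then 0xff >> 6 == 3, so the
	     addend is 3 and (-5 + 3) >> 2 == -1, matching the truncating
	     division -5 / 4.  For non-negative op0 the addend is 0 and the
	     plain arithmetic shift is used unchanged.  */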
698 	  for (i = 0; i < nunits; i++)
699 	    if (shifts[i] == 0)
700 	      break;
701 	  uns_type
702 	    = build_vector_type (build_nonstandard_integer_type (prec, 1),
703 				 nunits);
704 	  if (i == nunits && TYPE_MODE (uns_type) == TYPE_MODE (type))
705 	    {
706 	      for (i = 0; i < nunits; i++)
707 		shift_temps[i] = prec - 1;
708 	      cur_op = add_rshift (gsi, type, op0, shift_temps);
709 	      if (cur_op != NULL_TREE)
710 		{
711 		  cur_op = gimplify_build1 (gsi, VIEW_CONVERT_EXPR,
712 					    uns_type, cur_op);
713 		  for (i = 0; i < nunits; i++)
714 		    shift_temps[i] = prec - shifts[i];
715 		  cur_op = add_rshift (gsi, uns_type, cur_op, shift_temps);
716 		  if (cur_op != NULL_TREE)
717 		    addend = gimplify_build1 (gsi, VIEW_CONVERT_EXPR,
718 					      type, cur_op);
719 		}
720 	    }
721 	  if (addend == NULL_TREE
722 	      && expand_vec_cond_expr_p (type, type, LT_EXPR))
723 	    {
724 	      tree zero, cst, mask_type, mask;
725 	      gimple *stmt, *cond;
726 
727 	      mask_type = truth_type_for (type);
728 	      zero = build_zero_cst (type);
729 	      mask = make_ssa_name (mask_type);
730 	      cond = gimple_build_assign (mask, LT_EXPR, op0, zero);
731 	      gsi_insert_before (gsi, cond, GSI_SAME_STMT);
732 	      tree_vector_builder vec (type, nunits, 1);
733 	      for (i = 0; i < nunits; i++)
734 		vec.quick_push (build_int_cst (TREE_TYPE (type),
735 					       (HOST_WIDE_INT_1U
736 						<< shifts[i]) - 1));
737 	      cst = vec.build ();
738 	      addend = make_ssa_name (type);
739 	      stmt
740 		= gimple_build_assign (addend, VEC_COND_EXPR, mask, cst, zero);
741 	      gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
742 	    }
743 	}
744       if (code == TRUNC_DIV_EXPR)
745 	{
746 	  if (sign_p == UNSIGNED)
747 	    {
748 	      /* q = op0 >> shift;  */
749 	      cur_op = add_rshift (gsi, type, op0, shifts);
750 	      if (cur_op != NULL_TREE)
751 		return cur_op;
752 	    }
753 	  else if (addend != NULL_TREE)
754 	    {
755 	      /* t1 = op0 + addend;
756 		 q = t1 >> shift;  */
757 	      op = optab_for_tree_code (PLUS_EXPR, type, optab_default);
758 	      if (op != unknown_optab
759 		  && optab_handler (op, TYPE_MODE (type)) != CODE_FOR_nothing)
760 		{
761 		  cur_op = gimplify_build2 (gsi, PLUS_EXPR, type, op0, addend);
762 		  cur_op = add_rshift (gsi, type, cur_op, shifts);
763 		  if (cur_op != NULL_TREE)
764 		    return cur_op;
765 		}
766 	    }
767 	}
768       else
769 	{
770 	  tree mask;
771 	  tree_vector_builder vec (type, nunits, 1);
772 	  for (i = 0; i < nunits; i++)
773 	    vec.quick_push (build_int_cst (TREE_TYPE (type),
774 					   (HOST_WIDE_INT_1U
775 					    << shifts[i]) - 1));
776 	  mask = vec.build ();
777 	  op = optab_for_tree_code (BIT_AND_EXPR, type, optab_default);
778 	  if (op != unknown_optab
779 	      && optab_handler (op, TYPE_MODE (type)) != CODE_FOR_nothing)
780 	    {
781 	      if (sign_p == UNSIGNED)
782 		/* r = op0 & mask;  */
783 		return gimplify_build2 (gsi, BIT_AND_EXPR, type, op0, mask);
784 	      else if (addend != NULL_TREE)
785 		{
786 		  /* t1 = op0 + addend;
787 		     t2 = t1 & mask;
788 		     r = t2 - addend;  */
789 		  op = optab_for_tree_code (PLUS_EXPR, type, optab_default);
790 		  if (op != unknown_optab
791 		      && optab_handler (op, TYPE_MODE (type))
792 			 != CODE_FOR_nothing)
793 		    {
794 		      cur_op = gimplify_build2 (gsi, PLUS_EXPR, type, op0,
795 						addend);
796 		      cur_op = gimplify_build2 (gsi, BIT_AND_EXPR, type,
797 						cur_op, mask);
798 		      op = optab_for_tree_code (MINUS_EXPR, type,
799 						optab_default);
800 		      if (op != unknown_optab
801 			  && optab_handler (op, TYPE_MODE (type))
802 			     != CODE_FOR_nothing)
803 			return gimplify_build2 (gsi, MINUS_EXPR, type,
804 						cur_op, addend);
805 		    }
806 		}
807 	    }
808 	}
809     }
810 
811   if (mode == -2 || BYTES_BIG_ENDIAN != WORDS_BIG_ENDIAN)
812     return NULL_TREE;
813 
814   if (!can_mult_highpart_p (TYPE_MODE (type), TYPE_UNSIGNED (type)))
815     return NULL_TREE;
816 
817   cur_op = op0;
818 
819   switch (mode)
820     {
821     case 0:
822       gcc_assert (sign_p == UNSIGNED);
823       /* t1 = oprnd0 >> pre_shift;
824 	 t2 = t1 h* ml;
825 	 q = t2 >> post_shift;  */
826       cur_op = add_rshift (gsi, type, cur_op, pre_shifts);
827       if (cur_op == NULL_TREE)
828 	return NULL_TREE;
829       break;
830     case 1:
831       gcc_assert (sign_p == UNSIGNED);
832       for (i = 0; i < nunits; i++)
833 	{
834 	  shift_temps[i] = 1;
835 	  post_shifts[i]--;
836 	}
837       break;
838     case 2:
839     case 3:
840     case 4:
841     case 5:
842       gcc_assert (sign_p == SIGNED);
843       for (i = 0; i < nunits; i++)
844 	shift_temps[i] = prec - 1;
845       break;
846     default:
847       return NULL_TREE;
848     }
849 
850   tree_vector_builder vec (type, nunits, 1);
851   for (i = 0; i < nunits; i++)
852     vec.quick_push (build_int_cst (TREE_TYPE (type), mulc[i]));
853   mulcst = vec.build ();
854 
855   cur_op = gimplify_build2 (gsi, MULT_HIGHPART_EXPR, type, cur_op, mulcst);
856 
857   switch (mode)
858     {
859     case 0:
860       /* t1 = oprnd0 >> pre_shift;
861 	 t2 = t1 h* ml;
862 	 q = t2 >> post_shift;  */
863       cur_op = add_rshift (gsi, type, cur_op, post_shifts);
864       break;
865     case 1:
866       /* t1 = oprnd0 h* ml;
867 	 t2 = oprnd0 - t1;
868 	 t3 = t2 >> 1;
869 	 t4 = t1 + t3;
870 	 q = t4 >> (post_shift - 1);  */
871       op = optab_for_tree_code (MINUS_EXPR, type, optab_default);
872       if (op == unknown_optab
873 	  || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
874 	return NULL_TREE;
875       tem = gimplify_build2 (gsi, MINUS_EXPR, type, op0, cur_op);
876       tem = add_rshift (gsi, type, tem, shift_temps);
877       op = optab_for_tree_code (PLUS_EXPR, type, optab_default);
878       if (op == unknown_optab
879 	  || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
880 	return NULL_TREE;
881       tem = gimplify_build2 (gsi, PLUS_EXPR, type, cur_op, tem);
882       cur_op = add_rshift (gsi, type, tem, post_shifts);
883       if (cur_op == NULL_TREE)
884 	return NULL_TREE;
885       break;
886     case 2:
887     case 3:
888     case 4:
889     case 5:
890       /* t1 = oprnd0 h* ml;
891 	 t2 = t1; [ iff (mode & 2) != 0 ]
892 	 t2 = t1 + oprnd0; [ iff (mode & 2) == 0 ]
893 	 t3 = t2 >> post_shift;
894 	 t4 = oprnd0 >> (prec - 1);
895 	 q = t3 - t4; [ iff (mode & 1) == 0 ]
896 	 q = t4 - t3; [ iff (mode & 1) != 0 ]  */
897       if ((mode & 2) == 0)
898 	{
899 	  op = optab_for_tree_code (PLUS_EXPR, type, optab_default);
900 	  if (op == unknown_optab
901 	      || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
902 	    return NULL_TREE;
903 	  cur_op = gimplify_build2 (gsi, PLUS_EXPR, type, cur_op, op0);
904 	}
905       cur_op = add_rshift (gsi, type, cur_op, post_shifts);
906       if (cur_op == NULL_TREE)
907 	return NULL_TREE;
908       tem = add_rshift (gsi, type, op0, shift_temps);
909       if (tem == NULL_TREE)
910 	return NULL_TREE;
911       op = optab_for_tree_code (MINUS_EXPR, type, optab_default);
912       if (op == unknown_optab
913 	  || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
914 	return NULL_TREE;
915       if ((mode & 1) == 0)
916 	cur_op = gimplify_build2 (gsi, MINUS_EXPR, type, cur_op, tem);
917       else
918 	cur_op = gimplify_build2 (gsi, MINUS_EXPR, type, tem, cur_op);
919       break;
920     default:
921       gcc_unreachable ();
922     }
923 
924   if (code == TRUNC_DIV_EXPR)
925     return cur_op;
926 
927   /* We divided.  Now finish by:
928      t1 = q * oprnd1;
929      r = oprnd0 - t1;  */
930   op = optab_for_tree_code (MULT_EXPR, type, optab_default);
931   if (op == unknown_optab
932       || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
933     return NULL_TREE;
934   if (use_abs_op1)
935     {
936       tree_vector_builder elts;
937       if (!elts.new_unary_operation (type, op1, false))
938 	return NULL_TREE;
939       unsigned int count = elts.encoded_nelts ();
940       for (unsigned int i = 0; i < count; ++i)
941 	{
942 	  tree elem1 = VECTOR_CST_ELT (op1, i);
943 
944 	  tree elt = const_unop (ABS_EXPR, TREE_TYPE (elem1), elem1);
945 	  if (elt == NULL_TREE)
946 	    return NULL_TREE;
947 	  elts.quick_push (elt);
948 	}
949       op1 = elts.build ();
950     }
951   tem = gimplify_build2 (gsi, MULT_EXPR, type, cur_op, op1);
952   op = optab_for_tree_code (MINUS_EXPR, type, optab_default);
953   if (op == unknown_optab
954       || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
955     return NULL_TREE;
956   return gimplify_build2 (gsi, MINUS_EXPR, type, op0, tem);
957 }
958 
959 /* Expand a vector condition to scalars, by using many conditions
960    on the vector's elements.  */
961 
962 static bool
963 expand_vector_condition (gimple_stmt_iterator *gsi, bitmap dce_ssa_names)
964 {
965   gassign *stmt = as_a <gassign *> (gsi_stmt (*gsi));
966   tree type = gimple_expr_type (stmt);
967   tree a = gimple_assign_rhs1 (stmt);
968   tree a1 = a;
969   tree a2 = NULL_TREE;
970   bool a_is_comparison = false;
971   bool a_is_scalar_bitmask = false;
972   tree b = gimple_assign_rhs2 (stmt);
973   tree c = gimple_assign_rhs3 (stmt);
974   vec<constructor_elt, va_gc> *v;
975   tree constr;
976   tree inner_type = TREE_TYPE (type);
977   tree width = vector_element_bits_tree (type);
978   tree cond_type = TREE_TYPE (TREE_TYPE (a));
979   tree comp_inner_type = cond_type;
980   tree index = bitsize_int (0);
981   tree comp_width = width;
982   tree comp_index = index;
983   location_t loc = gimple_location (gsi_stmt (*gsi));
984   tree_code code = TREE_CODE (a);
985   gassign *assign = NULL;
986 
987   if (code == SSA_NAME)
988     {
989       assign = dyn_cast<gassign *> (SSA_NAME_DEF_STMT (a));
990       if (assign != NULL
991 	  && TREE_CODE_CLASS (gimple_assign_rhs_code (assign)) == tcc_comparison)
992 	{
993 	  a_is_comparison = true;
994 	  a1 = gimple_assign_rhs1 (assign);
995 	  a2 = gimple_assign_rhs2 (assign);
996 	  code = gimple_assign_rhs_code (assign);
997 	  comp_inner_type = TREE_TYPE (TREE_TYPE (a1));
998 	  comp_width = vector_element_bits_tree (TREE_TYPE (a1));
999 	}
1000     }
1001 
1002   if (expand_vec_cond_expr_p (type, TREE_TYPE (a1), code))
1003     {
1004       gcc_assert (TREE_CODE (a) == SSA_NAME || TREE_CODE (a) == VECTOR_CST);
1005       return true;
1006     }
1007 
1008   /* Handle vector boolean types with bitmasks.  If there is a comparison
1009      and we can expand the comparison into the vector boolean bitmask,
1010      or otherwise if it is compatible with type, we can transform
1011       vbfld_1 = x_2 < y_3 ? vbfld_4 : vbfld_5;
1012      into
1013       tmp_6 = x_2 < y_3;
1014       tmp_7 = tmp_6 & vbfld_4;
1015       tmp_8 = ~tmp_6;
1016       tmp_9 = tmp_8 & vbfld_5;
1017       vbfld_1 = tmp_7 | tmp_9;
1018      Similarly for vbfld_10 instead of x_2 < y_3.  */
1019   if (VECTOR_BOOLEAN_TYPE_P (type)
1020       && SCALAR_INT_MODE_P (TYPE_MODE (type))
1021       && known_lt (GET_MODE_BITSIZE (TYPE_MODE (type)),
1022 		   TYPE_VECTOR_SUBPARTS (type)
1023 		   * GET_MODE_BITSIZE (SCALAR_TYPE_MODE (TREE_TYPE (type))))
1024       && (a_is_comparison
1025 	  ? useless_type_conversion_p (type, TREE_TYPE (a))
1026 	  : expand_vec_cmp_expr_p (TREE_TYPE (a1), type, TREE_CODE (a))))
1027     {
1028       if (a_is_comparison)
1029 	a = gimplify_build2 (gsi, code, type, a1, a2);
1030       a1 = gimplify_build2 (gsi, BIT_AND_EXPR, type, a, b);
1031       a2 = gimplify_build1 (gsi, BIT_NOT_EXPR, type, a);
1032       a2 = gimplify_build2 (gsi, BIT_AND_EXPR, type, a2, c);
1033       a = gimplify_build2 (gsi, BIT_IOR_EXPR, type, a1, a2);
1034       gimple_assign_set_rhs_from_tree (gsi, a);
1035       update_stmt (gsi_stmt (*gsi));
1036       return true;
1037     }
1038 
1039   /* TODO: try and find a smaller vector type.  */
1040 
1041   warning_at (loc, OPT_Wvector_operation_performance,
1042 	      "vector condition will be expanded piecewise");
1043 
1044   if (!a_is_comparison
1045       && VECTOR_BOOLEAN_TYPE_P (TREE_TYPE (a))
1046       && SCALAR_INT_MODE_P (TYPE_MODE (TREE_TYPE (a)))
1047       && known_lt (GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (a))),
1048 		   TYPE_VECTOR_SUBPARTS (TREE_TYPE (a))
1049 		   * GET_MODE_BITSIZE (SCALAR_TYPE_MODE
1050 						(TREE_TYPE (TREE_TYPE (a))))))
1051     {
1052       a_is_scalar_bitmask = true;
1053       int prec = GET_MODE_PRECISION (SCALAR_TYPE_MODE (TREE_TYPE (a)));
1054       tree atype = build_nonstandard_integer_type (prec, 1);
1055       a = gimplify_build1 (gsi, VIEW_CONVERT_EXPR, atype, a);
1056     }
1057 
1058   int nunits = nunits_for_known_piecewise_op (type);
1059   vec_alloc (v, nunits);
1060   for (int i = 0; i < nunits; i++)
1061     {
1062       tree aa, result;
1063       tree bb = tree_vec_extract (gsi, inner_type, b, width, index);
1064       tree cc = tree_vec_extract (gsi, inner_type, c, width, index);
1065       if (a_is_comparison)
1066 	{
1067 	  tree aa1 = tree_vec_extract (gsi, comp_inner_type, a1,
1068 				       comp_width, comp_index);
1069 	  tree aa2 = tree_vec_extract (gsi, comp_inner_type, a2,
1070 				       comp_width, comp_index);
1071 	  aa = fold_build2 (code, cond_type, aa1, aa2);
1072 	}
1073       else if (a_is_scalar_bitmask)
1074 	{
1075 	  wide_int w = wi::set_bit_in_zero (i, TYPE_PRECISION (TREE_TYPE (a)));
1076 	  result = gimplify_build2 (gsi, BIT_AND_EXPR, TREE_TYPE (a),
1077 				    a, wide_int_to_tree (TREE_TYPE (a), w));
1078 	  aa = fold_build2 (NE_EXPR, boolean_type_node, result,
1079 			    build_zero_cst (TREE_TYPE (a)));
1080 	}
1081       else
1082 	aa = tree_vec_extract (gsi, cond_type, a, width, index);
1083       result = gimplify_build3 (gsi, COND_EXPR, inner_type, aa, bb, cc);
1084       constructor_elt ce = {NULL_TREE, result};
1085       v->quick_push (ce);
1086       index = int_const_binop (PLUS_EXPR, index, width);
1087       if (width == comp_width)
1088 	comp_index = index;
1089       else
1090 	comp_index = int_const_binop (PLUS_EXPR, comp_index, comp_width);
1091     }
1092 
1093   constr = build_constructor (type, v);
1094   gimple_assign_set_rhs_from_tree (gsi, constr);
1095   update_stmt (gsi_stmt (*gsi));
1096 
1097   if (a_is_comparison)
1098     bitmap_set_bit (dce_ssa_names,
1099 		    SSA_NAME_VERSION (gimple_assign_lhs (assign)));
1100 
1101   return false;
1102 }
1103 
1104 static tree
1105 expand_vector_operation (gimple_stmt_iterator *gsi, tree type, tree compute_type,
1106 			 gassign *assign, enum tree_code code,
1107 			 bitmap dce_ssa_names)
1108 {
1109   machine_mode compute_mode = TYPE_MODE (compute_type);
1110 
1111   /* If the compute mode is not a vector mode (hence we are not decomposing
1112      a BLKmode vector to smaller, hardware-supported vectors), we may want
1113      to expand the operations in parallel.  */
1114   if (!VECTOR_MODE_P (compute_mode))
1115     switch (code)
1116       {
1117       case PLUS_EXPR:
1118       case MINUS_EXPR:
1119         if (ANY_INTEGRAL_TYPE_P (type) && !TYPE_OVERFLOW_TRAPS (type))
1120 	  return expand_vector_addition (gsi, do_binop, do_plus_minus, type,
1121 					 gimple_assign_rhs1 (assign),
1122 					 gimple_assign_rhs2 (assign), code);
1123 	break;
1124 
1125       case NEGATE_EXPR:
1126         if (ANY_INTEGRAL_TYPE_P (type) && !TYPE_OVERFLOW_TRAPS (type))
1127           return expand_vector_addition (gsi, do_unop, do_negate, type,
1128 		      		         gimple_assign_rhs1 (assign),
1129 					 NULL_TREE, code);
1130 	break;
1131 
1132       case BIT_AND_EXPR:
1133       case BIT_IOR_EXPR:
1134       case BIT_XOR_EXPR:
1135         return expand_vector_parallel (gsi, do_binop, type,
1136 		      		       gimple_assign_rhs1 (assign),
1137 				       gimple_assign_rhs2 (assign), code);
1138 
1139       case BIT_NOT_EXPR:
1140         return expand_vector_parallel (gsi, do_unop, type,
1141 		      		       gimple_assign_rhs1 (assign),
1142         			       NULL_TREE, code);
1143       case EQ_EXPR:
1144       case NE_EXPR:
1145       case GT_EXPR:
1146       case LT_EXPR:
1147       case GE_EXPR:
1148       case LE_EXPR:
1149       case UNEQ_EXPR:
1150       case UNGT_EXPR:
1151       case UNLT_EXPR:
1152       case UNGE_EXPR:
1153       case UNLE_EXPR:
1154       case LTGT_EXPR:
1155       case ORDERED_EXPR:
1156       case UNORDERED_EXPR:
1157 	{
1158 	  tree rhs1 = gimple_assign_rhs1 (assign);
1159 	  tree rhs2 = gimple_assign_rhs2 (assign);
1160 
1161 	  return expand_vector_comparison (gsi, type, rhs1, rhs2, code,
1162 					   dce_ssa_names);
1163 	}
1164 
1165       case TRUNC_DIV_EXPR:
1166       case TRUNC_MOD_EXPR:
1167 	{
1168 	  tree rhs1 = gimple_assign_rhs1 (assign);
1169 	  tree rhs2 = gimple_assign_rhs2 (assign);
1170 	  tree ret;
1171 
1172 	  if (!optimize
1173 	      || !VECTOR_INTEGER_TYPE_P (type)
1174 	      || TREE_CODE (rhs2) != VECTOR_CST
1175 	      || !VECTOR_MODE_P (TYPE_MODE (type)))
1176 	    break;
1177 
1178 	  ret = expand_vector_divmod (gsi, type, rhs1, rhs2, code);
1179 	  if (ret != NULL_TREE)
1180 	    return ret;
1181 	  break;
1182 	}
1183 
1184       default:
1185 	break;
1186       }
1187 
1188   if (TREE_CODE_CLASS (code) == tcc_unary)
1189     return expand_vector_piecewise (gsi, do_unop, type, compute_type,
1190 				    gimple_assign_rhs1 (assign),
1191 				    NULL_TREE, code);
1192   else
1193     return expand_vector_piecewise (gsi, do_binop, type, compute_type,
1194 				    gimple_assign_rhs1 (assign),
1195 				    gimple_assign_rhs2 (assign), code);
1196 }
1197 
1198 /* Try to optimize
1199    a_5 = { b_7, b_7 + 3, b_7 + 6, b_7 + 9 };
1200    style stmts into:
1201    _9 = { b_7, b_7, b_7, b_7 };
1202    a_5 = _9 + { 0, 3, 6, 9 };
1203    because vector splat operation is usually more efficient
1204    than piecewise initialization of the vector.  */
1205 
1206 static void
1207 optimize_vector_constructor (gimple_stmt_iterator *gsi)
1208 {
1209   gassign *stmt = as_a <gassign *> (gsi_stmt (*gsi));
1210   tree lhs = gimple_assign_lhs (stmt);
1211   tree rhs = gimple_assign_rhs1 (stmt);
1212   tree type = TREE_TYPE (rhs);
1213   unsigned int i, j;
1214   unsigned HOST_WIDE_INT nelts;
1215   bool all_same = true;
1216   constructor_elt *elt;
1217   gimple *g;
1218   tree base = NULL_TREE;
1219   optab op;
1220 
1221   if (!TYPE_VECTOR_SUBPARTS (type).is_constant (&nelts)
1222       || nelts <= 2
1223       || CONSTRUCTOR_NELTS (rhs) != nelts)
1224     return;
1225   op = optab_for_tree_code (PLUS_EXPR, type, optab_default);
1226   if (op == unknown_optab
1227       || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
1228     return;
1229   FOR_EACH_VEC_SAFE_ELT (CONSTRUCTOR_ELTS (rhs), i, elt)
1230     if (TREE_CODE (elt->value) != SSA_NAME
1231 	|| TREE_CODE (TREE_TYPE (elt->value)) == VECTOR_TYPE)
1232       return;
1233     else
1234       {
1235 	tree this_base = elt->value;
1236 	if (this_base != CONSTRUCTOR_ELT (rhs, 0)->value)
1237 	  all_same = false;
1238 	for (j = 0; j < nelts + 1; j++)
1239 	  {
1240 	    g = SSA_NAME_DEF_STMT (this_base);
1241 	    if (is_gimple_assign (g)
1242 		&& gimple_assign_rhs_code (g) == PLUS_EXPR
1243 		&& TREE_CODE (gimple_assign_rhs2 (g)) == INTEGER_CST
1244 		&& TREE_CODE (gimple_assign_rhs1 (g)) == SSA_NAME
1245 		&& !SSA_NAME_OCCURS_IN_ABNORMAL_PHI (gimple_assign_rhs1 (g)))
1246 	      this_base = gimple_assign_rhs1 (g);
1247 	    else
1248 	      break;
1249 	  }
1250 	if (i == 0)
1251 	  base = this_base;
1252 	else if (this_base != base)
1253 	  return;
1254       }
1255   if (all_same)
1256     return;
1257   tree_vector_builder cst (type, nelts, 1);
1258   for (i = 0; i < nelts; i++)
1259     {
1260       tree this_base = CONSTRUCTOR_ELT (rhs, i)->value;
1261       tree elt = build_zero_cst (TREE_TYPE (base));
1262       while (this_base != base)
1263 	{
1264 	  g = SSA_NAME_DEF_STMT (this_base);
1265 	  elt = fold_binary (PLUS_EXPR, TREE_TYPE (base),
1266 			     elt, gimple_assign_rhs2 (g));
1267 	  if (elt == NULL_TREE
1268 	      || TREE_CODE (elt) != INTEGER_CST
1269 	      || TREE_OVERFLOW (elt))
1270 	    return;
1271 	  this_base = gimple_assign_rhs1 (g);
1272 	}
1273       cst.quick_push (elt);
1274     }
1275   for (i = 0; i < nelts; i++)
1276     CONSTRUCTOR_ELT (rhs, i)->value = base;
1277   g = gimple_build_assign (make_ssa_name (type), rhs);
1278   gsi_insert_before (gsi, g, GSI_SAME_STMT);
1279   g = gimple_build_assign (lhs, PLUS_EXPR, gimple_assign_lhs (g),
1280 			   cst.build ());
1281   gsi_replace (gsi, g, false);
1282 }
1283 
1284 /* Return a type for the widest vector mode whose components are of type
1285    TYPE, or NULL_TREE if none is found.  */
1286 
1287 static tree
1288 type_for_widest_vector_mode (tree type, optab op)
1289 {
1290   machine_mode inner_mode = TYPE_MODE (type);
1291   machine_mode best_mode = VOIDmode, mode;
1292   poly_int64 best_nunits = 0;
1293 
1294   if (SCALAR_FLOAT_MODE_P (inner_mode))
1295     mode = MIN_MODE_VECTOR_FLOAT;
1296   else if (SCALAR_FRACT_MODE_P (inner_mode))
1297     mode = MIN_MODE_VECTOR_FRACT;
1298   else if (SCALAR_UFRACT_MODE_P (inner_mode))
1299     mode = MIN_MODE_VECTOR_UFRACT;
1300   else if (SCALAR_ACCUM_MODE_P (inner_mode))
1301     mode = MIN_MODE_VECTOR_ACCUM;
1302   else if (SCALAR_UACCUM_MODE_P (inner_mode))
1303     mode = MIN_MODE_VECTOR_UACCUM;
1304   else if (inner_mode == BImode)
1305     mode = MIN_MODE_VECTOR_BOOL;
1306   else
1307     mode = MIN_MODE_VECTOR_INT;
1308 
1309   FOR_EACH_MODE_FROM (mode, mode)
1310     if (GET_MODE_INNER (mode) == inner_mode
1311 	&& maybe_gt (GET_MODE_NUNITS (mode), best_nunits)
1312 	&& optab_handler (op, mode) != CODE_FOR_nothing)
1313       best_mode = mode, best_nunits = GET_MODE_NUNITS (mode);
1314 
1315   if (best_mode == VOIDmode)
1316     return NULL_TREE;
1317   else
1318     return build_vector_type_for_mode (type, best_mode);
1319 }
1320 
1321 
1322 /* Build a reference to the element of the vector VECT.  The function
1323    returns either the element itself, a BIT_FIELD_REF, or an
1324    ARRAY_REF expression.
1325 
1326    GSI is required to insert temporary variables while building a
1327    reference to the element of the vector VECT.
1328 
1329    PTMPVEC is a pointer to the temporary variable used for caching
1330    purposes.  If PTMPVEC is NULL, a new temporary variable
1331    will be created.  */
1332 static tree
1333 vector_element (gimple_stmt_iterator *gsi, tree vect, tree idx, tree *ptmpvec)
1334 {
1335   tree vect_type, vect_elt_type;
1336   gimple *asgn;
1337   tree tmpvec;
1338   tree arraytype;
1339   bool need_asgn = true;
1340   unsigned int elements;
1341 
1342   vect_type = TREE_TYPE (vect);
1343   vect_elt_type = TREE_TYPE (vect_type);
1344   elements = nunits_for_known_piecewise_op (vect_type);
1345 
1346   if (TREE_CODE (idx) == INTEGER_CST)
1347     {
1348       unsigned HOST_WIDE_INT index;
1349 
1350       /* Given that we're about to compute a binary modulus,
1351 	 we don't care about the high bits of the value.  */
1352       index = TREE_INT_CST_LOW (idx);
1353       if (!tree_fits_uhwi_p (idx) || index >= elements)
1354 	{
1355 	  index &= elements - 1;
1356 	  idx = build_int_cst (TREE_TYPE (idx), index);
1357 	}
1358 
1359       /* When lowering a vector statement sequence do some easy
1360          simplification by looking through intermediate vector results.  */
1361       if (TREE_CODE (vect) == SSA_NAME)
1362 	{
1363 	  gimple *def_stmt = SSA_NAME_DEF_STMT (vect);
1364 	  if (is_gimple_assign (def_stmt)
1365 	      && (gimple_assign_rhs_code (def_stmt) == VECTOR_CST
1366 		  || gimple_assign_rhs_code (def_stmt) == CONSTRUCTOR))
1367 	    vect = gimple_assign_rhs1 (def_stmt);
1368 	}
1369 
1370       if (TREE_CODE (vect) == VECTOR_CST)
1371 	return VECTOR_CST_ELT (vect, index);
1372       else if (TREE_CODE (vect) == CONSTRUCTOR
1373 	       && (CONSTRUCTOR_NELTS (vect) == 0
1374 		   || TREE_CODE (TREE_TYPE (CONSTRUCTOR_ELT (vect, 0)->value))
1375 		      != VECTOR_TYPE))
1376         {
1377 	  if (index < CONSTRUCTOR_NELTS (vect))
1378 	    return CONSTRUCTOR_ELT (vect, index)->value;
1379           return build_zero_cst (vect_elt_type);
1380         }
1381       else
1382         {
1383 	  tree size = vector_element_bits_tree (vect_type);
1384 	  tree pos = fold_build2 (MULT_EXPR, bitsizetype, bitsize_int (index),
1385 				  size);
1386 	  return fold_build3 (BIT_FIELD_REF, vect_elt_type, vect, size, pos);
1387         }
1388     }
1389 
1390   if (!ptmpvec)
1391     tmpvec = create_tmp_var (vect_type, "vectmp");
1392   else if (!*ptmpvec)
1393     tmpvec = *ptmpvec = create_tmp_var (vect_type, "vectmp");
1394   else
1395     {
1396       tmpvec = *ptmpvec;
1397       need_asgn = false;
1398     }
1399 
1400   if (need_asgn)
1401     {
1402       TREE_ADDRESSABLE (tmpvec) = 1;
1403       asgn = gimple_build_assign (tmpvec, vect);
1404       gsi_insert_before (gsi, asgn, GSI_SAME_STMT);
1405     }
1406 
1407   arraytype = build_array_type_nelts (vect_elt_type, elements);
1408   return build4 (ARRAY_REF, vect_elt_type,
1409                  build1 (VIEW_CONVERT_EXPR, arraytype, tmpvec),
1410                  idx, NULL_TREE, NULL_TREE);
1411 }
1412 
1413 /* Check if VEC_PERM_EXPR within the given setting is supported
1414    by hardware, or lower it piecewise.
1415 
1416    When VEC_PERM_EXPR has the same first and second operands:
1417    VEC_PERM_EXPR <v0, v0, mask> the lowered version would be
1418    {v0[mask[0]], v0[mask[1]], ...}
1419    MASK and V0 must have the same number of elements.
1420 
1421    Otherwise VEC_PERM_EXPR <v0, v1, mask> is lowered to
1422    {mask[0] < len(v0) ? v0[mask[0]] : v1[mask[0]], ...}
1423    V0 and V1 must have the same type.  MASK, V0 and V1 must have the
1424    same number of elements.  */
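/* A small worked example of the piecewise lowering (illustrative only):
   with v0 = {10, 20, 30, 40}, v1 = {50, 60, 70, 80} and mask = {0, 5, 2, 7},
   the result is {v0[0], v1[1], v0[2], v1[3]} = {10, 60, 30, 80}, since mask
   elements greater than or equal to len(v0) select from V1.  */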
1425 
1426 static void
1427 lower_vec_perm (gimple_stmt_iterator *gsi)
1428 {
1429   gassign *stmt = as_a <gassign *> (gsi_stmt (*gsi));
1430   tree mask = gimple_assign_rhs3 (stmt);
1431   tree vec0 = gimple_assign_rhs1 (stmt);
1432   tree vec1 = gimple_assign_rhs2 (stmt);
1433   tree vect_type = TREE_TYPE (vec0);
1434   tree mask_type = TREE_TYPE (mask);
1435   tree vect_elt_type = TREE_TYPE (vect_type);
1436   tree mask_elt_type = TREE_TYPE (mask_type);
1437   unsigned HOST_WIDE_INT elements;
1438   vec<constructor_elt, va_gc> *v;
1439   tree constr, t, si, i_val;
1440   tree vec0tmp = NULL_TREE, vec1tmp = NULL_TREE, masktmp = NULL_TREE;
1441   bool two_operand_p = !operand_equal_p (vec0, vec1, 0);
1442   location_t loc = gimple_location (gsi_stmt (*gsi));
1443   unsigned i;
1444 
1445   if (!TYPE_VECTOR_SUBPARTS (vect_type).is_constant (&elements))
1446     return;
1447 
1448   if (TREE_CODE (mask) == SSA_NAME)
1449     {
1450       gimple *def_stmt = SSA_NAME_DEF_STMT (mask);
1451       if (is_gimple_assign (def_stmt)
1452 	  && gimple_assign_rhs_code (def_stmt) == VECTOR_CST)
1453 	mask = gimple_assign_rhs1 (def_stmt);
1454     }
1455 
1456   vec_perm_builder sel_int;
1457 
1458   if (TREE_CODE (mask) == VECTOR_CST
1459       && tree_to_vec_perm_builder (&sel_int, mask))
1460     {
1461       vec_perm_indices indices (sel_int, 2, elements);
1462       if (can_vec_perm_const_p (TYPE_MODE (vect_type), indices))
1463 	{
1464 	  gimple_assign_set_rhs3 (stmt, mask);
1465 	  update_stmt (stmt);
1466 	  return;
1467 	}
1468       /* Also detect vec_shr pattern - VEC_PERM_EXPR with zero
1469 	 vector as VEC1 and a right element shift MASK.  */
1470       if (optab_handler (vec_shr_optab, TYPE_MODE (vect_type))
1471 	  != CODE_FOR_nothing
1472 	  && TREE_CODE (vec1) == VECTOR_CST
1473 	  && initializer_zerop (vec1)
1474 	  && maybe_ne (indices[0], 0)
1475 	  && known_lt (poly_uint64 (indices[0]), elements))
1476 	{
1477 	  bool ok_p = indices.series_p (0, 1, indices[0], 1);
1478 	  if (!ok_p)
1479 	    {
1480 	      for (i = 1; i < elements; ++i)
1481 		{
1482 		  poly_uint64 actual = indices[i];
1483 		  poly_uint64 expected = i + indices[0];
1484 		  /* Indices into the second vector are all equivalent.  */
1485 		  if (maybe_lt (actual, elements)
1486 		      ? maybe_ne (actual, expected)
1487 		      : maybe_lt (expected, elements))
1488 		    break;
1489 		}
1490 	      ok_p = i == elements;
1491 	    }
1492 	  if (ok_p)
1493 	    {
1494 	      gimple_assign_set_rhs3 (stmt, mask);
1495 	      update_stmt (stmt);
1496 	      return;
1497 	    }
1498 	}
1499       /* And similarly vec_shl pattern.  */
1500       if (optab_handler (vec_shl_optab, TYPE_MODE (vect_type))
1501 	  != CODE_FOR_nothing
1502 	  && TREE_CODE (vec0) == VECTOR_CST
1503 	  && initializer_zerop (vec0))
1504 	{
	  unsigned int first = 0;
	  for (i = 0; i < elements; ++i)
	    if (known_eq (poly_uint64 (indices[i]), elements))
	      {
		if (i == 0 || first)
		  break;
		first = i;
	      }
	    else if (first
		     ? maybe_ne (poly_uint64 (indices[i]),
				 elements + i - first)
		     : maybe_ge (poly_uint64 (indices[i]), elements))
	      break;
	  if (first && i == elements)
	    {
	      gimple_assign_set_rhs3 (stmt, mask);
	      update_stmt (stmt);
	      return;
	    }
	}
    }
  else if (can_vec_perm_var_p (TYPE_MODE (vect_type)))
    return;

  warning_at (loc, OPT_Wvector_operation_performance,
	      "vector shuffling operation will be expanded piecewise");

  vec_alloc (v, elements);
  for (i = 0; i < elements; i++)
    {
      si = size_int (i);
      i_val = vector_element (gsi, mask, si, &masktmp);

      if (TREE_CODE (i_val) == INTEGER_CST)
	{
	  unsigned HOST_WIDE_INT index;

	  index = TREE_INT_CST_LOW (i_val);
	  if (!tree_fits_uhwi_p (i_val) || index >= elements)
	    i_val = build_int_cst (mask_elt_type, index & (elements - 1));

	  if (two_operand_p && (index & elements) != 0)
	    t = vector_element (gsi, vec1, i_val, &vec1tmp);
	  else
	    t = vector_element (gsi, vec0, i_val, &vec0tmp);

	  t = force_gimple_operand_gsi (gsi, t, true, NULL_TREE,
					true, GSI_SAME_STMT);
	}
      else
	{
	  tree cond = NULL_TREE, v0_val;

	  if (two_operand_p)
	    {
	      cond = fold_build2 (BIT_AND_EXPR, mask_elt_type, i_val,
				  build_int_cst (mask_elt_type, elements));
	      cond = force_gimple_operand_gsi (gsi, cond, true, NULL_TREE,
					       true, GSI_SAME_STMT);
	    }

	  i_val = fold_build2 (BIT_AND_EXPR, mask_elt_type, i_val,
			       build_int_cst (mask_elt_type, elements - 1));
	  i_val = force_gimple_operand_gsi (gsi, i_val, true, NULL_TREE,
					    true, GSI_SAME_STMT);

	  v0_val = vector_element (gsi, vec0, i_val, &vec0tmp);
	  v0_val = force_gimple_operand_gsi (gsi, v0_val, true, NULL_TREE,
					     true, GSI_SAME_STMT);

	  if (two_operand_p)
	    {
	      tree v1_val;

	      v1_val = vector_element (gsi, vec1, i_val, &vec1tmp);
	      v1_val = force_gimple_operand_gsi (gsi, v1_val, true, NULL_TREE,
						 true, GSI_SAME_STMT);

	      cond = fold_build2 (EQ_EXPR, boolean_type_node,
				  cond, build_zero_cst (mask_elt_type));
	      cond = fold_build3 (COND_EXPR, vect_elt_type,
				  cond, v0_val, v1_val);
	      t = force_gimple_operand_gsi (gsi, cond, true, NULL_TREE,
					    true, GSI_SAME_STMT);
	    }
	  else
	    t = v0_val;
	}

      CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, t);
    }

  constr = build_constructor (vect_type, v);
  gimple_assign_set_rhs_from_tree (gsi, constr);
  update_stmt (gsi_stmt (*gsi));
}

/* If OP is a uniform vector, return the element it is a splat from.  */

static tree
ssa_uniform_vector_p (tree op)
{
  if (TREE_CODE (op) == VECTOR_CST
      || TREE_CODE (op) == VEC_DUPLICATE_EXPR
      || TREE_CODE (op) == CONSTRUCTOR)
    return uniform_vector_p (op);
  if (TREE_CODE (op) == SSA_NAME)
    {
      gimple *def_stmt = SSA_NAME_DEF_STMT (op);
      if (gimple_assign_single_p (def_stmt))
	return uniform_vector_p (gimple_assign_rhs1 (def_stmt));
    }
  return NULL_TREE;
}

/* Return the type in which the operation CODE with optab OP can be
   computed.  */

static tree
get_compute_type (enum tree_code code, optab op, tree type)
{
  /* For very wide vectors, try using a smaller vector mode.  */
  tree compute_type = type;
  if (op
      && (!VECTOR_MODE_P (TYPE_MODE (type))
	  || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing))
    {
      tree vector_compute_type
	= type_for_widest_vector_mode (TREE_TYPE (type), op);
      if (vector_compute_type != NULL_TREE
	  && subparts_gt (compute_type, vector_compute_type)
	  && maybe_ne (TYPE_VECTOR_SUBPARTS (vector_compute_type), 1U)
	  && (optab_handler (op, TYPE_MODE (vector_compute_type))
	      != CODE_FOR_nothing))
	compute_type = vector_compute_type;
    }

  /* If we are breaking a BLKmode vector into smaller pieces,
     type_for_widest_vector_mode has already looked into the optab,
     so skip these checks.  */
  if (compute_type == type)
    {
      machine_mode compute_mode = TYPE_MODE (compute_type);
      if (VECTOR_MODE_P (compute_mode))
	{
	  if (op && optab_handler (op, compute_mode) != CODE_FOR_nothing)
	    return compute_type;
	  if (code == MULT_HIGHPART_EXPR
	      && can_mult_highpart_p (compute_mode,
				      TYPE_UNSIGNED (compute_type)))
	    return compute_type;
	}
      /* There is no operation in hardware, so fall back to scalars.  */
      compute_type = TREE_TYPE (type);
    }

  return compute_type;
}

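/* Callback for expand_vector_piecewise / expand_vector_parallel when
   lowering a COND_EXPR with a scalar condition and vector operands:
   extract the piece at BITPOS/BITSIZE from A and B (when they are
   vectors) and build CODE of INNER_TYPE reusing the original scalar
   condition of the statement at *GSI.  */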
static tree
do_cond (gimple_stmt_iterator *gsi, tree inner_type, tree a, tree b,
	 tree bitpos, tree bitsize, enum tree_code code,
	 tree type ATTRIBUTE_UNUSED)
{
  if (TREE_CODE (TREE_TYPE (a)) == VECTOR_TYPE)
    a = tree_vec_extract (gsi, inner_type, a, bitsize, bitpos);
  if (TREE_CODE (TREE_TYPE (b)) == VECTOR_TYPE)
    b = tree_vec_extract (gsi, inner_type, b, bitsize, bitpos);
  tree cond = gimple_assign_rhs1 (gsi_stmt (*gsi));
  return gimplify_build3 (gsi, code, inner_type, unshare_expr (cond), a, b);
}

/* Expand a vector COND_EXPR to scalars, piecewise.  */
static void
expand_vector_scalar_condition (gimple_stmt_iterator *gsi)
{
  gassign *stmt = as_a <gassign *> (gsi_stmt (*gsi));
  tree type = gimple_expr_type (stmt);
  tree compute_type = get_compute_type (COND_EXPR, mov_optab, type);
  machine_mode compute_mode = TYPE_MODE (compute_type);
  gcc_assert (compute_mode != BLKmode);
  tree lhs = gimple_assign_lhs (stmt);
  tree rhs2 = gimple_assign_rhs2 (stmt);
  tree rhs3 = gimple_assign_rhs3 (stmt);
  tree new_rhs;

  /* If the compute mode is not a vector mode (hence we are not decomposing
     a BLKmode vector to smaller, hardware-supported vectors), we may want
     to expand the operations in parallel.  */
  if (!VECTOR_MODE_P (compute_mode))
    new_rhs = expand_vector_parallel (gsi, do_cond, type, rhs2, rhs3,
				      COND_EXPR);
  else
    new_rhs = expand_vector_piecewise (gsi, do_cond, type, compute_type,
				       rhs2, rhs3, COND_EXPR);
  if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (new_rhs)))
    new_rhs = gimplify_build1 (gsi, VIEW_CONVERT_EXPR, TREE_TYPE (lhs),
			       new_rhs);

  /* NOTE:  We should avoid using gimple_assign_set_rhs_from_tree.  One
     way to do it is to change expand_vector_operation and its callees to
     return a tree_code, RHS1 and RHS2 instead of a tree.  */
  gimple_assign_set_rhs_from_tree (gsi, new_rhs);
  update_stmt (gsi_stmt (*gsi));
}

/* Callback for expand_vector_piecewise to do VEC_CONVERT ifn call
   lowering.  If INNER_TYPE is not a vector type, this is a scalar
   fallback.  */

static tree
do_vec_conversion (gimple_stmt_iterator *gsi, tree inner_type, tree a,
		   tree decl, tree bitpos, tree bitsize,
		   enum tree_code code, tree type)
{
  a = tree_vec_extract (gsi, inner_type, a, bitsize, bitpos);
  if (!VECTOR_TYPE_P (inner_type))
    return gimplify_build1 (gsi, code, TREE_TYPE (type), a);
  if (code == CALL_EXPR)
    {
      gimple *g = gimple_build_call (decl, 1, a);
      tree lhs = make_ssa_name (TREE_TYPE (TREE_TYPE (decl)));
      gimple_call_set_lhs (g, lhs);
      gsi_insert_before (gsi, g, GSI_SAME_STMT);
      return lhs;
    }
  else
    {
      tree outer_type = build_vector_type (TREE_TYPE (type),
					   TYPE_VECTOR_SUBPARTS (inner_type));
      return gimplify_build1 (gsi, code, outer_type, a);
    }
}

/* Similarly, but for narrowing conversion.  */

static tree
do_vec_narrow_conversion (gimple_stmt_iterator *gsi, tree inner_type, tree a,
			  tree, tree bitpos, tree, enum tree_code code,
			  tree type)
{
  tree itype = build_vector_type (TREE_TYPE (inner_type),
				  exact_div (TYPE_VECTOR_SUBPARTS (inner_type),
					     2));
  tree b = tree_vec_extract (gsi, itype, a, TYPE_SIZE (itype), bitpos);
  tree c = tree_vec_extract (gsi, itype, a, TYPE_SIZE (itype),
			     int_const_binop (PLUS_EXPR, bitpos,
					      TYPE_SIZE (itype)));
  tree outer_type = build_vector_type (TREE_TYPE (type),
				       TYPE_VECTOR_SUBPARTS (inner_type));
  return gimplify_build2 (gsi, code, outer_type, b, c);
}

/* Expand VEC_CONVERT ifn call.  */

static void
expand_vector_conversion (gimple_stmt_iterator *gsi)
{
  gimple *stmt = gsi_stmt (*gsi);
  gimple *g;
  tree lhs = gimple_call_lhs (stmt);
  if (lhs == NULL_TREE)
    {
      g = gimple_build_nop ();
      gsi_replace (gsi, g, false);
      return;
    }
  tree arg = gimple_call_arg (stmt, 0);
  tree ret_type = TREE_TYPE (lhs);
  tree arg_type = TREE_TYPE (arg);
  tree new_rhs, compute_type = TREE_TYPE (arg_type);
  enum tree_code code = NOP_EXPR;
  enum tree_code code1 = ERROR_MARK;
  enum { NARROW, NONE, WIDEN } modifier = NONE;
  optab optab1 = unknown_optab;

  gcc_checking_assert (VECTOR_TYPE_P (ret_type) && VECTOR_TYPE_P (arg_type));
  if (INTEGRAL_TYPE_P (TREE_TYPE (ret_type))
      && SCALAR_FLOAT_TYPE_P (TREE_TYPE (arg_type)))
    code = FIX_TRUNC_EXPR;
  else if (INTEGRAL_TYPE_P (TREE_TYPE (arg_type))
	   && SCALAR_FLOAT_TYPE_P (TREE_TYPE (ret_type)))
    code = FLOAT_EXPR;
  unsigned int ret_elt_bits = vector_element_bits (ret_type);
  unsigned int arg_elt_bits = vector_element_bits (arg_type);
  if (ret_elt_bits < arg_elt_bits)
    modifier = NARROW;
  else if (ret_elt_bits > arg_elt_bits)
    modifier = WIDEN;

  if (modifier == NONE && (code == FIX_TRUNC_EXPR || code == FLOAT_EXPR))
    {
      if (supportable_convert_operation (code, ret_type, arg_type, &code1))
	{
	  g = gimple_build_assign (lhs, code1, arg);
	  gsi_replace (gsi, g, false);
	  return;
	}
      /* Can't use get_compute_type here, as supportable_convert_operation
	 doesn't necessarily use an optab and needs two arguments.  */
      tree vec_compute_type
	= type_for_widest_vector_mode (TREE_TYPE (arg_type), mov_optab);
      if (vec_compute_type
	  && VECTOR_MODE_P (TYPE_MODE (vec_compute_type))
	  && subparts_gt (arg_type, vec_compute_type))
	{
	  unsigned HOST_WIDE_INT nelts
	    = constant_lower_bound (TYPE_VECTOR_SUBPARTS (vec_compute_type));
	  while (nelts > 1)
	    {
	      tree ret1_type = build_vector_type (TREE_TYPE (ret_type), nelts);
	      tree arg1_type = build_vector_type (TREE_TYPE (arg_type), nelts);
	      if (supportable_convert_operation (code, ret1_type, arg1_type,
						 &code1))
		{
		  new_rhs = expand_vector_piecewise (gsi, do_vec_conversion,
						     ret_type, arg1_type, arg,
						     NULL_TREE, code1);
		  g = gimple_build_assign (lhs, new_rhs);
		  gsi_replace (gsi, g, false);
		  return;
		}
	      nelts = nelts / 2;
	    }
	}
    }
  else if (modifier == NARROW)
    {
      switch (code)
	{
	CASE_CONVERT:
	  code1 = VEC_PACK_TRUNC_EXPR;
	  optab1 = optab_for_tree_code (code1, arg_type, optab_default);
	  break;
	case FIX_TRUNC_EXPR:
	  code1 = VEC_PACK_FIX_TRUNC_EXPR;
	  /* The signedness is determined from the output operand.  */
	  optab1 = optab_for_tree_code (code1, ret_type, optab_default);
	  break;
	case FLOAT_EXPR:
	  code1 = VEC_PACK_FLOAT_EXPR;
	  optab1 = optab_for_tree_code (code1, arg_type, optab_default);
	  break;
	default:
	  gcc_unreachable ();
	}

      if (optab1)
	compute_type = get_compute_type (code1, optab1, arg_type);
      enum insn_code icode1;
      if (VECTOR_TYPE_P (compute_type)
	  && ((icode1 = optab_handler (optab1, TYPE_MODE (compute_type)))
	      != CODE_FOR_nothing)
	  && VECTOR_MODE_P (insn_data[icode1].operand[0].mode))
	{
	  tree cretd_type
	    = build_vector_type (TREE_TYPE (ret_type),
				 TYPE_VECTOR_SUBPARTS (compute_type) * 2);
	  if (insn_data[icode1].operand[0].mode == TYPE_MODE (cretd_type))
	    {
	      if (compute_type == arg_type)
		{
		  new_rhs = gimplify_build2 (gsi, code1, cretd_type,
					     arg, build_zero_cst (arg_type));
		  new_rhs = tree_vec_extract (gsi, ret_type, new_rhs,
					      TYPE_SIZE (ret_type),
					      bitsize_int (0));
		  g = gimple_build_assign (lhs, new_rhs);
		  gsi_replace (gsi, g, false);
		  return;
		}
	      tree dcompute_type
		= build_vector_type (TREE_TYPE (compute_type),
				     TYPE_VECTOR_SUBPARTS (compute_type) * 2);
	      if (TYPE_MAIN_VARIANT (dcompute_type)
		  == TYPE_MAIN_VARIANT (arg_type))
		new_rhs = do_vec_narrow_conversion (gsi, dcompute_type, arg,
						    NULL_TREE, bitsize_int (0),
						    NULL_TREE, code1,
						    ret_type);
	      else
		new_rhs = expand_vector_piecewise (gsi,
						   do_vec_narrow_conversion,
						   arg_type, dcompute_type,
						   arg, NULL_TREE, code1,
						   ret_type);
	      g = gimple_build_assign (lhs, new_rhs);
	      gsi_replace (gsi, g, false);
	      return;
	    }
	}
    }
  else if (modifier == WIDEN)
    {
      enum tree_code code2 = ERROR_MARK;
      optab optab2 = unknown_optab;
      switch (code)
	{
	CASE_CONVERT:
	  code1 = VEC_UNPACK_LO_EXPR;
	  code2 = VEC_UNPACK_HI_EXPR;
	  break;
	case FIX_TRUNC_EXPR:
	  code1 = VEC_UNPACK_FIX_TRUNC_LO_EXPR;
	  code2 = VEC_UNPACK_FIX_TRUNC_HI_EXPR;
	  break;
	case FLOAT_EXPR:
	  code1 = VEC_UNPACK_FLOAT_LO_EXPR;
	  code2 = VEC_UNPACK_FLOAT_HI_EXPR;
	  break;
	default:
	  gcc_unreachable ();
	}
      if (BYTES_BIG_ENDIAN)
	std::swap (code1, code2);

      if (code == FIX_TRUNC_EXPR)
	{
	  /* The signedness is determined from the output operand.  */
	  optab1 = optab_for_tree_code (code1, ret_type, optab_default);
	  optab2 = optab_for_tree_code (code2, ret_type, optab_default);
	}
      else
	{
	  optab1 = optab_for_tree_code (code1, arg_type, optab_default);
	  optab2 = optab_for_tree_code (code2, arg_type, optab_default);
	}

      if (optab1 && optab2)
	compute_type = get_compute_type (code1, optab1, arg_type);

      enum insn_code icode1, icode2;
      if (VECTOR_TYPE_P (compute_type)
	  && ((icode1 = optab_handler (optab1, TYPE_MODE (compute_type)))
	      != CODE_FOR_nothing)
	  && ((icode2 = optab_handler (optab2, TYPE_MODE (compute_type)))
	      != CODE_FOR_nothing)
	  && VECTOR_MODE_P (insn_data[icode1].operand[0].mode)
	  && (insn_data[icode1].operand[0].mode
	      == insn_data[icode2].operand[0].mode))
	{
	  poly_uint64 nunits
	    = exact_div (TYPE_VECTOR_SUBPARTS (compute_type), 2);
	  tree cretd_type = build_vector_type (TREE_TYPE (ret_type), nunits);
	  if (insn_data[icode1].operand[0].mode == TYPE_MODE (cretd_type))
	    {
	      vec<constructor_elt, va_gc> *v;
	      tree part_width = TYPE_SIZE (compute_type);
	      tree index = bitsize_int (0);
	      int nunits = nunits_for_known_piecewise_op (arg_type);
	      int delta = tree_to_uhwi (part_width) / arg_elt_bits;
	      int i;
	      location_t loc = gimple_location (gsi_stmt (*gsi));

	      if (compute_type != arg_type)
		warning_at (loc, OPT_Wvector_operation_performance,
			    "vector operation will be expanded piecewise");
	      else
		{
		  nunits = 1;
		  delta = 1;
		}

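	      /* Unpack each piece of ARG with CODE1 and CODE2 and collect
		 the resulting halves into a CONSTRUCTOR of RET_TYPE.  */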
	      vec_alloc (v, (nunits + delta - 1) / delta * 2);
	      for (i = 0; i < nunits;
		   i += delta, index = int_const_binop (PLUS_EXPR, index,
							part_width))
		{
		  tree a = arg;
		  if (compute_type != arg_type)
		    a = tree_vec_extract (gsi, compute_type, a, part_width,
					  index);
		  tree result = gimplify_build1 (gsi, code1, cretd_type, a);
		  constructor_elt ce = { NULL_TREE, result };
		  v->quick_push (ce);
		  ce.value = gimplify_build1 (gsi, code2, cretd_type, a);
		  v->quick_push (ce);
		}

	      new_rhs = build_constructor (ret_type, v);
	      g = gimple_build_assign (lhs, new_rhs);
	      gsi_replace (gsi, g, false);
	      return;
	    }
	}
    }

  new_rhs = expand_vector_piecewise (gsi, do_vec_conversion, arg_type,
				     TREE_TYPE (arg_type), arg,
				     NULL_TREE, code, ret_type);
  g = gimple_build_assign (lhs, new_rhs);
  gsi_replace (gsi, g, false);
}

/* Process one statement.  If we identify a vector operation, expand it.  */

static void
expand_vector_operations_1 (gimple_stmt_iterator *gsi,
			    bitmap dce_ssa_names)
{
  tree lhs, rhs1, rhs2 = NULL, type, compute_type = NULL_TREE;
  enum tree_code code;
  optab op = unknown_optab;
  enum gimple_rhs_class rhs_class;
  tree new_rhs;

  /* Only consider code == GIMPLE_ASSIGN.  */
  gassign *stmt = dyn_cast <gassign *> (gsi_stmt (*gsi));
  if (!stmt)
    {
      if (gimple_call_internal_p (gsi_stmt (*gsi), IFN_VEC_CONVERT))
	expand_vector_conversion (gsi);
      return;
    }

  code = gimple_assign_rhs_code (stmt);
  rhs_class = get_gimple_rhs_class (code);
  lhs = gimple_assign_lhs (stmt);

  if (code == VEC_PERM_EXPR)
    {
      lower_vec_perm (gsi);
      return;
    }

  if (code == VEC_COND_EXPR)
    {
      expand_vector_condition (gsi, dce_ssa_names);
      return;
    }

  if (code == COND_EXPR
      && TREE_CODE (TREE_TYPE (gimple_assign_lhs (stmt))) == VECTOR_TYPE
      && TYPE_MODE (TREE_TYPE (gimple_assign_lhs (stmt))) == BLKmode)
    {
      expand_vector_scalar_condition (gsi);
      return;
    }

  if (code == CONSTRUCTOR
      && TREE_CODE (lhs) == SSA_NAME
      && VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (lhs)))
      && !gimple_clobber_p (stmt)
      && optimize)
    {
      optimize_vector_constructor (gsi);
      return;
    }

  if (rhs_class != GIMPLE_UNARY_RHS && rhs_class != GIMPLE_BINARY_RHS)
    return;

  rhs1 = gimple_assign_rhs1 (stmt);
  type = gimple_expr_type (stmt);
  if (rhs_class == GIMPLE_BINARY_RHS)
    rhs2 = gimple_assign_rhs2 (stmt);

  if (!VECTOR_TYPE_P (type)
      || !VECTOR_TYPE_P (TREE_TYPE (rhs1)))
    return;

  /* A scalar operation pretending to be a vector one.  */
  if (VECTOR_BOOLEAN_TYPE_P (type)
      && !VECTOR_MODE_P (TYPE_MODE (type))
      && TYPE_MODE (type) != BLKmode
      && (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt)) != tcc_comparison
	  || (VECTOR_BOOLEAN_TYPE_P (TREE_TYPE (rhs1))
	      && !VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (rhs1)))
	      && TYPE_MODE (TREE_TYPE (rhs1)) != BLKmode)))
    return;

  /* If the vector operation operates on vectors whose elements are all
     the same, implement it with a scalar operation and a splat if the
     target supports the scalar operation.  */
  tree srhs1, srhs2 = NULL_TREE;
  if ((srhs1 = ssa_uniform_vector_p (rhs1)) != NULL_TREE
      && (rhs2 == NULL_TREE
	  || (! VECTOR_TYPE_P (TREE_TYPE (rhs2))
	      && (srhs2 = rhs2))
	  || (srhs2 = ssa_uniform_vector_p (rhs2)) != NULL_TREE)
      /* As we query direct optabs, restrict to non-convert operations.  */
      && TYPE_MODE (TREE_TYPE (type)) == TYPE_MODE (TREE_TYPE (srhs1)))
    {
      op = optab_for_tree_code (code, TREE_TYPE (type), optab_scalar);
      if (op >= FIRST_NORM_OPTAB && op <= LAST_NORM_OPTAB
	  && optab_handler (op, TYPE_MODE (TREE_TYPE (type))) != CODE_FOR_nothing)
	{
	  tree slhs = make_ssa_name (TREE_TYPE (TREE_TYPE (lhs)));
	  gimple *repl = gimple_build_assign (slhs, code, srhs1, srhs2);
	  gsi_insert_before (gsi, repl, GSI_SAME_STMT);
	  gimple_assign_set_rhs_from_tree (gsi,
					   build_vector_from_val (type, slhs));
	  update_stmt (stmt);
	  return;
	}
    }

  if (CONVERT_EXPR_CODE_P (code)
      || code == FLOAT_EXPR
      || code == FIX_TRUNC_EXPR
      || code == VIEW_CONVERT_EXPR)
    return;

  /* The signedness is determined from the input argument.  */
  if (code == VEC_UNPACK_FLOAT_HI_EXPR
      || code == VEC_UNPACK_FLOAT_LO_EXPR
      || code == VEC_PACK_FLOAT_EXPR)
    {
      /* We do not know how to scalarize those.  */
      return;
    }

  /* For widening/narrowing vector operations, the relevant type is that
     of the arguments, not the widened result.  VEC_UNPACK_FLOAT_*_EXPR is
     handled in the same way above.  */
  if (code == WIDEN_SUM_EXPR
      || code == VEC_WIDEN_PLUS_HI_EXPR
      || code == VEC_WIDEN_PLUS_LO_EXPR
      || code == VEC_WIDEN_MINUS_HI_EXPR
      || code == VEC_WIDEN_MINUS_LO_EXPR
      || code == VEC_WIDEN_MULT_HI_EXPR
      || code == VEC_WIDEN_MULT_LO_EXPR
      || code == VEC_WIDEN_MULT_EVEN_EXPR
      || code == VEC_WIDEN_MULT_ODD_EXPR
      || code == VEC_UNPACK_HI_EXPR
      || code == VEC_UNPACK_LO_EXPR
      || code == VEC_UNPACK_FIX_TRUNC_HI_EXPR
      || code == VEC_UNPACK_FIX_TRUNC_LO_EXPR
      || code == VEC_PACK_TRUNC_EXPR
      || code == VEC_PACK_SAT_EXPR
      || code == VEC_PACK_FIX_TRUNC_EXPR
      || code == VEC_WIDEN_LSHIFT_HI_EXPR
      || code == VEC_WIDEN_LSHIFT_LO_EXPR)
    {
      /* We do not know how to scalarize those.  */
      return;
    }

  /* Choose between vector shift/rotate by vector and vector shift/rotate
     by scalar.  */
  if (code == LSHIFT_EXPR
      || code == RSHIFT_EXPR
      || code == LROTATE_EXPR
      || code == RROTATE_EXPR)
    {
      optab opv;

      /* Check whether we have vector <op> {x,x,x,x} where x
	 could be a scalar variable or a constant.  Transform
	 vector <op> {x,x,x,x} ==> vector <op> scalar.  */
      if (VECTOR_INTEGER_TYPE_P (TREE_TYPE (rhs2)))
	{
	  tree first;

	  if ((first = ssa_uniform_vector_p (rhs2)) != NULL_TREE)
	    {
	      gimple_assign_set_rhs2 (stmt, first);
	      update_stmt (stmt);
	      rhs2 = first;
	    }
	}

      opv = optab_for_tree_code (code, type, optab_vector);
      if (VECTOR_INTEGER_TYPE_P (TREE_TYPE (rhs2)))
	op = opv;
      else
	{
	  op = optab_for_tree_code (code, type, optab_scalar);

	  compute_type = get_compute_type (code, op, type);
	  if (compute_type == type)
	    return;
	  /* The rtl expander will expand vector/scalar as vector/vector
	     if necessary.  Pick one with wider vector type.  */
	  tree compute_vtype = get_compute_type (code, opv, type);
	  if (subparts_gt (compute_vtype, compute_type))
	    {
	      compute_type = compute_vtype;
	      op = opv;
	    }
	}

      if (code == LROTATE_EXPR || code == RROTATE_EXPR)
	{
	  if (compute_type == NULL_TREE)
	    compute_type = get_compute_type (code, op, type);
	  if (compute_type == type)
	    return;
	  /* Before splitting vector rotates into scalar rotates,
	     see if we can use vector shifts and BIT_IOR_EXPR instead.
	     For vector-by-vector rotates we'd also need to check
	     BIT_AND_EXPR and NEGATE_EXPR; punt on those for now, as
	     fold doesn't seem to create such rotates anyway.  */
	  if (compute_type == TREE_TYPE (type)
	      && !VECTOR_INTEGER_TYPE_P (TREE_TYPE (rhs2)))
	    {
	      optab oplv = vashl_optab, opl = ashl_optab;
	      optab oprv = vlshr_optab, opr = lshr_optab, opo = ior_optab;
	      tree compute_lvtype = get_compute_type (LSHIFT_EXPR, oplv, type);
	      tree compute_rvtype = get_compute_type (RSHIFT_EXPR, oprv, type);
	      tree compute_otype = get_compute_type (BIT_IOR_EXPR, opo, type);
	      tree compute_ltype = get_compute_type (LSHIFT_EXPR, opl, type);
	      tree compute_rtype = get_compute_type (RSHIFT_EXPR, opr, type);
	      /* The rtl expander will expand vector/scalar as vector/vector
		 if necessary.  Pick one with wider vector type.  */
	      if (subparts_gt (compute_lvtype, compute_ltype))
		{
		  compute_ltype = compute_lvtype;
		  opl = oplv;
		}
	      if (subparts_gt (compute_rvtype, compute_rtype))
		{
		  compute_rtype = compute_rvtype;
		  opr = oprv;
		}
	      /* Pick the narrowest type from LSHIFT_EXPR, RSHIFT_EXPR and
		 BIT_IOR_EXPR.  */
	      compute_type = compute_ltype;
	      if (subparts_gt (compute_type, compute_rtype))
		compute_type = compute_rtype;
	      if (subparts_gt (compute_type, compute_otype))
		compute_type = compute_otype;
	      /* Verify all 3 operations can be performed in that type.  */
	      if (compute_type != TREE_TYPE (type))
		{
		  if (optab_handler (opl, TYPE_MODE (compute_type))
		      == CODE_FOR_nothing
		      || optab_handler (opr, TYPE_MODE (compute_type))
			 == CODE_FOR_nothing
		      || optab_handler (opo, TYPE_MODE (compute_type))
			 == CODE_FOR_nothing)
		    compute_type = TREE_TYPE (type);
		}
	    }
	}
    }
  else
    op = optab_for_tree_code (code, type, optab_default);

  /* Optabs will try converting a negation into a subtraction, so
     look for it as well.  TODO: negation of floating-point vectors
     might be turned into an exclusive OR toggling the sign bit.  */
  if (op == unknown_optab
      && code == NEGATE_EXPR
      && INTEGRAL_TYPE_P (TREE_TYPE (type)))
    op = optab_for_tree_code (MINUS_EXPR, type, optab_default);

  if (compute_type == NULL_TREE)
    compute_type = get_compute_type (code, op, type);
  if (compute_type == type)
    return;

  new_rhs = expand_vector_operation (gsi, type, compute_type, stmt, code,
				     dce_ssa_names);

  /* Leave expression untouched for later expansion.  */
  if (new_rhs == NULL_TREE)
    return;

  if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (new_rhs)))
    new_rhs = gimplify_build1 (gsi, VIEW_CONVERT_EXPR, TREE_TYPE (lhs),
			       new_rhs);

  /* NOTE:  We should avoid using gimple_assign_set_rhs_from_tree.  One
     way to do it is to change expand_vector_operation and its callees to
     return a tree_code, RHS1 and RHS2 instead of a tree.  */
  gimple_assign_set_rhs_from_tree (gsi, new_rhs);
  update_stmt (gsi_stmt (*gsi));
}

/* Use this to lower vector operations introduced by the vectorizer,
   if it may need the bit-twiddling tricks implemented in this file.  */

static unsigned int
expand_vector_operations (void)
{
  gimple_stmt_iterator gsi;
  basic_block bb;
  bool cfg_changed = false;

  auto_bitmap dce_ssa_names;

  FOR_EACH_BB_FN (bb, cfun)
    {
      for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
	{
	  expand_vector_operations_1 (&gsi, dce_ssa_names);
	  /* ???  If we do not cleanup EH then we will ICE in
	     verification.  But in reality we have created wrong-code
	     as we did not properly transition EH info and edges to
	     the piecewise computations.  */
	  if (maybe_clean_eh_stmt (gsi_stmt (gsi))
	      && gimple_purge_dead_eh_edges (bb))
	    cfg_changed = true;
	}
    }

  simple_dce_from_worklist (dce_ssa_names);

  return cfg_changed ? TODO_cleanup_cfg : 0;
}

namespace {

const pass_data pass_data_lower_vector =
{
  GIMPLE_PASS, /* type */
  "veclower", /* name */
  OPTGROUP_VEC, /* optinfo_flags */
  TV_NONE, /* tv_id */
  PROP_cfg, /* properties_required */
  PROP_gimple_lvec, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  TODO_update_ssa, /* todo_flags_finish */
};

class pass_lower_vector : public gimple_opt_pass
{
public:
  pass_lower_vector (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_lower_vector, ctxt)
  {}

  /* opt_pass methods: */
  virtual bool gate (function *fun)
    {
      return !(fun->curr_properties & PROP_gimple_lvec);
    }

  virtual unsigned int execute (function *)
    {
      return expand_vector_operations ();
    }

}; // class pass_lower_vector

} // anon namespace

gimple_opt_pass *
make_pass_lower_vector (gcc::context *ctxt)
{
  return new pass_lower_vector (ctxt);
}

namespace {

const pass_data pass_data_lower_vector_ssa =
{
  GIMPLE_PASS, /* type */
  "veclower2", /* name */
  OPTGROUP_VEC, /* optinfo_flags */
  TV_NONE, /* tv_id */
  PROP_cfg, /* properties_required */
  PROP_gimple_lvec, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  ( TODO_update_ssa
    | TODO_cleanup_cfg ), /* todo_flags_finish */
};

class pass_lower_vector_ssa : public gimple_opt_pass
{
public:
  pass_lower_vector_ssa (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_lower_vector_ssa, ctxt)
  {}

  /* opt_pass methods: */
  opt_pass * clone () { return new pass_lower_vector_ssa (m_ctxt); }
  virtual unsigned int execute (function *)
    {
      return expand_vector_operations ();
    }

}; // class pass_lower_vector_ssa

} // anon namespace

gimple_opt_pass *
make_pass_lower_vector_ssa (gcc::context *ctxt)
{
  return new pass_lower_vector_ssa (ctxt);
}

#include "gt-tree-vect-generic.h"