1 /* Statement Analysis and Transformation for Vectorization
2 Copyright (C) 2003-2021 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
4 and Ira Rosen <irar@il.ibm.com>
5
6 This file is part of GCC.
7
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
12
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
17
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
21
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "backend.h"
26 #include "target.h"
27 #include "rtl.h"
28 #include "tree.h"
29 #include "gimple.h"
30 #include "ssa.h"
31 #include "optabs-tree.h"
32 #include "insn-config.h"
33 #include "recog.h" /* FIXME: for insn_data */
34 #include "cgraph.h"
35 #include "dumpfile.h"
36 #include "alias.h"
37 #include "fold-const.h"
38 #include "stor-layout.h"
39 #include "tree-eh.h"
40 #include "gimplify.h"
41 #include "gimple-iterator.h"
42 #include "gimplify-me.h"
43 #include "tree-cfg.h"
44 #include "tree-ssa-loop-manip.h"
45 #include "cfgloop.h"
46 #include "explow.h"
47 #include "tree-ssa-loop.h"
48 #include "tree-scalar-evolution.h"
49 #include "tree-vectorizer.h"
50 #include "builtins.h"
51 #include "internal-fn.h"
52 #include "tree-vector-builder.h"
53 #include "vec-perm-indices.h"
54 #include "tree-ssa-loop-niter.h"
55 #include "gimple-fold.h"
56 #include "regs.h"
57 #include "attribs.h"
58
59 /* For lang_hooks.types.type_for_mode. */
60 #include "langhooks.h"
61
62 /* Return the vectorized type for the given statement. */
63
64 tree
65 stmt_vectype (class _stmt_vec_info *stmt_info)
66 {
67 return STMT_VINFO_VECTYPE (stmt_info);
68 }
69
70 /* Return TRUE iff the given statement is in an inner loop relative to
71 the loop being vectorized. */
72 bool
73 stmt_in_inner_loop_p (vec_info *vinfo, class _stmt_vec_info *stmt_info)
74 {
75 gimple *stmt = STMT_VINFO_STMT (stmt_info);
76 basic_block bb = gimple_bb (stmt);
77 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
78 class loop* loop;
79
80 if (!loop_vinfo)
81 return false;
82
83 loop = LOOP_VINFO_LOOP (loop_vinfo);
84
85 return (bb->loop_father == loop->inner);
86 }
87
88 /* Record the cost of a statement, either by directly informing the
89 target model or by saving it in a vector for later processing.
90 Return a preliminary estimate of the statement's cost. */
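
/* For example (purely illustrative): a request to cost COUNT
   unaligned_load statements for a gather access is recorded as COUNT
   vector_gather_load statements, and the preliminary estimate returned
   is simply COUNT * builtin_vectorization_cost (kind, vectype, misalign).  */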
91
92 unsigned
93 record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
94 enum vect_cost_for_stmt kind, stmt_vec_info stmt_info,
95 tree vectype, int misalign,
96 enum vect_cost_model_location where)
97 {
98 if ((kind == vector_load || kind == unaligned_load)
99 && (stmt_info && STMT_VINFO_GATHER_SCATTER_P (stmt_info)))
100 kind = vector_gather_load;
101 if ((kind == vector_store || kind == unaligned_store)
102 && (stmt_info && STMT_VINFO_GATHER_SCATTER_P (stmt_info)))
103 kind = vector_scatter_store;
104
105 stmt_info_for_cost si = { count, kind, where, stmt_info, vectype, misalign };
106 body_cost_vec->safe_push (si);
107
108 return (unsigned)
109 (builtin_vectorization_cost (kind, vectype, misalign) * count);
110 }
111
112 /* Return a variable of type ELEM_TYPE[NELEMS]. */
113
114 static tree
115 create_vector_array (tree elem_type, unsigned HOST_WIDE_INT nelems)
116 {
117 return create_tmp_var (build_array_type_nelts (elem_type, nelems),
118 "vect_array");
119 }
120
121 /* ARRAY is an array of vectors created by create_vector_array.
122 Return an SSA_NAME for the vector in index N. The reference
123 is part of the vectorization of STMT_INFO and the vector is associated
124 with scalar destination SCALAR_DEST. */
125
126 static tree
127 read_vector_array (vec_info *vinfo,
128 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
129 tree scalar_dest, tree array, unsigned HOST_WIDE_INT n)
130 {
131 tree vect_type, vect, vect_name, array_ref;
132 gimple *new_stmt;
133
134 gcc_assert (TREE_CODE (TREE_TYPE (array)) == ARRAY_TYPE);
135 vect_type = TREE_TYPE (TREE_TYPE (array));
136 vect = vect_create_destination_var (scalar_dest, vect_type);
137 array_ref = build4 (ARRAY_REF, vect_type, array,
138 build_int_cst (size_type_node, n),
139 NULL_TREE, NULL_TREE);
140
141 new_stmt = gimple_build_assign (vect, array_ref);
142 vect_name = make_ssa_name (vect, new_stmt);
143 gimple_assign_set_lhs (new_stmt, vect_name);
144 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
145
146 return vect_name;
147 }
148
149 /* ARRAY is an array of vectors created by create_vector_array.
150 Emit code to store SSA_NAME VECT in index N of the array.
151 The store is part of the vectorization of STMT_INFO. */
152
153 static void
154 write_vector_array (vec_info *vinfo,
155 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
156 tree vect, tree array, unsigned HOST_WIDE_INT n)
157 {
158 tree array_ref;
159 gimple *new_stmt;
160
161 array_ref = build4 (ARRAY_REF, TREE_TYPE (vect), array,
162 build_int_cst (size_type_node, n),
163 NULL_TREE, NULL_TREE);
164
165 new_stmt = gimple_build_assign (array_ref, vect);
166 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
167 }
168
169 /* PTR is a pointer to an array of type TYPE. Return a representation
170 of *PTR. The memory reference replaces those in FIRST_DR
171 (and its group). */
172
173 static tree
174 create_array_ref (tree type, tree ptr, tree alias_ptr_type)
175 {
176 tree mem_ref;
177
178 mem_ref = build2 (MEM_REF, type, ptr, build_int_cst (alias_ptr_type, 0));
179 /* Arrays have the same alignment as their type. */
180 set_ptr_info_alignment (get_ptr_info (ptr), TYPE_ALIGN_UNIT (type), 0);
181 return mem_ref;
182 }
183
184 /* Add a clobber of variable VAR to the vectorization of STMT_INFO.
185 Emit the clobber before *GSI. */
186
187 static void
188 vect_clobber_variable (vec_info *vinfo, stmt_vec_info stmt_info,
189 gimple_stmt_iterator *gsi, tree var)
190 {
191 tree clobber = build_clobber (TREE_TYPE (var));
192 gimple *new_stmt = gimple_build_assign (var, clobber);
193 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
194 }
195
196 /* Utility functions used by vect_mark_stmts_to_be_vectorized. */
197
198 /* Function vect_mark_relevant.
199
200 Mark STMT_INFO as "relevant for vectorization" and add it to WORKLIST. */
201
202 static void
203 vect_mark_relevant (vec<stmt_vec_info> *worklist, stmt_vec_info stmt_info,
204 enum vect_relevant relevant, bool live_p)
205 {
206 enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
207 bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);
208
209 if (dump_enabled_p ())
210 dump_printf_loc (MSG_NOTE, vect_location,
211 "mark relevant %d, live %d: %G", relevant, live_p,
212 stmt_info->stmt);
213
214 /* If this stmt is an original stmt in a pattern, we might need to mark its
215 related pattern stmt instead of the original stmt. However, such stmts
216 may have their own uses that are not in any pattern, in such cases the
217 stmt itself should be marked. */
218 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
219 {
220 /* This is the last stmt in a sequence that was detected as a
221 pattern that can potentially be vectorized. Don't mark the stmt
222 as relevant/live because it's not going to be vectorized.
223 Instead mark the pattern-stmt that replaces it. */
224
225 if (dump_enabled_p ())
226 dump_printf_loc (MSG_NOTE, vect_location,
227 "last stmt in pattern. don't mark"
228 " relevant/live.\n");
229 stmt_vec_info old_stmt_info = stmt_info;
230 stmt_info = STMT_VINFO_RELATED_STMT (stmt_info);
231 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == old_stmt_info);
232 save_relevant = STMT_VINFO_RELEVANT (stmt_info);
233 save_live_p = STMT_VINFO_LIVE_P (stmt_info);
234 }
235
236 STMT_VINFO_LIVE_P (stmt_info) |= live_p;
237 if (relevant > STMT_VINFO_RELEVANT (stmt_info))
238 STMT_VINFO_RELEVANT (stmt_info) = relevant;
239
240 if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
241 && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
242 {
243 if (dump_enabled_p ())
244 dump_printf_loc (MSG_NOTE, vect_location,
245 "already marked relevant/live.\n");
246 return;
247 }
248
249 worklist->safe_push (stmt_info);
250 }
251
252
253 /* Function is_simple_and_all_uses_invariant.
254
255 Return true if STMT_INFO is simple and all uses of it are invariant. */
256
257 bool
258 is_simple_and_all_uses_invariant (stmt_vec_info stmt_info,
259 loop_vec_info loop_vinfo)
260 {
261 tree op;
262 ssa_op_iter iter;
263
264 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
265 if (!stmt)
266 return false;
267
268 FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_USE)
269 {
270 enum vect_def_type dt = vect_uninitialized_def;
271
272 if (!vect_is_simple_use (op, loop_vinfo, &dt))
273 {
274 if (dump_enabled_p ())
275 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
276 "use not simple.\n");
277 return false;
278 }
279
280 if (dt != vect_external_def && dt != vect_constant_def)
281 return false;
282 }
283 return true;
284 }
285
286 /* Function vect_stmt_relevant_p.
287
288 Return true if STMT_INFO, in the loop that is represented by LOOP_VINFO,
289 is "relevant for vectorization".
290
291 A stmt is considered "relevant for vectorization" if:
292 - it has uses outside the loop.
293 - it has vdefs (it alters memory).
294 - it is a control stmt in the loop (except for the exit condition).
295
296 CHECKME: what other side effects would the vectorizer allow? */
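
/* For example, a store such as a[i] = x has a vdef and is therefore
   relevant, while a computation whose only uses are in loop-exit PHIs
   (e.g. the final value of a reduction) is marked live instead.  */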
297
298 static bool
299 vect_stmt_relevant_p (stmt_vec_info stmt_info, loop_vec_info loop_vinfo,
300 enum vect_relevant *relevant, bool *live_p)
301 {
302 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
303 ssa_op_iter op_iter;
304 imm_use_iterator imm_iter;
305 use_operand_p use_p;
306 def_operand_p def_p;
307
308 *relevant = vect_unused_in_scope;
309 *live_p = false;
310
311 /* cond stmt other than loop exit cond. */
312 if (is_ctrl_stmt (stmt_info->stmt)
313 && STMT_VINFO_TYPE (stmt_info) != loop_exit_ctrl_vec_info_type)
314 *relevant = vect_used_in_scope;
315
316 /* changing memory. */
317 if (gimple_code (stmt_info->stmt) != GIMPLE_PHI)
318 if (gimple_vdef (stmt_info->stmt)
319 && !gimple_clobber_p (stmt_info->stmt))
320 {
321 if (dump_enabled_p ())
322 dump_printf_loc (MSG_NOTE, vect_location,
323 "vec_stmt_relevant_p: stmt has vdefs.\n");
324 *relevant = vect_used_in_scope;
325 }
326
327 /* uses outside the loop. */
328 FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt_info->stmt, op_iter, SSA_OP_DEF)
329 {
330 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
331 {
332 basic_block bb = gimple_bb (USE_STMT (use_p));
333 if (!flow_bb_inside_loop_p (loop, bb))
334 {
335 if (is_gimple_debug (USE_STMT (use_p)))
336 continue;
337
338 if (dump_enabled_p ())
339 dump_printf_loc (MSG_NOTE, vect_location,
340 "vec_stmt_relevant_p: used out of loop.\n");
341
342 /* We expect all such uses to be in the loop exit phis
343 (because of loop closed form) */
344 gcc_assert (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI);
345 gcc_assert (bb == single_exit (loop)->dest);
346
347 *live_p = true;
348 }
349 }
350 }
351
352 if (*live_p && *relevant == vect_unused_in_scope
353 && !is_simple_and_all_uses_invariant (stmt_info, loop_vinfo))
354 {
355 if (dump_enabled_p ())
356 dump_printf_loc (MSG_NOTE, vect_location,
357 "vec_stmt_relevant_p: stmt live but not relevant.\n");
358 *relevant = vect_used_only_live;
359 }
360
361 return (*live_p || *relevant);
362 }
363
364
365 /* Function exist_non_indexing_operands_for_use_p
366
367 USE is one of the uses attached to STMT_INFO. Check if USE is
368 used in STMT_INFO for anything other than indexing an array. */
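
/* For example, in the store a[i_1] = x_2 (names purely illustrative)
   the use of x_2 is a non-indexing operand, whereas i_1 only takes part
   in computing the address and therefore does not count.  */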
369
370 static bool
371 exist_non_indexing_operands_for_use_p (tree use, stmt_vec_info stmt_info)
372 {
373 tree operand;
374
375 /* USE corresponds to some operand in STMT. If there is no data
376 reference in STMT, then any operand that corresponds to USE
377 is not indexing an array. */
378 if (!STMT_VINFO_DATA_REF (stmt_info))
379 return true;
380
381 /* STMT has a data_ref. FORNOW this means that it is of one of
382 the following forms:
383 -1- ARRAY_REF = var
384 -2- var = ARRAY_REF
385 (This should have been verified in analyze_data_refs).
386
387 'var' in the second case corresponds to a def, not a use,
388 so USE cannot correspond to any operands that are not used
389 for array indexing.
390
391 Therefore, all we need to check is if STMT falls into the
392 first case, and whether var corresponds to USE. */
393
394 gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
395 if (!assign || !gimple_assign_copy_p (assign))
396 {
397 gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
398 if (call && gimple_call_internal_p (call))
399 {
400 internal_fn ifn = gimple_call_internal_fn (call);
401 int mask_index = internal_fn_mask_index (ifn);
402 if (mask_index >= 0
403 && use == gimple_call_arg (call, mask_index))
404 return true;
405 int stored_value_index = internal_fn_stored_value_index (ifn);
406 if (stored_value_index >= 0
407 && use == gimple_call_arg (call, stored_value_index))
408 return true;
409 if (internal_gather_scatter_fn_p (ifn)
410 && use == gimple_call_arg (call, 1))
411 return true;
412 }
413 return false;
414 }
415
416 if (TREE_CODE (gimple_assign_lhs (assign)) == SSA_NAME)
417 return false;
418 operand = gimple_assign_rhs1 (assign);
419 if (TREE_CODE (operand) != SSA_NAME)
420 return false;
421
422 if (operand == use)
423 return true;
424
425 return false;
426 }
427
428
429 /*
430 Function process_use.
431
432 Inputs:
433 - a USE in STMT_VINFO in a loop represented by LOOP_VINFO
434 - RELEVANT - enum value to be set in the STMT_VINFO of the stmt
435 that defined USE. This is done by calling mark_relevant and passing it
436 the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
437 - FORCE is true if exist_non_indexing_operands_for_use_p check shouldn't
438 be performed.
439
440 Outputs:
441 Generally, LIVE_P and RELEVANT are used to define the liveness and
442 relevance info of the DEF_STMT of this USE:
443 STMT_VINFO_LIVE_P (DEF_stmt_vinfo) <-- live_p
444 STMT_VINFO_RELEVANT (DEF_stmt_vinfo) <-- relevant
445 Exceptions:
446 - case 1: If USE is used only for address computations (e.g. array indexing),
447 which does not need to be directly vectorized, then the liveness/relevance
448 of the respective DEF_STMT is left unchanged.
449 - case 2: If STMT_VINFO is a reduction phi and DEF_STMT is a reduction stmt,
450 we skip DEF_STMT because it has already been processed.
451 - case 3: If DEF_STMT and STMT_VINFO are in different nests, then
452 "relevant" will be modified accordingly.
453
454 Return true if everything is as expected. Return false otherwise. */
455
456 static opt_result
457 process_use (stmt_vec_info stmt_vinfo, tree use, loop_vec_info loop_vinfo,
458 enum vect_relevant relevant, vec<stmt_vec_info> *worklist,
459 bool force)
460 {
461 stmt_vec_info dstmt_vinfo;
462 enum vect_def_type dt;
463
464 /* case 1: we are only interested in uses that need to be vectorized. Uses
465 that are used for address computation are not considered relevant. */
466 if (!force && !exist_non_indexing_operands_for_use_p (use, stmt_vinfo))
467 return opt_result::success ();
468
469 if (!vect_is_simple_use (use, loop_vinfo, &dt, &dstmt_vinfo))
470 return opt_result::failure_at (stmt_vinfo->stmt,
471 "not vectorized:"
472 " unsupported use in stmt.\n");
473
474 if (!dstmt_vinfo)
475 return opt_result::success ();
476
477 basic_block def_bb = gimple_bb (dstmt_vinfo->stmt);
478 basic_block bb = gimple_bb (stmt_vinfo->stmt);
479
480 /* case 2: A reduction phi (STMT) defined by a reduction stmt (DSTMT_VINFO).
481 We have to force the stmt live since the epilogue loop needs it to
482 continue computing the reduction. */
483 if (gimple_code (stmt_vinfo->stmt) == GIMPLE_PHI
484 && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
485 && gimple_code (dstmt_vinfo->stmt) != GIMPLE_PHI
486 && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def
487 && bb->loop_father == def_bb->loop_father)
488 {
489 if (dump_enabled_p ())
490 dump_printf_loc (MSG_NOTE, vect_location,
491 "reduc-stmt defining reduc-phi in the same nest.\n");
492 vect_mark_relevant (worklist, dstmt_vinfo, relevant, true);
493 return opt_result::success ();
494 }
495
496 /* case 3a: outer-loop stmt defining an inner-loop stmt:
497 outer-loop-header-bb:
498 d = dstmt_vinfo
499 inner-loop:
500 stmt # use (d)
501 outer-loop-tail-bb:
502 ... */
503 if (flow_loop_nested_p (def_bb->loop_father, bb->loop_father))
504 {
505 if (dump_enabled_p ())
506 dump_printf_loc (MSG_NOTE, vect_location,
507 "outer-loop def-stmt defining inner-loop stmt.\n");
508
509 switch (relevant)
510 {
511 case vect_unused_in_scope:
512 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_nested_cycle) ?
513 vect_used_in_scope : vect_unused_in_scope;
514 break;
515
516 case vect_used_in_outer_by_reduction:
517 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
518 relevant = vect_used_by_reduction;
519 break;
520
521 case vect_used_in_outer:
522 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
523 relevant = vect_used_in_scope;
524 break;
525
526 case vect_used_in_scope:
527 break;
528
529 default:
530 gcc_unreachable ();
531 }
532 }
533
534 /* case 3b: inner-loop stmt defining an outer-loop stmt:
535 outer-loop-header-bb:
536 ...
537 inner-loop:
538 d = dstmt_vinfo
539 outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
540 stmt # use (d) */
541 else if (flow_loop_nested_p (bb->loop_father, def_bb->loop_father))
542 {
543 if (dump_enabled_p ())
544 dump_printf_loc (MSG_NOTE, vect_location,
545 "inner-loop def-stmt defining outer-loop stmt.\n");
546
547 switch (relevant)
548 {
549 case vect_unused_in_scope:
550 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
551 || STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_double_reduction_def) ?
552 vect_used_in_outer_by_reduction : vect_unused_in_scope;
553 break;
554
555 case vect_used_by_reduction:
556 case vect_used_only_live:
557 relevant = vect_used_in_outer_by_reduction;
558 break;
559
560 case vect_used_in_scope:
561 relevant = vect_used_in_outer;
562 break;
563
564 default:
565 gcc_unreachable ();
566 }
567 }
568 /* We are also not interested in uses on loop PHI backedges that are
569 inductions. Otherwise we'll needlessly vectorize the IV increment
570 and cause hybrid SLP for SLP inductions. Unless the PHI is live
571 of course. */
572 else if (gimple_code (stmt_vinfo->stmt) == GIMPLE_PHI
573 && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_induction_def
574 && ! STMT_VINFO_LIVE_P (stmt_vinfo)
575 && (PHI_ARG_DEF_FROM_EDGE (stmt_vinfo->stmt,
576 loop_latch_edge (bb->loop_father))
577 == use))
578 {
579 if (dump_enabled_p ())
580 dump_printf_loc (MSG_NOTE, vect_location,
581 "induction value on backedge.\n");
582 return opt_result::success ();
583 }
584
585
586 vect_mark_relevant (worklist, dstmt_vinfo, relevant, false);
587 return opt_result::success ();
588 }
589
590
591 /* Function vect_mark_stmts_to_be_vectorized.
592
593 Not all stmts in the loop need to be vectorized. For example:
594
595 for i...
596 for j...
597 1. T0 = i + j
598 2. T1 = a[T0]
599
600 3. j = j + 1
601
602 Stmts 1 and 3 do not need to be vectorized, because loop control and
603 addressing of vectorized data-refs are handled differently.
604
605 This pass detects such stmts. */
606
607 opt_result
608 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo, bool *fatal)
609 {
610 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
611 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
612 unsigned int nbbs = loop->num_nodes;
613 gimple_stmt_iterator si;
614 unsigned int i;
615 basic_block bb;
616 bool live_p;
617 enum vect_relevant relevant;
618
619 DUMP_VECT_SCOPE ("vect_mark_stmts_to_be_vectorized");
620
621 auto_vec<stmt_vec_info, 64> worklist;
622
623 /* 1. Init worklist. */
624 for (i = 0; i < nbbs; i++)
625 {
626 bb = bbs[i];
627 for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
628 {
629 stmt_vec_info phi_info = loop_vinfo->lookup_stmt (gsi_stmt (si));
630 if (dump_enabled_p ())
631 dump_printf_loc (MSG_NOTE, vect_location, "init: phi relevant? %G",
632 phi_info->stmt);
633
634 if (vect_stmt_relevant_p (phi_info, loop_vinfo, &relevant, &live_p))
635 vect_mark_relevant (&worklist, phi_info, relevant, live_p);
636 }
637 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
638 {
639 if (is_gimple_debug (gsi_stmt (si)))
640 continue;
641 stmt_vec_info stmt_info = loop_vinfo->lookup_stmt (gsi_stmt (si));
642 if (dump_enabled_p ())
643 dump_printf_loc (MSG_NOTE, vect_location,
644 "init: stmt relevant? %G", stmt_info->stmt);
645
646 if (vect_stmt_relevant_p (stmt_info, loop_vinfo, &relevant, &live_p))
647 vect_mark_relevant (&worklist, stmt_info, relevant, live_p);
648 }
649 }
650
651 /* 2. Process_worklist */
652 while (worklist.length () > 0)
653 {
654 use_operand_p use_p;
655 ssa_op_iter iter;
656
657 stmt_vec_info stmt_vinfo = worklist.pop ();
658 if (dump_enabled_p ())
659 dump_printf_loc (MSG_NOTE, vect_location,
660 "worklist: examine stmt: %G", stmt_vinfo->stmt);
661
662 /* Examine the USEs of STMT. For each USE, mark the stmt that defines it
663 (DEF_STMT) as relevant/irrelevant according to the relevance property
664 of STMT. */
665 relevant = STMT_VINFO_RELEVANT (stmt_vinfo);
666
667 /* Generally, the relevance property of STMT (in STMT_VINFO_RELEVANT) is
668 propagated as is to the DEF_STMTs of its USEs.
669
670 One exception is when STMT has been identified as defining a reduction
671 variable; in this case we set the relevance to vect_used_by_reduction.
672 This is because we distinguish between two kinds of relevant stmts -
673 those that are used by a reduction computation, and those that are
674 (also) used by a regular computation. This allows us later on to
675 identify stmts that are used solely by a reduction, and therefore the
676 order of the results that they produce does not have to be kept. */
677
678 switch (STMT_VINFO_DEF_TYPE (stmt_vinfo))
679 {
680 case vect_reduction_def:
681 gcc_assert (relevant != vect_unused_in_scope);
682 if (relevant != vect_unused_in_scope
683 && relevant != vect_used_in_scope
684 && relevant != vect_used_by_reduction
685 && relevant != vect_used_only_live)
686 return opt_result::failure_at
687 (stmt_vinfo->stmt, "unsupported use of reduction.\n");
688 break;
689
690 case vect_nested_cycle:
691 if (relevant != vect_unused_in_scope
692 && relevant != vect_used_in_outer_by_reduction
693 && relevant != vect_used_in_outer)
694 return opt_result::failure_at
695 (stmt_vinfo->stmt, "unsupported use of nested cycle.\n");
696 break;
697
698 case vect_double_reduction_def:
699 if (relevant != vect_unused_in_scope
700 && relevant != vect_used_by_reduction
701 && relevant != vect_used_only_live)
702 return opt_result::failure_at
703 (stmt_vinfo->stmt, "unsupported use of double reduction.\n");
704 break;
705
706 default:
707 break;
708 }
709
710 if (is_pattern_stmt_p (stmt_vinfo))
711 {
712 /* Pattern statements are not inserted into the code, so
713 FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
714 have to scan the RHS or function arguments instead. */
715 if (gassign *assign = dyn_cast <gassign *> (stmt_vinfo->stmt))
716 {
717 enum tree_code rhs_code = gimple_assign_rhs_code (assign);
718 tree op = gimple_assign_rhs1 (assign);
719
720 i = 1;
721 if (rhs_code == COND_EXPR && COMPARISON_CLASS_P (op))
722 {
723 opt_result res
724 = process_use (stmt_vinfo, TREE_OPERAND (op, 0),
725 loop_vinfo, relevant, &worklist, false);
726 if (!res)
727 return res;
728 res = process_use (stmt_vinfo, TREE_OPERAND (op, 1),
729 loop_vinfo, relevant, &worklist, false);
730 if (!res)
731 return res;
732 i = 2;
733 }
734 for (; i < gimple_num_ops (assign); i++)
735 {
736 op = gimple_op (assign, i);
737 if (TREE_CODE (op) == SSA_NAME)
738 {
739 opt_result res
740 = process_use (stmt_vinfo, op, loop_vinfo, relevant,
741 &worklist, false);
742 if (!res)
743 return res;
744 }
745 }
746 }
747 else if (gcall *call = dyn_cast <gcall *> (stmt_vinfo->stmt))
748 {
749 for (i = 0; i < gimple_call_num_args (call); i++)
750 {
751 tree arg = gimple_call_arg (call, i);
752 opt_result res
753 = process_use (stmt_vinfo, arg, loop_vinfo, relevant,
754 &worklist, false);
755 if (!res)
756 return res;
757 }
758 }
759 }
760 else
761 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt_vinfo->stmt, iter, SSA_OP_USE)
762 {
763 tree op = USE_FROM_PTR (use_p);
764 opt_result res
765 = process_use (stmt_vinfo, op, loop_vinfo, relevant,
766 &worklist, false);
767 if (!res)
768 return res;
769 }
770
771 if (STMT_VINFO_GATHER_SCATTER_P (stmt_vinfo))
772 {
773 gather_scatter_info gs_info;
774 if (!vect_check_gather_scatter (stmt_vinfo, loop_vinfo, &gs_info))
775 gcc_unreachable ();
776 opt_result res
777 = process_use (stmt_vinfo, gs_info.offset, loop_vinfo, relevant,
778 &worklist, true);
779 if (!res)
780 {
781 if (fatal)
782 *fatal = false;
783 return res;
784 }
785 }
786 } /* while worklist */
787
788 return opt_result::success ();
789 }
790
791 /* Function vect_model_simple_cost.
792
793 Models cost for simple operations, i.e. those that only emit ncopies of a
794 single op. Right now, this does not account for multiple insns that could
795 be generated for the single vector op. We will handle that shortly. */
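
/* As a sketch of the accounting below: NCOPIES copies of KIND are
   recorded in the loop body, plus (outside of SLP) one scalar_to_vec
   prologue cost for each constant or external operand that has to be
   broadcast into a vector.  */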
796
797 static void
798 vect_model_simple_cost (vec_info *,
799 stmt_vec_info stmt_info, int ncopies,
800 enum vect_def_type *dt,
801 int ndts,
802 slp_tree node,
803 stmt_vector_for_cost *cost_vec,
804 vect_cost_for_stmt kind = vector_stmt)
805 {
806 int inside_cost = 0, prologue_cost = 0;
807
808 gcc_assert (cost_vec != NULL);
809
810 /* ??? Somehow we need to fix this at the callers. */
811 if (node)
812 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (node);
813
814 if (!node)
815 /* Cost the "broadcast" of a scalar operand into a vector operand.
816 Use scalar_to_vec to cost the broadcast, as elsewhere in the vector
817 cost model. */
818 for (int i = 0; i < ndts; i++)
819 if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
820 prologue_cost += record_stmt_cost (cost_vec, 1, scalar_to_vec,
821 stmt_info, 0, vect_prologue);
822
823 /* Pass the inside-of-loop statements to the target-specific cost model. */
824 inside_cost += record_stmt_cost (cost_vec, ncopies, kind,
825 stmt_info, 0, vect_body);
826
827 if (dump_enabled_p ())
828 dump_printf_loc (MSG_NOTE, vect_location,
829 "vect_model_simple_cost: inside_cost = %d, "
830 "prologue_cost = %d .\n", inside_cost, prologue_cost);
831 }
832
833
834 /* Model cost for type demotion and promotion operations. PWR is
835 normally zero for single-step promotions and demotions. It will be
836 one if two-step promotion/demotion is required, and so on. NCOPIES
837 is the number of vector results (and thus number of instructions)
838 for the narrowest end of the operation chain. Each additional
839 step doubles the number of instructions required. */
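
/* For instance, with NCOPIES == 2 and PWR == 1 the loop below records
   2 + 4 = 6 vec_promote_demote statements in the loop body: two for
   the narrowest step and four for the next, wider step.  */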
840
841 static void
842 vect_model_promotion_demotion_cost (stmt_vec_info stmt_info,
843 enum vect_def_type *dt,
844 unsigned int ncopies, int pwr,
845 stmt_vector_for_cost *cost_vec)
846 {
847 int i;
848 int inside_cost = 0, prologue_cost = 0;
849
850 for (i = 0; i < pwr + 1; i++)
851 {
852 inside_cost += record_stmt_cost (cost_vec, ncopies, vec_promote_demote,
853 stmt_info, 0, vect_body);
854 ncopies *= 2;
855 }
856
857 /* FORNOW: Assuming maximum 2 args per stmts. */
858 for (i = 0; i < 2; i++)
859 if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
860 prologue_cost += record_stmt_cost (cost_vec, 1, vector_stmt,
861 stmt_info, 0, vect_prologue);
862
863 if (dump_enabled_p ())
864 dump_printf_loc (MSG_NOTE, vect_location,
865 "vect_model_promotion_demotion_cost: inside_cost = %d, "
866 "prologue_cost = %d .\n", inside_cost, prologue_cost);
867 }
868
869 /* Returns true if the current function returns DECL. */
870
871 static bool
872 cfun_returns (tree decl)
873 {
874 edge_iterator ei;
875 edge e;
876 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
877 {
878 greturn *ret = safe_dyn_cast <greturn *> (last_stmt (e->src));
879 if (!ret)
880 continue;
881 if (gimple_return_retval (ret) == decl)
882 return true;
883 /* We often end up with an aggregate copy to the result decl;
884 handle that case as well. First skip intermediate clobbers
885 though. */
886 gimple *def = ret;
887 do
888 {
889 def = SSA_NAME_DEF_STMT (gimple_vuse (def));
890 }
891 while (gimple_clobber_p (def));
892 if (is_a <gassign *> (def)
893 && gimple_assign_lhs (def) == gimple_return_retval (ret)
894 && gimple_assign_rhs1 (def) == decl)
895 return true;
896 }
897 return false;
898 }
899
900 /* Function vect_model_store_cost
901
902 Models cost for stores. In the case of grouped accesses, one access
903 has the overhead of the grouped access attributed to it. */
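
/* Roughly: invariant stores pay a scalar_to_vec prologue cost, grouped
   contiguous-permute stores additionally pay vec_perm costs for the
   interleaving, and elementwise or gather/scatter stores are costed as
   individual scalar stores (elementwise and strided-SLP stores also pay
   to extract the elements from the vector).  */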
904
905 static void
906 vect_model_store_cost (vec_info *vinfo, stmt_vec_info stmt_info, int ncopies,
907 vect_memory_access_type memory_access_type,
908 vec_load_store_type vls_type, slp_tree slp_node,
909 stmt_vector_for_cost *cost_vec)
910 {
911 unsigned int inside_cost = 0, prologue_cost = 0;
912 stmt_vec_info first_stmt_info = stmt_info;
913 bool grouped_access_p = STMT_VINFO_GROUPED_ACCESS (stmt_info);
914
915 /* ??? Somehow we need to fix this at the callers. */
916 if (slp_node)
917 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
918
919 if (vls_type == VLS_STORE_INVARIANT)
920 {
921 if (!slp_node)
922 prologue_cost += record_stmt_cost (cost_vec, 1, scalar_to_vec,
923 stmt_info, 0, vect_prologue);
924 }
925
926 /* Grouped stores update all elements in the group at once,
927 so we want the DR for the first statement. */
928 if (!slp_node && grouped_access_p)
929 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
930
931 /* True if we should include any once-per-group costs as well as
932 the cost of the statement itself. For SLP we only get called
933 once per group anyhow. */
934 bool first_stmt_p = (first_stmt_info == stmt_info);
935
936 /* We assume that the cost of a single store-lanes instruction is
937 equivalent to the cost of DR_GROUP_SIZE separate stores. If a grouped
938 access is instead being provided by a permute-and-store operation,
939 include the cost of the permutes. */
940 if (first_stmt_p
941 && memory_access_type == VMAT_CONTIGUOUS_PERMUTE)
942 {
943 /* Uses high and low interleave or shuffle operations for each
944 needed permute. */
945 int group_size = DR_GROUP_SIZE (first_stmt_info);
946 int nstmts = ncopies * ceil_log2 (group_size) * group_size;
947 inside_cost = record_stmt_cost (cost_vec, nstmts, vec_perm,
948 stmt_info, 0, vect_body);
949
950 if (dump_enabled_p ())
951 dump_printf_loc (MSG_NOTE, vect_location,
952 "vect_model_store_cost: strided group_size = %d .\n",
953 group_size);
954 }
955
956 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
957 /* Costs of the stores. */
958 if (memory_access_type == VMAT_ELEMENTWISE
959 || memory_access_type == VMAT_GATHER_SCATTER)
960 {
961 /* N scalar stores plus extracting the elements. */
962 unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
963 inside_cost += record_stmt_cost (cost_vec,
964 ncopies * assumed_nunits,
965 scalar_store, stmt_info, 0, vect_body);
966 }
967 else
968 vect_get_store_cost (vinfo, stmt_info, ncopies, &inside_cost, cost_vec);
969
970 if (memory_access_type == VMAT_ELEMENTWISE
971 || memory_access_type == VMAT_STRIDED_SLP)
972 {
973 /* N scalar stores plus extracting the elements. */
974 unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
975 inside_cost += record_stmt_cost (cost_vec,
976 ncopies * assumed_nunits,
977 vec_to_scalar, stmt_info, 0, vect_body);
978 }
979
980 /* When vectorizing a store into the function result, assign
981 a penalty if the function returns in a multi-register location.
982 In this case we assume we'll end up with having to spill the
983 vector result and do piecewise loads as a conservative estimate. */
984 tree base = get_base_address (STMT_VINFO_DATA_REF (stmt_info)->ref);
985 if (base
986 && (TREE_CODE (base) == RESULT_DECL
987 || (DECL_P (base) && cfun_returns (base)))
988 && !aggregate_value_p (base, cfun->decl))
989 {
990 rtx reg = hard_function_value (TREE_TYPE (base), cfun->decl, 0, 1);
991 /* ??? Handle PARALLEL in some way. */
992 if (REG_P (reg))
993 {
994 int nregs = hard_regno_nregs (REGNO (reg), GET_MODE (reg));
995 /* Assume that a single reg-reg move is possible and cheap,
996 do not account for vector to gp register move cost. */
997 if (nregs > 1)
998 {
999 /* Spill. */
1000 prologue_cost += record_stmt_cost (cost_vec, ncopies,
1001 vector_store,
1002 stmt_info, 0, vect_epilogue);
1003 /* Loads. */
1004 prologue_cost += record_stmt_cost (cost_vec, ncopies * nregs,
1005 scalar_load,
1006 stmt_info, 0, vect_epilogue);
1007 }
1008 }
1009 }
1010
1011 if (dump_enabled_p ())
1012 dump_printf_loc (MSG_NOTE, vect_location,
1013 "vect_model_store_cost: inside_cost = %d, "
1014 "prologue_cost = %d .\n", inside_cost, prologue_cost);
1015 }
1016
1017
1018 /* Calculate cost of DR's memory access. */
1019 void
1020 vect_get_store_cost (vec_info *vinfo, stmt_vec_info stmt_info, int ncopies,
1021 unsigned int *inside_cost,
1022 stmt_vector_for_cost *body_cost_vec)
1023 {
1024 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
1025 int alignment_support_scheme
1026 = vect_supportable_dr_alignment (vinfo, dr_info, false);
1027
1028 switch (alignment_support_scheme)
1029 {
1030 case dr_aligned:
1031 {
1032 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1033 vector_store, stmt_info, 0,
1034 vect_body);
1035
1036 if (dump_enabled_p ())
1037 dump_printf_loc (MSG_NOTE, vect_location,
1038 "vect_model_store_cost: aligned.\n");
1039 break;
1040 }
1041
1042 case dr_unaligned_supported:
1043 {
1044 /* Here, we assign an additional cost for the unaligned store. */
1045 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1046 unaligned_store, stmt_info,
1047 DR_MISALIGNMENT (dr_info),
1048 vect_body);
1049 if (dump_enabled_p ())
1050 dump_printf_loc (MSG_NOTE, vect_location,
1051 "vect_model_store_cost: unaligned supported by "
1052 "hardware.\n");
1053 break;
1054 }
1055
1056 case dr_unaligned_unsupported:
1057 {
1058 *inside_cost = VECT_MAX_COST;
1059
1060 if (dump_enabled_p ())
1061 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1062 "vect_model_store_cost: unsupported access.\n");
1063 break;
1064 }
1065
1066 default:
1067 gcc_unreachable ();
1068 }
1069 }
1070
1071
1072 /* Function vect_model_load_cost
1073
1074 Models cost for loads. In the case of grouped accesses, one access has
1075 the overhead of the grouped access attributed to it. Since unaligned
1076 accesses are supported for loads, we also account for the costs of the
1077 access scheme chosen. */
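
/* Roughly mirroring the store case: permuted SLP loads and grouped
   contiguous-permute loads add vec_perm costs, load-lanes accounts for
   vectors that are loaded but never used, and elementwise or
   gather/scatter loads are costed as scalar loads (elementwise and
   strided-SLP loads also pay a vec_construct per copy).  */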
1078
1079 static void
1080 vect_model_load_cost (vec_info *vinfo,
1081 stmt_vec_info stmt_info, unsigned ncopies, poly_uint64 vf,
1082 vect_memory_access_type memory_access_type,
1083 slp_tree slp_node,
1084 stmt_vector_for_cost *cost_vec)
1085 {
1086 unsigned int inside_cost = 0, prologue_cost = 0;
1087 bool grouped_access_p = STMT_VINFO_GROUPED_ACCESS (stmt_info);
1088
1089 gcc_assert (cost_vec);
1090
1091 /* ??? Somehow we need to fix this at the callers. */
1092 if (slp_node)
1093 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
1094
1095 if (slp_node && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
1096 {
1097 /* If the load is permuted then the alignment is determined by
1098 the first group element not by the first scalar stmt DR. */
1099 stmt_vec_info first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
1100 /* Record the cost for the permutation. */
1101 unsigned n_perms, n_loads;
1102 vect_transform_slp_perm_load (vinfo, slp_node, vNULL, NULL,
1103 vf, true, &n_perms, &n_loads);
1104 inside_cost += record_stmt_cost (cost_vec, n_perms, vec_perm,
1105 first_stmt_info, 0, vect_body);
1106
1107 /* And adjust the number of loads performed. This handles
1108 redundancies as well as loads that are later dead. */
1109 ncopies = n_loads;
1110 }
1111
1112 /* Grouped loads read all elements in the group at once,
1113 so we want the DR for the first statement. */
1114 stmt_vec_info first_stmt_info = stmt_info;
1115 if (!slp_node && grouped_access_p)
1116 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
1117
1118 /* True if we should include any once-per-group costs as well as
1119 the cost of the statement itself. For SLP we only get called
1120 once per group anyhow. */
1121 bool first_stmt_p = (first_stmt_info == stmt_info);
1122
1123 /* An IFN_LOAD_LANES will load all its vector results, regardless of which
1124 ones we actually need. Account for the cost of unused results. */
1125 if (first_stmt_p && !slp_node && memory_access_type == VMAT_LOAD_STORE_LANES)
1126 {
1127 unsigned int gaps = DR_GROUP_SIZE (first_stmt_info);
1128 stmt_vec_info next_stmt_info = first_stmt_info;
1129 do
1130 {
1131 gaps -= 1;
1132 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
1133 }
1134 while (next_stmt_info);
1135 if (gaps)
1136 {
1137 if (dump_enabled_p ())
1138 dump_printf_loc (MSG_NOTE, vect_location,
1139 "vect_model_load_cost: %d unused vectors.\n",
1140 gaps);
1141 vect_get_load_cost (vinfo, stmt_info, ncopies * gaps, false,
1142 &inside_cost, &prologue_cost,
1143 cost_vec, cost_vec, true);
1144 }
1145 }
1146
1147 /* We assume that the cost of a single load-lanes instruction is
1148 equivalent to the cost of DR_GROUP_SIZE separate loads. If a grouped
1149 access is instead being provided by a load-and-permute operation,
1150 include the cost of the permutes. */
1151 if (first_stmt_p
1152 && memory_access_type == VMAT_CONTIGUOUS_PERMUTE)
1153 {
1154 /* Uses even and odd extract operations or shuffle operations
1155 for each needed permute. */
1156 int group_size = DR_GROUP_SIZE (first_stmt_info);
1157 int nstmts = ncopies * ceil_log2 (group_size) * group_size;
1158 inside_cost += record_stmt_cost (cost_vec, nstmts, vec_perm,
1159 stmt_info, 0, vect_body);
1160
1161 if (dump_enabled_p ())
1162 dump_printf_loc (MSG_NOTE, vect_location,
1163 "vect_model_load_cost: strided group_size = %d .\n",
1164 group_size);
1165 }
1166
1167 /* The loads themselves. */
1168 if (memory_access_type == VMAT_ELEMENTWISE
1169 || memory_access_type == VMAT_GATHER_SCATTER)
1170 {
1171 /* N scalar loads plus gathering them into a vector. */
1172 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1173 unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
1174 inside_cost += record_stmt_cost (cost_vec,
1175 ncopies * assumed_nunits,
1176 scalar_load, stmt_info, 0, vect_body);
1177 }
1178 else
1179 vect_get_load_cost (vinfo, stmt_info, ncopies, first_stmt_p,
1180 &inside_cost, &prologue_cost,
1181 cost_vec, cost_vec, true);
1182 if (memory_access_type == VMAT_ELEMENTWISE
1183 || memory_access_type == VMAT_STRIDED_SLP)
1184 inside_cost += record_stmt_cost (cost_vec, ncopies, vec_construct,
1185 stmt_info, 0, vect_body);
1186
1187 if (dump_enabled_p ())
1188 dump_printf_loc (MSG_NOTE, vect_location,
1189 "vect_model_load_cost: inside_cost = %d, "
1190 "prologue_cost = %d .\n", inside_cost, prologue_cost);
1191 }
1192
1193
1194 /* Calculate cost of DR's memory access. */
1195 void
1196 vect_get_load_cost (vec_info *vinfo, stmt_vec_info stmt_info, int ncopies,
1197 bool add_realign_cost, unsigned int *inside_cost,
1198 unsigned int *prologue_cost,
1199 stmt_vector_for_cost *prologue_cost_vec,
1200 stmt_vector_for_cost *body_cost_vec,
1201 bool record_prologue_costs)
1202 {
1203 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
1204 int alignment_support_scheme
1205 = vect_supportable_dr_alignment (vinfo, dr_info, false);
1206
1207 switch (alignment_support_scheme)
1208 {
1209 case dr_aligned:
1210 {
1211 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1212 stmt_info, 0, vect_body);
1213
1214 if (dump_enabled_p ())
1215 dump_printf_loc (MSG_NOTE, vect_location,
1216 "vect_model_load_cost: aligned.\n");
1217
1218 break;
1219 }
1220 case dr_unaligned_supported:
1221 {
1222 /* Here, we assign an additional cost for the unaligned load. */
1223 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1224 unaligned_load, stmt_info,
1225 DR_MISALIGNMENT (dr_info),
1226 vect_body);
1227
1228 if (dump_enabled_p ())
1229 dump_printf_loc (MSG_NOTE, vect_location,
1230 "vect_model_load_cost: unaligned supported by "
1231 "hardware.\n");
1232
1233 break;
1234 }
1235 case dr_explicit_realign:
1236 {
1237 *inside_cost += record_stmt_cost (body_cost_vec, ncopies * 2,
1238 vector_load, stmt_info, 0, vect_body);
1239 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1240 vec_perm, stmt_info, 0, vect_body);
1241
1242 /* FIXME: If the misalignment remains fixed across the iterations of
1243 the containing loop, the following cost should be added to the
1244 prologue costs. */
1245 if (targetm.vectorize.builtin_mask_for_load)
1246 *inside_cost += record_stmt_cost (body_cost_vec, 1, vector_stmt,
1247 stmt_info, 0, vect_body);
1248
1249 if (dump_enabled_p ())
1250 dump_printf_loc (MSG_NOTE, vect_location,
1251 "vect_model_load_cost: explicit realign\n");
1252
1253 break;
1254 }
1255 case dr_explicit_realign_optimized:
1256 {
1257 if (dump_enabled_p ())
1258 dump_printf_loc (MSG_NOTE, vect_location,
1259 "vect_model_load_cost: unaligned software "
1260 "pipelined.\n");
1261
1262 /* Unaligned software pipeline has a load of an address, an initial
1263 load, and possibly a mask operation to "prime" the loop. However,
1264 if this is an access in a group of loads, which provide grouped
1265 access, then the above cost should only be considered for one
1266 access in the group. Inside the loop, there is a load op
1267 and a realignment op. */
1268
1269 if (add_realign_cost && record_prologue_costs)
1270 {
1271 *prologue_cost += record_stmt_cost (prologue_cost_vec, 2,
1272 vector_stmt, stmt_info,
1273 0, vect_prologue);
1274 if (targetm.vectorize.builtin_mask_for_load)
1275 *prologue_cost += record_stmt_cost (prologue_cost_vec, 1,
1276 vector_stmt, stmt_info,
1277 0, vect_prologue);
1278 }
1279
1280 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1281 stmt_info, 0, vect_body);
1282 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_perm,
1283 stmt_info, 0, vect_body);
1284
1285 if (dump_enabled_p ())
1286 dump_printf_loc (MSG_NOTE, vect_location,
1287 "vect_model_load_cost: explicit realign optimized"
1288 "\n");
1289
1290 break;
1291 }
1292
1293 case dr_unaligned_unsupported:
1294 {
1295 *inside_cost = VECT_MAX_COST;
1296
1297 if (dump_enabled_p ())
1298 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1299 "vect_model_load_cost: unsupported access.\n");
1300 break;
1301 }
1302
1303 default:
1304 gcc_unreachable ();
1305 }
1306 }
1307
1308 /* Insert the new stmt NEW_STMT at *GSI or at the appropriate place in
1309 the loop preheader for the vectorized stmt STMT_VINFO. */
1310
1311 static void
1312 vect_init_vector_1 (vec_info *vinfo, stmt_vec_info stmt_vinfo, gimple *new_stmt,
1313 gimple_stmt_iterator *gsi)
1314 {
1315 if (gsi)
1316 vect_finish_stmt_generation (vinfo, stmt_vinfo, new_stmt, gsi);
1317 else
1318 vinfo->insert_on_entry (stmt_vinfo, new_stmt);
1319
1320 if (dump_enabled_p ())
1321 dump_printf_loc (MSG_NOTE, vect_location,
1322 "created new init_stmt: %G", new_stmt);
1323 }
1324
1325 /* Function vect_init_vector.
1326
1327 Insert a new stmt (INIT_STMT) that initializes a new variable of type
1328 TYPE with the value VAL. If TYPE is a vector type and VAL does not have
1329 a vector type, a vector with all elements equal to VAL is created first.
1330 Place the initialization at GSI if it is not NULL. Otherwise, place the
1331 initialization at the loop preheader.
1332 Return the DEF of INIT_STMT.
1333 It will be used in the vectorization of STMT_INFO. */
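
/* For example, vectorizing x_1 = y_2 + 3 with a V4SI vector type may use
   this function to materialize the invariant vector { 3, 3, 3, 3 }; with
   GSI == NULL the initialization is emitted on loop entry so that it is
   not re-executed on every iteration.  */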
1334
1335 tree
1336 vect_init_vector (vec_info *vinfo, stmt_vec_info stmt_info, tree val, tree type,
1337 gimple_stmt_iterator *gsi)
1338 {
1339 gimple *init_stmt;
1340 tree new_temp;
1341
1342 /* We abuse this function to push something to an SSA name with initial 'val'. */
1343 if (! useless_type_conversion_p (type, TREE_TYPE (val)))
1344 {
1345 gcc_assert (TREE_CODE (type) == VECTOR_TYPE);
1346 if (! types_compatible_p (TREE_TYPE (type), TREE_TYPE (val)))
1347 {
1348 /* A scalar boolean value should be transformed into
1349 an all-zeros or all-ones value before building a vector. */
1350 if (VECTOR_BOOLEAN_TYPE_P (type))
1351 {
1352 tree true_val = build_all_ones_cst (TREE_TYPE (type));
1353 tree false_val = build_zero_cst (TREE_TYPE (type));
1354
1355 if (CONSTANT_CLASS_P (val))
1356 val = integer_zerop (val) ? false_val : true_val;
1357 else
1358 {
1359 new_temp = make_ssa_name (TREE_TYPE (type));
1360 init_stmt = gimple_build_assign (new_temp, COND_EXPR,
1361 val, true_val, false_val);
1362 vect_init_vector_1 (vinfo, stmt_info, init_stmt, gsi);
1363 val = new_temp;
1364 }
1365 }
1366 else
1367 {
1368 gimple_seq stmts = NULL;
1369 if (! INTEGRAL_TYPE_P (TREE_TYPE (val)))
1370 val = gimple_build (&stmts, VIEW_CONVERT_EXPR,
1371 TREE_TYPE (type), val);
1372 else
1373 /* ??? Condition vectorization expects us to do
1374 promotion of invariant/external defs. */
1375 val = gimple_convert (&stmts, TREE_TYPE (type), val);
1376 for (gimple_stmt_iterator gsi2 = gsi_start (stmts);
1377 !gsi_end_p (gsi2); )
1378 {
1379 init_stmt = gsi_stmt (gsi2);
1380 gsi_remove (&gsi2, false);
1381 vect_init_vector_1 (vinfo, stmt_info, init_stmt, gsi);
1382 }
1383 }
1384 }
1385 val = build_vector_from_val (type, val);
1386 }
1387
1388 new_temp = vect_get_new_ssa_name (type, vect_simple_var, "cst_");
1389 init_stmt = gimple_build_assign (new_temp, val);
1390 vect_init_vector_1 (vinfo, stmt_info, init_stmt, gsi);
1391 return new_temp;
1392 }
1393
1394
1395 /* Function vect_get_vec_defs_for_operand.
1396
1397 OP is an operand in STMT_VINFO. This function returns a vector of
1398 NCOPIES defs that will be used in the vectorized stmts for STMT_VINFO.
1399
1400 In the case that OP is an SSA_NAME which is defined in the loop, then
1401 STMT_VINFO_VEC_STMTS of the defining stmt holds the relevant defs.
1402
1403 In case OP is an invariant or constant, a new stmt that creates a vector def
1404 needs to be introduced. VECTYPE may be used to specify a required type for
1405 vector invariant. */
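
/* For instance, if OP is the constant 5 and NCOPIES is 2, one invariant
   vector built by vect_init_vector is pushed into *VEC_OPRNDS twice;
   for an SSA name defined inside the loop the NCOPIES vectorized
   statements of the defining stmt supply the defs instead.  */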
1406
1407 void
1408 vect_get_vec_defs_for_operand (vec_info *vinfo, stmt_vec_info stmt_vinfo,
1409 unsigned ncopies,
1410 tree op, vec<tree> *vec_oprnds, tree vectype)
1411 {
1412 gimple *def_stmt;
1413 enum vect_def_type dt;
1414 bool is_simple_use;
1415 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
1416
1417 if (dump_enabled_p ())
1418 dump_printf_loc (MSG_NOTE, vect_location,
1419 "vect_get_vec_defs_for_operand: %T\n", op);
1420
1421 stmt_vec_info def_stmt_info;
1422 is_simple_use = vect_is_simple_use (op, loop_vinfo, &dt,
1423 &def_stmt_info, &def_stmt);
1424 gcc_assert (is_simple_use);
1425 if (def_stmt && dump_enabled_p ())
1426 dump_printf_loc (MSG_NOTE, vect_location, " def_stmt = %G", def_stmt);
1427
1428 vec_oprnds->create (ncopies);
1429 if (dt == vect_constant_def || dt == vect_external_def)
1430 {
1431 tree stmt_vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
1432 tree vector_type;
1433
1434 if (vectype)
1435 vector_type = vectype;
1436 else if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op))
1437 && VECTOR_BOOLEAN_TYPE_P (stmt_vectype))
1438 vector_type = truth_type_for (stmt_vectype);
1439 else
1440 vector_type = get_vectype_for_scalar_type (loop_vinfo, TREE_TYPE (op));
1441
1442 gcc_assert (vector_type);
1443 tree vop = vect_init_vector (vinfo, stmt_vinfo, op, vector_type, NULL);
1444 while (ncopies--)
1445 vec_oprnds->quick_push (vop);
1446 }
1447 else
1448 {
1449 def_stmt_info = vect_stmt_to_vectorize (def_stmt_info);
1450 gcc_assert (STMT_VINFO_VEC_STMTS (def_stmt_info).length () == ncopies);
1451 for (unsigned i = 0; i < ncopies; ++i)
1452 vec_oprnds->quick_push (gimple_get_lhs
1453 (STMT_VINFO_VEC_STMTS (def_stmt_info)[i]));
1454 }
1455 }
1456
1457
1458 /* Get vectorized definitions for OP0 and OP1. */
1459
1460 void
1461 vect_get_vec_defs (vec_info *vinfo, stmt_vec_info stmt_info, slp_tree slp_node,
1462 unsigned ncopies,
1463 tree op0, vec<tree> *vec_oprnds0, tree vectype0,
1464 tree op1, vec<tree> *vec_oprnds1, tree vectype1,
1465 tree op2, vec<tree> *vec_oprnds2, tree vectype2,
1466 tree op3, vec<tree> *vec_oprnds3, tree vectype3)
1467 {
1468 if (slp_node)
1469 {
1470 if (op0)
1471 vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[0], vec_oprnds0);
1472 if (op1)
1473 vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[1], vec_oprnds1);
1474 if (op2)
1475 vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[2], vec_oprnds2);
1476 if (op3)
1477 vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[3], vec_oprnds3);
1478 }
1479 else
1480 {
1481 if (op0)
1482 vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
1483 op0, vec_oprnds0, vectype0);
1484 if (op1)
1485 vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
1486 op1, vec_oprnds1, vectype1);
1487 if (op2)
1488 vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
1489 op2, vec_oprnds2, vectype2);
1490 if (op3)
1491 vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
1492 op3, vec_oprnds3, vectype3);
1493 }
1494 }
1495
1496 void
1497 vect_get_vec_defs (vec_info *vinfo, stmt_vec_info stmt_info, slp_tree slp_node,
1498 unsigned ncopies,
1499 tree op0, vec<tree> *vec_oprnds0,
1500 tree op1, vec<tree> *vec_oprnds1,
1501 tree op2, vec<tree> *vec_oprnds2,
1502 tree op3, vec<tree> *vec_oprnds3)
1503 {
1504 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
1505 op0, vec_oprnds0, NULL_TREE,
1506 op1, vec_oprnds1, NULL_TREE,
1507 op2, vec_oprnds2, NULL_TREE,
1508 op3, vec_oprnds3, NULL_TREE);
1509 }
1510
1511 /* Helper function called by vect_finish_replace_stmt and
1512 vect_finish_stmt_generation. Set the location of the new
1513 statement and, if it can throw, add it to the EH region of STMT_INFO. */
1514
1515 static void
1516 vect_finish_stmt_generation_1 (vec_info *,
1517 stmt_vec_info stmt_info, gimple *vec_stmt)
1518 {
1519 if (dump_enabled_p ())
1520 dump_printf_loc (MSG_NOTE, vect_location, "add new stmt: %G", vec_stmt);
1521
1522 if (stmt_info)
1523 {
1524 gimple_set_location (vec_stmt, gimple_location (stmt_info->stmt));
1525
1526 /* While EH edges will generally prevent vectorization, stmt might
1527 e.g. be in a must-not-throw region. Ensure newly created stmts
1528 that could throw are part of the same region. */
1529 int lp_nr = lookup_stmt_eh_lp (stmt_info->stmt);
1530 if (lp_nr != 0 && stmt_could_throw_p (cfun, vec_stmt))
1531 add_stmt_to_eh_lp (vec_stmt, lp_nr);
1532 }
1533 else
1534 gcc_assert (!stmt_could_throw_p (cfun, vec_stmt));
1535 }
1536
1537 /* Replace the scalar statement STMT_INFO with a new vector statement VEC_STMT,
1538 which sets the same scalar result as STMT_INFO did. Also set VEC_STMT's
1539 location and EH information. */
1540
1541 void
1542 vect_finish_replace_stmt (vec_info *vinfo,
1543 stmt_vec_info stmt_info, gimple *vec_stmt)
1544 {
1545 gimple *scalar_stmt = vect_orig_stmt (stmt_info)->stmt;
1546 gcc_assert (gimple_get_lhs (scalar_stmt) == gimple_get_lhs (vec_stmt));
1547
1548 gimple_stmt_iterator gsi = gsi_for_stmt (scalar_stmt);
1549 gsi_replace (&gsi, vec_stmt, true);
1550
1551 vect_finish_stmt_generation_1 (vinfo, stmt_info, vec_stmt);
1552 }
1553
1554 /* Add VEC_STMT to the vectorized implementation of STMT_INFO and insert it
1555 before *GSI, setting VEC_STMT's location and EH information. */
1556
1557 void
1558 vect_finish_stmt_generation (vec_info *vinfo,
1559 stmt_vec_info stmt_info, gimple *vec_stmt,
1560 gimple_stmt_iterator *gsi)
1561 {
1562 gcc_assert (!stmt_info || gimple_code (stmt_info->stmt) != GIMPLE_LABEL);
1563
1564 if (!gsi_end_p (*gsi)
1565 && gimple_has_mem_ops (vec_stmt))
1566 {
1567 gimple *at_stmt = gsi_stmt (*gsi);
1568 tree vuse = gimple_vuse (at_stmt);
1569 if (vuse && TREE_CODE (vuse) == SSA_NAME)
1570 {
1571 tree vdef = gimple_vdef (at_stmt);
1572 gimple_set_vuse (vec_stmt, gimple_vuse (at_stmt));
1573 gimple_set_modified (vec_stmt, true);
1574 /* If we have an SSA vuse and insert a store, update virtual
1575 SSA form to avoid triggering the renamer. Do so only
1576 if we can easily see all uses - which is what almost always
1577 happens with the way vectorized stmts are inserted. */
1578 if ((vdef && TREE_CODE (vdef) == SSA_NAME)
1579 && ((is_gimple_assign (vec_stmt)
1580 && !is_gimple_reg (gimple_assign_lhs (vec_stmt)))
1581 || (is_gimple_call (vec_stmt)
1582 && !(gimple_call_flags (vec_stmt)
1583 & (ECF_CONST|ECF_PURE|ECF_NOVOPS)))))
1584 {
1585 tree new_vdef = copy_ssa_name (vuse, vec_stmt);
1586 gimple_set_vdef (vec_stmt, new_vdef);
1587 SET_USE (gimple_vuse_op (at_stmt), new_vdef);
1588 }
1589 }
1590 }
1591 gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);
1592 vect_finish_stmt_generation_1 (vinfo, stmt_info, vec_stmt);
1593 }
1594
1595 /* We want to vectorize a call to combined function CFN with function
1596 decl FNDECL, using VECTYPE_OUT as the type of the output and VECTYPE_IN
1597 as the types of all inputs. Check whether this is possible using
1598 an internal function, returning its code if so or IFN_LAST if not. */
1599
1600 static internal_fn
1601 vectorizable_internal_function (combined_fn cfn, tree fndecl,
1602 tree vectype_out, tree vectype_in)
1603 {
1604 internal_fn ifn;
1605 if (internal_fn_p (cfn))
1606 ifn = as_internal_fn (cfn);
1607 else
1608 ifn = associated_internal_fn (fndecl);
1609 if (ifn != IFN_LAST && direct_internal_fn_p (ifn))
1610 {
1611 const direct_internal_fn_info &info = direct_internal_fn (ifn);
1612 if (info.vectorizable)
1613 {
1614 tree type0 = (info.type0 < 0 ? vectype_out : vectype_in);
1615 tree type1 = (info.type1 < 0 ? vectype_out : vectype_in);
1616 if (direct_internal_fn_supported_p (ifn, tree_pair (type0, type1),
1617 OPTIMIZE_FOR_SPEED))
1618 return ifn;
1619 }
1620 }
1621 return IFN_LAST;
1622 }
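
/* For example, a call to sqrt maps to CFN_BUILT_IN_SQRT and hence to
   IFN_SQRT; with VECTYPE_OUT == VECTYPE_IN == V2DF the check above
   roughly asks whether the target implements the sqrt optab for
   V2DFmode, in which case the scalar call can be replaced by a single
   .SQRT internal-function call on the vector.  */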
1623
1624
1625 static tree permute_vec_elements (vec_info *, tree, tree, tree, stmt_vec_info,
1626 gimple_stmt_iterator *);
1627
1628 /* Check whether a load or store statement in the loop described by
1629 LOOP_VINFO is possible in a loop using partial vectors. This is
1630 testing whether the vectorizer pass has the appropriate support,
1631 as well as whether the target does.
1632
1633 VLS_TYPE says whether the statement is a load or store and VECTYPE
1634 is the type of the vector being loaded or stored. MEMORY_ACCESS_TYPE
1635 says how the load or store is going to be implemented and GROUP_SIZE
1636 is the number of load or store statements in the containing group.
1637 If the access is a gather load or scatter store, GS_INFO describes
1638 its arguments. If the load or store is conditional, SCALAR_MASK is the
1639 condition under which it occurs.
1640
1641 Clear LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P if a loop using partial
1642 vectors is not supported, otherwise record the required rgroup control
1643 types. */
1644
1645 static void
1646 check_load_store_for_partial_vectors (loop_vec_info loop_vinfo, tree vectype,
1647 vec_load_store_type vls_type,
1648 int group_size,
1649 vect_memory_access_type
1650 memory_access_type,
1651 gather_scatter_info *gs_info,
1652 tree scalar_mask)
1653 {
1654 /* Invariant loads need no special support. */
1655 if (memory_access_type == VMAT_INVARIANT)
1656 return;
1657
1658 vec_loop_masks *masks = &LOOP_VINFO_MASKS (loop_vinfo);
1659 machine_mode vecmode = TYPE_MODE (vectype);
1660 bool is_load = (vls_type == VLS_LOAD);
1661 if (memory_access_type == VMAT_LOAD_STORE_LANES)
1662 {
1663 if (is_load
1664 ? !vect_load_lanes_supported (vectype, group_size, true)
1665 : !vect_store_lanes_supported (vectype, group_size, true))
1666 {
1667 if (dump_enabled_p ())
1668 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1669 "can't operate on partial vectors because"
1670 " the target doesn't have an appropriate"
1671 " load/store-lanes instruction.\n");
1672 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
1673 return;
1674 }
1675 unsigned int ncopies = vect_get_num_copies (loop_vinfo, vectype);
1676 vect_record_loop_mask (loop_vinfo, masks, ncopies, vectype, scalar_mask);
1677 return;
1678 }
1679
1680 if (memory_access_type == VMAT_GATHER_SCATTER)
1681 {
1682 internal_fn ifn = (is_load
1683 ? IFN_MASK_GATHER_LOAD
1684 : IFN_MASK_SCATTER_STORE);
1685 if (!internal_gather_scatter_fn_supported_p (ifn, vectype,
1686 gs_info->memory_type,
1687 gs_info->offset_vectype,
1688 gs_info->scale))
1689 {
1690 if (dump_enabled_p ())
1691 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1692 "can't operate on partial vectors because"
1693 " the target doesn't have an appropriate"
1694 " gather load or scatter store instruction.\n");
1695 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
1696 return;
1697 }
1698 unsigned int ncopies = vect_get_num_copies (loop_vinfo, vectype);
1699 vect_record_loop_mask (loop_vinfo, masks, ncopies, vectype, scalar_mask);
1700 return;
1701 }
1702
1703 if (memory_access_type != VMAT_CONTIGUOUS
1704 && memory_access_type != VMAT_CONTIGUOUS_PERMUTE)
1705 {
1706 /* Element X of the data must come from iteration i * VF + X of the
1707 scalar loop. We need more work to support other mappings. */
1708 if (dump_enabled_p ())
1709 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1710 "can't operate on partial vectors because an"
1711 " access isn't contiguous.\n");
1712 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
1713 return;
1714 }
1715
1716 if (!VECTOR_MODE_P (vecmode))
1717 {
1718 if (dump_enabled_p ())
1719 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1720 "can't operate on partial vectors when emulating"
1721 " vector operations.\n");
1722 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
1723 return;
1724 }
1725
1726 /* We might load more scalars than we need for permuting SLP loads.
1727 We checked in get_group_load_store_type that the extra elements
1728 don't leak into a new vector. */
1729 auto get_valid_nvectors = [] (poly_uint64 size, poly_uint64 nunits)
1730 {
1731 unsigned int nvectors;
1732 if (can_div_away_from_zero_p (size, nunits, &nvectors))
1733 return nvectors;
1734 gcc_unreachable ();
1735 };
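
  /* For example, an SLP group of 3 loads vectorized with VF = 4 touches
     3 * 4 = 12 scalars; with 8-element vectors that requires
     ceil (12 / 8) = 2 vectors, which is what the rounding-up division
     above computes.  */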
1736
1737 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
1738 poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1739 machine_mode mask_mode;
1740 bool using_partial_vectors_p = false;
1741 if (targetm.vectorize.get_mask_mode (vecmode).exists (&mask_mode)
1742 && can_vec_mask_load_store_p (vecmode, mask_mode, is_load))
1743 {
1744 unsigned int nvectors = get_valid_nvectors (group_size * vf, nunits);
1745 vect_record_loop_mask (loop_vinfo, masks, nvectors, vectype, scalar_mask);
1746 using_partial_vectors_p = true;
1747 }
1748
1749 machine_mode vmode;
1750 if (get_len_load_store_mode (vecmode, is_load).exists (&vmode))
1751 {
1752 unsigned int nvectors = get_valid_nvectors (group_size * vf, nunits);
1753 vec_loop_lens *lens = &LOOP_VINFO_LENS (loop_vinfo);
1754 unsigned factor = (vecmode == vmode) ? 1 : GET_MODE_UNIT_SIZE (vecmode);
1755 vect_record_loop_len (loop_vinfo, lens, nvectors, vectype, factor);
1756 using_partial_vectors_p = true;
1757 }
1758
1759 if (!using_partial_vectors_p)
1760 {
1761 if (dump_enabled_p ())
1762 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1763 "can't operate on partial vectors because the"
1764 " target doesn't have the appropriate partial"
1765 " vectorization load or store.\n");
1766 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
1767 }
1768 }
1769
1770 /* Return the mask input to a masked load or store. VEC_MASK is the vectorized
1771 form of the scalar mask condition and LOOP_MASK, if nonnull, is the mask
1772 that needs to be applied to all loads and stores in a vectorized loop.
1773 Return VEC_MASK if LOOP_MASK is null, otherwise return VEC_MASK & LOOP_MASK.
1774
1775 MASK_TYPE is the type of both masks. If new statements are needed,
1776 insert them before GSI. */
1777
1778 static tree
1779 prepare_load_store_mask (tree mask_type, tree loop_mask, tree vec_mask,
1780 gimple_stmt_iterator *gsi)
1781 {
1782 gcc_assert (useless_type_conversion_p (mask_type, TREE_TYPE (vec_mask)));
1783 if (!loop_mask)
1784 return vec_mask;
1785
1786 gcc_assert (TREE_TYPE (loop_mask) == mask_type);
1787 tree and_res = make_temp_ssa_name (mask_type, NULL, "vec_mask_and");
1788 gimple *and_stmt = gimple_build_assign (and_res, BIT_AND_EXPR,
1789 vec_mask, loop_mask);
1790 gsi_insert_before (gsi, and_stmt, GSI_SAME_STMT);
1791 return and_res;
1792 }
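
/* A sketch of what the caller ends up with (SSA names are illustrative):
   given the vectorized condition vec_mask_34 and the loop mask
   loop_mask_50 for the current rgroup, the masked load or store uses

     vec_mask_and_51 = vec_mask_34 & loop_mask_50;

   whereas without a loop mask the vectorized condition is used as-is.  */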
1793
1794 /* Determine whether we can use a gather load or scatter store to vectorize
1795 strided load or store STMT_INFO by truncating the current offset to a
1796 smaller width. We need to be able to construct an offset vector:
1797
1798 { 0, X, X*2, X*3, ... }
1799
1800 without loss of precision, where X is STMT_INFO's DR_STEP.
1801
1802 Return true if this is possible, describing the gather load or scatter
1803 store in GS_INFO. MASKED_P is true if the load or store is conditional. */
1804
1805 static bool
1806 vect_truncate_gather_scatter_offset (stmt_vec_info stmt_info,
1807 loop_vec_info loop_vinfo, bool masked_p,
1808 gather_scatter_info *gs_info)
1809 {
1810 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
1811 data_reference *dr = dr_info->dr;
1812 tree step = DR_STEP (dr);
1813 if (TREE_CODE (step) != INTEGER_CST)
1814 {
1815 /* ??? Perhaps we could use range information here? */
1816 if (dump_enabled_p ())
1817 dump_printf_loc (MSG_NOTE, vect_location,
1818 "cannot truncate variable step.\n");
1819 return false;
1820 }
1821
1822 /* Get the number of bits in an element. */
1823 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1824 scalar_mode element_mode = SCALAR_TYPE_MODE (TREE_TYPE (vectype));
1825 unsigned int element_bits = GET_MODE_BITSIZE (element_mode);
1826
1827 /* Set COUNT to the upper limit on the number of elements - 1.
1828 Start with the maximum vectorization factor. */
1829 unsigned HOST_WIDE_INT count = vect_max_vf (loop_vinfo) - 1;
1830
1831 /* Try lowering COUNT to the number of scalar latch iterations. */
1832 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1833 widest_int max_iters;
1834 if (max_loop_iterations (loop, &max_iters)
1835 && max_iters < count)
1836 count = max_iters.to_shwi ();
1837
1838 /* Try scales of 1 and the element size. */
1839 int scales[] = { 1, vect_get_scalar_dr_size (dr_info) };
1840 wi::overflow_type overflow = wi::OVF_NONE;
1841 for (int i = 0; i < 2; ++i)
1842 {
1843 int scale = scales[i];
1844 widest_int factor;
1845 if (!wi::multiple_of_p (wi::to_widest (step), scale, SIGNED, &factor))
1846 continue;
1847
1848 /* Determine the minimum precision of (COUNT - 1) * STEP / SCALE. */
1849 widest_int range = wi::mul (count, factor, SIGNED, &overflow);
1850 if (overflow)
1851 continue;
1852 signop sign = range >= 0 ? UNSIGNED : SIGNED;
1853 unsigned int min_offset_bits = wi::min_precision (range, sign);
1854
1855 /* Find the narrowest viable offset type. */
1856 unsigned int offset_bits = 1U << ceil_log2 (min_offset_bits);
1857 tree offset_type = build_nonstandard_integer_type (offset_bits,
1858 sign == UNSIGNED);
1859
1860 /* See whether the target supports the operation with an offset
1861 no narrower than OFFSET_TYPE. */
1862 tree memory_type = TREE_TYPE (DR_REF (dr));
1863 if (!vect_gather_scatter_fn_p (loop_vinfo, DR_IS_READ (dr), masked_p,
1864 vectype, memory_type, offset_type, scale,
1865 &gs_info->ifn, &gs_info->offset_vectype))
1866 continue;
1867
1868 gs_info->decl = NULL_TREE;
1869 /* Logically the sum of DR_BASE_ADDRESS, DR_INIT and DR_OFFSET,
1870 but we don't need to store that here. */
1871 gs_info->base = NULL_TREE;
1872 gs_info->element_type = TREE_TYPE (vectype);
1873 gs_info->offset = fold_convert (offset_type, step);
1874 gs_info->offset_dt = vect_constant_def;
1875 gs_info->scale = scale;
1876 gs_info->memory_type = memory_type;
1877 return true;
1878 }
1879
1880 if (overflow && dump_enabled_p ())
1881 dump_printf_loc (MSG_NOTE, vect_location,
1882 "truncating gather/scatter offset to %d bits"
1883 " might change its value.\n", element_bits);
1884
1885 return false;
1886 }
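
/* An illustrative example of the sizing above: with DR_STEP = 4,
   4-byte elements and at most 1000 scalar latch iterations, the
   scale-4 attempt gives FACTOR = 1 and a RANGE of about 1000, so
   roughly 10 bits are needed; rounding up to a power of two gives a
   16-bit unsigned offset type, and the gather or scatter then uses
   offsets { 0, 1, 2, ... } with scale 4.  */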
1887
1888 /* Return true if we can use gather/scatter internal functions to
1889 vectorize STMT_INFO, which is a grouped or strided load or store.
1890 MASKED_P is true if the load or store is conditional.  When returning
1891 true, fill in GS_INFO with the information required to perform the
1892 operation. */
1893
1894 static bool
1895 vect_use_strided_gather_scatters_p (stmt_vec_info stmt_info,
1896 loop_vec_info loop_vinfo, bool masked_p,
1897 gather_scatter_info *gs_info)
1898 {
1899 if (!vect_check_gather_scatter (stmt_info, loop_vinfo, gs_info)
1900 || gs_info->decl)
1901 return vect_truncate_gather_scatter_offset (stmt_info, loop_vinfo,
1902 masked_p, gs_info);
1903
1904 tree old_offset_type = TREE_TYPE (gs_info->offset);
1905 tree new_offset_type = TREE_TYPE (gs_info->offset_vectype);
1906
1907 gcc_assert (TYPE_PRECISION (new_offset_type)
1908 >= TYPE_PRECISION (old_offset_type));
1909 gs_info->offset = fold_convert (new_offset_type, gs_info->offset);
1910
1911 if (dump_enabled_p ())
1912 dump_printf_loc (MSG_NOTE, vect_location,
1913 "using gather/scatter for strided/grouped access,"
1914 " scale = %d\n", gs_info->scale);
1915
1916 return true;
1917 }
1918
1919 /* STMT_INFO is a non-strided load or store, meaning that it accesses
1920 elements with a known constant step. Return -1 if that step
1921 is negative, 0 if it is zero, and 1 if it is greater than zero. */
1922
1923 static int
1924 compare_step_with_zero (vec_info *vinfo, stmt_vec_info stmt_info)
1925 {
1926 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
1927 return tree_int_cst_compare (vect_dr_behavior (vinfo, dr_info)->step,
1928 size_zero_node);
1929 }
1930
1931 /* If the target supports a permute mask that reverses the elements in
1932 a vector of type VECTYPE, return that mask, otherwise return null. */
1933
1934 static tree
1935 perm_mask_for_reverse (tree vectype)
1936 {
1937 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
1938
1939 /* The encoding has a single stepped pattern. */
1940 vec_perm_builder sel (nunits, 1, 3);
1941 for (int i = 0; i < 3; ++i)
1942 sel.quick_push (nunits - 1 - i);
1943
1944 vec_perm_indices indices (sel, 1, nunits);
1945 if (!can_vec_perm_const_p (TYPE_MODE (vectype), indices))
1946 return NULL_TREE;
1947 return vect_gen_perm_mask_checked (vectype, indices);
1948 }
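
/* For example, for V8HImode this yields the permutation
   { 7, 6, 5, 4, 3, 2, 1, 0 }; only the leading { 7, 6, 5 } are pushed
   explicitly and the remaining elements follow from the single stepped
   pattern, which also covers variable-length vectors.  */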
1949
1950 /* A subroutine of get_load_store_type, with a subset of the same
1951 arguments. Handle the case where STMT_INFO is a load or store that
1952 accesses consecutive elements with a negative step. */
1953
1954 static vect_memory_access_type
1955 get_negative_load_store_type (vec_info *vinfo,
1956 stmt_vec_info stmt_info, tree vectype,
1957 vec_load_store_type vls_type,
1958 unsigned int ncopies)
1959 {
1960 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
1961 dr_alignment_support alignment_support_scheme;
1962
1963 if (ncopies > 1)
1964 {
1965 if (dump_enabled_p ())
1966 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1967 "multiple types with negative step.\n");
1968 return VMAT_ELEMENTWISE;
1969 }
1970
1971 alignment_support_scheme = vect_supportable_dr_alignment (vinfo,
1972 dr_info, false);
1973 if (alignment_support_scheme != dr_aligned
1974 && alignment_support_scheme != dr_unaligned_supported)
1975 {
1976 if (dump_enabled_p ())
1977 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1978 "negative step but alignment required.\n");
1979 return VMAT_ELEMENTWISE;
1980 }
1981
1982 if (vls_type == VLS_STORE_INVARIANT)
1983 {
1984 if (dump_enabled_p ())
1985 dump_printf_loc (MSG_NOTE, vect_location,
1986 "negative step with invariant source;"
1987 " no permute needed.\n");
1988 return VMAT_CONTIGUOUS_DOWN;
1989 }
1990
1991 if (!perm_mask_for_reverse (vectype))
1992 {
1993 if (dump_enabled_p ())
1994 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1995 "negative step and reversing not supported.\n");
1996 return VMAT_ELEMENTWISE;
1997 }
1998
1999 return VMAT_CONTIGUOUS_REVERSE;
2000 }
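
/* A sketch of the common case: for
     for (i = n - 1; i >= 0; i--) a[i] = b[i];
   the step is -4 with 4-byte elements.  Provided the alignment check
   passes and perm_mask_for_reverse succeeds, the accesses are
   classified VMAT_CONTIGUOUS_REVERSE: each copy accesses a full
   contiguous vector at a decreasing address and the element order is
   fixed up with the reversing permutation.  */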
2001
2002 /* STMT_INFO is either a masked or unconditional store. Return the value
2003 being stored. */
2004
2005 tree
2006 vect_get_store_rhs (stmt_vec_info stmt_info)
2007 {
2008 if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
2009 {
2010 gcc_assert (gimple_assign_single_p (assign));
2011 return gimple_assign_rhs1 (assign);
2012 }
2013 if (gcall *call = dyn_cast <gcall *> (stmt_info->stmt))
2014 {
2015 internal_fn ifn = gimple_call_internal_fn (call);
2016 int index = internal_fn_stored_value_index (ifn);
2017 gcc_assert (index >= 0);
2018 return gimple_call_arg (call, index);
2019 }
2020 gcc_unreachable ();
2021 }
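
/* For a plain store such as MEM[p_7] = x_3 this returns x_3; for a
   masked store such as .MASK_STORE (p_7, 32B, mask_5, x_3) it returns
   the stored-value argument, which internal_fn_stored_value_index
   reports as operand 3 for IFN_MASK_STORE.  */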
2022
2023 /* Function VECTOR_VECTOR_COMPOSITION_TYPE
2024
2025 This function returns a vector type which can be composed from NELTS pieces,
2026 recording the type of each piece in *PTYPE.  VTYPE should be a vector type,
2027 and the returned vector has the same vector size as VTYPE.  The function first
2028 checks whether the target supports constructing such a vector from pieces-sized
2029 vector modes; if not, it then checks whether a pieces-sized scalar mode can be
2030 used instead.  It returns NULL_TREE if no suitable composition can be found.
2031
2032 For example, for (vtype=V16QI, nelts=4), we can probably get:
2033 - V16QI with PTYPE V4QI.
2034 - V4SI with PTYPE SI.
2035 - NULL_TREE. */
2036
2037 static tree
2038 vector_vector_composition_type (tree vtype, poly_uint64 nelts, tree *ptype)
2039 {
2040 gcc_assert (VECTOR_TYPE_P (vtype));
2041 gcc_assert (known_gt (nelts, 0U));
2042
2043 machine_mode vmode = TYPE_MODE (vtype);
2044 if (!VECTOR_MODE_P (vmode))
2045 return NULL_TREE;
2046
2047 poly_uint64 vbsize = GET_MODE_BITSIZE (vmode);
2048 unsigned int pbsize;
2049 if (constant_multiple_p (vbsize, nelts, &pbsize))
2050 {
2051 /* First check if vec_init optab supports construction from
2052 vector pieces directly. */
2053 scalar_mode elmode = SCALAR_TYPE_MODE (TREE_TYPE (vtype));
2054 poly_uint64 inelts = pbsize / GET_MODE_BITSIZE (elmode);
2055 machine_mode rmode;
2056 if (related_vector_mode (vmode, elmode, inelts).exists (&rmode)
2057 && (convert_optab_handler (vec_init_optab, vmode, rmode)
2058 != CODE_FOR_nothing))
2059 {
2060 *ptype = build_vector_type (TREE_TYPE (vtype), inelts);
2061 return vtype;
2062 }
2063
2064 /* Otherwise check whether there is an integer type of the same piece size
2065 and whether the vec_init optab supports construction from it directly.  */
2066 if (int_mode_for_size (pbsize, 0).exists (&elmode)
2067 && related_vector_mode (vmode, elmode, nelts).exists (&rmode)
2068 && (convert_optab_handler (vec_init_optab, rmode, elmode)
2069 != CODE_FOR_nothing))
2070 {
2071 *ptype = build_nonstandard_integer_type (pbsize, 1);
2072 return build_vector_type (*ptype, nelts);
2073 }
2074 }
2075
2076 return NULL_TREE;
2077 }
2078
2079 /* A subroutine of get_load_store_type, with a subset of the same
2080 arguments. Handle the case where STMT_INFO is part of a grouped load
2081 or store.
2082
2083 For stores, the statements in the group are all consecutive
2084 and there is no gap at the end. For loads, the statements in the
2085 group might not be consecutive; there can be gaps between statements
2086 as well as at the end. */
2087
2088 static bool
2089 get_group_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info,
2090 tree vectype, slp_tree slp_node,
2091 bool masked_p, vec_load_store_type vls_type,
2092 vect_memory_access_type *memory_access_type,
2093 dr_alignment_support *alignment_support_scheme,
2094 gather_scatter_info *gs_info)
2095 {
2096 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
2097 class loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
2098 stmt_vec_info first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
2099 dr_vec_info *first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
2100 unsigned int group_size = DR_GROUP_SIZE (first_stmt_info);
2101 bool single_element_p = (stmt_info == first_stmt_info
2102 && !DR_GROUP_NEXT_ELEMENT (stmt_info));
2103 unsigned HOST_WIDE_INT gap = DR_GROUP_GAP (first_stmt_info);
2104 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2105
2106 /* True if the vectorized statements would access beyond the last
2107 statement in the group. */
2108 bool overrun_p = false;
2109
2110 /* True if we can cope with such overrun by peeling for gaps, so that
2111 there is at least one final scalar iteration after the vector loop. */
2112 bool can_overrun_p = (!masked_p
2113 && vls_type == VLS_LOAD
2114 && loop_vinfo
2115 && !loop->inner);
2116
2117 /* There can only be a gap at the end of the group if the stride is
2118 known at compile time. */
2119 gcc_assert (!STMT_VINFO_STRIDED_P (first_stmt_info) || gap == 0);
2120
2121 /* Stores can't yet have gaps. */
2122 gcc_assert (slp_node || vls_type == VLS_LOAD || gap == 0);
2123
2124 if (slp_node)
2125 {
2126 /* For SLP vectorization we directly vectorize a subchain
2127 without permutation. */
2128 if (! SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
2129 first_dr_info
2130 = STMT_VINFO_DR_INFO (SLP_TREE_SCALAR_STMTS (slp_node)[0]);
2131 if (STMT_VINFO_STRIDED_P (first_stmt_info))
2132 {
2133 /* Try to use consecutive accesses of DR_GROUP_SIZE elements,
2134 separated by the stride, until we have a complete vector.
2135 Fall back to scalar accesses if that isn't possible. */
2136 if (multiple_p (nunits, group_size))
2137 *memory_access_type = VMAT_STRIDED_SLP;
2138 else
2139 *memory_access_type = VMAT_ELEMENTWISE;
2140 }
2141 else
2142 {
2143 overrun_p = loop_vinfo && gap != 0;
2144 if (overrun_p && vls_type != VLS_LOAD)
2145 {
2146 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2147 "Grouped store with gaps requires"
2148 " non-consecutive accesses\n");
2149 return false;
2150 }
2151 /* An overrun is fine if the trailing elements are smaller
2152 than the alignment boundary B. Every vector access will
2153 be a multiple of B and so we are guaranteed to access a
2154 non-gap element in the same B-sized block. */
2155 if (overrun_p
2156 && gap < (vect_known_alignment_in_bytes (first_dr_info)
2157 / vect_get_scalar_dr_size (first_dr_info)))
2158 overrun_p = false;
2159
2160 /* If the gap splits the vector in half and the target
2161 can do half-vector operations, avoid the epilogue peeling
2162 by simply loading only half of the vector.  Usually
2163 the construction with an upper zero half will be elided. */
2164 dr_alignment_support alignment_support_scheme;
2165 tree half_vtype;
2166 if (overrun_p
2167 && !masked_p
2168 && (((alignment_support_scheme
2169 = vect_supportable_dr_alignment (vinfo,
2170 first_dr_info, false)))
2171 == dr_aligned
2172 || alignment_support_scheme == dr_unaligned_supported)
2173 && known_eq (nunits, (group_size - gap) * 2)
2174 && known_eq (nunits, group_size)
2175 && (vector_vector_composition_type (vectype, 2, &half_vtype)
2176 != NULL_TREE))
2177 overrun_p = false;
2178
2179 if (overrun_p && !can_overrun_p)
2180 {
2181 if (dump_enabled_p ())
2182 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2183 "Peeling for outer loop is not supported\n");
2184 return false;
2185 }
2186 int cmp = compare_step_with_zero (vinfo, stmt_info);
2187 if (cmp < 0)
2188 {
2189 if (single_element_p)
2190 /* ??? The VMAT_CONTIGUOUS_REVERSE code generation is
2191 only correct for single element "interleaving" SLP. */
2192 *memory_access_type = get_negative_load_store_type
2193 (vinfo, stmt_info, vectype, vls_type, 1);
2194 else
2195 {
2196 /* Try to use consecutive accesses of DR_GROUP_SIZE elements,
2197 separated by the stride, until we have a complete vector.
2198 Fall back to scalar accesses if that isn't possible. */
2199 if (multiple_p (nunits, group_size))
2200 *memory_access_type = VMAT_STRIDED_SLP;
2201 else
2202 *memory_access_type = VMAT_ELEMENTWISE;
2203 }
2204 }
2205 else
2206 {
2207 gcc_assert (!loop_vinfo || cmp > 0);
2208 *memory_access_type = VMAT_CONTIGUOUS;
2209 }
2210 }
2211 }
2212 else
2213 {
2214 /* We can always handle this case using elementwise accesses,
2215 but see if something more efficient is available. */
2216 *memory_access_type = VMAT_ELEMENTWISE;
2217
2218 /* If there is a gap at the end of the group then these optimizations
2219 would access excess elements in the last iteration. */
2220 bool would_overrun_p = (gap != 0);
2221 /* An overrun is fine if the trailing elements are smaller than the
2222 alignment boundary B. Every vector access will be a multiple of B
2223 and so we are guaranteed to access a non-gap element in the
2224 same B-sized block. */
2225 if (would_overrun_p
2226 && !masked_p
2227 && gap < (vect_known_alignment_in_bytes (first_dr_info)
2228 / vect_get_scalar_dr_size (first_dr_info)))
2229 would_overrun_p = false;
2230
2231 if (!STMT_VINFO_STRIDED_P (first_stmt_info)
2232 && (can_overrun_p || !would_overrun_p)
2233 && compare_step_with_zero (vinfo, stmt_info) > 0)
2234 {
2235 /* First cope with the degenerate case of a single-element
2236 vector. */
2237 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype), 1U))
2238 ;
2239
2240 /* Otherwise try using LOAD/STORE_LANES. */
2241 else if (vls_type == VLS_LOAD
2242 ? vect_load_lanes_supported (vectype, group_size, masked_p)
2243 : vect_store_lanes_supported (vectype, group_size,
2244 masked_p))
2245 {
2246 *memory_access_type = VMAT_LOAD_STORE_LANES;
2247 overrun_p = would_overrun_p;
2248 }
2249
2250 /* If that fails, try using permuting loads. */
2251 else if (vls_type == VLS_LOAD
2252 ? vect_grouped_load_supported (vectype, single_element_p,
2253 group_size)
2254 : vect_grouped_store_supported (vectype, group_size))
2255 {
2256 *memory_access_type = VMAT_CONTIGUOUS_PERMUTE;
2257 overrun_p = would_overrun_p;
2258 }
2259 }
2260
2261 /* As a last resort, try using a gather load or scatter store.
2262
2263 ??? Although the code can handle all group sizes correctly,
2264 it probably isn't a win to use separate strided accesses based
2265 on nearby locations. Or, even if it's a win over scalar code,
2266 it might not be a win over vectorizing at a lower VF, if that
2267 allows us to use contiguous accesses. */
2268 if (*memory_access_type == VMAT_ELEMENTWISE
2269 && single_element_p
2270 && loop_vinfo
2271 && vect_use_strided_gather_scatters_p (stmt_info, loop_vinfo,
2272 masked_p, gs_info))
2273 *memory_access_type = VMAT_GATHER_SCATTER;
2274 }
2275
2276 if (*memory_access_type == VMAT_GATHER_SCATTER
2277 || *memory_access_type == VMAT_ELEMENTWISE)
2278 *alignment_support_scheme = dr_unaligned_supported;
2279 else
2280 *alignment_support_scheme
2281 = vect_supportable_dr_alignment (vinfo, first_dr_info, false);
2282
2283 if (vls_type != VLS_LOAD && first_stmt_info == stmt_info)
2284 {
2285 /* STMT_INFO is the leader of the group.  Check the operands of all the
2286 stmts of the group. */
2287 stmt_vec_info next_stmt_info = DR_GROUP_NEXT_ELEMENT (stmt_info);
2288 while (next_stmt_info)
2289 {
2290 tree op = vect_get_store_rhs (next_stmt_info);
2291 enum vect_def_type dt;
2292 if (!vect_is_simple_use (op, vinfo, &dt))
2293 {
2294 if (dump_enabled_p ())
2295 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2296 "use not simple.\n");
2297 return false;
2298 }
2299 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
2300 }
2301 }
2302
2303 if (overrun_p)
2304 {
2305 gcc_assert (can_overrun_p);
2306 if (dump_enabled_p ())
2307 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2308 "Data access with gaps requires scalar "
2309 "epilogue loop\n");
2310 LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) = true;
2311 }
2312
2313 return true;
2314 }
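
/* A worked example of the overrun test used above: a group of three
   4-byte loads a[4*i], a[4*i+1] and a[4*i+2] leaves a trailing gap of
   one element.  If the first access is known to be 16-byte aligned,
   the gap (1) is smaller than 16 / 4 = 4, so any element read past the
   group still lies within the same 16-byte block and no gap peeling is
   required.  */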
2315
2316 /* Analyze load or store statement STMT_INFO of type VLS_TYPE. Return true
2317 if there is a memory access type that the vectorized form can use,
2318 storing it in *MEMORY_ACCESS_TYPE if so. If we decide to use gathers
2319 or scatters, fill in GS_INFO accordingly. In addition
2320 *ALIGNMENT_SUPPORT_SCHEME is filled out and false is returned if
2321 the target does not support the alignment scheme.
2322
2323 SLP says whether we're performing SLP rather than loop vectorization.
2324 MASKED_P is true if the statement is conditional on a vectorized mask.
2325 VECTYPE is the vector type that the vectorized statements will use.
2326 NCOPIES is the number of vector statements that will be needed. */
2327
2328 static bool
2329 get_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info,
2330 tree vectype, slp_tree slp_node,
2331 bool masked_p, vec_load_store_type vls_type,
2332 unsigned int ncopies,
2333 vect_memory_access_type *memory_access_type,
2334 dr_alignment_support *alignment_support_scheme,
2335 gather_scatter_info *gs_info)
2336 {
2337 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
2338 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2339 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
2340 {
2341 *memory_access_type = VMAT_GATHER_SCATTER;
2342 if (!vect_check_gather_scatter (stmt_info, loop_vinfo, gs_info))
2343 gcc_unreachable ();
2344 else if (!vect_is_simple_use (gs_info->offset, vinfo,
2345 &gs_info->offset_dt,
2346 &gs_info->offset_vectype))
2347 {
2348 if (dump_enabled_p ())
2349 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2350 "%s index use not simple.\n",
2351 vls_type == VLS_LOAD ? "gather" : "scatter");
2352 return false;
2353 }
2354 /* Gather-scatter accesses perform only component accesses, alignment
2355 is irrelevant for them. */
2356 *alignment_support_scheme = dr_unaligned_supported;
2357 }
2358 else if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
2359 {
2360 if (!get_group_load_store_type (vinfo, stmt_info, vectype, slp_node,
2361 masked_p,
2362 vls_type, memory_access_type,
2363 alignment_support_scheme, gs_info))
2364 return false;
2365 }
2366 else if (STMT_VINFO_STRIDED_P (stmt_info))
2367 {
2368 gcc_assert (!slp_node);
2369 if (loop_vinfo
2370 && vect_use_strided_gather_scatters_p (stmt_info, loop_vinfo,
2371 masked_p, gs_info))
2372 *memory_access_type = VMAT_GATHER_SCATTER;
2373 else
2374 *memory_access_type = VMAT_ELEMENTWISE;
2375 /* Alignment is irrelevant here. */
2376 *alignment_support_scheme = dr_unaligned_supported;
2377 }
2378 else
2379 {
2380 int cmp = compare_step_with_zero (vinfo, stmt_info);
2381 if (cmp == 0)
2382 {
2383 gcc_assert (vls_type == VLS_LOAD);
2384 *memory_access_type = VMAT_INVARIANT;
2385 /* Invariant accesses perform only component accesses, alignment
2386 is irrelevant for them. */
2387 *alignment_support_scheme = dr_unaligned_supported;
2388 }
2389 else
2390 {
2391 if (cmp < 0)
2392 *memory_access_type = get_negative_load_store_type
2393 (vinfo, stmt_info, vectype, vls_type, ncopies);
2394 else
2395 *memory_access_type = VMAT_CONTIGUOUS;
2396 *alignment_support_scheme
2397 = vect_supportable_dr_alignment (vinfo,
2398 STMT_VINFO_DR_INFO (stmt_info),
2399 false);
2400 }
2401 }
2402
2403 if ((*memory_access_type == VMAT_ELEMENTWISE
2404 || *memory_access_type == VMAT_STRIDED_SLP)
2405 && !nunits.is_constant ())
2406 {
2407 if (dump_enabled_p ())
2408 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2409 "Not using elementwise accesses due to variable "
2410 "vectorization factor.\n");
2411 return false;
2412 }
2413
2414 if (*alignment_support_scheme == dr_unaligned_unsupported)
2415 {
2416 if (dump_enabled_p ())
2417 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2418 "unsupported unaligned access\n");
2419 return false;
2420 }
2421
2422 /* FIXME: At the moment the cost model seems to underestimate the
2423 cost of using elementwise accesses. This check preserves the
2424 traditional behavior until that can be fixed. */
2425 stmt_vec_info first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
2426 if (!first_stmt_info)
2427 first_stmt_info = stmt_info;
2428 if (*memory_access_type == VMAT_ELEMENTWISE
2429 && !STMT_VINFO_STRIDED_P (first_stmt_info)
2430 && !(stmt_info == DR_GROUP_FIRST_ELEMENT (stmt_info)
2431 && !DR_GROUP_NEXT_ELEMENT (stmt_info)
2432 && !pow2p_hwi (DR_GROUP_SIZE (stmt_info))))
2433 {
2434 if (dump_enabled_p ())
2435 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2436 "not falling back to elementwise accesses\n");
2437 return false;
2438 }
2439 return true;
2440 }
2441
2442 /* Return true if boolean argument MASK is suitable for vectorizing
2443 conditional operation STMT_INFO. When returning true, store the type
2444 of the definition in *MASK_DT_OUT and the type of the vectorized mask
2445 in *MASK_VECTYPE_OUT. */
2446
2447 static bool
2448 vect_check_scalar_mask (vec_info *vinfo, stmt_vec_info stmt_info, tree mask,
2449 vect_def_type *mask_dt_out,
2450 tree *mask_vectype_out)
2451 {
2452 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (mask)))
2453 {
2454 if (dump_enabled_p ())
2455 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2456 "mask argument is not a boolean.\n");
2457 return false;
2458 }
2459
2460 if (TREE_CODE (mask) != SSA_NAME)
2461 {
2462 if (dump_enabled_p ())
2463 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2464 "mask argument is not an SSA name.\n");
2465 return false;
2466 }
2467
2468 enum vect_def_type mask_dt;
2469 tree mask_vectype;
2470 if (!vect_is_simple_use (mask, vinfo, &mask_dt, &mask_vectype))
2471 {
2472 if (dump_enabled_p ())
2473 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2474 "mask use not simple.\n");
2475 return false;
2476 }
2477
2478 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2479 if (!mask_vectype)
2480 mask_vectype = get_mask_type_for_scalar_type (vinfo, TREE_TYPE (vectype));
2481
2482 if (!mask_vectype || !VECTOR_BOOLEAN_TYPE_P (mask_vectype))
2483 {
2484 if (dump_enabled_p ())
2485 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2486 "could not find an appropriate vector mask type.\n");
2487 return false;
2488 }
2489
2490 if (maybe_ne (TYPE_VECTOR_SUBPARTS (mask_vectype),
2491 TYPE_VECTOR_SUBPARTS (vectype)))
2492 {
2493 if (dump_enabled_p ())
2494 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2495 "vector mask type %T"
2496 " does not match vector data type %T.\n",
2497 mask_vectype, vectype);
2498
2499 return false;
2500 }
2501
2502 *mask_dt_out = mask_dt;
2503 *mask_vectype_out = mask_vectype;
2504 return true;
2505 }
2506
2507 /* Return true if stored value RHS is suitable for vectorizing store
2508 statement STMT_INFO. When returning true, store the type of the
2509 definition in *RHS_DT_OUT, the type of the vectorized store value in
2510 *RHS_VECTYPE_OUT and the type of the store in *VLS_TYPE_OUT. */
2511
2512 static bool
2513 vect_check_store_rhs (vec_info *vinfo, stmt_vec_info stmt_info,
2514 slp_tree slp_node, tree rhs,
2515 vect_def_type *rhs_dt_out, tree *rhs_vectype_out,
2516 vec_load_store_type *vls_type_out)
2517 {
2518 /* If this is a store of a constant, make sure
2519 native_encode_expr can handle it.  */
2520 if (CONSTANT_CLASS_P (rhs) && native_encode_expr (rhs, NULL, 64) == 0)
2521 {
2522 if (dump_enabled_p ())
2523 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2524 "cannot encode constant as a byte sequence.\n");
2525 return false;
2526 }
2527
2528 enum vect_def_type rhs_dt;
2529 tree rhs_vectype;
2530 slp_tree slp_op;
2531 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 0,
2532 &rhs, &slp_op, &rhs_dt, &rhs_vectype))
2533 {
2534 if (dump_enabled_p ())
2535 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2536 "use not simple.\n");
2537 return false;
2538 }
2539
2540 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2541 if (rhs_vectype && !useless_type_conversion_p (vectype, rhs_vectype))
2542 {
2543 if (dump_enabled_p ())
2544 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2545 "incompatible vector types.\n");
2546 return false;
2547 }
2548
2549 *rhs_dt_out = rhs_dt;
2550 *rhs_vectype_out = rhs_vectype;
2551 if (rhs_dt == vect_constant_def || rhs_dt == vect_external_def)
2552 *vls_type_out = VLS_STORE_INVARIANT;
2553 else
2554 *vls_type_out = VLS_STORE;
2555 return true;
2556 }
2557
2558 /* Build an all-ones vector mask of type MASKTYPE while vectorizing STMT_INFO.
2559 Note that we support masks with floating-point type, in which case the
2560 floats are interpreted as a bitmask. */
2561
2562 static tree
2563 vect_build_all_ones_mask (vec_info *vinfo,
2564 stmt_vec_info stmt_info, tree masktype)
2565 {
2566 if (TREE_CODE (masktype) == INTEGER_TYPE)
2567 return build_int_cst (masktype, -1);
2568 else if (TREE_CODE (TREE_TYPE (masktype)) == INTEGER_TYPE)
2569 {
2570 tree mask = build_int_cst (TREE_TYPE (masktype), -1);
2571 mask = build_vector_from_val (masktype, mask);
2572 return vect_init_vector (vinfo, stmt_info, mask, masktype, NULL);
2573 }
2574 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (masktype)))
2575 {
2576 REAL_VALUE_TYPE r;
2577 long tmp[6];
2578 for (int j = 0; j < 6; ++j)
2579 tmp[j] = -1;
2580 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (masktype)));
2581 tree mask = build_real (TREE_TYPE (masktype), r);
2582 mask = build_vector_from_val (masktype, mask);
2583 return vect_init_vector (vinfo, stmt_info, mask, masktype, NULL);
2584 }
2585 gcc_unreachable ();
2586 }
2587
2588 /* Build an all-zero merge value of type VECTYPE while vectorizing
2589 STMT_INFO as a gather load. */
2590
2591 static tree
2592 vect_build_zero_merge_argument (vec_info *vinfo,
2593 stmt_vec_info stmt_info, tree vectype)
2594 {
2595 tree merge;
2596 if (TREE_CODE (TREE_TYPE (vectype)) == INTEGER_TYPE)
2597 merge = build_int_cst (TREE_TYPE (vectype), 0);
2598 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (vectype)))
2599 {
2600 REAL_VALUE_TYPE r;
2601 long tmp[6];
2602 for (int j = 0; j < 6; ++j)
2603 tmp[j] = 0;
2604 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (vectype)));
2605 merge = build_real (TREE_TYPE (vectype), r);
2606 }
2607 else
2608 gcc_unreachable ();
2609 merge = build_vector_from_val (vectype, merge);
2610 return vect_init_vector (vinfo, stmt_info, merge, vectype, NULL);
2611 }
2612
2613 /* Build a gather load call while vectorizing STMT_INFO. Insert new
2614 instructions before GSI and add them to VEC_STMT. GS_INFO describes
2615 the gather load operation. If the load is conditional, MASK is the
2616 unvectorized condition and MASK_DT is its definition type, otherwise
2617 MASK is null. */
2618
2619 static void
2620 vect_build_gather_load_calls (vec_info *vinfo, stmt_vec_info stmt_info,
2621 gimple_stmt_iterator *gsi,
2622 gimple **vec_stmt,
2623 gather_scatter_info *gs_info,
2624 tree mask)
2625 {
2626 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
2627 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2628 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2629 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2630 int ncopies = vect_get_num_copies (loop_vinfo, vectype);
2631 edge pe = loop_preheader_edge (loop);
2632 enum { NARROW, NONE, WIDEN } modifier;
2633 poly_uint64 gather_off_nunits
2634 = TYPE_VECTOR_SUBPARTS (gs_info->offset_vectype);
2635
2636 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info->decl));
2637 tree rettype = TREE_TYPE (TREE_TYPE (gs_info->decl));
2638 tree srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2639 tree ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2640 tree idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2641 tree masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2642 tree scaletype = TREE_VALUE (arglist);
2643 tree real_masktype = masktype;
2644 gcc_checking_assert (types_compatible_p (srctype, rettype)
2645 && (!mask
2646 || TREE_CODE (masktype) == INTEGER_TYPE
2647 || types_compatible_p (srctype, masktype)));
2648 if (mask && TREE_CODE (masktype) == INTEGER_TYPE)
2649 masktype = truth_type_for (srctype);
2650
2651 tree mask_halftype = masktype;
2652 tree perm_mask = NULL_TREE;
2653 tree mask_perm_mask = NULL_TREE;
2654 if (known_eq (nunits, gather_off_nunits))
2655 modifier = NONE;
2656 else if (known_eq (nunits * 2, gather_off_nunits))
2657 {
2658 modifier = WIDEN;
2659
2660 /* Currently widening gathers and scatters are only supported for
2661 fixed-length vectors. */
2662 int count = gather_off_nunits.to_constant ();
2663 vec_perm_builder sel (count, count, 1);
2664 for (int i = 0; i < count; ++i)
2665 sel.quick_push (i | (count / 2));
2666
2667 vec_perm_indices indices (sel, 1, count);
2668 perm_mask = vect_gen_perm_mask_checked (gs_info->offset_vectype,
2669 indices);
2670 }
2671 else if (known_eq (nunits, gather_off_nunits * 2))
2672 {
2673 modifier = NARROW;
2674
2675 /* Currently narrowing gathers and scatters are only supported for
2676 fixed-length vectors. */
2677 int count = nunits.to_constant ();
2678 vec_perm_builder sel (count, count, 1);
2679 sel.quick_grow (count);
2680 for (int i = 0; i < count; ++i)
2681 sel[i] = i < count / 2 ? i : i + count / 2;
2682 vec_perm_indices indices (sel, 2, count);
2683 perm_mask = vect_gen_perm_mask_checked (vectype, indices);
2684
2685 ncopies *= 2;
2686
2687 if (mask && masktype == real_masktype)
2688 {
2689 for (int i = 0; i < count; ++i)
2690 sel[i] = i | (count / 2);
2691 indices.new_vector (sel, 2, count);
2692 mask_perm_mask = vect_gen_perm_mask_checked (masktype, indices);
2693 }
2694 else if (mask)
2695 mask_halftype = truth_type_for (gs_info->offset_vectype);
2696 }
2697 else
2698 gcc_unreachable ();
2699
2700 tree scalar_dest = gimple_get_lhs (stmt_info->stmt);
2701 tree vec_dest = vect_create_destination_var (scalar_dest, vectype);
2702
2703 tree ptr = fold_convert (ptrtype, gs_info->base);
2704 if (!is_gimple_min_invariant (ptr))
2705 {
2706 gimple_seq seq;
2707 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
2708 basic_block new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
2709 gcc_assert (!new_bb);
2710 }
2711
2712 tree scale = build_int_cst (scaletype, gs_info->scale);
2713
2714 tree vec_oprnd0 = NULL_TREE;
2715 tree vec_mask = NULL_TREE;
2716 tree src_op = NULL_TREE;
2717 tree mask_op = NULL_TREE;
2718 tree prev_res = NULL_TREE;
2719
2720 if (!mask)
2721 {
2722 src_op = vect_build_zero_merge_argument (vinfo, stmt_info, rettype);
2723 mask_op = vect_build_all_ones_mask (vinfo, stmt_info, masktype);
2724 }
2725
2726 auto_vec<tree> vec_oprnds0;
2727 auto_vec<tree> vec_masks;
2728 vect_get_vec_defs_for_operand (vinfo, stmt_info,
2729 modifier == WIDEN ? ncopies / 2 : ncopies,
2730 gs_info->offset, &vec_oprnds0);
2731 if (mask)
2732 vect_get_vec_defs_for_operand (vinfo, stmt_info,
2733 modifier == NARROW ? ncopies / 2 : ncopies,
2734 mask, &vec_masks);
2735 for (int j = 0; j < ncopies; ++j)
2736 {
2737 tree op, var;
2738 if (modifier == WIDEN && (j & 1))
2739 op = permute_vec_elements (vinfo, vec_oprnd0, vec_oprnd0,
2740 perm_mask, stmt_info, gsi);
2741 else
2742 op = vec_oprnd0 = vec_oprnds0[modifier == WIDEN ? j / 2 : j];
2743
2744 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
2745 {
2746 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op)),
2747 TYPE_VECTOR_SUBPARTS (idxtype)));
2748 var = vect_get_new_ssa_name (idxtype, vect_simple_var);
2749 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
2750 gassign *new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
2751 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2752 op = var;
2753 }
2754
2755 if (mask)
2756 {
2757 if (mask_perm_mask && (j & 1))
2758 mask_op = permute_vec_elements (vinfo, mask_op, mask_op,
2759 mask_perm_mask, stmt_info, gsi);
2760 else
2761 {
2762 if (modifier == NARROW)
2763 {
2764 if ((j & 1) == 0)
2765 vec_mask = vec_masks[j / 2];
2766 }
2767 else
2768 vec_mask = vec_masks[j];
2769
2770 mask_op = vec_mask;
2771 if (!useless_type_conversion_p (masktype, TREE_TYPE (vec_mask)))
2772 {
2773 poly_uint64 sub1 = TYPE_VECTOR_SUBPARTS (TREE_TYPE (mask_op));
2774 poly_uint64 sub2 = TYPE_VECTOR_SUBPARTS (masktype);
2775 gcc_assert (known_eq (sub1, sub2));
2776 var = vect_get_new_ssa_name (masktype, vect_simple_var);
2777 mask_op = build1 (VIEW_CONVERT_EXPR, masktype, mask_op);
2778 gassign *new_stmt
2779 = gimple_build_assign (var, VIEW_CONVERT_EXPR, mask_op);
2780 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2781 mask_op = var;
2782 }
2783 }
2784 if (modifier == NARROW && masktype != real_masktype)
2785 {
2786 var = vect_get_new_ssa_name (mask_halftype, vect_simple_var);
2787 gassign *new_stmt
2788 = gimple_build_assign (var, (j & 1) ? VEC_UNPACK_HI_EXPR
2789 : VEC_UNPACK_LO_EXPR,
2790 mask_op);
2791 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2792 mask_op = var;
2793 }
2794 src_op = mask_op;
2795 }
2796
2797 tree mask_arg = mask_op;
2798 if (masktype != real_masktype)
2799 {
2800 tree utype, optype = TREE_TYPE (mask_op);
2801 if (TYPE_MODE (real_masktype) == TYPE_MODE (optype))
2802 utype = real_masktype;
2803 else
2804 utype = lang_hooks.types.type_for_mode (TYPE_MODE (optype), 1);
2805 var = vect_get_new_ssa_name (utype, vect_scalar_var);
2806 mask_arg = build1 (VIEW_CONVERT_EXPR, utype, mask_op);
2807 gassign *new_stmt
2808 = gimple_build_assign (var, VIEW_CONVERT_EXPR, mask_arg);
2809 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2810 mask_arg = var;
2811 if (!useless_type_conversion_p (real_masktype, utype))
2812 {
2813 gcc_assert (TYPE_PRECISION (utype)
2814 <= TYPE_PRECISION (real_masktype));
2815 var = vect_get_new_ssa_name (real_masktype, vect_scalar_var);
2816 new_stmt = gimple_build_assign (var, NOP_EXPR, mask_arg);
2817 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2818 mask_arg = var;
2819 }
2820 src_op = build_zero_cst (srctype);
2821 }
2822 gimple *new_stmt = gimple_build_call (gs_info->decl, 5, src_op, ptr, op,
2823 mask_arg, scale);
2824
2825 if (!useless_type_conversion_p (vectype, rettype))
2826 {
2827 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (vectype),
2828 TYPE_VECTOR_SUBPARTS (rettype)));
2829 op = vect_get_new_ssa_name (rettype, vect_simple_var);
2830 gimple_call_set_lhs (new_stmt, op);
2831 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2832 var = make_ssa_name (vec_dest);
2833 op = build1 (VIEW_CONVERT_EXPR, vectype, op);
2834 new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
2835 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2836 }
2837 else
2838 {
2839 var = make_ssa_name (vec_dest, new_stmt);
2840 gimple_call_set_lhs (new_stmt, var);
2841 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2842 }
2843
2844 if (modifier == NARROW)
2845 {
2846 if ((j & 1) == 0)
2847 {
2848 prev_res = var;
2849 continue;
2850 }
2851 var = permute_vec_elements (vinfo, prev_res, var, perm_mask,
2852 stmt_info, gsi);
2853 new_stmt = SSA_NAME_DEF_STMT (var);
2854 }
2855
2856 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
2857 }
2858 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
2859 }
2860
2861 /* Prepare the base and offset in GS_INFO for vectorization.
2862 Set *DATAREF_PTR to the loop-invariant base address and *VEC_OFFSET
2863 to the vectorized offset argument for the first copy of STMT_INFO.
2864 STMT_INFO is the statement described by GS_INFO and LOOP is the
2865 containing loop. */
2866
2867 static void
2868 vect_get_gather_scatter_ops (vec_info *vinfo,
2869 class loop *loop, stmt_vec_info stmt_info,
2870 gather_scatter_info *gs_info,
2871 tree *dataref_ptr, vec<tree> *vec_offset,
2872 unsigned ncopies)
2873 {
2874 gimple_seq stmts = NULL;
2875 *dataref_ptr = force_gimple_operand (gs_info->base, &stmts, true, NULL_TREE);
2876 if (stmts != NULL)
2877 {
2878 basic_block new_bb;
2879 edge pe = loop_preheader_edge (loop);
2880 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
2881 gcc_assert (!new_bb);
2882 }
2883 vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies, gs_info->offset,
2884 vec_offset, gs_info->offset_vectype);
2885 }
2886
2887 /* Prepare to implement a grouped or strided load or store using
2888 the gather load or scatter store operation described by GS_INFO.
2889 STMT_INFO is the load or store statement.
2890
2891 Set *DATAREF_BUMP to the amount that should be added to the base
2892 address after each copy of the vectorized statement. Set *VEC_OFFSET
2893 to an invariant offset vector in which element I has the value
2894 I * DR_STEP / SCALE. */
2895
2896 static void
2897 vect_get_strided_load_store_ops (stmt_vec_info stmt_info,
2898 loop_vec_info loop_vinfo,
2899 gather_scatter_info *gs_info,
2900 tree *dataref_bump, tree *vec_offset)
2901 {
2902 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2903 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2904
2905 tree bump = size_binop (MULT_EXPR,
2906 fold_convert (sizetype, unshare_expr (DR_STEP (dr))),
2907 size_int (TYPE_VECTOR_SUBPARTS (vectype)));
2908 *dataref_bump = cse_and_gimplify_to_preheader (loop_vinfo, bump);
2909
2910 /* The offset given in GS_INFO can have pointer type, so use the element
2911 type of the vector instead. */
2912 tree offset_type = TREE_TYPE (gs_info->offset_vectype);
2913
2914 /* Calculate X = DR_STEP / SCALE and convert it to the appropriate type. */
2915 tree step = size_binop (EXACT_DIV_EXPR, unshare_expr (DR_STEP (dr)),
2916 ssize_int (gs_info->scale));
2917 step = fold_convert (offset_type, step);
2918
2919 /* Create {0, X, X*2, X*3, ...}. */
2920 tree offset = fold_build2 (VEC_SERIES_EXPR, gs_info->offset_vectype,
2921 build_zero_cst (offset_type), step);
2922 *vec_offset = cse_and_gimplify_to_preheader (loop_vinfo, offset);
2923 }
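
/* An illustrative example: with DR_STEP = 32 bytes, SCALE = 8 and a
   two-element offset vector type, *DATAREF_BUMP is 32 * 2 = 64,
   X = 32 / 8 = 4 and *VEC_OFFSET is { 0, 4 }, so the first copy
   accesses base + { 0, 4 } * 8 and each later copy advances the base
   by 64 bytes.  */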
2924
2925 /* Return the amount that should be added to a vector pointer to move
2926 to the next or previous copy of AGGR_TYPE. DR_INFO is the data reference
2927 being vectorized and MEMORY_ACCESS_TYPE describes the type of
2928 vectorization. */
2929
2930 static tree
2931 vect_get_data_ptr_increment (vec_info *vinfo,
2932 dr_vec_info *dr_info, tree aggr_type,
2933 vect_memory_access_type memory_access_type)
2934 {
2935 if (memory_access_type == VMAT_INVARIANT)
2936 return size_zero_node;
2937
2938 tree iv_step = TYPE_SIZE_UNIT (aggr_type);
2939 tree step = vect_dr_behavior (vinfo, dr_info)->step;
2940 if (tree_int_cst_sgn (step) == -1)
2941 iv_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (iv_step), iv_step);
2942 return iv_step;
2943 }
2944
2945 /* Check and perform vectorization of BUILT_IN_BSWAP{16,32,64,128}. */
2946
2947 static bool
2948 vectorizable_bswap (vec_info *vinfo,
2949 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
2950 gimple **vec_stmt, slp_tree slp_node,
2951 slp_tree *slp_op,
2952 tree vectype_in, stmt_vector_for_cost *cost_vec)
2953 {
2954 tree op, vectype;
2955 gcall *stmt = as_a <gcall *> (stmt_info->stmt);
2956 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
2957 unsigned ncopies;
2958
2959 op = gimple_call_arg (stmt, 0);
2960 vectype = STMT_VINFO_VECTYPE (stmt_info);
2961 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2962
2963 /* Multiple types in SLP are handled by creating the appropriate number of
2964 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
2965 case of SLP. */
2966 if (slp_node)
2967 ncopies = 1;
2968 else
2969 ncopies = vect_get_num_copies (loop_vinfo, vectype);
2970
2971 gcc_assert (ncopies >= 1);
2972
2973 tree char_vectype = get_same_sized_vectype (char_type_node, vectype_in);
2974 if (! char_vectype)
2975 return false;
2976
2977 poly_uint64 num_bytes = TYPE_VECTOR_SUBPARTS (char_vectype);
2978 unsigned word_bytes;
2979 if (!constant_multiple_p (num_bytes, nunits, &word_bytes))
2980 return false;
2981
2982 /* The encoding uses one stepped pattern for each byte in the word. */
2983 vec_perm_builder elts (num_bytes, word_bytes, 3);
2984 for (unsigned i = 0; i < 3; ++i)
2985 for (unsigned j = 0; j < word_bytes; ++j)
2986 elts.quick_push ((i + 1) * word_bytes - j - 1);
2987
2988 vec_perm_indices indices (elts, 1, num_bytes);
2989 if (!can_vec_perm_const_p (TYPE_MODE (char_vectype), indices))
2990 return false;
2991
2992 if (! vec_stmt)
2993 {
2994 if (slp_node
2995 && !vect_maybe_update_slp_op_vectype (slp_op[0], vectype_in))
2996 {
2997 if (dump_enabled_p ())
2998 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2999 "incompatible vector types for invariants\n");
3000 return false;
3001 }
3002
3003 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
3004 DUMP_VECT_SCOPE ("vectorizable_bswap");
3005 record_stmt_cost (cost_vec,
3006 1, vector_stmt, stmt_info, 0, vect_prologue);
3007 record_stmt_cost (cost_vec,
3008 slp_node
3009 ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node) : ncopies,
3010 vec_perm, stmt_info, 0, vect_body);
3011 return true;
3012 }
3013
3014 tree bswap_vconst = vec_perm_indices_to_tree (char_vectype, indices);
3015
3016 /* Transform. */
3017 vec<tree> vec_oprnds = vNULL;
3018 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
3019 op, &vec_oprnds);
3020 /* Arguments are ready.  Create the new vector stmt.  */
3021 unsigned i;
3022 tree vop;
3023 FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
3024 {
3025 gimple *new_stmt;
3026 tree tem = make_ssa_name (char_vectype);
3027 new_stmt = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
3028 char_vectype, vop));
3029 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3030 tree tem2 = make_ssa_name (char_vectype);
3031 new_stmt = gimple_build_assign (tem2, VEC_PERM_EXPR,
3032 tem, tem, bswap_vconst);
3033 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3034 tem = make_ssa_name (vectype);
3035 new_stmt = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
3036 vectype, tem2));
3037 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3038 if (slp_node)
3039 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
3040 else
3041 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
3042 }
3043
3044 if (!slp_node)
3045 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
3046
3047 vec_oprnds.release ();
3048 return true;
3049 }
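
/* For instance, vectorizing __builtin_bswap32 with V4SImode vectors
   uses a V16QImode permutation with WORD_BYTES = 16 / 4 = 4 and the
   selector { 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12 },
   reversing the bytes of each 32-bit word in place.  The emitted
   sequence is roughly (SSA names are illustrative):

     tem_1 = VIEW_CONVERT_EXPR<vector(16) char> (vop_2);
     tem_3 = VEC_PERM_EXPR <tem_1, tem_1, { 3, 2, 1, 0, ... }>;
     res_4 = VIEW_CONVERT_EXPR<vector(4) int> (tem_3);  */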
3050
3051 /* Return true if vector types VECTYPE_IN and VECTYPE_OUT have
3052 integer elements and if we can narrow VECTYPE_IN to VECTYPE_OUT
3053 in a single step. On success, store the binary pack code in
3054 *CONVERT_CODE. */
3055
3056 static bool
3057 simple_integer_narrowing (tree vectype_out, tree vectype_in,
3058 tree_code *convert_code)
3059 {
3060 if (!INTEGRAL_TYPE_P (TREE_TYPE (vectype_out))
3061 || !INTEGRAL_TYPE_P (TREE_TYPE (vectype_in)))
3062 return false;
3063
3064 tree_code code;
3065 int multi_step_cvt = 0;
3066 auto_vec <tree, 8> interm_types;
3067 if (!supportable_narrowing_operation (NOP_EXPR, vectype_out, vectype_in,
3068 &code, &multi_step_cvt, &interm_types)
3069 || multi_step_cvt)
3070 return false;
3071
3072 *convert_code = code;
3073 return true;
3074 }
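
/* For example, with VECTYPE_IN = V4SI and VECTYPE_OUT = V8HI the
   narrowing can usually be done in one step with VEC_PACK_TRUNC_EXPR,
   which packs two V4SI inputs into one V8HI result, so *CONVERT_CODE
   is set to that code.  A V4SI to V16QI narrowing would need two
   steps and is rejected here.  */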
3075
3076 /* Function vectorizable_call.
3077
3078 Check if STMT_INFO performs a function call that can be vectorized.
3079 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
3080 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
3081 Return true if STMT_INFO is vectorizable in this way. */
3082
3083 static bool
3084 vectorizable_call (vec_info *vinfo,
3085 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
3086 gimple **vec_stmt, slp_tree slp_node,
3087 stmt_vector_for_cost *cost_vec)
3088 {
3089 gcall *stmt;
3090 tree vec_dest;
3091 tree scalar_dest;
3092 tree op;
3093 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
3094 tree vectype_out, vectype_in;
3095 poly_uint64 nunits_in;
3096 poly_uint64 nunits_out;
3097 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
3098 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
3099 tree fndecl, new_temp, rhs_type;
3100 enum vect_def_type dt[4]
3101 = { vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type,
3102 vect_unknown_def_type };
3103 tree vectypes[ARRAY_SIZE (dt)] = {};
3104 slp_tree slp_op[ARRAY_SIZE (dt)] = {};
3105 int ndts = ARRAY_SIZE (dt);
3106 int ncopies, j;
3107 auto_vec<tree, 8> vargs;
3108 auto_vec<tree, 8> orig_vargs;
3109 enum { NARROW, NONE, WIDEN } modifier;
3110 size_t i, nargs;
3111 tree lhs;
3112
3113 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3114 return false;
3115
3116 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
3117 && ! vec_stmt)
3118 return false;
3119
3120 /* Is STMT_INFO a vectorizable call? */
3121 stmt = dyn_cast <gcall *> (stmt_info->stmt);
3122 if (!stmt)
3123 return false;
3124
3125 if (gimple_call_internal_p (stmt)
3126 && (internal_load_fn_p (gimple_call_internal_fn (stmt))
3127 || internal_store_fn_p (gimple_call_internal_fn (stmt))))
3128 /* Handled by vectorizable_load and vectorizable_store. */
3129 return false;
3130
3131 if (gimple_call_lhs (stmt) == NULL_TREE
3132 || TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
3133 return false;
3134
3135 gcc_checking_assert (!stmt_can_throw_internal (cfun, stmt));
3136
3137 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
3138
3139 /* Process function arguments. */
3140 rhs_type = NULL_TREE;
3141 vectype_in = NULL_TREE;
3142 nargs = gimple_call_num_args (stmt);
3143
3144 /* Bail out if the function has more than four arguments; we do not have
3145 interesting builtin functions to vectorize with more than two arguments,
3146 except for fma. Having no arguments is also not good. */
3147 if (nargs == 0 || nargs > 4)
3148 return false;
3149
3150 /* Ignore the arguments of IFN_GOMP_SIMD_LANE; they are magic. */
3151 combined_fn cfn = gimple_call_combined_fn (stmt);
3152 if (cfn == CFN_GOMP_SIMD_LANE)
3153 {
3154 nargs = 0;
3155 rhs_type = unsigned_type_node;
3156 }
3157
3158 int mask_opno = -1;
3159 if (internal_fn_p (cfn))
3160 mask_opno = internal_fn_mask_index (as_internal_fn (cfn));
3161
3162 for (i = 0; i < nargs; i++)
3163 {
3164 if ((int) i == mask_opno)
3165 {
3166 op = gimple_call_arg (stmt, i);
3167 if (!vect_check_scalar_mask (vinfo,
3168 stmt_info, op, &dt[i], &vectypes[i]))
3169 return false;
3170 continue;
3171 }
3172
3173 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
3174 i, &op, &slp_op[i], &dt[i], &vectypes[i]))
3175 {
3176 if (dump_enabled_p ())
3177 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3178 "use not simple.\n");
3179 return false;
3180 }
3181
3182 /* We can only handle calls with arguments of the same type. */
3183 if (rhs_type
3184 && !types_compatible_p (rhs_type, TREE_TYPE (op)))
3185 {
3186 if (dump_enabled_p ())
3187 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3188 "argument types differ.\n");
3189 return false;
3190 }
3191 if (!rhs_type)
3192 rhs_type = TREE_TYPE (op);
3193
3194 if (!vectype_in)
3195 vectype_in = vectypes[i];
3196 else if (vectypes[i]
3197 && !types_compatible_p (vectypes[i], vectype_in))
3198 {
3199 if (dump_enabled_p ())
3200 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3201 "argument vector types differ.\n");
3202 return false;
3203 }
3204 }
3205 /* If all arguments are external or constant defs, infer the vector type
3206 from the scalar type. */
3207 if (!vectype_in)
3208 vectype_in = get_vectype_for_scalar_type (vinfo, rhs_type, slp_node);
3209 if (vec_stmt)
3210 gcc_assert (vectype_in);
3211 if (!vectype_in)
3212 {
3213 if (dump_enabled_p ())
3214 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3215 "no vectype for scalar type %T\n", rhs_type);
3216
3217 return false;
3218 }
3219 /* FORNOW: we don't yet support mixtures of vector sizes for calls,
3220 just mixtures of nunits. E.g. DI->SI versions of __builtin_ctz*
3221 are traditionally vectorized as two VnDI->VnDI IFN_CTZs followed
3222 by a pack of the two vectors into an SI vector. We would need
3223 separate code to handle direct VnDI->VnSI IFN_CTZs. */
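  /* A rough sketch of that traditional scheme, with illustrative vector
     types and SSA names only:

       half0_1 = .CTZ (vect_in0_2);                        (V2DI -> V2DI)
       half1_3 = .CTZ (vect_in1_4);                        (V2DI -> V2DI)
       vect_x_5 = VEC_PACK_TRUNC_EXPR <half0_1, half1_3>;  (two V2DI -> V4SI)  */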
3224 if (TYPE_SIZE (vectype_in) != TYPE_SIZE (vectype_out))
3225 {
3226 if (dump_enabled_p ())
3227 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3228 "mismatched vector sizes %T and %T\n",
3229 vectype_in, vectype_out);
3230 return false;
3231 }
3232
3233 if (VECTOR_BOOLEAN_TYPE_P (vectype_out)
3234 != VECTOR_BOOLEAN_TYPE_P (vectype_in))
3235 {
3236 if (dump_enabled_p ())
3237 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3238 "mixed mask and nonmask vector types\n");
3239 return false;
3240 }
3241
3242 /* FORNOW */
3243 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
3244 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
3245 if (known_eq (nunits_in * 2, nunits_out))
3246 modifier = NARROW;
3247 else if (known_eq (nunits_out, nunits_in))
3248 modifier = NONE;
3249 else if (known_eq (nunits_out * 2, nunits_in))
3250 modifier = WIDEN;
3251 else
3252 return false;
3253
3254 /* We only handle functions that do not read or clobber memory. */
3255 if (gimple_vuse (stmt))
3256 {
3257 if (dump_enabled_p ())
3258 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3259 "function reads from or writes to memory.\n");
3260 return false;
3261 }
3262
3263 /* For now, we only vectorize functions if a target-specific builtin
3264 is available. TODO -- in some cases, it might be profitable to
3265 insert the calls for pieces of the vector, in order to be able
3266 to vectorize other operations in the loop. */
3267 fndecl = NULL_TREE;
3268 internal_fn ifn = IFN_LAST;
3269 tree callee = gimple_call_fndecl (stmt);
3270
3271 /* First try using an internal function. */
3272 tree_code convert_code = ERROR_MARK;
3273 if (cfn != CFN_LAST
3274 && (modifier == NONE
3275 || (modifier == NARROW
3276 && simple_integer_narrowing (vectype_out, vectype_in,
3277 &convert_code))))
3278 ifn = vectorizable_internal_function (cfn, callee, vectype_out,
3279 vectype_in);
3280
3281 /* If that fails, try asking for a target-specific built-in function. */
3282 if (ifn == IFN_LAST)
3283 {
3284 if (cfn != CFN_LAST)
3285 fndecl = targetm.vectorize.builtin_vectorized_function
3286 (cfn, vectype_out, vectype_in);
3287 else if (callee && fndecl_built_in_p (callee, BUILT_IN_MD))
3288 fndecl = targetm.vectorize.builtin_md_vectorized_function
3289 (callee, vectype_out, vectype_in);
3290 }
3291
3292 if (ifn == IFN_LAST && !fndecl)
3293 {
3294 if (cfn == CFN_GOMP_SIMD_LANE
3295 && !slp_node
3296 && loop_vinfo
3297 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
3298 && TREE_CODE (gimple_call_arg (stmt, 0)) == SSA_NAME
3299 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
3300 == SSA_NAME_VAR (gimple_call_arg (stmt, 0)))
3301 {
3302 /* We can handle IFN_GOMP_SIMD_LANE by returning a
3303 { 0, 1, 2, ... vf - 1 } vector. */
3304 gcc_assert (nargs == 0);
3305 }
3306 else if (modifier == NONE
3307 && (gimple_call_builtin_p (stmt, BUILT_IN_BSWAP16)
3308 || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP32)
3309 || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP64)
3310 || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP128)))
3311 return vectorizable_bswap (vinfo, stmt_info, gsi, vec_stmt, slp_node,
3312 slp_op, vectype_in, cost_vec);
3313 else
3314 {
3315 if (dump_enabled_p ())
3316 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3317 "function is not vectorizable.\n");
3318 return false;
3319 }
3320 }
3321
3322 if (slp_node)
3323 ncopies = 1;
3324 else if (modifier == NARROW && ifn == IFN_LAST)
3325 ncopies = vect_get_num_copies (loop_vinfo, vectype_out);
3326 else
3327 ncopies = vect_get_num_copies (loop_vinfo, vectype_in);
3328
3329 /* Sanity check: make sure that at least one copy of the vectorized stmt
3330 needs to be generated. */
3331 gcc_assert (ncopies >= 1);
3332
3333 vec_loop_masks *masks = (loop_vinfo ? &LOOP_VINFO_MASKS (loop_vinfo) : NULL);
3334 if (!vec_stmt) /* transformation not required. */
3335 {
3336 if (slp_node)
3337 for (i = 0; i < nargs; ++i)
3338 if (!vect_maybe_update_slp_op_vectype (slp_op[i], vectype_in))
3339 {
3340 if (dump_enabled_p ())
3341 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3342 "incompatible vector types for invariants\n");
3343 return false;
3344 }
3345 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
3346 DUMP_VECT_SCOPE ("vectorizable_call");
3347 vect_model_simple_cost (vinfo, stmt_info,
3348 ncopies, dt, ndts, slp_node, cost_vec);
3349 if (ifn != IFN_LAST && modifier == NARROW && !slp_node)
3350 record_stmt_cost (cost_vec, ncopies / 2,
3351 vec_promote_demote, stmt_info, 0, vect_body);
3352
3353 if (loop_vinfo && mask_opno >= 0)
3354 {
3355 unsigned int nvectors = (slp_node
3356 ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node)
3357 : ncopies);
3358 tree scalar_mask = gimple_call_arg (stmt_info->stmt, mask_opno);
3359 vect_record_loop_mask (loop_vinfo, masks, nvectors,
3360 vectype_out, scalar_mask);
3361 }
3362 return true;
3363 }
3364
3365 /* Transform. */
3366
3367 if (dump_enabled_p ())
3368 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
3369
3370 /* Handle def. */
3371 scalar_dest = gimple_call_lhs (stmt);
3372 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
3373
3374 bool masked_loop_p = loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo);
3375
3376 if (modifier == NONE || ifn != IFN_LAST)
3377 {
3378 tree prev_res = NULL_TREE;
3379 vargs.safe_grow (nargs, true);
3380 orig_vargs.safe_grow (nargs, true);
3381 auto_vec<vec<tree> > vec_defs (nargs);
3382 for (j = 0; j < ncopies; ++j)
3383 {
3384 /* Build argument list for the vectorized call. */
3385 if (slp_node)
3386 {
3387 vec<tree> vec_oprnds0;
3388
3389 vect_get_slp_defs (vinfo, slp_node, &vec_defs);
3390 vec_oprnds0 = vec_defs[0];
3391
3392 /* Arguments are ready. Create the new vector stmt. */
3393 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_oprnd0)
3394 {
3395 size_t k;
3396 for (k = 0; k < nargs; k++)
3397 {
3398 vec<tree> vec_oprndsk = vec_defs[k];
3399 vargs[k] = vec_oprndsk[i];
3400 }
3401 gimple *new_stmt;
3402 if (modifier == NARROW)
3403 {
3404 /* We don't define any narrowing conditional functions
3405 at present. */
3406 gcc_assert (mask_opno < 0);
3407 tree half_res = make_ssa_name (vectype_in);
3408 gcall *call
3409 = gimple_build_call_internal_vec (ifn, vargs);
3410 gimple_call_set_lhs (call, half_res);
3411 gimple_call_set_nothrow (call, true);
3412 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
3413 if ((i & 1) == 0)
3414 {
3415 prev_res = half_res;
3416 continue;
3417 }
3418 new_temp = make_ssa_name (vec_dest);
3419 new_stmt = gimple_build_assign (new_temp, convert_code,
3420 prev_res, half_res);
3421 vect_finish_stmt_generation (vinfo, stmt_info,
3422 new_stmt, gsi);
3423 }
3424 else
3425 {
3426 if (mask_opno >= 0 && masked_loop_p)
3427 {
3428 unsigned int vec_num = vec_oprnds0.length ();
3429 /* Always true for SLP. */
3430 gcc_assert (ncopies == 1);
3431 tree mask = vect_get_loop_mask (gsi, masks, vec_num,
3432 vectype_out, i);
3433 vargs[mask_opno] = prepare_load_store_mask
3434 (TREE_TYPE (mask), mask, vargs[mask_opno], gsi);
3435 }
3436
3437 gcall *call;
3438 if (ifn != IFN_LAST)
3439 call = gimple_build_call_internal_vec (ifn, vargs);
3440 else
3441 call = gimple_build_call_vec (fndecl, vargs);
3442 new_temp = make_ssa_name (vec_dest, call);
3443 gimple_call_set_lhs (call, new_temp);
3444 gimple_call_set_nothrow (call, true);
3445 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
3446 new_stmt = call;
3447 }
3448 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
3449 }
3450 continue;
3451 }
3452
3453 for (i = 0; i < nargs; i++)
3454 {
3455 op = gimple_call_arg (stmt, i);
3456 if (j == 0)
3457 {
3458 vec_defs.quick_push (vNULL);
3459 vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
3460 op, &vec_defs[i],
3461 vectypes[i]);
3462 }
3463 orig_vargs[i] = vargs[i] = vec_defs[i][j];
3464 }
3465
3466 if (mask_opno >= 0 && masked_loop_p)
3467 {
3468 tree mask = vect_get_loop_mask (gsi, masks, ncopies,
3469 vectype_out, j);
3470 vargs[mask_opno]
3471 = prepare_load_store_mask (TREE_TYPE (mask), mask,
3472 vargs[mask_opno], gsi);
3473 }
3474
3475 gimple *new_stmt;
3476 if (cfn == CFN_GOMP_SIMD_LANE)
3477 {
3478 tree cst = build_index_vector (vectype_out, j * nunits_out, 1);
3479 tree new_var
3480 = vect_get_new_ssa_name (vectype_out, vect_simple_var, "cst_");
3481 gimple *init_stmt = gimple_build_assign (new_var, cst);
3482 vect_init_vector_1 (vinfo, stmt_info, init_stmt, NULL);
3483 new_temp = make_ssa_name (vec_dest);
3484 new_stmt = gimple_build_assign (new_temp, new_var);
3485 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3486 }
3487 else if (modifier == NARROW)
3488 {
3489 /* We don't define any narrowing conditional functions at
3490 present. */
3491 gcc_assert (mask_opno < 0);
3492 tree half_res = make_ssa_name (vectype_in);
3493 gcall *call = gimple_build_call_internal_vec (ifn, vargs);
3494 gimple_call_set_lhs (call, half_res);
3495 gimple_call_set_nothrow (call, true);
3496 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
3497 if ((j & 1) == 0)
3498 {
3499 prev_res = half_res;
3500 continue;
3501 }
3502 new_temp = make_ssa_name (vec_dest);
3503 new_stmt = gimple_build_assign (new_temp, convert_code,
3504 prev_res, half_res);
3505 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3506 }
3507 else
3508 {
3509 gcall *call;
3510 if (ifn != IFN_LAST)
3511 call = gimple_build_call_internal_vec (ifn, vargs);
3512 else
3513 call = gimple_build_call_vec (fndecl, vargs);
3514 new_temp = make_ssa_name (vec_dest, call);
3515 gimple_call_set_lhs (call, new_temp);
3516 gimple_call_set_nothrow (call, true);
3517 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
3518 new_stmt = call;
3519 }
3520
3521 if (j == (modifier == NARROW ? 1 : 0))
3522 *vec_stmt = new_stmt;
3523 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
3524 }
3525 for (i = 0; i < nargs; i++)
3526 {
3527 vec<tree> vec_oprndsi = vec_defs[i];
3528 vec_oprndsi.release ();
3529 }
3530 }
3531 else if (modifier == NARROW)
3532 {
3533 auto_vec<vec<tree> > vec_defs (nargs);
3534 /* We don't define any narrowing conditional functions at present. */
3535 gcc_assert (mask_opno < 0);
3536 for (j = 0; j < ncopies; ++j)
3537 {
3538 /* Build argument list for the vectorized call. */
3539 if (j == 0)
3540 vargs.create (nargs * 2);
3541 else
3542 vargs.truncate (0);
3543
3544 if (slp_node)
3545 {
3546 vec<tree> vec_oprnds0;
3547
3548 vect_get_slp_defs (vinfo, slp_node, &vec_defs);
3549 vec_oprnds0 = vec_defs[0];
3550
3551 /* Arguments are ready. Create the new vector stmt. */
3552 for (i = 0; vec_oprnds0.iterate (i, &vec_oprnd0); i += 2)
3553 {
3554 size_t k;
3555 vargs.truncate (0);
3556 for (k = 0; k < nargs; k++)
3557 {
3558 vec<tree> vec_oprndsk = vec_defs[k];
3559 vargs.quick_push (vec_oprndsk[i]);
3560 vargs.quick_push (vec_oprndsk[i + 1]);
3561 }
3562 gcall *call;
3563 if (ifn != IFN_LAST)
3564 call = gimple_build_call_internal_vec (ifn, vargs);
3565 else
3566 call = gimple_build_call_vec (fndecl, vargs);
3567 new_temp = make_ssa_name (vec_dest, call);
3568 gimple_call_set_lhs (call, new_temp);
3569 gimple_call_set_nothrow (call, true);
3570 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
3571 SLP_TREE_VEC_STMTS (slp_node).quick_push (call);
3572 }
3573 continue;
3574 }
3575
3576 for (i = 0; i < nargs; i++)
3577 {
3578 op = gimple_call_arg (stmt, i);
3579 if (j == 0)
3580 {
3581 vec_defs.quick_push (vNULL);
3582 vect_get_vec_defs_for_operand (vinfo, stmt_info, 2 * ncopies,
3583 op, &vec_defs[i], vectypes[i]);
3584 }
3585 vec_oprnd0 = vec_defs[i][2*j];
3586 vec_oprnd1 = vec_defs[i][2*j+1];
3587
3588 vargs.quick_push (vec_oprnd0);
3589 vargs.quick_push (vec_oprnd1);
3590 }
3591
3592 gcall *new_stmt = gimple_build_call_vec (fndecl, vargs);
3593 new_temp = make_ssa_name (vec_dest, new_stmt);
3594 gimple_call_set_lhs (new_stmt, new_temp);
3595 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3596
3597 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
3598 }
3599
3600 if (!slp_node)
3601 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
3602
3603 for (i = 0; i < nargs; i++)
3604 {
3605 vec<tree> vec_oprndsi = vec_defs[i];
3606 vec_oprndsi.release ();
3607 }
3608 }
3609 else
3610 /* No current target implements this case. */
3611 return false;
3612
3613 vargs.release ();
3614
3615 /* The call in STMT might prevent it from being removed in dce.
3616 However, we cannot remove it here, due to the way the ssa name
3617 it defines is mapped to the new definition. So just replace the
3618 rhs of the statement with something harmless. */
3619
3620 if (slp_node)
3621 return true;
3622
3623 stmt_info = vect_orig_stmt (stmt_info);
3624 lhs = gimple_get_lhs (stmt_info->stmt);
3625
3626 gassign *new_stmt
3627 = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
3628 vinfo->replace_stmt (gsi, stmt_info, new_stmt);
3629
3630 return true;
3631 }
3632
3633
3634 struct simd_call_arg_info
3635 {
3636 tree vectype;
3637 tree op;
3638 HOST_WIDE_INT linear_step;
3639 enum vect_def_type dt;
3640 unsigned int align;
3641 bool simd_lane_linear;
3642 };
3643
3644 /* Helper function of vectorizable_simd_clone_call. If OP, an SSA_NAME,
3645 is linear within simd lane (but not within whole loop), note it in
3646 *ARGINFO. */
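/* A minimal illustration (the SSA names and the array are made up):

     _5 = (sizetype) _lane_3;
     _6 = _5 * 8;
     op_7 = &array + _6;

   where _lane_3 is the result of an IFN_GOMP_SIMD_LANE call for the loop's
   simduid.  OP is then linear within a simd lane with base &array and
   linear step 8, even though it is not linear across the whole loop, and
   that is what gets recorded in *ARGINFO.  */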
3647
3648 static void
3649 vect_simd_lane_linear (tree op, class loop *loop,
3650 struct simd_call_arg_info *arginfo)
3651 {
3652 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
3653
3654 if (!is_gimple_assign (def_stmt)
3655 || gimple_assign_rhs_code (def_stmt) != POINTER_PLUS_EXPR
3656 || !is_gimple_min_invariant (gimple_assign_rhs1 (def_stmt)))
3657 return;
3658
3659 tree base = gimple_assign_rhs1 (def_stmt);
3660 HOST_WIDE_INT linear_step = 0;
3661 tree v = gimple_assign_rhs2 (def_stmt);
3662 while (TREE_CODE (v) == SSA_NAME)
3663 {
3664 tree t;
3665 def_stmt = SSA_NAME_DEF_STMT (v);
3666 if (is_gimple_assign (def_stmt))
3667 switch (gimple_assign_rhs_code (def_stmt))
3668 {
3669 case PLUS_EXPR:
3670 t = gimple_assign_rhs2 (def_stmt);
3671 if (linear_step || TREE_CODE (t) != INTEGER_CST)
3672 return;
3673 base = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (base), base, t);
3674 v = gimple_assign_rhs1 (def_stmt);
3675 continue;
3676 case MULT_EXPR:
3677 t = gimple_assign_rhs2 (def_stmt);
3678 if (linear_step || !tree_fits_shwi_p (t) || integer_zerop (t))
3679 return;
3680 linear_step = tree_to_shwi (t);
3681 v = gimple_assign_rhs1 (def_stmt);
3682 continue;
3683 CASE_CONVERT:
3684 t = gimple_assign_rhs1 (def_stmt);
3685 if (TREE_CODE (TREE_TYPE (t)) != INTEGER_TYPE
3686 || (TYPE_PRECISION (TREE_TYPE (v))
3687 < TYPE_PRECISION (TREE_TYPE (t))))
3688 return;
3689 if (!linear_step)
3690 linear_step = 1;
3691 v = t;
3692 continue;
3693 default:
3694 return;
3695 }
3696 else if (gimple_call_internal_p (def_stmt, IFN_GOMP_SIMD_LANE)
3697 && loop->simduid
3698 && TREE_CODE (gimple_call_arg (def_stmt, 0)) == SSA_NAME
3699 && (SSA_NAME_VAR (gimple_call_arg (def_stmt, 0))
3700 == loop->simduid))
3701 {
3702 if (!linear_step)
3703 linear_step = 1;
3704 arginfo->linear_step = linear_step;
3705 arginfo->op = base;
3706 arginfo->simd_lane_linear = true;
3707 return;
3708 }
3709 }
3710 }
3711
3712 /* Return the number of elements in vector type VECTYPE, which is associated
3713 with a SIMD clone. At present these vectors always have a constant
3714 length. */
3715
3716 static unsigned HOST_WIDE_INT
3717 simd_clone_subparts (tree vectype)
3718 {
3719 return TYPE_VECTOR_SUBPARTS (vectype).to_constant ();
3720 }
3721
3722 /* Function vectorizable_simd_clone_call.
3723
3724 Check if STMT_INFO performs a function call that can be vectorized
3725 by calling a simd clone of the function.
3726 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
3727 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
3728 Return true if STMT_INFO is vectorizable in this way. */
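/* For example, for a function declared with "#pragma omp declare simd"

     float foo (float x);

   a call "y_1 = foo (x_2);" in a loop vectorized with V4SF can conceptually
   be replaced by a call to one of the function's simd clones,

     vect_y_3 = foo.simdclone (vect_x_4);

   (The clone name and vector width are illustrative; the best clone is
   chosen from NODE->simd_clones below.)  */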
3729
3730 static bool
3731 vectorizable_simd_clone_call (vec_info *vinfo, stmt_vec_info stmt_info,
3732 gimple_stmt_iterator *gsi,
3733 gimple **vec_stmt, slp_tree slp_node,
3734 stmt_vector_for_cost *)
3735 {
3736 tree vec_dest;
3737 tree scalar_dest;
3738 tree op, type;
3739 tree vec_oprnd0 = NULL_TREE;
3740 tree vectype;
3741 poly_uint64 nunits;
3742 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
3743 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
3744 class loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
3745 tree fndecl, new_temp;
3746 int ncopies, j;
3747 auto_vec<simd_call_arg_info> arginfo;
3748 vec<tree> vargs = vNULL;
3749 size_t i, nargs;
3750 tree lhs, rtype, ratype;
3751 vec<constructor_elt, va_gc> *ret_ctor_elts = NULL;
3752
3753 /* Is STMT a vectorizable call? */
3754 gcall *stmt = dyn_cast <gcall *> (stmt_info->stmt);
3755 if (!stmt)
3756 return false;
3757
3758 fndecl = gimple_call_fndecl (stmt);
3759 if (fndecl == NULL_TREE)
3760 return false;
3761
3762 struct cgraph_node *node = cgraph_node::get (fndecl);
3763 if (node == NULL || node->simd_clones == NULL)
3764 return false;
3765
3766 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3767 return false;
3768
3769 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
3770 && ! vec_stmt)
3771 return false;
3772
3773 if (gimple_call_lhs (stmt)
3774 && TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
3775 return false;
3776
3777 gcc_checking_assert (!stmt_can_throw_internal (cfun, stmt));
3778
3779 vectype = STMT_VINFO_VECTYPE (stmt_info);
3780
3781 if (loop_vinfo && nested_in_vect_loop_p (loop, stmt_info))
3782 return false;
3783
3784 /* FORNOW */
3785 if (slp_node)
3786 return false;
3787
3788 /* Process function arguments. */
3789 nargs = gimple_call_num_args (stmt);
3790
3791 /* Bail out if the function has zero arguments. */
3792 if (nargs == 0)
3793 return false;
3794
3795 arginfo.reserve (nargs, true);
3796
3797 for (i = 0; i < nargs; i++)
3798 {
3799 simd_call_arg_info thisarginfo;
3800 affine_iv iv;
3801
3802 thisarginfo.linear_step = 0;
3803 thisarginfo.align = 0;
3804 thisarginfo.op = NULL_TREE;
3805 thisarginfo.simd_lane_linear = false;
3806
3807 op = gimple_call_arg (stmt, i);
3808 if (!vect_is_simple_use (op, vinfo, &thisarginfo.dt,
3809 &thisarginfo.vectype)
3810 || thisarginfo.dt == vect_uninitialized_def)
3811 {
3812 if (dump_enabled_p ())
3813 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3814 "use not simple.\n");
3815 return false;
3816 }
3817
3818 if (thisarginfo.dt == vect_constant_def
3819 || thisarginfo.dt == vect_external_def)
3820 gcc_assert (thisarginfo.vectype == NULL_TREE);
3821 else
3822 {
3823 gcc_assert (thisarginfo.vectype != NULL_TREE);
3824 if (VECTOR_BOOLEAN_TYPE_P (thisarginfo.vectype))
3825 {
3826 if (dump_enabled_p ())
3827 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3828 "vector mask arguments are not supported\n");
3829 return false;
3830 }
3831 }
3832
3833 /* For linear arguments, the analyze phase should have saved
3834 the base and step in STMT_VINFO_SIMD_CLONE_INFO. */
3835 if (i * 3 + 4 <= STMT_VINFO_SIMD_CLONE_INFO (stmt_info).length ()
3836 && STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2])
3837 {
3838 gcc_assert (vec_stmt);
3839 thisarginfo.linear_step
3840 = tree_to_shwi (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2]);
3841 thisarginfo.op
3842 = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 1];
3843 thisarginfo.simd_lane_linear
3844 = (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 3]
3845 == boolean_true_node);
3846 /* If loop has been peeled for alignment, we need to adjust it. */
3847 tree n1 = LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo);
3848 tree n2 = LOOP_VINFO_NITERS (loop_vinfo);
3849 if (n1 != n2 && !thisarginfo.simd_lane_linear)
3850 {
3851 tree bias = fold_build2 (MINUS_EXPR, TREE_TYPE (n1), n1, n2);
3852 tree step = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2];
3853 tree opt = TREE_TYPE (thisarginfo.op);
3854 bias = fold_convert (TREE_TYPE (step), bias);
3855 bias = fold_build2 (MULT_EXPR, TREE_TYPE (step), bias, step);
3856 thisarginfo.op
3857 = fold_build2 (POINTER_TYPE_P (opt)
3858 ? POINTER_PLUS_EXPR : PLUS_EXPR, opt,
3859 thisarginfo.op, bias);
3860 }
3861 }
3862 else if (!vec_stmt
3863 && thisarginfo.dt != vect_constant_def
3864 && thisarginfo.dt != vect_external_def
3865 && loop_vinfo
3866 && TREE_CODE (op) == SSA_NAME
3867 && simple_iv (loop, loop_containing_stmt (stmt), op,
3868 &iv, false)
3869 && tree_fits_shwi_p (iv.step))
3870 {
3871 thisarginfo.linear_step = tree_to_shwi (iv.step);
3872 thisarginfo.op = iv.base;
3873 }
3874 else if ((thisarginfo.dt == vect_constant_def
3875 || thisarginfo.dt == vect_external_def)
3876 && POINTER_TYPE_P (TREE_TYPE (op)))
3877 thisarginfo.align = get_pointer_alignment (op) / BITS_PER_UNIT;
3878 /* Addresses of array elements indexed by GOMP_SIMD_LANE are
3879 linear too. */
3880 if (POINTER_TYPE_P (TREE_TYPE (op))
3881 && !thisarginfo.linear_step
3882 && !vec_stmt
3883 && thisarginfo.dt != vect_constant_def
3884 && thisarginfo.dt != vect_external_def
3885 && loop_vinfo
3886 && !slp_node
3887 && TREE_CODE (op) == SSA_NAME)
3888 vect_simd_lane_linear (op, loop, &thisarginfo);
3889
3890 arginfo.quick_push (thisarginfo);
3891 }
3892
3893 poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
3894 if (!vf.is_constant ())
3895 {
3896 if (dump_enabled_p ())
3897 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3898 "not considering SIMD clones; not yet supported"
3899 " for variable-width vectors.\n");
3900 return false;
3901 }
3902
3903 unsigned int badness = 0;
3904 struct cgraph_node *bestn = NULL;
3905 if (STMT_VINFO_SIMD_CLONE_INFO (stmt_info).exists ())
3906 bestn = cgraph_node::get (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[0]);
3907 else
3908 for (struct cgraph_node *n = node->simd_clones; n != NULL;
3909 n = n->simdclone->next_clone)
3910 {
3911 unsigned int this_badness = 0;
3912 unsigned int num_calls;
3913 if (!constant_multiple_p (vf, n->simdclone->simdlen, &num_calls)
3914 || n->simdclone->nargs != nargs)
3915 continue;
3916 if (num_calls != 1)
3917 this_badness += exact_log2 (num_calls) * 4096;
3918 if (n->simdclone->inbranch)
3919 this_badness += 8192;
3920 int target_badness = targetm.simd_clone.usable (n);
3921 if (target_badness < 0)
3922 continue;
3923 this_badness += target_badness * 512;
3924 /* FORNOW: Have to add code to add the mask argument. */
3925 if (n->simdclone->inbranch)
3926 continue;
3927 for (i = 0; i < nargs; i++)
3928 {
3929 switch (n->simdclone->args[i].arg_type)
3930 {
3931 case SIMD_CLONE_ARG_TYPE_VECTOR:
3932 if (!useless_type_conversion_p
3933 (n->simdclone->args[i].orig_type,
3934 TREE_TYPE (gimple_call_arg (stmt, i))))
3935 i = -1;
3936 else if (arginfo[i].dt == vect_constant_def
3937 || arginfo[i].dt == vect_external_def
3938 || arginfo[i].linear_step)
3939 this_badness += 64;
3940 break;
3941 case SIMD_CLONE_ARG_TYPE_UNIFORM:
3942 if (arginfo[i].dt != vect_constant_def
3943 && arginfo[i].dt != vect_external_def)
3944 i = -1;
3945 break;
3946 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
3947 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
3948 if (arginfo[i].dt == vect_constant_def
3949 || arginfo[i].dt == vect_external_def
3950 || (arginfo[i].linear_step
3951 != n->simdclone->args[i].linear_step))
3952 i = -1;
3953 break;
3954 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
3955 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
3956 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP:
3957 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP:
3958 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
3959 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
3960 /* FORNOW */
3961 i = -1;
3962 break;
3963 case SIMD_CLONE_ARG_TYPE_MASK:
3964 gcc_unreachable ();
3965 }
3966 if (i == (size_t) -1)
3967 break;
3968 if (n->simdclone->args[i].alignment > arginfo[i].align)
3969 {
3970 i = -1;
3971 break;
3972 }
3973 if (arginfo[i].align)
3974 this_badness += (exact_log2 (arginfo[i].align)
3975 - exact_log2 (n->simdclone->args[i].alignment));
3976 }
3977 if (i == (size_t) -1)
3978 continue;
3979 if (bestn == NULL || this_badness < badness)
3980 {
3981 bestn = n;
3982 badness = this_badness;
3983 }
3984 }
3985
3986 if (bestn == NULL)
3987 return false;
3988
3989 for (i = 0; i < nargs; i++)
3990 if ((arginfo[i].dt == vect_constant_def
3991 || arginfo[i].dt == vect_external_def)
3992 && bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_VECTOR)
3993 {
3994 tree arg_type = TREE_TYPE (gimple_call_arg (stmt, i));
3995 arginfo[i].vectype = get_vectype_for_scalar_type (vinfo, arg_type,
3996 slp_node);
3997 if (arginfo[i].vectype == NULL
3998 || !constant_multiple_p (bestn->simdclone->simdlen,
3999 simd_clone_subparts (arginfo[i].vectype)))
4000 return false;
4001 }
4002
4003 fndecl = bestn->decl;
4004 nunits = bestn->simdclone->simdlen;
4005 ncopies = vector_unroll_factor (vf, nunits);
4006
4007 /* If the function isn't const, only allow it in simd loops where the
4008 user has asserted that at least nunits consecutive iterations can be
4009 performed using SIMD instructions. */
4010 if ((loop == NULL || maybe_lt ((unsigned) loop->safelen, nunits))
4011 && gimple_vuse (stmt))
4012 return false;
4013
4014 /* Sanity check: make sure that at least one copy of the vectorized stmt
4015 needs to be generated. */
4016 gcc_assert (ncopies >= 1);
4017
4018 if (!vec_stmt) /* transformation not required. */
4019 {
4020 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (bestn->decl);
4021 for (i = 0; i < nargs; i++)
4022 if ((bestn->simdclone->args[i].arg_type
4023 == SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP)
4024 || (bestn->simdclone->args[i].arg_type
4025 == SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP))
4026 {
4027 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_grow_cleared (i * 3
4028 + 1,
4029 true);
4030 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (arginfo[i].op);
4031 tree lst = POINTER_TYPE_P (TREE_TYPE (arginfo[i].op))
4032 ? size_type_node : TREE_TYPE (arginfo[i].op);
4033 tree ls = build_int_cst (lst, arginfo[i].linear_step);
4034 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (ls);
4035 tree sll = arginfo[i].simd_lane_linear
4036 ? boolean_true_node : boolean_false_node;
4037 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (sll);
4038 }
4039 STMT_VINFO_TYPE (stmt_info) = call_simd_clone_vec_info_type;
4040 DUMP_VECT_SCOPE ("vectorizable_simd_clone_call");
4041 /* vect_model_simple_cost (vinfo, stmt_info, ncopies,
4042 dt, slp_node, cost_vec); */
4043 return true;
4044 }
4045
4046 /* Transform. */
4047
4048 if (dump_enabled_p ())
4049 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
4050
4051 /* Handle def. */
4052 scalar_dest = gimple_call_lhs (stmt);
4053 vec_dest = NULL_TREE;
4054 rtype = NULL_TREE;
4055 ratype = NULL_TREE;
4056 if (scalar_dest)
4057 {
4058 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4059 rtype = TREE_TYPE (TREE_TYPE (fndecl));
4060 if (TREE_CODE (rtype) == ARRAY_TYPE)
4061 {
4062 ratype = rtype;
4063 rtype = TREE_TYPE (ratype);
4064 }
4065 }
4066
4067 auto_vec<vec<tree> > vec_oprnds;
4068 auto_vec<unsigned> vec_oprnds_i;
4069 vec_oprnds.safe_grow_cleared (nargs, true);
4070 vec_oprnds_i.safe_grow_cleared (nargs, true);
4071 for (j = 0; j < ncopies; ++j)
4072 {
4073 /* Build argument list for the vectorized call. */
4074 if (j == 0)
4075 vargs.create (nargs);
4076 else
4077 vargs.truncate (0);
4078
4079 for (i = 0; i < nargs; i++)
4080 {
4081 unsigned int k, l, m, o;
4082 tree atype;
4083 op = gimple_call_arg (stmt, i);
4084 switch (bestn->simdclone->args[i].arg_type)
4085 {
4086 case SIMD_CLONE_ARG_TYPE_VECTOR:
4087 atype = bestn->simdclone->args[i].vector_type;
4088 o = vector_unroll_factor (nunits,
4089 simd_clone_subparts (atype));
4090 for (m = j * o; m < (j + 1) * o; m++)
4091 {
4092 if (simd_clone_subparts (atype)
4093 < simd_clone_subparts (arginfo[i].vectype))
4094 {
4095 poly_uint64 prec = GET_MODE_BITSIZE (TYPE_MODE (atype));
4096 k = (simd_clone_subparts (arginfo[i].vectype)
4097 / simd_clone_subparts (atype));
4098 gcc_assert ((k & (k - 1)) == 0);
4099 if (m == 0)
4100 {
4101 vect_get_vec_defs_for_operand (vinfo, stmt_info,
4102 ncopies * o / k, op,
4103 &vec_oprnds[i]);
4104 vec_oprnds_i[i] = 0;
4105 vec_oprnd0 = vec_oprnds[i][vec_oprnds_i[i]++];
4106 }
4107 else
4108 {
4109 vec_oprnd0 = arginfo[i].op;
4110 if ((m & (k - 1)) == 0)
4111 vec_oprnd0 = vec_oprnds[i][vec_oprnds_i[i]++];
4112 }
4113 arginfo[i].op = vec_oprnd0;
4114 vec_oprnd0
4115 = build3 (BIT_FIELD_REF, atype, vec_oprnd0,
4116 bitsize_int (prec),
4117 bitsize_int ((m & (k - 1)) * prec));
4118 gassign *new_stmt
4119 = gimple_build_assign (make_ssa_name (atype),
4120 vec_oprnd0);
4121 vect_finish_stmt_generation (vinfo, stmt_info,
4122 new_stmt, gsi);
4123 vargs.safe_push (gimple_assign_lhs (new_stmt));
4124 }
4125 else
4126 {
4127 k = (simd_clone_subparts (atype)
4128 / simd_clone_subparts (arginfo[i].vectype));
4129 gcc_assert ((k & (k - 1)) == 0);
4130 vec<constructor_elt, va_gc> *ctor_elts;
4131 if (k != 1)
4132 vec_alloc (ctor_elts, k);
4133 else
4134 ctor_elts = NULL;
4135 for (l = 0; l < k; l++)
4136 {
4137 if (m == 0 && l == 0)
4138 {
4139 vect_get_vec_defs_for_operand (vinfo, stmt_info,
4140 k * o * ncopies,
4141 op,
4142 &vec_oprnds[i]);
4143 vec_oprnds_i[i] = 0;
4144 vec_oprnd0 = vec_oprnds[i][vec_oprnds_i[i]++];
4145 }
4146 else
4147 vec_oprnd0 = vec_oprnds[i][vec_oprnds_i[i]++];
4148 arginfo[i].op = vec_oprnd0;
4149 if (k == 1)
4150 break;
4151 CONSTRUCTOR_APPEND_ELT (ctor_elts, NULL_TREE,
4152 vec_oprnd0);
4153 }
4154 if (k == 1)
4155 if (!useless_type_conversion_p (TREE_TYPE (vec_oprnd0),
4156 atype))
4157 {
4158 vec_oprnd0
4159 = build1 (VIEW_CONVERT_EXPR, atype, vec_oprnd0);
4160 gassign *new_stmt
4161 = gimple_build_assign (make_ssa_name (atype),
4162 vec_oprnd0);
4163 vect_finish_stmt_generation (vinfo, stmt_info,
4164 new_stmt, gsi);
4165 vargs.safe_push (gimple_assign_lhs (new_stmt));
4166 }
4167 else
4168 vargs.safe_push (vec_oprnd0);
4169 else
4170 {
4171 vec_oprnd0 = build_constructor (atype, ctor_elts);
4172 gassign *new_stmt
4173 = gimple_build_assign (make_ssa_name (atype),
4174 vec_oprnd0);
4175 vect_finish_stmt_generation (vinfo, stmt_info,
4176 new_stmt, gsi);
4177 vargs.safe_push (gimple_assign_lhs (new_stmt));
4178 }
4179 }
4180 }
4181 break;
4182 case SIMD_CLONE_ARG_TYPE_UNIFORM:
4183 vargs.safe_push (op);
4184 break;
4185 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
4186 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
4187 if (j == 0)
4188 {
4189 gimple_seq stmts;
4190 arginfo[i].op
4191 = force_gimple_operand (unshare_expr (arginfo[i].op),
4192 &stmts, true, NULL_TREE);
4193 if (stmts != NULL)
4194 {
4195 basic_block new_bb;
4196 edge pe = loop_preheader_edge (loop);
4197 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
4198 gcc_assert (!new_bb);
4199 }
4200 if (arginfo[i].simd_lane_linear)
4201 {
4202 vargs.safe_push (arginfo[i].op);
4203 break;
4204 }
4205 tree phi_res = copy_ssa_name (op);
4206 gphi *new_phi = create_phi_node (phi_res, loop->header);
4207 add_phi_arg (new_phi, arginfo[i].op,
4208 loop_preheader_edge (loop), UNKNOWN_LOCATION);
4209 enum tree_code code
4210 = POINTER_TYPE_P (TREE_TYPE (op))
4211 ? POINTER_PLUS_EXPR : PLUS_EXPR;
4212 tree type = POINTER_TYPE_P (TREE_TYPE (op))
4213 ? sizetype : TREE_TYPE (op);
4214 poly_widest_int cst
4215 = wi::mul (bestn->simdclone->args[i].linear_step,
4216 ncopies * nunits);
4217 tree tcst = wide_int_to_tree (type, cst);
4218 tree phi_arg = copy_ssa_name (op);
4219 gassign *new_stmt
4220 = gimple_build_assign (phi_arg, code, phi_res, tcst);
4221 gimple_stmt_iterator si = gsi_after_labels (loop->header);
4222 gsi_insert_after (&si, new_stmt, GSI_NEW_STMT);
4223 add_phi_arg (new_phi, phi_arg, loop_latch_edge (loop),
4224 UNKNOWN_LOCATION);
4225 arginfo[i].op = phi_res;
4226 vargs.safe_push (phi_res);
4227 }
4228 else
4229 {
4230 enum tree_code code
4231 = POINTER_TYPE_P (TREE_TYPE (op))
4232 ? POINTER_PLUS_EXPR : PLUS_EXPR;
4233 tree type = POINTER_TYPE_P (TREE_TYPE (op))
4234 ? sizetype : TREE_TYPE (op);
4235 poly_widest_int cst
4236 = wi::mul (bestn->simdclone->args[i].linear_step,
4237 j * nunits);
4238 tree tcst = wide_int_to_tree (type, cst);
4239 new_temp = make_ssa_name (TREE_TYPE (op));
4240 gassign *new_stmt
4241 = gimple_build_assign (new_temp, code,
4242 arginfo[i].op, tcst);
4243 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4244 vargs.safe_push (new_temp);
4245 }
4246 break;
4247 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
4248 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP:
4249 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
4250 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP:
4251 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
4252 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
4253 default:
4254 gcc_unreachable ();
4255 }
4256 }
4257
4258 gcall *new_call = gimple_build_call_vec (fndecl, vargs);
4259 if (vec_dest)
4260 {
4261 gcc_assert (ratype
4262 || known_eq (simd_clone_subparts (rtype), nunits));
4263 if (ratype)
4264 new_temp = create_tmp_var (ratype);
4265 else if (useless_type_conversion_p (vectype, rtype))
4266 new_temp = make_ssa_name (vec_dest, new_call);
4267 else
4268 new_temp = make_ssa_name (rtype, new_call);
4269 gimple_call_set_lhs (new_call, new_temp);
4270 }
4271 vect_finish_stmt_generation (vinfo, stmt_info, new_call, gsi);
4272 gimple *new_stmt = new_call;
4273
4274 if (vec_dest)
4275 {
4276 if (!multiple_p (simd_clone_subparts (vectype), nunits))
4277 {
4278 unsigned int k, l;
4279 poly_uint64 prec = GET_MODE_BITSIZE (TYPE_MODE (vectype));
4280 poly_uint64 bytes = GET_MODE_SIZE (TYPE_MODE (vectype));
4281 k = vector_unroll_factor (nunits,
4282 simd_clone_subparts (vectype));
4283 gcc_assert ((k & (k - 1)) == 0);
4284 for (l = 0; l < k; l++)
4285 {
4286 tree t;
4287 if (ratype)
4288 {
4289 t = build_fold_addr_expr (new_temp);
4290 t = build2 (MEM_REF, vectype, t,
4291 build_int_cst (TREE_TYPE (t), l * bytes));
4292 }
4293 else
4294 t = build3 (BIT_FIELD_REF, vectype, new_temp,
4295 bitsize_int (prec), bitsize_int (l * prec));
4296 new_stmt = gimple_build_assign (make_ssa_name (vectype), t);
4297 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4298
4299 if (j == 0 && l == 0)
4300 *vec_stmt = new_stmt;
4301 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
4302 }
4303
4304 if (ratype)
4305 vect_clobber_variable (vinfo, stmt_info, gsi, new_temp);
4306 continue;
4307 }
4308 else if (!multiple_p (nunits, simd_clone_subparts (vectype)))
4309 {
4310 unsigned int k = (simd_clone_subparts (vectype)
4311 / simd_clone_subparts (rtype));
4312 gcc_assert ((k & (k - 1)) == 0);
4313 if ((j & (k - 1)) == 0)
4314 vec_alloc (ret_ctor_elts, k);
4315 if (ratype)
4316 {
4317 unsigned int m, o;
4318 o = vector_unroll_factor (nunits,
4319 simd_clone_subparts (rtype));
4320 for (m = 0; m < o; m++)
4321 {
4322 tree tem = build4 (ARRAY_REF, rtype, new_temp,
4323 size_int (m), NULL_TREE, NULL_TREE);
4324 new_stmt = gimple_build_assign (make_ssa_name (rtype),
4325 tem);
4326 vect_finish_stmt_generation (vinfo, stmt_info,
4327 new_stmt, gsi);
4328 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE,
4329 gimple_assign_lhs (new_stmt));
4330 }
4331 vect_clobber_variable (vinfo, stmt_info, gsi, new_temp);
4332 }
4333 else
4334 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE, new_temp);
4335 if ((j & (k - 1)) != k - 1)
4336 continue;
4337 vec_oprnd0 = build_constructor (vectype, ret_ctor_elts);
4338 new_stmt
4339 = gimple_build_assign (make_ssa_name (vec_dest), vec_oprnd0);
4340 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4341
4342 if ((unsigned) j == k - 1)
4343 *vec_stmt = new_stmt;
4344 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
4345 continue;
4346 }
4347 else if (ratype)
4348 {
4349 tree t = build_fold_addr_expr (new_temp);
4350 t = build2 (MEM_REF, vectype, t,
4351 build_int_cst (TREE_TYPE (t), 0));
4352 new_stmt = gimple_build_assign (make_ssa_name (vec_dest), t);
4353 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4354 vect_clobber_variable (vinfo, stmt_info, gsi, new_temp);
4355 }
4356 else if (!useless_type_conversion_p (vectype, rtype))
4357 {
4358 vec_oprnd0 = build1 (VIEW_CONVERT_EXPR, vectype, new_temp);
4359 new_stmt
4360 = gimple_build_assign (make_ssa_name (vec_dest), vec_oprnd0);
4361 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4362 }
4363 }
4364
4365 if (j == 0)
4366 *vec_stmt = new_stmt;
4367 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
4368 }
4369
4370 for (i = 0; i < nargs; ++i)
4371 {
4372 vec<tree> oprndsi = vec_oprnds[i];
4373 oprndsi.release ();
4374 }
4375 vargs.release ();
4376
4377 /* The call in STMT might prevent it from being removed in dce.
4378 However, we cannot remove it here, due to the way the ssa name
4379 it defines is mapped to the new definition. So just replace the
4380 rhs of the statement with something harmless. */
4381
4382 if (slp_node)
4383 return true;
4384
4385 gimple *new_stmt;
4386 if (scalar_dest)
4387 {
4388 type = TREE_TYPE (scalar_dest);
4389 lhs = gimple_call_lhs (vect_orig_stmt (stmt_info)->stmt);
4390 new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
4391 }
4392 else
4393 new_stmt = gimple_build_nop ();
4394 vinfo->replace_stmt (gsi, vect_orig_stmt (stmt_info), new_stmt);
4395 unlink_stmt_vdef (stmt);
4396
4397 return true;
4398 }
4399
4400
4401 /* Function vect_gen_widened_results_half
4402
4403 Create a vector stmt whose code, number of operands, and result
4404 variable are CODE, OP_TYPE, and VEC_DEST, and whose operands are
4405 VEC_OPRND0 and VEC_OPRND1 (VEC_OPRND1 is ignored for unary operations).
4406 The new vector stmt is to be inserted at GSI.
4407 STMT_INFO is the original scalar stmt that we are vectorizing. */
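/* For instance, with a binary OP_TYPE and CODE = VEC_WIDEN_MULT_LO_EXPR
   this emits (illustrative SSA names)

     vect_lo_1 = VEC_WIDEN_MULT_LO_EXPR <vect_a_2, vect_b_3>;

   producing the low half of the widened result; the caller then emits a
   second call with the matching *_HI_EXPR code for the other half.  */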
4409
4410 static gimple *
4411 vect_gen_widened_results_half (vec_info *vinfo, enum tree_code code,
4412 tree vec_oprnd0, tree vec_oprnd1, int op_type,
4413 tree vec_dest, gimple_stmt_iterator *gsi,
4414 stmt_vec_info stmt_info)
4415 {
4416 gimple *new_stmt;
4417 tree new_temp;
4418
4419 /* Generate half of the widened result: */
4420 gcc_assert (op_type == TREE_CODE_LENGTH (code));
4421 if (op_type != binary_op)
4422 vec_oprnd1 = NULL;
4423 new_stmt = gimple_build_assign (vec_dest, code, vec_oprnd0, vec_oprnd1);
4424 new_temp = make_ssa_name (vec_dest, new_stmt);
4425 gimple_assign_set_lhs (new_stmt, new_temp);
4426 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4427
4428 return new_stmt;
4429 }
4430
4431
4432 /* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
4433 For multi-step conversions store the resulting vectors and call the function
4434 recursively. */
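/* For example, narrowing four V4SI operand vectors down to one V16QI vector
   takes two steps: the first level packs pairs of V4SI vectors into V8HI,
   and the recursive call packs those into the final V16QI vector
   (illustrative types and SSA names):

     tmp0_1 = VEC_PACK_TRUNC_EXPR <v0_2, v1_3>;       (V4SI -> V8HI)
     tmp1_4 = VEC_PACK_TRUNC_EXPR <v2_5, v3_6>;       (V4SI -> V8HI)
     res_7 = VEC_PACK_TRUNC_EXPR <tmp0_1, tmp1_4>;    (V8HI -> V16QI)  */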
4435
4436 static void
4437 vect_create_vectorized_demotion_stmts (vec_info *vinfo, vec<tree> *vec_oprnds,
4438 int multi_step_cvt,
4439 stmt_vec_info stmt_info,
4440 vec<tree> vec_dsts,
4441 gimple_stmt_iterator *gsi,
4442 slp_tree slp_node, enum tree_code code)
4443 {
4444 unsigned int i;
4445 tree vop0, vop1, new_tmp, vec_dest;
4446
4447 vec_dest = vec_dsts.pop ();
4448
4449 for (i = 0; i < vec_oprnds->length (); i += 2)
4450 {
4451 /* Create demotion operation. */
4452 vop0 = (*vec_oprnds)[i];
4453 vop1 = (*vec_oprnds)[i + 1];
4454 gassign *new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
4455 new_tmp = make_ssa_name (vec_dest, new_stmt);
4456 gimple_assign_set_lhs (new_stmt, new_tmp);
4457 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4458
4459 if (multi_step_cvt)
4460 /* Store the resulting vector for next recursive call. */
4461 (*vec_oprnds)[i/2] = new_tmp;
4462 else
4463 {
4464 /* This is the last step of the conversion sequence. Store the
4465 vectors in SLP_NODE or in the vector info of the scalar statement
4466 (or in the STMT_VINFO_RELATED_STMT chain). */
4467 if (slp_node)
4468 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4469 else
4470 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
4471 }
4472 }
4473
4474 /* For multi-step demotion operations we first generate demotion operations
4475 from the source type to the intermediate types, and then combine the
4476 results (stored in VEC_OPRNDS) with a demotion operation to the
4477 destination type. */
4478 if (multi_step_cvt)
4479 {
4480 /* At each level of recursion we have half of the operands we had at the
4481 previous level. */
4482 vec_oprnds->truncate ((i+1)/2);
4483 vect_create_vectorized_demotion_stmts (vinfo, vec_oprnds,
4484 multi_step_cvt - 1,
4485 stmt_info, vec_dsts, gsi,
4486 slp_node, VEC_PACK_TRUNC_EXPR);
4487 }
4488
4489 vec_dsts.quick_push (vec_dest);
4490 }
4491
4492
4493 /* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
4494 and VEC_OPRNDS1, for a binary operation associated with scalar statement
4495 STMT_INFO. For multi-step conversions store the resulting vectors and
4496 call the function recursively. */
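/* For example, a widening multiplication of V8HI operands is emitted as a
   LO/HI pair (illustrative types and SSA names)

     lo_1 = VEC_WIDEN_MULT_LO_EXPR <va_2, vb_3>;      (V8HI -> V4SI)
     hi_4 = VEC_WIDEN_MULT_HI_EXPR <va_2, vb_3>;      (V8HI -> V4SI)

   and both halves are stored back into *VEC_OPRNDS0 for any further
   promotion step.  */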
4497
4498 static void
4499 vect_create_vectorized_promotion_stmts (vec_info *vinfo,
4500 vec<tree> *vec_oprnds0,
4501 vec<tree> *vec_oprnds1,
4502 stmt_vec_info stmt_info, tree vec_dest,
4503 gimple_stmt_iterator *gsi,
4504 enum tree_code code1,
4505 enum tree_code code2, int op_type)
4506 {
4507 int i;
4508 tree vop0, vop1, new_tmp1, new_tmp2;
4509 gimple *new_stmt1, *new_stmt2;
4510 vec<tree> vec_tmp = vNULL;
4511
4512 vec_tmp.create (vec_oprnds0->length () * 2);
4513 FOR_EACH_VEC_ELT (*vec_oprnds0, i, vop0)
4514 {
4515 if (op_type == binary_op)
4516 vop1 = (*vec_oprnds1)[i];
4517 else
4518 vop1 = NULL_TREE;
4519
4520 /* Generate the two halves of promotion operation. */
4521 new_stmt1 = vect_gen_widened_results_half (vinfo, code1, vop0, vop1,
4522 op_type, vec_dest, gsi,
4523 stmt_info);
4524 new_stmt2 = vect_gen_widened_results_half (vinfo, code2, vop0, vop1,
4525 op_type, vec_dest, gsi,
4526 stmt_info);
4527 if (is_gimple_call (new_stmt1))
4528 {
4529 new_tmp1 = gimple_call_lhs (new_stmt1);
4530 new_tmp2 = gimple_call_lhs (new_stmt2);
4531 }
4532 else
4533 {
4534 new_tmp1 = gimple_assign_lhs (new_stmt1);
4535 new_tmp2 = gimple_assign_lhs (new_stmt2);
4536 }
4537
4538 /* Store the results for the next step. */
4539 vec_tmp.quick_push (new_tmp1);
4540 vec_tmp.quick_push (new_tmp2);
4541 }
4542
4543 vec_oprnds0->release ();
4544 *vec_oprnds0 = vec_tmp;
4545 }
4546
4547 /* Create vectorized promotion stmts for widening stmts using only half the
4548 potential vector size for input. */
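/* For example, a WIDEN_PLUS_EXPR whose inputs occupy only half a vector can
   be emitted as (illustrative types and SSA names, assuming V4HI inputs and
   a V4SI result)

     wa_1 = (v4si) va_2;
     wb_3 = (v4si) vb_4;
     res_5 = wa_1 + wb_3;

   i.e. the inputs are first widened element-wise with a NOP_EXPR and the
   operation is then performed on the full-width vectors.  */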
4549 static void
4550 vect_create_half_widening_stmts (vec_info *vinfo,
4551 vec<tree> *vec_oprnds0,
4552 vec<tree> *vec_oprnds1,
4553 stmt_vec_info stmt_info, tree vec_dest,
4554 gimple_stmt_iterator *gsi,
4555 enum tree_code code1,
4556 int op_type)
4557 {
4558 int i;
4559 tree vop0, vop1;
4560 gimple *new_stmt1;
4561 gimple *new_stmt2;
4562 gimple *new_stmt3;
4563 vec<tree> vec_tmp = vNULL;
4564
4565 vec_tmp.create (vec_oprnds0->length ());
4566 FOR_EACH_VEC_ELT (*vec_oprnds0, i, vop0)
4567 {
4568 tree new_tmp1, new_tmp2, new_tmp3, out_type;
4569
4570 gcc_assert (op_type == binary_op);
4571 vop1 = (*vec_oprnds1)[i];
4572
4573 /* Widen the first vector input. */
4574 out_type = TREE_TYPE (vec_dest);
4575 new_tmp1 = make_ssa_name (out_type);
4576 new_stmt1 = gimple_build_assign (new_tmp1, NOP_EXPR, vop0);
4577 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt1, gsi);
4578 if (VECTOR_TYPE_P (TREE_TYPE (vop1)))
4579 {
4580 /* Widen the second vector input. */
4581 new_tmp2 = make_ssa_name (out_type);
4582 new_stmt2 = gimple_build_assign (new_tmp2, NOP_EXPR, vop1);
4583 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt2, gsi);
4584 /* Perform the operation with both vector inputs widened. */
4585 new_stmt3 = gimple_build_assign (vec_dest, code1, new_tmp1, new_tmp2);
4586 }
4587 else
4588 {
4589 /* Perform the operation with the single vector input widened. */
4590 new_stmt3 = gimple_build_assign (vec_dest, code1, new_tmp1, vop1);
4591 }
4592
4593 new_tmp3 = make_ssa_name (vec_dest, new_stmt3);
4594 gimple_assign_set_lhs (new_stmt3, new_tmp3);
4595 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt3, gsi);
4596
4597 /* Store the results for the next step. */
4598 vec_tmp.quick_push (new_tmp3);
4599 }
4600
4601 vec_oprnds0->release ();
4602 *vec_oprnds0 = vec_tmp;
4603 }
4604
4605
4606 /* Check if STMT_INFO performs a conversion operation that can be vectorized.
4607 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
4608 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
4609 Return true if STMT_INFO is vectorizable in this way. */
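/* For example, a conversion from int to double is a WIDEN case: each V4SI
   operand vector is conceptually unpacked and converted as

     lo_1 = VEC_UNPACK_FLOAT_LO_EXPR <vect_i_2>;      (V4SI -> V2DF)
     hi_3 = VEC_UNPACK_FLOAT_HI_EXPR <vect_i_2>;      (V4SI -> V2DF)

   whereas a conversion from double to float is a NARROW case that packs two
   input vectors into one result.  (Illustrative only; the exact codes are
   whatever the supportable_*_operation queries below report.)  */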
4610
4611 static bool
4612 vectorizable_conversion (vec_info *vinfo,
4613 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
4614 gimple **vec_stmt, slp_tree slp_node,
4615 stmt_vector_for_cost *cost_vec)
4616 {
4617 tree vec_dest;
4618 tree scalar_dest;
4619 tree op0, op1 = NULL_TREE;
4620 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
4621 enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
4622 enum tree_code codecvt1 = ERROR_MARK, codecvt2 = ERROR_MARK;
4623 tree new_temp;
4624 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
4625 int ndts = 2;
4626 poly_uint64 nunits_in;
4627 poly_uint64 nunits_out;
4628 tree vectype_out, vectype_in;
4629 int ncopies, i;
4630 tree lhs_type, rhs_type;
4631 enum { NARROW, NONE, WIDEN } modifier;
4632 vec<tree> vec_oprnds0 = vNULL;
4633 vec<tree> vec_oprnds1 = vNULL;
4634 tree vop0;
4635 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
4636 int multi_step_cvt = 0;
4637 vec<tree> interm_types = vNULL;
4638 tree intermediate_type, cvt_type = NULL_TREE;
4639 int op_type;
4640 unsigned short fltsz;
4641
4642 /* Is STMT a vectorizable conversion? */
4643
4644 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4645 return false;
4646
4647 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
4648 && ! vec_stmt)
4649 return false;
4650
4651 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
4652 if (!stmt)
4653 return false;
4654
4655 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
4656 return false;
4657
4658 code = gimple_assign_rhs_code (stmt);
4659 if (!CONVERT_EXPR_CODE_P (code)
4660 && code != FIX_TRUNC_EXPR
4661 && code != FLOAT_EXPR
4662 && code != WIDEN_PLUS_EXPR
4663 && code != WIDEN_MINUS_EXPR
4664 && code != WIDEN_MULT_EXPR
4665 && code != WIDEN_LSHIFT_EXPR)
4666 return false;
4667
4668 op_type = TREE_CODE_LENGTH (code);
4669
4670 /* Check types of lhs and rhs. */
4671 scalar_dest = gimple_assign_lhs (stmt);
4672 lhs_type = TREE_TYPE (scalar_dest);
4673 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
4674
4675 /* Check the operands of the operation. */
4676 slp_tree slp_op0, slp_op1 = NULL;
4677 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
4678 0, &op0, &slp_op0, &dt[0], &vectype_in))
4679 {
4680 if (dump_enabled_p ())
4681 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4682 "use not simple.\n");
4683 return false;
4684 }
4685
4686 rhs_type = TREE_TYPE (op0);
4687 if ((code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
4688 && !((INTEGRAL_TYPE_P (lhs_type)
4689 && INTEGRAL_TYPE_P (rhs_type))
4690 || (SCALAR_FLOAT_TYPE_P (lhs_type)
4691 && SCALAR_FLOAT_TYPE_P (rhs_type))))
4692 return false;
4693
4694 if (!VECTOR_BOOLEAN_TYPE_P (vectype_out)
4695 && ((INTEGRAL_TYPE_P (lhs_type)
4696 && !type_has_mode_precision_p (lhs_type))
4697 || (INTEGRAL_TYPE_P (rhs_type)
4698 && !type_has_mode_precision_p (rhs_type))))
4699 {
4700 if (dump_enabled_p ())
4701 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4702 "type conversion to/from bit-precision unsupported."
4703 "\n");
4704 return false;
4705 }
4706
4707 if (op_type == binary_op)
4708 {
4709 gcc_assert (code == WIDEN_MULT_EXPR || code == WIDEN_LSHIFT_EXPR
4710 || code == WIDEN_PLUS_EXPR || code == WIDEN_MINUS_EXPR);
4711
4712 op1 = gimple_assign_rhs2 (stmt);
4713 tree vectype1_in;
4714 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 1,
4715 &op1, &slp_op1, &dt[1], &vectype1_in))
4716 {
4717 if (dump_enabled_p ())
4718 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4719 "use not simple.\n");
4720 return false;
4721 }
4722 /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
4723 OP1. */
4724 if (!vectype_in)
4725 vectype_in = vectype1_in;
4726 }
4727
4728 /* If op0 is an external or constant def, infer the vector type
4729 from the scalar type. */
4730 if (!vectype_in)
4731 vectype_in = get_vectype_for_scalar_type (vinfo, rhs_type, slp_node);
4732 if (vec_stmt)
4733 gcc_assert (vectype_in);
4734 if (!vectype_in)
4735 {
4736 if (dump_enabled_p ())
4737 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4738 "no vectype for scalar type %T\n", rhs_type);
4739
4740 return false;
4741 }
4742
4743 if (VECTOR_BOOLEAN_TYPE_P (vectype_out)
4744 && !VECTOR_BOOLEAN_TYPE_P (vectype_in))
4745 {
4746 if (dump_enabled_p ())
4747 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4748 "can't convert between boolean and non "
4749 "boolean vectors %T\n", rhs_type);
4750
4751 return false;
4752 }
4753
4754 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
4755 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
4756 if (known_eq (nunits_out, nunits_in))
4757 if (code == WIDEN_MINUS_EXPR
4758 || code == WIDEN_PLUS_EXPR
4759 || code == WIDEN_LSHIFT_EXPR
4760 || code == WIDEN_MULT_EXPR)
4761 modifier = WIDEN;
4762 else
4763 modifier = NONE;
4764 else if (multiple_p (nunits_out, nunits_in))
4765 modifier = NARROW;
4766 else
4767 {
4768 gcc_checking_assert (multiple_p (nunits_in, nunits_out));
4769 modifier = WIDEN;
4770 }
4771
4772 /* Multiple types in SLP are handled by creating the appropriate number of
4773 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4774 case of SLP. */
4775 if (slp_node)
4776 ncopies = 1;
4777 else if (modifier == NARROW)
4778 ncopies = vect_get_num_copies (loop_vinfo, vectype_out);
4779 else
4780 ncopies = vect_get_num_copies (loop_vinfo, vectype_in);
4781
4782 /* Sanity check: make sure that at least one copy of the vectorized stmt
4783 needs to be generated. */
4784 gcc_assert (ncopies >= 1);
4785
4786 bool found_mode = false;
4787 scalar_mode lhs_mode = SCALAR_TYPE_MODE (lhs_type);
4788 scalar_mode rhs_mode = SCALAR_TYPE_MODE (rhs_type);
4789 opt_scalar_mode rhs_mode_iter;
4790
4791 /* Supportable by target? */
4792 switch (modifier)
4793 {
4794 case NONE:
4795 if (code != FIX_TRUNC_EXPR
4796 && code != FLOAT_EXPR
4797 && !CONVERT_EXPR_CODE_P (code))
4798 return false;
4799 if (supportable_convert_operation (code, vectype_out, vectype_in, &code1))
4800 break;
4801 /* FALLTHRU */
4802 unsupported:
4803 if (dump_enabled_p ())
4804 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4805 "conversion not supported by target.\n");
4806 return false;
4807
4808 case WIDEN:
4809 if (known_eq (nunits_in, nunits_out))
4810 {
4811 if (!supportable_half_widening_operation (code, vectype_out,
4812 vectype_in, &code1))
4813 goto unsupported;
4814 gcc_assert (!(multi_step_cvt && op_type == binary_op));
4815 break;
4816 }
4817 if (supportable_widening_operation (vinfo, code, stmt_info,
4818 vectype_out, vectype_in, &code1,
4819 &code2, &multi_step_cvt,
4820 &interm_types))
4821 {
4822 /* Binary widening operation can only be supported directly by the
4823 architecture. */
4824 gcc_assert (!(multi_step_cvt && op_type == binary_op));
4825 break;
4826 }
4827
4828 if (code != FLOAT_EXPR
4829 || GET_MODE_SIZE (lhs_mode) <= GET_MODE_SIZE (rhs_mode))
4830 goto unsupported;
4831
4832 fltsz = GET_MODE_SIZE (lhs_mode);
4833 FOR_EACH_2XWIDER_MODE (rhs_mode_iter, rhs_mode)
4834 {
4835 rhs_mode = rhs_mode_iter.require ();
4836 if (GET_MODE_SIZE (rhs_mode) > fltsz)
4837 break;
4838
4839 cvt_type
4840 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
4841 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
4842 if (cvt_type == NULL_TREE)
4843 goto unsupported;
4844
4845 if (GET_MODE_SIZE (rhs_mode) == fltsz)
4846 {
4847 if (!supportable_convert_operation (code, vectype_out,
4848 cvt_type, &codecvt1))
4849 goto unsupported;
4850 }
4851 else if (!supportable_widening_operation (vinfo, code, stmt_info,
4852 vectype_out, cvt_type,
4853 &codecvt1, &codecvt2,
4854 &multi_step_cvt,
4855 &interm_types))
4856 continue;
4857 else
4858 gcc_assert (multi_step_cvt == 0);
4859
4860 if (supportable_widening_operation (vinfo, NOP_EXPR, stmt_info,
4861 cvt_type,
4862 vectype_in, &code1, &code2,
4863 &multi_step_cvt, &interm_types))
4864 {
4865 found_mode = true;
4866 break;
4867 }
4868 }
4869
4870 if (!found_mode)
4871 goto unsupported;
4872
4873 if (GET_MODE_SIZE (rhs_mode) == fltsz)
4874 codecvt2 = ERROR_MARK;
4875 else
4876 {
4877 multi_step_cvt++;
4878 interm_types.safe_push (cvt_type);
4879 cvt_type = NULL_TREE;
4880 }
4881 break;
4882
4883 case NARROW:
4884 gcc_assert (op_type == unary_op);
4885 if (supportable_narrowing_operation (code, vectype_out, vectype_in,
4886 &code1, &multi_step_cvt,
4887 &interm_types))
4888 break;
4889
4890 if (code != FIX_TRUNC_EXPR
4891 || GET_MODE_SIZE (lhs_mode) >= GET_MODE_SIZE (rhs_mode))
4892 goto unsupported;
4893
4894 cvt_type
4895 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
4896 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
4897 if (cvt_type == NULL_TREE)
4898 goto unsupported;
4899 if (!supportable_convert_operation (code, cvt_type, vectype_in,
4900 &codecvt1))
4901 goto unsupported;
4902 if (supportable_narrowing_operation (NOP_EXPR, vectype_out, cvt_type,
4903 &code1, &multi_step_cvt,
4904 &interm_types))
4905 break;
4906 goto unsupported;
4907
4908 default:
4909 gcc_unreachable ();
4910 }
4911
4912 if (!vec_stmt) /* transformation not required. */
4913 {
4914 if (slp_node
4915 && (!vect_maybe_update_slp_op_vectype (slp_op0, vectype_in)
4916 || !vect_maybe_update_slp_op_vectype (slp_op1, vectype_in)))
4917 {
4918 if (dump_enabled_p ())
4919 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4920 "incompatible vector types for invariants\n");
4921 return false;
4922 }
4923 DUMP_VECT_SCOPE ("vectorizable_conversion");
4924 if (modifier == NONE)
4925 {
4926 STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type;
4927 vect_model_simple_cost (vinfo, stmt_info, ncopies, dt, ndts, slp_node,
4928 cost_vec);
4929 }
4930 else if (modifier == NARROW)
4931 {
4932 STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type;
4933 /* The final packing step produces one vector result per copy. */
4934 unsigned int nvectors
4935 = (slp_node ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node) : ncopies);
4936 vect_model_promotion_demotion_cost (stmt_info, dt, nvectors,
4937 multi_step_cvt, cost_vec);
4938 }
4939 else
4940 {
4941 STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type;
4942 /* The initial unpacking step produces two vector results
4943 per copy. MULTI_STEP_CVT is 0 for a single conversion,
4944 so >> MULTI_STEP_CVT divides by 2^(number of steps - 1). */
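	      /* Illustrative arithmetic: with 8 vectorized stmts in the
		 SLP node and MULTI_STEP_CVT == 1 (a two-step promotion)
		 this passes 8 >> 1 == 4 vectors to the cost model.  */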
4945 unsigned int nvectors
4946 = (slp_node
4947 ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node) >> multi_step_cvt
4948 : ncopies * 2);
4949 vect_model_promotion_demotion_cost (stmt_info, dt, nvectors,
4950 multi_step_cvt, cost_vec);
4951 }
4952 interm_types.release ();
4953 return true;
4954 }
4955
4956 /* Transform. */
4957 if (dump_enabled_p ())
4958 dump_printf_loc (MSG_NOTE, vect_location,
4959 "transform conversion. ncopies = %d.\n", ncopies);
4960
4961 if (op_type == binary_op)
4962 {
4963 if (CONSTANT_CLASS_P (op0))
4964 op0 = fold_convert (TREE_TYPE (op1), op0);
4965 else if (CONSTANT_CLASS_P (op1))
4966 op1 = fold_convert (TREE_TYPE (op0), op1);
4967 }
4968
4969 /* In case of multi-step conversion, we first generate conversion operations
4970      to the intermediate types, and then from those types to the final one.
4971 We create vector destinations for the intermediate type (TYPES) received
4972 from supportable_*_operation, and store them in the correct order
4973 for future use in vect_create_vectorized_*_stmts (). */
4974 auto_vec<tree> vec_dsts (multi_step_cvt + 1);
4975 vec_dest = vect_create_destination_var (scalar_dest,
4976 (cvt_type && modifier == WIDEN)
4977 ? cvt_type : vectype_out);
4978 vec_dsts.quick_push (vec_dest);
4979
4980 if (multi_step_cvt)
4981 {
4982 for (i = interm_types.length () - 1;
4983 interm_types.iterate (i, &intermediate_type); i--)
4984 {
4985 vec_dest = vect_create_destination_var (scalar_dest,
4986 intermediate_type);
4987 vec_dsts.quick_push (vec_dest);
4988 }
4989 }
4990
4991 if (cvt_type)
4992 vec_dest = vect_create_destination_var (scalar_dest,
4993 modifier == WIDEN
4994 ? vectype_out : cvt_type);
4995
4996 int ninputs = 1;
4997 if (!slp_node)
4998 {
4999 if (modifier == WIDEN)
5000 ;
5001 else if (modifier == NARROW)
5002 {
5003 if (multi_step_cvt)
5004 ninputs = vect_pow2 (multi_step_cvt);
5005 ninputs *= 2;
5006 }
5007 }
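  /* Illustrative example: a non-SLP two-step narrowing has
     MULTI_STEP_CVT == 1, so NINPUTS = vect_pow2 (1) * 2 == 4, i.e. four
     input vectors are consumed per copy of the final narrowed result.  */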
5008
5009 switch (modifier)
5010 {
5011 case NONE:
5012 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
5013 op0, &vec_oprnds0);
5014 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5015 {
5016 /* Arguments are ready, create the new vector stmt. */
5017 gcc_assert (TREE_CODE_LENGTH (code1) == unary_op);
5018 gassign *new_stmt = gimple_build_assign (vec_dest, code1, vop0);
5019 new_temp = make_ssa_name (vec_dest, new_stmt);
5020 gimple_assign_set_lhs (new_stmt, new_temp);
5021 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
5022
5023 if (slp_node)
5024 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
5025 else
5026 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
5027 }
5028 break;
5029
5030 case WIDEN:
5031 /* In case the vectorization factor (VF) is bigger than the number
5032 of elements that we can fit in a vectype (nunits), we have to
5033      generate more than one vector stmt - i.e. - we need to "unroll"
5034 the vector stmt by a factor VF/nunits. */
5035 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies * ninputs,
5036 op0, &vec_oprnds0,
5037 code == WIDEN_LSHIFT_EXPR ? NULL_TREE : op1,
5038 &vec_oprnds1);
5039 if (code == WIDEN_LSHIFT_EXPR)
5040 {
5041 int oprnds_size = vec_oprnds0.length ();
5042 vec_oprnds1.create (oprnds_size);
5043 for (i = 0; i < oprnds_size; ++i)
5044 vec_oprnds1.quick_push (op1);
5045 }
5046 /* Arguments are ready. Create the new vector stmts. */
5047 for (i = multi_step_cvt; i >= 0; i--)
5048 {
5049 tree this_dest = vec_dsts[i];
5050 enum tree_code c1 = code1, c2 = code2;
5051 if (i == 0 && codecvt2 != ERROR_MARK)
5052 {
5053 c1 = codecvt1;
5054 c2 = codecvt2;
5055 }
5056 if (known_eq (nunits_out, nunits_in))
5057 vect_create_half_widening_stmts (vinfo, &vec_oprnds0,
5058 &vec_oprnds1, stmt_info,
5059 this_dest, gsi,
5060 c1, op_type);
5061 else
5062 vect_create_vectorized_promotion_stmts (vinfo, &vec_oprnds0,
5063 &vec_oprnds1, stmt_info,
5064 this_dest, gsi,
5065 c1, c2, op_type);
5066 }
5067
5068 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5069 {
5070 gimple *new_stmt;
5071 if (cvt_type)
5072 {
5073 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
5074 new_temp = make_ssa_name (vec_dest);
5075 new_stmt = gimple_build_assign (new_temp, codecvt1, vop0);
5076 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
5077 }
5078 else
5079 new_stmt = SSA_NAME_DEF_STMT (vop0);
5080
5081 if (slp_node)
5082 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
5083 else
5084 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
5085 }
5086 break;
5087
5088 case NARROW:
5089 /* In case the vectorization factor (VF) is bigger than the number
5090 of elements that we can fit in a vectype (nunits), we have to
5091      generate more than one vector stmt - i.e. - we need to "unroll"
5092 the vector stmt by a factor VF/nunits. */
5093 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies * ninputs,
5094 op0, &vec_oprnds0);
5095 /* Arguments are ready. Create the new vector stmts. */
5096 if (cvt_type)
5097 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5098 {
5099 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
5100 new_temp = make_ssa_name (vec_dest);
5101 gassign *new_stmt
5102 = gimple_build_assign (new_temp, codecvt1, vop0);
5103 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
5104 vec_oprnds0[i] = new_temp;
5105 }
5106
5107 vect_create_vectorized_demotion_stmts (vinfo, &vec_oprnds0,
5108 multi_step_cvt,
5109 stmt_info, vec_dsts, gsi,
5110 slp_node, code1);
5111 break;
5112 }
5113 if (!slp_node)
5114 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
5115
5116 vec_oprnds0.release ();
5117 vec_oprnds1.release ();
5118 interm_types.release ();
5119
5120 return true;
5121 }
5122
5123 /* Return true if we can assume from the scalar form of STMT_INFO that
5124 neither the scalar nor the vector forms will generate code. STMT_INFO
5125 is known not to involve a data reference. */
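/* For instance (illustrative only), a plain SSA copy, a
   VIEW_CONVERT_EXPR, or a cast between int and unsigned int generates
   no code in either the scalar or the vector form.  */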
5126
5127 bool
5128 vect_nop_conversion_p (stmt_vec_info stmt_info)
5129 {
5130 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
5131 if (!stmt)
5132 return false;
5133
5134 tree lhs = gimple_assign_lhs (stmt);
5135 tree_code code = gimple_assign_rhs_code (stmt);
5136 tree rhs = gimple_assign_rhs1 (stmt);
5137
5138 if (code == SSA_NAME || code == VIEW_CONVERT_EXPR)
5139 return true;
5140
5141 if (CONVERT_EXPR_CODE_P (code))
5142 return tree_nop_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs));
5143
5144 return false;
5145 }
5146
5147 /* Function vectorizable_assignment.
5148
5149 Check if STMT_INFO performs an assignment (copy) that can be vectorized.
5150 If VEC_STMT is also passed, vectorize the STMT_INFO: create a vectorized
5151 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
5152 Return true if STMT_INFO is vectorizable in this way. */
5153
5154 static bool
5155 vectorizable_assignment (vec_info *vinfo,
5156 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
5157 gimple **vec_stmt, slp_tree slp_node,
5158 stmt_vector_for_cost *cost_vec)
5159 {
5160 tree vec_dest;
5161 tree scalar_dest;
5162 tree op;
5163 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
5164 tree new_temp;
5165 enum vect_def_type dt[1] = {vect_unknown_def_type};
5166 int ndts = 1;
5167 int ncopies;
5168 int i;
5169 vec<tree> vec_oprnds = vNULL;
5170 tree vop;
5171 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
5172 enum tree_code code;
5173 tree vectype_in;
5174
5175 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5176 return false;
5177
5178 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5179 && ! vec_stmt)
5180 return false;
5181
5182 /* Is vectorizable assignment? */
5183 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
5184 if (!stmt)
5185 return false;
5186
5187 scalar_dest = gimple_assign_lhs (stmt);
5188 if (TREE_CODE (scalar_dest) != SSA_NAME)
5189 return false;
5190
5191 if (STMT_VINFO_DATA_REF (stmt_info))
5192 return false;
5193
5194 code = gimple_assign_rhs_code (stmt);
5195 if (!(gimple_assign_single_p (stmt)
5196 || code == PAREN_EXPR
5197 || CONVERT_EXPR_CODE_P (code)))
5198 return false;
5199
5200 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
5201 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
5202
5203 /* Multiple types in SLP are handled by creating the appropriate number of
5204 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5205 case of SLP. */
5206 if (slp_node)
5207 ncopies = 1;
5208 else
5209 ncopies = vect_get_num_copies (loop_vinfo, vectype);
5210
5211 gcc_assert (ncopies >= 1);
5212
5213 slp_tree slp_op;
5214 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 0, &op, &slp_op,
5215 &dt[0], &vectype_in))
5216 {
5217 if (dump_enabled_p ())
5218 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5219 "use not simple.\n");
5220 return false;
5221 }
5222 if (!vectype_in)
5223 vectype_in = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op), slp_node);
5224
5225 /* We can handle NOP_EXPR conversions that do not change the number
5226 of elements or the vector size. */
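  /* A conceivable example (illustrative, 128-bit modes): a V4SI <-> V4SF
     VIEW_CONVERT_EXPR keeps both the element count and the vector size
     and is handled here, whereas an int -> short conversion changes the
     element count and is left to vectorizable_conversion.  */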
5227 if ((CONVERT_EXPR_CODE_P (code)
5228 || code == VIEW_CONVERT_EXPR)
5229 && (!vectype_in
5230 || maybe_ne (TYPE_VECTOR_SUBPARTS (vectype_in), nunits)
5231 || maybe_ne (GET_MODE_SIZE (TYPE_MODE (vectype)),
5232 GET_MODE_SIZE (TYPE_MODE (vectype_in)))))
5233 return false;
5234
5235 if (VECTOR_BOOLEAN_TYPE_P (vectype)
5236 && !VECTOR_BOOLEAN_TYPE_P (vectype_in))
5237 {
5238 if (dump_enabled_p ())
5239 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5240 "can't convert between boolean and non "
5241 "boolean vectors %T\n", TREE_TYPE (op));
5242
5243 return false;
5244 }
5245
5246 /* We do not handle bit-precision changes. */
5247 if ((CONVERT_EXPR_CODE_P (code)
5248 || code == VIEW_CONVERT_EXPR)
5249 && INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
5250 && (!type_has_mode_precision_p (TREE_TYPE (scalar_dest))
5251 || !type_has_mode_precision_p (TREE_TYPE (op)))
5252 /* But a conversion that does not change the bit-pattern is ok. */
5253 && !((TYPE_PRECISION (TREE_TYPE (scalar_dest))
5254 > TYPE_PRECISION (TREE_TYPE (op)))
5255 && TYPE_UNSIGNED (TREE_TYPE (op))))
5256 {
5257 if (dump_enabled_p ())
5258 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5259 "type conversion to/from bit-precision "
5260 "unsupported.\n");
5261 return false;
5262 }
5263
5264 if (!vec_stmt) /* transformation not required. */
5265 {
5266 if (slp_node
5267 && !vect_maybe_update_slp_op_vectype (slp_op, vectype_in))
5268 {
5269 if (dump_enabled_p ())
5270 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5271 "incompatible vector types for invariants\n");
5272 return false;
5273 }
5274 STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
5275 DUMP_VECT_SCOPE ("vectorizable_assignment");
5276 if (!vect_nop_conversion_p (stmt_info))
5277 vect_model_simple_cost (vinfo, stmt_info, ncopies, dt, ndts, slp_node,
5278 cost_vec);
5279 return true;
5280 }
5281
5282 /* Transform. */
5283 if (dump_enabled_p ())
5284 dump_printf_loc (MSG_NOTE, vect_location, "transform assignment.\n");
5285
5286 /* Handle def. */
5287 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5288
5289 /* Handle use. */
5290 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies, op, &vec_oprnds);
5291
5292   /* Arguments are ready.  Create the new vector stmt.  */
5293 FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
5294 {
5295 if (CONVERT_EXPR_CODE_P (code)
5296 || code == VIEW_CONVERT_EXPR)
5297 vop = build1 (VIEW_CONVERT_EXPR, vectype, vop);
5298 gassign *new_stmt = gimple_build_assign (vec_dest, vop);
5299 new_temp = make_ssa_name (vec_dest, new_stmt);
5300 gimple_assign_set_lhs (new_stmt, new_temp);
5301 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
5302 if (slp_node)
5303 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
5304 else
5305 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
5306 }
5307 if (!slp_node)
5308 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
5309
5310 vec_oprnds.release ();
5311 return true;
5312 }
5313
5314
5315 /* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE
5316 either as shift by a scalar or by a vector. */
5317
5318 bool
5319 vect_supportable_shift (vec_info *vinfo, enum tree_code code, tree scalar_type)
5320 {
5321
5322 machine_mode vec_mode;
5323 optab optab;
5324 int icode;
5325 tree vectype;
5326
5327 vectype = get_vectype_for_scalar_type (vinfo, scalar_type);
5328 if (!vectype)
5329 return false;
5330
5331 optab = optab_for_tree_code (code, vectype, optab_scalar);
5332 if (!optab
5333 || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
5334 {
5335 optab = optab_for_tree_code (code, vectype, optab_vector);
5336 if (!optab
5337 || (optab_handler (optab, TYPE_MODE (vectype))
5338 == CODE_FOR_nothing))
5339 return false;
5340 }
5341
5342 vec_mode = TYPE_MODE (vectype);
5343 icode = (int) optab_handler (optab, vec_mode);
5344 if (icode == CODE_FOR_nothing)
5345 return false;
5346
5347 return true;
5348 }
5349
5350
5351 /* Function vectorizable_shift.
5352
5353 Check if STMT_INFO performs a shift operation that can be vectorized.
5354 If VEC_STMT is also passed, vectorize the STMT_INFO: create a vectorized
5355 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
5356 Return true if STMT_INFO is vectorizable in this way. */
5357
5358 static bool
5359 vectorizable_shift (vec_info *vinfo,
5360 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
5361 gimple **vec_stmt, slp_tree slp_node,
5362 stmt_vector_for_cost *cost_vec)
5363 {
5364 tree vec_dest;
5365 tree scalar_dest;
5366 tree op0, op1 = NULL;
5367 tree vec_oprnd1 = NULL_TREE;
5368 tree vectype;
5369 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
5370 enum tree_code code;
5371 machine_mode vec_mode;
5372 tree new_temp;
5373 optab optab;
5374 int icode;
5375 machine_mode optab_op2_mode;
5376 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
5377 int ndts = 2;
5378 poly_uint64 nunits_in;
5379 poly_uint64 nunits_out;
5380 tree vectype_out;
5381 tree op1_vectype;
5382 int ncopies;
5383 int i;
5384 vec<tree> vec_oprnds0 = vNULL;
5385 vec<tree> vec_oprnds1 = vNULL;
5386 tree vop0, vop1;
5387 unsigned int k;
5388 bool scalar_shift_arg = true;
5389 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
5390 bool incompatible_op1_vectype_p = false;
5391
5392 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5393 return false;
5394
5395 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5396 && STMT_VINFO_DEF_TYPE (stmt_info) != vect_nested_cycle
5397 && ! vec_stmt)
5398 return false;
5399
5400 /* Is STMT a vectorizable binary/unary operation? */
5401 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
5402 if (!stmt)
5403 return false;
5404
5405 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
5406 return false;
5407
5408 code = gimple_assign_rhs_code (stmt);
5409
5410 if (!(code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
5411 || code == RROTATE_EXPR))
5412 return false;
5413
5414 scalar_dest = gimple_assign_lhs (stmt);
5415 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
5416 if (!type_has_mode_precision_p (TREE_TYPE (scalar_dest)))
5417 {
5418 if (dump_enabled_p ())
5419 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5420 "bit-precision shifts not supported.\n");
5421 return false;
5422 }
5423
5424 slp_tree slp_op0;
5425 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
5426 0, &op0, &slp_op0, &dt[0], &vectype))
5427 {
5428 if (dump_enabled_p ())
5429 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5430 "use not simple.\n");
5431 return false;
5432 }
5433 /* If op0 is an external or constant def, infer the vector type
5434 from the scalar type. */
5435 if (!vectype)
5436 vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op0), slp_node);
5437 if (vec_stmt)
5438 gcc_assert (vectype);
5439 if (!vectype)
5440 {
5441 if (dump_enabled_p ())
5442 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5443 "no vectype for scalar type\n");
5444 return false;
5445 }
5446
5447 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
5448 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
5449 if (maybe_ne (nunits_out, nunits_in))
5450 return false;
5451
5452 stmt_vec_info op1_def_stmt_info;
5453 slp_tree slp_op1;
5454 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 1, &op1, &slp_op1,
5455 &dt[1], &op1_vectype, &op1_def_stmt_info))
5456 {
5457 if (dump_enabled_p ())
5458 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5459 "use not simple.\n");
5460 return false;
5461 }
5462
5463 /* Multiple types in SLP are handled by creating the appropriate number of
5464 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5465 case of SLP. */
5466 if (slp_node)
5467 ncopies = 1;
5468 else
5469 ncopies = vect_get_num_copies (loop_vinfo, vectype);
5470
5471 gcc_assert (ncopies >= 1);
5472
5473 /* Determine whether the shift amount is a vector, or scalar. If the
5474 shift/rotate amount is a vector, use the vector/vector shift optabs. */
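  /* For example (illustrative): in a[i] = b[i] << c[i] the shift amount
     is an internal (vector) def, whereas in a[i] = b[i] << n with a
     loop-invariant N one scalar shift amount can serve the whole vector
     stmt.  */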
5475
5476 if ((dt[1] == vect_internal_def
5477 || dt[1] == vect_induction_def
5478 || dt[1] == vect_nested_cycle)
5479 && !slp_node)
5480 scalar_shift_arg = false;
5481 else if (dt[1] == vect_constant_def
5482 || dt[1] == vect_external_def
5483 || dt[1] == vect_internal_def)
5484 {
5485 	  /* In SLP, we need to check whether the shift count is the same
5486 	     in all the scalar stmts; in a loop, if it is a constant or
5487 	     invariant, it is always a scalar shift.  */
5488 if (slp_node)
5489 {
5490 vec<stmt_vec_info> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
5491 stmt_vec_info slpstmt_info;
5492
5493 FOR_EACH_VEC_ELT (stmts, k, slpstmt_info)
5494 {
5495 gassign *slpstmt = as_a <gassign *> (slpstmt_info->stmt);
5496 if (!operand_equal_p (gimple_assign_rhs2 (slpstmt), op1, 0))
5497 scalar_shift_arg = false;
5498 }
5499
5500 /* For internal SLP defs we have to make sure we see scalar stmts
5501 for all vector elements.
5502 ??? For different vectors we could resort to a different
5503 scalar shift operand but code-generation below simply always
5504 takes the first. */
5505 if (dt[1] == vect_internal_def
5506 && maybe_ne (nunits_out * SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node),
5507 stmts.length ()))
5508 scalar_shift_arg = false;
5509 }
5510
5511 /* If the shift amount is computed by a pattern stmt we cannot
5512 use the scalar amount directly thus give up and use a vector
5513 shift. */
5514 if (op1_def_stmt_info && is_pattern_stmt_p (op1_def_stmt_info))
5515 scalar_shift_arg = false;
5516 }
5517 else
5518 {
5519 if (dump_enabled_p ())
5520 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5521 "operand mode requires invariant argument.\n");
5522 return false;
5523 }
5524
5525 /* Vector shifted by vector. */
5526 bool was_scalar_shift_arg = scalar_shift_arg;
5527 if (!scalar_shift_arg)
5528 {
5529 optab = optab_for_tree_code (code, vectype, optab_vector);
5530 if (dump_enabled_p ())
5531 dump_printf_loc (MSG_NOTE, vect_location,
5532 "vector/vector shift/rotate found.\n");
5533
5534 if (!op1_vectype)
5535 op1_vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op1),
5536 slp_op1);
5537 incompatible_op1_vectype_p
5538 = (op1_vectype == NULL_TREE
5539 || maybe_ne (TYPE_VECTOR_SUBPARTS (op1_vectype),
5540 TYPE_VECTOR_SUBPARTS (vectype))
5541 || TYPE_MODE (op1_vectype) != TYPE_MODE (vectype));
5542 if (incompatible_op1_vectype_p
5543 && (!slp_node
5544 || SLP_TREE_DEF_TYPE (slp_op1) != vect_constant_def
5545 || slp_op1->refcnt != 1))
5546 {
5547 if (dump_enabled_p ())
5548 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5549 "unusable type for last operand in"
5550 " vector/vector shift/rotate.\n");
5551 return false;
5552 }
5553 }
5554   /* See if the machine has a vector-shift-by-scalar insn and if not
5555      then see if it has a vector-shift-by-vector insn.  */
5556 else
5557 {
5558 optab = optab_for_tree_code (code, vectype, optab_scalar);
5559 if (optab
5560 && optab_handler (optab, TYPE_MODE (vectype)) != CODE_FOR_nothing)
5561 {
5562 if (dump_enabled_p ())
5563 dump_printf_loc (MSG_NOTE, vect_location,
5564 "vector/scalar shift/rotate found.\n");
5565 }
5566 else
5567 {
5568 optab = optab_for_tree_code (code, vectype, optab_vector);
5569 if (optab
5570 && (optab_handler (optab, TYPE_MODE (vectype))
5571 != CODE_FOR_nothing))
5572 {
5573 scalar_shift_arg = false;
5574
5575 if (dump_enabled_p ())
5576 dump_printf_loc (MSG_NOTE, vect_location,
5577 "vector/vector shift/rotate found.\n");
5578
5579 if (!op1_vectype)
5580 op1_vectype = get_vectype_for_scalar_type (vinfo,
5581 TREE_TYPE (op1),
5582 slp_op1);
5583
5584 /* Unlike the other binary operators, shifts/rotates have
5585 the rhs being int, instead of the same type as the lhs,
5586 so make sure the scalar is the right type if we are
5587 dealing with vectors of long long/long/short/char. */
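		  /* E.g. (illustrative) for long long a[i] << n with an
		     int N, the shift amount must be converted to long long
		     before it can populate the vector shift operand.  */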
5588 incompatible_op1_vectype_p
5589 = (!op1_vectype
5590 || !tree_nop_conversion_p (TREE_TYPE (vectype),
5591 TREE_TYPE (op1)));
5592 if (incompatible_op1_vectype_p
5593 && dt[1] == vect_internal_def)
5594 {
5595 if (dump_enabled_p ())
5596 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5597 "unusable type for last operand in"
5598 " vector/vector shift/rotate.\n");
5599 return false;
5600 }
5601 }
5602 }
5603 }
5604
5605 /* Supportable by target? */
5606 if (!optab)
5607 {
5608 if (dump_enabled_p ())
5609 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5610 "no optab.\n");
5611 return false;
5612 }
5613 vec_mode = TYPE_MODE (vectype);
5614 icode = (int) optab_handler (optab, vec_mode);
5615 if (icode == CODE_FOR_nothing)
5616 {
5617 if (dump_enabled_p ())
5618 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5619 "op not supported by target.\n");
5620 /* Check only during analysis. */
5621 if (maybe_ne (GET_MODE_SIZE (vec_mode), UNITS_PER_WORD)
5622 || (!vec_stmt
5623 && !vect_worthwhile_without_simd_p (vinfo, code)))
5624 return false;
5625 if (dump_enabled_p ())
5626 dump_printf_loc (MSG_NOTE, vect_location,
5627 "proceeding using word mode.\n");
5628 }
5629
5630 /* Worthwhile without SIMD support? Check only during analysis. */
5631 if (!vec_stmt
5632 && !VECTOR_MODE_P (TYPE_MODE (vectype))
5633 && !vect_worthwhile_without_simd_p (vinfo, code))
5634 {
5635 if (dump_enabled_p ())
5636 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5637 "not worthwhile without SIMD support.\n");
5638 return false;
5639 }
5640
5641 if (!vec_stmt) /* transformation not required. */
5642 {
5643 if (slp_node
5644 && (!vect_maybe_update_slp_op_vectype (slp_op0, vectype)
5645 || ((!scalar_shift_arg || dt[1] == vect_internal_def)
5646 && (!incompatible_op1_vectype_p
5647 || dt[1] == vect_constant_def)
5648 && !vect_maybe_update_slp_op_vectype
5649 (slp_op1,
5650 incompatible_op1_vectype_p ? vectype : op1_vectype))))
5651 {
5652 if (dump_enabled_p ())
5653 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5654 "incompatible vector types for invariants\n");
5655 return false;
5656 }
5657 /* Now adjust the constant shift amount in place. */
5658 if (slp_node
5659 && incompatible_op1_vectype_p
5660 && dt[1] == vect_constant_def)
5661 {
5662 for (unsigned i = 0;
5663 i < SLP_TREE_SCALAR_OPS (slp_op1).length (); ++i)
5664 {
5665 SLP_TREE_SCALAR_OPS (slp_op1)[i]
5666 = fold_convert (TREE_TYPE (vectype),
5667 SLP_TREE_SCALAR_OPS (slp_op1)[i]);
5668 gcc_assert ((TREE_CODE (SLP_TREE_SCALAR_OPS (slp_op1)[i])
5669 == INTEGER_CST));
5670 }
5671 }
5672 STMT_VINFO_TYPE (stmt_info) = shift_vec_info_type;
5673 DUMP_VECT_SCOPE ("vectorizable_shift");
5674 vect_model_simple_cost (vinfo, stmt_info, ncopies, dt,
5675 scalar_shift_arg ? 1 : ndts, slp_node, cost_vec);
5676 return true;
5677 }
5678
5679 /* Transform. */
5680
5681 if (dump_enabled_p ())
5682 dump_printf_loc (MSG_NOTE, vect_location,
5683 "transform binary/unary operation.\n");
5684
5685 if (incompatible_op1_vectype_p && !slp_node)
5686 {
5687 gcc_assert (!scalar_shift_arg && was_scalar_shift_arg);
5688 op1 = fold_convert (TREE_TYPE (vectype), op1);
5689 if (dt[1] != vect_constant_def)
5690 op1 = vect_init_vector (vinfo, stmt_info, op1,
5691 TREE_TYPE (vectype), NULL);
5692 }
5693
5694 /* Handle def. */
5695 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5696
5697 if (scalar_shift_arg && dt[1] != vect_internal_def)
5698 {
5699 /* Vector shl and shr insn patterns can be defined with scalar
5700 operand 2 (shift operand). In this case, use constant or loop
5701 invariant op1 directly, without extending it to vector mode
5702 first. */
5703 optab_op2_mode = insn_data[icode].operand[2].mode;
5704 if (!VECTOR_MODE_P (optab_op2_mode))
5705 {
5706 if (dump_enabled_p ())
5707 dump_printf_loc (MSG_NOTE, vect_location,
5708 "operand 1 using scalar mode.\n");
5709 vec_oprnd1 = op1;
5710 vec_oprnds1.create (slp_node ? slp_node->vec_stmts_size : ncopies);
5711 vec_oprnds1.quick_push (vec_oprnd1);
5712 /* Store vec_oprnd1 for every vector stmt to be created.
5713 We check during the analysis that all the shift arguments
5714 are the same.
5715 TODO: Allow different constants for different vector
5716 stmts generated for an SLP instance. */
5717 for (k = 0;
5718 k < (slp_node ? slp_node->vec_stmts_size - 1 : ncopies - 1); k++)
5719 vec_oprnds1.quick_push (vec_oprnd1);
5720 }
5721 }
5722 else if (!scalar_shift_arg && slp_node && incompatible_op1_vectype_p)
5723 {
5724 if (was_scalar_shift_arg)
5725 {
5726 /* If the argument was the same in all lanes create
5727 the correctly typed vector shift amount directly. */
5728 op1 = fold_convert (TREE_TYPE (vectype), op1);
5729 op1 = vect_init_vector (vinfo, stmt_info, op1, TREE_TYPE (vectype),
5730 !loop_vinfo ? gsi : NULL);
5731 vec_oprnd1 = vect_init_vector (vinfo, stmt_info, op1, vectype,
5732 !loop_vinfo ? gsi : NULL);
5733 vec_oprnds1.create (slp_node->vec_stmts_size);
5734 for (k = 0; k < slp_node->vec_stmts_size; k++)
5735 vec_oprnds1.quick_push (vec_oprnd1);
5736 }
5737 else if (dt[1] == vect_constant_def)
5738 /* The constant shift amount has been adjusted in place. */
5739 ;
5740 else
5741 gcc_assert (TYPE_MODE (op1_vectype) == TYPE_MODE (vectype));
5742 }
5743
5744   /* vec_oprnd1 is available if operand 1 should be of a scalar-type
5745      (a special case for certain kinds of vector shifts); otherwise,
5746      operand 1 should be of a vector type (the usual case).  */
5747 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
5748 op0, &vec_oprnds0,
5749 vec_oprnd1 ? NULL_TREE : op1, &vec_oprnds1);
5750
5751 /* Arguments are ready. Create the new vector stmt. */
5752 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5753 {
5754 /* For internal defs where we need to use a scalar shift arg
5755 extract the first lane. */
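      /* The extraction below builds, e.g. for 32-bit elements
	 (illustrative GIMPLE):
	   amt_1 = BIT_FIELD_REF <vamt_2, 32, 0>;
	 i.e. lane 0 of the vector shift amount.  */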
5756 if (scalar_shift_arg && dt[1] == vect_internal_def)
5757 {
5758 vop1 = vec_oprnds1[0];
5759 new_temp = make_ssa_name (TREE_TYPE (TREE_TYPE (vop1)));
5760 gassign *new_stmt
5761 = gimple_build_assign (new_temp,
5762 build3 (BIT_FIELD_REF, TREE_TYPE (new_temp),
5763 vop1,
5764 TYPE_SIZE (TREE_TYPE (new_temp)),
5765 bitsize_zero_node));
5766 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
5767 vop1 = new_temp;
5768 }
5769 else
5770 vop1 = vec_oprnds1[i];
5771 gassign *new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
5772 new_temp = make_ssa_name (vec_dest, new_stmt);
5773 gimple_assign_set_lhs (new_stmt, new_temp);
5774 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
5775 if (slp_node)
5776 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
5777 else
5778 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
5779 }
5780
5781 if (!slp_node)
5782 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
5783
5784 vec_oprnds0.release ();
5785 vec_oprnds1.release ();
5786
5787 return true;
5788 }
5789
5790
5791 /* Function vectorizable_operation.
5792
5793 Check if STMT_INFO performs a binary, unary or ternary operation that can
5794 be vectorized.
5795 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
5796 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
5797 Return true if STMT_INFO is vectorizable in this way. */
5798
5799 static bool
5800 vectorizable_operation (vec_info *vinfo,
5801 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
5802 gimple **vec_stmt, slp_tree slp_node,
5803 stmt_vector_for_cost *cost_vec)
5804 {
5805 tree vec_dest;
5806 tree scalar_dest;
5807 tree op0, op1 = NULL_TREE, op2 = NULL_TREE;
5808 tree vectype;
5809 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
5810 enum tree_code code, orig_code;
5811 machine_mode vec_mode;
5812 tree new_temp;
5813 int op_type;
5814 optab optab;
5815 bool target_support_p;
5816 enum vect_def_type dt[3]
5817 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
5818 int ndts = 3;
5819 poly_uint64 nunits_in;
5820 poly_uint64 nunits_out;
5821 tree vectype_out;
5822 int ncopies, vec_num;
5823 int i;
5824 vec<tree> vec_oprnds0 = vNULL;
5825 vec<tree> vec_oprnds1 = vNULL;
5826 vec<tree> vec_oprnds2 = vNULL;
5827 tree vop0, vop1, vop2;
5828 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
5829
5830 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5831 return false;
5832
5833 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5834 && ! vec_stmt)
5835 return false;
5836
5837 /* Is STMT a vectorizable binary/unary operation? */
5838 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
5839 if (!stmt)
5840 return false;
5841
5842 /* Loads and stores are handled in vectorizable_{load,store}. */
5843 if (STMT_VINFO_DATA_REF (stmt_info))
5844 return false;
5845
5846 orig_code = code = gimple_assign_rhs_code (stmt);
5847
5848 /* Shifts are handled in vectorizable_shift. */
5849 if (code == LSHIFT_EXPR
5850 || code == RSHIFT_EXPR
5851 || code == LROTATE_EXPR
5852 || code == RROTATE_EXPR)
5853 return false;
5854
5855 /* Comparisons are handled in vectorizable_comparison. */
5856 if (TREE_CODE_CLASS (code) == tcc_comparison)
5857 return false;
5858
5859 /* Conditions are handled in vectorizable_condition. */
5860 if (code == COND_EXPR)
5861 return false;
5862
5863 /* For pointer addition and subtraction, we should use the normal
5864 plus and minus for the vector operation. */
5865 if (code == POINTER_PLUS_EXPR)
5866 code = PLUS_EXPR;
5867 if (code == POINTER_DIFF_EXPR)
5868 code = MINUS_EXPR;
5869
5870 /* Support only unary or binary operations. */
5871 op_type = TREE_CODE_LENGTH (code);
5872 if (op_type != unary_op && op_type != binary_op && op_type != ternary_op)
5873 {
5874 if (dump_enabled_p ())
5875 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5876 "num. args = %d (not unary/binary/ternary op).\n",
5877 op_type);
5878 return false;
5879 }
5880
5881 scalar_dest = gimple_assign_lhs (stmt);
5882 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
5883
5884 /* Most operations cannot handle bit-precision types without extra
5885 truncations. */
5886 bool mask_op_p = VECTOR_BOOLEAN_TYPE_P (vectype_out);
5887 if (!mask_op_p
5888 && !type_has_mode_precision_p (TREE_TYPE (scalar_dest))
5889 /* Exception are bitwise binary operations. */
5890 && code != BIT_IOR_EXPR
5891 && code != BIT_XOR_EXPR
5892 && code != BIT_AND_EXPR)
5893 {
5894 if (dump_enabled_p ())
5895 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5896 "bit-precision arithmetic not supported.\n");
5897 return false;
5898 }
5899
5900 slp_tree slp_op0;
5901 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
5902 0, &op0, &slp_op0, &dt[0], &vectype))
5903 {
5904 if (dump_enabled_p ())
5905 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5906 "use not simple.\n");
5907 return false;
5908 }
5909 /* If op0 is an external or constant def, infer the vector type
5910 from the scalar type. */
5911 if (!vectype)
5912 {
5913       /* For a boolean type we cannot determine the vectype from an
5914 	 invariant value (we don't know whether it should be a vector
5915 	 of booleans or a vector of integers).  Use the output
5916 	 vectype because operations on booleans don't change the
5917 	 type.  */
5918 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op0)))
5919 {
5920 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (scalar_dest)))
5921 {
5922 if (dump_enabled_p ())
5923 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5924 "not supported operation on bool value.\n");
5925 return false;
5926 }
5927 vectype = vectype_out;
5928 }
5929 else
5930 vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op0),
5931 slp_node);
5932 }
5933 if (vec_stmt)
5934 gcc_assert (vectype);
5935 if (!vectype)
5936 {
5937 if (dump_enabled_p ())
5938 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5939 "no vectype for scalar type %T\n",
5940 TREE_TYPE (op0));
5941
5942 return false;
5943 }
5944
5945 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
5946 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
5947 if (maybe_ne (nunits_out, nunits_in))
5948 return false;
5949
5950 tree vectype2 = NULL_TREE, vectype3 = NULL_TREE;
5951 slp_tree slp_op1 = NULL, slp_op2 = NULL;
5952 if (op_type == binary_op || op_type == ternary_op)
5953 {
5954 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
5955 1, &op1, &slp_op1, &dt[1], &vectype2))
5956 {
5957 if (dump_enabled_p ())
5958 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5959 "use not simple.\n");
5960 return false;
5961 }
5962 }
5963 if (op_type == ternary_op)
5964 {
5965 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
5966 2, &op2, &slp_op2, &dt[2], &vectype3))
5967 {
5968 if (dump_enabled_p ())
5969 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5970 "use not simple.\n");
5971 return false;
5972 }
5973 }
5974
5975 /* Multiple types in SLP are handled by creating the appropriate number of
5976 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5977 case of SLP. */
5978 if (slp_node)
5979 {
5980 ncopies = 1;
5981 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
5982 }
5983 else
5984 {
5985 ncopies = vect_get_num_copies (loop_vinfo, vectype);
5986 vec_num = 1;
5987 }
5988
5989 gcc_assert (ncopies >= 1);
5990
5991 /* Reject attempts to combine mask types with nonmask types, e.g. if
5992 we have an AND between a (nonmask) boolean loaded from memory and
5993 a (mask) boolean result of a comparison.
5994
5995 TODO: We could easily fix these cases up using pattern statements. */
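  /* For instance (illustrative), a _Bool value loaded from memory is a
     nonmask (data) vector of 8-bit booleans, while the result of
     b[i] < c[i] is a mask vector; an AND of the two is rejected here.  */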
5996 if (VECTOR_BOOLEAN_TYPE_P (vectype) != mask_op_p
5997 || (vectype2 && VECTOR_BOOLEAN_TYPE_P (vectype2) != mask_op_p)
5998 || (vectype3 && VECTOR_BOOLEAN_TYPE_P (vectype3) != mask_op_p))
5999 {
6000 if (dump_enabled_p ())
6001 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6002 "mixed mask and nonmask vector types\n");
6003 return false;
6004 }
6005
6006 /* Supportable by target? */
6007
6008 vec_mode = TYPE_MODE (vectype);
6009 if (code == MULT_HIGHPART_EXPR)
6010 target_support_p = can_mult_highpart_p (vec_mode, TYPE_UNSIGNED (vectype));
6011 else
6012 {
6013 optab = optab_for_tree_code (code, vectype, optab_default);
6014 if (!optab)
6015 {
6016 if (dump_enabled_p ())
6017 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6018 "no optab.\n");
6019 return false;
6020 }
6021 target_support_p = (optab_handler (optab, vec_mode)
6022 != CODE_FOR_nothing);
6023 }
6024
6025 if (!target_support_p)
6026 {
6027 if (dump_enabled_p ())
6028 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6029 "op not supported by target.\n");
6030 /* Check only during analysis. */
6031 if (maybe_ne (GET_MODE_SIZE (vec_mode), UNITS_PER_WORD)
6032 || (!vec_stmt && !vect_worthwhile_without_simd_p (vinfo, code)))
6033 return false;
6034 if (dump_enabled_p ())
6035 dump_printf_loc (MSG_NOTE, vect_location,
6036 "proceeding using word mode.\n");
6037 }
6038
6039 /* Worthwhile without SIMD support? Check only during analysis. */
6040 if (!VECTOR_MODE_P (vec_mode)
6041 && !vec_stmt
6042 && !vect_worthwhile_without_simd_p (vinfo, code))
6043 {
6044 if (dump_enabled_p ())
6045 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6046 "not worthwhile without SIMD support.\n");
6047 return false;
6048 }
6049
6050 int reduc_idx = STMT_VINFO_REDUC_IDX (stmt_info);
6051 vec_loop_masks *masks = (loop_vinfo ? &LOOP_VINFO_MASKS (loop_vinfo) : NULL);
6052 internal_fn cond_fn = get_conditional_internal_fn (code);
6053
6054 if (!vec_stmt) /* transformation not required. */
6055 {
6056 /* If this operation is part of a reduction, a fully-masked loop
6057 should only change the active lanes of the reduction chain,
6058 keeping the inactive lanes as-is. */
6059 if (loop_vinfo
6060 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo)
6061 && reduc_idx >= 0)
6062 {
6063 if (cond_fn == IFN_LAST
6064 || !direct_internal_fn_supported_p (cond_fn, vectype,
6065 OPTIMIZE_FOR_SPEED))
6066 {
6067 if (dump_enabled_p ())
6068 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6069 "can't use a fully-masked loop because no"
6070 " conditional operation is available.\n");
6071 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
6072 }
6073 else
6074 vect_record_loop_mask (loop_vinfo, masks, ncopies * vec_num,
6075 vectype, NULL);
6076 }
6077
6078 /* Put types on constant and invariant SLP children. */
6079 if (slp_node
6080 && (!vect_maybe_update_slp_op_vectype (slp_op0, vectype)
6081 || !vect_maybe_update_slp_op_vectype (slp_op1, vectype)
6082 || !vect_maybe_update_slp_op_vectype (slp_op2, vectype)))
6083 {
6084 if (dump_enabled_p ())
6085 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6086 "incompatible vector types for invariants\n");
6087 return false;
6088 }
6089
6090 STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
6091 DUMP_VECT_SCOPE ("vectorizable_operation");
6092 vect_model_simple_cost (vinfo, stmt_info,
6093 ncopies, dt, ndts, slp_node, cost_vec);
6094 return true;
6095 }
6096
6097 /* Transform. */
6098
6099 if (dump_enabled_p ())
6100 dump_printf_loc (MSG_NOTE, vect_location,
6101 "transform binary/unary operation.\n");
6102
6103 bool masked_loop_p = loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo);
6104
6105 /* POINTER_DIFF_EXPR has pointer arguments which are vectorized as
6106 vectors with unsigned elements, but the result is signed. So, we
6107      need to compute the MINUS_EXPR into a vectype temporary and
6108 VIEW_CONVERT_EXPR it into the final vectype_out result. */
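  /* A sketch of the generated sequence (illustrative GIMPLE):
       tmp_1 = vp0_2 - vp1_3;				 <- MINUS_EXPR on VECTYPE
       res_4 = VIEW_CONVERT_EXPR <vectype_out> (tmp_1);  */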
6109 tree vec_cvt_dest = NULL_TREE;
6110 if (orig_code == POINTER_DIFF_EXPR)
6111 {
6112 vec_dest = vect_create_destination_var (scalar_dest, vectype);
6113 vec_cvt_dest = vect_create_destination_var (scalar_dest, vectype_out);
6114 }
6115 /* Handle def. */
6116 else
6117 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
6118
6119 /* In case the vectorization factor (VF) is bigger than the number
6120 of elements that we can fit in a vectype (nunits), we have to generate
6121      more than one vector stmt - i.e. - we need to "unroll" the
6122 vector stmt by a factor VF/nunits. In doing so, we record a pointer
6123 from one copy of the vector stmt to the next, in the field
6124 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
6125 stages to find the correct vector defs to be used when vectorizing
6126 stmts that use the defs of the current stmt. The example below
6127 illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
6128 we need to create 4 vectorized stmts):
6129
6130 before vectorization:
6131 RELATED_STMT VEC_STMT
6132 S1: x = memref - -
6133 S2: z = x + 1 - -
6134
6135 step 1: vectorize stmt S1 (done in vectorizable_load. See more details
6136 there):
6137 RELATED_STMT VEC_STMT
6138 VS1_0: vx0 = memref0 VS1_1 -
6139 VS1_1: vx1 = memref1 VS1_2 -
6140 VS1_2: vx2 = memref2 VS1_3 -
6141 VS1_3: vx3 = memref3 - -
6142 S1: x = load - VS1_0
6143 S2: z = x + 1 - -
6144
6145 step2: vectorize stmt S2 (done here):
6146 To vectorize stmt S2 we first need to find the relevant vector
6147 def for the first operand 'x'. This is, as usual, obtained from
6148 the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
6149 that defines 'x' (S1). This way we find the stmt VS1_0, and the
6150 relevant vector def 'vx0'. Having found 'vx0' we can generate
6151 the vector stmt VS2_0, and as usual, record it in the
6152 STMT_VINFO_VEC_STMT of stmt S2.
6153 When creating the second copy (VS2_1), we obtain the relevant vector
6154 def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
6155 stmt VS1_0. This way we find the stmt VS1_1 and the relevant
6156 vector def 'vx1'. Using 'vx1' we create stmt VS2_1 and record a
6157 pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
6158 Similarly when creating stmts VS2_2 and VS2_3. This is the resulting
6159 chain of stmts and pointers:
6160 RELATED_STMT VEC_STMT
6161 VS1_0: vx0 = memref0 VS1_1 -
6162 VS1_1: vx1 = memref1 VS1_2 -
6163 VS1_2: vx2 = memref2 VS1_3 -
6164 VS1_3: vx3 = memref3 - -
6165 S1: x = load - VS1_0
6166 VS2_0: vz0 = vx0 + v1 VS2_1 -
6167 VS2_1: vz1 = vx1 + v1 VS2_2 -
6168 VS2_2: vz2 = vx2 + v1 VS2_3 -
6169 VS2_3: vz3 = vx3 + v1 - -
6170 S2: z = x + 1 - VS2_0 */
6171
6172 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
6173 op0, &vec_oprnds0, op1, &vec_oprnds1, op2, &vec_oprnds2);
6174 /* Arguments are ready. Create the new vector stmt. */
6175 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
6176 {
6177 gimple *new_stmt = NULL;
6178 vop1 = ((op_type == binary_op || op_type == ternary_op)
6179 ? vec_oprnds1[i] : NULL_TREE);
6180 vop2 = ((op_type == ternary_op) ? vec_oprnds2[i] : NULL_TREE);
6181 if (masked_loop_p && reduc_idx >= 0)
6182 {
6183 /* Perform the operation on active elements only and take
6184 inactive elements from the reduction chain input. */
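	  /* E.g. for a PLUS_EXPR in a masked reduction this emits
	     (illustrative) lhs = .COND_ADD (loop_mask, vop0, vop1, else),
	     where the "else" value is the reduction chain input.  */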
6185 gcc_assert (!vop2);
6186 vop2 = reduc_idx == 1 ? vop1 : vop0;
6187 tree mask = vect_get_loop_mask (gsi, masks, vec_num * ncopies,
6188 vectype, i);
6189 gcall *call = gimple_build_call_internal (cond_fn, 4, mask,
6190 vop0, vop1, vop2);
6191 new_temp = make_ssa_name (vec_dest, call);
6192 gimple_call_set_lhs (call, new_temp);
6193 gimple_call_set_nothrow (call, true);
6194 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
6195 new_stmt = call;
6196 }
6197 else
6198 {
6199 new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1, vop2);
6200 new_temp = make_ssa_name (vec_dest, new_stmt);
6201 gimple_assign_set_lhs (new_stmt, new_temp);
6202 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
6203 if (vec_cvt_dest)
6204 {
6205 new_temp = build1 (VIEW_CONVERT_EXPR, vectype_out, new_temp);
6206 new_stmt = gimple_build_assign (vec_cvt_dest, VIEW_CONVERT_EXPR,
6207 new_temp);
6208 new_temp = make_ssa_name (vec_cvt_dest, new_stmt);
6209 gimple_assign_set_lhs (new_stmt, new_temp);
6210 vect_finish_stmt_generation (vinfo, stmt_info,
6211 new_stmt, gsi);
6212 }
6213 }
6214 if (slp_node)
6215 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
6216 else
6217 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
6218 }
6219
6220 if (!slp_node)
6221 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
6222
6223 vec_oprnds0.release ();
6224 vec_oprnds1.release ();
6225 vec_oprnds2.release ();
6226
6227 return true;
6228 }
6229
6230 /* A helper function to ensure data reference DR_INFO's base alignment. */
6231
6232 static void
6233 ensure_base_align (dr_vec_info *dr_info)
6234 {
6235 if (dr_info->misalignment == DR_MISALIGNMENT_UNINITIALIZED)
6236 return;
6237
6238 if (dr_info->base_misaligned)
6239 {
6240 tree base_decl = dr_info->base_decl;
6241
6242 // We should only be able to increase the alignment of a base object if
6243 // we know what its new alignment should be at compile time.
6244 unsigned HOST_WIDE_INT align_base_to =
6245 DR_TARGET_ALIGNMENT (dr_info).to_constant () * BITS_PER_UNIT;
6246
6247 if (decl_in_symtab_p (base_decl))
6248 symtab_node::get (base_decl)->increase_alignment (align_base_to);
6249 else if (DECL_ALIGN (base_decl) < align_base_to)
6250 {
6251 SET_DECL_ALIGN (base_decl, align_base_to);
6252 DECL_USER_ALIGN (base_decl) = 1;
6253 }
6254 dr_info->base_misaligned = false;
6255 }
6256 }
6257
6258
6259 /* Function get_group_alias_ptr_type.
6260
6261 Return the alias type for the group starting at FIRST_STMT_INFO. */
6262
6263 static tree
6264 get_group_alias_ptr_type (stmt_vec_info first_stmt_info)
6265 {
6266 struct data_reference *first_dr, *next_dr;
6267
6268 first_dr = STMT_VINFO_DATA_REF (first_stmt_info);
6269 stmt_vec_info next_stmt_info = DR_GROUP_NEXT_ELEMENT (first_stmt_info);
6270 while (next_stmt_info)
6271 {
6272 next_dr = STMT_VINFO_DATA_REF (next_stmt_info);
6273 if (get_alias_set (DR_REF (first_dr))
6274 != get_alias_set (DR_REF (next_dr)))
6275 {
6276 if (dump_enabled_p ())
6277 dump_printf_loc (MSG_NOTE, vect_location,
6278 "conflicting alias set types.\n");
6279 return ptr_type_node;
6280 }
6281 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
6282 }
6283 return reference_alias_ptr_type (DR_REF (first_dr));
6284 }
6285
6286
6287 /* Function scan_operand_equal_p.
6288
6289 Helper function for check_scan_store. Compare two references
6290 with .GOMP_SIMD_LANE bases. */
6291
6292 static bool
6293 scan_operand_equal_p (tree ref1, tree ref2)
6294 {
6295 tree ref[2] = { ref1, ref2 };
6296 poly_int64 bitsize[2], bitpos[2];
6297 tree offset[2], base[2];
6298 for (int i = 0; i < 2; ++i)
6299 {
6300 machine_mode mode;
6301 int unsignedp, reversep, volatilep = 0;
6302 base[i] = get_inner_reference (ref[i], &bitsize[i], &bitpos[i],
6303 &offset[i], &mode, &unsignedp,
6304 &reversep, &volatilep);
6305 if (reversep || volatilep || maybe_ne (bitpos[i], 0))
6306 return false;
6307 if (TREE_CODE (base[i]) == MEM_REF
6308 && offset[i] == NULL_TREE
6309 && TREE_CODE (TREE_OPERAND (base[i], 0)) == SSA_NAME)
6310 {
6311 gimple *def_stmt = SSA_NAME_DEF_STMT (TREE_OPERAND (base[i], 0));
6312 if (is_gimple_assign (def_stmt)
6313 && gimple_assign_rhs_code (def_stmt) == POINTER_PLUS_EXPR
6314 && TREE_CODE (gimple_assign_rhs1 (def_stmt)) == ADDR_EXPR
6315 && TREE_CODE (gimple_assign_rhs2 (def_stmt)) == SSA_NAME)
6316 {
6317 if (maybe_ne (mem_ref_offset (base[i]), 0))
6318 return false;
6319 base[i] = TREE_OPERAND (gimple_assign_rhs1 (def_stmt), 0);
6320 offset[i] = gimple_assign_rhs2 (def_stmt);
6321 }
6322 }
6323 }
6324
6325 if (!operand_equal_p (base[0], base[1], 0))
6326 return false;
6327 if (maybe_ne (bitsize[0], bitsize[1]))
6328 return false;
6329 if (offset[0] != offset[1])
6330 {
6331 if (!offset[0] || !offset[1])
6332 return false;
6333 if (!operand_equal_p (offset[0], offset[1], 0))
6334 {
6335 tree step[2];
6336 for (int i = 0; i < 2; ++i)
6337 {
6338 step[i] = integer_one_node;
6339 if (TREE_CODE (offset[i]) == SSA_NAME)
6340 {
6341 gimple *def_stmt = SSA_NAME_DEF_STMT (offset[i]);
6342 if (is_gimple_assign (def_stmt)
6343 && gimple_assign_rhs_code (def_stmt) == MULT_EXPR
6344 && (TREE_CODE (gimple_assign_rhs2 (def_stmt))
6345 == INTEGER_CST))
6346 {
6347 step[i] = gimple_assign_rhs2 (def_stmt);
6348 offset[i] = gimple_assign_rhs1 (def_stmt);
6349 }
6350 }
6351 else if (TREE_CODE (offset[i]) == MULT_EXPR)
6352 {
6353 step[i] = TREE_OPERAND (offset[i], 1);
6354 offset[i] = TREE_OPERAND (offset[i], 0);
6355 }
6356 tree rhs1 = NULL_TREE;
6357 if (TREE_CODE (offset[i]) == SSA_NAME)
6358 {
6359 gimple *def_stmt = SSA_NAME_DEF_STMT (offset[i]);
6360 if (gimple_assign_cast_p (def_stmt))
6361 rhs1 = gimple_assign_rhs1 (def_stmt);
6362 }
6363 else if (CONVERT_EXPR_P (offset[i]))
6364 rhs1 = TREE_OPERAND (offset[i], 0);
6365 if (rhs1
6366 && INTEGRAL_TYPE_P (TREE_TYPE (rhs1))
6367 && INTEGRAL_TYPE_P (TREE_TYPE (offset[i]))
6368 && (TYPE_PRECISION (TREE_TYPE (offset[i]))
6369 >= TYPE_PRECISION (TREE_TYPE (rhs1))))
6370 offset[i] = rhs1;
6371 }
6372 if (!operand_equal_p (offset[0], offset[1], 0)
6373 || !operand_equal_p (step[0], step[1], 0))
6374 return false;
6375 }
6376 }
6377 return true;
6378 }
6379
6380
6381 enum scan_store_kind {
6382 /* Normal permutation. */
6383 scan_store_kind_perm,
6384
6385 /* Whole vector left shift permutation with zero init. */
6386 scan_store_kind_lshift_zero,
6387
6388 /* Whole vector left shift permutation and VEC_COND_EXPR. */
6389 scan_store_kind_lshift_cond
6390 };
6391
6392 /* Function scan_store_can_perm_p.
6393
6394 Verify if we can perform the needed permutations or whole vector shifts.
6395 Return -1 on failure, otherwise exact log2 of vectype's nunits.
6396    USE_WHOLE_VECTOR is a vector of enum scan_store_kind specifying which
6397    operation to do at each step.  */
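/* For example (illustrative), with a 4-element vector the loop below
   checks the permutations { 0, 4, 5, 6 } and { 0, 1, 4, 5 } (two-input
   shifts of the accumulator) and finally { 3, 3, 3, 3 } (a broadcast of
   the last element), falling back to whole-vector shifts where a
   permutation is not supported.  */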
6398
6399 static int
6400 scan_store_can_perm_p (tree vectype, tree init,
6401 vec<enum scan_store_kind> *use_whole_vector = NULL)
6402 {
6403 enum machine_mode vec_mode = TYPE_MODE (vectype);
6404 unsigned HOST_WIDE_INT nunits;
6405 if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits))
6406 return -1;
6407 int units_log2 = exact_log2 (nunits);
6408 if (units_log2 <= 0)
6409 return -1;
6410
6411 int i;
6412 enum scan_store_kind whole_vector_shift_kind = scan_store_kind_perm;
6413 for (i = 0; i <= units_log2; ++i)
6414 {
6415 unsigned HOST_WIDE_INT j, k;
6416 enum scan_store_kind kind = scan_store_kind_perm;
6417 vec_perm_builder sel (nunits, nunits, 1);
6418 sel.quick_grow (nunits);
6419 if (i == units_log2)
6420 {
6421 for (j = 0; j < nunits; ++j)
6422 sel[j] = nunits - 1;
6423 }
6424 else
6425 {
6426 for (j = 0; j < (HOST_WIDE_INT_1U << i); ++j)
6427 sel[j] = j;
6428 for (k = 0; j < nunits; ++j, ++k)
6429 sel[j] = nunits + k;
6430 }
6431 vec_perm_indices indices (sel, i == units_log2 ? 1 : 2, nunits);
6432 if (!can_vec_perm_const_p (vec_mode, indices))
6433 {
6434 if (i == units_log2)
6435 return -1;
6436
6437 if (whole_vector_shift_kind == scan_store_kind_perm)
6438 {
6439 if (optab_handler (vec_shl_optab, vec_mode) == CODE_FOR_nothing)
6440 return -1;
6441 whole_vector_shift_kind = scan_store_kind_lshift_zero;
6442 		  /* Whole vector shifts shift in zeros, so if INIT is an
6443 		     all-zeros constant, there is no need to do anything further.  */
6444 if ((TREE_CODE (init) != INTEGER_CST
6445 && TREE_CODE (init) != REAL_CST)
6446 || !initializer_zerop (init))
6447 {
6448 tree masktype = truth_type_for (vectype);
6449 if (!expand_vec_cond_expr_p (vectype, masktype, VECTOR_CST))
6450 return -1;
6451 whole_vector_shift_kind = scan_store_kind_lshift_cond;
6452 }
6453 }
6454 kind = whole_vector_shift_kind;
6455 }
6456 if (use_whole_vector)
6457 {
6458 if (kind != scan_store_kind_perm && use_whole_vector->is_empty ())
6459 use_whole_vector->safe_grow_cleared (i, true);
6460 if (kind != scan_store_kind_perm || !use_whole_vector->is_empty ())
6461 use_whole_vector->safe_push (kind);
6462 }
6463 }
6464
6465 return units_log2;
6466 }
6467
6468
6469 /* Function check_scan_store.
6470
6471 Check magic stores for #pragma omp scan {in,ex}clusive reductions. */
6472
6473 static bool
6474 check_scan_store (vec_info *vinfo, stmt_vec_info stmt_info, tree vectype,
6475 enum vect_def_type rhs_dt, bool slp, tree mask,
6476 vect_memory_access_type memory_access_type)
6477 {
6478 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
6479 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
6480 tree ref_type;
6481
6482 gcc_assert (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) > 1);
6483 if (slp
6484 || mask
6485 || memory_access_type != VMAT_CONTIGUOUS
6486 || TREE_CODE (DR_BASE_ADDRESS (dr_info->dr)) != ADDR_EXPR
6487 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0))
6488 || loop_vinfo == NULL
6489 || LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
6490 || STMT_VINFO_GROUPED_ACCESS (stmt_info)
6491 || !integer_zerop (get_dr_vinfo_offset (vinfo, dr_info))
6492 || !integer_zerop (DR_INIT (dr_info->dr))
6493 || !(ref_type = reference_alias_ptr_type (DR_REF (dr_info->dr)))
6494 || !alias_sets_conflict_p (get_alias_set (vectype),
6495 get_alias_set (TREE_TYPE (ref_type))))
6496 {
6497 if (dump_enabled_p ())
6498 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6499 "unsupported OpenMP scan store.\n");
6500 return false;
6501 }
6502
6503 /* We need to pattern match code built by OpenMP lowering and simplified
6504 by following optimizations into something we can handle.
6505 #pragma omp simd reduction(inscan,+:r)
6506 for (...)
6507 {
6508 r += something ();
6509 #pragma omp scan inclusive (r)
6510 use (r);
6511 }
6512 shall have body with:
6513 // Initialization for input phase, store the reduction initializer:
6514 _20 = .GOMP_SIMD_LANE (simduid.3_14(D), 0);
6515 _21 = .GOMP_SIMD_LANE (simduid.3_14(D), 1);
6516 D.2042[_21] = 0;
6517 // Actual input phase:
6518 ...
6519 r.0_5 = D.2042[_20];
6520 _6 = _4 + r.0_5;
6521 D.2042[_20] = _6;
6522 // Initialization for scan phase:
6523 _25 = .GOMP_SIMD_LANE (simduid.3_14(D), 2);
6524 _26 = D.2043[_25];
6525 _27 = D.2042[_25];
6526 _28 = _26 + _27;
6527 D.2043[_25] = _28;
6528 D.2042[_25] = _28;
6529 // Actual scan phase:
6530 ...
6531 r.1_8 = D.2042[_20];
6532 ...
6533 The "omp simd array" variable D.2042 holds the privatized copy used
6534 inside of the loop and D.2043 is another one that holds copies of
6535 the current original list item. The separate GOMP_SIMD_LANE ifn
6536 kinds are there in order to allow optimizing the initializer store
6537      and combiner sequence, e.g. if it is originally some C++-ish user
6538      defined reduction, while still allowing the vectorizer to pattern
6539      recognize it and turn it into the appropriate vectorized scan.
6540
6541 For exclusive scan, this is slightly different:
6542 #pragma omp simd reduction(inscan,+:r)
6543 for (...)
6544 {
6545 use (r);
6546 #pragma omp scan exclusive (r)
6547 r += something ();
6548 }
6549 shall have body with:
6550 // Initialization for input phase, store the reduction initializer:
6551 _20 = .GOMP_SIMD_LANE (simduid.3_14(D), 0);
6552 _21 = .GOMP_SIMD_LANE (simduid.3_14(D), 1);
6553 D.2042[_21] = 0;
6554 // Actual input phase:
6555 ...
6556 r.0_5 = D.2042[_20];
6557 _6 = _4 + r.0_5;
6558 D.2042[_20] = _6;
6559 // Initialization for scan phase:
6560 _25 = .GOMP_SIMD_LANE (simduid.3_14(D), 3);
6561 _26 = D.2043[_25];
6562 D.2044[_25] = _26;
6563 _27 = D.2042[_25];
6564 _28 = _26 + _27;
6565 D.2043[_25] = _28;
6566 // Actual scan phase:
6567 ...
6568 r.1_8 = D.2044[_20];
6569 ... */
6570
6571 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 2)
6572 {
6573 /* Match the D.2042[_21] = 0; store above. Just require that
6574 it is a constant or external definition store. */
6575 if (rhs_dt != vect_constant_def && rhs_dt != vect_external_def)
6576 {
6577 fail_init:
6578 if (dump_enabled_p ())
6579 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6580 "unsupported OpenMP scan initializer store.\n");
6581 return false;
6582 }
6583
6584 if (! loop_vinfo->scan_map)
6585 loop_vinfo->scan_map = new hash_map<tree, tree>;
6586 tree var = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
6587 tree &cached = loop_vinfo->scan_map->get_or_insert (var);
6588 if (cached)
6589 goto fail_init;
6590 cached = gimple_assign_rhs1 (STMT_VINFO_STMT (stmt_info));
6591
6592 /* These stores can be vectorized normally. */
6593 return true;
6594 }
6595
6596 if (rhs_dt != vect_internal_def)
6597 {
6598 fail:
6599 if (dump_enabled_p ())
6600 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6601 "unsupported OpenMP scan combiner pattern.\n");
6602 return false;
6603 }
6604
6605 gimple *stmt = STMT_VINFO_STMT (stmt_info);
6606 tree rhs = gimple_assign_rhs1 (stmt);
6607 if (TREE_CODE (rhs) != SSA_NAME)
6608 goto fail;
6609
6610 gimple *other_store_stmt = NULL;
6611 tree var = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
6612 bool inscan_var_store
6613 = lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var)) != NULL;
6614
6615 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4)
6616 {
6617 if (!inscan_var_store)
6618 {
6619 use_operand_p use_p;
6620 imm_use_iterator iter;
6621 FOR_EACH_IMM_USE_FAST (use_p, iter, rhs)
6622 {
6623 gimple *use_stmt = USE_STMT (use_p);
6624 if (use_stmt == stmt || is_gimple_debug (use_stmt))
6625 continue;
6626 if (gimple_bb (use_stmt) != gimple_bb (stmt)
6627 || !is_gimple_assign (use_stmt)
6628 || gimple_assign_rhs_class (use_stmt) != GIMPLE_BINARY_RHS
6629 || other_store_stmt
6630 || TREE_CODE (gimple_assign_lhs (use_stmt)) != SSA_NAME)
6631 goto fail;
6632 other_store_stmt = use_stmt;
6633 }
6634 if (other_store_stmt == NULL)
6635 goto fail;
6636 rhs = gimple_assign_lhs (other_store_stmt);
6637 if (!single_imm_use (rhs, &use_p, &other_store_stmt))
6638 goto fail;
6639 }
6640 }
6641 else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 3)
6642 {
6643 use_operand_p use_p;
6644 imm_use_iterator iter;
6645 FOR_EACH_IMM_USE_FAST (use_p, iter, rhs)
6646 {
6647 gimple *use_stmt = USE_STMT (use_p);
6648 if (use_stmt == stmt || is_gimple_debug (use_stmt))
6649 continue;
6650 if (other_store_stmt)
6651 goto fail;
6652 other_store_stmt = use_stmt;
6653 }
6654 }
6655 else
6656 goto fail;
6657
6658 gimple *def_stmt = SSA_NAME_DEF_STMT (rhs);
6659 if (gimple_bb (def_stmt) != gimple_bb (stmt)
6660 || !is_gimple_assign (def_stmt)
6661 || gimple_assign_rhs_class (def_stmt) != GIMPLE_BINARY_RHS)
6662 goto fail;
6663
6664 enum tree_code code = gimple_assign_rhs_code (def_stmt);
6665 /* For pointer addition, we should use the normal plus for the vector
6666 operation. */
6667 switch (code)
6668 {
6669 case POINTER_PLUS_EXPR:
6670 code = PLUS_EXPR;
6671 break;
6672 case MULT_HIGHPART_EXPR:
6673 goto fail;
6674 default:
6675 break;
6676 }
6677 if (TREE_CODE_LENGTH (code) != binary_op || !commutative_tree_code (code))
6678 goto fail;
6679
6680 tree rhs1 = gimple_assign_rhs1 (def_stmt);
6681 tree rhs2 = gimple_assign_rhs2 (def_stmt);
6682 if (TREE_CODE (rhs1) != SSA_NAME || TREE_CODE (rhs2) != SSA_NAME)
6683 goto fail;
6684
6685 gimple *load1_stmt = SSA_NAME_DEF_STMT (rhs1);
6686 gimple *load2_stmt = SSA_NAME_DEF_STMT (rhs2);
6687 if (gimple_bb (load1_stmt) != gimple_bb (stmt)
6688 || !gimple_assign_load_p (load1_stmt)
6689 || gimple_bb (load2_stmt) != gimple_bb (stmt)
6690 || !gimple_assign_load_p (load2_stmt))
6691 goto fail;
6692
6693 stmt_vec_info load1_stmt_info = loop_vinfo->lookup_stmt (load1_stmt);
6694 stmt_vec_info load2_stmt_info = loop_vinfo->lookup_stmt (load2_stmt);
6695 if (load1_stmt_info == NULL
6696 || load2_stmt_info == NULL
6697 || (STMT_VINFO_SIMD_LANE_ACCESS_P (load1_stmt_info)
6698 != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info))
6699 || (STMT_VINFO_SIMD_LANE_ACCESS_P (load2_stmt_info)
6700 != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info)))
6701 goto fail;
6702
6703 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && inscan_var_store)
6704 {
6705 dr_vec_info *load1_dr_info = STMT_VINFO_DR_INFO (load1_stmt_info);
6706 if (TREE_CODE (DR_BASE_ADDRESS (load1_dr_info->dr)) != ADDR_EXPR
6707 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info->dr), 0)))
6708 goto fail;
6709 tree var1 = TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info->dr), 0);
6710 tree lrhs;
6711 if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1)))
6712 lrhs = rhs1;
6713 else
6714 lrhs = rhs2;
6715 use_operand_p use_p;
6716 imm_use_iterator iter;
6717 FOR_EACH_IMM_USE_FAST (use_p, iter, lrhs)
6718 {
6719 gimple *use_stmt = USE_STMT (use_p);
6720 if (use_stmt == def_stmt || is_gimple_debug (use_stmt))
6721 continue;
6722 if (other_store_stmt)
6723 goto fail;
6724 other_store_stmt = use_stmt;
6725 }
6726 }
6727
6728 if (other_store_stmt == NULL)
6729 goto fail;
6730 if (gimple_bb (other_store_stmt) != gimple_bb (stmt)
6731 || !gimple_store_p (other_store_stmt))
6732 goto fail;
6733
6734 stmt_vec_info other_store_stmt_info
6735 = loop_vinfo->lookup_stmt (other_store_stmt);
6736 if (other_store_stmt_info == NULL
6737 || (STMT_VINFO_SIMD_LANE_ACCESS_P (other_store_stmt_info)
6738 != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info)))
6739 goto fail;
6740
6741 gimple *stmt1 = stmt;
6742 gimple *stmt2 = other_store_stmt;
6743 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && !inscan_var_store)
6744 std::swap (stmt1, stmt2);
6745 if (scan_operand_equal_p (gimple_assign_lhs (stmt1),
6746 gimple_assign_rhs1 (load2_stmt)))
6747 {
6748 std::swap (rhs1, rhs2);
6749 std::swap (load1_stmt, load2_stmt);
6750 std::swap (load1_stmt_info, load2_stmt_info);
6751 }
6752 if (!scan_operand_equal_p (gimple_assign_lhs (stmt1),
6753 gimple_assign_rhs1 (load1_stmt)))
6754 goto fail;
6755
6756 tree var3 = NULL_TREE;
6757 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 3
6758 && !scan_operand_equal_p (gimple_assign_lhs (stmt2),
6759 gimple_assign_rhs1 (load2_stmt)))
6760 goto fail;
6761 else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4)
6762 {
6763 dr_vec_info *load2_dr_info = STMT_VINFO_DR_INFO (load2_stmt_info);
6764 if (TREE_CODE (DR_BASE_ADDRESS (load2_dr_info->dr)) != ADDR_EXPR
6765 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info->dr), 0)))
6766 goto fail;
6767 var3 = TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info->dr), 0);
6768 if (!lookup_attribute ("omp simd array", DECL_ATTRIBUTES (var3))
6769 || lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var3))
6770 || lookup_attribute ("omp simd inscan exclusive",
6771 DECL_ATTRIBUTES (var3)))
6772 goto fail;
6773 }
6774
6775 dr_vec_info *other_dr_info = STMT_VINFO_DR_INFO (other_store_stmt_info);
6776 if (TREE_CODE (DR_BASE_ADDRESS (other_dr_info->dr)) != ADDR_EXPR
6777 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (other_dr_info->dr), 0)))
6778 goto fail;
6779
6780 tree var1 = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
6781 tree var2 = TREE_OPERAND (DR_BASE_ADDRESS (other_dr_info->dr), 0);
6782 if (!lookup_attribute ("omp simd array", DECL_ATTRIBUTES (var1))
6783 || !lookup_attribute ("omp simd array", DECL_ATTRIBUTES (var2))
6784 || (!lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1)))
6785 == (!lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var2))))
6786 goto fail;
6787
6788 if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1)))
6789 std::swap (var1, var2);
6790
6791 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4)
6792 {
6793 if (!lookup_attribute ("omp simd inscan exclusive",
6794 DECL_ATTRIBUTES (var1)))
6795 goto fail;
6796 var1 = var3;
6797 }
6798
6799 if (loop_vinfo->scan_map == NULL)
6800 goto fail;
6801 tree *init = loop_vinfo->scan_map->get (var1);
6802 if (init == NULL)
6803 goto fail;
6804
6805 /* The IL is as expected, now check if we can actually vectorize it.
6806 Inclusive scan:
6807 _26 = D.2043[_25];
6808 _27 = D.2042[_25];
6809 _28 = _26 + _27;
6810 D.2043[_25] = _28;
6811 D.2042[_25] = _28;
6812 should be vectorized as (where _40 is the vectorized rhs
6813 from the D.2042[_21] = 0; store):
6814 _30 = MEM <vector(8) int> [(int *)&D.2043];
6815 _31 = MEM <vector(8) int> [(int *)&D.2042];
6816 _32 = VEC_PERM_EXPR <_40, _31, { 0, 8, 9, 10, 11, 12, 13, 14 }>;
6817 _33 = _31 + _32;
6818 // _33 = { _31[0], _31[0]+_31[1], _31[1]+_31[2], ..., _31[6]+_31[7] };
6819 _34 = VEC_PERM_EXPR <_40, _33, { 0, 1, 8, 9, 10, 11, 12, 13 }>;
6820 _35 = _33 + _34;
6821 // _35 = { _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
6822 // _31[1]+.._31[4], ... _31[4]+.._31[7] };
6823 _36 = VEC_PERM_EXPR <_40, _35, { 0, 1, 2, 3, 8, 9, 10, 11 }>;
6824 _37 = _35 + _36;
6825 // _37 = { _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
6826 // _31[0]+.._31[4], ... _31[0]+.._31[7] };
6827 _38 = _30 + _37;
6828 _39 = VEC_PERM_EXPR <_38, _38, { 7, 7, 7, 7, 7, 7, 7, 7 }>;
6829 MEM <vector(8) int> [(int *)&D.2043] = _39;
6830 MEM <vector(8) int> [(int *)&D.2042] = _38;
6831 Exclusive scan:
6832 _26 = D.2043[_25];
6833 D.2044[_25] = _26;
6834 _27 = D.2042[_25];
6835 _28 = _26 + _27;
6836 D.2043[_25] = _28;
6837 should be vectorized as (where _40 is the vectorized rhs
6838 from the D.2042[_21] = 0; store):
6839 _30 = MEM <vector(8) int> [(int *)&D.2043];
6840 _31 = MEM <vector(8) int> [(int *)&D.2042];
6841 _32 = VEC_PERM_EXPR <_40, _31, { 0, 8, 9, 10, 11, 12, 13, 14 }>;
6842 _33 = VEC_PERM_EXPR <_40, _32, { 0, 8, 9, 10, 11, 12, 13, 14 }>;
6843 _34 = _32 + _33;
6844 // _34 = { 0, _31[0], _31[0]+_31[1], _31[1]+_31[2], _31[2]+_31[3],
6845 // _31[3]+_31[4], ... _31[5]+.._31[6] };
6846 _35 = VEC_PERM_EXPR <_40, _34, { 0, 1, 8, 9, 10, 11, 12, 13 }>;
6847 _36 = _34 + _35;
6848 // _36 = { 0, _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
6849 // _31[1]+.._31[4], ... _31[3]+.._31[6] };
6850 _37 = VEC_PERM_EXPR <_40, _36, { 0, 1, 2, 3, 8, 9, 10, 11 }>;
6851 _38 = _36 + _37;
6852 // _38 = { 0, _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
6853 // _31[0]+.._31[4], ... _31[0]+.._31[6] };
6854 _39 = _30 + _38;
6855 _50 = _31 + _39;
6856 _51 = VEC_PERM_EXPR <_50, _50, { 7, 7, 7, 7, 7, 7, 7, 7 }>;
6857 MEM <vector(8) int> [(int *)&D.2044] = _39;
6858 MEM <vector(8) int> [(int *)&D.2042] = _51; */
6859 enum machine_mode vec_mode = TYPE_MODE (vectype);
6860 optab optab = optab_for_tree_code (code, vectype, optab_default);
6861 if (!optab || optab_handler (optab, vec_mode) == CODE_FOR_nothing)
6862 goto fail;
6863
6864 int units_log2 = scan_store_can_perm_p (vectype, *init);
6865 if (units_log2 == -1)
6866 goto fail;
6867
6868 return true;
6869 }
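
/* A reading aid only (nothing below relies on it): the
   STMT_VINFO_SIMD_LANE_ACCESS_P values handled above are

     2 - the reduction-initializer store (the D.2042[_21] = 0; above),
	 which is recorded in loop_vinfo->scan_map and vectorized normally,
     3 - the inclusive-scan combiner stores into D.2042/D.2043,
     4 - the exclusive-scan variant, which additionally copies the old
	 value into the separate D.2044 array.

   vectorizable_store calls check_scan_store only during analysis and
   dispatches values of 3 or more to vectorizable_scan_store at transform
   time.  */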
6870
6871
6872 /* Function vectorizable_scan_store.
6873
6874    Helper of vectorizable_store; arguments are as for vectorizable_store.
6875 Handle only the transformation, checking is done in check_scan_store. */
6876
6877 static bool
6878 vectorizable_scan_store (vec_info *vinfo,
6879 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
6880 gimple **vec_stmt, int ncopies)
6881 {
6882 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
6883 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
6884 tree ref_type = reference_alias_ptr_type (DR_REF (dr_info->dr));
6885 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
6886
6887 if (dump_enabled_p ())
6888 dump_printf_loc (MSG_NOTE, vect_location,
6889 "transform scan store. ncopies = %d\n", ncopies);
6890
6891 gimple *stmt = STMT_VINFO_STMT (stmt_info);
6892 tree rhs = gimple_assign_rhs1 (stmt);
6893 gcc_assert (TREE_CODE (rhs) == SSA_NAME);
6894
6895 tree var = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
6896 bool inscan_var_store
6897 = lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var)) != NULL;
6898
6899 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && !inscan_var_store)
6900 {
6901 use_operand_p use_p;
6902 imm_use_iterator iter;
6903 FOR_EACH_IMM_USE_FAST (use_p, iter, rhs)
6904 {
6905 gimple *use_stmt = USE_STMT (use_p);
6906 if (use_stmt == stmt || is_gimple_debug (use_stmt))
6907 continue;
6908 rhs = gimple_assign_lhs (use_stmt);
6909 break;
6910 }
6911 }
6912
6913 gimple *def_stmt = SSA_NAME_DEF_STMT (rhs);
6914 enum tree_code code = gimple_assign_rhs_code (def_stmt);
6915 if (code == POINTER_PLUS_EXPR)
6916 code = PLUS_EXPR;
6917 gcc_assert (TREE_CODE_LENGTH (code) == binary_op
6918 && commutative_tree_code (code));
6919 tree rhs1 = gimple_assign_rhs1 (def_stmt);
6920 tree rhs2 = gimple_assign_rhs2 (def_stmt);
6921 gcc_assert (TREE_CODE (rhs1) == SSA_NAME && TREE_CODE (rhs2) == SSA_NAME);
6922 gimple *load1_stmt = SSA_NAME_DEF_STMT (rhs1);
6923 gimple *load2_stmt = SSA_NAME_DEF_STMT (rhs2);
6924 stmt_vec_info load1_stmt_info = loop_vinfo->lookup_stmt (load1_stmt);
6925 stmt_vec_info load2_stmt_info = loop_vinfo->lookup_stmt (load2_stmt);
6926 dr_vec_info *load1_dr_info = STMT_VINFO_DR_INFO (load1_stmt_info);
6927 dr_vec_info *load2_dr_info = STMT_VINFO_DR_INFO (load2_stmt_info);
6928 tree var1 = TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info->dr), 0);
6929 tree var2 = TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info->dr), 0);
6930
6931 if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1)))
6932 {
6933 std::swap (rhs1, rhs2);
6934 std::swap (var1, var2);
6935 std::swap (load1_dr_info, load2_dr_info);
6936 }
6937
6938 tree *init = loop_vinfo->scan_map->get (var1);
6939 gcc_assert (init);
6940
6941 unsigned HOST_WIDE_INT nunits;
6942 if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits))
6943 gcc_unreachable ();
6944 auto_vec<enum scan_store_kind, 16> use_whole_vector;
6945 int units_log2 = scan_store_can_perm_p (vectype, *init, &use_whole_vector);
6946 gcc_assert (units_log2 > 0);
6947 auto_vec<tree, 16> perms;
6948 perms.quick_grow (units_log2 + 1);
6949 tree zero_vec = NULL_TREE, masktype = NULL_TREE;
6950 for (int i = 0; i <= units_log2; ++i)
6951 {
6952 unsigned HOST_WIDE_INT j, k;
6953 vec_perm_builder sel (nunits, nunits, 1);
6954 sel.quick_grow (nunits);
6955 if (i == units_log2)
6956 for (j = 0; j < nunits; ++j)
6957 sel[j] = nunits - 1;
6958 else
6959 {
6960 for (j = 0; j < (HOST_WIDE_INT_1U << i); ++j)
6961 sel[j] = j;
6962 for (k = 0; j < nunits; ++j, ++k)
6963 sel[j] = nunits + k;
6964 }
6965 vec_perm_indices indices (sel, i == units_log2 ? 1 : 2, nunits);
6966 if (!use_whole_vector.is_empty ()
6967 && use_whole_vector[i] != scan_store_kind_perm)
6968 {
6969 if (zero_vec == NULL_TREE)
6970 zero_vec = build_zero_cst (vectype);
6971 if (masktype == NULL_TREE
6972 && use_whole_vector[i] == scan_store_kind_lshift_cond)
6973 masktype = truth_type_for (vectype);
6974 perms[i] = vect_gen_perm_mask_any (vectype, indices);
6975 }
6976 else
6977 perms[i] = vect_gen_perm_mask_checked (vectype, indices);
6978 }
6979
6980 tree vec_oprnd1 = NULL_TREE;
6981 tree vec_oprnd2 = NULL_TREE;
6982 tree vec_oprnd3 = NULL_TREE;
6983 tree dataref_ptr = DR_BASE_ADDRESS (dr_info->dr);
6984 tree dataref_offset = build_int_cst (ref_type, 0);
6985 tree bump = vect_get_data_ptr_increment (vinfo, dr_info,
6986 vectype, VMAT_CONTIGUOUS);
6987 tree ldataref_ptr = NULL_TREE;
6988 tree orig = NULL_TREE;
6989 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && !inscan_var_store)
6990 ldataref_ptr = DR_BASE_ADDRESS (load1_dr_info->dr);
6991 auto_vec<tree> vec_oprnds1;
6992 auto_vec<tree> vec_oprnds2;
6993 auto_vec<tree> vec_oprnds3;
6994 vect_get_vec_defs (vinfo, stmt_info, NULL, ncopies,
6995 *init, &vec_oprnds1,
6996 ldataref_ptr == NULL ? rhs1 : NULL, &vec_oprnds2,
6997 rhs2, &vec_oprnds3);
6998 for (int j = 0; j < ncopies; j++)
6999 {
7000 vec_oprnd1 = vec_oprnds1[j];
7001 if (ldataref_ptr == NULL)
7002 vec_oprnd2 = vec_oprnds2[j];
7003 vec_oprnd3 = vec_oprnds3[j];
7004 if (j == 0)
7005 orig = vec_oprnd3;
7006 else if (!inscan_var_store)
7007 dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset, bump);
7008
7009 if (ldataref_ptr)
7010 {
7011 vec_oprnd2 = make_ssa_name (vectype);
7012 tree data_ref = fold_build2 (MEM_REF, vectype,
7013 unshare_expr (ldataref_ptr),
7014 dataref_offset);
7015 vect_copy_ref_info (data_ref, DR_REF (load1_dr_info->dr));
7016 gimple *g = gimple_build_assign (vec_oprnd2, data_ref);
7017 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
7018 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
7019 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
7020 }
7021
7022 tree v = vec_oprnd2;
7023 for (int i = 0; i < units_log2; ++i)
7024 {
7025 tree new_temp = make_ssa_name (vectype);
7026 gimple *g = gimple_build_assign (new_temp, VEC_PERM_EXPR,
7027 (zero_vec
7028 && (use_whole_vector[i]
7029 != scan_store_kind_perm))
7030 ? zero_vec : vec_oprnd1, v,
7031 perms[i]);
7032 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
7033 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
7034 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
7035
7036 if (zero_vec && use_whole_vector[i] == scan_store_kind_lshift_cond)
7037 {
7038 	      /* The whole-vector shift shifted in zero bits; if *init does not
7039 		 satisfy initializer_zerop, we need to replace those elements
7040 		 with the corresponding elements from vec_oprnd1.  */
7041 tree_vector_builder vb (masktype, nunits, 1);
7042 for (unsigned HOST_WIDE_INT k = 0; k < nunits; ++k)
7043 vb.quick_push (k < (HOST_WIDE_INT_1U << i)
7044 ? boolean_false_node : boolean_true_node);
7045
7046 tree new_temp2 = make_ssa_name (vectype);
7047 g = gimple_build_assign (new_temp2, VEC_COND_EXPR, vb.build (),
7048 new_temp, vec_oprnd1);
7049 vect_finish_stmt_generation (vinfo, stmt_info,
7050 g, gsi);
7051 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
7052 new_temp = new_temp2;
7053 }
7054
7055 /* For exclusive scan, perform the perms[i] permutation once
7056 more. */
7057 if (i == 0
7058 && STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4
7059 && v == vec_oprnd2)
7060 {
7061 v = new_temp;
7062 --i;
7063 continue;
7064 }
7065
7066 tree new_temp2 = make_ssa_name (vectype);
7067 g = gimple_build_assign (new_temp2, code, v, new_temp);
7068 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
7069 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
7070
7071 v = new_temp2;
7072 }
7073
7074 tree new_temp = make_ssa_name (vectype);
7075 gimple *g = gimple_build_assign (new_temp, code, orig, v);
7076 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
7077 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
7078
7079 tree last_perm_arg = new_temp;
7080 /* For exclusive scan, new_temp computed above is the exclusive scan
7081 prefix sum. Turn it into inclusive prefix sum for the broadcast
7082 of the last element into orig. */
7083 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4)
7084 {
7085 last_perm_arg = make_ssa_name (vectype);
7086 g = gimple_build_assign (last_perm_arg, code, new_temp, vec_oprnd2);
7087 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
7088 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
7089 }
7090
7091 orig = make_ssa_name (vectype);
7092 g = gimple_build_assign (orig, VEC_PERM_EXPR, last_perm_arg,
7093 last_perm_arg, perms[units_log2]);
7094 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
7095 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
7096
7097 if (!inscan_var_store)
7098 {
7099 tree data_ref = fold_build2 (MEM_REF, vectype,
7100 unshare_expr (dataref_ptr),
7101 dataref_offset);
7102 vect_copy_ref_info (data_ref, DR_REF (dr_info->dr));
7103 g = gimple_build_assign (data_ref, new_temp);
7104 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
7105 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
7106 }
7107 }
7108
7109 if (inscan_var_store)
7110 for (int j = 0; j < ncopies; j++)
7111 {
7112 if (j != 0)
7113 dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset, bump);
7114
7115 tree data_ref = fold_build2 (MEM_REF, vectype,
7116 unshare_expr (dataref_ptr),
7117 dataref_offset);
7118 vect_copy_ref_info (data_ref, DR_REF (dr_info->dr));
7119 gimple *g = gimple_build_assign (data_ref, orig);
7120 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
7121 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
7122 }
7123 return true;
7124 }
7125
7126
7127 /* Function vectorizable_store.
7128
7129    Check if STMT_INFO defines a non-scalar data-ref (array/pointer/structure)
7130 that can be vectorized.
7131 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
7132 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
7133 Return true if STMT_INFO is vectorizable in this way. */
7134
7135 static bool
7136 vectorizable_store (vec_info *vinfo,
7137 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
7138 gimple **vec_stmt, slp_tree slp_node,
7139 stmt_vector_for_cost *cost_vec)
7140 {
7141 tree data_ref;
7142 tree op;
7143 tree vec_oprnd = NULL_TREE;
7144 tree elem_type;
7145 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
7146 class loop *loop = NULL;
7147 machine_mode vec_mode;
7148 tree dummy;
7149 enum vect_def_type rhs_dt = vect_unknown_def_type;
7150 enum vect_def_type mask_dt = vect_unknown_def_type;
7151 tree dataref_ptr = NULL_TREE;
7152 tree dataref_offset = NULL_TREE;
7153 gimple *ptr_incr = NULL;
7154 int ncopies;
7155 int j;
7156 stmt_vec_info first_stmt_info;
7157 bool grouped_store;
7158 unsigned int group_size, i;
7159 vec<tree> oprnds = vNULL;
7160 vec<tree> result_chain = vNULL;
7161 tree offset = NULL_TREE;
7162 vec<tree> vec_oprnds = vNULL;
7163 bool slp = (slp_node != NULL);
7164 unsigned int vec_num;
7165 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
7166 tree aggr_type;
7167 gather_scatter_info gs_info;
7168 poly_uint64 vf;
7169 vec_load_store_type vls_type;
7170 tree ref_type;
7171
7172 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
7173 return false;
7174
7175 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
7176 && ! vec_stmt)
7177 return false;
7178
7179 /* Is vectorizable store? */
7180
7181 tree mask = NULL_TREE, mask_vectype = NULL_TREE;
7182 if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
7183 {
7184 tree scalar_dest = gimple_assign_lhs (assign);
7185 if (TREE_CODE (scalar_dest) == VIEW_CONVERT_EXPR
7186 && is_pattern_stmt_p (stmt_info))
7187 scalar_dest = TREE_OPERAND (scalar_dest, 0);
7188 if (TREE_CODE (scalar_dest) != ARRAY_REF
7189 && TREE_CODE (scalar_dest) != BIT_FIELD_REF
7190 && TREE_CODE (scalar_dest) != INDIRECT_REF
7191 && TREE_CODE (scalar_dest) != COMPONENT_REF
7192 && TREE_CODE (scalar_dest) != IMAGPART_EXPR
7193 && TREE_CODE (scalar_dest) != REALPART_EXPR
7194 && TREE_CODE (scalar_dest) != MEM_REF)
7195 return false;
7196 }
7197 else
7198 {
7199 gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
7200 if (!call || !gimple_call_internal_p (call))
7201 return false;
7202
7203 internal_fn ifn = gimple_call_internal_fn (call);
7204 if (!internal_store_fn_p (ifn))
7205 return false;
7206
7207 if (slp_node != NULL)
7208 {
7209 if (dump_enabled_p ())
7210 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7211 "SLP of masked stores not supported.\n");
7212 return false;
7213 }
7214
7215 int mask_index = internal_fn_mask_index (ifn);
7216 if (mask_index >= 0)
7217 {
7218 mask = gimple_call_arg (call, mask_index);
7219 if (!vect_check_scalar_mask (vinfo, stmt_info, mask, &mask_dt,
7220 &mask_vectype))
7221 return false;
7222 }
7223 }
7224
7225 op = vect_get_store_rhs (stmt_info);
7226
7227 /* Cannot have hybrid store SLP -- that would mean storing to the
7228 same location twice. */
7229 gcc_assert (slp == PURE_SLP_STMT (stmt_info));
7230
7231 tree vectype = STMT_VINFO_VECTYPE (stmt_info), rhs_vectype = NULL_TREE;
7232 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
7233
7234 if (loop_vinfo)
7235 {
7236 loop = LOOP_VINFO_LOOP (loop_vinfo);
7237 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
7238 }
7239 else
7240 vf = 1;
7241
7242 /* Multiple types in SLP are handled by creating the appropriate number of
7243 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
7244 case of SLP. */
7245 if (slp)
7246 ncopies = 1;
7247 else
7248 ncopies = vect_get_num_copies (loop_vinfo, vectype);
7249
7250 gcc_assert (ncopies >= 1);
7251
7252 /* FORNOW. This restriction should be relaxed. */
7253 if (loop && nested_in_vect_loop_p (loop, stmt_info) && ncopies > 1)
7254 {
7255 if (dump_enabled_p ())
7256 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7257 "multiple types in nested loop.\n");
7258 return false;
7259 }
7260
7261 if (!vect_check_store_rhs (vinfo, stmt_info, slp_node,
7262 op, &rhs_dt, &rhs_vectype, &vls_type))
7263 return false;
7264
7265 elem_type = TREE_TYPE (vectype);
7266 vec_mode = TYPE_MODE (vectype);
7267
7268 if (!STMT_VINFO_DATA_REF (stmt_info))
7269 return false;
7270
7271 vect_memory_access_type memory_access_type;
7272 enum dr_alignment_support alignment_support_scheme;
7273 if (!get_load_store_type (vinfo, stmt_info, vectype, slp_node, mask, vls_type,
7274 ncopies, &memory_access_type,
7275 &alignment_support_scheme, &gs_info))
7276 return false;
7277
7278 if (mask)
7279 {
7280 if (memory_access_type == VMAT_CONTIGUOUS)
7281 {
7282 if (!VECTOR_MODE_P (vec_mode)
7283 || !can_vec_mask_load_store_p (vec_mode,
7284 TYPE_MODE (mask_vectype), false))
7285 return false;
7286 }
7287 else if (memory_access_type != VMAT_LOAD_STORE_LANES
7288 && (memory_access_type != VMAT_GATHER_SCATTER
7289 || (gs_info.decl && !VECTOR_BOOLEAN_TYPE_P (mask_vectype))))
7290 {
7291 if (dump_enabled_p ())
7292 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7293 "unsupported access type for masked store.\n");
7294 return false;
7295 }
7296 }
7297 else
7298 {
7299       /* FORNOW.  In some cases we can vectorize even if the data type is
7300 	 not supported (e.g. array initialization with 0).  */
7301 if (optab_handler (mov_optab, vec_mode) == CODE_FOR_nothing)
7302 return false;
7303 }
7304
7305 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info), *first_dr_info = NULL;
7306 grouped_store = (STMT_VINFO_GROUPED_ACCESS (stmt_info)
7307 && memory_access_type != VMAT_GATHER_SCATTER
7308 && (slp || memory_access_type != VMAT_CONTIGUOUS));
7309 if (grouped_store)
7310 {
7311 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
7312 first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
7313 group_size = DR_GROUP_SIZE (first_stmt_info);
7314 }
7315 else
7316 {
7317 first_stmt_info = stmt_info;
7318 first_dr_info = dr_info;
7319 group_size = vec_num = 1;
7320 }
7321
7322 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) > 1 && !vec_stmt)
7323 {
7324 if (!check_scan_store (vinfo, stmt_info, vectype, rhs_dt, slp, mask,
7325 memory_access_type))
7326 return false;
7327 }
7328
7329 if (!vec_stmt) /* transformation not required. */
7330 {
7331 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;
7332
7333 if (loop_vinfo
7334 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo))
7335 check_load_store_for_partial_vectors (loop_vinfo, vectype, vls_type,
7336 group_size, memory_access_type,
7337 &gs_info, mask);
7338
7339 if (slp_node
7340 && !vect_maybe_update_slp_op_vectype (SLP_TREE_CHILDREN (slp_node)[0],
7341 vectype))
7342 {
7343 if (dump_enabled_p ())
7344 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7345 "incompatible vector types for invariants\n");
7346 return false;
7347 }
7348
7349 if (dump_enabled_p ()
7350 && memory_access_type != VMAT_ELEMENTWISE
7351 && memory_access_type != VMAT_GATHER_SCATTER
7352 && alignment_support_scheme != dr_aligned)
7353 dump_printf_loc (MSG_NOTE, vect_location,
7354 "Vectorizing an unaligned access.\n");
7355
7356 STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
7357 vect_model_store_cost (vinfo, stmt_info, ncopies,
7358 memory_access_type, vls_type, slp_node, cost_vec);
7359 return true;
7360 }
7361 gcc_assert (memory_access_type == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info));
7362
7363 /* Transform. */
7364
7365 ensure_base_align (dr_info);
7366
7367 if (memory_access_type == VMAT_GATHER_SCATTER && gs_info.decl)
7368 {
7369 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE, src;
7370 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info.decl));
7371 tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
7372 tree ptr, var, scale, vec_mask;
7373 tree mask_arg = NULL_TREE, mask_op = NULL_TREE, perm_mask = NULL_TREE;
7374 tree mask_halfvectype = mask_vectype;
7375 edge pe = loop_preheader_edge (loop);
7376 gimple_seq seq;
7377 basic_block new_bb;
7378 enum { NARROW, NONE, WIDEN } modifier;
7379 poly_uint64 scatter_off_nunits
7380 = TYPE_VECTOR_SUBPARTS (gs_info.offset_vectype);
7381
7382 if (known_eq (nunits, scatter_off_nunits))
7383 modifier = NONE;
7384 else if (known_eq (nunits * 2, scatter_off_nunits))
7385 {
7386 modifier = WIDEN;
7387
7388 /* Currently gathers and scatters are only supported for
7389 fixed-length vectors. */
7390 unsigned int count = scatter_off_nunits.to_constant ();
7391 vec_perm_builder sel (count, count, 1);
7392 for (i = 0; i < (unsigned int) count; ++i)
7393 sel.quick_push (i | (count / 2));
7394
7395 vec_perm_indices indices (sel, 1, count);
7396 perm_mask = vect_gen_perm_mask_checked (gs_info.offset_vectype,
7397 indices);
7398 gcc_assert (perm_mask != NULL_TREE);
7399 }
7400 else if (known_eq (nunits, scatter_off_nunits * 2))
7401 {
7402 modifier = NARROW;
7403
7404 /* Currently gathers and scatters are only supported for
7405 fixed-length vectors. */
7406 unsigned int count = nunits.to_constant ();
7407 vec_perm_builder sel (count, count, 1);
7408 for (i = 0; i < (unsigned int) count; ++i)
7409 sel.quick_push (i | (count / 2));
7410
7411 vec_perm_indices indices (sel, 2, count);
7412 perm_mask = vect_gen_perm_mask_checked (vectype, indices);
7413 gcc_assert (perm_mask != NULL_TREE);
7414 ncopies *= 2;
7415
7416 if (mask)
7417 mask_halfvectype = truth_type_for (gs_info.offset_vectype);
7418 }
7419 else
7420 gcc_unreachable ();
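
      /* Illustrative only: with nunits == 4 and scatter_off_nunits == 8
	 (the WIDEN case above), COUNT is 8 and the selector built is
	 { 4, 5, 6, 7, 4, 5, 6, 7 }, i.e. odd-numbered copies scatter using
	 the high half of the offset vector.  The NARROW case builds the
	 analogous selector over the data vector instead.  */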
7421
7422 rettype = TREE_TYPE (TREE_TYPE (gs_info.decl));
7423 ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
7424 masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
7425 idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
7426 srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
7427 scaletype = TREE_VALUE (arglist);
7428
7429 gcc_checking_assert (TREE_CODE (masktype) == INTEGER_TYPE
7430 && TREE_CODE (rettype) == VOID_TYPE);
7431
7432 ptr = fold_convert (ptrtype, gs_info.base);
7433 if (!is_gimple_min_invariant (ptr))
7434 {
7435 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
7436 new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
7437 gcc_assert (!new_bb);
7438 }
7439
7440 if (mask == NULL_TREE)
7441 {
7442 mask_arg = build_int_cst (masktype, -1);
7443 mask_arg = vect_init_vector (vinfo, stmt_info,
7444 mask_arg, masktype, NULL);
7445 }
7446
7447 scale = build_int_cst (scaletype, gs_info.scale);
7448
7449 auto_vec<tree> vec_oprnds0;
7450 auto_vec<tree> vec_oprnds1;
7451 auto_vec<tree> vec_masks;
7452 if (mask)
7453 {
7454 tree mask_vectype = truth_type_for (vectype);
7455 vect_get_vec_defs_for_operand (vinfo, stmt_info,
7456 modifier == NARROW
7457 ? ncopies / 2 : ncopies,
7458 mask, &vec_masks, mask_vectype);
7459 }
7460 vect_get_vec_defs_for_operand (vinfo, stmt_info,
7461 modifier == WIDEN
7462 ? ncopies / 2 : ncopies,
7463 gs_info.offset, &vec_oprnds0);
7464 vect_get_vec_defs_for_operand (vinfo, stmt_info,
7465 modifier == NARROW
7466 ? ncopies / 2 : ncopies,
7467 op, &vec_oprnds1);
7468 for (j = 0; j < ncopies; ++j)
7469 {
7470 if (modifier == WIDEN)
7471 {
7472 if (j & 1)
7473 op = permute_vec_elements (vinfo, vec_oprnd0, vec_oprnd0,
7474 perm_mask, stmt_info, gsi);
7475 else
7476 op = vec_oprnd0 = vec_oprnds0[j / 2];
7477 src = vec_oprnd1 = vec_oprnds1[j];
7478 if (mask)
7479 mask_op = vec_mask = vec_masks[j];
7480 }
7481 else if (modifier == NARROW)
7482 {
7483 if (j & 1)
7484 src = permute_vec_elements (vinfo, vec_oprnd1, vec_oprnd1,
7485 perm_mask, stmt_info, gsi);
7486 else
7487 src = vec_oprnd1 = vec_oprnds1[j / 2];
7488 op = vec_oprnd0 = vec_oprnds0[j];
7489 if (mask)
7490 mask_op = vec_mask = vec_masks[j / 2];
7491 }
7492 else
7493 {
7494 op = vec_oprnd0 = vec_oprnds0[j];
7495 src = vec_oprnd1 = vec_oprnds1[j];
7496 if (mask)
7497 mask_op = vec_mask = vec_masks[j];
7498 }
7499
7500 if (!useless_type_conversion_p (srctype, TREE_TYPE (src)))
7501 {
7502 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (src)),
7503 TYPE_VECTOR_SUBPARTS (srctype)));
7504 var = vect_get_new_ssa_name (srctype, vect_simple_var);
7505 src = build1 (VIEW_CONVERT_EXPR, srctype, src);
7506 gassign *new_stmt
7507 = gimple_build_assign (var, VIEW_CONVERT_EXPR, src);
7508 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7509 src = var;
7510 }
7511
7512 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
7513 {
7514 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op)),
7515 TYPE_VECTOR_SUBPARTS (idxtype)));
7516 var = vect_get_new_ssa_name (idxtype, vect_simple_var);
7517 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
7518 gassign *new_stmt
7519 = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
7520 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7521 op = var;
7522 }
7523
7524 if (mask)
7525 {
7526 tree utype;
7527 mask_arg = mask_op;
7528 if (modifier == NARROW)
7529 {
7530 var = vect_get_new_ssa_name (mask_halfvectype,
7531 vect_simple_var);
7532 gassign *new_stmt
7533 = gimple_build_assign (var, (j & 1) ? VEC_UNPACK_HI_EXPR
7534 : VEC_UNPACK_LO_EXPR,
7535 mask_op);
7536 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7537 mask_arg = var;
7538 }
7539 tree optype = TREE_TYPE (mask_arg);
7540 if (TYPE_MODE (masktype) == TYPE_MODE (optype))
7541 utype = masktype;
7542 else
7543 utype = lang_hooks.types.type_for_mode (TYPE_MODE (optype), 1);
7544 var = vect_get_new_ssa_name (utype, vect_scalar_var);
7545 mask_arg = build1 (VIEW_CONVERT_EXPR, utype, mask_arg);
7546 gassign *new_stmt
7547 = gimple_build_assign (var, VIEW_CONVERT_EXPR, mask_arg);
7548 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7549 mask_arg = var;
7550 if (!useless_type_conversion_p (masktype, utype))
7551 {
7552 gcc_assert (TYPE_PRECISION (utype)
7553 <= TYPE_PRECISION (masktype));
7554 var = vect_get_new_ssa_name (masktype, vect_scalar_var);
7555 new_stmt = gimple_build_assign (var, NOP_EXPR, mask_arg);
7556 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7557 mask_arg = var;
7558 }
7559 }
7560
7561 gcall *new_stmt
7562 = gimple_build_call (gs_info.decl, 5, ptr, mask_arg, op, src, scale);
7563 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7564
7565 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
7566 }
7567 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
7568 return true;
7569 }
7570 else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) >= 3)
7571 return vectorizable_scan_store (vinfo, stmt_info, gsi, vec_stmt, ncopies);
7572
7573 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
7574 DR_GROUP_STORE_COUNT (DR_GROUP_FIRST_ELEMENT (stmt_info))++;
7575
7576 if (grouped_store)
7577 {
7578 /* FORNOW */
7579 gcc_assert (!loop || !nested_in_vect_loop_p (loop, stmt_info));
7580
7581 /* We vectorize all the stmts of the interleaving group when we
7582 reach the last stmt in the group. */
7583 if (DR_GROUP_STORE_COUNT (first_stmt_info)
7584 < DR_GROUP_SIZE (first_stmt_info)
7585 && !slp)
7586 {
7587 *vec_stmt = NULL;
7588 return true;
7589 }
7590
7591 if (slp)
7592 {
7593 grouped_store = false;
7594 /* VEC_NUM is the number of vect stmts to be created for this
7595 group. */
7596 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
7597 first_stmt_info = SLP_TREE_SCALAR_STMTS (slp_node)[0];
7598 gcc_assert (DR_GROUP_FIRST_ELEMENT (first_stmt_info)
7599 == first_stmt_info);
7600 first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
7601 op = vect_get_store_rhs (first_stmt_info);
7602 }
7603 else
7604 /* VEC_NUM is the number of vect stmts to be created for this
7605 group. */
7606 vec_num = group_size;
7607
7608 ref_type = get_group_alias_ptr_type (first_stmt_info);
7609 }
7610 else
7611 ref_type = reference_alias_ptr_type (DR_REF (first_dr_info->dr));
7612
7613 if (dump_enabled_p ())
7614 dump_printf_loc (MSG_NOTE, vect_location,
7615 "transform store. ncopies = %d\n", ncopies);
7616
7617 if (memory_access_type == VMAT_ELEMENTWISE
7618 || memory_access_type == VMAT_STRIDED_SLP)
7619 {
7620 gimple_stmt_iterator incr_gsi;
7621 bool insert_after;
7622 gimple *incr;
7623 tree offvar;
7624 tree ivstep;
7625 tree running_off;
7626 tree stride_base, stride_step, alias_off;
7627 tree vec_oprnd;
7628 tree dr_offset;
7629 unsigned int g;
7630 /* Checked by get_load_store_type. */
7631 unsigned int const_nunits = nunits.to_constant ();
7632
7633 gcc_assert (!LOOP_VINFO_FULLY_MASKED_P (loop_vinfo));
7634 gcc_assert (!nested_in_vect_loop_p (loop, stmt_info));
7635
7636 dr_offset = get_dr_vinfo_offset (vinfo, first_dr_info);
7637 stride_base
7638 = fold_build_pointer_plus
7639 (DR_BASE_ADDRESS (first_dr_info->dr),
7640 size_binop (PLUS_EXPR,
7641 convert_to_ptrofftype (dr_offset),
7642 convert_to_ptrofftype (DR_INIT (first_dr_info->dr))));
7643 stride_step = fold_convert (sizetype, DR_STEP (first_dr_info->dr));
7644
7645 /* For a store with loop-invariant (but other than power-of-2)
7646 stride (i.e. not a grouped access) like so:
7647
7648 for (i = 0; i < n; i += stride)
7649 array[i] = ...;
7650
7651 we generate a new induction variable and new stores from
7652 the components of the (vectorized) rhs:
7653
7654 for (j = 0; ; j += VF*stride)
7655 vectemp = ...;
7656 tmp1 = vectemp[0];
7657 array[j] = tmp1;
7658 tmp2 = vectemp[1];
7659 array[j + stride] = tmp2;
7660 ...
7661 */
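
  /* For instance (purely illustrative), with VF == 4 and stride == 3 one
     vector iteration stores to array[j], array[j + 3], array[j + 6] and
     array[j + 9], and j then advances by VF * stride == 12.  */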
7662
7663 unsigned nstores = const_nunits;
7664 unsigned lnel = 1;
7665 tree ltype = elem_type;
7666 tree lvectype = vectype;
7667 if (slp)
7668 {
7669 if (group_size < const_nunits
7670 && const_nunits % group_size == 0)
7671 {
7672 nstores = const_nunits / group_size;
7673 lnel = group_size;
7674 ltype = build_vector_type (elem_type, group_size);
7675 lvectype = vectype;
7676
7677 	      /* First check whether the vec_extract optab supports extracting
7678 		 the vector elements directly; if not, try the fallbacks below.  */
7679 scalar_mode elmode = SCALAR_TYPE_MODE (elem_type);
7680 machine_mode vmode;
7681 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
7682 || !related_vector_mode (TYPE_MODE (vectype), elmode,
7683 group_size).exists (&vmode)
7684 || (convert_optab_handler (vec_extract_optab,
7685 TYPE_MODE (vectype), vmode)
7686 == CODE_FOR_nothing))
7687 {
7688 /* Try to avoid emitting an extract of vector elements
7689 by performing the extracts using an integer type of the
7690 same size, extracting from a vector of those and then
7691 re-interpreting it as the original vector type if
7692 supported. */
7693 unsigned lsize
7694 = group_size * GET_MODE_BITSIZE (elmode);
7695 unsigned int lnunits = const_nunits / group_size;
7696 /* If we can't construct such a vector fall back to
7697 element extracts from the original vector type and
7698 element size stores. */
7699 if (int_mode_for_size (lsize, 0).exists (&elmode)
7700 && VECTOR_MODE_P (TYPE_MODE (vectype))
7701 && related_vector_mode (TYPE_MODE (vectype), elmode,
7702 lnunits).exists (&vmode)
7703 && (convert_optab_handler (vec_extract_optab,
7704 vmode, elmode)
7705 != CODE_FOR_nothing))
7706 {
7707 nstores = lnunits;
7708 lnel = group_size;
7709 ltype = build_nonstandard_integer_type (lsize, 1);
7710 lvectype = build_vector_type (ltype, nstores);
7711 }
7712 /* Else fall back to vector extraction anyway.
7713 Fewer stores are more important than avoiding spilling
7714 of the vector we extract from. Compared to the
7715 construction case in vectorizable_load no store-forwarding
7716 issue exists here for reasonable archs. */
7717 }
7718 }
7719 else if (group_size >= const_nunits
7720 && group_size % const_nunits == 0)
7721 {
7722 nstores = 1;
7723 lnel = const_nunits;
7724 ltype = vectype;
7725 lvectype = vectype;
7726 }
7727 ltype = build_aligned_type (ltype, TYPE_ALIGN (elem_type));
7728 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
7729 }
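
      /* A worked illustration of the integer punning above (hypothetical
	 numbers, not taken from the sources): with a QImode element type,
	 group_size == 4 and const_nunits == 16 on a target without direct
	 V4QI extraction from V16QI, lsize becomes 32, ltype a 32-bit integer
	 type, lvectype a 4-element vector of those and nstores == 4, so each
	 scalar store moves one 32-bit chunk, i.e. one whole group, at a
	 time.  */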
7730
7731 ivstep = stride_step;
7732 ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (ivstep), ivstep,
7733 build_int_cst (TREE_TYPE (ivstep), vf));
7734
7735 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
7736
7737 stride_base = cse_and_gimplify_to_preheader (loop_vinfo, stride_base);
7738 ivstep = cse_and_gimplify_to_preheader (loop_vinfo, ivstep);
7739 create_iv (stride_base, ivstep, NULL,
7740 loop, &incr_gsi, insert_after,
7741 &offvar, NULL);
7742 incr = gsi_stmt (incr_gsi);
7743
7744 stride_step = cse_and_gimplify_to_preheader (loop_vinfo, stride_step);
7745
7746 alias_off = build_int_cst (ref_type, 0);
7747 stmt_vec_info next_stmt_info = first_stmt_info;
7748 for (g = 0; g < group_size; g++)
7749 {
7750 running_off = offvar;
7751 if (g)
7752 {
7753 tree size = TYPE_SIZE_UNIT (ltype);
7754 tree pos = fold_build2 (MULT_EXPR, sizetype, size_int (g),
7755 size);
7756 tree newoff = copy_ssa_name (running_off, NULL);
7757 incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
7758 running_off, pos);
7759 vect_finish_stmt_generation (vinfo, stmt_info, incr, gsi);
7760 running_off = newoff;
7761 }
7762 if (!slp)
7763 op = vect_get_store_rhs (next_stmt_info);
7764 vect_get_vec_defs (vinfo, next_stmt_info, slp_node, ncopies,
7765 op, &vec_oprnds);
7766 unsigned int group_el = 0;
7767 unsigned HOST_WIDE_INT
7768 elsz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
7769 for (j = 0; j < ncopies; j++)
7770 {
7771 vec_oprnd = vec_oprnds[j];
7772 /* Pun the vector to extract from if necessary. */
7773 if (lvectype != vectype)
7774 {
7775 tree tem = make_ssa_name (lvectype);
7776 gimple *pun
7777 = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
7778 lvectype, vec_oprnd));
7779 vect_finish_stmt_generation (vinfo, stmt_info, pun, gsi);
7780 vec_oprnd = tem;
7781 }
7782 for (i = 0; i < nstores; i++)
7783 {
7784 tree newref, newoff;
7785 gimple *incr, *assign;
7786 tree size = TYPE_SIZE (ltype);
7787 /* Extract the i'th component. */
7788 tree pos = fold_build2 (MULT_EXPR, bitsizetype,
7789 bitsize_int (i), size);
7790 tree elem = fold_build3 (BIT_FIELD_REF, ltype, vec_oprnd,
7791 size, pos);
7792
7793 elem = force_gimple_operand_gsi (gsi, elem, true,
7794 NULL_TREE, true,
7795 GSI_SAME_STMT);
7796
7797 tree this_off = build_int_cst (TREE_TYPE (alias_off),
7798 group_el * elsz);
7799 newref = build2 (MEM_REF, ltype,
7800 running_off, this_off);
7801 vect_copy_ref_info (newref, DR_REF (first_dr_info->dr));
7802
7803 /* And store it to *running_off. */
7804 assign = gimple_build_assign (newref, elem);
7805 vect_finish_stmt_generation (vinfo, stmt_info, assign, gsi);
7806
7807 group_el += lnel;
7808 if (! slp
7809 || group_el == group_size)
7810 {
7811 newoff = copy_ssa_name (running_off, NULL);
7812 incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
7813 running_off, stride_step);
7814 vect_finish_stmt_generation (vinfo, stmt_info, incr, gsi);
7815
7816 running_off = newoff;
7817 group_el = 0;
7818 }
7819 if (g == group_size - 1
7820 && !slp)
7821 {
7822 if (j == 0 && i == 0)
7823 *vec_stmt = assign;
7824 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (assign);
7825 }
7826 }
7827 }
7828 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
7829 vec_oprnds.release ();
7830 if (slp)
7831 break;
7832 }
7833
7834 return true;
7835 }
7836
7837 auto_vec<tree> dr_chain (group_size);
7838 oprnds.create (group_size);
7839
7840   /* Gather-scatter accesses perform only component accesses; alignment
7841      is irrelevant for them.  */
7842 if (memory_access_type == VMAT_GATHER_SCATTER)
7843 alignment_support_scheme = dr_unaligned_supported;
7844 else
7845 alignment_support_scheme
7846 = vect_supportable_dr_alignment (vinfo, first_dr_info, false);
7847
7848 gcc_assert (alignment_support_scheme);
7849 vec_loop_masks *loop_masks
7850 = (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
7851 ? &LOOP_VINFO_MASKS (loop_vinfo)
7852 : NULL);
7853 vec_loop_lens *loop_lens
7854 = (loop_vinfo && LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo)
7855 ? &LOOP_VINFO_LENS (loop_vinfo)
7856 : NULL);
7857
7858 /* Shouldn't go with length-based approach if fully masked. */
7859 gcc_assert (!loop_lens || !loop_masks);
7860
7861 /* Targets with store-lane instructions must not require explicit
7862 realignment. vect_supportable_dr_alignment always returns either
7863 dr_aligned or dr_unaligned_supported for masked operations. */
7864 gcc_assert ((memory_access_type != VMAT_LOAD_STORE_LANES
7865 && !mask
7866 && !loop_masks)
7867 || alignment_support_scheme == dr_aligned
7868 || alignment_support_scheme == dr_unaligned_supported);
7869
7870 if (memory_access_type == VMAT_CONTIGUOUS_DOWN
7871 || memory_access_type == VMAT_CONTIGUOUS_REVERSE)
7872 offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
7873
7874 tree bump;
7875 tree vec_offset = NULL_TREE;
7876 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
7877 {
7878 aggr_type = NULL_TREE;
7879 bump = NULL_TREE;
7880 }
7881 else if (memory_access_type == VMAT_GATHER_SCATTER)
7882 {
7883 aggr_type = elem_type;
7884 vect_get_strided_load_store_ops (stmt_info, loop_vinfo, &gs_info,
7885 &bump, &vec_offset);
7886 }
7887 else
7888 {
7889 if (memory_access_type == VMAT_LOAD_STORE_LANES)
7890 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
7891 else
7892 aggr_type = vectype;
7893 bump = vect_get_data_ptr_increment (vinfo, dr_info, aggr_type,
7894 memory_access_type);
7895 }
7896
7897 if (mask)
7898 LOOP_VINFO_HAS_MASK_STORE (loop_vinfo) = true;
7899
7900 /* In case the vectorization factor (VF) is bigger than the number
7901 of elements that we can fit in a vectype (nunits), we have to generate
7902      more than one vector stmt, i.e. we need to "unroll" the
7903      vector stmt by a factor of VF/nunits.  */
7904
7905 /* In case of interleaving (non-unit grouped access):
7906
7907 S1: &base + 2 = x2
7908 S2: &base = x0
7909 S3: &base + 1 = x1
7910 S4: &base + 3 = x3
7911
7912      We create vectorized stores starting from the base address (the access
7913      of the first stmt in the chain, S2 in the above example) once the last
7914      store stmt of the chain (S4) is reached:
7915
7916 VS1: &base = vx2
7917 VS2: &base + vec_size*1 = vx0
7918 VS3: &base + vec_size*2 = vx1
7919 VS4: &base + vec_size*3 = vx3
7920
7921 Then permutation statements are generated:
7922
7923 VS5: vx5 = VEC_PERM_EXPR < vx0, vx3, {0, 8, 1, 9, 2, 10, 3, 11} >
7924 VS6: vx6 = VEC_PERM_EXPR < vx0, vx3, {4, 12, 5, 13, 6, 14, 7, 15} >
7925 ...
7926
7927 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
7928 (the order of the data-refs in the output of vect_permute_store_chain
7929 corresponds to the order of scalar stmts in the interleaving chain - see
7930 the documentation of vect_permute_store_chain()).
7931
7932 In case of both multiple types and interleaving, above vector stores and
7933 permutation stmts are created for every copy. The result vector stmts are
7934 put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
7935 STMT_VINFO_RELATED_STMT for the next copies.
7936 */
7937
7938 auto_vec<tree> vec_masks;
7939 tree vec_mask = NULL;
7940 auto_vec<tree> vec_offsets;
7941 auto_vec<vec<tree> > gvec_oprnds;
7942 gvec_oprnds.safe_grow_cleared (group_size, true);
7943 for (j = 0; j < ncopies; j++)
7944 {
7945 gimple *new_stmt;
7946 if (j == 0)
7947 {
7948 if (slp)
7949 {
7950 /* Get vectorized arguments for SLP_NODE. */
7951 vect_get_vec_defs (vinfo, stmt_info, slp_node, 1,
7952 op, &vec_oprnds);
7953 vec_oprnd = vec_oprnds[0];
7954 }
7955 else
7956 {
7957 /* For interleaved stores we collect vectorized defs for all the
7958 stores in the group in DR_CHAIN and OPRNDS. DR_CHAIN is then
7959 used as an input to vect_permute_store_chain().
7960
7961 If the store is not grouped, DR_GROUP_SIZE is 1, and DR_CHAIN
7962 and OPRNDS are of size 1. */
7963 stmt_vec_info next_stmt_info = first_stmt_info;
7964 for (i = 0; i < group_size; i++)
7965 {
7966 /* Since gaps are not supported for interleaved stores,
7967 DR_GROUP_SIZE is the exact number of stmts in the chain.
7968 		     Therefore, NEXT_STMT_INFO can't be NULL.  If there is
7969 		     no interleaving, DR_GROUP_SIZE is 1, and only one
7970 		     iteration of the loop will be executed.  */
7971 op = vect_get_store_rhs (next_stmt_info);
7972 vect_get_vec_defs_for_operand (vinfo, next_stmt_info,
7973 ncopies, op, &gvec_oprnds[i]);
7974 vec_oprnd = gvec_oprnds[i][0];
7975 dr_chain.quick_push (gvec_oprnds[i][0]);
7976 oprnds.quick_push (gvec_oprnds[i][0]);
7977 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
7978 }
7979 if (mask)
7980 {
7981 vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
7982 mask, &vec_masks, mask_vectype);
7983 vec_mask = vec_masks[0];
7984 }
7985 }
7986
7987 	  /* We should have caught mismatched types earlier.  */
7988 gcc_assert (useless_type_conversion_p (vectype,
7989 TREE_TYPE (vec_oprnd)));
7990 bool simd_lane_access_p
7991 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) != 0;
7992 if (simd_lane_access_p
7993 && !loop_masks
7994 && TREE_CODE (DR_BASE_ADDRESS (first_dr_info->dr)) == ADDR_EXPR
7995 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr_info->dr), 0))
7996 && integer_zerop (get_dr_vinfo_offset (vinfo, first_dr_info))
7997 && integer_zerop (DR_INIT (first_dr_info->dr))
7998 && alias_sets_conflict_p (get_alias_set (aggr_type),
7999 get_alias_set (TREE_TYPE (ref_type))))
8000 {
8001 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr_info->dr));
8002 dataref_offset = build_int_cst (ref_type, 0);
8003 }
8004 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
8005 {
8006 vect_get_gather_scatter_ops (vinfo, loop, stmt_info, &gs_info,
8007 &dataref_ptr, &vec_offsets, ncopies);
8008 vec_offset = vec_offsets[0];
8009 }
8010 else
8011 dataref_ptr
8012 = vect_create_data_ref_ptr (vinfo, first_stmt_info, aggr_type,
8013 simd_lane_access_p ? loop : NULL,
8014 offset, &dummy, gsi, &ptr_incr,
8015 simd_lane_access_p, NULL_TREE, bump);
8016 }
8017 else
8018 {
8019 /* For interleaved stores we created vectorized defs for all the
8020 defs stored in OPRNDS in the previous iteration (previous copy).
8021 DR_CHAIN is then used as an input to vect_permute_store_chain().
8022 If the store is not grouped, DR_GROUP_SIZE is 1, and DR_CHAIN and
8023 OPRNDS are of size 1. */
8024 for (i = 0; i < group_size; i++)
8025 {
8026 vec_oprnd = gvec_oprnds[i][j];
8027 dr_chain[i] = gvec_oprnds[i][j];
8028 oprnds[i] = gvec_oprnds[i][j];
8029 }
8030 if (mask)
8031 vec_mask = vec_masks[j];
8032 if (dataref_offset)
8033 dataref_offset
8034 = int_const_binop (PLUS_EXPR, dataref_offset, bump);
8035 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
8036 vec_offset = vec_offsets[j];
8037 else
8038 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
8039 stmt_info, bump);
8040 }
8041
8042 if (memory_access_type == VMAT_LOAD_STORE_LANES)
8043 {
8044 tree vec_array;
8045
8046 /* Get an array into which we can store the individual vectors. */
8047 vec_array = create_vector_array (vectype, vec_num);
8048
8049 /* Invalidate the current contents of VEC_ARRAY. This should
8050 become an RTL clobber too, which prevents the vector registers
8051 from being upward-exposed. */
8052 vect_clobber_variable (vinfo, stmt_info, gsi, vec_array);
8053
8054 /* Store the individual vectors into the array. */
8055 for (i = 0; i < vec_num; i++)
8056 {
8057 vec_oprnd = dr_chain[i];
8058 write_vector_array (vinfo, stmt_info,
8059 gsi, vec_oprnd, vec_array, i);
8060 }
8061
8062 tree final_mask = NULL;
8063 if (loop_masks)
8064 final_mask = vect_get_loop_mask (gsi, loop_masks, ncopies,
8065 vectype, j);
8066 if (vec_mask)
8067 final_mask = prepare_load_store_mask (mask_vectype, final_mask,
8068 vec_mask, gsi);
8069
8070 gcall *call;
8071 if (final_mask)
8072 {
8073 /* Emit:
8074 MASK_STORE_LANES (DATAREF_PTR, ALIAS_PTR, VEC_MASK,
8075 VEC_ARRAY). */
8076 unsigned int align = TYPE_ALIGN (TREE_TYPE (vectype));
8077 tree alias_ptr = build_int_cst (ref_type, align);
8078 call = gimple_build_call_internal (IFN_MASK_STORE_LANES, 4,
8079 dataref_ptr, alias_ptr,
8080 final_mask, vec_array);
8081 }
8082 else
8083 {
8084 /* Emit:
8085 MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY). */
8086 data_ref = create_array_ref (aggr_type, dataref_ptr, ref_type);
8087 call = gimple_build_call_internal (IFN_STORE_LANES, 1,
8088 vec_array);
8089 gimple_call_set_lhs (call, data_ref);
8090 }
8091 gimple_call_set_nothrow (call, true);
8092 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
8093 new_stmt = call;
8094
8095 /* Record that VEC_ARRAY is now dead. */
8096 vect_clobber_variable (vinfo, stmt_info, gsi, vec_array);
8097 }
8098 else
8099 {
8100 new_stmt = NULL;
8101 if (grouped_store)
8102 {
8103 if (j == 0)
8104 result_chain.create (group_size);
8105 /* Permute. */
8106 vect_permute_store_chain (vinfo, dr_chain, group_size, stmt_info,
8107 gsi, &result_chain);
8108 }
8109
8110 stmt_vec_info next_stmt_info = first_stmt_info;
8111 for (i = 0; i < vec_num; i++)
8112 {
8113 unsigned misalign;
8114 unsigned HOST_WIDE_INT align;
8115
8116 tree final_mask = NULL_TREE;
8117 if (loop_masks)
8118 final_mask = vect_get_loop_mask (gsi, loop_masks,
8119 vec_num * ncopies,
8120 vectype, vec_num * j + i);
8121 if (vec_mask)
8122 final_mask = prepare_load_store_mask (mask_vectype, final_mask,
8123 vec_mask, gsi);
8124
8125 if (memory_access_type == VMAT_GATHER_SCATTER)
8126 {
8127 tree scale = size_int (gs_info.scale);
8128 gcall *call;
8129 if (final_mask)
8130 call = gimple_build_call_internal
8131 (IFN_MASK_SCATTER_STORE, 5, dataref_ptr, vec_offset,
8132 scale, vec_oprnd, final_mask);
8133 else
8134 call = gimple_build_call_internal
8135 (IFN_SCATTER_STORE, 4, dataref_ptr, vec_offset,
8136 scale, vec_oprnd);
8137 gimple_call_set_nothrow (call, true);
8138 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
8139 new_stmt = call;
8140 break;
8141 }
8142
8143 if (i > 0)
8144 /* Bump the vector pointer. */
8145 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr,
8146 gsi, stmt_info, bump);
8147
8148 if (slp)
8149 vec_oprnd = vec_oprnds[i];
8150 else if (grouped_store)
8151 /* For grouped stores vectorized defs are interleaved in
8152 vect_permute_store_chain(). */
8153 vec_oprnd = result_chain[i];
8154
8155 align = known_alignment (DR_TARGET_ALIGNMENT (first_dr_info));
8156 if (aligned_access_p (first_dr_info))
8157 misalign = 0;
8158 else if (DR_MISALIGNMENT (first_dr_info) == -1)
8159 {
8160 align = dr_alignment (vect_dr_behavior (vinfo, first_dr_info));
8161 misalign = 0;
8162 }
8163 else
8164 misalign = DR_MISALIGNMENT (first_dr_info);
8165 if (dataref_offset == NULL_TREE
8166 && TREE_CODE (dataref_ptr) == SSA_NAME)
8167 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
8168 misalign);
8169 align = least_bit_hwi (misalign | align);
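
	  /* E.g. (illustrative): with a 16-byte target alignment and a known
	     misalignment of 4 bytes, least_bit_hwi (4 | 16) == 4, so the
	     access below is emitted as only 4-byte aligned; with misalign == 0
	     the full 16-byte alignment is kept.  */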
8170
8171 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
8172 {
8173 tree perm_mask = perm_mask_for_reverse (vectype);
8174 tree perm_dest = vect_create_destination_var
8175 (vect_get_store_rhs (stmt_info), vectype);
8176 tree new_temp = make_ssa_name (perm_dest);
8177
8178 /* Generate the permute statement. */
8179 gimple *perm_stmt
8180 = gimple_build_assign (new_temp, VEC_PERM_EXPR, vec_oprnd,
8181 vec_oprnd, perm_mask);
8182 vect_finish_stmt_generation (vinfo, stmt_info, perm_stmt, gsi);
8183
8184 perm_stmt = SSA_NAME_DEF_STMT (new_temp);
8185 vec_oprnd = new_temp;
8186 }
8187
8188 /* Arguments are ready. Create the new vector stmt. */
8189 if (final_mask)
8190 {
8191 tree ptr = build_int_cst (ref_type, align * BITS_PER_UNIT);
8192 gcall *call
8193 = gimple_build_call_internal (IFN_MASK_STORE, 4,
8194 dataref_ptr, ptr,
8195 final_mask, vec_oprnd);
8196 gimple_call_set_nothrow (call, true);
8197 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
8198 new_stmt = call;
8199 }
8200 else if (loop_lens)
8201 {
8202 tree final_len
8203 = vect_get_loop_len (loop_vinfo, loop_lens,
8204 vec_num * ncopies, vec_num * j + i);
8205 tree ptr = build_int_cst (ref_type, align * BITS_PER_UNIT);
8206 machine_mode vmode = TYPE_MODE (vectype);
8207 opt_machine_mode new_ovmode
8208 = get_len_load_store_mode (vmode, false);
8209 machine_mode new_vmode = new_ovmode.require ();
8210 /* Need conversion if it's wrapped with VnQI. */
8211 if (vmode != new_vmode)
8212 {
8213 tree new_vtype
8214 = build_vector_type_for_mode (unsigned_intQI_type_node,
8215 new_vmode);
8216 tree var
8217 = vect_get_new_ssa_name (new_vtype, vect_simple_var);
8218 vec_oprnd
8219 = build1 (VIEW_CONVERT_EXPR, new_vtype, vec_oprnd);
8220 gassign *new_stmt
8221 = gimple_build_assign (var, VIEW_CONVERT_EXPR,
8222 vec_oprnd);
8223 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt,
8224 gsi);
8225 vec_oprnd = var;
8226 }
8227 gcall *call
8228 = gimple_build_call_internal (IFN_LEN_STORE, 4, dataref_ptr,
8229 ptr, final_len, vec_oprnd);
8230 gimple_call_set_nothrow (call, true);
8231 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
8232 new_stmt = call;
8233 }
8234 else
8235 {
8236 data_ref = fold_build2 (MEM_REF, vectype,
8237 dataref_ptr,
8238 dataref_offset
8239 ? dataref_offset
8240 : build_int_cst (ref_type, 0));
8241 if (aligned_access_p (first_dr_info))
8242 ;
8243 else
8244 TREE_TYPE (data_ref)
8245 = build_aligned_type (TREE_TYPE (data_ref),
8246 align * BITS_PER_UNIT);
8247 vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
8248 new_stmt = gimple_build_assign (data_ref, vec_oprnd);
8249 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
8250 }
8251
8252 if (slp)
8253 continue;
8254
8255 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
8256 if (!next_stmt_info)
8257 break;
8258 }
8259 }
8260 if (!slp)
8261 {
8262 if (j == 0)
8263 *vec_stmt = new_stmt;
8264 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
8265 }
8266 }
8267
8268 for (i = 0; i < group_size; ++i)
8269 {
8270 vec<tree> oprndsi = gvec_oprnds[i];
8271 oprndsi.release ();
8272 }
8273 oprnds.release ();
8274 result_chain.release ();
8275 vec_oprnds.release ();
8276
8277 return true;
8278 }
8279
8280 /* Given a vector type VECTYPE, turns permutation SEL into the equivalent
8281 VECTOR_CST mask. No checks are made that the target platform supports the
8282 mask, so callers may wish to test can_vec_perm_const_p separately, or use
8283 vect_gen_perm_mask_checked. */
8284
8285 tree
8286 vect_gen_perm_mask_any (tree vectype, const vec_perm_indices &sel)
8287 {
8288 tree mask_type;
8289
8290 poly_uint64 nunits = sel.length ();
8291 gcc_assert (known_eq (nunits, TYPE_VECTOR_SUBPARTS (vectype)));
8292
8293 mask_type = build_vector_type (ssizetype, nunits);
8294 return vec_perm_indices_to_tree (mask_type, sel);
8295 }
8296
8297 /* Checked version of vect_gen_perm_mask_any. Asserts can_vec_perm_const_p,
8298 i.e. that the target supports the pattern _for arbitrary input vectors_. */
8299
8300 tree
8301 vect_gen_perm_mask_checked (tree vectype, const vec_perm_indices &sel)
8302 {
8303 gcc_assert (can_vec_perm_const_p (TYPE_MODE (vectype), sel));
8304 return vect_gen_perm_mask_any (vectype, sel);
8305 }
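
/* A minimal usage sketch (hypothetical caller, mirroring what
   perm_mask_for_reverse elsewhere in this file does): build a
   lane-reversing mask for VECTYPE and only ask for the VECTOR_CST once
   the target is known to support the permutation:

     poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
     vec_perm_builder sel (nunits, 1, 3);
     for (int i = 0; i < 3; ++i)
       sel.quick_push (nunits - 1 - i);
     vec_perm_indices indices (sel, 1, nunits);
     tree mask = (can_vec_perm_const_p (TYPE_MODE (vectype), indices)
		  ? vect_gen_perm_mask_checked (vectype, indices)
		  : NULL_TREE);  */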
8306
8307 /* Given vector variables X and Y that were generated for the scalar
8308 STMT_INFO, generate instructions to permute the vector elements of X and Y
8309 using permutation mask MASK_VEC, insert them at *GSI and return the
8310 permuted vector variable. */
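/* For example (a sketch): with MASK_VEC = { 0, 5, 2, 7 } and V4SI inputs
   x_2 and y_3, this emits

     perm_dest_1 = VEC_PERM_EXPR <x_2, y_3, { 0, 5, 2, 7 }>;

   and returns perm_dest_1.  */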
8311
8312 static tree
8313 permute_vec_elements (vec_info *vinfo,
8314 tree x, tree y, tree mask_vec, stmt_vec_info stmt_info,
8315 gimple_stmt_iterator *gsi)
8316 {
8317 tree vectype = TREE_TYPE (x);
8318 tree perm_dest, data_ref;
8319 gimple *perm_stmt;
8320
8321 tree scalar_dest = gimple_get_lhs (stmt_info->stmt);
8322 if (scalar_dest && TREE_CODE (scalar_dest) == SSA_NAME)
8323 perm_dest = vect_create_destination_var (scalar_dest, vectype);
8324 else
8325 perm_dest = vect_get_new_vect_var (vectype, vect_simple_var, NULL);
8326 data_ref = make_ssa_name (perm_dest);
8327
8328 /* Generate the permute statement. */
8329 perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, x, y, mask_vec);
8330 vect_finish_stmt_generation (vinfo, stmt_info, perm_stmt, gsi);
8331
8332 return data_ref;
8333 }
8334
8335 /* Hoist the definitions of all SSA uses on STMT_INFO out of the loop LOOP,
8336 inserting them on the loop's preheader edge.  Returns true if we
8337 were successful in doing so (and thus STMT_INFO can then be moved),
8338 otherwise returns false. */
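/* A small example (a sketch): for a loop-invariant load

     # p_3 defined outside LOOP
     q_2 = p_3 + 16;
     x_1 = *q_2;

   the only in-loop definition feeding the load is q_2, and q_2 itself
   uses nothing defined inside LOOP, so this function moves the
   definition of q_2 to the preheader edge, after which the load itself
   can be hoisted as well.  */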
8339
8340 static bool
8341 hoist_defs_of_uses (stmt_vec_info stmt_info, class loop *loop)
8342 {
8343 ssa_op_iter i;
8344 tree op;
8345 bool any = false;
8346
8347 FOR_EACH_SSA_TREE_OPERAND (op, stmt_info->stmt, i, SSA_OP_USE)
8348 {
8349 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
8350 if (!gimple_nop_p (def_stmt)
8351 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
8352 {
8353 /* Make sure we don't need to recurse.  While we could do
8354 so in simple cases, when there are more complex use webs
8355 we don't have an easy way to preserve stmt order to fulfil
8356 dependencies within them. */
8357 tree op2;
8358 ssa_op_iter i2;
8359 if (gimple_code (def_stmt) == GIMPLE_PHI)
8360 return false;
8361 FOR_EACH_SSA_TREE_OPERAND (op2, def_stmt, i2, SSA_OP_USE)
8362 {
8363 gimple *def_stmt2 = SSA_NAME_DEF_STMT (op2);
8364 if (!gimple_nop_p (def_stmt2)
8365 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt2)))
8366 return false;
8367 }
8368 any = true;
8369 }
8370 }
8371
8372 if (!any)
8373 return true;
8374
8375 FOR_EACH_SSA_TREE_OPERAND (op, stmt_info->stmt, i, SSA_OP_USE)
8376 {
8377 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
8378 if (!gimple_nop_p (def_stmt)
8379 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
8380 {
8381 gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
8382 gsi_remove (&gsi, false);
8383 gsi_insert_on_edge_immediate (loop_preheader_edge (loop), def_stmt);
8384 }
8385 }
8386
8387 return true;
8388 }
8389
8390 /* vectorizable_load.
8391
8392 Check if STMT_INFO reads a non-scalar data-ref (array/pointer/structure)
8393 that can be vectorized.
8394 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
8395 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
8396 Return true if STMT_INFO is vectorizable in this way. */
8397
8398 static bool
8399 vectorizable_load (vec_info *vinfo,
8400 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
8401 gimple **vec_stmt, slp_tree slp_node,
8402 stmt_vector_for_cost *cost_vec)
8403 {
8404 tree scalar_dest;
8405 tree vec_dest = NULL;
8406 tree data_ref = NULL;
8407 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
8408 class loop *loop = NULL;
8409 class loop *containing_loop = gimple_bb (stmt_info->stmt)->loop_father;
8410 bool nested_in_vect_loop = false;
8411 tree elem_type;
8412 tree new_temp;
8413 machine_mode mode;
8414 tree dummy;
8415 tree dataref_ptr = NULL_TREE;
8416 tree dataref_offset = NULL_TREE;
8417 gimple *ptr_incr = NULL;
8418 int ncopies;
8419 int i, j;
8420 unsigned int group_size;
8421 poly_uint64 group_gap_adj;
8422 tree msq = NULL_TREE, lsq;
8423 tree offset = NULL_TREE;
8424 tree byte_offset = NULL_TREE;
8425 tree realignment_token = NULL_TREE;
8426 gphi *phi = NULL;
8427 vec<tree> dr_chain = vNULL;
8428 bool grouped_load = false;
8429 stmt_vec_info first_stmt_info;
8430 stmt_vec_info first_stmt_info_for_drptr = NULL;
8431 bool compute_in_loop = false;
8432 class loop *at_loop;
8433 int vec_num;
8434 bool slp = (slp_node != NULL);
8435 bool slp_perm = false;
8436 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
8437 poly_uint64 vf;
8438 tree aggr_type;
8439 gather_scatter_info gs_info;
8440 tree ref_type;
8441 enum vect_def_type mask_dt = vect_unknown_def_type;
8442
8443 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
8444 return false;
8445
8446 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
8447 && ! vec_stmt)
8448 return false;
8449
8450 if (!STMT_VINFO_DATA_REF (stmt_info))
8451 return false;
8452
8453 /* ??? Alignment analysis for SLP looks at SLP_TREE_SCALAR_STMTS[0]
8454 for unpermuted loads but we get passed SLP_TREE_REPRESENTATIVE
8455 which can be different when reduction chains were re-ordered.
8456 Now that we figured we're a dataref, reset stmt_info back to
8457 SLP_TREE_SCALAR_STMTS[0].  When we're SLP only, things should be
8458 refactored in a way to maintain the dr_vec_info pointer for the
8459 relevant access explicitly. */
8460 stmt_vec_info orig_stmt_info = stmt_info;
8461 if (slp_node)
8462 stmt_info = SLP_TREE_SCALAR_STMTS (slp_node)[0];
8463
8464 tree mask = NULL_TREE, mask_vectype = NULL_TREE;
8465 if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
8466 {
8467 scalar_dest = gimple_assign_lhs (assign);
8468 if (TREE_CODE (scalar_dest) != SSA_NAME)
8469 return false;
8470
8471 tree_code code = gimple_assign_rhs_code (assign);
8472 if (code != ARRAY_REF
8473 && code != BIT_FIELD_REF
8474 && code != INDIRECT_REF
8475 && code != COMPONENT_REF
8476 && code != IMAGPART_EXPR
8477 && code != REALPART_EXPR
8478 && code != MEM_REF
8479 && TREE_CODE_CLASS (code) != tcc_declaration)
8480 return false;
8481 }
8482 else
8483 {
8484 gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
8485 if (!call || !gimple_call_internal_p (call))
8486 return false;
8487
8488 internal_fn ifn = gimple_call_internal_fn (call);
8489 if (!internal_load_fn_p (ifn))
8490 return false;
8491
8492 scalar_dest = gimple_call_lhs (call);
8493 if (!scalar_dest)
8494 return false;
8495
8496 int mask_index = internal_fn_mask_index (ifn);
8497 if (mask_index >= 0)
8498 {
8499 mask = gimple_call_arg (call, mask_index);
8500 if (!vect_check_scalar_mask (vinfo, stmt_info, mask, &mask_dt,
8501 &mask_vectype))
8502 return false;
8503 }
8504 }
8505
8506 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
8507 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
8508
8509 if (loop_vinfo)
8510 {
8511 loop = LOOP_VINFO_LOOP (loop_vinfo);
8512 nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt_info);
8513 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
8514 }
8515 else
8516 vf = 1;
8517
8518 /* Multiple types in SLP are handled by creating the appropriate number of
8519 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
8520 case of SLP. */
8521 if (slp)
8522 ncopies = 1;
8523 else
8524 ncopies = vect_get_num_copies (loop_vinfo, vectype);
8525
8526 gcc_assert (ncopies >= 1);
8527
8528 /* FORNOW. This restriction should be relaxed. */
8529 if (nested_in_vect_loop && ncopies > 1)
8530 {
8531 if (dump_enabled_p ())
8532 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8533 "multiple types in nested loop.\n");
8534 return false;
8535 }
8536
8537 /* Invalidate assumptions made by dependence analysis when vectorization
8538 on the unrolled body effectively re-orders stmts. */
8539 if (ncopies > 1
8540 && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
8541 && maybe_gt (LOOP_VINFO_VECT_FACTOR (loop_vinfo),
8542 STMT_VINFO_MIN_NEG_DIST (stmt_info)))
8543 {
8544 if (dump_enabled_p ())
8545 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8546 "cannot perform implicit CSE when unrolling "
8547 "with negative dependence distance\n");
8548 return false;
8549 }
8550
8551 elem_type = TREE_TYPE (vectype);
8552 mode = TYPE_MODE (vectype);
8553
8554 /* FORNOW. In some cases can vectorize even if data-type not supported
8555 (e.g. - data copies). */
8556 if (optab_handler (mov_optab, mode) == CODE_FOR_nothing)
8557 {
8558 if (dump_enabled_p ())
8559 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8560 "Aligned load, but unsupported type.\n");
8561 return false;
8562 }
8563
8564 /* Check if the load is a part of an interleaving chain. */
8565 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
8566 {
8567 grouped_load = true;
8568 /* FORNOW */
8569 gcc_assert (!nested_in_vect_loop);
8570 gcc_assert (!STMT_VINFO_GATHER_SCATTER_P (stmt_info));
8571
8572 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
8573 group_size = DR_GROUP_SIZE (first_stmt_info);
8574
8575 /* Refuse non-SLP vectorization of SLP-only groups. */
8576 if (!slp && STMT_VINFO_SLP_VECT_ONLY (first_stmt_info))
8577 {
8578 if (dump_enabled_p ())
8579 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8580 "cannot vectorize load in non-SLP mode.\n");
8581 return false;
8582 }
8583
8584 if (slp && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
8585 {
8586 slp_perm = true;
8587
8588 if (!loop_vinfo)
8589 {
8590 /* In BB vectorization we may not actually use a loaded vector
8591 accessing elements in excess of DR_GROUP_SIZE. */
8592 stmt_vec_info group_info = SLP_TREE_SCALAR_STMTS (slp_node)[0];
8593 group_info = DR_GROUP_FIRST_ELEMENT (group_info);
8594 unsigned HOST_WIDE_INT nunits;
8595 unsigned j, k, maxk = 0;
8596 FOR_EACH_VEC_ELT (SLP_TREE_LOAD_PERMUTATION (slp_node), j, k)
8597 if (k > maxk)
8598 maxk = k;
8599 tree vectype = STMT_VINFO_VECTYPE (group_info);
8600 if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits)
8601 || maxk >= (DR_GROUP_SIZE (group_info) & ~(nunits - 1)))
8602 {
8603 if (dump_enabled_p ())
8604 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8605 "BB vectorization with gaps at the end of "
8606 "a load is not supported\n");
8607 return false;
8608 }
8609 }
8610
8611 auto_vec<tree> tem;
8612 unsigned n_perms;
8613 if (!vect_transform_slp_perm_load (vinfo, slp_node, tem, NULL, vf,
8614 true, &n_perms))
8615 {
8616 if (dump_enabled_p ())
8617 dump_printf_loc (MSG_MISSED_OPTIMIZATION,
8618 vect_location,
8619 "unsupported load permutation\n");
8620 return false;
8621 }
8622 }
8623
8624 /* Invalidate assumptions made by dependence analysis when vectorization
8625 on the unrolled body effectively re-orders stmts. */
8626 if (!PURE_SLP_STMT (stmt_info)
8627 && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
8628 && maybe_gt (LOOP_VINFO_VECT_FACTOR (loop_vinfo),
8629 STMT_VINFO_MIN_NEG_DIST (stmt_info)))
8630 {
8631 if (dump_enabled_p ())
8632 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8633 "cannot perform implicit CSE when performing "
8634 "group loads with negative dependence distance\n");
8635 return false;
8636 }
8637 }
8638 else
8639 group_size = 1;
8640
8641 vect_memory_access_type memory_access_type;
8642 enum dr_alignment_support alignment_support_scheme;
8643 if (!get_load_store_type (vinfo, stmt_info, vectype, slp_node, mask, VLS_LOAD,
8644 ncopies, &memory_access_type,
8645 &alignment_support_scheme, &gs_info))
8646 return false;
8647
8648 if (mask)
8649 {
8650 if (memory_access_type == VMAT_CONTIGUOUS)
8651 {
8652 machine_mode vec_mode = TYPE_MODE (vectype);
8653 if (!VECTOR_MODE_P (vec_mode)
8654 || !can_vec_mask_load_store_p (vec_mode,
8655 TYPE_MODE (mask_vectype), true))
8656 return false;
8657 }
8658 else if (memory_access_type != VMAT_LOAD_STORE_LANES
8659 && memory_access_type != VMAT_GATHER_SCATTER)
8660 {
8661 if (dump_enabled_p ())
8662 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8663 "unsupported access type for masked load.\n");
8664 return false;
8665 }
8666 }
8667
8668 if (!vec_stmt) /* transformation not required. */
8669 {
8670 if (slp_node
8671 && mask
8672 && !vect_maybe_update_slp_op_vectype (SLP_TREE_CHILDREN (slp_node)[0],
8673 mask_vectype))
8674 {
8675 if (dump_enabled_p ())
8676 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8677 "incompatible vector types for invariants\n");
8678 return false;
8679 }
8680
8681 if (!slp)
8682 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;
8683
8684 if (loop_vinfo
8685 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo))
8686 check_load_store_for_partial_vectors (loop_vinfo, vectype, VLS_LOAD,
8687 group_size, memory_access_type,
8688 &gs_info, mask);
8689
8690 if (dump_enabled_p ()
8691 && memory_access_type != VMAT_ELEMENTWISE
8692 && memory_access_type != VMAT_GATHER_SCATTER
8693 && alignment_support_scheme != dr_aligned)
8694 dump_printf_loc (MSG_NOTE, vect_location,
8695 "Vectorizing an unaligned access.\n");
8696
8697 STMT_VINFO_TYPE (orig_stmt_info) = load_vec_info_type;
8698 vect_model_load_cost (vinfo, stmt_info, ncopies, vf, memory_access_type,
8699 slp_node, cost_vec);
8700 return true;
8701 }
8702
8703 if (!slp)
8704 gcc_assert (memory_access_type
8705 == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info));
8706
8707 if (dump_enabled_p ())
8708 dump_printf_loc (MSG_NOTE, vect_location,
8709 "transform load. ncopies = %d\n", ncopies);
8710
8711 /* Transform. */
8712
8713 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info), *first_dr_info = NULL;
8714 ensure_base_align (dr_info);
8715
8716 if (memory_access_type == VMAT_GATHER_SCATTER && gs_info.decl)
8717 {
8718 vect_build_gather_load_calls (vinfo,
8719 stmt_info, gsi, vec_stmt, &gs_info, mask);
8720 return true;
8721 }
8722
8723 if (memory_access_type == VMAT_INVARIANT)
8724 {
8725 gcc_assert (!grouped_load && !mask && !bb_vinfo);
8726 /* If we have versioned for aliasing or the loop doesn't
8727 have any data dependencies that would preclude this,
8728 then we are sure this is a loop invariant load and
8729 thus we can insert it on the preheader edge. */
8730 bool hoist_p = (LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo)
8731 && !nested_in_vect_loop
8732 && hoist_defs_of_uses (stmt_info, loop));
8733 if (hoist_p)
8734 {
8735 gassign *stmt = as_a <gassign *> (stmt_info->stmt);
8736 if (dump_enabled_p ())
8737 dump_printf_loc (MSG_NOTE, vect_location,
8738 "hoisting out of the vectorized loop: %G", stmt);
8739 scalar_dest = copy_ssa_name (scalar_dest);
8740 tree rhs = unshare_expr (gimple_assign_rhs1 (stmt));
8741 gsi_insert_on_edge_immediate
8742 (loop_preheader_edge (loop),
8743 gimple_build_assign (scalar_dest, rhs));
8744 }
8745 /* These copies are all equivalent, but currently the representation
8746 requires a separate STMT_VINFO_VEC_STMT for each one. */
8747 gimple_stmt_iterator gsi2 = *gsi;
8748 gsi_next (&gsi2);
8749 for (j = 0; j < ncopies; j++)
8750 {
8751 if (hoist_p)
8752 new_temp = vect_init_vector (vinfo, stmt_info, scalar_dest,
8753 vectype, NULL);
8754 else
8755 new_temp = vect_init_vector (vinfo, stmt_info, scalar_dest,
8756 vectype, &gsi2);
8757 gimple *new_stmt = SSA_NAME_DEF_STMT (new_temp);
8758 if (slp)
8759 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
8760 else
8761 {
8762 if (j == 0)
8763 *vec_stmt = new_stmt;
8764 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
8765 }
8766 }
8767 return true;
8768 }
8769
8770 if (memory_access_type == VMAT_ELEMENTWISE
8771 || memory_access_type == VMAT_STRIDED_SLP)
8772 {
8773 gimple_stmt_iterator incr_gsi;
8774 bool insert_after;
8775 tree offvar;
8776 tree ivstep;
8777 tree running_off;
8778 vec<constructor_elt, va_gc> *v = NULL;
8779 tree stride_base, stride_step, alias_off;
8780 /* Checked by get_load_store_type. */
8781 unsigned int const_nunits = nunits.to_constant ();
8782 unsigned HOST_WIDE_INT cst_offset = 0;
8783 tree dr_offset;
8784
8785 gcc_assert (!LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo));
8786 gcc_assert (!nested_in_vect_loop);
8787
8788 if (grouped_load)
8789 {
8790 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
8791 first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
8792 }
8793 else
8794 {
8795 first_stmt_info = stmt_info;
8796 first_dr_info = dr_info;
8797 }
8798 if (slp && grouped_load)
8799 {
8800 group_size = DR_GROUP_SIZE (first_stmt_info);
8801 ref_type = get_group_alias_ptr_type (first_stmt_info);
8802 }
8803 else
8804 {
8805 if (grouped_load)
8806 cst_offset
8807 = (tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)))
8808 * vect_get_place_in_interleaving_chain (stmt_info,
8809 first_stmt_info));
8810 group_size = 1;
8811 ref_type = reference_alias_ptr_type (DR_REF (dr_info->dr));
8812 }
8813
8814 dr_offset = get_dr_vinfo_offset (vinfo, first_dr_info);
8815 stride_base
8816 = fold_build_pointer_plus
8817 (DR_BASE_ADDRESS (first_dr_info->dr),
8818 size_binop (PLUS_EXPR,
8819 convert_to_ptrofftype (dr_offset),
8820 convert_to_ptrofftype (DR_INIT (first_dr_info->dr))));
8821 stride_step = fold_convert (sizetype, DR_STEP (first_dr_info->dr));
8822
8823 /* For a load with loop-invariant (but other than power-of-2)
8824 stride (i.e. not a grouped access) like so:
8825
8826 for (i = 0; i < n; i += stride)
8827 ... = array[i];
8828
8829 we generate a new induction variable and new accesses to
8830 form a new vector (or vectors, depending on ncopies):
8831
8832 for (j = 0; ; j += VF*stride)
8833 tmp1 = array[j];
8834 tmp2 = array[j + stride];
8835 ...
8836 vectemp = {tmp1, tmp2, ...}
8837 */
8838
8839 ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (stride_step), stride_step,
8840 build_int_cst (TREE_TYPE (stride_step), vf));
8841
8842 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
8843
8844 stride_base = cse_and_gimplify_to_preheader (loop_vinfo, stride_base);
8845 ivstep = cse_and_gimplify_to_preheader (loop_vinfo, ivstep);
8846 create_iv (stride_base, ivstep, NULL,
8847 loop, &incr_gsi, insert_after,
8848 &offvar, NULL);
8849
8850 stride_step = cse_and_gimplify_to_preheader (loop_vinfo, stride_step);
8851
8852 running_off = offvar;
8853 alias_off = build_int_cst (ref_type, 0);
8854 int nloads = const_nunits;
8855 int lnel = 1;
8856 tree ltype = TREE_TYPE (vectype);
8857 tree lvectype = vectype;
8858 auto_vec<tree> dr_chain;
8859 if (memory_access_type == VMAT_STRIDED_SLP)
8860 {
8861 if (group_size < const_nunits)
8862 {
8863 /* First check if vec_init optab supports construction from vector
8864 elts directly. Otherwise avoid emitting a constructor of
8865 vector elements by performing the loads using an integer type
8866 of the same size, constructing a vector of those and then
8867 re-interpreting it as the original vector type. This avoids a
8868 huge runtime penalty due to the general inability to perform
8869 store forwarding from smaller stores to a larger load. */
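/* For instance (a sketch, target permitting): with a V8HI vectype and
   group_size == 2, each group of two HImode elements can be loaded as
   one 32-bit piece (a V2HI subvector or an SImode integer), four such
   pieces are gathered into a constructor of the composition type, and
   the result is VIEW_CONVERT_EXPRed back to V8HI.  */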
8870 tree ptype;
8871 tree vtype
8872 = vector_vector_composition_type (vectype,
8873 const_nunits / group_size,
8874 &ptype);
8875 if (vtype != NULL_TREE)
8876 {
8877 nloads = const_nunits / group_size;
8878 lnel = group_size;
8879 lvectype = vtype;
8880 ltype = ptype;
8881 }
8882 }
8883 else
8884 {
8885 nloads = 1;
8886 lnel = const_nunits;
8887 ltype = vectype;
8888 }
8889 ltype = build_aligned_type (ltype, TYPE_ALIGN (TREE_TYPE (vectype)));
8890 }
8891 /* Load vector(1) scalar_type if it's 1 element-wise vectype. */
8892 else if (nloads == 1)
8893 ltype = vectype;
8894
8895 if (slp)
8896 {
8897 /* For SLP permutation support we need to load the whole group,
8898 not only the number of vector stmts the permutation result
8899 fits in. */
8900 if (slp_perm)
8901 {
8902 /* We don't yet generate SLP_TREE_LOAD_PERMUTATIONs for
8903 variable VF. */
8904 unsigned int const_vf = vf.to_constant ();
8905 ncopies = CEIL (group_size * const_vf, const_nunits);
8906 dr_chain.create (ncopies);
8907 }
8908 else
8909 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
8910 }
8911 unsigned int group_el = 0;
8912 unsigned HOST_WIDE_INT
8913 elsz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
8914 for (j = 0; j < ncopies; j++)
8915 {
8916 if (nloads > 1)
8917 vec_alloc (v, nloads);
8918 gimple *new_stmt = NULL;
8919 for (i = 0; i < nloads; i++)
8920 {
8921 tree this_off = build_int_cst (TREE_TYPE (alias_off),
8922 group_el * elsz + cst_offset);
8923 tree data_ref = build2 (MEM_REF, ltype, running_off, this_off);
8924 vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
8925 new_stmt = gimple_build_assign (make_ssa_name (ltype), data_ref);
8926 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
8927 if (nloads > 1)
8928 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
8929 gimple_assign_lhs (new_stmt));
8930
8931 group_el += lnel;
8932 if (! slp
8933 || group_el == group_size)
8934 {
8935 tree newoff = copy_ssa_name (running_off);
8936 gimple *incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
8937 running_off, stride_step);
8938 vect_finish_stmt_generation (vinfo, stmt_info, incr, gsi);
8939
8940 running_off = newoff;
8941 group_el = 0;
8942 }
8943 }
8944 if (nloads > 1)
8945 {
8946 tree vec_inv = build_constructor (lvectype, v);
8947 new_temp = vect_init_vector (vinfo, stmt_info,
8948 vec_inv, lvectype, gsi);
8949 new_stmt = SSA_NAME_DEF_STMT (new_temp);
8950 if (lvectype != vectype)
8951 {
8952 new_stmt = gimple_build_assign (make_ssa_name (vectype),
8953 VIEW_CONVERT_EXPR,
8954 build1 (VIEW_CONVERT_EXPR,
8955 vectype, new_temp));
8956 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
8957 }
8958 }
8959
8960 if (slp)
8961 {
8962 if (slp_perm)
8963 dr_chain.quick_push (gimple_assign_lhs (new_stmt));
8964 else
8965 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
8966 }
8967 else
8968 {
8969 if (j == 0)
8970 *vec_stmt = new_stmt;
8971 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
8972 }
8973 }
8974 if (slp_perm)
8975 {
8976 unsigned n_perms;
8977 vect_transform_slp_perm_load (vinfo, slp_node, dr_chain, gsi, vf,
8978 false, &n_perms);
8979 }
8980 return true;
8981 }
8982
8983 if (memory_access_type == VMAT_GATHER_SCATTER
8984 || (!slp && memory_access_type == VMAT_CONTIGUOUS))
8985 grouped_load = false;
8986
8987 if (grouped_load)
8988 {
8989 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
8990 group_size = DR_GROUP_SIZE (first_stmt_info);
8991 /* For SLP vectorization we directly vectorize a subchain
8992 without permutation. */
8993 if (slp && ! SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
8994 first_stmt_info = SLP_TREE_SCALAR_STMTS (slp_node)[0];
8995 /* For BB vectorization always use the first stmt to base
8996 the data ref pointer on. */
8997 if (bb_vinfo)
8998 first_stmt_info_for_drptr
8999 = vect_find_first_scalar_stmt_in_slp (slp_node);
9000
9001 /* Check if the chain of loads is already vectorized. */
9002 if (STMT_VINFO_VEC_STMTS (first_stmt_info).exists ()
9003 /* For SLP we would need to copy over SLP_TREE_VEC_STMTS.
9004 ??? But we can only do so if there is exactly one
9005 as we have no way to get at the rest. Leave the CSE
9006 opportunity alone.
9007 ??? With the group load eventually participating
9008 in multiple different permutations (having multiple
9009 slp nodes which refer to the same group) the CSE
9010 is even wrong code. See PR56270. */
9011 && !slp)
9012 {
9013 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
9014 return true;
9015 }
9016 first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
9017 group_gap_adj = 0;
9018
9019 /* VEC_NUM is the number of vect stmts to be created for this group. */
9020 if (slp)
9021 {
9022 grouped_load = false;
9023 /* If an SLP permutation is from N elements to N elements,
9024 and if one vector holds a whole number of N, we can load
9025 the inputs to the permutation in the same way as an
9026 unpermuted sequence. In other cases we need to load the
9027 whole group, not only the number of vector stmts the
9028 permutation result fits in. */
9029 unsigned scalar_lanes = SLP_TREE_LANES (slp_node);
9030 if (slp_perm
9031 && (group_size != scalar_lanes
9032 || !multiple_p (nunits, group_size)))
9033 {
9034 /* We don't yet generate such SLP_TREE_LOAD_PERMUTATIONs for
9035 variable VF; see vect_transform_slp_perm_load. */
9036 unsigned int const_vf = vf.to_constant ();
9037 unsigned int const_nunits = nunits.to_constant ();
9038 vec_num = CEIL (group_size * const_vf, const_nunits);
9039 group_gap_adj = vf * group_size - nunits * vec_num;
9040 }
9041 else
9042 {
9043 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
9044 group_gap_adj
9045 = group_size - scalar_lanes;
9046 }
9047 }
9048 else
9049 vec_num = group_size;
9050
9051 ref_type = get_group_alias_ptr_type (first_stmt_info);
9052 }
9053 else
9054 {
9055 first_stmt_info = stmt_info;
9056 first_dr_info = dr_info;
9057 group_size = vec_num = 1;
9058 group_gap_adj = 0;
9059 ref_type = reference_alias_ptr_type (DR_REF (first_dr_info->dr));
9060 }
9061
9062 gcc_assert (alignment_support_scheme);
9063 vec_loop_masks *loop_masks
9064 = (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
9065 ? &LOOP_VINFO_MASKS (loop_vinfo)
9066 : NULL);
9067 vec_loop_lens *loop_lens
9068 = (loop_vinfo && LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo)
9069 ? &LOOP_VINFO_LENS (loop_vinfo)
9070 : NULL);
9071
9072 /* Shouldn't go with the length-based approach if fully masked. */
9073 gcc_assert (!loop_lens || !loop_masks);
9074
9075 /* Targets with load-lane instructions must not require explicit
9076 realignment. vect_supportable_dr_alignment always returns either
9077 dr_aligned or dr_unaligned_supported for masked operations. */
9078 gcc_assert ((memory_access_type != VMAT_LOAD_STORE_LANES
9079 && !mask
9080 && !loop_masks)
9081 || alignment_support_scheme == dr_aligned
9082 || alignment_support_scheme == dr_unaligned_supported);
9083
9084 /* In case the vectorization factor (VF) is bigger than the number
9085 of elements that we can fit in a vectype (nunits), we have to generate
9086 more than one vector stmt - i.e. - we need to "unroll" the
9087 vector stmt by a factor VF/nunits. In doing so, we record a pointer
9088 from one copy of the vector stmt to the next, in the field
9089 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
9090 stages to find the correct vector defs to be used when vectorizing
9091 stmts that use the defs of the current stmt. The example below
9092 illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
9093 need to create 4 vectorized stmts):
9094
9095 before vectorization:
9096 RELATED_STMT VEC_STMT
9097 S1: x = memref - -
9098 S2: z = x + 1 - -
9099
9100 step 1: vectorize stmt S1:
9101 We first create the vector stmt VS1_0, and, as usual, record a
9102 pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
9103 Next, we create the vector stmt VS1_1, and record a pointer to
9104 it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
9105 Similarly, for VS1_2 and VS1_3. This is the resulting chain of
9106 stmts and pointers:
9107 RELATED_STMT VEC_STMT
9108 VS1_0: vx0 = memref0 VS1_1 -
9109 VS1_1: vx1 = memref1 VS1_2 -
9110 VS1_2: vx2 = memref2 VS1_3 -
9111 VS1_3: vx3 = memref3 - -
9112 S1: x = load - VS1_0
9113 S2: z = x + 1 - -
9114 */
9115
9116 /* In case of interleaving (non-unit grouped access):
9117
9118 S1: x2 = &base + 2
9119 S2: x0 = &base
9120 S3: x1 = &base + 1
9121 S4: x3 = &base + 3
9122
9123 Vectorized loads are created in the order of memory accesses
9124 starting from the access of the first stmt of the chain:
9125
9126 VS1: vx0 = &base
9127 VS2: vx1 = &base + vec_size*1
9128 VS3: vx3 = &base + vec_size*2
9129 VS4: vx4 = &base + vec_size*3
9130
9131 Then permutation statements are generated:
9132
9133 VS5: vx5 = VEC_PERM_EXPR < vx0, vx1, { 0, 2, ..., i*2 } >
9134 VS6: vx6 = VEC_PERM_EXPR < vx0, vx1, { 1, 3, ..., i*2+1 } >
9135 ...
9136
9137 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
9138 (the order of the data-refs in the output of vect_permute_load_chain
9139 corresponds to the order of scalar stmts in the interleaving chain - see
9140 the documentation of vect_permute_load_chain()).
9141 The generation of permutation stmts and recording them in
9142 STMT_VINFO_VEC_STMT is done in vect_transform_grouped_load().
9143
9144 In case of both multiple types and interleaving, the vector loads and
9145 permutation stmts above are created for every copy. The result vector
9146 stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
9147 corresponding STMT_VINFO_RELATED_STMT for the next copies. */
9148
9149 /* If the data reference is aligned (dr_aligned) or potentially unaligned
9150 on a target that supports unaligned accesses (dr_unaligned_supported)
9151 we generate the following code:
9152 p = initial_addr;
9153 indx = 0;
9154 loop {
9155 p = p + indx * vectype_size;
9156 vec_dest = *(p);
9157 indx = indx + 1;
9158 }
9159
9160 Otherwise, the data reference is potentially unaligned on a target that
9161 does not support unaligned accesses (dr_explicit_realign_optimized) -
9162 then generate the following code, in which the data in each iteration is
9163 obtained by two vector loads, one from the previous iteration, and one
9164 from the current iteration:
9165 p1 = initial_addr;
9166 msq_init = *(floor(p1))
9167 p2 = initial_addr + VS - 1;
9168 realignment_token = call target_builtin;
9169 indx = 0;
9170 loop {
9171 p2 = p2 + indx * vectype_size
9172 lsq = *(floor(p2))
9173 vec_dest = realign_load (msq, lsq, realignment_token)
9174 indx = indx + 1;
9175 msq = lsq;
9176 } */
9177
9178 /* If the misalignment remains the same throughout the execution of the
9179 loop, we can create the init_addr and permutation mask at the loop
9180 preheader. Otherwise, it needs to be created inside the loop.
9181 This can only occur when vectorizing memory accesses in the inner-loop
9182 nested within an outer-loop that is being vectorized. */
9183
9184 if (nested_in_vect_loop
9185 && !multiple_p (DR_STEP_ALIGNMENT (dr_info->dr),
9186 GET_MODE_SIZE (TYPE_MODE (vectype))))
9187 {
9188 gcc_assert (alignment_support_scheme != dr_explicit_realign_optimized);
9189 compute_in_loop = true;
9190 }
9191
9192 bool diff_first_stmt_info
9193 = first_stmt_info_for_drptr && first_stmt_info != first_stmt_info_for_drptr;
9194
9195 if ((alignment_support_scheme == dr_explicit_realign_optimized
9196 || alignment_support_scheme == dr_explicit_realign)
9197 && !compute_in_loop)
9198 {
9199 /* If we have a different first_stmt_info, we can't set up realignment
9200 here, since we can't guarantee that first_stmt_info's DR has been
9201 initialized yet; instead use first_stmt_info_for_drptr's DR, bumping it
9202 by the distance from first_stmt_info's DR as below. */
9203 if (!diff_first_stmt_info)
9204 msq = vect_setup_realignment (vinfo,
9205 first_stmt_info, gsi, &realignment_token,
9206 alignment_support_scheme, NULL_TREE,
9207 &at_loop);
9208 if (alignment_support_scheme == dr_explicit_realign_optimized)
9209 {
9210 phi = as_a <gphi *> (SSA_NAME_DEF_STMT (msq));
9211 byte_offset = size_binop (MINUS_EXPR, TYPE_SIZE_UNIT (vectype),
9212 size_one_node);
9213 gcc_assert (!first_stmt_info_for_drptr);
9214 }
9215 }
9216 else
9217 at_loop = loop;
9218
9219 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
9220 offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
9221
9222 tree bump;
9223 tree vec_offset = NULL_TREE;
9224 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
9225 {
9226 aggr_type = NULL_TREE;
9227 bump = NULL_TREE;
9228 }
9229 else if (memory_access_type == VMAT_GATHER_SCATTER)
9230 {
9231 aggr_type = elem_type;
9232 vect_get_strided_load_store_ops (stmt_info, loop_vinfo, &gs_info,
9233 &bump, &vec_offset);
9234 }
9235 else
9236 {
9237 if (memory_access_type == VMAT_LOAD_STORE_LANES)
9238 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
9239 else
9240 aggr_type = vectype;
9241 bump = vect_get_data_ptr_increment (vinfo, dr_info, aggr_type,
9242 memory_access_type);
9243 }
9244
9245 vec<tree> vec_offsets = vNULL;
9246 auto_vec<tree> vec_masks;
9247 if (mask)
9248 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
9249 mask, &vec_masks, mask_vectype, NULL_TREE);
9250 tree vec_mask = NULL_TREE;
9251 poly_uint64 group_elt = 0;
9252 for (j = 0; j < ncopies; j++)
9253 {
9254 /* 1. Create the vector or array pointer update chain. */
9255 if (j == 0)
9256 {
9257 bool simd_lane_access_p
9258 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) != 0;
9259 if (simd_lane_access_p
9260 && TREE_CODE (DR_BASE_ADDRESS (first_dr_info->dr)) == ADDR_EXPR
9261 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr_info->dr), 0))
9262 && integer_zerop (get_dr_vinfo_offset (vinfo, first_dr_info))
9263 && integer_zerop (DR_INIT (first_dr_info->dr))
9264 && alias_sets_conflict_p (get_alias_set (aggr_type),
9265 get_alias_set (TREE_TYPE (ref_type)))
9266 && (alignment_support_scheme == dr_aligned
9267 || alignment_support_scheme == dr_unaligned_supported))
9268 {
9269 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr_info->dr));
9270 dataref_offset = build_int_cst (ref_type, 0);
9271 }
9272 else if (diff_first_stmt_info)
9273 {
9274 dataref_ptr
9275 = vect_create_data_ref_ptr (vinfo, first_stmt_info_for_drptr,
9276 aggr_type, at_loop, offset, &dummy,
9277 gsi, &ptr_incr, simd_lane_access_p,
9278 byte_offset, bump);
9279 /* Adjust the pointer by the difference to first_stmt. */
9280 data_reference_p ptrdr
9281 = STMT_VINFO_DATA_REF (first_stmt_info_for_drptr);
9282 tree diff
9283 = fold_convert (sizetype,
9284 size_binop (MINUS_EXPR,
9285 DR_INIT (first_dr_info->dr),
9286 DR_INIT (ptrdr)));
9287 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
9288 stmt_info, diff);
9289 if (alignment_support_scheme == dr_explicit_realign)
9290 {
9291 msq = vect_setup_realignment (vinfo,
9292 first_stmt_info_for_drptr, gsi,
9293 &realignment_token,
9294 alignment_support_scheme,
9295 dataref_ptr, &at_loop);
9296 gcc_assert (!compute_in_loop);
9297 }
9298 }
9299 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
9300 {
9301 vect_get_gather_scatter_ops (vinfo, loop, stmt_info, &gs_info,
9302 &dataref_ptr, &vec_offsets, ncopies);
9303 vec_offset = vec_offsets[0];
9304 }
9305 else
9306 dataref_ptr
9307 = vect_create_data_ref_ptr (vinfo, first_stmt_info, aggr_type,
9308 at_loop,
9309 offset, &dummy, gsi, &ptr_incr,
9310 simd_lane_access_p,
9311 byte_offset, bump);
9312 if (mask)
9313 vec_mask = vec_masks[0];
9314 }
9315 else
9316 {
9317 if (dataref_offset)
9318 dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset,
9319 bump);
9320 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
9321 vec_offset = vec_offsets[j];
9322 else
9323 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
9324 stmt_info, bump);
9325 if (mask)
9326 vec_mask = vec_masks[j];
9327 }
9328
9329 if (grouped_load || slp_perm)
9330 dr_chain.create (vec_num);
9331
9332 gimple *new_stmt = NULL;
9333 if (memory_access_type == VMAT_LOAD_STORE_LANES)
9334 {
9335 tree vec_array;
9336
9337 vec_array = create_vector_array (vectype, vec_num);
9338
9339 tree final_mask = NULL_TREE;
9340 if (loop_masks)
9341 final_mask = vect_get_loop_mask (gsi, loop_masks, ncopies,
9342 vectype, j);
9343 if (vec_mask)
9344 final_mask = prepare_load_store_mask (mask_vectype, final_mask,
9345 vec_mask, gsi);
9346
9347 gcall *call;
9348 if (final_mask)
9349 {
9350 /* Emit:
9351 VEC_ARRAY = MASK_LOAD_LANES (DATAREF_PTR, ALIAS_PTR,
9352 VEC_MASK). */
9353 unsigned int align = TYPE_ALIGN (TREE_TYPE (vectype));
9354 tree alias_ptr = build_int_cst (ref_type, align);
9355 call = gimple_build_call_internal (IFN_MASK_LOAD_LANES, 3,
9356 dataref_ptr, alias_ptr,
9357 final_mask);
9358 }
9359 else
9360 {
9361 /* Emit:
9362 VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]). */
9363 data_ref = create_array_ref (aggr_type, dataref_ptr, ref_type);
9364 call = gimple_build_call_internal (IFN_LOAD_LANES, 1, data_ref);
9365 }
9366 gimple_call_set_lhs (call, vec_array);
9367 gimple_call_set_nothrow (call, true);
9368 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
9369 new_stmt = call;
9370
9371 /* Extract each vector into an SSA_NAME. */
9372 for (i = 0; i < vec_num; i++)
9373 {
9374 new_temp = read_vector_array (vinfo, stmt_info, gsi, scalar_dest,
9375 vec_array, i);
9376 dr_chain.quick_push (new_temp);
9377 }
9378
9379 /* Record the mapping between SSA_NAMEs and statements. */
9380 vect_record_grouped_load_vectors (vinfo, stmt_info, dr_chain);
9381
9382 /* Record that VEC_ARRAY is now dead. */
9383 vect_clobber_variable (vinfo, stmt_info, gsi, vec_array);
9384 }
9385 else
9386 {
9387 for (i = 0; i < vec_num; i++)
9388 {
9389 tree final_mask = NULL_TREE;
9390 if (loop_masks
9391 && memory_access_type != VMAT_INVARIANT)
9392 final_mask = vect_get_loop_mask (gsi, loop_masks,
9393 vec_num * ncopies,
9394 vectype, vec_num * j + i);
9395 if (vec_mask)
9396 final_mask = prepare_load_store_mask (mask_vectype, final_mask,
9397 vec_mask, gsi);
9398
9399 if (i > 0)
9400 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr,
9401 gsi, stmt_info, bump);
9402
9403 /* 2. Create the vector-load in the loop. */
9404 switch (alignment_support_scheme)
9405 {
9406 case dr_aligned:
9407 case dr_unaligned_supported:
9408 {
9409 unsigned int misalign;
9410 unsigned HOST_WIDE_INT align;
9411
9412 if (memory_access_type == VMAT_GATHER_SCATTER)
9413 {
9414 tree zero = build_zero_cst (vectype);
9415 tree scale = size_int (gs_info.scale);
9416 gcall *call;
9417 if (final_mask)
9418 call = gimple_build_call_internal
9419 (IFN_MASK_GATHER_LOAD, 5, dataref_ptr,
9420 vec_offset, scale, zero, final_mask);
9421 else
9422 call = gimple_build_call_internal
9423 (IFN_GATHER_LOAD, 4, dataref_ptr,
9424 vec_offset, scale, zero);
9425 gimple_call_set_nothrow (call, true);
9426 new_stmt = call;
9427 data_ref = NULL_TREE;
9428 break;
9429 }
9430
9431 align =
9432 known_alignment (DR_TARGET_ALIGNMENT (first_dr_info));
9433 if (alignment_support_scheme == dr_aligned)
9434 {
9435 gcc_assert (aligned_access_p (first_dr_info));
9436 misalign = 0;
9437 }
9438 else if (DR_MISALIGNMENT (first_dr_info) == -1)
9439 {
9440 align = dr_alignment
9441 (vect_dr_behavior (vinfo, first_dr_info));
9442 misalign = 0;
9443 }
9444 else
9445 misalign = DR_MISALIGNMENT (first_dr_info);
9446 if (dataref_offset == NULL_TREE
9447 && TREE_CODE (dataref_ptr) == SSA_NAME)
9448 set_ptr_info_alignment (get_ptr_info (dataref_ptr),
9449 align, misalign);
9450 align = least_bit_hwi (misalign | align);
9451
9452 if (final_mask)
9453 {
9454 tree ptr = build_int_cst (ref_type,
9455 align * BITS_PER_UNIT);
9456 gcall *call
9457 = gimple_build_call_internal (IFN_MASK_LOAD, 3,
9458 dataref_ptr, ptr,
9459 final_mask);
9460 gimple_call_set_nothrow (call, true);
9461 new_stmt = call;
9462 data_ref = NULL_TREE;
9463 }
9464 else if (loop_lens && memory_access_type != VMAT_INVARIANT)
9465 {
9466 tree final_len
9467 = vect_get_loop_len (loop_vinfo, loop_lens,
9468 vec_num * ncopies,
9469 vec_num * j + i);
9470 tree ptr = build_int_cst (ref_type,
9471 align * BITS_PER_UNIT);
9472 gcall *call
9473 = gimple_build_call_internal (IFN_LEN_LOAD, 3,
9474 dataref_ptr, ptr,
9475 final_len);
9476 gimple_call_set_nothrow (call, true);
9477 new_stmt = call;
9478 data_ref = NULL_TREE;
9479
9480 /* Need conversion if it's wrapped with VnQI. */
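/* (For example, on a target whose LEN_LOAD only handles byte lanes,
   a V4SF load might be issued in V16QImode and the result
   VIEW_CONVERT_EXPRed back to V4SF; a sketch only, the actual mode
   comes from get_len_load_store_mode.)  */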
9481 machine_mode vmode = TYPE_MODE (vectype);
9482 opt_machine_mode new_ovmode
9483 = get_len_load_store_mode (vmode, true);
9484 machine_mode new_vmode = new_ovmode.require ();
9485 if (vmode != new_vmode)
9486 {
9487 tree qi_type = unsigned_intQI_type_node;
9488 tree new_vtype
9489 = build_vector_type_for_mode (qi_type, new_vmode);
9490 tree var = vect_get_new_ssa_name (new_vtype,
9491 vect_simple_var);
9492 gimple_set_lhs (call, var);
9493 vect_finish_stmt_generation (vinfo, stmt_info, call,
9494 gsi);
9495 tree op = build1 (VIEW_CONVERT_EXPR, vectype, var);
9496 new_stmt
9497 = gimple_build_assign (vec_dest,
9498 VIEW_CONVERT_EXPR, op);
9499 }
9500 }
9501 else
9502 {
9503 tree ltype = vectype;
9504 tree new_vtype = NULL_TREE;
9505 unsigned HOST_WIDE_INT gap
9506 = DR_GROUP_GAP (first_stmt_info);
9507 unsigned int vect_align
9508 = vect_known_alignment_in_bytes (first_dr_info);
9509 unsigned int scalar_dr_size
9510 = vect_get_scalar_dr_size (first_dr_info);
9511 /* If there's no peeling for gaps but we have a gap
9512 with slp loads then load the lower half of the
9513 vector only. See get_group_load_store_type for
9514 when we apply this optimization. */
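/* For example (a sketch): with V4SI, DR_GROUP_SIZE 4 and a trailing
   gap of 2, only the first two elements of the final group are valid,
   so only a half vector is loaded here and the other half of the
   CONSTRUCTOR built below is filled with zeros.  */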
9515 if (slp
9516 && loop_vinfo
9517 && !LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)
9518 && gap != 0
9519 && known_eq (nunits, (group_size - gap) * 2)
9520 && known_eq (nunits, group_size)
9521 && gap >= (vect_align / scalar_dr_size))
9522 {
9523 tree half_vtype;
9524 new_vtype
9525 = vector_vector_composition_type (vectype, 2,
9526 &half_vtype);
9527 if (new_vtype != NULL_TREE)
9528 ltype = half_vtype;
9529 }
9530 tree offset
9531 = (dataref_offset ? dataref_offset
9532 : build_int_cst (ref_type, 0));
9533 if (ltype != vectype
9534 && memory_access_type == VMAT_CONTIGUOUS_REVERSE)
9535 {
9536 unsigned HOST_WIDE_INT gap_offset
9537 = gap * tree_to_uhwi (TYPE_SIZE_UNIT (elem_type));
9538 tree gapcst = build_int_cst (ref_type, gap_offset);
9539 offset = size_binop (PLUS_EXPR, offset, gapcst);
9540 }
9541 data_ref
9542 = fold_build2 (MEM_REF, ltype, dataref_ptr, offset);
9543 if (alignment_support_scheme == dr_aligned)
9544 ;
9545 else
9546 TREE_TYPE (data_ref)
9547 = build_aligned_type (TREE_TYPE (data_ref),
9548 align * BITS_PER_UNIT);
9549 if (ltype != vectype)
9550 {
9551 vect_copy_ref_info (data_ref,
9552 DR_REF (first_dr_info->dr));
9553 tree tem = make_ssa_name (ltype);
9554 new_stmt = gimple_build_assign (tem, data_ref);
9555 vect_finish_stmt_generation (vinfo, stmt_info,
9556 new_stmt, gsi);
9557 data_ref = NULL;
9558 vec<constructor_elt, va_gc> *v;
9559 vec_alloc (v, 2);
9560 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
9561 {
9562 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
9563 build_zero_cst (ltype));
9564 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, tem);
9565 }
9566 else
9567 {
9568 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, tem);
9569 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
9570 build_zero_cst (ltype));
9571 }
9572 gcc_assert (new_vtype != NULL_TREE);
9573 if (new_vtype == vectype)
9574 new_stmt = gimple_build_assign (
9575 vec_dest, build_constructor (vectype, v));
9576 else
9577 {
9578 tree new_vname = make_ssa_name (new_vtype);
9579 new_stmt = gimple_build_assign (
9580 new_vname, build_constructor (new_vtype, v));
9581 vect_finish_stmt_generation (vinfo, stmt_info,
9582 new_stmt, gsi);
9583 new_stmt = gimple_build_assign (
9584 vec_dest, build1 (VIEW_CONVERT_EXPR, vectype,
9585 new_vname));
9586 }
9587 }
9588 }
9589 break;
9590 }
9591 case dr_explicit_realign:
9592 {
9593 tree ptr, bump;
9594
9595 tree vs = size_int (TYPE_VECTOR_SUBPARTS (vectype));
9596
9597 if (compute_in_loop)
9598 msq = vect_setup_realignment (vinfo, first_stmt_info, gsi,
9599 &realignment_token,
9600 dr_explicit_realign,
9601 dataref_ptr, NULL);
9602
9603 if (TREE_CODE (dataref_ptr) == SSA_NAME)
9604 ptr = copy_ssa_name (dataref_ptr);
9605 else
9606 ptr = make_ssa_name (TREE_TYPE (dataref_ptr));
9607 // For explicit realign the target alignment should be
9608 // known at compile time.
9609 unsigned HOST_WIDE_INT align =
9610 DR_TARGET_ALIGNMENT (first_dr_info).to_constant ();
9611 new_stmt = gimple_build_assign
9612 (ptr, BIT_AND_EXPR, dataref_ptr,
9613 build_int_cst
9614 (TREE_TYPE (dataref_ptr),
9615 -(HOST_WIDE_INT) align));
9616 vect_finish_stmt_generation (vinfo, stmt_info,
9617 new_stmt, gsi);
9618 data_ref
9619 = build2 (MEM_REF, vectype, ptr,
9620 build_int_cst (ref_type, 0));
9621 vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
9622 vec_dest = vect_create_destination_var (scalar_dest,
9623 vectype);
9624 new_stmt = gimple_build_assign (vec_dest, data_ref);
9625 new_temp = make_ssa_name (vec_dest, new_stmt);
9626 gimple_assign_set_lhs (new_stmt, new_temp);
9627 gimple_move_vops (new_stmt, stmt_info->stmt);
9628 vect_finish_stmt_generation (vinfo, stmt_info,
9629 new_stmt, gsi);
9630 msq = new_temp;
9631
9632 bump = size_binop (MULT_EXPR, vs,
9633 TYPE_SIZE_UNIT (elem_type));
9634 bump = size_binop (MINUS_EXPR, bump, size_one_node);
9635 ptr = bump_vector_ptr (vinfo, dataref_ptr, NULL, gsi,
9636 stmt_info, bump);
9637 new_stmt = gimple_build_assign
9638 (NULL_TREE, BIT_AND_EXPR, ptr,
9639 build_int_cst
9640 (TREE_TYPE (ptr), -(HOST_WIDE_INT) align));
9641 ptr = copy_ssa_name (ptr, new_stmt);
9642 gimple_assign_set_lhs (new_stmt, ptr);
9643 vect_finish_stmt_generation (vinfo, stmt_info,
9644 new_stmt, gsi);
9645 data_ref
9646 = build2 (MEM_REF, vectype, ptr,
9647 build_int_cst (ref_type, 0));
9648 break;
9649 }
9650 case dr_explicit_realign_optimized:
9651 {
9652 if (TREE_CODE (dataref_ptr) == SSA_NAME)
9653 new_temp = copy_ssa_name (dataref_ptr);
9654 else
9655 new_temp = make_ssa_name (TREE_TYPE (dataref_ptr));
9656 // We should only be doing this if we know the target
9657 // alignment at compile time.
9658 unsigned HOST_WIDE_INT align =
9659 DR_TARGET_ALIGNMENT (first_dr_info).to_constant ();
9660 new_stmt = gimple_build_assign
9661 (new_temp, BIT_AND_EXPR, dataref_ptr,
9662 build_int_cst (TREE_TYPE (dataref_ptr),
9663 -(HOST_WIDE_INT) align));
9664 vect_finish_stmt_generation (vinfo, stmt_info,
9665 new_stmt, gsi);
9666 data_ref
9667 = build2 (MEM_REF, vectype, new_temp,
9668 build_int_cst (ref_type, 0));
9669 break;
9670 }
9671 default:
9672 gcc_unreachable ();
9673 }
9674 vec_dest = vect_create_destination_var (scalar_dest, vectype);
9675 /* DATA_REF is null if we've already built the statement. */
9676 if (data_ref)
9677 {
9678 vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
9679 new_stmt = gimple_build_assign (vec_dest, data_ref);
9680 }
9681 new_temp = make_ssa_name (vec_dest, new_stmt);
9682 gimple_set_lhs (new_stmt, new_temp);
9683 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
9684
9685 /* 3. Handle explicit realignment if necessary/supported.
9686 Create in loop:
9687 vec_dest = realign_load (msq, lsq, realignment_token) */
9688 if (alignment_support_scheme == dr_explicit_realign_optimized
9689 || alignment_support_scheme == dr_explicit_realign)
9690 {
9691 lsq = gimple_assign_lhs (new_stmt);
9692 if (!realignment_token)
9693 realignment_token = dataref_ptr;
9694 vec_dest = vect_create_destination_var (scalar_dest, vectype);
9695 new_stmt = gimple_build_assign (vec_dest, REALIGN_LOAD_EXPR,
9696 msq, lsq, realignment_token);
9697 new_temp = make_ssa_name (vec_dest, new_stmt);
9698 gimple_assign_set_lhs (new_stmt, new_temp);
9699 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
9700
9701 if (alignment_support_scheme == dr_explicit_realign_optimized)
9702 {
9703 gcc_assert (phi);
9704 if (i == vec_num - 1 && j == ncopies - 1)
9705 add_phi_arg (phi, lsq,
9706 loop_latch_edge (containing_loop),
9707 UNKNOWN_LOCATION);
9708 msq = lsq;
9709 }
9710 }
9711
9712 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
9713 {
9714 tree perm_mask = perm_mask_for_reverse (vectype);
9715 new_temp = permute_vec_elements (vinfo, new_temp, new_temp,
9716 perm_mask, stmt_info, gsi);
9717 new_stmt = SSA_NAME_DEF_STMT (new_temp);
9718 }
9719
9720 /* Collect vector loads and later create their permutation in
9721 vect_transform_grouped_load (). */
9722 if (grouped_load || slp_perm)
9723 dr_chain.quick_push (new_temp);
9724
9725 /* Store vector loads in the corresponding SLP_NODE. */
9726 if (slp && !slp_perm)
9727 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
9728
9729 /* With an SLP permutation we load the gaps as well; without
9730 one we need to skip the gaps after we manage to fully load
9731 all elements.  group_gap_adj is DR_GROUP_SIZE here. */
9732 group_elt += nunits;
9733 if (maybe_ne (group_gap_adj, 0U)
9734 && !slp_perm
9735 && known_eq (group_elt, group_size - group_gap_adj))
9736 {
9737 poly_wide_int bump_val
9738 = (wi::to_wide (TYPE_SIZE_UNIT (elem_type))
9739 * group_gap_adj);
9740 if (tree_int_cst_sgn
9741 (vect_dr_behavior (vinfo, dr_info)->step) == -1)
9742 bump_val = -bump_val;
9743 tree bump = wide_int_to_tree (sizetype, bump_val);
9744 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr,
9745 gsi, stmt_info, bump);
9746 group_elt = 0;
9747 }
9748 }
9749 /* Bump the vector pointer to account for a gap or for excess
9750 elements loaded for a permuted SLP load. */
9751 if (maybe_ne (group_gap_adj, 0U) && slp_perm)
9752 {
9753 poly_wide_int bump_val
9754 = (wi::to_wide (TYPE_SIZE_UNIT (elem_type))
9755 * group_gap_adj);
9756 if (tree_int_cst_sgn
9757 (vect_dr_behavior (vinfo, dr_info)->step) == -1)
9758 bump_val = -bump_val;
9759 tree bump = wide_int_to_tree (sizetype, bump_val);
9760 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
9761 stmt_info, bump);
9762 }
9763 }
9764
9765 if (slp && !slp_perm)
9766 continue;
9767
9768 if (slp_perm)
9769 {
9770 unsigned n_perms;
9771 bool ok = vect_transform_slp_perm_load (vinfo, slp_node, dr_chain,
9772 gsi, vf, false, &n_perms);
9773 gcc_assert (ok);
9774 }
9775 else
9776 {
9777 if (grouped_load)
9778 {
9779 if (memory_access_type != VMAT_LOAD_STORE_LANES)
9780 vect_transform_grouped_load (vinfo, stmt_info, dr_chain,
9781 group_size, gsi);
9782 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
9783 }
9784 else
9785 {
9786 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
9787 }
9788 }
9789 dr_chain.release ();
9790 }
9791 if (!slp)
9792 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
9793
9794 return true;
9795 }
9796
9797 /* Function vect_is_simple_cond.
9798
9799 Input:
9800 LOOP - the loop that is being vectorized.
9801 COND - Condition that is checked for simple use.
9802
9803 Output:
9804 *COMP_VECTYPE - the vector type for the comparison.
9805 *DTS - The def types for the arguments of the comparison
9806
9807 Returns whether a COND can be vectorized.  Checks whether the
9808 condition operands are supportable using vect_is_simple_use. */
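/* For example (a sketch): both an embedded comparison such as "a_1 < b_2"
   and a boolean mask SSA name such as "mask_3" (defined by an earlier
   comparison) are accepted; in the mask case *COMP_VECTYPE is the vector
   boolean type of the mask.  */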
9809
9810 static bool
9811 vect_is_simple_cond (tree cond, vec_info *vinfo, stmt_vec_info stmt_info,
9812 slp_tree slp_node, tree *comp_vectype,
9813 enum vect_def_type *dts, tree vectype)
9814 {
9815 tree lhs, rhs;
9816 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
9817 slp_tree slp_op;
9818
9819 /* Mask case. */
9820 if (TREE_CODE (cond) == SSA_NAME
9821 && VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (cond)))
9822 {
9823 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 0, &cond,
9824 &slp_op, &dts[0], comp_vectype)
9825 || !*comp_vectype
9826 || !VECTOR_BOOLEAN_TYPE_P (*comp_vectype))
9827 return false;
9828 return true;
9829 }
9830
9831 if (!COMPARISON_CLASS_P (cond))
9832 return false;
9833
9834 lhs = TREE_OPERAND (cond, 0);
9835 rhs = TREE_OPERAND (cond, 1);
9836
9837 if (TREE_CODE (lhs) == SSA_NAME)
9838 {
9839 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 0,
9840 &lhs, &slp_op, &dts[0], &vectype1))
9841 return false;
9842 }
9843 else if (TREE_CODE (lhs) == INTEGER_CST || TREE_CODE (lhs) == REAL_CST
9844 || TREE_CODE (lhs) == FIXED_CST)
9845 dts[0] = vect_constant_def;
9846 else
9847 return false;
9848
9849 if (TREE_CODE (rhs) == SSA_NAME)
9850 {
9851 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 1,
9852 &rhs, &slp_op, &dts[1], &vectype2))
9853 return false;
9854 }
9855 else if (TREE_CODE (rhs) == INTEGER_CST || TREE_CODE (rhs) == REAL_CST
9856 || TREE_CODE (rhs) == FIXED_CST)
9857 dts[1] = vect_constant_def;
9858 else
9859 return false;
9860
9861 if (vectype1 && vectype2
9862 && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1),
9863 TYPE_VECTOR_SUBPARTS (vectype2)))
9864 return false;
9865
9866 *comp_vectype = vectype1 ? vectype1 : vectype2;
9867 /* Invariant comparison. */
9868 if (! *comp_vectype)
9869 {
9870 tree scalar_type = TREE_TYPE (lhs);
9871 if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type))
9872 *comp_vectype = truth_type_for (vectype);
9873 else
9874 {
9875 /* If we can widen the comparison to match vectype do so. */
9876 if (INTEGRAL_TYPE_P (scalar_type)
9877 && !slp_node
9878 && tree_int_cst_lt (TYPE_SIZE (scalar_type),
9879 TYPE_SIZE (TREE_TYPE (vectype))))
9880 scalar_type = build_nonstandard_integer_type
9881 (vector_element_bits (vectype), TYPE_UNSIGNED (scalar_type));
9882 *comp_vectype = get_vectype_for_scalar_type (vinfo, scalar_type,
9883 slp_node);
9884 }
9885 }
9886
9887 return true;
9888 }
9889
9890 /* vectorizable_condition.
9891
9892 Check if STMT_INFO is conditional modify expression that can be vectorized.
9893 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
9894 stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
9895 at GSI.
9896
9897 When STMT_INFO is vectorized as a nested cycle, for_reduction is true.
9898
9899 Return true if STMT_INFO is vectorizable in this way. */
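/* A minimal sketch of the transform (SSA names illustrative).  The scalar
   statement

     iftmp_1 = a_2 < b_3 ? x_4 : y_5;

   is replaced, per vector copy, by code along the lines of

     vec_cmp_6 = vec_a_2 < vec_b_3;
     vect_iftmp_7 = VEC_COND_EXPR <vec_cmp_6, vec_x_4, vec_y_5>;

   For EXTRACT_LAST_REDUCTION the VEC_COND_EXPR is replaced by an
   IFN_FOLD_EXTRACT_LAST internal call instead, as done below.  */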
9900
9901 static bool
9902 vectorizable_condition (vec_info *vinfo,
9903 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
9904 gimple **vec_stmt,
9905 slp_tree slp_node, stmt_vector_for_cost *cost_vec)
9906 {
9907 tree scalar_dest = NULL_TREE;
9908 tree vec_dest = NULL_TREE;
9909 tree cond_expr, cond_expr0 = NULL_TREE, cond_expr1 = NULL_TREE;
9910 tree then_clause, else_clause;
9911 tree comp_vectype = NULL_TREE;
9912 tree vec_cond_lhs = NULL_TREE, vec_cond_rhs = NULL_TREE;
9913 tree vec_then_clause = NULL_TREE, vec_else_clause = NULL_TREE;
9914 tree vec_compare;
9915 tree new_temp;
9916 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
9917 enum vect_def_type dts[4]
9918 = {vect_unknown_def_type, vect_unknown_def_type,
9919 vect_unknown_def_type, vect_unknown_def_type};
9920 int ndts = 4;
9921 int ncopies;
9922 int vec_num;
9923 enum tree_code code, cond_code, bitop1 = NOP_EXPR, bitop2 = NOP_EXPR;
9924 int i;
9925 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
9926 vec<tree> vec_oprnds0 = vNULL;
9927 vec<tree> vec_oprnds1 = vNULL;
9928 vec<tree> vec_oprnds2 = vNULL;
9929 vec<tree> vec_oprnds3 = vNULL;
9930 tree vec_cmp_type;
9931 bool masked = false;
9932
9933 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
9934 return false;
9935
9936 /* Is vectorizable conditional operation? */
9937 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
9938 if (!stmt)
9939 return false;
9940
9941 code = gimple_assign_rhs_code (stmt);
9942 if (code != COND_EXPR)
9943 return false;
9944
9945 stmt_vec_info reduc_info = NULL;
9946 int reduc_index = -1;
9947 vect_reduction_type reduction_type = TREE_CODE_REDUCTION;
9948 bool for_reduction
9949 = STMT_VINFO_REDUC_DEF (vect_orig_stmt (stmt_info)) != NULL;
9950 if (for_reduction)
9951 {
9952 if (STMT_SLP_TYPE (stmt_info))
9953 return false;
9954 reduc_info = info_for_reduction (vinfo, stmt_info);
9955 reduction_type = STMT_VINFO_REDUC_TYPE (reduc_info);
9956 reduc_index = STMT_VINFO_REDUC_IDX (stmt_info);
9957 gcc_assert (reduction_type != EXTRACT_LAST_REDUCTION
9958 || reduc_index != -1);
9959 }
9960 else
9961 {
9962 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
9963 return false;
9964 }
9965
9966 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
9967 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
9968
9969 if (slp_node)
9970 {
9971 ncopies = 1;
9972 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
9973 }
9974 else
9975 {
9976 ncopies = vect_get_num_copies (loop_vinfo, vectype);
9977 vec_num = 1;
9978 }
9979
9980 gcc_assert (ncopies >= 1);
9981 if (for_reduction && ncopies > 1)
9982 return false; /* FORNOW */
9983
9984 cond_expr = gimple_assign_rhs1 (stmt);
9985
9986 if (!vect_is_simple_cond (cond_expr, vinfo, stmt_info, slp_node,
9987 &comp_vectype, &dts[0], vectype)
9988 || !comp_vectype)
9989 return false;
9990
9991 unsigned op_adjust = COMPARISON_CLASS_P (cond_expr) ? 1 : 0;
9992 slp_tree then_slp_node, else_slp_node;
9993 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 1 + op_adjust,
9994 &then_clause, &then_slp_node, &dts[2], &vectype1))
9995 return false;
9996 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 2 + op_adjust,
9997 &else_clause, &else_slp_node, &dts[3], &vectype2))
9998 return false;
9999
10000 if (vectype1 && !useless_type_conversion_p (vectype, vectype1))
10001 return false;
10002
10003 if (vectype2 && !useless_type_conversion_p (vectype, vectype2))
10004 return false;
10005
10006 masked = !COMPARISON_CLASS_P (cond_expr);
10007 vec_cmp_type = truth_type_for (comp_vectype);
10008
10009 if (vec_cmp_type == NULL_TREE)
10010 return false;
10011
10012 cond_code = TREE_CODE (cond_expr);
10013 if (!masked)
10014 {
10015 cond_expr0 = TREE_OPERAND (cond_expr, 0);
10016 cond_expr1 = TREE_OPERAND (cond_expr, 1);
10017 }
10018
10019 /* For conditional reductions, the "then" value needs to be the candidate
10020 value calculated by this iteration while the "else" value needs to be
10021 the result carried over from previous iterations. If the COND_EXPR
10022 is the other way around, we need to swap it. */
10023 bool must_invert_cmp_result = false;
10024 if (reduction_type == EXTRACT_LAST_REDUCTION && reduc_index == 1)
10025 {
10026 if (masked)
10027 must_invert_cmp_result = true;
10028 else
10029 {
10030 bool honor_nans = HONOR_NANS (TREE_TYPE (cond_expr0));
10031 tree_code new_code = invert_tree_comparison (cond_code, honor_nans);
10032 if (new_code == ERROR_MARK)
10033 must_invert_cmp_result = true;
10034 else
10035 {
10036 cond_code = new_code;
10037 /* Make sure we don't accidentally use the old condition. */
10038 cond_expr = NULL_TREE;
10039 }
10040 }
10041 std::swap (then_clause, else_clause);
10042 }
10043
10044 if (!masked && VECTOR_BOOLEAN_TYPE_P (comp_vectype))
10045 {
10046 /* Boolean values may have another representation in vectors
10047 and therefore we prefer bit operations over comparison for
10048 them (which also works for scalar masks). We store opcodes
10049 to use in bitop1 and bitop2. Statement is vectorized as
10050 BITOP2 (rhs1 BITOP1 rhs2) or rhs1 BITOP2 (BITOP1 rhs2)
10051 depending on bitop1 and bitop2 arity. */
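      /* For example (conceptually): on boolean operands a > b becomes
	 a & ~b and a == b becomes ~(a ^ b); the latter is realized below
	 by computing a ^ b and swapping the then/else values rather than
	 emitting the negation.  */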
10052 switch (cond_code)
10053 {
10054 case GT_EXPR:
10055 bitop1 = BIT_NOT_EXPR;
10056 bitop2 = BIT_AND_EXPR;
10057 break;
10058 case GE_EXPR:
10059 bitop1 = BIT_NOT_EXPR;
10060 bitop2 = BIT_IOR_EXPR;
10061 break;
10062 case LT_EXPR:
10063 bitop1 = BIT_NOT_EXPR;
10064 bitop2 = BIT_AND_EXPR;
10065 std::swap (cond_expr0, cond_expr1);
10066 break;
10067 case LE_EXPR:
10068 bitop1 = BIT_NOT_EXPR;
10069 bitop2 = BIT_IOR_EXPR;
10070 std::swap (cond_expr0, cond_expr1);
10071 break;
10072 case NE_EXPR:
10073 bitop1 = BIT_XOR_EXPR;
10074 break;
10075 case EQ_EXPR:
10076 bitop1 = BIT_XOR_EXPR;
10077 bitop2 = BIT_NOT_EXPR;
10078 break;
10079 default:
10080 return false;
10081 }
10082 cond_code = SSA_NAME;
10083 }
10084
10085 if (TREE_CODE_CLASS (cond_code) == tcc_comparison
10086 && reduction_type == EXTRACT_LAST_REDUCTION
10087 && !expand_vec_cmp_expr_p (comp_vectype, vec_cmp_type, cond_code))
10088 {
10089 if (dump_enabled_p ())
10090 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10091 "reduction comparison operation not supported.\n");
10092 return false;
10093 }
10094
10095 if (!vec_stmt)
10096 {
10097 if (bitop1 != NOP_EXPR)
10098 {
10099 machine_mode mode = TYPE_MODE (comp_vectype);
10100 optab optab;
10101
10102 optab = optab_for_tree_code (bitop1, comp_vectype, optab_default);
10103 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
10104 return false;
10105
10106 if (bitop2 != NOP_EXPR)
10107 {
10108 optab = optab_for_tree_code (bitop2, comp_vectype,
10109 optab_default);
10110 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
10111 return false;
10112 }
10113 }
10114
10115 vect_cost_for_stmt kind = vector_stmt;
10116 if (reduction_type == EXTRACT_LAST_REDUCTION)
10117 /* Count one reduction-like operation per vector. */
10118 kind = vec_to_scalar;
10119 else if (!expand_vec_cond_expr_p (vectype, comp_vectype, cond_code))
10120 return false;
10121
10122 if (slp_node
10123 && (!vect_maybe_update_slp_op_vectype
10124 (SLP_TREE_CHILDREN (slp_node)[0], comp_vectype)
10125 || (op_adjust == 1
10126 && !vect_maybe_update_slp_op_vectype
10127 (SLP_TREE_CHILDREN (slp_node)[1], comp_vectype))
10128 || !vect_maybe_update_slp_op_vectype (then_slp_node, vectype)
10129 || !vect_maybe_update_slp_op_vectype (else_slp_node, vectype)))
10130 {
10131 if (dump_enabled_p ())
10132 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10133 "incompatible vector types for invariants\n");
10134 return false;
10135 }
10136
10137 if (loop_vinfo && for_reduction
10138 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo))
10139 {
10140 if (reduction_type == EXTRACT_LAST_REDUCTION)
10141 vect_record_loop_mask (loop_vinfo, &LOOP_VINFO_MASKS (loop_vinfo),
10142 ncopies * vec_num, vectype, NULL);
10143 /* Extra inactive lanes should be safe for vect_nested_cycle. */
10144 else if (STMT_VINFO_DEF_TYPE (reduc_info) != vect_nested_cycle)
10145 {
10146 if (dump_enabled_p ())
10147 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10148 "conditional reduction prevents the use"
10149 " of partial vectors.\n");
10150 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
10151 }
10152 }
10153
10154 STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type;
10155 vect_model_simple_cost (vinfo, stmt_info, ncopies, dts, ndts, slp_node,
10156 cost_vec, kind);
10157 return true;
10158 }
10159
10160 /* Transform. */
10161
10162 /* Handle def. */
10163 scalar_dest = gimple_assign_lhs (stmt);
10164 if (reduction_type != EXTRACT_LAST_REDUCTION)
10165 vec_dest = vect_create_destination_var (scalar_dest, vectype);
10166
10167 bool swap_cond_operands = false;
10168
10169 /* See whether another part of the vectorized code applies a loop
10170 mask to the condition, or to its inverse. */
10171
10172 vec_loop_masks *masks = NULL;
10173 if (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo))
10174 {
10175 if (reduction_type == EXTRACT_LAST_REDUCTION)
10176 masks = &LOOP_VINFO_MASKS (loop_vinfo);
10177 else
10178 {
10179 scalar_cond_masked_key cond (cond_expr, ncopies);
10180 if (loop_vinfo->scalar_cond_masked_set.contains (cond))
10181 masks = &LOOP_VINFO_MASKS (loop_vinfo);
10182 else
10183 {
10184 bool honor_nans = HONOR_NANS (TREE_TYPE (cond.op0));
10185 cond.code = invert_tree_comparison (cond.code, honor_nans);
10186 if (loop_vinfo->scalar_cond_masked_set.contains (cond))
10187 {
10188 masks = &LOOP_VINFO_MASKS (loop_vinfo);
10189 cond_code = cond.code;
10190 swap_cond_operands = true;
10191 }
10192 }
10193 }
10194 }
10195
10196 /* Handle cond expr. */
10197 if (masked)
10198 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
10199 cond_expr, &vec_oprnds0, comp_vectype,
10200 then_clause, &vec_oprnds2, vectype,
10201 reduction_type != EXTRACT_LAST_REDUCTION
10202 ? else_clause : NULL, &vec_oprnds3, vectype);
10203 else
10204 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
10205 cond_expr0, &vec_oprnds0, comp_vectype,
10206 cond_expr1, &vec_oprnds1, comp_vectype,
10207 then_clause, &vec_oprnds2, vectype,
10208 reduction_type != EXTRACT_LAST_REDUCTION
10209 ? else_clause : NULL, &vec_oprnds3, vectype);
10210
10211 /* Arguments are ready. Create the new vector stmt. */
10212 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_cond_lhs)
10213 {
10214 vec_then_clause = vec_oprnds2[i];
10215 if (reduction_type != EXTRACT_LAST_REDUCTION)
10216 vec_else_clause = vec_oprnds3[i];
10217
10218 if (swap_cond_operands)
10219 std::swap (vec_then_clause, vec_else_clause);
10220
10221 if (masked)
10222 vec_compare = vec_cond_lhs;
10223 else
10224 {
10225 vec_cond_rhs = vec_oprnds1[i];
10226 if (bitop1 == NOP_EXPR)
10227 {
10228 gimple_seq stmts = NULL;
10229 vec_compare = gimple_build (&stmts, cond_code, vec_cmp_type,
10230 vec_cond_lhs, vec_cond_rhs);
10231 gsi_insert_before (gsi, stmts, GSI_SAME_STMT);
10232 }
10233 else
10234 {
10235 new_temp = make_ssa_name (vec_cmp_type);
10236 gassign *new_stmt;
10237 if (bitop1 == BIT_NOT_EXPR)
10238 new_stmt = gimple_build_assign (new_temp, bitop1,
10239 vec_cond_rhs);
10240 else
10241 new_stmt
10242 = gimple_build_assign (new_temp, bitop1, vec_cond_lhs,
10243 vec_cond_rhs);
10244 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
10245 if (bitop2 == NOP_EXPR)
10246 vec_compare = new_temp;
10247 else if (bitop2 == BIT_NOT_EXPR)
10248 {
10249 /* Instead of doing ~x ? y : z do x ? z : y. */
10250 vec_compare = new_temp;
10251 std::swap (vec_then_clause, vec_else_clause);
10252 }
10253 else
10254 {
10255 vec_compare = make_ssa_name (vec_cmp_type);
10256 new_stmt
10257 = gimple_build_assign (vec_compare, bitop2,
10258 vec_cond_lhs, new_temp);
10259 vect_finish_stmt_generation (vinfo, stmt_info,
10260 new_stmt, gsi);
10261 }
10262 }
10263 }
10264
10265 /* If we decided to apply a loop mask to the result of the vector
10266 comparison, AND the comparison with the mask now. Later passes
10267 	 should then be able to reuse the AND results between multiple
10268 vector statements.
10269
10270 For example:
10271 for (int i = 0; i < 100; ++i)
10272 x[i] = y[i] ? z[i] : 10;
10273
10274 results in following optimized GIMPLE:
10275
10276 mask__35.8_43 = vect__4.7_41 != { 0, ... };
10277 vec_mask_and_46 = loop_mask_40 & mask__35.8_43;
10278 _19 = &MEM[base: z_12(D), index: ivtmp_56, step: 4, offset: 0B];
10279 vect_iftmp.11_47 = .MASK_LOAD (_19, 4B, vec_mask_and_46);
10280 vect_iftmp.12_52 = VEC_COND_EXPR <vec_mask_and_46,
10281 vect_iftmp.11_47, { 10, ... }>;
10282
10283 	 instead of using masked and unmasked forms of
10284 vec != { 0, ... } (masked in the MASK_LOAD,
10285 unmasked in the VEC_COND_EXPR). */
10286
10287 /* Force vec_compare to be an SSA_NAME rather than a comparison,
10288 in cases where that's necessary. */
10289
10290 if (masks || reduction_type == EXTRACT_LAST_REDUCTION)
10291 {
10292 if (!is_gimple_val (vec_compare))
10293 {
10294 tree vec_compare_name = make_ssa_name (vec_cmp_type);
10295 gassign *new_stmt = gimple_build_assign (vec_compare_name,
10296 vec_compare);
10297 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
10298 vec_compare = vec_compare_name;
10299 }
10300
10301 if (must_invert_cmp_result)
10302 {
10303 tree vec_compare_name = make_ssa_name (vec_cmp_type);
10304 gassign *new_stmt = gimple_build_assign (vec_compare_name,
10305 BIT_NOT_EXPR,
10306 vec_compare);
10307 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
10308 vec_compare = vec_compare_name;
10309 }
10310
10311 if (masks)
10312 {
10313 tree loop_mask
10314 = vect_get_loop_mask (gsi, masks, vec_num * ncopies,
10315 vectype, i);
10316 tree tmp2 = make_ssa_name (vec_cmp_type);
10317 gassign *g
10318 = gimple_build_assign (tmp2, BIT_AND_EXPR, vec_compare,
10319 loop_mask);
10320 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
10321 vec_compare = tmp2;
10322 }
10323 }
10324
10325 gimple *new_stmt;
10326 if (reduction_type == EXTRACT_LAST_REDUCTION)
10327 {
10328 gimple *old_stmt = vect_orig_stmt (stmt_info)->stmt;
10329 tree lhs = gimple_get_lhs (old_stmt);
10330 new_stmt = gimple_build_call_internal
10331 (IFN_FOLD_EXTRACT_LAST, 3, else_clause, vec_compare,
10332 vec_then_clause);
10333 gimple_call_set_lhs (new_stmt, lhs);
10334 SSA_NAME_DEF_STMT (lhs) = new_stmt;
10335 if (old_stmt == gsi_stmt (*gsi))
10336 vect_finish_replace_stmt (vinfo, stmt_info, new_stmt);
10337 else
10338 {
10339 /* In this case we're moving the definition to later in the
10340 block. That doesn't matter because the only uses of the
10341 lhs are in phi statements. */
10342 gimple_stmt_iterator old_gsi = gsi_for_stmt (old_stmt);
10343 gsi_remove (&old_gsi, true);
10344 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
10345 }
10346 }
10347 else
10348 {
10349 new_temp = make_ssa_name (vec_dest);
10350 new_stmt = gimple_build_assign (new_temp, VEC_COND_EXPR, vec_compare,
10351 vec_then_clause, vec_else_clause);
10352 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
10353 }
10354 if (slp_node)
10355 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
10356 else
10357 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
10358 }
10359
10360 if (!slp_node)
10361 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
10362
10363 vec_oprnds0.release ();
10364 vec_oprnds1.release ();
10365 vec_oprnds2.release ();
10366 vec_oprnds3.release ();
10367
10368 return true;
10369 }
10370
10371 /* vectorizable_comparison.
10372
10373    Check if STMT_INFO is a comparison expression that can be vectorized.
10374 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
10375 comparison, put it in VEC_STMT, and insert it at GSI.
10376
10377 Return true if STMT_INFO is vectorizable in this way. */
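/* A minimal sketch (SSA names illustrative): the scalar statement

     mask_1 = a_2 < b_3;

   where mask_1 has a scalar boolean type, is vectorized as

     vec_mask_4 = vec_a_2 < vec_b_3;

   yielding a vector boolean (mask) value.  When the operands are
   themselves vector booleans, the comparison is instead lowered to the
   bit operations selected via bitop1/bitop2 below.  */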
10378
10379 static bool
10380 vectorizable_comparison (vec_info *vinfo,
10381 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
10382 gimple **vec_stmt,
10383 slp_tree slp_node, stmt_vector_for_cost *cost_vec)
10384 {
10385 tree lhs, rhs1, rhs2;
10386 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
10387 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
10388 tree vec_rhs1 = NULL_TREE, vec_rhs2 = NULL_TREE;
10389 tree new_temp;
10390 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
10391 enum vect_def_type dts[2] = {vect_unknown_def_type, vect_unknown_def_type};
10392 int ndts = 2;
10393 poly_uint64 nunits;
10394 int ncopies;
10395 enum tree_code code, bitop1 = NOP_EXPR, bitop2 = NOP_EXPR;
10396 int i;
10397 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
10398 vec<tree> vec_oprnds0 = vNULL;
10399 vec<tree> vec_oprnds1 = vNULL;
10400 tree mask_type;
10401 tree mask;
10402
10403 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
10404 return false;
10405
10406 if (!vectype || !VECTOR_BOOLEAN_TYPE_P (vectype))
10407 return false;
10408
10409 mask_type = vectype;
10410 nunits = TYPE_VECTOR_SUBPARTS (vectype);
10411
10412 if (slp_node)
10413 ncopies = 1;
10414 else
10415 ncopies = vect_get_num_copies (loop_vinfo, vectype);
10416
10417 gcc_assert (ncopies >= 1);
10418 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
10419 return false;
10420
10421 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
10422 if (!stmt)
10423 return false;
10424
10425 code = gimple_assign_rhs_code (stmt);
10426
10427 if (TREE_CODE_CLASS (code) != tcc_comparison)
10428 return false;
10429
10430 slp_tree slp_rhs1, slp_rhs2;
10431 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
10432 0, &rhs1, &slp_rhs1, &dts[0], &vectype1))
10433 return false;
10434
10435 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
10436 1, &rhs2, &slp_rhs2, &dts[1], &vectype2))
10437 return false;
10438
10439 if (vectype1 && vectype2
10440 && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1),
10441 TYPE_VECTOR_SUBPARTS (vectype2)))
10442 return false;
10443
10444 vectype = vectype1 ? vectype1 : vectype2;
10445
10446 /* Invariant comparison. */
10447 if (!vectype)
10448 {
10449 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs1)))
10450 vectype = mask_type;
10451 else
10452 vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (rhs1),
10453 slp_node);
10454 if (!vectype || maybe_ne (TYPE_VECTOR_SUBPARTS (vectype), nunits))
10455 return false;
10456 }
10457 else if (maybe_ne (nunits, TYPE_VECTOR_SUBPARTS (vectype)))
10458 return false;
10459
10460 /* Can't compare mask and non-mask types. */
10461 if (vectype1 && vectype2
10462 && (VECTOR_BOOLEAN_TYPE_P (vectype1) ^ VECTOR_BOOLEAN_TYPE_P (vectype2)))
10463 return false;
10464
10465 /* Boolean values may have another representation in vectors
10466 and therefore we prefer bit operations over comparison for
10467 them (which also works for scalar masks). We store opcodes
10468 to use in bitop1 and bitop2. Statement is vectorized as
10469 BITOP2 (rhs1 BITOP1 rhs2) or
10470 rhs1 BITOP2 (BITOP1 rhs2)
10471 depending on bitop1 and bitop2 arity. */
10472 bool swap_p = false;
10473 if (VECTOR_BOOLEAN_TYPE_P (vectype))
10474 {
10475 if (code == GT_EXPR)
10476 {
10477 bitop1 = BIT_NOT_EXPR;
10478 bitop2 = BIT_AND_EXPR;
10479 }
10480 else if (code == GE_EXPR)
10481 {
10482 bitop1 = BIT_NOT_EXPR;
10483 bitop2 = BIT_IOR_EXPR;
10484 }
10485 else if (code == LT_EXPR)
10486 {
10487 bitop1 = BIT_NOT_EXPR;
10488 bitop2 = BIT_AND_EXPR;
10489 swap_p = true;
10490 }
10491 else if (code == LE_EXPR)
10492 {
10493 bitop1 = BIT_NOT_EXPR;
10494 bitop2 = BIT_IOR_EXPR;
10495 swap_p = true;
10496 }
10497 else
10498 {
10499 bitop1 = BIT_XOR_EXPR;
10500 if (code == EQ_EXPR)
10501 bitop2 = BIT_NOT_EXPR;
10502 }
10503 }
10504
10505 if (!vec_stmt)
10506 {
10507 if (bitop1 == NOP_EXPR)
10508 {
10509 if (!expand_vec_cmp_expr_p (vectype, mask_type, code))
10510 return false;
10511 }
10512 else
10513 {
10514 machine_mode mode = TYPE_MODE (vectype);
10515 optab optab;
10516
10517 optab = optab_for_tree_code (bitop1, vectype, optab_default);
10518 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
10519 return false;
10520
10521 if (bitop2 != NOP_EXPR)
10522 {
10523 optab = optab_for_tree_code (bitop2, vectype, optab_default);
10524 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
10525 return false;
10526 }
10527 }
10528
10529 /* Put types on constant and invariant SLP children. */
10530 if (slp_node
10531 && (!vect_maybe_update_slp_op_vectype (slp_rhs1, vectype)
10532 || !vect_maybe_update_slp_op_vectype (slp_rhs2, vectype)))
10533 {
10534 if (dump_enabled_p ())
10535 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10536 "incompatible vector types for invariants\n");
10537 return false;
10538 }
10539
10540 STMT_VINFO_TYPE (stmt_info) = comparison_vec_info_type;
10541 vect_model_simple_cost (vinfo, stmt_info,
10542 ncopies * (1 + (bitop2 != NOP_EXPR)),
10543 dts, ndts, slp_node, cost_vec);
10544 return true;
10545 }
10546
10547 /* Transform. */
10548
10549 /* Handle def. */
10550 lhs = gimple_assign_lhs (stmt);
10551 mask = vect_create_destination_var (lhs, mask_type);
10552
10553 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
10554 rhs1, &vec_oprnds0, vectype,
10555 rhs2, &vec_oprnds1, vectype);
10556 if (swap_p)
10557 std::swap (vec_oprnds0, vec_oprnds1);
10558
10559 /* Arguments are ready. Create the new vector stmt. */
10560 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_rhs1)
10561 {
10562 gimple *new_stmt;
10563 vec_rhs2 = vec_oprnds1[i];
10564
10565 new_temp = make_ssa_name (mask);
10566 if (bitop1 == NOP_EXPR)
10567 {
10568 new_stmt = gimple_build_assign (new_temp, code,
10569 vec_rhs1, vec_rhs2);
10570 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
10571 }
10572 else
10573 {
10574 if (bitop1 == BIT_NOT_EXPR)
10575 new_stmt = gimple_build_assign (new_temp, bitop1, vec_rhs2);
10576 else
10577 new_stmt = gimple_build_assign (new_temp, bitop1, vec_rhs1,
10578 vec_rhs2);
10579 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
10580 if (bitop2 != NOP_EXPR)
10581 {
10582 tree res = make_ssa_name (mask);
10583 if (bitop2 == BIT_NOT_EXPR)
10584 new_stmt = gimple_build_assign (res, bitop2, new_temp);
10585 else
10586 new_stmt = gimple_build_assign (res, bitop2, vec_rhs1,
10587 new_temp);
10588 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
10589 }
10590 }
10591 if (slp_node)
10592 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
10593 else
10594 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
10595 }
10596
10597 if (!slp_node)
10598 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
10599
10600 vec_oprnds0.release ();
10601 vec_oprnds1.release ();
10602
10603 return true;
10604 }
10605
10606 /* If SLP_NODE is nonnull, return true if vectorizable_live_operation
10607 can handle all live statements in the node. Otherwise return true
10608 if STMT_INFO is not live or if vectorizable_live_operation can handle it.
10609 GSI and VEC_STMT_P are as for vectorizable_live_operation. */
10610
10611 static bool
10612 can_vectorize_live_stmts (vec_info *vinfo,
10613 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
10614 slp_tree slp_node, slp_instance slp_node_instance,
10615 bool vec_stmt_p,
10616 stmt_vector_for_cost *cost_vec)
10617 {
10618 if (slp_node)
10619 {
10620 stmt_vec_info slp_stmt_info;
10621 unsigned int i;
10622 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (slp_node), i, slp_stmt_info)
10623 {
10624 if (STMT_VINFO_LIVE_P (slp_stmt_info)
10625 && !vectorizable_live_operation (vinfo,
10626 slp_stmt_info, gsi, slp_node,
10627 slp_node_instance, i,
10628 vec_stmt_p, cost_vec))
10629 return false;
10630 }
10631 }
10632 else if (STMT_VINFO_LIVE_P (stmt_info)
10633 && !vectorizable_live_operation (vinfo, stmt_info, gsi,
10634 slp_node, slp_node_instance, -1,
10635 vec_stmt_p, cost_vec))
10636 return false;
10637
10638 return true;
10639 }
10640
10641 /* Make sure the statement is vectorizable. */
10642
10643 opt_result
10644 vect_analyze_stmt (vec_info *vinfo,
10645 stmt_vec_info stmt_info, bool *need_to_vectorize,
10646 slp_tree node, slp_instance node_instance,
10647 stmt_vector_for_cost *cost_vec)
10648 {
10649 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
10650 enum vect_relevant relevance = STMT_VINFO_RELEVANT (stmt_info);
10651 bool ok;
10652 gimple_seq pattern_def_seq;
10653
10654 if (dump_enabled_p ())
10655 dump_printf_loc (MSG_NOTE, vect_location, "==> examining statement: %G",
10656 stmt_info->stmt);
10657
10658 if (gimple_has_volatile_ops (stmt_info->stmt))
10659 return opt_result::failure_at (stmt_info->stmt,
10660 "not vectorized:"
10661 " stmt has volatile operands: %G\n",
10662 stmt_info->stmt);
10663
10664 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
10665 && node == NULL
10666 && (pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info)))
10667 {
10668 gimple_stmt_iterator si;
10669
10670 for (si = gsi_start (pattern_def_seq); !gsi_end_p (si); gsi_next (&si))
10671 {
10672 stmt_vec_info pattern_def_stmt_info
10673 = vinfo->lookup_stmt (gsi_stmt (si));
10674 if (STMT_VINFO_RELEVANT_P (pattern_def_stmt_info)
10675 || STMT_VINFO_LIVE_P (pattern_def_stmt_info))
10676 {
10677 /* Analyze def stmt of STMT if it's a pattern stmt. */
10678 if (dump_enabled_p ())
10679 dump_printf_loc (MSG_NOTE, vect_location,
10680 "==> examining pattern def statement: %G",
10681 pattern_def_stmt_info->stmt);
10682
10683 opt_result res
10684 = vect_analyze_stmt (vinfo, pattern_def_stmt_info,
10685 need_to_vectorize, node, node_instance,
10686 cost_vec);
10687 if (!res)
10688 return res;
10689 }
10690 }
10691 }
10692
10693 /* Skip stmts that do not need to be vectorized. In loops this is expected
10694 to include:
10695 - the COND_EXPR which is the loop exit condition
10696 - any LABEL_EXPRs in the loop
10697 - computations that are used only for array indexing or loop control.
10698    In basic blocks we only analyze statements that are part of some SLP
10699    instance; therefore, all the statements are relevant.
10700
10701    A pattern statement needs to be analyzed instead of the original statement
10702    if the original statement is not relevant. Otherwise, we analyze both
10703    statements. In basic blocks we are called from some SLP instance
10704    traversal, so we don't analyze pattern stmts here; the pattern stmts
10705    are already part of the SLP instance. */
10706
10707 stmt_vec_info pattern_stmt_info = STMT_VINFO_RELATED_STMT (stmt_info);
10708 if (!STMT_VINFO_RELEVANT_P (stmt_info)
10709 && !STMT_VINFO_LIVE_P (stmt_info))
10710 {
10711 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
10712 && pattern_stmt_info
10713 && (STMT_VINFO_RELEVANT_P (pattern_stmt_info)
10714 || STMT_VINFO_LIVE_P (pattern_stmt_info)))
10715 {
10716 /* Analyze PATTERN_STMT instead of the original stmt. */
10717 stmt_info = pattern_stmt_info;
10718 if (dump_enabled_p ())
10719 dump_printf_loc (MSG_NOTE, vect_location,
10720 "==> examining pattern statement: %G",
10721 stmt_info->stmt);
10722 }
10723 else
10724 {
10725 if (dump_enabled_p ())
10726 dump_printf_loc (MSG_NOTE, vect_location, "irrelevant.\n");
10727
10728 return opt_result::success ();
10729 }
10730 }
10731 else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
10732 && node == NULL
10733 && pattern_stmt_info
10734 && (STMT_VINFO_RELEVANT_P (pattern_stmt_info)
10735 || STMT_VINFO_LIVE_P (pattern_stmt_info)))
10736 {
10737 /* Analyze PATTERN_STMT too. */
10738 if (dump_enabled_p ())
10739 dump_printf_loc (MSG_NOTE, vect_location,
10740 "==> examining pattern statement: %G",
10741 pattern_stmt_info->stmt);
10742
10743 opt_result res
10744 = vect_analyze_stmt (vinfo, pattern_stmt_info, need_to_vectorize, node,
10745 node_instance, cost_vec);
10746 if (!res)
10747 return res;
10748 }
10749
10750 switch (STMT_VINFO_DEF_TYPE (stmt_info))
10751 {
10752 case vect_internal_def:
10753 break;
10754
10755 case vect_reduction_def:
10756 case vect_nested_cycle:
10757 gcc_assert (!bb_vinfo
10758 && (relevance == vect_used_in_outer
10759 || relevance == vect_used_in_outer_by_reduction
10760 || relevance == vect_used_by_reduction
10761 || relevance == vect_unused_in_scope
10762 || relevance == vect_used_only_live));
10763 break;
10764
10765 case vect_induction_def:
10766 gcc_assert (!bb_vinfo);
10767 break;
10768
10769 case vect_constant_def:
10770 case vect_external_def:
10771 case vect_unknown_def_type:
10772 default:
10773 gcc_unreachable ();
10774 }
10775
10776 if (STMT_VINFO_RELEVANT_P (stmt_info))
10777 {
10778 tree type = gimple_expr_type (stmt_info->stmt);
10779 gcc_assert (!VECTOR_MODE_P (TYPE_MODE (type)));
10780 gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
10781 gcc_assert (STMT_VINFO_VECTYPE (stmt_info)
10782 || (call && gimple_call_lhs (call) == NULL_TREE));
10783 *need_to_vectorize = true;
10784 }
10785
10786 if (PURE_SLP_STMT (stmt_info) && !node)
10787 {
10788 if (dump_enabled_p ())
10789 dump_printf_loc (MSG_NOTE, vect_location,
10790 "handled only by SLP analysis\n");
10791 return opt_result::success ();
10792 }
10793
10794 ok = true;
10795 if (!bb_vinfo
10796 && (STMT_VINFO_RELEVANT_P (stmt_info)
10797 || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def))
10798 /* Prefer vectorizable_call over vectorizable_simd_clone_call so
10799 -mveclibabi= takes preference over library functions with
10800 the simd attribute. */
10801 ok = (vectorizable_call (vinfo, stmt_info, NULL, NULL, node, cost_vec)
10802 || vectorizable_simd_clone_call (vinfo, stmt_info, NULL, NULL, node,
10803 cost_vec)
10804 || vectorizable_conversion (vinfo, stmt_info,
10805 NULL, NULL, node, cost_vec)
10806 || vectorizable_operation (vinfo, stmt_info,
10807 NULL, NULL, node, cost_vec)
10808 || vectorizable_assignment (vinfo, stmt_info,
10809 NULL, NULL, node, cost_vec)
10810 || vectorizable_load (vinfo, stmt_info, NULL, NULL, node, cost_vec)
10811 || vectorizable_store (vinfo, stmt_info, NULL, NULL, node, cost_vec)
10812 || vectorizable_reduction (as_a <loop_vec_info> (vinfo), stmt_info,
10813 node, node_instance, cost_vec)
10814 || vectorizable_induction (as_a <loop_vec_info> (vinfo), stmt_info,
10815 NULL, node, cost_vec)
10816 || vectorizable_shift (vinfo, stmt_info, NULL, NULL, node, cost_vec)
10817 || vectorizable_condition (vinfo, stmt_info,
10818 NULL, NULL, node, cost_vec)
10819 || vectorizable_comparison (vinfo, stmt_info, NULL, NULL, node,
10820 cost_vec)
10821 || vectorizable_lc_phi (as_a <loop_vec_info> (vinfo),
10822 stmt_info, NULL, node));
10823 else
10824 {
10825 if (bb_vinfo)
10826 ok = (vectorizable_call (vinfo, stmt_info, NULL, NULL, node, cost_vec)
10827 || vectorizable_simd_clone_call (vinfo, stmt_info,
10828 NULL, NULL, node, cost_vec)
10829 || vectorizable_conversion (vinfo, stmt_info, NULL, NULL, node,
10830 cost_vec)
10831 || vectorizable_shift (vinfo, stmt_info,
10832 NULL, NULL, node, cost_vec)
10833 || vectorizable_operation (vinfo, stmt_info,
10834 NULL, NULL, node, cost_vec)
10835 || vectorizable_assignment (vinfo, stmt_info, NULL, NULL, node,
10836 cost_vec)
10837 || vectorizable_load (vinfo, stmt_info,
10838 NULL, NULL, node, cost_vec)
10839 || vectorizable_store (vinfo, stmt_info,
10840 NULL, NULL, node, cost_vec)
10841 || vectorizable_condition (vinfo, stmt_info,
10842 NULL, NULL, node, cost_vec)
10843 || vectorizable_comparison (vinfo, stmt_info, NULL, NULL, node,
10844 cost_vec)
10845 || vectorizable_phi (vinfo, stmt_info, NULL, node, cost_vec));
10846 }
10847
10848 if (!ok)
10849 return opt_result::failure_at (stmt_info->stmt,
10850 "not vectorized:"
10851 " relevant stmt not supported: %G",
10852 stmt_info->stmt);
10853
10854   /* Stmts that are (also) "live" (i.e. that are used out of the loop)
10855 need extra handling, except for vectorizable reductions. */
10856 if (!bb_vinfo
10857 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
10858 && STMT_VINFO_TYPE (stmt_info) != lc_phi_info_type
10859 && !can_vectorize_live_stmts (as_a <loop_vec_info> (vinfo),
10860 stmt_info, NULL, node, node_instance,
10861 false, cost_vec))
10862 return opt_result::failure_at (stmt_info->stmt,
10863 "not vectorized:"
10864 " live stmt not supported: %G",
10865 stmt_info->stmt);
10866
10867 return opt_result::success ();
10868 }
10869
10870
10871 /* Function vect_transform_stmt.
10872
10873 Create a vectorized stmt to replace STMT_INFO, and insert it at GSI. */
10874
10875 bool
10876 vect_transform_stmt (vec_info *vinfo,
10877 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
10878 slp_tree slp_node, slp_instance slp_node_instance)
10879 {
10880 bool is_store = false;
10881 gimple *vec_stmt = NULL;
10882 bool done;
10883
10884 gcc_assert (slp_node || !PURE_SLP_STMT (stmt_info));
10885
10886 switch (STMT_VINFO_TYPE (stmt_info))
10887 {
10888 case type_demotion_vec_info_type:
10889 case type_promotion_vec_info_type:
10890 case type_conversion_vec_info_type:
10891 done = vectorizable_conversion (vinfo, stmt_info,
10892 gsi, &vec_stmt, slp_node, NULL);
10893 gcc_assert (done);
10894 break;
10895
10896 case induc_vec_info_type:
10897 done = vectorizable_induction (as_a <loop_vec_info> (vinfo),
10898 stmt_info, &vec_stmt, slp_node,
10899 NULL);
10900 gcc_assert (done);
10901 break;
10902
10903 case shift_vec_info_type:
10904 done = vectorizable_shift (vinfo, stmt_info,
10905 gsi, &vec_stmt, slp_node, NULL);
10906 gcc_assert (done);
10907 break;
10908
10909 case op_vec_info_type:
10910 done = vectorizable_operation (vinfo, stmt_info, gsi, &vec_stmt, slp_node,
10911 NULL);
10912 gcc_assert (done);
10913 break;
10914
10915 case assignment_vec_info_type:
10916 done = vectorizable_assignment (vinfo, stmt_info,
10917 gsi, &vec_stmt, slp_node, NULL);
10918 gcc_assert (done);
10919 break;
10920
10921 case load_vec_info_type:
10922 done = vectorizable_load (vinfo, stmt_info, gsi, &vec_stmt, slp_node,
10923 NULL);
10924 gcc_assert (done);
10925 break;
10926
10927 case store_vec_info_type:
10928 done = vectorizable_store (vinfo, stmt_info,
10929 gsi, &vec_stmt, slp_node, NULL);
10930 gcc_assert (done);
10931 if (STMT_VINFO_GROUPED_ACCESS (stmt_info) && !slp_node)
10932 {
10933 /* In case of interleaving, the whole chain is vectorized when the
10934 last store in the chain is reached. Store stmts before the last
10935 	 one are skipped, and their stmt_vec_info shouldn't be freed
10936 meanwhile. */
10937 stmt_vec_info group_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
10938 if (DR_GROUP_STORE_COUNT (group_info) == DR_GROUP_SIZE (group_info))
10939 is_store = true;
10940 }
10941 else
10942 is_store = true;
10943 break;
10944
10945 case condition_vec_info_type:
10946 done = vectorizable_condition (vinfo, stmt_info,
10947 gsi, &vec_stmt, slp_node, NULL);
10948 gcc_assert (done);
10949 break;
10950
10951 case comparison_vec_info_type:
10952 done = vectorizable_comparison (vinfo, stmt_info, gsi, &vec_stmt,
10953 slp_node, NULL);
10954 gcc_assert (done);
10955 break;
10956
10957 case call_vec_info_type:
10958 done = vectorizable_call (vinfo, stmt_info,
10959 gsi, &vec_stmt, slp_node, NULL);
10960 break;
10961
10962 case call_simd_clone_vec_info_type:
10963 done = vectorizable_simd_clone_call (vinfo, stmt_info, gsi, &vec_stmt,
10964 slp_node, NULL);
10965 break;
10966
10967 case reduc_vec_info_type:
10968 done = vect_transform_reduction (as_a <loop_vec_info> (vinfo), stmt_info,
10969 gsi, &vec_stmt, slp_node);
10970 gcc_assert (done);
10971 break;
10972
10973 case cycle_phi_info_type:
10974 done = vect_transform_cycle_phi (as_a <loop_vec_info> (vinfo), stmt_info,
10975 &vec_stmt, slp_node, slp_node_instance);
10976 gcc_assert (done);
10977 break;
10978
10979 case lc_phi_info_type:
10980 done = vectorizable_lc_phi (as_a <loop_vec_info> (vinfo),
10981 stmt_info, &vec_stmt, slp_node);
10982 gcc_assert (done);
10983 break;
10984
10985 case phi_info_type:
10986 done = vectorizable_phi (vinfo, stmt_info, &vec_stmt, slp_node, NULL);
10987 gcc_assert (done);
10988 break;
10989
10990 default:
10991 if (!STMT_VINFO_LIVE_P (stmt_info))
10992 {
10993 if (dump_enabled_p ())
10994 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10995 "stmt not supported.\n");
10996 gcc_unreachable ();
10997 }
10998 done = true;
10999 }
11000
11001 if (!slp_node && vec_stmt)
11002 gcc_assert (STMT_VINFO_VEC_STMTS (stmt_info).exists ());
11003
11004 if (STMT_VINFO_TYPE (stmt_info) == store_vec_info_type)
11005 return is_store;
11006
11007 /* Handle stmts whose DEF is used outside the loop-nest that is
11008 being vectorized. */
11009 done = can_vectorize_live_stmts (vinfo, stmt_info, gsi, slp_node,
11010 slp_node_instance, true, NULL);
11011 gcc_assert (done);
11012
11013 return false;
11014 }
11015
11016
11017 /* Remove a group of stores (for SLP or interleaving), free their
11018 stmt_vec_info. */
11019
11020 void
11021 vect_remove_stores (vec_info *vinfo, stmt_vec_info first_stmt_info)
11022 {
11023 stmt_vec_info next_stmt_info = first_stmt_info;
11024
11025 while (next_stmt_info)
11026 {
11027 stmt_vec_info tmp = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
11028 next_stmt_info = vect_orig_stmt (next_stmt_info);
11029 /* Free the attached stmt_vec_info and remove the stmt. */
11030 vinfo->remove_stmt (next_stmt_info);
11031 next_stmt_info = tmp;
11032 }
11033 }
11034
11035 /* If NUNITS is nonzero, return a vector type that contains NUNITS
11036 elements of type SCALAR_TYPE, or null if the target doesn't support
11037 such a type.
11038
11039 If NUNITS is zero, return a vector type that contains elements of
11040 type SCALAR_TYPE, choosing whichever vector size the target prefers.
11041
11042 If PREVAILING_MODE is VOIDmode, we have not yet chosen a vector mode
11043 for this vectorization region and want to "autodetect" the best choice.
11044 Otherwise, PREVAILING_MODE is a previously-chosen vector TYPE_MODE
11045 and we want the new type to be interoperable with it. PREVAILING_MODE
11046 in this case can be a scalar integer mode or a vector mode; when it
11047 is a vector mode, the function acts like a tree-level version of
11048 related_vector_mode. */
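/* For example (target dependent, so only a sketch): with a 128-bit
   prevailing vector mode and a 32-bit int SCALAR_TYPE, NUNITS == 4
   would typically yield a V4SI-like vector type, whereas NUNITS == 0
   with PREVAILING_MODE == VOIDmode lets the target choose its
   preferred vector size via targetm.vectorize.preferred_simd_mode.  */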
11049
11050 tree
11051 get_related_vectype_for_scalar_type (machine_mode prevailing_mode,
11052 tree scalar_type, poly_uint64 nunits)
11053 {
11054 tree orig_scalar_type = scalar_type;
11055 scalar_mode inner_mode;
11056 machine_mode simd_mode;
11057 tree vectype;
11058
11059 if (!is_int_mode (TYPE_MODE (scalar_type), &inner_mode)
11060 && !is_float_mode (TYPE_MODE (scalar_type), &inner_mode))
11061 return NULL_TREE;
11062
11063 unsigned int nbytes = GET_MODE_SIZE (inner_mode);
11064
11065 /* For vector types of elements whose mode precision doesn't
11066      match their type's precision we use an element type of mode
11067 precision. The vectorization routines will have to make sure
11068 they support the proper result truncation/extension.
11069 We also make sure to build vector types with INTEGER_TYPE
11070 component type only. */
11071 if (INTEGRAL_TYPE_P (scalar_type)
11072 && (GET_MODE_BITSIZE (inner_mode) != TYPE_PRECISION (scalar_type)
11073 || TREE_CODE (scalar_type) != INTEGER_TYPE))
11074 scalar_type = build_nonstandard_integer_type (GET_MODE_BITSIZE (inner_mode),
11075 TYPE_UNSIGNED (scalar_type));
11076
11077 /* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
11078 When the component mode passes the above test simply use a type
11079 corresponding to that mode. The theory is that any use that
11080 would cause problems with this will disable vectorization anyway. */
11081 else if (!SCALAR_FLOAT_TYPE_P (scalar_type)
11082 && !INTEGRAL_TYPE_P (scalar_type))
11083 scalar_type = lang_hooks.types.type_for_mode (inner_mode, 1);
11084
11085 /* We can't build a vector type of elements with alignment bigger than
11086 their size. */
11087 else if (nbytes < TYPE_ALIGN_UNIT (scalar_type))
11088 scalar_type = lang_hooks.types.type_for_mode (inner_mode,
11089 TYPE_UNSIGNED (scalar_type));
11090
11091   /* If we fell back to using the mode, fail if there was
11092 no scalar type for it. */
11093 if (scalar_type == NULL_TREE)
11094 return NULL_TREE;
11095
11096 /* If no prevailing mode was supplied, use the mode the target prefers.
11097 Otherwise lookup a vector mode based on the prevailing mode. */
11098 if (prevailing_mode == VOIDmode)
11099 {
11100 gcc_assert (known_eq (nunits, 0U));
11101 simd_mode = targetm.vectorize.preferred_simd_mode (inner_mode);
11102 if (SCALAR_INT_MODE_P (simd_mode))
11103 {
11104 /* Traditional behavior is not to take the integer mode
11105 literally, but simply to use it as a way of determining
11106 the vector size. It is up to mode_for_vector to decide
11107 what the TYPE_MODE should be.
11108
11109 Note that nunits == 1 is allowed in order to support single
11110 element vector types. */
11111 if (!multiple_p (GET_MODE_SIZE (simd_mode), nbytes, &nunits)
11112 || !mode_for_vector (inner_mode, nunits).exists (&simd_mode))
11113 return NULL_TREE;
11114 }
11115 }
11116 else if (SCALAR_INT_MODE_P (prevailing_mode)
11117 || !related_vector_mode (prevailing_mode,
11118 inner_mode, nunits).exists (&simd_mode))
11119 {
11120 /* Fall back to using mode_for_vector, mostly in the hope of being
11121 able to use an integer mode. */
11122 if (known_eq (nunits, 0U)
11123 && !multiple_p (GET_MODE_SIZE (prevailing_mode), nbytes, &nunits))
11124 return NULL_TREE;
11125
11126 if (!mode_for_vector (inner_mode, nunits).exists (&simd_mode))
11127 return NULL_TREE;
11128 }
11129
11130 vectype = build_vector_type_for_mode (scalar_type, simd_mode);
11131
11132 /* In cases where the mode was chosen by mode_for_vector, check that
11133 the target actually supports the chosen mode, or that it at least
11134 allows the vector mode to be replaced by a like-sized integer. */
11135 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
11136 && !INTEGRAL_MODE_P (TYPE_MODE (vectype)))
11137 return NULL_TREE;
11138
11139 /* Re-attach the address-space qualifier if we canonicalized the scalar
11140 type. */
11141 if (TYPE_ADDR_SPACE (orig_scalar_type) != TYPE_ADDR_SPACE (vectype))
11142 return build_qualified_type
11143 (vectype, KEEP_QUAL_ADDR_SPACE (TYPE_QUALS (orig_scalar_type)));
11144
11145 return vectype;
11146 }
11147
11148 /* Function get_vectype_for_scalar_type.
11149
11150 Returns the vector type corresponding to SCALAR_TYPE as supported
11151 by the target. If GROUP_SIZE is nonzero and we're performing BB
11152 vectorization, make sure that the number of elements in the vector
11153 is no bigger than GROUP_SIZE. */
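/* For example (BB vectorization, numbers illustrative): with a 32-bit
   int SCALAR_TYPE and GROUP_SIZE == 2, a natural 4-element vector type
   has more lanes than the group, so the code below retries and returns
   a 2-element vector type instead.  */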
11154
11155 tree
11156 get_vectype_for_scalar_type (vec_info *vinfo, tree scalar_type,
11157 unsigned int group_size)
11158 {
11159 /* For BB vectorization, we should always have a group size once we've
11160 constructed the SLP tree; the only valid uses of zero GROUP_SIZEs
11161 are tentative requests during things like early data reference
11162 analysis and pattern recognition. */
11163 if (is_a <bb_vec_info> (vinfo))
11164 gcc_assert (vinfo->slp_instances.is_empty () || group_size != 0);
11165 else
11166 group_size = 0;
11167
11168 tree vectype = get_related_vectype_for_scalar_type (vinfo->vector_mode,
11169 scalar_type);
11170 if (vectype && vinfo->vector_mode == VOIDmode)
11171 vinfo->vector_mode = TYPE_MODE (vectype);
11172
11173 /* Register the natural choice of vector type, before the group size
11174 has been applied. */
11175 if (vectype)
11176 vinfo->used_vector_modes.add (TYPE_MODE (vectype));
11177
11178 /* If the natural choice of vector type doesn't satisfy GROUP_SIZE,
11179 try again with an explicit number of elements. */
11180 if (vectype
11181 && group_size
11182 && maybe_ge (TYPE_VECTOR_SUBPARTS (vectype), group_size))
11183 {
11184 /* Start with the biggest number of units that fits within
11185 GROUP_SIZE and halve it until we find a valid vector type.
11186 Usually either the first attempt will succeed or all will
11187 fail (in the latter case because GROUP_SIZE is too small
11188 for the target), but it's possible that a target could have
11189 a hole between supported vector types.
11190
11191 If GROUP_SIZE is not a power of 2, this has the effect of
11192 trying the largest power of 2 that fits within the group,
11193 even though the group is not a multiple of that vector size.
11194 The BB vectorizer will then try to carve up the group into
11195 smaller pieces. */
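      /* E.g. for GROUP_SIZE == 6 the loop below tries 4 lanes, then 2.  */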
11196 unsigned int nunits = 1 << floor_log2 (group_size);
11197 do
11198 {
11199 vectype = get_related_vectype_for_scalar_type (vinfo->vector_mode,
11200 scalar_type, nunits);
11201 nunits /= 2;
11202 }
11203 while (nunits > 1 && !vectype);
11204 }
11205
11206 return vectype;
11207 }
11208
11209 /* Return the vector type corresponding to SCALAR_TYPE as supported
11210 by the target. NODE, if nonnull, is the SLP tree node that will
11211 use the returned vector type. */
11212
11213 tree
11214 get_vectype_for_scalar_type (vec_info *vinfo, tree scalar_type, slp_tree node)
11215 {
11216 unsigned int group_size = 0;
11217 if (node)
11218 group_size = SLP_TREE_LANES (node);
11219 return get_vectype_for_scalar_type (vinfo, scalar_type, group_size);
11220 }
11221
11222 /* Function get_mask_type_for_scalar_type.
11223
11224 Returns the mask type corresponding to a result of comparison
11225 of vectors of specified SCALAR_TYPE as supported by target.
11226 If GROUP_SIZE is nonzero and we're performing BB vectorization,
11227 make sure that the number of elements in the vector is no bigger
11228 than GROUP_SIZE. */
11229
11230 tree
11231 get_mask_type_for_scalar_type (vec_info *vinfo, tree scalar_type,
11232 unsigned int group_size)
11233 {
11234 tree vectype = get_vectype_for_scalar_type (vinfo, scalar_type, group_size);
11235
11236 if (!vectype)
11237 return NULL;
11238
11239 return truth_type_for (vectype);
11240 }
11241
11242 /* Function get_same_sized_vectype
11243
11244 Returns a vector type corresponding to SCALAR_TYPE of size
11245 VECTOR_TYPE if supported by the target. */
11246
11247 tree
11248 get_same_sized_vectype (tree scalar_type, tree vector_type)
11249 {
11250 if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type))
11251 return truth_type_for (vector_type);
11252
11253 poly_uint64 nunits;
11254 if (!multiple_p (GET_MODE_SIZE (TYPE_MODE (vector_type)),
11255 GET_MODE_SIZE (TYPE_MODE (scalar_type)), &nunits))
11256 return NULL_TREE;
11257
11258 return get_related_vectype_for_scalar_type (TYPE_MODE (vector_type),
11259 scalar_type, nunits);
11260 }
11261
11262 /* Return true if replacing LOOP_VINFO->vector_mode with VECTOR_MODE
11263 would not change the chosen vector modes. */
11264
11265 bool
11266 vect_chooses_same_modes_p (vec_info *vinfo, machine_mode vector_mode)
11267 {
11268 for (vec_info::mode_set::iterator i = vinfo->used_vector_modes.begin ();
11269 i != vinfo->used_vector_modes.end (); ++i)
11270 if (!VECTOR_MODE_P (*i)
11271 || related_vector_mode (vector_mode, GET_MODE_INNER (*i), 0) != *i)
11272 return false;
11273 return true;
11274 }
11275
11276 /* Function vect_is_simple_use.
11277
11278 Input:
11279 VINFO - the vect info of the loop or basic block that is being vectorized.
11280 OPERAND - operand in the loop or bb.
11281 Output:
11282 DEF_STMT_INFO_OUT (optional) - information about the defining stmt in
11283 case OPERAND is an SSA_NAME that is defined in the vectorizable region
11284 DEF_STMT_OUT (optional) - the defining stmt in case OPERAND is an SSA_NAME;
11285 the definition could be anywhere in the function
11286 DT - the type of definition
11287
11288 Returns whether a stmt with OPERAND can be vectorized.
11289 For loops, supportable operands are constants, loop invariants, and operands
11290 that are defined by the current iteration of the loop. Unsupportable
11291 operands are those that are defined by a previous iteration of the loop (as
11292 is the case in reduction/induction computations).
11293 For basic blocks, supportable operands are constants and bb invariants.
11294 For now, operands defined outside the basic block are not supported. */
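/* A typical use from a vectorizable_* routine looks roughly like this
   (a sketch only; OP stands for whatever operand is being checked):

     enum vect_def_type dt;
     stmt_vec_info def_stmt_info;
     if (!vect_is_simple_use (op, vinfo, &dt, &def_stmt_info))
       return false;

   with DT then distinguishing constants/invariants (vect_constant_def,
   vect_external_def) from values defined inside the vectorized region
   (vect_internal_def and friends).  */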
11295
11296 bool
11297 vect_is_simple_use (tree operand, vec_info *vinfo, enum vect_def_type *dt,
11298 stmt_vec_info *def_stmt_info_out, gimple **def_stmt_out)
11299 {
11300 if (def_stmt_info_out)
11301 *def_stmt_info_out = NULL;
11302 if (def_stmt_out)
11303 *def_stmt_out = NULL;
11304 *dt = vect_unknown_def_type;
11305
11306 if (dump_enabled_p ())
11307 {
11308 dump_printf_loc (MSG_NOTE, vect_location,
11309 "vect_is_simple_use: operand ");
11310 if (TREE_CODE (operand) == SSA_NAME
11311 && !SSA_NAME_IS_DEFAULT_DEF (operand))
11312 dump_gimple_expr (MSG_NOTE, TDF_SLIM, SSA_NAME_DEF_STMT (operand), 0);
11313 else
11314 dump_generic_expr (MSG_NOTE, TDF_SLIM, operand);
11315 }
11316
11317 if (CONSTANT_CLASS_P (operand))
11318 *dt = vect_constant_def;
11319 else if (is_gimple_min_invariant (operand))
11320 *dt = vect_external_def;
11321 else if (TREE_CODE (operand) != SSA_NAME)
11322 *dt = vect_unknown_def_type;
11323 else if (SSA_NAME_IS_DEFAULT_DEF (operand))
11324 *dt = vect_external_def;
11325 else
11326 {
11327 gimple *def_stmt = SSA_NAME_DEF_STMT (operand);
11328 stmt_vec_info stmt_vinfo = vinfo->lookup_def (operand);
11329 if (!stmt_vinfo)
11330 *dt = vect_external_def;
11331 else
11332 {
11333 stmt_vinfo = vect_stmt_to_vectorize (stmt_vinfo);
11334 def_stmt = stmt_vinfo->stmt;
11335 switch (gimple_code (def_stmt))
11336 {
11337 case GIMPLE_PHI:
11338 case GIMPLE_ASSIGN:
11339 case GIMPLE_CALL:
11340 *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
11341 break;
11342 default:
11343 *dt = vect_unknown_def_type;
11344 break;
11345 }
11346 if (def_stmt_info_out)
11347 *def_stmt_info_out = stmt_vinfo;
11348 }
11349 if (def_stmt_out)
11350 *def_stmt_out = def_stmt;
11351 }
11352
11353 if (dump_enabled_p ())
11354 {
11355 dump_printf (MSG_NOTE, ", type of def: ");
11356 switch (*dt)
11357 {
11358 case vect_uninitialized_def:
11359 dump_printf (MSG_NOTE, "uninitialized\n");
11360 break;
11361 case vect_constant_def:
11362 dump_printf (MSG_NOTE, "constant\n");
11363 break;
11364 case vect_external_def:
11365 dump_printf (MSG_NOTE, "external\n");
11366 break;
11367 case vect_internal_def:
11368 dump_printf (MSG_NOTE, "internal\n");
11369 break;
11370 case vect_induction_def:
11371 dump_printf (MSG_NOTE, "induction\n");
11372 break;
11373 case vect_reduction_def:
11374 dump_printf (MSG_NOTE, "reduction\n");
11375 break;
11376 case vect_double_reduction_def:
11377 dump_printf (MSG_NOTE, "double reduction\n");
11378 break;
11379 case vect_nested_cycle:
11380 dump_printf (MSG_NOTE, "nested cycle\n");
11381 break;
11382 case vect_unknown_def_type:
11383 dump_printf (MSG_NOTE, "unknown\n");
11384 break;
11385 }
11386 }
11387
11388 if (*dt == vect_unknown_def_type)
11389 {
11390 if (dump_enabled_p ())
11391 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
11392 "Unsupported pattern.\n");
11393 return false;
11394 }
11395
11396 return true;
11397 }
11398
11399 /* Function vect_is_simple_use.
11400
11401 Same as vect_is_simple_use but also determines the vector operand
11402 type of OPERAND and stores it to *VECTYPE. If the definition of
11403 OPERAND is vect_uninitialized_def, vect_constant_def or
11404 vect_external_def *VECTYPE will be set to NULL_TREE and the caller
11405    is responsible for computing the best suited vector type for the
11406 scalar operand. */
11407
11408 bool
11409 vect_is_simple_use (tree operand, vec_info *vinfo, enum vect_def_type *dt,
11410 tree *vectype, stmt_vec_info *def_stmt_info_out,
11411 gimple **def_stmt_out)
11412 {
11413 stmt_vec_info def_stmt_info;
11414 gimple *def_stmt;
11415 if (!vect_is_simple_use (operand, vinfo, dt, &def_stmt_info, &def_stmt))
11416 return false;
11417
11418 if (def_stmt_out)
11419 *def_stmt_out = def_stmt;
11420 if (def_stmt_info_out)
11421 *def_stmt_info_out = def_stmt_info;
11422
11423 /* Now get a vector type if the def is internal, otherwise supply
11424 NULL_TREE and leave it up to the caller to figure out a proper
11425 type for the use stmt. */
11426 if (*dt == vect_internal_def
11427 || *dt == vect_induction_def
11428 || *dt == vect_reduction_def
11429 || *dt == vect_double_reduction_def
11430 || *dt == vect_nested_cycle)
11431 {
11432 *vectype = STMT_VINFO_VECTYPE (def_stmt_info);
11433 gcc_assert (*vectype != NULL_TREE);
11434 if (dump_enabled_p ())
11435 dump_printf_loc (MSG_NOTE, vect_location,
11436 "vect_is_simple_use: vectype %T\n", *vectype);
11437 }
11438 else if (*dt == vect_uninitialized_def
11439 || *dt == vect_constant_def
11440 || *dt == vect_external_def)
11441 *vectype = NULL_TREE;
11442 else
11443 gcc_unreachable ();
11444
11445 return true;
11446 }
11447
11448 /* Function vect_is_simple_use.
11449
11450 Same as vect_is_simple_use but determines the operand by operand
11451 position OPERAND from either STMT or SLP_NODE, filling in *OP
11452 and *SLP_DEF (when SLP_NODE is not NULL). */
11453
11454 bool
11455 vect_is_simple_use (vec_info *vinfo, stmt_vec_info stmt, slp_tree slp_node,
11456 unsigned operand, tree *op, slp_tree *slp_def,
11457 enum vect_def_type *dt,
11458 tree *vectype, stmt_vec_info *def_stmt_info_out)
11459 {
11460 if (slp_node)
11461 {
11462 slp_tree child = SLP_TREE_CHILDREN (slp_node)[operand];
11463 *slp_def = child;
11464 *vectype = SLP_TREE_VECTYPE (child);
11465 if (SLP_TREE_DEF_TYPE (child) == vect_internal_def)
11466 {
11467 *op = gimple_get_lhs (SLP_TREE_REPRESENTATIVE (child)->stmt);
11468 return vect_is_simple_use (*op, vinfo, dt, def_stmt_info_out);
11469 }
11470 else
11471 {
11472 if (def_stmt_info_out)
11473 *def_stmt_info_out = NULL;
11474 *op = SLP_TREE_SCALAR_OPS (child)[0];
11475 *dt = SLP_TREE_DEF_TYPE (child);
11476 return true;
11477 }
11478 }
11479 else
11480 {
11481 *slp_def = NULL;
11482 if (gassign *ass = dyn_cast <gassign *> (stmt->stmt))
11483 {
11484 if (gimple_assign_rhs_code (ass) == COND_EXPR
11485 && COMPARISON_CLASS_P (gimple_assign_rhs1 (ass)))
11486 {
11487 if (operand < 2)
11488 *op = TREE_OPERAND (gimple_assign_rhs1 (ass), operand);
11489 else
11490 *op = gimple_op (ass, operand);
11491 }
11492 else if (gimple_assign_rhs_code (ass) == VIEW_CONVERT_EXPR)
11493 *op = TREE_OPERAND (gimple_assign_rhs1 (ass), 0);
11494 else
11495 *op = gimple_op (ass, operand + 1);
11496 }
11497 else if (gcall *call = dyn_cast <gcall *> (stmt->stmt))
11498 {
11499 if (gimple_call_internal_p (call)
11500 && internal_store_fn_p (gimple_call_internal_fn (call)))
11501 operand = internal_fn_stored_value_index (gimple_call_internal_fn
11502 (call));
11503 *op = gimple_call_arg (call, operand);
11504 }
11505 else
11506 gcc_unreachable ();
11507 return vect_is_simple_use (*op, vinfo, dt, vectype, def_stmt_info_out);
11508 }
11509 }
11510
11511 /* If OP is not NULL and is external or constant update its vector
11512 type with VECTYPE. Returns true if successful or false if not,
11513 for example when conflicting vector types are present. */
11514
11515 bool
11516 vect_maybe_update_slp_op_vectype (slp_tree op, tree vectype)
11517 {
11518 if (!op || SLP_TREE_DEF_TYPE (op) == vect_internal_def)
11519 return true;
11520 if (SLP_TREE_VECTYPE (op))
11521 return types_compatible_p (SLP_TREE_VECTYPE (op), vectype);
11522 SLP_TREE_VECTYPE (op) = vectype;
11523 return true;
11524 }
11525
11526 /* Function supportable_widening_operation
11527
11528 Check whether an operation represented by the code CODE is a
11529 widening operation that is supported by the target platform in
11530 vector form (i.e., when operating on arguments of type VECTYPE_IN
11531 producing a result of type VECTYPE_OUT).
11532
11533 Widening operations we currently support are NOP (CONVERT), FLOAT,
11534 FIX_TRUNC, WIDEN_MULT, WIDEN_LSHIFT, WIDEN_PLUS, WIDEN_MINUS,
11535 DOT_PROD and SAD. This function checks if these operations are
11536 supported by the target platform either directly (via vector tree-codes), or via target builtins.
11537
11538 Output:
11539 - CODE1 and CODE2 are codes of vector operations to be used when
11540 vectorizing the operation, if available.
11541 - MULTI_STEP_CVT determines the number of required intermediate steps in
11542 case of multi-step conversion (like char->short->int - in that case
11543 MULTI_STEP_CVT will be 1).
11544 - INTERM_TYPES contains the intermediate type required to perform the
11545 widening operation (short in the above example). */
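
/* Editor-added example (hypothetical target): widening a vector of chars
   to a vector of ints is the two-step conversion char->short->int using
   VEC_UNPACK_LO_EXPR/VEC_UNPACK_HI_EXPR at each step, so on success

       *code1 = VEC_UNPACK_LO_EXPR, *code2 = VEC_UNPACK_HI_EXPR,
       *multi_step_cvt = 1,
       *interm_types = { the short vector type };

   a single-step widening leaves *multi_step_cvt at 0 and INTERM_TYPES
   untouched.  */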
11546
11547 bool
11548 supportable_widening_operation (vec_info *vinfo,
11549 enum tree_code code, stmt_vec_info stmt_info,
11550 tree vectype_out, tree vectype_in,
11551 enum tree_code *code1, enum tree_code *code2,
11552 int *multi_step_cvt,
11553 vec<tree> *interm_types)
11554 {
11555 loop_vec_info loop_info = dyn_cast <loop_vec_info> (vinfo);
11556 class loop *vect_loop = NULL;
11557 machine_mode vec_mode;
11558 enum insn_code icode1, icode2;
11559 optab optab1, optab2;
11560 tree vectype = vectype_in;
11561 tree wide_vectype = vectype_out;
11562 enum tree_code c1, c2;
11563 int i;
11564 tree prev_type, intermediate_type;
11565 machine_mode intermediate_mode, prev_mode;
11566 optab optab3, optab4;
11567
11568 *multi_step_cvt = 0;
11569 if (loop_info)
11570 vect_loop = LOOP_VINFO_LOOP (loop_info);
11571
11572 switch (code)
11573 {
11574 case WIDEN_MULT_EXPR:
11575 /* The result of a vectorized widening operation usually requires
11576 two vectors (because the widened results do not fit into one vector).
11577 The generated vector results would normally be expected to be
11578 generated in the same order as in the original scalar computation,
11579 i.e. if 8 results are generated in each vector iteration, they are
11580 to be organized as follows:
11581 vect1: [res1,res2,res3,res4],
11582 vect2: [res5,res6,res7,res8].
11583
11584 However, in the special case that the result of the widening
11585 operation is used in a reduction computation only, the order doesn't
11586 matter (because when vectorizing a reduction we change the order of
11587 the computation). Some targets can take advantage of this and
11588 generate more efficient code. For example, targets like Altivec,
11589 that support widen_mult using a sequence of {mult_even,mult_odd}
11590 generate the following vectors:
11591 vect1: [res1,res3,res5,res7],
11592 vect2: [res2,res4,res6,res8].
11593
11594 When vectorizing outer-loops, we execute the inner-loop sequentially
11595 (each vectorized inner-loop iteration contributes to VF outer-loop
11596 iterations in parallel). We therefore don't allow changing the
11597 order of the computation in the inner-loop during outer-loop
11598 vectorization. */
11599 /* TODO: Another case in which order doesn't *really* matter is when we
11600 widen and then contract again, e.g. (short)((int)x * y >> 8).
11601 Normally, pack_trunc performs an even/odd permute, whereas the
11602 repack from an even/odd expansion would be an interleave, which
11603 would be significantly simpler for e.g. AVX2. */
11604 /* In any case, in order to avoid duplicating the code below, recurse
11605 on VEC_WIDEN_MULT_EVEN_EXPR. If it succeeds, all the return values
11606 are properly set up for the caller. If we fail, we'll continue with
11607 a VEC_WIDEN_MULT_LO/HI_EXPR check. */
11608 if (vect_loop
11609 && STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction
11610 && !nested_in_vect_loop_p (vect_loop, stmt_info)
11611 && supportable_widening_operation (vinfo, VEC_WIDEN_MULT_EVEN_EXPR,
11612 stmt_info, vectype_out,
11613 vectype_in, code1, code2,
11614 multi_step_cvt, interm_types))
11615 {
11616 /* Elements in a vector with vect_used_by_reduction property cannot
11617 be reordered if the use chain with this property does not have the
11618 same operation. One such example is s += a * b, where elements
11619 in a and b cannot be reordered. Here we check if the vector defined
11620 by STMT is only directly used in the reduction statement. */
11621 tree lhs = gimple_assign_lhs (stmt_info->stmt);
11622 stmt_vec_info use_stmt_info = loop_info->lookup_single_use (lhs);
11623 if (use_stmt_info
11624 && STMT_VINFO_DEF_TYPE (use_stmt_info) == vect_reduction_def)
11625 return true;
11626 }
11627 c1 = VEC_WIDEN_MULT_LO_EXPR;
11628 c2 = VEC_WIDEN_MULT_HI_EXPR;
11629 break;
11630
11631 case DOT_PROD_EXPR:
11632 c1 = DOT_PROD_EXPR;
11633 c2 = DOT_PROD_EXPR;
11634 break;
11635
11636 case SAD_EXPR:
11637 c1 = SAD_EXPR;
11638 c2 = SAD_EXPR;
11639 break;
11640
11641 case VEC_WIDEN_MULT_EVEN_EXPR:
11642 /* Support the recursion induced just above. */
11643 c1 = VEC_WIDEN_MULT_EVEN_EXPR;
11644 c2 = VEC_WIDEN_MULT_ODD_EXPR;
11645 break;
11646
11647 case WIDEN_LSHIFT_EXPR:
11648 c1 = VEC_WIDEN_LSHIFT_LO_EXPR;
11649 c2 = VEC_WIDEN_LSHIFT_HI_EXPR;
11650 break;
11651
11652 case WIDEN_PLUS_EXPR:
11653 c1 = VEC_WIDEN_PLUS_LO_EXPR;
11654 c2 = VEC_WIDEN_PLUS_HI_EXPR;
11655 break;
11656
11657 case WIDEN_MINUS_EXPR:
11658 c1 = VEC_WIDEN_MINUS_LO_EXPR;
11659 c2 = VEC_WIDEN_MINUS_HI_EXPR;
11660 break;
11661
11662 CASE_CONVERT:
11663 c1 = VEC_UNPACK_LO_EXPR;
11664 c2 = VEC_UNPACK_HI_EXPR;
11665 break;
11666
11667 case FLOAT_EXPR:
11668 c1 = VEC_UNPACK_FLOAT_LO_EXPR;
11669 c2 = VEC_UNPACK_FLOAT_HI_EXPR;
11670 break;
11671
11672 case FIX_TRUNC_EXPR:
11673 c1 = VEC_UNPACK_FIX_TRUNC_LO_EXPR;
11674 c2 = VEC_UNPACK_FIX_TRUNC_HI_EXPR;
11675 break;
11676
11677 default:
11678 gcc_unreachable ();
11679 }
11680
11681 if (BYTES_BIG_ENDIAN && c1 != VEC_WIDEN_MULT_EVEN_EXPR)
11682 std::swap (c1, c2);
11683
11684 if (code == FIX_TRUNC_EXPR)
11685 {
11686 /* The signedness is determined from the output operand. */
11687 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
11688 optab2 = optab_for_tree_code (c2, vectype_out, optab_default);
11689 }
11690 else if (CONVERT_EXPR_CODE_P (code)
11691 && VECTOR_BOOLEAN_TYPE_P (wide_vectype)
11692 && VECTOR_BOOLEAN_TYPE_P (vectype)
11693 && TYPE_MODE (wide_vectype) == TYPE_MODE (vectype)
11694 && SCALAR_INT_MODE_P (TYPE_MODE (vectype)))
11695 {
11696 /* If the input and result modes are the same, a different optab
11697 is needed where we pass in the number of units in vectype. */
11698 optab1 = vec_unpacks_sbool_lo_optab;
11699 optab2 = vec_unpacks_sbool_hi_optab;
11700 }
11701 else
11702 {
11703 optab1 = optab_for_tree_code (c1, vectype, optab_default);
11704 optab2 = optab_for_tree_code (c2, vectype, optab_default);
11705 }
11706
11707 if (!optab1 || !optab2)
11708 return false;
11709
11710 vec_mode = TYPE_MODE (vectype);
11711 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing
11712 || (icode2 = optab_handler (optab2, vec_mode)) == CODE_FOR_nothing)
11713 return false;
11714
11715 *code1 = c1;
11716 *code2 = c2;
11717
11718 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
11719 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
11720 {
11721 if (!VECTOR_BOOLEAN_TYPE_P (vectype))
11722 return true;
11723 /* For scalar masks we may have different boolean
11724 vector types having the same QImode. Thus we
11725 add an additional check on the number of elements. */
11726 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype),
11727 TYPE_VECTOR_SUBPARTS (wide_vectype) * 2))
11728 return true;
11729 }
11730
11731 /* Check if it's a multi-step conversion that can be done using intermediate
11732 types. */
11733
11734 prev_type = vectype;
11735 prev_mode = vec_mode;
11736
11737 if (!CONVERT_EXPR_CODE_P (code))
11738 return false;
11739
11740 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
11741 intermediate steps in the promotion sequence. We try
11742 MAX_INTERM_CVT_STEPS to get to WIDE_VECTYPE, and fail if we do
11743 not. */
11744 interm_types->create (MAX_INTERM_CVT_STEPS);
11745 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
11746 {
11747 intermediate_mode = insn_data[icode1].operand[0].mode;
11748 if (VECTOR_BOOLEAN_TYPE_P (prev_type))
11749 intermediate_type
11750 = vect_halve_mask_nunits (prev_type, intermediate_mode);
11751 else
11752 intermediate_type
11753 = lang_hooks.types.type_for_mode (intermediate_mode,
11754 TYPE_UNSIGNED (prev_type));
11755
11756 if (VECTOR_BOOLEAN_TYPE_P (intermediate_type)
11757 && VECTOR_BOOLEAN_TYPE_P (prev_type)
11758 && intermediate_mode == prev_mode
11759 && SCALAR_INT_MODE_P (prev_mode))
11760 {
11761 /* If the input and result modes are the same, a different optab
11762 is needed where we pass in the number of units in vectype. */
11763 optab3 = vec_unpacks_sbool_lo_optab;
11764 optab4 = vec_unpacks_sbool_hi_optab;
11765 }
11766 else
11767 {
11768 optab3 = optab_for_tree_code (c1, intermediate_type, optab_default);
11769 optab4 = optab_for_tree_code (c2, intermediate_type, optab_default);
11770 }
11771
11772 if (!optab3 || !optab4
11773 || (icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing
11774 || insn_data[icode1].operand[0].mode != intermediate_mode
11775 || (icode2 = optab_handler (optab2, prev_mode)) == CODE_FOR_nothing
11776 || insn_data[icode2].operand[0].mode != intermediate_mode
11777 || ((icode1 = optab_handler (optab3, intermediate_mode))
11778 == CODE_FOR_nothing)
11779 || ((icode2 = optab_handler (optab4, intermediate_mode))
11780 == CODE_FOR_nothing))
11781 break;
11782
11783 interm_types->quick_push (intermediate_type);
11784 (*multi_step_cvt)++;
11785
11786 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
11787 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
11788 {
11789 if (!VECTOR_BOOLEAN_TYPE_P (vectype))
11790 return true;
11791 if (known_eq (TYPE_VECTOR_SUBPARTS (intermediate_type),
11792 TYPE_VECTOR_SUBPARTS (wide_vectype) * 2))
11793 return true;
11794 }
11795
11796 prev_type = intermediate_type;
11797 prev_mode = intermediate_mode;
11798 }
11799
11800 interm_types->release ();
11801 return false;
11802 }
11803
11804
11805 /* Function supportable_narrowing_operation
11806
11807 Check whether an operation represented by the code CODE is a
11808 narrowing operation that is supported by the target platform in
11809 vector form (i.e., when operating on arguments of type VECTYPE_IN
11810 and producing a result of type VECTYPE_OUT).
11811
11812 Narrowing operations we currently support are NOP (CONVERT), FIX_TRUNC
11813 and FLOAT. This function checks if these operations are supported by
11814 the target platform directly via vector tree-codes.
11815
11816 Output:
11817 - CODE1 is the code of a vector operation to be used when
11818 vectorizing the operation, if available.
11819 - MULTI_STEP_CVT determines the number of required intermediate steps in
11820 case of multi-step conversion (like int->short->char - in that case
11821 MULTI_STEP_CVT will be 1).
11822 - INTERM_TYPES contains the intermediate type required to perform the
11823 narrowing operation (short in the above example). */
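
/* Editor-added example (hypothetical target): narrowing a vector of ints
   to a vector of chars is the two-step conversion int->short->char using
   VEC_PACK_TRUNC_EXPR at each step, so on success

       *code1 = VEC_PACK_TRUNC_EXPR,
       *multi_step_cvt = 1,
       *interm_types = { the short vector type };

   a single-step narrowing leaves *multi_step_cvt at 0 and INTERM_TYPES
   untouched.  */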
11824
11825 bool
11826 supportable_narrowing_operation (enum tree_code code,
11827 tree vectype_out, tree vectype_in,
11828 enum tree_code *code1, int *multi_step_cvt,
11829 vec<tree> *interm_types)
11830 {
11831 machine_mode vec_mode;
11832 enum insn_code icode1;
11833 optab optab1, interm_optab;
11834 tree vectype = vectype_in;
11835 tree narrow_vectype = vectype_out;
11836 enum tree_code c1;
11837 tree intermediate_type, prev_type;
11838 machine_mode intermediate_mode, prev_mode;
11839 int i;
11840 bool uns;
11841
11842 *multi_step_cvt = 0;
11843 switch (code)
11844 {
11845 CASE_CONVERT:
11846 c1 = VEC_PACK_TRUNC_EXPR;
11847 if (VECTOR_BOOLEAN_TYPE_P (narrow_vectype)
11848 && VECTOR_BOOLEAN_TYPE_P (vectype)
11849 && TYPE_MODE (narrow_vectype) == TYPE_MODE (vectype)
11850 && SCALAR_INT_MODE_P (TYPE_MODE (vectype)))
11851 optab1 = vec_pack_sbool_trunc_optab;
11852 else
11853 optab1 = optab_for_tree_code (c1, vectype, optab_default);
11854 break;
11855
11856 case FIX_TRUNC_EXPR:
11857 c1 = VEC_PACK_FIX_TRUNC_EXPR;
11858 /* The signedness is determined from the output operand. */
11859 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
11860 break;
11861
11862 case FLOAT_EXPR:
11863 c1 = VEC_PACK_FLOAT_EXPR;
11864 optab1 = optab_for_tree_code (c1, vectype, optab_default);
11865 break;
11866
11867 default:
11868 gcc_unreachable ();
11869 }
11870
11871 if (!optab1)
11872 return false;
11873
11874 vec_mode = TYPE_MODE (vectype);
11875 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing)
11876 return false;
11877
11878 *code1 = c1;
11879
11880 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
11881 {
11882 if (!VECTOR_BOOLEAN_TYPE_P (vectype))
11883 return true;
11884 /* For scalar masks we may have different boolean
11885 vector types having the same QImode. Thus we
11886 add an additional check on the number of elements. */
11887 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype) * 2,
11888 TYPE_VECTOR_SUBPARTS (narrow_vectype)))
11889 return true;
11890 }
11891
11892 if (code == FLOAT_EXPR)
11893 return false;
11894
11895 /* Check if it's a multi-step conversion that can be done using intermediate
11896 types. */
11897 prev_mode = vec_mode;
11898 prev_type = vectype;
11899 if (code == FIX_TRUNC_EXPR)
11900 uns = TYPE_UNSIGNED (vectype_out);
11901 else
11902 uns = TYPE_UNSIGNED (vectype);
11903
11904 /* For multi-step FIX_TRUNC_EXPR prefer signed floating to integer
11905 conversion over unsigned, as unsigned FIX_TRUNC_EXPR is often more
11906 costly than signed. */
11907 if (code == FIX_TRUNC_EXPR && uns)
11908 {
11909 enum insn_code icode2;
11910
11911 intermediate_type
11912 = lang_hooks.types.type_for_mode (TYPE_MODE (vectype_out), 0);
11913 interm_optab
11914 = optab_for_tree_code (c1, intermediate_type, optab_default);
11915 if (interm_optab != unknown_optab
11916 && (icode2 = optab_handler (optab1, vec_mode)) != CODE_FOR_nothing
11917 && insn_data[icode1].operand[0].mode
11918 == insn_data[icode2].operand[0].mode)
11919 {
11920 uns = false;
11921 optab1 = interm_optab;
11922 icode1 = icode2;
11923 }
11924 }
11925
11926 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
11927 intermediate steps in the narrowing sequence. We try
11928 MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do not. */
11929 interm_types->create (MAX_INTERM_CVT_STEPS);
11930 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
11931 {
11932 intermediate_mode = insn_data[icode1].operand[0].mode;
11933 if (VECTOR_BOOLEAN_TYPE_P (prev_type))
11934 intermediate_type
11935 = vect_double_mask_nunits (prev_type, intermediate_mode);
11936 else
11937 intermediate_type
11938 = lang_hooks.types.type_for_mode (intermediate_mode, uns);
11939 if (VECTOR_BOOLEAN_TYPE_P (intermediate_type)
11940 && VECTOR_BOOLEAN_TYPE_P (prev_type)
11941 && intermediate_mode == prev_mode
11942 && SCALAR_INT_MODE_P (prev_mode))
11943 interm_optab = vec_pack_sbool_trunc_optab;
11944 else
11945 interm_optab
11946 = optab_for_tree_code (VEC_PACK_TRUNC_EXPR, intermediate_type,
11947 optab_default);
11948 if (!interm_optab
11949 || ((icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing)
11950 || insn_data[icode1].operand[0].mode != intermediate_mode
11951 || ((icode1 = optab_handler (interm_optab, intermediate_mode))
11952 == CODE_FOR_nothing))
11953 break;
11954
11955 interm_types->quick_push (intermediate_type);
11956 (*multi_step_cvt)++;
11957
11958 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
11959 {
11960 if (!VECTOR_BOOLEAN_TYPE_P (vectype))
11961 return true;
11962 if (known_eq (TYPE_VECTOR_SUBPARTS (intermediate_type) * 2,
11963 TYPE_VECTOR_SUBPARTS (narrow_vectype)))
11964 return true;
11965 }
11966
11967 prev_mode = intermediate_mode;
11968 prev_type = intermediate_type;
11969 optab1 = interm_optab;
11970 }
11971
11972 interm_types->release ();
11973 return false;
11974 }
11975
11976 /* Generate and return a statement that sets vector mask MASK such that
11977 MASK[I] is true iff J + START_INDEX < END_INDEX for all J <= I. */
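
/* Editor-added worked example: with a 4-element mask, START_INDEX = 6 and
   END_INDEX = 9, element I is active iff I + 6 < 9, so the generated
   IFN_WHILE_ULT call sets MASK to { true, true, true, false }.  This is
   the pattern used to deactivate excess lanes in the final iteration of a
   fully-masked loop.  */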
11978
11979 gcall *
11980 vect_gen_while (tree mask, tree start_index, tree end_index)
11981 {
11982 tree cmp_type = TREE_TYPE (start_index);
11983 tree mask_type = TREE_TYPE (mask);
11984 gcc_checking_assert (direct_internal_fn_supported_p (IFN_WHILE_ULT,
11985 cmp_type, mask_type,
11986 OPTIMIZE_FOR_SPEED));
11987 gcall *call = gimple_build_call_internal (IFN_WHILE_ULT, 3,
11988 start_index, end_index,
11989 build_zero_cst (mask_type));
11990 gimple_call_set_lhs (call, mask);
11991 return call;
11992 }
11993
11994 /* Generate a vector mask of type MASK_TYPE for which index I is false iff
11995 J + START_INDEX < END_INDEX for all J <= I. Add the statements to SEQ. */
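
/* Editor-added worked example: with the same inputs as above
   (START_INDEX = 6, END_INDEX = 9, a 4-element mask) the returned value
   is { false, false, false, true }, i.e. the bitwise NOT of the
   IFN_WHILE_ULT result.  */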
11996
11997 tree
11998 vect_gen_while_not (gimple_seq *seq, tree mask_type, tree start_index,
11999 tree end_index)
12000 {
12001 tree tmp = make_ssa_name (mask_type);
12002 gcall *call = vect_gen_while (tmp, start_index, end_index);
12003 gimple_seq_add_stmt (seq, call);
12004 return gimple_build (seq, BIT_NOT_EXPR, mask_type, tmp);
12005 }
12006
12007 /* Try to compute the vector types required to vectorize STMT_INFO,
12008 returning true on success and false if vectorization isn't possible.
12009 If GROUP_SIZE is nonzero and we're performing BB vectorization,
12010 make sure that the number of elements in the vectors is no bigger
12011 than GROUP_SIZE.
12012
12013 On success:
12014
12015 - Set *STMT_VECTYPE_OUT to:
12016 - NULL_TREE if the statement doesn't need to be vectorized;
12017 - the equivalent of STMT_VINFO_VECTYPE otherwise.
12018
12019 - Set *NUNITS_VECTYPE_OUT to the vector type that contains the maximum
12020 number of units needed to vectorize STMT_INFO, or NULL_TREE if the
12021 statement does not help to determine the overall number of units. */
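
/* Editor-added example (hypothetical 128-bit vector target): for a widening
   statement such as

       int_res = (int) short_val;

   *STMT_VECTYPE_OUT would be the int vector type (e.g. V4SI) while
   *NUNITS_VECTYPE_OUT would be the vector type of the smallest scalar
   involved (e.g. V8HI), so the vectorization factor is derived from the
   short elements.  */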
12022
12023 opt_result
12024 vect_get_vector_types_for_stmt (vec_info *vinfo, stmt_vec_info stmt_info,
12025 tree *stmt_vectype_out,
12026 tree *nunits_vectype_out,
12027 unsigned int group_size)
12028 {
12029 gimple *stmt = stmt_info->stmt;
12030
12031 /* For BB vectorization, we should always have a group size once we've
12032 constructed the SLP tree; the only valid uses of zero GROUP_SIZEs
12033 are tentative requests during things like early data reference
12034 analysis and pattern recognition. */
12035 if (is_a <bb_vec_info> (vinfo))
12036 gcc_assert (vinfo->slp_instances.is_empty () || group_size != 0);
12037 else
12038 group_size = 0;
12039
12040 *stmt_vectype_out = NULL_TREE;
12041 *nunits_vectype_out = NULL_TREE;
12042
12043 if (gimple_get_lhs (stmt) == NULL_TREE
12044 /* MASK_STORE has no lhs, but is ok. */
12045 && !gimple_call_internal_p (stmt, IFN_MASK_STORE))
12046 {
12047 if (is_a <gcall *> (stmt))
12048 {
12049 /* Ignore calls with no lhs. These must be calls to
12050 #pragma omp simd functions, and what vectorization factor
12051 it really needs can't be determined until
12052 vectorizable_simd_clone_call. */
12053 if (dump_enabled_p ())
12054 dump_printf_loc (MSG_NOTE, vect_location,
12055 "defer to SIMD clone analysis.\n");
12056 return opt_result::success ();
12057 }
12058
12059 return opt_result::failure_at (stmt,
12060 "not vectorized: irregular stmt.%G", stmt);
12061 }
12062
12063 if (VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt))))
12064 return opt_result::failure_at (stmt,
12065 "not vectorized: vector stmt in loop:%G",
12066 stmt);
12067
12068 tree vectype;
12069 tree scalar_type = NULL_TREE;
12070 if (group_size == 0 && STMT_VINFO_VECTYPE (stmt_info))
12071 {
12072 vectype = STMT_VINFO_VECTYPE (stmt_info);
12073 if (dump_enabled_p ())
12074 dump_printf_loc (MSG_NOTE, vect_location,
12075 "precomputed vectype: %T\n", vectype);
12076 }
12077 else if (vect_use_mask_type_p (stmt_info))
12078 {
12079 unsigned int precision = stmt_info->mask_precision;
12080 scalar_type = build_nonstandard_integer_type (precision, 1);
12081 vectype = get_mask_type_for_scalar_type (vinfo, scalar_type, group_size);
12082 if (!vectype)
12083 return opt_result::failure_at (stmt, "not vectorized: unsupported"
12084 " data-type %T\n", scalar_type);
12085 if (dump_enabled_p ())
12086 dump_printf_loc (MSG_NOTE, vect_location, "vectype: %T\n", vectype);
12087 }
12088 else
12089 {
12090 if (data_reference *dr = STMT_VINFO_DATA_REF (stmt_info))
12091 scalar_type = TREE_TYPE (DR_REF (dr));
12092 else if (gimple_call_internal_p (stmt, IFN_MASK_STORE))
12093 scalar_type = TREE_TYPE (gimple_call_arg (stmt, 3));
12094 else
12095 scalar_type = TREE_TYPE (gimple_get_lhs (stmt));
12096
12097 if (dump_enabled_p ())
12098 {
12099 if (group_size)
12100 dump_printf_loc (MSG_NOTE, vect_location,
12101 "get vectype for scalar type (group size %d):"
12102 " %T\n", group_size, scalar_type);
12103 else
12104 dump_printf_loc (MSG_NOTE, vect_location,
12105 "get vectype for scalar type: %T\n", scalar_type);
12106 }
12107 vectype = get_vectype_for_scalar_type (vinfo, scalar_type, group_size);
12108 if (!vectype)
12109 return opt_result::failure_at (stmt,
12110 "not vectorized:"
12111 " unsupported data-type %T\n",
12112 scalar_type);
12113
12114 if (dump_enabled_p ())
12115 dump_printf_loc (MSG_NOTE, vect_location, "vectype: %T\n", vectype);
12116 }
12117 *stmt_vectype_out = vectype;
12118
12119 /* Don't try to compute scalar types if the stmt produces a boolean
12120 vector; use the existing vector type instead. */
12121 tree nunits_vectype = vectype;
12122 if (!VECTOR_BOOLEAN_TYPE_P (vectype))
12123 {
12124 /* The number of units is set according to the smallest scalar
12125 type (or the largest vector size, but we only support one
12126 vector size per vectorization). */
12127 HOST_WIDE_INT dummy;
12128 scalar_type = vect_get_smallest_scalar_type (stmt_info, &dummy, &dummy);
12129 if (scalar_type != TREE_TYPE (vectype))
12130 {
12131 if (dump_enabled_p ())
12132 dump_printf_loc (MSG_NOTE, vect_location,
12133 "get vectype for smallest scalar type: %T\n",
12134 scalar_type);
12135 nunits_vectype = get_vectype_for_scalar_type (vinfo, scalar_type,
12136 group_size);
12137 if (!nunits_vectype)
12138 return opt_result::failure_at
12139 (stmt, "not vectorized: unsupported data-type %T\n",
12140 scalar_type);
12141 if (dump_enabled_p ())
12142 dump_printf_loc (MSG_NOTE, vect_location, "nunits vectype: %T\n",
12143 nunits_vectype);
12144 }
12145 }
12146
12147 if (!multiple_p (TYPE_VECTOR_SUBPARTS (nunits_vectype),
12148 TYPE_VECTOR_SUBPARTS (*stmt_vectype_out)))
12149 return opt_result::failure_at (stmt,
12150 "Not vectorized: Incompatible number "
12151 "of vector subparts between %T and %T\n",
12152 nunits_vectype, *stmt_vectype_out);
12153
12154 if (dump_enabled_p ())
12155 {
12156 dump_printf_loc (MSG_NOTE, vect_location, "nunits = ");
12157 dump_dec (MSG_NOTE, TYPE_VECTOR_SUBPARTS (nunits_vectype));
12158 dump_printf (MSG_NOTE, "\n");
12159 }
12160
12161 *nunits_vectype_out = nunits_vectype;
12162 return opt_result::success ();
12163 }
12164
12165 /* Generate and return a statement sequence that sets the vector length LEN as follows:
12166
12167 min_of_start_and_end = min (START_INDEX, END_INDEX);
12168 left_len = END_INDEX - min_of_start_and_end;
12169 rhs = min (left_len, LEN_LIMIT);
12170 LEN = rhs;
12171
12172 Note: the cost of the code generated by this function is modeled
12173 by vect_estimate_min_profitable_iters, so changes here may need
12174 corresponding changes there. */
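
/* Editor-added worked example: with START_INDEX = 3, END_INDEX = 10 and
   LEN_LIMIT = 4, the generated sequence computes

       min_of_start_and_end = min (3, 10) = 3
       left_len = 10 - 3 = 7
       LEN = min (7, 4) = 4

   i.e. a full vector of 4 elements is processed; once fewer than LEN_LIMIT
   elements remain, LEN becomes that remainder instead.  */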
12175
12176 gimple_seq
12177 vect_gen_len (tree len, tree start_index, tree end_index, tree len_limit)
12178 {
12179 gimple_seq stmts = NULL;
12180 tree len_type = TREE_TYPE (len);
12181 gcc_assert (TREE_TYPE (start_index) == len_type);
12182
12183 tree min = gimple_build (&stmts, MIN_EXPR, len_type, start_index, end_index);
12184 tree left_len = gimple_build (&stmts, MINUS_EXPR, len_type, end_index, min);
12185 tree rhs = gimple_build (&stmts, MIN_EXPR, len_type, left_len, len_limit);
12186 gimple* stmt = gimple_build_assign (len, rhs);
12187 gimple_seq_add_stmt (&stmts, stmt);
12188
12189 return stmts;
12190 }
12191
12192