/* Statement Analysis and Transformation for Vectorization
   Copyright (C) 2003-2020 Free Software Foundation, Inc.
   Contributed by Dorit Naishlos <dorit@il.ibm.com>
   and Ira Rosen <irar@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "target.h"
#include "rtl.h"
#include "tree.h"
#include "gimple.h"
#include "ssa.h"
#include "optabs-tree.h"
#include "insn-config.h"
#include "recog.h"		/* FIXME: for insn_data */
#include "cgraph.h"
#include "dumpfile.h"
#include "alias.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "tree-eh.h"
#include "gimplify.h"
#include "gimple-iterator.h"
#include "gimplify-me.h"
#include "tree-cfg.h"
#include "tree-ssa-loop-manip.h"
#include "cfgloop.h"
#include "explow.h"
#include "tree-ssa-loop.h"
#include "tree-scalar-evolution.h"
#include "tree-vectorizer.h"
#include "builtins.h"
#include "internal-fn.h"
#include "tree-vector-builder.h"
#include "vec-perm-indices.h"
#include "tree-ssa-loop-niter.h"
#include "gimple-fold.h"
#include "regs.h"
#include "attribs.h"

/* For lang_hooks.types.type_for_mode.  */
#include "langhooks.h"

/* Return the vectorized type for the given statement.  */

tree
stmt_vectype (class _stmt_vec_info *stmt_info)
{
  return STMT_VINFO_VECTYPE (stmt_info);
}

/* Return TRUE iff the given statement is in an inner loop relative to
   the loop being vectorized.  */
bool
stmt_in_inner_loop_p (class _stmt_vec_info *stmt_info)
{
  gimple *stmt = STMT_VINFO_STMT (stmt_info);
  basic_block bb = gimple_bb (stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  class loop *loop;

  if (!loop_vinfo)
    return false;

  loop = LOOP_VINFO_LOOP (loop_vinfo);

  return (bb->loop_father == loop->inner);
}
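
/* For illustration (not part of the original sources): when outer-loop
   vectorization processes a nest such as

     for (i = 0; i < n; i++)        <-- loop being vectorized
       for (j = 0; j < m; j++)      <-- loop->inner
         a[i][j] = ...;             <-- stmt_in_inner_loop_p is true here

   statements in the inner loop answer true, while statements in the
   outer loop's own blocks answer false.  */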

/* Record the cost of a statement, either by directly informing the
   target model or by saving it in a vector for later processing.
   Return a preliminary estimate of the statement's cost.  */

unsigned
record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
		  enum vect_cost_for_stmt kind, stmt_vec_info stmt_info,
		  int misalign, enum vect_cost_model_location where)
{
  if ((kind == vector_load || kind == unaligned_load)
      && STMT_VINFO_GATHER_SCATTER_P (stmt_info))
    kind = vector_gather_load;
  if ((kind == vector_store || kind == unaligned_store)
      && STMT_VINFO_GATHER_SCATTER_P (stmt_info))
    kind = vector_scatter_store;

  stmt_info_for_cost si = { count, kind, where, stmt_info, misalign };
  body_cost_vec->safe_push (si);

  tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
  return (unsigned)
      (builtin_vectorization_cost (kind, vectype, misalign) * count);
}
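
/* Illustrative only (not part of the original sources): the cost-model
   helpers below accumulate costs with calls of this shape, e.g.

     inside_cost += record_stmt_cost (cost_vec, ncopies, vector_stmt,
				      stmt_info, 0, vect_body);

   pushing one stmt_info_for_cost entry per call onto the cost vector
   for the target cost model to finalize later.  */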

/* Return a variable of type ELEM_TYPE[NELEMS].  */

static tree
create_vector_array (tree elem_type, unsigned HOST_WIDE_INT nelems)
{
  return create_tmp_var (build_array_type_nelts (elem_type, nelems),
			 "vect_array");
}

/* ARRAY is an array of vectors created by create_vector_array.
   Return an SSA_NAME for the vector in index N.  The reference
   is part of the vectorization of STMT_INFO and the vector is associated
   with scalar destination SCALAR_DEST.  */

static tree
read_vector_array (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
		   tree scalar_dest, tree array, unsigned HOST_WIDE_INT n)
{
  tree vect_type, vect, vect_name, array_ref;
  gimple *new_stmt;

  gcc_assert (TREE_CODE (TREE_TYPE (array)) == ARRAY_TYPE);
  vect_type = TREE_TYPE (TREE_TYPE (array));
  vect = vect_create_destination_var (scalar_dest, vect_type);
  array_ref = build4 (ARRAY_REF, vect_type, array,
		      build_int_cst (size_type_node, n),
		      NULL_TREE, NULL_TREE);

  new_stmt = gimple_build_assign (vect, array_ref);
  vect_name = make_ssa_name (vect, new_stmt);
  gimple_assign_set_lhs (new_stmt, vect_name);
  vect_finish_stmt_generation (stmt_info, new_stmt, gsi);

  return vect_name;
}
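
/* For illustration (a sketch, not part of the original sources): for
   N == 2 the helper above emits roughly

     vect_x_3 = vect_array[2];

   and write_vector_array below emits the mirror-image store

     vect_array[2] = vx_5;  */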

/* ARRAY is an array of vectors created by create_vector_array.
   Emit code to store SSA_NAME VECT in index N of the array.
   The store is part of the vectorization of STMT_INFO.  */

static void
write_vector_array (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
		    tree vect, tree array, unsigned HOST_WIDE_INT n)
{
  tree array_ref;
  gimple *new_stmt;

  array_ref = build4 (ARRAY_REF, TREE_TYPE (vect), array,
		      build_int_cst (size_type_node, n),
		      NULL_TREE, NULL_TREE);

  new_stmt = gimple_build_assign (array_ref, vect);
  vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
}

/* PTR is a pointer to an array of type TYPE.  Return a representation
   of *PTR.  The memory reference replaces those in FIRST_DR
   (and its group).  */

static tree
create_array_ref (tree type, tree ptr, tree alias_ptr_type)
{
  tree mem_ref;

  mem_ref = build2 (MEM_REF, type, ptr, build_int_cst (alias_ptr_type, 0));
  /* Arrays have the same alignment as their type.  */
  set_ptr_info_alignment (get_ptr_info (ptr), TYPE_ALIGN_UNIT (type), 0);
  return mem_ref;
}

/* Add a clobber of variable VAR to the vectorization of STMT_INFO.
   Emit the clobber before *GSI.  */

static void
vect_clobber_variable (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
		       tree var)
{
  tree clobber = build_clobber (TREE_TYPE (var));
  gimple *new_stmt = gimple_build_assign (var, clobber);
  vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
}

/* Utility functions used by vect_mark_stmts_to_be_vectorized.  */

/* Function vect_mark_relevant.

   Mark STMT_INFO as "relevant for vectorization" and add it to WORKLIST.  */

static void
vect_mark_relevant (vec<stmt_vec_info> *worklist, stmt_vec_info stmt_info,
		    enum vect_relevant relevant, bool live_p)
{
  enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
  bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "mark relevant %d, live %d: %G", relevant, live_p,
		     stmt_info->stmt);

  /* If this stmt is an original stmt in a pattern, we might need to mark its
     related pattern stmt instead of the original stmt.  However, such stmts
     may have their own uses that are not in any pattern, in which case the
     stmt itself should be marked.  */
  if (STMT_VINFO_IN_PATTERN_P (stmt_info))
    {
      /* This is the last stmt in a sequence that was detected as a
	 pattern that can potentially be vectorized.  Don't mark the stmt
	 as relevant/live because it's not going to be vectorized.
	 Instead mark the pattern-stmt that replaces it.  */

      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "last stmt in pattern.  don't mark"
			 " relevant/live.\n");
      stmt_vec_info old_stmt_info = stmt_info;
      stmt_info = STMT_VINFO_RELATED_STMT (stmt_info);
      gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == old_stmt_info);
      save_relevant = STMT_VINFO_RELEVANT (stmt_info);
      save_live_p = STMT_VINFO_LIVE_P (stmt_info);
    }

  STMT_VINFO_LIVE_P (stmt_info) |= live_p;
  if (relevant > STMT_VINFO_RELEVANT (stmt_info))
    STMT_VINFO_RELEVANT (stmt_info) = relevant;

  if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
      && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "already marked relevant/live.\n");
      return;
    }

  worklist->safe_push (stmt_info);
}


/* Function is_simple_and_all_uses_invariant

   Return true if STMT_INFO is simple and all uses of it are invariant.  */

bool
is_simple_and_all_uses_invariant (stmt_vec_info stmt_info,
				  loop_vec_info loop_vinfo)
{
  tree op;
  ssa_op_iter iter;

  gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
  if (!stmt)
    return false;

  FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_USE)
    {
      enum vect_def_type dt = vect_uninitialized_def;

      if (!vect_is_simple_use (op, loop_vinfo, &dt))
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "use not simple.\n");
	  return false;
	}

      if (dt != vect_external_def && dt != vect_constant_def)
	return false;
    }
  return true;
}

/* Function vect_stmt_relevant_p.

   Return true if STMT_INFO, in the loop that is represented by LOOP_VINFO,
   is "relevant for vectorization".

   A stmt is considered "relevant for vectorization" if:
   - it has uses outside the loop.
   - it has vdefs (it alters memory).
   - it is a control stmt in the loop (other than the exit condition).

   CHECKME: what other side effects would the vectorizer allow?  */

static bool
vect_stmt_relevant_p (stmt_vec_info stmt_info, loop_vec_info loop_vinfo,
		      enum vect_relevant *relevant, bool *live_p)
{
  class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  ssa_op_iter op_iter;
  imm_use_iterator imm_iter;
  use_operand_p use_p;
  def_operand_p def_p;

  *relevant = vect_unused_in_scope;
  *live_p = false;

  /* cond stmt other than loop exit cond.  */
  if (is_ctrl_stmt (stmt_info->stmt)
      && STMT_VINFO_TYPE (stmt_info) != loop_exit_ctrl_vec_info_type)
    *relevant = vect_used_in_scope;

  /* changing memory.  */
  if (gimple_code (stmt_info->stmt) != GIMPLE_PHI)
    if (gimple_vdef (stmt_info->stmt)
	&& !gimple_clobber_p (stmt_info->stmt))
      {
	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vec_stmt_relevant_p: stmt has vdefs.\n");
	*relevant = vect_used_in_scope;
      }

  /* uses outside the loop.  */
  FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt_info->stmt, op_iter, SSA_OP_DEF)
    {
      FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
	{
	  basic_block bb = gimple_bb (USE_STMT (use_p));
	  if (!flow_bb_inside_loop_p (loop, bb))
	    {
	      if (is_gimple_debug (USE_STMT (use_p)))
		continue;

	      if (dump_enabled_p ())
		dump_printf_loc (MSG_NOTE, vect_location,
				 "vec_stmt_relevant_p: used out of loop.\n");

	      /* We expect all such uses to be in the loop exit phis
		 (because of loop closed form).  */
	      gcc_assert (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI);
	      gcc_assert (bb == single_exit (loop)->dest);

	      *live_p = true;
	    }
	}
    }

  if (*live_p && *relevant == vect_unused_in_scope
      && !is_simple_and_all_uses_invariant (stmt_info, loop_vinfo))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "vec_stmt_relevant_p: stmt live but not relevant.\n");
      *relevant = vect_used_only_live;
    }

  return (*live_p || *relevant);
}
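
/* For illustration (not part of the original sources): in

     for (i = 0; i < n; i++)
       last = a[i];
     use (last);

   the load of a[i] is live (its value escapes through the loop-closed
   PHI at the exit) but has no other marking inside the loop, and since
   it is not invariant it ends up marked vect_used_only_live by the code
   above.  */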


/* Function exist_non_indexing_operands_for_use_p

   USE is one of the uses attached to STMT_INFO.  Check if USE is
   used in STMT_INFO for anything other than indexing an array.  */

static bool
exist_non_indexing_operands_for_use_p (tree use, stmt_vec_info stmt_info)
{
  tree operand;

  /* USE corresponds to some operand in STMT.  If there is no data
     reference in STMT, then any operand that corresponds to USE
     is not indexing an array.  */
  if (!STMT_VINFO_DATA_REF (stmt_info))
    return true;

  /* STMT has a data_ref.  FORNOW this means that it is of one of
     the following forms:
     -1- ARRAY_REF = var
     -2- var = ARRAY_REF
     (This should have been verified in analyze_data_refs).

     'var' in the second case corresponds to a def, not a use,
     so USE cannot correspond to any operands that are not used
     for array indexing.

     Therefore, all we need to check is if STMT falls into the
     first case, and whether var corresponds to USE.  */

  gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
  if (!assign || !gimple_assign_copy_p (assign))
    {
      gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
      if (call && gimple_call_internal_p (call))
	{
	  internal_fn ifn = gimple_call_internal_fn (call);
	  int mask_index = internal_fn_mask_index (ifn);
	  if (mask_index >= 0
	      && use == gimple_call_arg (call, mask_index))
	    return true;
	  int stored_value_index = internal_fn_stored_value_index (ifn);
	  if (stored_value_index >= 0
	      && use == gimple_call_arg (call, stored_value_index))
	    return true;
	  if (internal_gather_scatter_fn_p (ifn)
	      && use == gimple_call_arg (call, 1))
	    return true;
	}
      return false;
    }

  if (TREE_CODE (gimple_assign_lhs (assign)) == SSA_NAME)
    return false;
  operand = gimple_assign_rhs1 (assign);
  if (TREE_CODE (operand) != SSA_NAME)
    return false;

  if (operand == use)
    return true;

  return false;
}
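
/* For illustration (not part of the original sources): given the store
   "a[i_3] = x_5" (case -1- above), the function returns true for
   USE == x_5 because the stored value itself must be vectorized, and
   false for USE == i_3, which only feeds the address computation and is
   handled through the data reference instead.  */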


/*
   Function process_use.

   Inputs:
   - a USE in STMT_VINFO in a loop represented by LOOP_VINFO
   - RELEVANT - enum value to be set in the STMT_VINFO of the stmt
     that defined USE.  This is done by calling mark_relevant and passing it
     the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
   - FORCE is true if the exist_non_indexing_operands_for_use_p check shouldn't
     be performed.

   Outputs:
   Generally, LIVE_P and RELEVANT are used to define the liveness and
   relevance info of the DEF_STMT of this USE:
       STMT_VINFO_LIVE_P (DEF_stmt_vinfo) <-- live_p
       STMT_VINFO_RELEVANT (DEF_stmt_vinfo) <-- relevant
   Exceptions:
   - case 1: If USE is used only for address computations (e.g. array indexing),
     which does not need to be directly vectorized, then the liveness/relevance
     of the respective DEF_STMT is left unchanged.
   - case 2: If STMT_VINFO is a reduction phi and DEF_STMT is a reduction stmt,
     we skip DEF_STMT because it has already been processed.
   - case 3: If DEF_STMT and STMT_VINFO are in different nests, then
     "relevant" will be modified accordingly.

   Return true if everything is as expected.  Return false otherwise.  */

static opt_result
process_use (stmt_vec_info stmt_vinfo, tree use, loop_vec_info loop_vinfo,
	     enum vect_relevant relevant, vec<stmt_vec_info> *worklist,
	     bool force)
{
  stmt_vec_info dstmt_vinfo;
  enum vect_def_type dt;

  /* case 1: we are only interested in uses that need to be vectorized.  Uses
     that are used for address computation are not considered relevant.  */
  if (!force && !exist_non_indexing_operands_for_use_p (use, stmt_vinfo))
    return opt_result::success ();

  if (!vect_is_simple_use (use, loop_vinfo, &dt, &dstmt_vinfo))
    return opt_result::failure_at (stmt_vinfo->stmt,
				   "not vectorized:"
				   " unsupported use in stmt.\n");

  if (!dstmt_vinfo)
    return opt_result::success ();

  basic_block def_bb = gimple_bb (dstmt_vinfo->stmt);
  basic_block bb = gimple_bb (stmt_vinfo->stmt);

  /* case 2: A reduction phi (STMT) defined by a reduction stmt (DSTMT_VINFO).
     We have to force the stmt live since the epilogue loop needs it to
     continue computing the reduction.  */
  if (gimple_code (stmt_vinfo->stmt) == GIMPLE_PHI
      && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
      && gimple_code (dstmt_vinfo->stmt) != GIMPLE_PHI
      && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def
      && bb->loop_father == def_bb->loop_father)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "reduc-stmt defining reduc-phi in the same nest.\n");
      vect_mark_relevant (worklist, dstmt_vinfo, relevant, true);
      return opt_result::success ();
    }

  /* case 3a: outer-loop stmt defining an inner-loop stmt:
	outer-loop-header-bb:
		d = dstmt_vinfo
	inner-loop:
		stmt # use (d)
	outer-loop-tail-bb:
		...		  */
  if (flow_loop_nested_p (def_bb->loop_father, bb->loop_father))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "outer-loop def-stmt defining inner-loop stmt.\n");

      switch (relevant)
	{
	case vect_unused_in_scope:
	  relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_nested_cycle) ?
		      vect_used_in_scope : vect_unused_in_scope;
	  break;

	case vect_used_in_outer_by_reduction:
	  gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
	  relevant = vect_used_by_reduction;
	  break;

	case vect_used_in_outer:
	  gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
	  relevant = vect_used_in_scope;
	  break;

	case vect_used_in_scope:
	  break;

	default:
	  gcc_unreachable ();
	}
    }

  /* case 3b: inner-loop stmt defining an outer-loop stmt:
	outer-loop-header-bb:
		...
	inner-loop:
		d = dstmt_vinfo
	outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
		stmt # use (d)		*/
  else if (flow_loop_nested_p (bb->loop_father, def_bb->loop_father))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "inner-loop def-stmt defining outer-loop stmt.\n");

      switch (relevant)
	{
	case vect_unused_in_scope:
	  relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
	    || STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_double_reduction_def) ?
		      vect_used_in_outer_by_reduction : vect_unused_in_scope;
	  break;

	case vect_used_by_reduction:
	case vect_used_only_live:
	  relevant = vect_used_in_outer_by_reduction;
	  break;

	case vect_used_in_scope:
	  relevant = vect_used_in_outer;
	  break;

	default:
	  gcc_unreachable ();
	}
    }
  /* We are also not interested in uses on loop PHI backedges that are
     inductions.  Otherwise we'll needlessly vectorize the IV increment
     and cause hybrid SLP for SLP inductions.  Unless the PHI is live
     of course.  */
  else if (gimple_code (stmt_vinfo->stmt) == GIMPLE_PHI
	   && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_induction_def
	   && ! STMT_VINFO_LIVE_P (stmt_vinfo)
	   && (PHI_ARG_DEF_FROM_EDGE (stmt_vinfo->stmt,
				      loop_latch_edge (bb->loop_father))
	       == use))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "induction value on backedge.\n");
      return opt_result::success ();
    }


  vect_mark_relevant (worklist, dstmt_vinfo, relevant, false);
  return opt_result::success ();
}


/* Function vect_mark_stmts_to_be_vectorized.

   Not all stmts in the loop need to be vectorized.  For example:

     for i...
       for j...
   1.    T0 = i + j
   2.    T1 = a[T0]

   3.    j = j + 1

   Stmts 1 and 3 do not need to be vectorized, because loop control and
   addressing of vectorized data-refs are handled differently.

   This pass detects such stmts.  */

opt_result
vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo, bool *fatal)
{
  class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
  unsigned int nbbs = loop->num_nodes;
  gimple_stmt_iterator si;
  unsigned int i;
  basic_block bb;
  bool live_p;
  enum vect_relevant relevant;

  DUMP_VECT_SCOPE ("vect_mark_stmts_to_be_vectorized");

  auto_vec<stmt_vec_info, 64> worklist;

  /* 1. Init worklist.  */
  for (i = 0; i < nbbs; i++)
    {
      bb = bbs[i];
      for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
	{
	  stmt_vec_info phi_info = loop_vinfo->lookup_stmt (gsi_stmt (si));
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_NOTE, vect_location, "init: phi relevant? %G",
			     phi_info->stmt);

	  if (vect_stmt_relevant_p (phi_info, loop_vinfo, &relevant, &live_p))
	    vect_mark_relevant (&worklist, phi_info, relevant, live_p);
	}
      for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
	{
	  stmt_vec_info stmt_info = loop_vinfo->lookup_stmt (gsi_stmt (si));
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_NOTE, vect_location,
			     "init: stmt relevant? %G", stmt_info->stmt);

	  if (vect_stmt_relevant_p (stmt_info, loop_vinfo, &relevant, &live_p))
	    vect_mark_relevant (&worklist, stmt_info, relevant, live_p);
	}
    }

  /* 2. Process_worklist */
  while (worklist.length () > 0)
    {
      use_operand_p use_p;
      ssa_op_iter iter;

      stmt_vec_info stmt_vinfo = worklist.pop ();
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "worklist: examine stmt: %G", stmt_vinfo->stmt);

      /* Examine the USEs of STMT.  For each USE, mark the stmt that defines it
	 (DEF_STMT) as relevant/irrelevant according to the relevance property
	 of STMT.  */
      relevant = STMT_VINFO_RELEVANT (stmt_vinfo);

      /* Generally, the relevance property of STMT (in STMT_VINFO_RELEVANT) is
	 propagated as is to the DEF_STMTs of its USEs.

	 One exception is when STMT has been identified as defining a reduction
	 variable; in this case we set the relevance to vect_used_by_reduction.
	 This is because we distinguish between two kinds of relevant stmts -
	 those that are used by a reduction computation, and those that are
	 (also) used by a regular computation.  This allows us later on to
	 identify stmts that are used solely by a reduction, and therefore the
	 order of the results that they produce does not have to be kept.  */

      switch (STMT_VINFO_DEF_TYPE (stmt_vinfo))
	{
	case vect_reduction_def:
	  gcc_assert (relevant != vect_unused_in_scope);
	  if (relevant != vect_unused_in_scope
	      && relevant != vect_used_in_scope
	      && relevant != vect_used_by_reduction
	      && relevant != vect_used_only_live)
	    return opt_result::failure_at
	      (stmt_vinfo->stmt, "unsupported use of reduction.\n");
	  break;

	case vect_nested_cycle:
	  if (relevant != vect_unused_in_scope
	      && relevant != vect_used_in_outer_by_reduction
	      && relevant != vect_used_in_outer)
	    return opt_result::failure_at
	      (stmt_vinfo->stmt, "unsupported use of nested cycle.\n");
	  break;

	case vect_double_reduction_def:
	  if (relevant != vect_unused_in_scope
	      && relevant != vect_used_by_reduction
	      && relevant != vect_used_only_live)
	    return opt_result::failure_at
	      (stmt_vinfo->stmt, "unsupported use of double reduction.\n");
	  break;

	default:
	  break;
	}

      if (is_pattern_stmt_p (stmt_vinfo))
	{
	  /* Pattern statements are not inserted into the code, so
	     FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
	     have to scan the RHS or function arguments instead.  */
	  if (gassign *assign = dyn_cast <gassign *> (stmt_vinfo->stmt))
	    {
	      enum tree_code rhs_code = gimple_assign_rhs_code (assign);
	      tree op = gimple_assign_rhs1 (assign);

	      i = 1;
	      if (rhs_code == COND_EXPR && COMPARISON_CLASS_P (op))
		{
		  opt_result res
		    = process_use (stmt_vinfo, TREE_OPERAND (op, 0),
				   loop_vinfo, relevant, &worklist, false);
		  if (!res)
		    return res;
		  res = process_use (stmt_vinfo, TREE_OPERAND (op, 1),
				     loop_vinfo, relevant, &worklist, false);
		  if (!res)
		    return res;
		  i = 2;
		}
	      for (; i < gimple_num_ops (assign); i++)
		{
		  op = gimple_op (assign, i);
		  if (TREE_CODE (op) == SSA_NAME)
		    {
		      opt_result res
			= process_use (stmt_vinfo, op, loop_vinfo, relevant,
				       &worklist, false);
		      if (!res)
			return res;
		    }
		}
	    }
	  else if (gcall *call = dyn_cast <gcall *> (stmt_vinfo->stmt))
	    {
	      for (i = 0; i < gimple_call_num_args (call); i++)
		{
		  tree arg = gimple_call_arg (call, i);
		  opt_result res
		    = process_use (stmt_vinfo, arg, loop_vinfo, relevant,
				   &worklist, false);
		  if (!res)
		    return res;
		}
	    }
	}
      else
	FOR_EACH_PHI_OR_STMT_USE (use_p, stmt_vinfo->stmt, iter, SSA_OP_USE)
	  {
	    tree op = USE_FROM_PTR (use_p);
	    opt_result res
	      = process_use (stmt_vinfo, op, loop_vinfo, relevant,
			     &worklist, false);
	    if (!res)
	      return res;
	  }

      if (STMT_VINFO_GATHER_SCATTER_P (stmt_vinfo))
	{
	  gather_scatter_info gs_info;
	  if (!vect_check_gather_scatter (stmt_vinfo, loop_vinfo, &gs_info))
	    gcc_unreachable ();
	  opt_result res
	    = process_use (stmt_vinfo, gs_info.offset, loop_vinfo, relevant,
			   &worklist, true);
	  if (!res)
	    {
	      if (fatal)
		*fatal = false;
	      return res;
	    }
	}
    } /* while worklist */

  return opt_result::success ();
}

/* Compute the prologue cost for invariant or constant operands.  */

static unsigned
vect_prologue_cost_for_slp_op (slp_tree node, stmt_vec_info stmt_info,
			       unsigned opno, enum vect_def_type dt,
			       stmt_vector_for_cost *cost_vec)
{
  vec_info *vinfo = stmt_info->vinfo;
  gimple *stmt = SLP_TREE_SCALAR_STMTS (node)[0]->stmt;
  tree op = gimple_op (stmt, opno);
  unsigned prologue_cost = 0;

  /* Without looking at the actual initializer a vector of
     constants can be implemented as load from the constant pool.
     When all elements are the same we can use a splat.  */
  tree vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op), node);
  unsigned group_size = SLP_TREE_SCALAR_STMTS (node).length ();
  unsigned num_vects_to_check;
  unsigned HOST_WIDE_INT const_nunits;
  unsigned nelt_limit;
  if (TYPE_VECTOR_SUBPARTS (vectype).is_constant (&const_nunits)
      && ! multiple_p (const_nunits, group_size))
    {
      num_vects_to_check = SLP_TREE_NUMBER_OF_VEC_STMTS (node);
      nelt_limit = const_nunits;
    }
  else
    {
      /* If either the vector has variable length or the vectors
	 are composed of repeated whole groups we only need to
	 cost construction once.  All vectors will be the same.  */
      num_vects_to_check = 1;
      nelt_limit = group_size;
    }
  tree elt = NULL_TREE;
  unsigned nelt = 0;
  for (unsigned j = 0; j < num_vects_to_check * nelt_limit; ++j)
    {
      unsigned si = j % group_size;
      if (nelt == 0)
	elt = gimple_op (SLP_TREE_SCALAR_STMTS (node)[si]->stmt, opno);
      /* ??? We're just tracking whether all operands of a single
	 vector initializer are the same, ideally we'd check if
	 we emitted the same one already.  */
      else if (elt != gimple_op (SLP_TREE_SCALAR_STMTS (node)[si]->stmt,
				 opno))
	elt = NULL_TREE;
      nelt++;
      if (nelt == nelt_limit)
	{
	  /* ??? We need to pass down stmt_info for a vector type
	     even if it points to the wrong stmt.  */
	  prologue_cost += record_stmt_cost
	      (cost_vec, 1,
	       dt == vect_external_def
	       ? (elt ? scalar_to_vec : vec_construct)
	       : vector_load,
	       stmt_info, 0, vect_prologue);
	  nelt = 0;
	}
    }

  return prologue_cost;
}
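
/* A worked example (not part of the original sources): for an SLP node
   of group size 2 whose operand OPNO is { x_1, x_1 } with external
   defs, the loop above sees all elements equal and records a single
   scalar_to_vec (a splat); for { x_1, y_2 } it records a vec_construct
   instead, and for constant operands a vector_load from the constant
   pool.  */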

/* Function vect_model_simple_cost.

   Models cost for simple operations, i.e. those that only emit ncopies of a
   single op.  Right now, this does not account for multiple insns that could
   be generated for the single vector op.  We will handle that shortly.  */

static void
vect_model_simple_cost (stmt_vec_info stmt_info, int ncopies,
			enum vect_def_type *dt,
			int ndts,
			slp_tree node,
			stmt_vector_for_cost *cost_vec,
			vect_cost_for_stmt kind = vector_stmt)
{
  int inside_cost = 0, prologue_cost = 0;

  gcc_assert (cost_vec != NULL);

  /* ??? Somehow we need to fix this at the callers.  */
  if (node)
    ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (node);

  if (node)
    {
      /* Scan operands and account for prologue cost of constants/externals.
	 ??? This over-estimates cost for multiple uses and should be
	 re-engineered.  */
      gimple *stmt = SLP_TREE_SCALAR_STMTS (node)[0]->stmt;
      tree lhs = gimple_get_lhs (stmt);
      for (unsigned i = 0; i < gimple_num_ops (stmt); ++i)
	{
	  tree op = gimple_op (stmt, i);
	  enum vect_def_type dt;
	  if (!op || op == lhs)
	    continue;
	  if (vect_is_simple_use (op, stmt_info->vinfo, &dt)
	      && (dt == vect_constant_def || dt == vect_external_def))
	    prologue_cost += vect_prologue_cost_for_slp_op (node, stmt_info,
							    i, dt, cost_vec);
	}
    }
  else
    /* Cost the "broadcast" of a scalar operand into a vector operand.
       Use scalar_to_vec to cost the broadcast, as elsewhere in the vector
       cost model.  */
    for (int i = 0; i < ndts; i++)
      if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
	prologue_cost += record_stmt_cost (cost_vec, 1, scalar_to_vec,
					   stmt_info, 0, vect_prologue);

  /* Adjust for two-operator SLP nodes.  */
  if (node && SLP_TREE_TWO_OPERATORS (node))
    {
      ncopies *= 2;
      inside_cost += record_stmt_cost (cost_vec, ncopies, vec_perm,
				       stmt_info, 0, vect_body);
    }

  /* Pass the inside-of-loop statements to the target-specific cost model.  */
  inside_cost += record_stmt_cost (cost_vec, ncopies, kind,
				   stmt_info, 0, vect_body);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "vect_model_simple_cost: inside_cost = %d, "
		     "prologue_cost = %d .\n", inside_cost, prologue_cost);
}


/* Model cost for type demotion and promotion operations.  PWR is
   normally zero for single-step promotions and demotions.  It will be
   one if two-step promotion/demotion is required, and so on.  NCOPIES
   is the number of vector results (and thus number of instructions)
   for the narrowest end of the operation chain.  Each additional
   step doubles the number of instructions required.  */

static void
vect_model_promotion_demotion_cost (stmt_vec_info stmt_info,
				    enum vect_def_type *dt,
				    unsigned int ncopies, int pwr,
				    stmt_vector_for_cost *cost_vec)
{
  int i;
  int inside_cost = 0, prologue_cost = 0;

  for (i = 0; i < pwr + 1; i++)
    {
      inside_cost += record_stmt_cost (cost_vec, ncopies, vec_promote_demote,
				       stmt_info, 0, vect_body);
      ncopies *= 2;
    }

  /* FORNOW: Assuming a maximum of 2 args per stmt.  */
  for (i = 0; i < 2; i++)
    if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
      prologue_cost += record_stmt_cost (cost_vec, 1, vector_stmt,
					 stmt_info, 0, vect_prologue);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "vect_model_promotion_demotion_cost: inside_cost = %d, "
		     "prologue_cost = %d .\n", inside_cost, prologue_cost);
}
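
/* A worked example (not part of the original sources): a two-step
   promotion (PWR == 1) with NCOPIES == 2 at the narrow end records
   2 + 4 = 6 vec_promote_demote operations, since each widening step
   doubles the number of vector stmts required.  */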

/* Returns true if the current function returns DECL.  */

static bool
cfun_returns (tree decl)
{
  edge_iterator ei;
  edge e;
  FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
    {
      greturn *ret = safe_dyn_cast <greturn *> (last_stmt (e->src));
      if (!ret)
	continue;
      if (gimple_return_retval (ret) == decl)
	return true;
      /* We often end up with an aggregate copy to the result decl,
	 handle that case as well.  First skip intermediate clobbers
	 though.  */
      gimple *def = ret;
      do
	{
	  def = SSA_NAME_DEF_STMT (gimple_vuse (def));
	}
      while (gimple_clobber_p (def));
      if (is_a <gassign *> (def)
	  && gimple_assign_lhs (def) == gimple_return_retval (ret)
	  && gimple_assign_rhs1 (def) == decl)
	return true;
    }
  return false;
}
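
/* For illustration (not part of the original sources): both GIMPLE
   shapes

     return decl;

   and

     <retval> = decl;
     return <retval>;

   make cfun_returns (decl) true; the second form is the aggregate copy
   mentioned above, found by walking the virtual use-def chain backwards
   from the return past any intermediate clobbers.  */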

/* Function vect_model_store_cost

   Models cost for stores.  In the case of grouped accesses, one access
   has the overhead of the grouped access attributed to it.  */

static void
vect_model_store_cost (stmt_vec_info stmt_info, int ncopies,
		       enum vect_def_type dt,
		       vect_memory_access_type memory_access_type,
		       vec_load_store_type vls_type, slp_tree slp_node,
		       stmt_vector_for_cost *cost_vec)
{
  unsigned int inside_cost = 0, prologue_cost = 0;
  stmt_vec_info first_stmt_info = stmt_info;
  bool grouped_access_p = STMT_VINFO_GROUPED_ACCESS (stmt_info);

  /* ??? Somehow we need to fix this at the callers.  */
  if (slp_node)
    ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);

  if (vls_type == VLS_STORE_INVARIANT)
    {
      if (slp_node)
	prologue_cost += vect_prologue_cost_for_slp_op (slp_node, stmt_info,
							1, dt, cost_vec);
      else
	prologue_cost += record_stmt_cost (cost_vec, 1, scalar_to_vec,
					   stmt_info, 0, vect_prologue);
    }

  /* Grouped stores update all elements in the group at once,
     so we want the DR for the first statement.  */
  if (!slp_node && grouped_access_p)
    first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);

  /* True if we should include any once-per-group costs as well as
     the cost of the statement itself.  For SLP we only get called
     once per group anyhow.  */
  bool first_stmt_p = (first_stmt_info == stmt_info);

  /* We assume that the cost of a single store-lanes instruction is
     equivalent to the cost of DR_GROUP_SIZE separate stores.  If a grouped
     access is instead being provided by a permute-and-store operation,
     include the cost of the permutes.  */
  if (first_stmt_p
      && memory_access_type == VMAT_CONTIGUOUS_PERMUTE)
    {
      /* Uses high and low interleave or shuffle operations for each
	 needed permute.  */
      int group_size = DR_GROUP_SIZE (first_stmt_info);
      int nstmts = ncopies * ceil_log2 (group_size) * group_size;
      inside_cost = record_stmt_cost (cost_vec, nstmts, vec_perm,
				      stmt_info, 0, vect_body);

      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "vect_model_store_cost: strided group_size = %d .\n",
			 group_size);
    }

  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  /* Costs of the stores.  */
  if (memory_access_type == VMAT_ELEMENTWISE
      || memory_access_type == VMAT_GATHER_SCATTER)
    {
      /* N scalar stores, one per vector element.  */
      unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
      inside_cost += record_stmt_cost (cost_vec,
				       ncopies * assumed_nunits,
				       scalar_store, stmt_info, 0, vect_body);
    }
  else
    vect_get_store_cost (stmt_info, ncopies, &inside_cost, cost_vec);

  if (memory_access_type == VMAT_ELEMENTWISE
      || memory_access_type == VMAT_STRIDED_SLP)
    {
      /* Plus the cost of extracting each element from its vector
	 (vec_to_scalar).  */
      unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
      inside_cost += record_stmt_cost (cost_vec,
				       ncopies * assumed_nunits,
				       vec_to_scalar, stmt_info, 0, vect_body);
    }

  /* When vectorizing a store into the function result assign
     a penalty if the function returns in a multi-register location.
     In this case we assume we'll end up with having to spill the
     vector result and do piecewise loads as a conservative estimate.  */
  tree base = get_base_address (STMT_VINFO_DATA_REF (stmt_info)->ref);
  if (base
      && (TREE_CODE (base) == RESULT_DECL
	  || (DECL_P (base) && cfun_returns (base)))
      && !aggregate_value_p (base, cfun->decl))
    {
      rtx reg = hard_function_value (TREE_TYPE (base), cfun->decl, 0, 1);
      /* ??? Handle PARALLEL in some way.  */
      if (REG_P (reg))
	{
	  int nregs = hard_regno_nregs (REGNO (reg), GET_MODE (reg));
	  /* Assume that a single reg-reg move is possible and cheap,
	     do not account for vector to gp register move cost.  */
	  if (nregs > 1)
	    {
	      /* Spill.  */
	      prologue_cost += record_stmt_cost (cost_vec, ncopies,
						 vector_store,
						 stmt_info, 0, vect_epilogue);
	      /* Loads.  */
	      prologue_cost += record_stmt_cost (cost_vec, ncopies * nregs,
						 scalar_load,
						 stmt_info, 0, vect_epilogue);
	    }
	}
    }

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "vect_model_store_cost: inside_cost = %d, "
		     "prologue_cost = %d .\n", inside_cost, prologue_cost);
}


/* Calculate cost of DR's memory access.  */
void
vect_get_store_cost (stmt_vec_info stmt_info, int ncopies,
		     unsigned int *inside_cost,
		     stmt_vector_for_cost *body_cost_vec)
{
  dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
  int alignment_support_scheme
    = vect_supportable_dr_alignment (dr_info, false);

  switch (alignment_support_scheme)
    {
    case dr_aligned:
      {
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies,
					  vector_store, stmt_info, 0,
					  vect_body);

	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vect_model_store_cost: aligned.\n");
	break;
      }

    case dr_unaligned_supported:
      {
	/* Here, we assign an additional cost for the unaligned store.  */
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies,
					  unaligned_store, stmt_info,
					  DR_MISALIGNMENT (dr_info),
					  vect_body);
	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vect_model_store_cost: unaligned supported by "
			   "hardware.\n");
	break;
      }

    case dr_unaligned_unsupported:
      {
	*inside_cost = VECT_MAX_COST;

	if (dump_enabled_p ())
	  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			   "vect_model_store_cost: unsupported access.\n");
	break;
      }

    default:
      gcc_unreachable ();
    }
}


/* Function vect_model_load_cost

   Models cost for loads.  In the case of grouped accesses, one access has
   the overhead of the grouped access attributed to it.  Since unaligned
   accesses are supported for loads, we also account for the costs of the
   access scheme chosen.  */

static void
vect_model_load_cost (stmt_vec_info stmt_info, unsigned ncopies,
		      vect_memory_access_type memory_access_type,
		      slp_instance instance,
		      slp_tree slp_node,
		      stmt_vector_for_cost *cost_vec)
{
  unsigned int inside_cost = 0, prologue_cost = 0;
  bool grouped_access_p = STMT_VINFO_GROUPED_ACCESS (stmt_info);

  gcc_assert (cost_vec);

  /* ??? Somehow we need to fix this at the callers.  */
  if (slp_node)
    ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);

  if (slp_node && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
    {
      /* If the load is permuted then the alignment is determined by
	 the first group element not by the first scalar stmt DR.  */
      stmt_vec_info first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
      /* Record the cost for the permutation.  */
      unsigned n_perms;
      unsigned assumed_nunits
	= vect_nunits_for_cost (STMT_VINFO_VECTYPE (first_stmt_info));
      unsigned slp_vf = (ncopies * assumed_nunits) / instance->group_size;
      vect_transform_slp_perm_load (slp_node, vNULL, NULL,
				    slp_vf, instance, true,
				    &n_perms);
      inside_cost += record_stmt_cost (cost_vec, n_perms, vec_perm,
				       first_stmt_info, 0, vect_body);
      /* And adjust the number of loads performed.  This handles
	 redundancies as well as loads that are later dead.  */
      auto_sbitmap perm (DR_GROUP_SIZE (first_stmt_info));
      bitmap_clear (perm);
      for (unsigned i = 0;
	   i < SLP_TREE_LOAD_PERMUTATION (slp_node).length (); ++i)
	bitmap_set_bit (perm, SLP_TREE_LOAD_PERMUTATION (slp_node)[i]);
      ncopies = 0;
      bool load_seen = false;
      for (unsigned i = 0; i < DR_GROUP_SIZE (first_stmt_info); ++i)
	{
	  if (i % assumed_nunits == 0)
	    {
	      if (load_seen)
		ncopies++;
	      load_seen = false;
	    }
	  if (bitmap_bit_p (perm, i))
	    load_seen = true;
	}
      if (load_seen)
	ncopies++;
      gcc_assert (ncopies
		  <= (DR_GROUP_SIZE (first_stmt_info)
		      - DR_GROUP_GAP (first_stmt_info)
		      + assumed_nunits - 1) / assumed_nunits);
    }

  /* Grouped loads read all elements in the group at once,
     so we want the DR for the first statement.  */
  stmt_vec_info first_stmt_info = stmt_info;
  if (!slp_node && grouped_access_p)
    first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);

  /* True if we should include any once-per-group costs as well as
     the cost of the statement itself.  For SLP we only get called
     once per group anyhow.  */
  bool first_stmt_p = (first_stmt_info == stmt_info);

  /* We assume that the cost of a single load-lanes instruction is
     equivalent to the cost of DR_GROUP_SIZE separate loads.  If a grouped
     access is instead being provided by a load-and-permute operation,
     include the cost of the permutes.  */
  if (first_stmt_p
      && memory_access_type == VMAT_CONTIGUOUS_PERMUTE)
    {
      /* Uses even and odd extract operations or shuffle operations
	 for each needed permute.  */
      int group_size = DR_GROUP_SIZE (first_stmt_info);
      int nstmts = ncopies * ceil_log2 (group_size) * group_size;
      inside_cost += record_stmt_cost (cost_vec, nstmts, vec_perm,
				       stmt_info, 0, vect_body);

      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "vect_model_load_cost: strided group_size = %d .\n",
			 group_size);
    }

  /* The loads themselves.  */
  if (memory_access_type == VMAT_ELEMENTWISE
      || memory_access_type == VMAT_GATHER_SCATTER)
    {
      /* N scalar loads plus gathering them into a vector.  */
      tree vectype = STMT_VINFO_VECTYPE (stmt_info);
      unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
      inside_cost += record_stmt_cost (cost_vec,
				       ncopies * assumed_nunits,
				       scalar_load, stmt_info, 0, vect_body);
    }
  else
    vect_get_load_cost (stmt_info, ncopies, first_stmt_p,
			&inside_cost, &prologue_cost,
			cost_vec, cost_vec, true);
  if (memory_access_type == VMAT_ELEMENTWISE
      || memory_access_type == VMAT_STRIDED_SLP)
    inside_cost += record_stmt_cost (cost_vec, ncopies, vec_construct,
				     stmt_info, 0, vect_body);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "vect_model_load_cost: inside_cost = %d, "
		     "prologue_cost = %d .\n", inside_cost, prologue_cost);
}


/* Calculate cost of DR's memory access.  */
void
vect_get_load_cost (stmt_vec_info stmt_info, int ncopies,
		    bool add_realign_cost, unsigned int *inside_cost,
		    unsigned int *prologue_cost,
		    stmt_vector_for_cost *prologue_cost_vec,
		    stmt_vector_for_cost *body_cost_vec,
		    bool record_prologue_costs)
{
  dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
  int alignment_support_scheme
    = vect_supportable_dr_alignment (dr_info, false);

  switch (alignment_support_scheme)
    {
    case dr_aligned:
      {
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
					  stmt_info, 0, vect_body);

	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vect_model_load_cost: aligned.\n");

	break;
      }
    case dr_unaligned_supported:
      {
	/* Here, we assign an additional cost for the unaligned load.  */
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies,
					  unaligned_load, stmt_info,
					  DR_MISALIGNMENT (dr_info),
					  vect_body);

	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vect_model_load_cost: unaligned supported by "
			   "hardware.\n");

	break;
      }
    case dr_explicit_realign:
      {
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies * 2,
					  vector_load, stmt_info, 0, vect_body);
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies,
					  vec_perm, stmt_info, 0, vect_body);

	/* FIXME: If the misalignment remains fixed across the iterations of
	   the containing loop, the following cost should be added to the
	   prologue costs.  */
	if (targetm.vectorize.builtin_mask_for_load)
	  *inside_cost += record_stmt_cost (body_cost_vec, 1, vector_stmt,
					    stmt_info, 0, vect_body);

	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vect_model_load_cost: explicit realign\n");

	break;
      }
    case dr_explicit_realign_optimized:
      {
	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vect_model_load_cost: unaligned software "
			   "pipelined.\n");

	/* Unaligned software pipeline has a load of an address, an initial
	   load, and possibly a mask operation to "prime" the loop.  However,
	   if this is an access in a group of loads, which provide grouped
	   access, then the above cost should only be considered for one
	   access in the group.  Inside the loop, there is a load op
	   and a realignment op.  */

	if (add_realign_cost && record_prologue_costs)
	  {
	    *prologue_cost += record_stmt_cost (prologue_cost_vec, 2,
						vector_stmt, stmt_info,
						0, vect_prologue);
	    if (targetm.vectorize.builtin_mask_for_load)
	      *prologue_cost += record_stmt_cost (prologue_cost_vec, 1,
						  vector_stmt, stmt_info,
						  0, vect_prologue);
	  }

	*inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
					  stmt_info, 0, vect_body);
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_perm,
					  stmt_info, 0, vect_body);

	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vect_model_load_cost: explicit realign optimized"
			   "\n");

	break;
      }

    case dr_unaligned_unsupported:
      {
	*inside_cost = VECT_MAX_COST;

	if (dump_enabled_p ())
	  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			   "vect_model_load_cost: unsupported access.\n");
	break;
      }

    default:
      gcc_unreachable ();
    }
}

/* Insert the new stmt NEW_STMT at *GSI or at the appropriate place in
   the loop preheader for the vectorized stmt STMT_VINFO.  */

static void
vect_init_vector_1 (stmt_vec_info stmt_vinfo, gimple *new_stmt,
		    gimple_stmt_iterator *gsi)
{
  if (gsi)
    vect_finish_stmt_generation (stmt_vinfo, new_stmt, gsi);
  else
    {
      loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);

      if (loop_vinfo)
	{
	  class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
	  basic_block new_bb;
	  edge pe;

	  if (nested_in_vect_loop_p (loop, stmt_vinfo))
	    loop = loop->inner;

	  pe = loop_preheader_edge (loop);
	  new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
	  gcc_assert (!new_bb);
	}
      else
	{
	  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
	  basic_block bb;
	  gimple_stmt_iterator gsi_bb_start;

	  gcc_assert (bb_vinfo);
	  bb = BB_VINFO_BB (bb_vinfo);
	  gsi_bb_start = gsi_after_labels (bb);
	  gsi_insert_before (&gsi_bb_start, new_stmt, GSI_SAME_STMT);
	}
    }

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "created new init_stmt: %G", new_stmt);
}

/* Function vect_init_vector.

   Insert a new stmt (INIT_STMT) that initializes a new variable of type
   TYPE with the value VAL.  If TYPE is a vector type and VAL does not have
   vector type a vector with all elements equal to VAL is created first.
   Place the initialization at GSI if it is not NULL.  Otherwise, place the
   initialization at the loop preheader.
   Return the DEF of INIT_STMT.
   It will be used in the vectorization of STMT_INFO.  */

tree
vect_init_vector (stmt_vec_info stmt_info, tree val, tree type,
		  gimple_stmt_iterator *gsi)
{
  gimple *init_stmt;
  tree new_temp;

  /* We abuse this function to push something to an SSA name with initial
     value 'val'.  */
  if (! useless_type_conversion_p (type, TREE_TYPE (val)))
    {
      gcc_assert (TREE_CODE (type) == VECTOR_TYPE);
      if (! types_compatible_p (TREE_TYPE (type), TREE_TYPE (val)))
	{
	  /* Scalar boolean value should be transformed into
	     all zeros or all ones value before building a vector.  */
	  if (VECTOR_BOOLEAN_TYPE_P (type))
	    {
	      tree true_val = build_all_ones_cst (TREE_TYPE (type));
	      tree false_val = build_zero_cst (TREE_TYPE (type));

	      if (CONSTANT_CLASS_P (val))
		val = integer_zerop (val) ? false_val : true_val;
	      else
		{
		  new_temp = make_ssa_name (TREE_TYPE (type));
		  init_stmt = gimple_build_assign (new_temp, COND_EXPR,
						   val, true_val, false_val);
		  vect_init_vector_1 (stmt_info, init_stmt, gsi);
		  val = new_temp;
		}
	    }
	  else
	    {
	      gimple_seq stmts = NULL;
	      if (! INTEGRAL_TYPE_P (TREE_TYPE (val)))
		val = gimple_build (&stmts, VIEW_CONVERT_EXPR,
				    TREE_TYPE (type), val);
	      else
		/* ??? Condition vectorization expects us to do
		   promotion of invariant/external defs.  */
		val = gimple_convert (&stmts, TREE_TYPE (type), val);
	      for (gimple_stmt_iterator gsi2 = gsi_start (stmts);
		   !gsi_end_p (gsi2); )
		{
		  init_stmt = gsi_stmt (gsi2);
		  gsi_remove (&gsi2, false);
		  vect_init_vector_1 (stmt_info, init_stmt, gsi);
		}
	    }
	}
      val = build_vector_from_val (type, val);
    }

  new_temp = vect_get_new_ssa_name (type, vect_simple_var, "cst_");
  init_stmt = gimple_build_assign (new_temp, val);
  vect_init_vector_1 (stmt_info, init_stmt, gsi);
  return new_temp;
}
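
/* For illustration (a sketch, not part of the original sources): given
   a V4SI TYPE and the scalar constant VAL == 3, this emits in the loop
   preheader roughly

     cst_1 = { 3, 3, 3, 3 };

   and returns cst_1, which the vectorized stmt then uses as its
   invariant operand.  */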
1513
1514 /* Function vect_get_vec_def_for_operand_1.
1515
1516 For a defining stmt DEF_STMT_INFO of a scalar stmt, return a vector def
1517 with type DT that will be used in the vectorized stmt. */
1518
1519 tree
vect_get_vec_def_for_operand_1(stmt_vec_info def_stmt_info,enum vect_def_type dt)1520 vect_get_vec_def_for_operand_1 (stmt_vec_info def_stmt_info,
1521 enum vect_def_type dt)
1522 {
1523 tree vec_oprnd;
1524 stmt_vec_info vec_stmt_info;
1525
1526 switch (dt)
1527 {
1528 /* operand is a constant or a loop invariant. */
1529 case vect_constant_def:
1530 case vect_external_def:
1531 /* Code should use vect_get_vec_def_for_operand. */
1532 gcc_unreachable ();
1533
1534 /* Operand is defined by a loop header phi. In case of nested
1535 cycles we also may have uses of the backedge def. */
1536 case vect_reduction_def:
1537 case vect_double_reduction_def:
1538 case vect_nested_cycle:
1539 case vect_induction_def:
1540 gcc_assert (gimple_code (def_stmt_info->stmt) == GIMPLE_PHI
1541 || dt == vect_nested_cycle);
1542 /* Fallthru. */
1543
1544 /* operand is defined inside the loop. */
1545 case vect_internal_def:
1546 {
1547 /* Get the def from the vectorized stmt. */
1548 vec_stmt_info = STMT_VINFO_VEC_STMT (def_stmt_info);
1549 /* Get vectorized pattern statement. */
1550 if (!vec_stmt_info
1551 && STMT_VINFO_IN_PATTERN_P (def_stmt_info)
1552 && !STMT_VINFO_RELEVANT (def_stmt_info))
1553 vec_stmt_info = (STMT_VINFO_VEC_STMT
1554 (STMT_VINFO_RELATED_STMT (def_stmt_info)));
1555 gcc_assert (vec_stmt_info);
1556 if (gphi *phi = dyn_cast <gphi *> (vec_stmt_info->stmt))
1557 vec_oprnd = PHI_RESULT (phi);
1558 else
1559 vec_oprnd = gimple_get_lhs (vec_stmt_info->stmt);
1560 return vec_oprnd;
1561 }
1562
1563 default:
1564 gcc_unreachable ();
1565 }
1566 }
1567
1568
1569 /* Function vect_get_vec_def_for_operand.
1570
1571 OP is an operand in STMT_VINFO. This function returns a (vector) def
1572 that will be used in the vectorized stmt for STMT_VINFO.
1573
1574 In the case that OP is an SSA_NAME which is defined in the loop, then
1575 STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.
1576
1577 In case OP is an invariant or constant, a new stmt that creates a vector def
1578 needs to be introduced. VECTYPE may be used to specify a required type for
1579 vector invariant. */
1580
1581 tree
vect_get_vec_def_for_operand(tree op,stmt_vec_info stmt_vinfo,tree vectype)1582 vect_get_vec_def_for_operand (tree op, stmt_vec_info stmt_vinfo, tree vectype)
1583 {
1584 gimple *def_stmt;
1585 enum vect_def_type dt;
1586 bool is_simple_use;
1587 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1588
1589 if (dump_enabled_p ())
1590 dump_printf_loc (MSG_NOTE, vect_location,
1591 "vect_get_vec_def_for_operand: %T\n", op);
1592
1593 stmt_vec_info def_stmt_info;
1594 is_simple_use = vect_is_simple_use (op, loop_vinfo, &dt,
1595 &def_stmt_info, &def_stmt);
1596 gcc_assert (is_simple_use);
1597 if (def_stmt && dump_enabled_p ())
1598 dump_printf_loc (MSG_NOTE, vect_location, " def_stmt = %G", def_stmt);
1599
1600 if (dt == vect_constant_def || dt == vect_external_def)
1601 {
1602 tree stmt_vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
1603 tree vector_type;
1604
1605 if (vectype)
1606 vector_type = vectype;
1607 else if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op))
1608 && VECTOR_BOOLEAN_TYPE_P (stmt_vectype))
1609 vector_type = truth_type_for (stmt_vectype);
1610 else
1611 vector_type = get_vectype_for_scalar_type (loop_vinfo, TREE_TYPE (op));
1612
1613 gcc_assert (vector_type);
1614 return vect_init_vector (stmt_vinfo, op, vector_type, NULL);
1615 }
1616 else
1617 return vect_get_vec_def_for_operand_1 (def_stmt_info, dt);
1618 }


/* Function vect_get_vec_def_for_stmt_copy

   Return a vector-def for an operand.  This function is used when the
   vectorized stmt to be created (by the caller to this function) is a "copy"
   created in case the vectorized result cannot fit in one vector, and several
   copies of the vector-stmt are required.  In this case the vector-def is
   retrieved from the vector stmt recorded in the STMT_VINFO_RELATED_STMT field
   of the stmt that defines VEC_OPRND.  VINFO describes the vectorization.

   Context:
        In case the vectorization factor (VF) is bigger than the number
   of elements that can fit in a vectype (nunits), we have to generate
   more than one vector stmt to vectorize the scalar stmt.  This situation
   arises when there are multiple data-types operated upon in the loop; the
   smallest data-type determines the VF, and as a result, when vectorizing
   stmts operating on wider types we need to create 'VF/nunits' "copies" of the
   vector stmt (each computing a vector of 'nunits' results, and together
   computing 'VF' results in each iteration).  This function is called when
   vectorizing such a stmt (e.g. vectorizing S2 in the illustration below, in
   which VF=16 and nunits=4, so the number of copies required is 4):

   scalar stmt:         vectorized into:          STMT_VINFO_RELATED_STMT

   S1: x = load         VS1.0:  vx.0 = memref0      VS1.1
                        VS1.1:  vx.1 = memref1      VS1.2
                        VS1.2:  vx.2 = memref2      VS1.3
                        VS1.3:  vx.3 = memref3

   S2: z = x + ...      VSnew.0:  vz0 = vx.0 + ...  VSnew.1
                        VSnew.1:  vz1 = vx.1 + ...  VSnew.2
                        VSnew.2:  vz2 = vx.2 + ...  VSnew.3
                        VSnew.3:  vz3 = vx.3 + ...

   The vectorization of S1 is explained in vectorizable_load.
   The vectorization of S2:
        To create the first vector-stmt out of the 4 copies - VSnew.0 -
   the function 'vect_get_vec_def_for_operand' is called to
   get the relevant vector-def for each operand of S2.  For operand x it
   returns the vector-def 'vx.0'.

        To create the remaining copies of the vector-stmt (VSnew.j), this
   function is called to get the relevant vector-def for each operand.  It is
   obtained from the respective VS1.j stmt, which is recorded in the
   STMT_VINFO_RELATED_STMT field of the stmt that defines VEC_OPRND.

        For example, to obtain the vector-def 'vx.1' in order to create the
   vector stmt 'VSnew.1', this function is called with VEC_OPRND='vx.0'.
   Given 'vx.0' we obtain the stmt that defines it ('VS1.0'); from the
   STMT_VINFO_RELATED_STMT field of 'VS1.0' we obtain the next copy - 'VS1.1',
   and return its def ('vx.1').
   Overall, to create the above sequence this function will be called 3 times:
        vx.1 = vect_get_vec_def_for_stmt_copy (vinfo, vx.0);
        vx.2 = vect_get_vec_def_for_stmt_copy (vinfo, vx.1);
        vx.3 = vect_get_vec_def_for_stmt_copy (vinfo, vx.2);  */

tree
vect_get_vec_def_for_stmt_copy (vec_info *vinfo, tree vec_oprnd)
{
  stmt_vec_info def_stmt_info = vinfo->lookup_def (vec_oprnd);
  if (!def_stmt_info)
    /* Do nothing; can reuse same def.  */
    return vec_oprnd;

  def_stmt_info = STMT_VINFO_RELATED_STMT (def_stmt_info);
  gcc_assert (def_stmt_info);
  if (gphi *phi = dyn_cast <gphi *> (def_stmt_info->stmt))
    vec_oprnd = PHI_RESULT (phi);
  else
    vec_oprnd = gimple_get_lhs (def_stmt_info->stmt);
  return vec_oprnd;
}


/* Get vectorized definitions for the operands to create a copy of an original
   stmt.  See vect_get_vec_def_for_stmt_copy () for details.  */

void
vect_get_vec_defs_for_stmt_copy (vec_info *vinfo,
                                 vec<tree> *vec_oprnds0,
                                 vec<tree> *vec_oprnds1)
{
  tree vec_oprnd = vec_oprnds0->pop ();

  vec_oprnd = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd);
  vec_oprnds0->quick_push (vec_oprnd);

  if (vec_oprnds1 && vec_oprnds1->length ())
    {
      vec_oprnd = vec_oprnds1->pop ();
      vec_oprnd = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd);
      vec_oprnds1->quick_push (vec_oprnd);
    }
}


/* Get vectorized definitions for OP0 and OP1.  */

void
vect_get_vec_defs (tree op0, tree op1, stmt_vec_info stmt_info,
                   vec<tree> *vec_oprnds0,
                   vec<tree> *vec_oprnds1,
                   slp_tree slp_node)
{
  if (slp_node)
    {
      auto_vec<vec<tree> > vec_defs (SLP_TREE_CHILDREN (slp_node).length ());
      vect_get_slp_defs (slp_node, &vec_defs, op1 ? 2 : 1);
      *vec_oprnds0 = vec_defs[0];
      if (op1)
        *vec_oprnds1 = vec_defs[1];
    }
  else
    {
      tree vec_oprnd;

      vec_oprnds0->create (1);
      vec_oprnd = vect_get_vec_def_for_operand (op0, stmt_info);
      vec_oprnds0->quick_push (vec_oprnd);

      if (op1)
        {
          vec_oprnds1->create (1);
          vec_oprnd = vect_get_vec_def_for_operand (op1, stmt_info);
          vec_oprnds1->quick_push (vec_oprnd);
        }
    }
}

/* Helper function called by vect_finish_replace_stmt and
   vect_finish_stmt_generation.  Set the location of the new
   statement and create and return a stmt_vec_info for it.  */

static stmt_vec_info
vect_finish_stmt_generation_1 (stmt_vec_info stmt_info, gimple *vec_stmt)
{
  vec_info *vinfo = stmt_info->vinfo;

  stmt_vec_info vec_stmt_info = vinfo->add_stmt (vec_stmt);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location, "add new stmt: %G", vec_stmt);

  gimple_set_location (vec_stmt, gimple_location (stmt_info->stmt));

  /* While EH edges will generally prevent vectorization, stmt might
     e.g. be in a must-not-throw region.  Ensure newly created stmts
     that could throw are part of the same region.  */
  int lp_nr = lookup_stmt_eh_lp (stmt_info->stmt);
  if (lp_nr != 0 && stmt_could_throw_p (cfun, vec_stmt))
    add_stmt_to_eh_lp (vec_stmt, lp_nr);

  return vec_stmt_info;
}

/* Replace the scalar statement STMT_INFO with a new vector statement VEC_STMT,
   which sets the same scalar result as STMT_INFO did.  Create and return a
   stmt_vec_info for VEC_STMT.  */

stmt_vec_info
vect_finish_replace_stmt (stmt_vec_info stmt_info, gimple *vec_stmt)
{
  gimple *scalar_stmt = vect_orig_stmt (stmt_info)->stmt;
  gcc_assert (gimple_get_lhs (scalar_stmt) == gimple_get_lhs (vec_stmt));

  gimple_stmt_iterator gsi = gsi_for_stmt (scalar_stmt);
  gsi_replace (&gsi, vec_stmt, true);

  return vect_finish_stmt_generation_1 (stmt_info, vec_stmt);
}

/* Add VEC_STMT to the vectorized implementation of STMT_INFO and insert it
   before *GSI.  Create and return a stmt_vec_info for VEC_STMT.  */

stmt_vec_info
vect_finish_stmt_generation (stmt_vec_info stmt_info, gimple *vec_stmt,
                             gimple_stmt_iterator *gsi)
{
  gcc_assert (gimple_code (stmt_info->stmt) != GIMPLE_LABEL);

  if (!gsi_end_p (*gsi)
      && gimple_has_mem_ops (vec_stmt))
    {
      gimple *at_stmt = gsi_stmt (*gsi);
      tree vuse = gimple_vuse (at_stmt);
      if (vuse && TREE_CODE (vuse) == SSA_NAME)
        {
          tree vdef = gimple_vdef (at_stmt);
          gimple_set_vuse (vec_stmt, gimple_vuse (at_stmt));
          /* If we have an SSA vuse and insert a store, update virtual
             SSA form to avoid triggering the renamer.  Do so only
             if we can easily see all uses - which is what almost always
             happens with the way vectorized stmts are inserted.  */
          if ((vdef && TREE_CODE (vdef) == SSA_NAME)
              && ((is_gimple_assign (vec_stmt)
                   && !is_gimple_reg (gimple_assign_lhs (vec_stmt)))
                  || (is_gimple_call (vec_stmt)
                      && !(gimple_call_flags (vec_stmt)
                           & (ECF_CONST|ECF_PURE|ECF_NOVOPS)))))
            {
              tree new_vdef = copy_ssa_name (vuse, vec_stmt);
              gimple_set_vdef (vec_stmt, new_vdef);
              SET_USE (gimple_vuse_op (at_stmt), new_vdef);
            }
        }
    }
  gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);
  return vect_finish_stmt_generation_1 (stmt_info, vec_stmt);
}
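
/* For instance (an illustrative sketch, not output from a real run):
   if *GSI points at a scalar store

       MEM[base] = x_1;        # .MEM_6 = VDEF <.MEM_5>

   and VEC_STMT is a vector store inserted before it, the code above
   roughly rewrites the virtual operands to

       MEM[vbase] = vx_2;      # .MEM_8 = VDEF <.MEM_5>
       MEM[base] = x_1;        # .MEM_6 = VDEF <.MEM_8>

   which keeps virtual SSA form valid without invoking the renamer.  */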

/* We want to vectorize a call to combined function CFN with function
   decl FNDECL, using VECTYPE_OUT as the type of the output and VECTYPE_IN
   as the types of all inputs.  Check whether this is possible using
   an internal function, returning its code if so or IFN_LAST if not.  */

static internal_fn
vectorizable_internal_function (combined_fn cfn, tree fndecl,
                                tree vectype_out, tree vectype_in)
{
  internal_fn ifn;
  if (internal_fn_p (cfn))
    ifn = as_internal_fn (cfn);
  else
    ifn = associated_internal_fn (fndecl);
  if (ifn != IFN_LAST && direct_internal_fn_p (ifn))
    {
      const direct_internal_fn_info &info = direct_internal_fn (ifn);
      if (info.vectorizable)
        {
          tree type0 = (info.type0 < 0 ? vectype_out : vectype_in);
          tree type1 = (info.type1 < 0 ? vectype_out : vectype_in);
          if (direct_internal_fn_supported_p (ifn, tree_pair (type0, type1),
                                              OPTIMIZE_FOR_SPEED))
            return ifn;
        }
    }
  return IFN_LAST;
}


static tree permute_vec_elements (tree, tree, tree, stmt_vec_info,
                                  gimple_stmt_iterator *);

/* Check whether a load or store statement in the loop described by
   LOOP_VINFO is possible in a fully-masked loop.  This is testing
   whether the vectorizer pass has the appropriate support, as well as
   whether the target does.

   VLS_TYPE says whether the statement is a load or store and VECTYPE
   is the type of the vector being loaded or stored.  MEMORY_ACCESS_TYPE
   says how the load or store is going to be implemented and GROUP_SIZE
   is the number of load or store statements in the containing group.
   If the access is a gather load or scatter store, GS_INFO describes
   its arguments.  If the load or store is conditional, SCALAR_MASK is the
   condition under which it occurs.

   Clear LOOP_VINFO_CAN_FULLY_MASK_P if a fully-masked loop is not
   supported, otherwise record the required mask types.  */

static void
check_load_store_masking (loop_vec_info loop_vinfo, tree vectype,
                          vec_load_store_type vls_type, int group_size,
                          vect_memory_access_type memory_access_type,
                          gather_scatter_info *gs_info, tree scalar_mask)
{
  /* Invariant loads need no special support.  */
  if (memory_access_type == VMAT_INVARIANT)
    return;

  vec_loop_masks *masks = &LOOP_VINFO_MASKS (loop_vinfo);
  machine_mode vecmode = TYPE_MODE (vectype);
  bool is_load = (vls_type == VLS_LOAD);
  if (memory_access_type == VMAT_LOAD_STORE_LANES)
    {
      if (is_load
          ? !vect_load_lanes_supported (vectype, group_size, true)
          : !vect_store_lanes_supported (vectype, group_size, true))
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "can't use a fully-masked loop because the"
                             " target doesn't have an appropriate masked"
                             " load/store-lanes instruction.\n");
          LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
          return;
        }
      unsigned int ncopies = vect_get_num_copies (loop_vinfo, vectype);
      vect_record_loop_mask (loop_vinfo, masks, ncopies, vectype, scalar_mask);
      return;
    }

  if (memory_access_type == VMAT_GATHER_SCATTER)
    {
      internal_fn ifn = (is_load
                         ? IFN_MASK_GATHER_LOAD
                         : IFN_MASK_SCATTER_STORE);
      if (!internal_gather_scatter_fn_supported_p (ifn, vectype,
                                                   gs_info->memory_type,
                                                   gs_info->offset_vectype,
                                                   gs_info->scale))
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "can't use a fully-masked loop because the"
                             " target doesn't have an appropriate masked"
                             " gather load or scatter store instruction.\n");
          LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
          return;
        }
      unsigned int ncopies = vect_get_num_copies (loop_vinfo, vectype);
      vect_record_loop_mask (loop_vinfo, masks, ncopies, vectype, scalar_mask);
      return;
    }

  if (memory_access_type != VMAT_CONTIGUOUS
      && memory_access_type != VMAT_CONTIGUOUS_PERMUTE)
    {
      /* Element X of the data must come from iteration i * VF + X of the
         scalar loop.  We need more work to support other mappings.  */
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "can't use a fully-masked loop because an access"
                         " isn't contiguous.\n");
      LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
      return;
    }

  machine_mode mask_mode;
  if (!VECTOR_MODE_P (vecmode)
      || !targetm.vectorize.get_mask_mode (vecmode).exists (&mask_mode)
      || !can_vec_mask_load_store_p (vecmode, mask_mode, is_load))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "can't use a fully-masked loop because the target"
                         " doesn't have the appropriate masked load or"
                         " store.\n");
      LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
      return;
    }
  /* We might load more scalars than we need for permuting SLP loads.
     We checked in get_group_load_store_type that the extra elements
     don't leak into a new vector.  */
  poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
  poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
  unsigned int nvectors;
  if (can_div_away_from_zero_p (group_size * vf, nunits, &nvectors))
    vect_record_loop_mask (loop_vinfo, masks, nvectors, vectype, scalar_mask);
  else
    gcc_unreachable ();
}
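
/* An illustrative example of the final mask counting above: with
   GROUP_SIZE == 3, VF == 4 and 8-element vectors, each iteration
   accesses 3 * 4 == 12 scalars, so can_div_away_from_zero_p yields
   NVECTORS == CEIL (12, 8) == 2 and two masks of type VECTYPE are
   recorded per iteration.  */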

/* Return the mask input to a masked load or store.  VEC_MASK is the vectorized
   form of the scalar mask condition and LOOP_MASK, if nonnull, is the mask
   that needs to be applied to all loads and stores in a vectorized loop.
   Return VEC_MASK if LOOP_MASK is null, otherwise return VEC_MASK & LOOP_MASK.

   MASK_TYPE is the type of both masks.  If new statements are needed,
   insert them before GSI.  */

static tree
prepare_load_store_mask (tree mask_type, tree loop_mask, tree vec_mask,
                         gimple_stmt_iterator *gsi)
{
  gcc_assert (useless_type_conversion_p (mask_type, TREE_TYPE (vec_mask)));
  if (!loop_mask)
    return vec_mask;

  gcc_assert (TREE_TYPE (loop_mask) == mask_type);
  tree and_res = make_temp_ssa_name (mask_type, NULL, "vec_mask_and");
  gimple *and_stmt = gimple_build_assign (and_res, BIT_AND_EXPR,
                                          vec_mask, loop_mask);
  gsi_insert_before (gsi, and_stmt, GSI_SAME_STMT);
  return and_res;
}
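
/* For example, given a loop mask loop_mask_2 and a vectorized condition
   vec_mask_3, the function above emits (illustrative GIMPLE):

       vec_mask_and_4 = vec_mask_3 & loop_mask_2;

   and returns vec_mask_and_4 for use as the final load or store mask.  */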

/* Determine whether we can use a gather load or scatter store to vectorize
   strided load or store STMT_INFO by truncating the current offset to a
   smaller width.  We need to be able to construct an offset vector:

     { 0, X, X*2, X*3, ... }

   without loss of precision, where X is STMT_INFO's DR_STEP.

   Return true if this is possible, describing the gather load or scatter
   store in GS_INFO.  MASKED_P is true if the load or store is conditional.  */

static bool
vect_truncate_gather_scatter_offset (stmt_vec_info stmt_info,
                                     loop_vec_info loop_vinfo, bool masked_p,
                                     gather_scatter_info *gs_info)
{
  dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
  data_reference *dr = dr_info->dr;
  tree step = DR_STEP (dr);
  if (TREE_CODE (step) != INTEGER_CST)
    {
      /* ??? Perhaps we could use range information here?  */
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "cannot truncate variable step.\n");
      return false;
    }

  /* Get the number of bits in an element.  */
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  scalar_mode element_mode = SCALAR_TYPE_MODE (TREE_TYPE (vectype));
  unsigned int element_bits = GET_MODE_BITSIZE (element_mode);

  /* Set COUNT to the upper limit on the number of elements - 1.
     Start with the maximum vectorization factor.  */
  unsigned HOST_WIDE_INT count = vect_max_vf (loop_vinfo) - 1;

  /* Try lowering COUNT to the number of scalar latch iterations.  */
  class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  widest_int max_iters;
  if (max_loop_iterations (loop, &max_iters)
      && max_iters < count)
    count = max_iters.to_shwi ();

  /* Try scales of 1 and the element size.  */
  int scales[] = { 1, vect_get_scalar_dr_size (dr_info) };
  wi::overflow_type overflow = wi::OVF_NONE;
  for (int i = 0; i < 2; ++i)
    {
      int scale = scales[i];
      widest_int factor;
      if (!wi::multiple_of_p (wi::to_widest (step), scale, SIGNED, &factor))
        continue;

      /* Determine the minimum precision of (COUNT - 1) * STEP / SCALE.  */
      widest_int range = wi::mul (count, factor, SIGNED, &overflow);
      if (overflow)
        continue;
      signop sign = range >= 0 ? UNSIGNED : SIGNED;
      unsigned int min_offset_bits = wi::min_precision (range, sign);

      /* Find the narrowest viable offset type.  */
      unsigned int offset_bits = 1U << ceil_log2 (min_offset_bits);
      tree offset_type = build_nonstandard_integer_type (offset_bits,
                                                         sign == UNSIGNED);

      /* See whether the target supports the operation with an offset
         no narrower than OFFSET_TYPE.  */
      tree memory_type = TREE_TYPE (DR_REF (dr));
      if (!vect_gather_scatter_fn_p (loop_vinfo, DR_IS_READ (dr), masked_p,
                                     vectype, memory_type, offset_type, scale,
                                     &gs_info->ifn, &gs_info->offset_vectype))
        continue;

      gs_info->decl = NULL_TREE;
      /* Logically the sum of DR_BASE_ADDRESS, DR_INIT and DR_OFFSET,
         but we don't need to store that here.  */
      gs_info->base = NULL_TREE;
      gs_info->element_type = TREE_TYPE (vectype);
      gs_info->offset = fold_convert (offset_type, step);
      gs_info->offset_dt = vect_constant_def;
      gs_info->scale = scale;
      gs_info->memory_type = memory_type;
      return true;
    }

  if (overflow && dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "truncating gather/scatter offset to %d bits"
                     " might change its value.\n", element_bits);

  return false;
}
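
/* A worked example of the precision computation above (the numbers are
   illustrative only): if DR_STEP is 196 bytes and the loop is known to
   iterate at most 255 times, then with SCALE == 1 the largest offset is
   255 * 196 == 49980, which needs 16 bits, so the offset vector
   { 0, 196, 392, ... } can use a 16-bit unsigned offset type, provided
   the target supports gathers/scatters with such offsets.  */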

/* Return true if we can use gather/scatter internal functions to
   vectorize STMT_INFO, which is a grouped or strided load or store.
   MASKED_P is true if the load or store is conditional.  When returning
   true, fill in GS_INFO with the information required to perform the
   operation.  */

static bool
vect_use_strided_gather_scatters_p (stmt_vec_info stmt_info,
                                    loop_vec_info loop_vinfo, bool masked_p,
                                    gather_scatter_info *gs_info)
{
  if (!vect_check_gather_scatter (stmt_info, loop_vinfo, gs_info)
      || gs_info->decl)
    return vect_truncate_gather_scatter_offset (stmt_info, loop_vinfo,
                                                masked_p, gs_info);

  tree old_offset_type = TREE_TYPE (gs_info->offset);
  tree new_offset_type = TREE_TYPE (gs_info->offset_vectype);

  gcc_assert (TYPE_PRECISION (new_offset_type)
              >= TYPE_PRECISION (old_offset_type));
  gs_info->offset = fold_convert (new_offset_type, gs_info->offset);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "using gather/scatter for strided/grouped access,"
                     " scale = %d\n", gs_info->scale);

  return true;
}

/* STMT_INFO is a non-strided load or store, meaning that it accesses
   elements with a known constant step.  Return -1 if that step
   is negative, 0 if it is zero, and 1 if it is greater than zero.  */

static int
compare_step_with_zero (stmt_vec_info stmt_info)
{
  dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
  return tree_int_cst_compare (vect_dr_behavior (dr_info)->step,
                               size_zero_node);
}

/* If the target supports a permute mask that reverses the elements in
   a vector of type VECTYPE, return that mask, otherwise return null.  */

static tree
perm_mask_for_reverse (tree vectype)
{
  poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);

  /* The encoding has a single stepped pattern.  */
  vec_perm_builder sel (nunits, 1, 3);
  for (int i = 0; i < 3; ++i)
    sel.quick_push (nunits - 1 - i);

  vec_perm_indices indices (sel, 1, nunits);
  if (!can_vec_perm_const_p (TYPE_MODE (vectype), indices))
    return NULL_TREE;
  return vect_gen_perm_mask_checked (vectype, indices);
}
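
/* For example, for V8QI this describes the permutation
   { 7, 6, 5, 4, 3, 2, 1, 0 }; only the leading elements { 7, 6, 5 }
   are encoded explicitly, since the series continues with a constant
   step of -1.  */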

/* A subroutine of get_load_store_type, with a subset of the same
   arguments.  Handle the case where STMT_INFO is a load or store that
   accesses consecutive elements with a negative step.  */

static vect_memory_access_type
get_negative_load_store_type (stmt_vec_info stmt_info, tree vectype,
                              vec_load_store_type vls_type,
                              unsigned int ncopies)
{
  dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
  dr_alignment_support alignment_support_scheme;

  if (ncopies > 1)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "multiple types with negative step.\n");
      return VMAT_ELEMENTWISE;
    }

  alignment_support_scheme = vect_supportable_dr_alignment (dr_info, false);
  if (alignment_support_scheme != dr_aligned
      && alignment_support_scheme != dr_unaligned_supported)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "negative step but alignment required.\n");
      return VMAT_ELEMENTWISE;
    }

  if (vls_type == VLS_STORE_INVARIANT)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "negative step with invariant source;"
                         " no permute needed.\n");
      return VMAT_CONTIGUOUS_DOWN;
    }

  if (!perm_mask_for_reverse (vectype))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "negative step and reversing not supported.\n");
      return VMAT_ELEMENTWISE;
    }

  return VMAT_CONTIGUOUS_REVERSE;
}
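
/* As an illustration, for VMAT_CONTIGUOUS_REVERSE a read such as
   a[n - 1 - i] is implemented (roughly) as a contiguous load followed
   by a reversing permutation:

       vect_1 = MEM[addr];
       vect_2 = VEC_PERM_EXPR <vect_1, vect_1, { N-1, ..., 1, 0 }>;

   which is why perm_mask_for_reverse must succeed for this case.  */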

/* STMT_INFO is either a masked or unconditional store.  Return the value
   being stored.  */

tree
vect_get_store_rhs (stmt_vec_info stmt_info)
{
  if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
    {
      gcc_assert (gimple_assign_single_p (assign));
      return gimple_assign_rhs1 (assign);
    }
  if (gcall *call = dyn_cast <gcall *> (stmt_info->stmt))
    {
      internal_fn ifn = gimple_call_internal_fn (call);
      int index = internal_fn_stored_value_index (ifn);
      gcc_assert (index >= 0);
      return gimple_call_arg (call, index);
    }
  gcc_unreachable ();
}
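
/* E.g. for a masked store .MASK_STORE (addr, align, mask, value) the
   stored-value index is expected to be 3, so VALUE is returned; this is
   an illustration rather than an exhaustive list of the internal
   functions handled.  */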

/* Function VECTOR_VECTOR_COMPOSITION_TYPE

   This function returns a vector type which can be composed with NELTS
   pieces, whose type is recorded in PTYPE.  VTYPE should be a vector type
   with the same vector size as the return vector.  It first checks whether
   the target supports construction from pieces of a vector mode of the
   piece size; if not, it then checks whether a scalar mode of the piece
   size can be used instead.  It returns NULL_TREE if no usable composition
   can be found.

   For example, for (vtype=V16QI, nelts=4), we can probably get:
     - V16QI with PTYPE V4QI.
     - V4SI with PTYPE SI.
     - NULL_TREE.  */

static tree
vector_vector_composition_type (tree vtype, poly_uint64 nelts, tree *ptype)
{
  gcc_assert (VECTOR_TYPE_P (vtype));
  gcc_assert (known_gt (nelts, 0U));

  machine_mode vmode = TYPE_MODE (vtype);
  if (!VECTOR_MODE_P (vmode))
    return NULL_TREE;

  poly_uint64 vbsize = GET_MODE_BITSIZE (vmode);
  unsigned int pbsize;
  if (constant_multiple_p (vbsize, nelts, &pbsize))
    {
      /* First check if vec_init optab supports construction from
         vector pieces directly.  */
      scalar_mode elmode = SCALAR_TYPE_MODE (TREE_TYPE (vtype));
      poly_uint64 inelts = pbsize / GET_MODE_BITSIZE (elmode);
      machine_mode rmode;
      if (related_vector_mode (vmode, elmode, inelts).exists (&rmode)
          && (convert_optab_handler (vec_init_optab, vmode, rmode)
              != CODE_FOR_nothing))
        {
          *ptype = build_vector_type (TREE_TYPE (vtype), inelts);
          return vtype;
        }

      /* Otherwise check whether an integer type of the same piece size
         exists and whether vec_init supports construction from it
         directly.  */
      if (int_mode_for_size (pbsize, 0).exists (&elmode)
          && related_vector_mode (vmode, elmode, nelts).exists (&rmode)
          && (convert_optab_handler (vec_init_optab, rmode, elmode)
              != CODE_FOR_nothing))
        {
          *ptype = build_nonstandard_integer_type (pbsize, 1);
          return build_vector_type (*ptype, nelts);
        }
    }

  return NULL_TREE;
}

/* A subroutine of get_load_store_type, with a subset of the same
   arguments.  Handle the case where STMT_INFO is part of a grouped load
   or store.

   For stores, the statements in the group are all consecutive
   and there is no gap at the end.  For loads, the statements in the
   group might not be consecutive; there can be gaps between statements
   as well as at the end.  */

static bool
get_group_load_store_type (stmt_vec_info stmt_info, tree vectype, bool slp,
                           bool masked_p, vec_load_store_type vls_type,
                           vect_memory_access_type *memory_access_type,
                           gather_scatter_info *gs_info)
{
  vec_info *vinfo = stmt_info->vinfo;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  class loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
  stmt_vec_info first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
  dr_vec_info *first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
  unsigned int group_size = DR_GROUP_SIZE (first_stmt_info);
  bool single_element_p = (stmt_info == first_stmt_info
                           && !DR_GROUP_NEXT_ELEMENT (stmt_info));
  unsigned HOST_WIDE_INT gap = DR_GROUP_GAP (first_stmt_info);
  poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);

  /* True if the vectorized statements would access beyond the last
     statement in the group.  */
  bool overrun_p = false;

  /* True if we can cope with such overrun by peeling for gaps, so that
     there is at least one final scalar iteration after the vector loop.  */
  bool can_overrun_p = (!masked_p
                        && vls_type == VLS_LOAD
                        && loop_vinfo
                        && !loop->inner);

  /* There can only be a gap at the end of the group if the stride is
     known at compile time.  */
  gcc_assert (!STMT_VINFO_STRIDED_P (first_stmt_info) || gap == 0);

  /* Stores can't yet have gaps.  */
  gcc_assert (slp || vls_type == VLS_LOAD || gap == 0);

  if (slp)
    {
      if (STMT_VINFO_STRIDED_P (first_stmt_info))
        {
          /* Try to use consecutive accesses of DR_GROUP_SIZE elements,
             separated by the stride, until we have a complete vector.
             Fall back to scalar accesses if that isn't possible.  */
          if (multiple_p (nunits, group_size))
            *memory_access_type = VMAT_STRIDED_SLP;
          else
            *memory_access_type = VMAT_ELEMENTWISE;
        }
      else
        {
          overrun_p = loop_vinfo && gap != 0;
          if (overrun_p && vls_type != VLS_LOAD)
            {
              dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                               "Grouped store with gaps requires"
                               " non-consecutive accesses\n");
              return false;
            }
          /* An overrun is fine if the trailing elements are smaller
             than the alignment boundary B.  Every vector access will
             be a multiple of B and so we are guaranteed to access a
             non-gap element in the same B-sized block.  */
          if (overrun_p
              && gap < (vect_known_alignment_in_bytes (first_dr_info)
                        / vect_get_scalar_dr_size (first_dr_info)))
            overrun_p = false;

          /* If the gap splits the vector in half and the target
             can do half-vector operations avoid the epilogue peeling
             by simply loading half of the vector only.  Usually
             the construction with an upper zero half will be elided.  */
          dr_alignment_support alignment_support_scheme;
          tree half_vtype;
          if (overrun_p
              && !masked_p
              && (((alignment_support_scheme
                      = vect_supportable_dr_alignment (first_dr_info, false)))
                   == dr_aligned
                  || alignment_support_scheme == dr_unaligned_supported)
              && known_eq (nunits, (group_size - gap) * 2)
              && known_eq (nunits, group_size)
              && (vector_vector_composition_type (vectype, 2, &half_vtype)
                  != NULL_TREE))
            overrun_p = false;

          if (overrun_p && !can_overrun_p)
            {
              if (dump_enabled_p ())
                dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                 "Peeling for outer loop is not supported\n");
              return false;
            }
          int cmp = compare_step_with_zero (stmt_info);
          if (cmp < 0)
            *memory_access_type = get_negative_load_store_type
              (stmt_info, vectype, vls_type, 1);
          else
            {
              gcc_assert (!loop_vinfo || cmp > 0);
              *memory_access_type = VMAT_CONTIGUOUS;
            }
        }
    }
  else
    {
      /* We can always handle this case using elementwise accesses,
         but see if something more efficient is available.  */
      *memory_access_type = VMAT_ELEMENTWISE;

      /* If there is a gap at the end of the group then these optimizations
         would access excess elements in the last iteration.  */
      bool would_overrun_p = (gap != 0);
      /* An overrun is fine if the trailing elements are smaller than the
         alignment boundary B.  Every vector access will be a multiple of B
         and so we are guaranteed to access a non-gap element in the
         same B-sized block.  */
      if (would_overrun_p
          && !masked_p
          && gap < (vect_known_alignment_in_bytes (first_dr_info)
                    / vect_get_scalar_dr_size (first_dr_info)))
        would_overrun_p = false;

      if (!STMT_VINFO_STRIDED_P (first_stmt_info)
          && (can_overrun_p || !would_overrun_p)
          && compare_step_with_zero (stmt_info) > 0)
        {
          /* First cope with the degenerate case of a single-element
             vector.  */
          if (known_eq (TYPE_VECTOR_SUBPARTS (vectype), 1U))
            *memory_access_type = VMAT_CONTIGUOUS;

          /* Otherwise try using LOAD/STORE_LANES.  */
          if (*memory_access_type == VMAT_ELEMENTWISE
              && (vls_type == VLS_LOAD
                  ? vect_load_lanes_supported (vectype, group_size, masked_p)
                  : vect_store_lanes_supported (vectype, group_size,
                                                masked_p)))
            {
              *memory_access_type = VMAT_LOAD_STORE_LANES;
              overrun_p = would_overrun_p;
            }

          /* If that fails, try using permuting loads.  */
          if (*memory_access_type == VMAT_ELEMENTWISE
              && (vls_type == VLS_LOAD
                  ? vect_grouped_load_supported (vectype, single_element_p,
                                                 group_size)
                  : vect_grouped_store_supported (vectype, group_size)))
            {
              *memory_access_type = VMAT_CONTIGUOUS_PERMUTE;
              overrun_p = would_overrun_p;
            }
        }

      /* As a last resort, try using a gather load or scatter store.

         ??? Although the code can handle all group sizes correctly,
         it probably isn't a win to use separate strided accesses based
         on nearby locations.  Or, even if it's a win over scalar code,
         it might not be a win over vectorizing at a lower VF, if that
         allows us to use contiguous accesses.  */
      if (*memory_access_type == VMAT_ELEMENTWISE
          && single_element_p
          && loop_vinfo
          && vect_use_strided_gather_scatters_p (stmt_info, loop_vinfo,
                                                 masked_p, gs_info))
        *memory_access_type = VMAT_GATHER_SCATTER;
    }

  if (vls_type != VLS_LOAD && first_stmt_info == stmt_info)
    {
      /* STMT is the leader of the group.  Check the operands of all the
         stmts of the group.  */
      stmt_vec_info next_stmt_info = DR_GROUP_NEXT_ELEMENT (stmt_info);
      while (next_stmt_info)
        {
          tree op = vect_get_store_rhs (next_stmt_info);
          enum vect_def_type dt;
          if (!vect_is_simple_use (op, vinfo, &dt))
            {
              if (dump_enabled_p ())
                dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                 "use not simple.\n");
              return false;
            }
          next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
        }
    }

  if (overrun_p)
    {
      gcc_assert (can_overrun_p);
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "Data access with gaps requires scalar "
                         "epilogue loop\n");
      LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) = true;
    }

  return true;
}

/* Analyze load or store statement STMT_INFO of type VLS_TYPE.  Return true
   if there is a memory access type that the vectorized form can use,
   storing it in *MEMORY_ACCESS_TYPE if so.  If we decide to use gathers
   or scatters, fill in GS_INFO accordingly.

   SLP says whether we're performing SLP rather than loop vectorization.
   MASKED_P is true if the statement is conditional on a vectorized mask.
   VECTYPE is the vector type that the vectorized statements will use.
   NCOPIES is the number of vector statements that will be needed.  */

static bool
get_load_store_type (stmt_vec_info stmt_info, tree vectype, bool slp,
                     bool masked_p, vec_load_store_type vls_type,
                     unsigned int ncopies,
                     vect_memory_access_type *memory_access_type,
                     gather_scatter_info *gs_info)
{
  vec_info *vinfo = stmt_info->vinfo;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
  if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
    {
      *memory_access_type = VMAT_GATHER_SCATTER;
      if (!vect_check_gather_scatter (stmt_info, loop_vinfo, gs_info))
        gcc_unreachable ();
      else if (!vect_is_simple_use (gs_info->offset, vinfo,
                                    &gs_info->offset_dt,
                                    &gs_info->offset_vectype))
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "%s index use not simple.\n",
                             vls_type == VLS_LOAD ? "gather" : "scatter");
          return false;
        }
    }
  else if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
    {
      if (!get_group_load_store_type (stmt_info, vectype, slp, masked_p,
                                      vls_type, memory_access_type, gs_info))
        return false;
    }
  else if (STMT_VINFO_STRIDED_P (stmt_info))
    {
      gcc_assert (!slp);
      if (loop_vinfo
          && vect_use_strided_gather_scatters_p (stmt_info, loop_vinfo,
                                                 masked_p, gs_info))
        *memory_access_type = VMAT_GATHER_SCATTER;
      else
        *memory_access_type = VMAT_ELEMENTWISE;
    }
  else
    {
      int cmp = compare_step_with_zero (stmt_info);
      if (cmp < 0)
        *memory_access_type = get_negative_load_store_type
          (stmt_info, vectype, vls_type, ncopies);
      else if (cmp == 0)
        {
          gcc_assert (vls_type == VLS_LOAD);
          *memory_access_type = VMAT_INVARIANT;
        }
      else
        *memory_access_type = VMAT_CONTIGUOUS;
    }

  if ((*memory_access_type == VMAT_ELEMENTWISE
       || *memory_access_type == VMAT_STRIDED_SLP)
      && !nunits.is_constant ())
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "Not using elementwise accesses due to variable "
                         "vectorization factor.\n");
      return false;
    }

  /* FIXME: At the moment the cost model seems to underestimate the
     cost of using elementwise accesses.  This check preserves the
     traditional behavior until that can be fixed.  */
  stmt_vec_info first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
  if (!first_stmt_info)
    first_stmt_info = stmt_info;
  if (*memory_access_type == VMAT_ELEMENTWISE
      && !STMT_VINFO_STRIDED_P (first_stmt_info)
      && !(stmt_info == DR_GROUP_FIRST_ELEMENT (stmt_info)
           && !DR_GROUP_NEXT_ELEMENT (stmt_info)
           && !pow2p_hwi (DR_GROUP_SIZE (stmt_info))))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "not falling back to elementwise accesses\n");
      return false;
    }
  return true;
}

/* Return true if boolean argument MASK is suitable for vectorizing
   conditional operation STMT_INFO.  When returning true, store the type
   of the definition in *MASK_DT_OUT and the type of the vectorized mask
   in *MASK_VECTYPE_OUT.  */

static bool
vect_check_scalar_mask (stmt_vec_info stmt_info, tree mask,
                        vect_def_type *mask_dt_out,
                        tree *mask_vectype_out)
{
  vec_info *vinfo = stmt_info->vinfo;
  if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (mask)))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "mask argument is not a boolean.\n");
      return false;
    }

  if (TREE_CODE (mask) != SSA_NAME)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "mask argument is not an SSA name.\n");
      return false;
    }

  enum vect_def_type mask_dt;
  tree mask_vectype;
  if (!vect_is_simple_use (mask, stmt_info->vinfo, &mask_dt, &mask_vectype))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "mask use not simple.\n");
      return false;
    }

  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  if (!mask_vectype)
    mask_vectype = get_mask_type_for_scalar_type (vinfo, TREE_TYPE (vectype));

  if (!mask_vectype || !VECTOR_BOOLEAN_TYPE_P (mask_vectype))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "could not find an appropriate vector mask type.\n");
      return false;
    }

  if (maybe_ne (TYPE_VECTOR_SUBPARTS (mask_vectype),
                TYPE_VECTOR_SUBPARTS (vectype)))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "vector mask type %T"
                         " does not match vector data type %T.\n",
                         mask_vectype, vectype);

      return false;
    }

  *mask_dt_out = mask_dt;
  *mask_vectype_out = mask_vectype;
  return true;
}

/* Return true if stored value RHS is suitable for vectorizing store
   statement STMT_INFO.  When returning true, store the type of the
   definition in *RHS_DT_OUT, the type of the vectorized store value in
   *RHS_VECTYPE_OUT and the type of the store in *VLS_TYPE_OUT.  */

static bool
vect_check_store_rhs (stmt_vec_info stmt_info, tree rhs,
                      vect_def_type *rhs_dt_out, tree *rhs_vectype_out,
                      vec_load_store_type *vls_type_out)
{
  /* In the case this is a store from a constant make sure
     native_encode_expr can handle it.  */
  if (CONSTANT_CLASS_P (rhs) && native_encode_expr (rhs, NULL, 64) == 0)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "cannot encode constant as a byte sequence.\n");
      return false;
    }

  enum vect_def_type rhs_dt;
  tree rhs_vectype;
  if (!vect_is_simple_use (rhs, stmt_info->vinfo, &rhs_dt, &rhs_vectype))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "use not simple.\n");
      return false;
    }

  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  if (rhs_vectype && !useless_type_conversion_p (vectype, rhs_vectype))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "incompatible vector types.\n");
      return false;
    }

  *rhs_dt_out = rhs_dt;
  *rhs_vectype_out = rhs_vectype;
  if (rhs_dt == vect_constant_def || rhs_dt == vect_external_def)
    *vls_type_out = VLS_STORE_INVARIANT;
  else
    *vls_type_out = VLS_STORE;
  return true;
}

/* Build an all-ones vector mask of type MASKTYPE while vectorizing STMT_INFO.
   Note that we support masks with floating-point type, in which case the
   floats are interpreted as a bitmask.  */

static tree
vect_build_all_ones_mask (stmt_vec_info stmt_info, tree masktype)
{
  if (TREE_CODE (masktype) == INTEGER_TYPE)
    return build_int_cst (masktype, -1);
  else if (TREE_CODE (TREE_TYPE (masktype)) == INTEGER_TYPE)
    {
      tree mask = build_int_cst (TREE_TYPE (masktype), -1);
      mask = build_vector_from_val (masktype, mask);
      return vect_init_vector (stmt_info, mask, masktype, NULL);
    }
  else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (masktype)))
    {
      REAL_VALUE_TYPE r;
      long tmp[6];
      for (int j = 0; j < 6; ++j)
        tmp[j] = -1;
      real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (masktype)));
      tree mask = build_real (TREE_TYPE (masktype), r);
      mask = build_vector_from_val (masktype, mask);
      return vect_init_vector (stmt_info, mask, masktype, NULL);
    }
  gcc_unreachable ();
}

/* Build an all-zero merge value of type VECTYPE while vectorizing
   STMT_INFO as a gather load.  */

static tree
vect_build_zero_merge_argument (stmt_vec_info stmt_info, tree vectype)
{
  tree merge;
  if (TREE_CODE (TREE_TYPE (vectype)) == INTEGER_TYPE)
    merge = build_int_cst (TREE_TYPE (vectype), 0);
  else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (vectype)))
    {
      REAL_VALUE_TYPE r;
      long tmp[6];
      for (int j = 0; j < 6; ++j)
        tmp[j] = 0;
      real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (vectype)));
      merge = build_real (TREE_TYPE (vectype), r);
    }
  else
    gcc_unreachable ();
  merge = build_vector_from_val (vectype, merge);
  return vect_init_vector (stmt_info, merge, vectype, NULL);
}

/* Build a gather load call while vectorizing STMT_INFO.  Insert new
   instructions before GSI and add them to VEC_STMT.  GS_INFO describes
   the gather load operation.  If the load is conditional, MASK is the
   unvectorized condition and MASK_DT is its definition type, otherwise
   MASK is null.  */

static void
vect_build_gather_load_calls (stmt_vec_info stmt_info,
                              gimple_stmt_iterator *gsi,
                              stmt_vec_info *vec_stmt,
                              gather_scatter_info *gs_info,
                              tree mask)
{
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
  int ncopies = vect_get_num_copies (loop_vinfo, vectype);
  edge pe = loop_preheader_edge (loop);
  enum { NARROW, NONE, WIDEN } modifier;
  poly_uint64 gather_off_nunits
    = TYPE_VECTOR_SUBPARTS (gs_info->offset_vectype);

  tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info->decl));
  tree rettype = TREE_TYPE (TREE_TYPE (gs_info->decl));
  tree srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
  tree ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
  tree idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
  tree masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
  tree scaletype = TREE_VALUE (arglist);
  tree real_masktype = masktype;
  gcc_checking_assert (types_compatible_p (srctype, rettype)
                       && (!mask
                           || TREE_CODE (masktype) == INTEGER_TYPE
                           || types_compatible_p (srctype, masktype)));
  if (mask && TREE_CODE (masktype) == INTEGER_TYPE)
    masktype = truth_type_for (srctype);

  tree mask_halftype = masktype;
  tree perm_mask = NULL_TREE;
  tree mask_perm_mask = NULL_TREE;
  if (known_eq (nunits, gather_off_nunits))
    modifier = NONE;
  else if (known_eq (nunits * 2, gather_off_nunits))
    {
      modifier = WIDEN;

      /* Currently widening gathers and scatters are only supported for
         fixed-length vectors.  */
      int count = gather_off_nunits.to_constant ();
      vec_perm_builder sel (count, count, 1);
      for (int i = 0; i < count; ++i)
        sel.quick_push (i | (count / 2));

      vec_perm_indices indices (sel, 1, count);
      perm_mask = vect_gen_perm_mask_checked (gs_info->offset_vectype,
                                              indices);
    }
  else if (known_eq (nunits, gather_off_nunits * 2))
    {
      modifier = NARROW;

      /* Currently narrowing gathers and scatters are only supported for
         fixed-length vectors.  */
      int count = nunits.to_constant ();
      vec_perm_builder sel (count, count, 1);
      sel.quick_grow (count);
      for (int i = 0; i < count; ++i)
        sel[i] = i < count / 2 ? i : i + count / 2;
      vec_perm_indices indices (sel, 2, count);
      perm_mask = vect_gen_perm_mask_checked (vectype, indices);

      ncopies *= 2;

      if (mask && masktype == real_masktype)
        {
          for (int i = 0; i < count; ++i)
            sel[i] = i | (count / 2);
          indices.new_vector (sel, 2, count);
          mask_perm_mask = vect_gen_perm_mask_checked (masktype, indices);
        }
      else if (mask)
        mask_halftype = truth_type_for (gs_info->offset_vectype);
    }
  else
    gcc_unreachable ();

  tree scalar_dest = gimple_get_lhs (stmt_info->stmt);
  tree vec_dest = vect_create_destination_var (scalar_dest, vectype);

  tree ptr = fold_convert (ptrtype, gs_info->base);
  if (!is_gimple_min_invariant (ptr))
    {
      gimple_seq seq;
      ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
      basic_block new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
      gcc_assert (!new_bb);
    }

  tree scale = build_int_cst (scaletype, gs_info->scale);

  tree vec_oprnd0 = NULL_TREE;
  tree vec_mask = NULL_TREE;
  tree src_op = NULL_TREE;
  tree mask_op = NULL_TREE;
  tree prev_res = NULL_TREE;
  stmt_vec_info prev_stmt_info = NULL;

  if (!mask)
    {
      src_op = vect_build_zero_merge_argument (stmt_info, rettype);
      mask_op = vect_build_all_ones_mask (stmt_info, masktype);
    }

  for (int j = 0; j < ncopies; ++j)
    {
      tree op, var;
      if (modifier == WIDEN && (j & 1))
        op = permute_vec_elements (vec_oprnd0, vec_oprnd0,
                                   perm_mask, stmt_info, gsi);
      else if (j == 0)
        op = vec_oprnd0
          = vect_get_vec_def_for_operand (gs_info->offset, stmt_info);
      else
        op = vec_oprnd0 = vect_get_vec_def_for_stmt_copy (loop_vinfo,
                                                          vec_oprnd0);

      if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
        {
          gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op)),
                                TYPE_VECTOR_SUBPARTS (idxtype)));
          var = vect_get_new_ssa_name (idxtype, vect_simple_var);
          op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
          gassign *new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
          vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
          op = var;
        }

      if (mask)
        {
          if (mask_perm_mask && (j & 1))
            mask_op = permute_vec_elements (mask_op, mask_op,
                                            mask_perm_mask, stmt_info, gsi);
          else
            {
              if (j == 0)
                vec_mask = vect_get_vec_def_for_operand (mask, stmt_info);
              else if (modifier != NARROW || (j & 1) == 0)
                vec_mask = vect_get_vec_def_for_stmt_copy (loop_vinfo,
                                                           vec_mask);

              mask_op = vec_mask;
              if (!useless_type_conversion_p (masktype, TREE_TYPE (vec_mask)))
                {
                  poly_uint64 sub1 = TYPE_VECTOR_SUBPARTS (TREE_TYPE (mask_op));
                  poly_uint64 sub2 = TYPE_VECTOR_SUBPARTS (masktype);
                  gcc_assert (known_eq (sub1, sub2));
                  var = vect_get_new_ssa_name (masktype, vect_simple_var);
                  mask_op = build1 (VIEW_CONVERT_EXPR, masktype, mask_op);
                  gassign *new_stmt
                    = gimple_build_assign (var, VIEW_CONVERT_EXPR, mask_op);
                  vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
                  mask_op = var;
                }
            }
          if (modifier == NARROW && masktype != real_masktype)
            {
              var = vect_get_new_ssa_name (mask_halftype, vect_simple_var);
              gassign *new_stmt
                = gimple_build_assign (var, (j & 1) ? VEC_UNPACK_HI_EXPR
                                       : VEC_UNPACK_LO_EXPR,
                                       mask_op);
              vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
              mask_op = var;
            }
          src_op = mask_op;
        }

      tree mask_arg = mask_op;
      if (masktype != real_masktype)
        {
          tree utype, optype = TREE_TYPE (mask_op);
          if (TYPE_MODE (real_masktype) == TYPE_MODE (optype))
            utype = real_masktype;
          else
            utype = lang_hooks.types.type_for_mode (TYPE_MODE (optype), 1);
          var = vect_get_new_ssa_name (utype, vect_scalar_var);
          mask_arg = build1 (VIEW_CONVERT_EXPR, utype, mask_op);
          gassign *new_stmt
            = gimple_build_assign (var, VIEW_CONVERT_EXPR, mask_arg);
          vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
          mask_arg = var;
          if (!useless_type_conversion_p (real_masktype, utype))
            {
              gcc_assert (TYPE_PRECISION (utype)
                          <= TYPE_PRECISION (real_masktype));
              var = vect_get_new_ssa_name (real_masktype, vect_scalar_var);
              new_stmt = gimple_build_assign (var, NOP_EXPR, mask_arg);
              vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
              mask_arg = var;
            }
          src_op = build_zero_cst (srctype);
        }
      gcall *new_call = gimple_build_call (gs_info->decl, 5, src_op, ptr, op,
                                           mask_arg, scale);

      stmt_vec_info new_stmt_info;
      if (!useless_type_conversion_p (vectype, rettype))
        {
          gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (vectype),
                                TYPE_VECTOR_SUBPARTS (rettype)));
          op = vect_get_new_ssa_name (rettype, vect_simple_var);
          gimple_call_set_lhs (new_call, op);
          vect_finish_stmt_generation (stmt_info, new_call, gsi);
          var = make_ssa_name (vec_dest);
          op = build1 (VIEW_CONVERT_EXPR, vectype, op);
          gassign *new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
          new_stmt_info
            = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
        }
      else
        {
          var = make_ssa_name (vec_dest, new_call);
          gimple_call_set_lhs (new_call, var);
          new_stmt_info
            = vect_finish_stmt_generation (stmt_info, new_call, gsi);
        }

      if (modifier == NARROW)
        {
          if ((j & 1) == 0)
            {
              prev_res = var;
              continue;
            }
          var = permute_vec_elements (prev_res, var, perm_mask,
                                      stmt_info, gsi);
          new_stmt_info = loop_vinfo->lookup_def (var);
        }

      if (prev_stmt_info == NULL)
        STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
      else
        STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
      prev_stmt_info = new_stmt_info;
    }
}

/* Prepare the base and offset in GS_INFO for vectorization.
   Set *DATAREF_PTR to the loop-invariant base address and *VEC_OFFSET
   to the vectorized offset argument for the first copy of STMT_INFO.
   STMT_INFO is the statement described by GS_INFO and LOOP is the
   containing loop.  */

static void
vect_get_gather_scatter_ops (class loop *loop, stmt_vec_info stmt_info,
                             gather_scatter_info *gs_info,
                             tree *dataref_ptr, tree *vec_offset)
{
  gimple_seq stmts = NULL;
  *dataref_ptr = force_gimple_operand (gs_info->base, &stmts, true, NULL_TREE);
  if (stmts != NULL)
    {
      basic_block new_bb;
      edge pe = loop_preheader_edge (loop);
      new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
      gcc_assert (!new_bb);
    }
  *vec_offset = vect_get_vec_def_for_operand (gs_info->offset, stmt_info,
                                              gs_info->offset_vectype);
}

/* Prepare to implement a grouped or strided load or store using
   the gather load or scatter store operation described by GS_INFO.
   STMT_INFO is the load or store statement.

   Set *DATAREF_BUMP to the amount that should be added to the base
   address after each copy of the vectorized statement.  Set *VEC_OFFSET
   to an invariant offset vector in which element I has the value
   I * DR_STEP / SCALE.  */

static void
vect_get_strided_load_store_ops (stmt_vec_info stmt_info,
                                 loop_vec_info loop_vinfo,
                                 gather_scatter_info *gs_info,
                                 tree *dataref_bump, tree *vec_offset)
{
  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
  class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  gimple_seq stmts;

  tree bump = size_binop (MULT_EXPR,
                          fold_convert (sizetype, unshare_expr (DR_STEP (dr))),
                          size_int (TYPE_VECTOR_SUBPARTS (vectype)));
  *dataref_bump = force_gimple_operand (bump, &stmts, true, NULL_TREE);
  if (stmts)
    gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);

  /* The offset given in GS_INFO can have pointer type, so use the element
     type of the vector instead.  */
  tree offset_type = TREE_TYPE (gs_info->offset_vectype);

  /* Calculate X = DR_STEP / SCALE and convert it to the appropriate type.  */
  tree step = size_binop (EXACT_DIV_EXPR, unshare_expr (DR_STEP (dr)),
                          ssize_int (gs_info->scale));
  step = fold_convert (offset_type, step);
  step = force_gimple_operand (step, &stmts, true, NULL_TREE);

  /* Create {0, X, X*2, X*3, ...}.  */
  *vec_offset = gimple_build (&stmts, VEC_SERIES_EXPR, gs_info->offset_vectype,
                              build_zero_cst (offset_type), step);
  if (stmts)
    gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
}
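
/* For example (illustrative numbers): for a strided access with
   DR_STEP == 32, SCALE == 8 and 4-element vectors, *DATAREF_BUMP becomes
   32 * 4 == 128 and *VEC_OFFSET becomes the invariant series
   { 0, 4, 8, 12 }, since X == 32 / 8 == 4.  */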

/* Return the amount that should be added to a vector pointer to move
   to the next or previous copy of AGGR_TYPE.  DR_INFO is the data reference
   being vectorized and MEMORY_ACCESS_TYPE describes the type of
   vectorization.  */

static tree
vect_get_data_ptr_increment (dr_vec_info *dr_info, tree aggr_type,
                             vect_memory_access_type memory_access_type)
{
  if (memory_access_type == VMAT_INVARIANT)
    return size_zero_node;

  tree iv_step = TYPE_SIZE_UNIT (aggr_type);
  tree step = vect_dr_behavior (dr_info)->step;
  if (tree_int_cst_sgn (step) == -1)
    iv_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (iv_step), iv_step);
  return iv_step;
}
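
/* E.g. for a contiguous access with AGGR_TYPE V4SI the increment is
   16 bytes; for the same access with a negative step
   (VMAT_CONTIGUOUS_REVERSE) it is -16, so the pointer moves backwards
   through memory.  */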
3089
3090 /* Check and perform vectorization of BUILT_IN_BSWAP{16,32,64}. */
3091
3092 static bool
vectorizable_bswap(stmt_vec_info stmt_info,gimple_stmt_iterator * gsi,stmt_vec_info * vec_stmt,slp_tree slp_node,tree vectype_in,stmt_vector_for_cost * cost_vec)3093 vectorizable_bswap (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
3094 stmt_vec_info *vec_stmt, slp_tree slp_node,
3095 tree vectype_in, stmt_vector_for_cost *cost_vec)
3096 {
3097 tree op, vectype;
3098 gcall *stmt = as_a <gcall *> (stmt_info->stmt);
3099 vec_info *vinfo = stmt_info->vinfo;
3100 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3101 unsigned ncopies;
3102
3103 op = gimple_call_arg (stmt, 0);
3104 vectype = STMT_VINFO_VECTYPE (stmt_info);
3105 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
3106
3107 /* Multiple types in SLP are handled by creating the appropriate number of
3108 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
3109 case of SLP. */
3110 if (slp_node)
3111 ncopies = 1;
3112 else
3113 ncopies = vect_get_num_copies (loop_vinfo, vectype);
3114
3115 gcc_assert (ncopies >= 1);
3116
3117 tree char_vectype = get_same_sized_vectype (char_type_node, vectype_in);
3118 if (! char_vectype)
3119 return false;
3120
3121 poly_uint64 num_bytes = TYPE_VECTOR_SUBPARTS (char_vectype);
3122 unsigned word_bytes;
3123 if (!constant_multiple_p (num_bytes, nunits, &word_bytes))
3124 return false;
3125
3126 /* The encoding uses one stepped pattern for each byte in the word. */
3127 vec_perm_builder elts (num_bytes, word_bytes, 3);
3128 for (unsigned i = 0; i < 3; ++i)
3129 for (unsigned j = 0; j < word_bytes; ++j)
3130 elts.quick_push ((i + 1) * word_bytes - j - 1);
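  /* For instance, for __builtin_bswap32 on a 16-byte vector
     (word_bytes == 4) this builds the selector
     { 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, ... }, i.e. each byte is
     mirrored within its own 4-byte word.  */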
3131
3132 vec_perm_indices indices (elts, 1, num_bytes);
3133 if (!can_vec_perm_const_p (TYPE_MODE (char_vectype), indices))
3134 return false;
3135
3136 if (! vec_stmt)
3137 {
3138 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
3139 DUMP_VECT_SCOPE ("vectorizable_bswap");
3140 if (! slp_node)
3141 {
3142 record_stmt_cost (cost_vec,
3143 1, vector_stmt, stmt_info, 0, vect_prologue);
3144 record_stmt_cost (cost_vec,
3145 ncopies, vec_perm, stmt_info, 0, vect_body);
3146 }
3147 return true;
3148 }
3149
3150 tree bswap_vconst = vec_perm_indices_to_tree (char_vectype, indices);
3151
3152 /* Transform. */
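  /* Each vector operand VOP is rewritten as (a GIMPLE sketch; the SSA
     names are illustrative):
       tem_1 = VIEW_CONVERT_EXPR <char_vectype> (vop);
       tem_2 = VEC_PERM_EXPR <tem_1, tem_1, bswap_vconst>;
       res_3 = VIEW_CONVERT_EXPR <vectype> (tem_2);  */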
3153 vec<tree> vec_oprnds = vNULL;
3154 stmt_vec_info new_stmt_info = NULL;
3155 stmt_vec_info prev_stmt_info = NULL;
3156 for (unsigned j = 0; j < ncopies; j++)
3157 {
3158 /* Handle uses. */
3159 if (j == 0)
3160 vect_get_vec_defs (op, NULL, stmt_info, &vec_oprnds, NULL, slp_node);
3161 else
3162 vect_get_vec_defs_for_stmt_copy (vinfo, &vec_oprnds, NULL);
3163
3164 /* Arguments are ready.  Create the new vector stmt. */
3165 unsigned i;
3166 tree vop;
3167 FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
3168 {
3169 gimple *new_stmt;
3170 tree tem = make_ssa_name (char_vectype);
3171 new_stmt = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
3172 char_vectype, vop));
3173 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
3174 tree tem2 = make_ssa_name (char_vectype);
3175 new_stmt = gimple_build_assign (tem2, VEC_PERM_EXPR,
3176 tem, tem, bswap_vconst);
3177 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
3178 tem = make_ssa_name (vectype);
3179 new_stmt = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
3180 vectype, tem2));
3181 new_stmt_info
3182 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
3183 if (slp_node)
3184 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
3185 }
3186
3187 if (slp_node)
3188 continue;
3189
3190 if (j == 0)
3191 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
3192 else
3193 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
3194
3195 prev_stmt_info = new_stmt_info;
3196 }
3197
3198 vec_oprnds.release ();
3199 return true;
3200 }
3201
3202 /* Return true if vector types VECTYPE_IN and VECTYPE_OUT have
3203 integer elements and if we can narrow VECTYPE_IN to VECTYPE_OUT
3204 in a single step. On success, store the binary pack code in
3205 *CONVERT_CODE. */
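/* For instance, narrowing V2DI inputs to a V4SI output is a single
   VEC_PACK_TRUNC_EXPR, whereas narrowing DI elements to HI elements
   would need an intermediate SI step and is rejected here.  */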
3206
3207 static bool
3208 simple_integer_narrowing (tree vectype_out, tree vectype_in,
3209 tree_code *convert_code)
3210 {
3211 if (!INTEGRAL_TYPE_P (TREE_TYPE (vectype_out))
3212 || !INTEGRAL_TYPE_P (TREE_TYPE (vectype_in)))
3213 return false;
3214
3215 tree_code code;
3216 int multi_step_cvt = 0;
3217 auto_vec <tree, 8> interm_types;
3218 if (!supportable_narrowing_operation (NOP_EXPR, vectype_out, vectype_in,
3219 &code, &multi_step_cvt, &interm_types)
3220 || multi_step_cvt)
3221 return false;
3222
3223 *convert_code = code;
3224 return true;
3225 }
3226
3227 /* Function vectorizable_call.
3228
3229 Check if STMT_INFO performs a function call that can be vectorized.
3230 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
3231 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
3232 Return true if STMT_INFO is vectorizable in this way. */
3233
3234 static bool
3235 vectorizable_call (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
3236 stmt_vec_info *vec_stmt, slp_tree slp_node,
3237 stmt_vector_for_cost *cost_vec)
3238 {
3239 gcall *stmt;
3240 tree vec_dest;
3241 tree scalar_dest;
3242 tree op;
3243 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
3244 stmt_vec_info prev_stmt_info;
3245 tree vectype_out, vectype_in;
3246 poly_uint64 nunits_in;
3247 poly_uint64 nunits_out;
3248 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3249 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
3250 vec_info *vinfo = stmt_info->vinfo;
3251 tree fndecl, new_temp, rhs_type;
3252 enum vect_def_type dt[4]
3253 = { vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type,
3254 vect_unknown_def_type };
3255 tree vectypes[ARRAY_SIZE (dt)] = {};
3256 int ndts = ARRAY_SIZE (dt);
3257 int ncopies, j;
3258 auto_vec<tree, 8> vargs;
3259 auto_vec<tree, 8> orig_vargs;
3260 enum { NARROW, NONE, WIDEN } modifier;
3261 size_t i, nargs;
3262 tree lhs;
3263
3264 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3265 return false;
3266
3267 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
3268 && ! vec_stmt)
3269 return false;
3270
3271 /* Is STMT_INFO a vectorizable call? */
3272 stmt = dyn_cast <gcall *> (stmt_info->stmt);
3273 if (!stmt)
3274 return false;
3275
3276 if (gimple_call_internal_p (stmt)
3277 && (internal_load_fn_p (gimple_call_internal_fn (stmt))
3278 || internal_store_fn_p (gimple_call_internal_fn (stmt))))
3279 /* Handled by vectorizable_load and vectorizable_store. */
3280 return false;
3281
3282 if (gimple_call_lhs (stmt) == NULL_TREE
3283 || TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
3284 return false;
3285
3286 gcc_checking_assert (!stmt_can_throw_internal (cfun, stmt));
3287
3288 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
3289
3290 /* Process function arguments. */
3291 rhs_type = NULL_TREE;
3292 vectype_in = NULL_TREE;
3293 nargs = gimple_call_num_args (stmt);
3294
3295 /* Bail out if the function has more than four arguments; we do not have
3296 interesting builtin functions to vectorize with more than two arguments
3297 except for fma. No arguments is also not good. */
3298 if (nargs == 0 || nargs > 4)
3299 return false;
3300
3301 /* Ignore the arguments of IFN_GOMP_SIMD_LANE; they are magic. */
3302 combined_fn cfn = gimple_call_combined_fn (stmt);
3303 if (cfn == CFN_GOMP_SIMD_LANE)
3304 {
3305 nargs = 0;
3306 rhs_type = unsigned_type_node;
3307 }
3308
3309 int mask_opno = -1;
3310 if (internal_fn_p (cfn))
3311 mask_opno = internal_fn_mask_index (as_internal_fn (cfn));
3312
3313 for (i = 0; i < nargs; i++)
3314 {
3315 op = gimple_call_arg (stmt, i);
3316
3317 if ((int) i == mask_opno)
3318 {
3319 if (!vect_check_scalar_mask (stmt_info, op, &dt[i], &vectypes[i]))
3320 return false;
3321 continue;
3322 }
3323
3324 if (!vect_is_simple_use (op, vinfo, &dt[i], &vectypes[i]))
3325 {
3326 if (dump_enabled_p ())
3327 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3328 "use not simple.\n");
3329 return false;
3330 }
3331
3332 /* We can only handle calls with arguments of the same type. */
3333 if (rhs_type
3334 && !types_compatible_p (rhs_type, TREE_TYPE (op)))
3335 {
3336 if (dump_enabled_p ())
3337 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3338 "argument types differ.\n");
3339 return false;
3340 }
3341 if (!rhs_type)
3342 rhs_type = TREE_TYPE (op);
3343
3344 if (!vectype_in)
3345 vectype_in = vectypes[i];
3346 else if (vectypes[i]
3347 && !types_compatible_p (vectypes[i], vectype_in))
3348 {
3349 if (dump_enabled_p ())
3350 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3351 "argument vector types differ.\n");
3352 return false;
3353 }
3354 }
3355 /* If all arguments are external or constant defs, infer the vector type
3356 from the scalar type. */
3357 if (!vectype_in)
3358 vectype_in = get_vectype_for_scalar_type (vinfo, rhs_type, slp_node);
3359 if (vec_stmt)
3360 gcc_assert (vectype_in);
3361 if (!vectype_in)
3362 {
3363 if (dump_enabled_p ())
3364 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3365 "no vectype for scalar type %T\n", rhs_type);
3366
3367 return false;
3368 }
3369 /* FORNOW: we don't yet support mixtures of vector sizes for calls,
3370 just mixtures of nunits. E.g. DI->SI versions of __builtin_ctz*
3371 are traditionally vectorized as two VnDI->VnDI IFN_CTZs followed
3372 by a pack of the two vectors into an SI vector. We would need
3373 separate code to handle direct VnDI->VnSI IFN_CTZs. */
3374 if (TYPE_SIZE (vectype_in) != TYPE_SIZE (vectype_out))
3375 {
3376 if (dump_enabled_p ())
3377 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3378 "mismatched vector sizes %T and %T\n",
3379 vectype_in, vectype_out);
3380 return false;
3381 }
3382
3383 if (VECTOR_BOOLEAN_TYPE_P (vectype_out)
3384 != VECTOR_BOOLEAN_TYPE_P (vectype_in))
3385 {
3386 if (dump_enabled_p ())
3387 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3388 "mixed mask and nonmask vector types\n");
3389 return false;
3390 }
3391
3392 /* FORNOW */
3393 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
3394 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
3395 if (known_eq (nunits_in * 2, nunits_out))
3396 modifier = NARROW;
3397 else if (known_eq (nunits_out, nunits_in))
3398 modifier = NONE;
3399 else if (known_eq (nunits_out * 2, nunits_in))
3400 modifier = WIDEN;
3401 else
3402 return false;
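  /* E.g. a V4SF->V4SI call leaves MODIFIER as NONE, while a DI->SI call
     vectorized with a V2DI input type and a V4SI output type is NARROW:
     each output vector packs the results of two input vectors, as in
     the __builtin_ctz example above.  */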
3403
3404 /* We only handle functions that do not read or clobber memory. */
3405 if (gimple_vuse (stmt))
3406 {
3407 if (dump_enabled_p ())
3408 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3409 "function reads from or writes to memory.\n");
3410 return false;
3411 }
3412
3413 /* For now, we only vectorize functions if a target-specific builtin
3414 is available. TODO -- in some cases, it might be profitable to
3415 insert the calls for pieces of the vector, in order to be able
3416 to vectorize other operations in the loop. */
3417 fndecl = NULL_TREE;
3418 internal_fn ifn = IFN_LAST;
3419 tree callee = gimple_call_fndecl (stmt);
3420
3421 /* First try using an internal function. */
3422 tree_code convert_code = ERROR_MARK;
3423 if (cfn != CFN_LAST
3424 && (modifier == NONE
3425 || (modifier == NARROW
3426 && simple_integer_narrowing (vectype_out, vectype_in,
3427 &convert_code))))
3428 ifn = vectorizable_internal_function (cfn, callee, vectype_out,
3429 vectype_in);
3430
3431 /* If that fails, try asking for a target-specific built-in function. */
3432 if (ifn == IFN_LAST)
3433 {
3434 if (cfn != CFN_LAST)
3435 fndecl = targetm.vectorize.builtin_vectorized_function
3436 (cfn, vectype_out, vectype_in);
3437 else if (callee && fndecl_built_in_p (callee, BUILT_IN_MD))
3438 fndecl = targetm.vectorize.builtin_md_vectorized_function
3439 (callee, vectype_out, vectype_in);
3440 }
3441
3442 if (ifn == IFN_LAST && !fndecl)
3443 {
3444 if (cfn == CFN_GOMP_SIMD_LANE
3445 && !slp_node
3446 && loop_vinfo
3447 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
3448 && TREE_CODE (gimple_call_arg (stmt, 0)) == SSA_NAME
3449 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
3450 == SSA_NAME_VAR (gimple_call_arg (stmt, 0)))
3451 {
3452 /* We can handle IFN_GOMP_SIMD_LANE by returning a
3453 { 0, 1, 2, ... vf - 1 } vector. */
3454 gcc_assert (nargs == 0);
3455 }
3456 else if (modifier == NONE
3457 && (gimple_call_builtin_p (stmt, BUILT_IN_BSWAP16)
3458 || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP32)
3459 || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP64)))
3460 return vectorizable_bswap (stmt_info, gsi, vec_stmt, slp_node,
3461 vectype_in, cost_vec);
3462 else
3463 {
3464 if (dump_enabled_p ())
3465 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3466 "function is not vectorizable.\n");
3467 return false;
3468 }
3469 }
3470
3471 if (slp_node)
3472 ncopies = 1;
3473 else if (modifier == NARROW && ifn == IFN_LAST)
3474 ncopies = vect_get_num_copies (loop_vinfo, vectype_out);
3475 else
3476 ncopies = vect_get_num_copies (loop_vinfo, vectype_in);
3477
3478 /* Sanity check: make sure that at least one copy of the vectorized stmt
3479 needs to be generated. */
3480 gcc_assert (ncopies >= 1);
3481
3482 vec_loop_masks *masks = (loop_vinfo ? &LOOP_VINFO_MASKS (loop_vinfo) : NULL);
3483 if (!vec_stmt) /* transformation not required. */
3484 {
3485 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
3486 DUMP_VECT_SCOPE ("vectorizable_call");
3487 vect_model_simple_cost (stmt_info, ncopies, dt, ndts, slp_node, cost_vec);
3488 if (ifn != IFN_LAST && modifier == NARROW && !slp_node)
3489 record_stmt_cost (cost_vec, ncopies / 2,
3490 vec_promote_demote, stmt_info, 0, vect_body);
3491
3492 if (loop_vinfo && mask_opno >= 0)
3493 {
3494 unsigned int nvectors = (slp_node
3495 ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node)
3496 : ncopies);
3497 tree scalar_mask = gimple_call_arg (stmt_info->stmt, mask_opno);
3498 vect_record_loop_mask (loop_vinfo, masks, nvectors,
3499 vectype_out, scalar_mask);
3500 }
3501 return true;
3502 }
3503
3504 /* Transform. */
3505
3506 if (dump_enabled_p ())
3507 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
3508
3509 /* Handle def. */
3510 scalar_dest = gimple_call_lhs (stmt);
3511 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
3512
3513 bool masked_loop_p = loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo);
3514
3515 stmt_vec_info new_stmt_info = NULL;
3516 prev_stmt_info = NULL;
3517 if (modifier == NONE || ifn != IFN_LAST)
3518 {
3519 tree prev_res = NULL_TREE;
3520 vargs.safe_grow (nargs);
3521 orig_vargs.safe_grow (nargs);
3522 for (j = 0; j < ncopies; ++j)
3523 {
3524 /* Build argument list for the vectorized call. */
3525 if (slp_node)
3526 {
3527 auto_vec<vec<tree> > vec_defs (nargs);
3528 vec<tree> vec_oprnds0;
3529
3530 vect_get_slp_defs (slp_node, &vec_defs);
3531 vec_oprnds0 = vec_defs[0];
3532
3533 /* Arguments are ready. Create the new vector stmt. */
3534 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_oprnd0)
3535 {
3536 size_t k;
3537 for (k = 0; k < nargs; k++)
3538 {
3539 vec<tree> vec_oprndsk = vec_defs[k];
3540 vargs[k] = vec_oprndsk[i];
3541 }
3542 if (modifier == NARROW)
3543 {
3544 /* We don't define any narrowing conditional functions
3545 at present. */
3546 gcc_assert (mask_opno < 0);
3547 tree half_res = make_ssa_name (vectype_in);
3548 gcall *call
3549 = gimple_build_call_internal_vec (ifn, vargs);
3550 gimple_call_set_lhs (call, half_res);
3551 gimple_call_set_nothrow (call, true);
3552 vect_finish_stmt_generation (stmt_info, call, gsi);
3553 if ((i & 1) == 0)
3554 {
3555 prev_res = half_res;
3556 continue;
3557 }
3558 new_temp = make_ssa_name (vec_dest);
3559 gimple *new_stmt
3560 = gimple_build_assign (new_temp, convert_code,
3561 prev_res, half_res);
3562 new_stmt_info
3563 = vect_finish_stmt_generation (stmt_info, new_stmt,
3564 gsi);
3565 }
3566 else
3567 {
3568 if (mask_opno >= 0 && masked_loop_p)
3569 {
3570 unsigned int vec_num = vec_oprnds0.length ();
3571 /* Always true for SLP. */
3572 gcc_assert (ncopies == 1);
3573 tree mask = vect_get_loop_mask (gsi, masks, vec_num,
3574 vectype_out, i);
3575 vargs[mask_opno] = prepare_load_store_mask
3576 (TREE_TYPE (mask), mask, vargs[mask_opno], gsi);
3577 }
3578
3579 gcall *call;
3580 if (ifn != IFN_LAST)
3581 call = gimple_build_call_internal_vec (ifn, vargs);
3582 else
3583 call = gimple_build_call_vec (fndecl, vargs);
3584 new_temp = make_ssa_name (vec_dest, call);
3585 gimple_call_set_lhs (call, new_temp);
3586 gimple_call_set_nothrow (call, true);
3587 new_stmt_info
3588 = vect_finish_stmt_generation (stmt_info, call, gsi);
3589 }
3590 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
3591 }
3592
3593 for (i = 0; i < nargs; i++)
3594 {
3595 vec<tree> vec_oprndsi = vec_defs[i];
3596 vec_oprndsi.release ();
3597 }
3598 continue;
3599 }
3600
3601 for (i = 0; i < nargs; i++)
3602 {
3603 op = gimple_call_arg (stmt, i);
3604 if (j == 0)
3605 vec_oprnd0
3606 = vect_get_vec_def_for_operand (op, stmt_info, vectypes[i]);
3607 else
3608 vec_oprnd0
3609 = vect_get_vec_def_for_stmt_copy (vinfo, orig_vargs[i]);
3610
3611 orig_vargs[i] = vargs[i] = vec_oprnd0;
3612 }
3613
3614 if (mask_opno >= 0 && masked_loop_p)
3615 {
3616 tree mask = vect_get_loop_mask (gsi, masks, ncopies,
3617 vectype_out, j);
3618 vargs[mask_opno]
3619 = prepare_load_store_mask (TREE_TYPE (mask), mask,
3620 vargs[mask_opno], gsi);
3621 }
3622
3623 if (cfn == CFN_GOMP_SIMD_LANE)
3624 {
3625 tree cst = build_index_vector (vectype_out, j * nunits_out, 1);
3626 tree new_var
3627 = vect_get_new_ssa_name (vectype_out, vect_simple_var, "cst_");
3628 gimple *init_stmt = gimple_build_assign (new_var, cst);
3629 vect_init_vector_1 (stmt_info, init_stmt, NULL);
3630 new_temp = make_ssa_name (vec_dest);
3631 gimple *new_stmt = gimple_build_assign (new_temp, new_var);
3632 new_stmt_info
3633 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
3634 }
3635 else if (modifier == NARROW)
3636 {
3637 /* We don't define any narrowing conditional functions at
3638 present. */
3639 gcc_assert (mask_opno < 0);
3640 tree half_res = make_ssa_name (vectype_in);
3641 gcall *call = gimple_build_call_internal_vec (ifn, vargs);
3642 gimple_call_set_lhs (call, half_res);
3643 gimple_call_set_nothrow (call, true);
3644 vect_finish_stmt_generation (stmt_info, call, gsi);
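/* Consecutive copies are paired: an even-numbered copy only records its
   half-width result, and the following odd copy packs the two halves
   into one full vector with CONVERT_CODE.  */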
3645 if ((j & 1) == 0)
3646 {
3647 prev_res = half_res;
3648 continue;
3649 }
3650 new_temp = make_ssa_name (vec_dest);
3651 gassign *new_stmt = gimple_build_assign (new_temp, convert_code,
3652 prev_res, half_res);
3653 new_stmt_info
3654 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
3655 }
3656 else
3657 {
3658 gcall *call;
3659 if (ifn != IFN_LAST)
3660 call = gimple_build_call_internal_vec (ifn, vargs);
3661 else
3662 call = gimple_build_call_vec (fndecl, vargs);
3663 new_temp = make_ssa_name (vec_dest, call);
3664 gimple_call_set_lhs (call, new_temp);
3665 gimple_call_set_nothrow (call, true);
3666 new_stmt_info
3667 = vect_finish_stmt_generation (stmt_info, call, gsi);
3668 }
3669
3670 if (j == (modifier == NARROW ? 1 : 0))
3671 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
3672 else
3673 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
3674
3675 prev_stmt_info = new_stmt_info;
3676 }
3677 }
3678 else if (modifier == NARROW)
3679 {
3680 /* We don't define any narrowing conditional functions at present. */
3681 gcc_assert (mask_opno < 0);
3682 for (j = 0; j < ncopies; ++j)
3683 {
3684 /* Build argument list for the vectorized call. */
3685 if (j == 0)
3686 vargs.create (nargs * 2);
3687 else
3688 vargs.truncate (0);
3689
3690 if (slp_node)
3691 {
3692 auto_vec<vec<tree> > vec_defs (nargs);
3693 vec<tree> vec_oprnds0;
3694
3695 vect_get_slp_defs (slp_node, &vec_defs);
3696 vec_oprnds0 = vec_defs[0];
3697
3698 /* Arguments are ready. Create the new vector stmt. */
3699 for (i = 0; vec_oprnds0.iterate (i, &vec_oprnd0); i += 2)
3700 {
3701 size_t k;
3702 vargs.truncate (0);
3703 for (k = 0; k < nargs; k++)
3704 {
3705 vec<tree> vec_oprndsk = vec_defs[k];
3706 vargs.quick_push (vec_oprndsk[i]);
3707 vargs.quick_push (vec_oprndsk[i + 1]);
3708 }
3709 gcall *call;
3710 if (ifn != IFN_LAST)
3711 call = gimple_build_call_internal_vec (ifn, vargs);
3712 else
3713 call = gimple_build_call_vec (fndecl, vargs);
3714 new_temp = make_ssa_name (vec_dest, call);
3715 gimple_call_set_lhs (call, new_temp);
3716 gimple_call_set_nothrow (call, true);
3717 new_stmt_info
3718 = vect_finish_stmt_generation (stmt_info, call, gsi);
3719 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
3720 }
3721
3722 for (i = 0; i < nargs; i++)
3723 {
3724 vec<tree> vec_oprndsi = vec_defs[i];
3725 vec_oprndsi.release ();
3726 }
3727 continue;
3728 }
3729
3730 for (i = 0; i < nargs; i++)
3731 {
3732 op = gimple_call_arg (stmt, i);
3733 if (j == 0)
3734 {
3735 vec_oprnd0
3736 = vect_get_vec_def_for_operand (op, stmt_info,
3737 vectypes[i]);
3738 vec_oprnd1
3739 = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd0);
3740 }
3741 else
3742 {
3743 vec_oprnd1 = gimple_call_arg (new_stmt_info->stmt,
3744 2 * i + 1);
3745 vec_oprnd0
3746 = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd1);
3747 vec_oprnd1
3748 = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd0);
3749 }
3750
3751 vargs.quick_push (vec_oprnd0);
3752 vargs.quick_push (vec_oprnd1);
3753 }
3754
3755 gcall *new_stmt = gimple_build_call_vec (fndecl, vargs);
3756 new_temp = make_ssa_name (vec_dest, new_stmt);
3757 gimple_call_set_lhs (new_stmt, new_temp);
3758 new_stmt_info
3759 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
3760
3761 if (j == 0)
3762 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt_info;
3763 else
3764 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
3765
3766 prev_stmt_info = new_stmt_info;
3767 }
3768
3769 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
3770 }
3771 else
3772 /* No current target implements this case. */
3773 return false;
3774
3775 vargs.release ();
3776
3777 /* The call in STMT might prevent it from being removed in dce.
3778 We however cannot remove it here, due to the way the ssa name
3779 it defines is mapped to the new definition. So just replace the
3780 rhs of the statement with something harmless. */
3781
3782 if (slp_node)
3783 return true;
3784
3785 stmt_info = vect_orig_stmt (stmt_info);
3786 lhs = gimple_get_lhs (stmt_info->stmt);
3787
3788 gassign *new_stmt
3789 = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
3790 vinfo->replace_stmt (gsi, stmt_info, new_stmt);
3791
3792 return true;
3793 }
3794
3795
3796 struct simd_call_arg_info
3797 {
3798 tree vectype;
3799 tree op;
3800 HOST_WIDE_INT linear_step;
3801 enum vect_def_type dt;
3802 unsigned int align;
3803 bool simd_lane_linear;
3804 };
3805
3806 /* Helper function of vectorizable_simd_clone_call. If OP, an SSA_NAME,
3807 is linear within simd lane (but not within whole loop), note it in
3808 *ARGINFO. */
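/* For example, given (a GIMPLE sketch; the SSA names are illustrative)
     _1 = GOMP_SIMD_LANE (simduid.0_5(D));
     _2 = _1 * 4;
     op_3 = base_4 + _2;
   OP is linear within the simd lane: ARGINFO records base_4 as the base
   and 4 as the linear step.  */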
3809
3810 static void
3811 vect_simd_lane_linear (tree op, class loop *loop,
3812 struct simd_call_arg_info *arginfo)
3813 {
3814 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
3815
3816 if (!is_gimple_assign (def_stmt)
3817 || gimple_assign_rhs_code (def_stmt) != POINTER_PLUS_EXPR
3818 || !is_gimple_min_invariant (gimple_assign_rhs1 (def_stmt)))
3819 return;
3820
3821 tree base = gimple_assign_rhs1 (def_stmt);
3822 HOST_WIDE_INT linear_step = 0;
3823 tree v = gimple_assign_rhs2 (def_stmt);
3824 while (TREE_CODE (v) == SSA_NAME)
3825 {
3826 tree t;
3827 def_stmt = SSA_NAME_DEF_STMT (v);
3828 if (is_gimple_assign (def_stmt))
3829 switch (gimple_assign_rhs_code (def_stmt))
3830 {
3831 case PLUS_EXPR:
3832 t = gimple_assign_rhs2 (def_stmt);
3833 if (linear_step || TREE_CODE (t) != INTEGER_CST)
3834 return;
3835 base = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (base), base, t);
3836 v = gimple_assign_rhs1 (def_stmt);
3837 continue;
3838 case MULT_EXPR:
3839 t = gimple_assign_rhs2 (def_stmt);
3840 if (linear_step || !tree_fits_shwi_p (t) || integer_zerop (t))
3841 return;
3842 linear_step = tree_to_shwi (t);
3843 v = gimple_assign_rhs1 (def_stmt);
3844 continue;
3845 CASE_CONVERT:
3846 t = gimple_assign_rhs1 (def_stmt);
3847 if (TREE_CODE (TREE_TYPE (t)) != INTEGER_TYPE
3848 || (TYPE_PRECISION (TREE_TYPE (v))
3849 < TYPE_PRECISION (TREE_TYPE (t))))
3850 return;
3851 if (!linear_step)
3852 linear_step = 1;
3853 v = t;
3854 continue;
3855 default:
3856 return;
3857 }
3858 else if (gimple_call_internal_p (def_stmt, IFN_GOMP_SIMD_LANE)
3859 && loop->simduid
3860 && TREE_CODE (gimple_call_arg (def_stmt, 0)) == SSA_NAME
3861 && (SSA_NAME_VAR (gimple_call_arg (def_stmt, 0))
3862 == loop->simduid))
3863 {
3864 if (!linear_step)
3865 linear_step = 1;
3866 arginfo->linear_step = linear_step;
3867 arginfo->op = base;
3868 arginfo->simd_lane_linear = true;
3869 return;
3870 }
3871 }
3872 }
3873
3874 /* Return the number of elements in vector type VECTYPE, which is associated
3875 with a SIMD clone. At present these vectors always have a constant
3876 length. */
3877
3878 static unsigned HOST_WIDE_INT
3879 simd_clone_subparts (tree vectype)
3880 {
3881 return TYPE_VECTOR_SUBPARTS (vectype).to_constant ();
3882 }
3883
3884 /* Function vectorizable_simd_clone_call.
3885
3886 Check if STMT_INFO performs a function call that can be vectorized
3887 by calling a simd clone of the function.
3888 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
3889 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
3890 Return true if STMT_INFO is vectorizable in this way. */
3891
3892 static bool
3893 vectorizable_simd_clone_call (stmt_vec_info stmt_info,
3894 gimple_stmt_iterator *gsi,
3895 stmt_vec_info *vec_stmt, slp_tree slp_node,
3896 stmt_vector_for_cost *)
3897 {
3898 tree vec_dest;
3899 tree scalar_dest;
3900 tree op, type;
3901 tree vec_oprnd0 = NULL_TREE;
3902 stmt_vec_info prev_stmt_info;
3903 tree vectype;
3904 unsigned int nunits;
3905 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3906 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
3907 vec_info *vinfo = stmt_info->vinfo;
3908 class loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
3909 tree fndecl, new_temp;
3910 int ncopies, j;
3911 auto_vec<simd_call_arg_info> arginfo;
3912 vec<tree> vargs = vNULL;
3913 size_t i, nargs;
3914 tree lhs, rtype, ratype;
3915 vec<constructor_elt, va_gc> *ret_ctor_elts = NULL;
3916
3917 /* Is STMT_INFO a vectorizable call? */
3918 gcall *stmt = dyn_cast <gcall *> (stmt_info->stmt);
3919 if (!stmt)
3920 return false;
3921
3922 fndecl = gimple_call_fndecl (stmt);
3923 if (fndecl == NULL_TREE)
3924 return false;
3925
3926 struct cgraph_node *node = cgraph_node::get (fndecl);
3927 if (node == NULL || node->simd_clones == NULL)
3928 return false;
3929
3930 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3931 return false;
3932
3933 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
3934 && ! vec_stmt)
3935 return false;
3936
3937 if (gimple_call_lhs (stmt)
3938 && TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
3939 return false;
3940
3941 gcc_checking_assert (!stmt_can_throw_internal (cfun, stmt));
3942
3943 vectype = STMT_VINFO_VECTYPE (stmt_info);
3944
3945 if (loop_vinfo && nested_in_vect_loop_p (loop, stmt_info))
3946 return false;
3947
3948 /* FORNOW */
3949 if (slp_node)
3950 return false;
3951
3952 /* Process function arguments. */
3953 nargs = gimple_call_num_args (stmt);
3954
3955 /* Bail out if the function has zero arguments. */
3956 if (nargs == 0)
3957 return false;
3958
3959 arginfo.reserve (nargs, true);
3960
3961 for (i = 0; i < nargs; i++)
3962 {
3963 simd_call_arg_info thisarginfo;
3964 affine_iv iv;
3965
3966 thisarginfo.linear_step = 0;
3967 thisarginfo.align = 0;
3968 thisarginfo.op = NULL_TREE;
3969 thisarginfo.simd_lane_linear = false;
3970
3971 op = gimple_call_arg (stmt, i);
3972 if (!vect_is_simple_use (op, vinfo, &thisarginfo.dt,
3973 &thisarginfo.vectype)
3974 || thisarginfo.dt == vect_uninitialized_def)
3975 {
3976 if (dump_enabled_p ())
3977 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3978 "use not simple.\n");
3979 return false;
3980 }
3981
3982 if (thisarginfo.dt == vect_constant_def
3983 || thisarginfo.dt == vect_external_def)
3984 gcc_assert (thisarginfo.vectype == NULL_TREE);
3985 else
3986 {
3987 gcc_assert (thisarginfo.vectype != NULL_TREE);
3988 if (VECTOR_BOOLEAN_TYPE_P (thisarginfo.vectype))
3989 {
3990 if (dump_enabled_p ())
3991 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3992 "vector mask arguments are not supported\n");
3993 return false;
3994 }
3995 }
3996
3997 /* For linear arguments, the analyze phase should have saved
3998 the base and step in STMT_VINFO_SIMD_CLONE_INFO. */
3999 if (i * 3 + 4 <= STMT_VINFO_SIMD_CLONE_INFO (stmt_info).length ()
4000 && STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2])
4001 {
4002 gcc_assert (vec_stmt);
4003 thisarginfo.linear_step
4004 = tree_to_shwi (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2]);
4005 thisarginfo.op
4006 = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 1];
4007 thisarginfo.simd_lane_linear
4008 = (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 3]
4009 == boolean_true_node);
4010 /* If the loop has been peeled for alignment, we need to adjust it. */
4011 tree n1 = LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo);
4012 tree n2 = LOOP_VINFO_NITERS (loop_vinfo);
4013 if (n1 != n2 && !thisarginfo.simd_lane_linear)
4014 {
4015 tree bias = fold_build2 (MINUS_EXPR, TREE_TYPE (n1), n1, n2);
4016 tree step = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2];
4017 tree opt = TREE_TYPE (thisarginfo.op);
4018 bias = fold_convert (TREE_TYPE (step), bias);
4019 bias = fold_build2 (MULT_EXPR, TREE_TYPE (step), bias, step);
4020 thisarginfo.op
4021 = fold_build2 (POINTER_TYPE_P (opt)
4022 ? POINTER_PLUS_EXPR : PLUS_EXPR, opt,
4023 thisarginfo.op, bias);
4024 }
4025 }
4026 else if (!vec_stmt
4027 && thisarginfo.dt != vect_constant_def
4028 && thisarginfo.dt != vect_external_def
4029 && loop_vinfo
4030 && TREE_CODE (op) == SSA_NAME
4031 && simple_iv (loop, loop_containing_stmt (stmt), op,
4032 &iv, false)
4033 && tree_fits_shwi_p (iv.step))
4034 {
4035 thisarginfo.linear_step = tree_to_shwi (iv.step);
4036 thisarginfo.op = iv.base;
4037 }
4038 else if ((thisarginfo.dt == vect_constant_def
4039 || thisarginfo.dt == vect_external_def)
4040 && POINTER_TYPE_P (TREE_TYPE (op)))
4041 thisarginfo.align = get_pointer_alignment (op) / BITS_PER_UNIT;
4042 /* Addresses of array elements indexed by GOMP_SIMD_LANE are
4043 linear too. */
4044 if (POINTER_TYPE_P (TREE_TYPE (op))
4045 && !thisarginfo.linear_step
4046 && !vec_stmt
4047 && thisarginfo.dt != vect_constant_def
4048 && thisarginfo.dt != vect_external_def
4049 && loop_vinfo
4050 && !slp_node
4051 && TREE_CODE (op) == SSA_NAME)
4052 vect_simd_lane_linear (op, loop, &thisarginfo);
4053
4054 arginfo.quick_push (thisarginfo);
4055 }
4056
4057 unsigned HOST_WIDE_INT vf;
4058 if (!LOOP_VINFO_VECT_FACTOR (loop_vinfo).is_constant (&vf))
4059 {
4060 if (dump_enabled_p ())
4061 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4062 "not considering SIMD clones; not yet supported"
4063 " for variable-width vectors.\n");
4064 return false;
4065 }
4066
4067 unsigned int badness = 0;
4068 struct cgraph_node *bestn = NULL;
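/* Choose the clone with the smallest badness.  E.g. with vf == 8, an
   simdlen == 8 clone is preferred to an simdlen == 4 one (each halving
   of simdlen costs 1024), each unit of target badness costs 512, and
   in-branch clones cost 2048 (and are in fact still skipped entirely
   below).  */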
4069 if (STMT_VINFO_SIMD_CLONE_INFO (stmt_info).exists ())
4070 bestn = cgraph_node::get (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[0]);
4071 else
4072 for (struct cgraph_node *n = node->simd_clones; n != NULL;
4073 n = n->simdclone->next_clone)
4074 {
4075 unsigned int this_badness = 0;
4076 if (n->simdclone->simdlen > vf
4077 || n->simdclone->nargs != nargs)
4078 continue;
4079 if (n->simdclone->simdlen < vf)
4080 this_badness += (exact_log2 (vf)
4081 - exact_log2 (n->simdclone->simdlen)) * 1024;
4082 if (n->simdclone->inbranch)
4083 this_badness += 2048;
4084 int target_badness = targetm.simd_clone.usable (n);
4085 if (target_badness < 0)
4086 continue;
4087 this_badness += target_badness * 512;
4088 /* FORNOW: we do not yet emit the mask argument, so in-branch clones are skipped. */
4089 if (n->simdclone->inbranch)
4090 continue;
4091 for (i = 0; i < nargs; i++)
4092 {
4093 switch (n->simdclone->args[i].arg_type)
4094 {
4095 case SIMD_CLONE_ARG_TYPE_VECTOR:
4096 if (!useless_type_conversion_p
4097 (n->simdclone->args[i].orig_type,
4098 TREE_TYPE (gimple_call_arg (stmt, i))))
4099 i = -1;
4100 else if (arginfo[i].dt == vect_constant_def
4101 || arginfo[i].dt == vect_external_def
4102 || arginfo[i].linear_step)
4103 this_badness += 64;
4104 break;
4105 case SIMD_CLONE_ARG_TYPE_UNIFORM:
4106 if (arginfo[i].dt != vect_constant_def
4107 && arginfo[i].dt != vect_external_def)
4108 i = -1;
4109 break;
4110 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
4111 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
4112 if (arginfo[i].dt == vect_constant_def
4113 || arginfo[i].dt == vect_external_def
4114 || (arginfo[i].linear_step
4115 != n->simdclone->args[i].linear_step))
4116 i = -1;
4117 break;
4118 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
4119 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
4120 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP:
4121 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP:
4122 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
4123 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
4124 /* FORNOW */
4125 i = -1;
4126 break;
4127 case SIMD_CLONE_ARG_TYPE_MASK:
4128 gcc_unreachable ();
4129 }
4130 if (i == (size_t) -1)
4131 break;
4132 if (n->simdclone->args[i].alignment > arginfo[i].align)
4133 {
4134 i = -1;
4135 break;
4136 }
4137 if (arginfo[i].align)
4138 this_badness += (exact_log2 (arginfo[i].align)
4139 - exact_log2 (n->simdclone->args[i].alignment));
4140 }
4141 if (i == (size_t) -1)
4142 continue;
4143 if (bestn == NULL || this_badness < badness)
4144 {
4145 bestn = n;
4146 badness = this_badness;
4147 }
4148 }
4149
4150 if (bestn == NULL)
4151 return false;
4152
4153 for (i = 0; i < nargs; i++)
4154 if ((arginfo[i].dt == vect_constant_def
4155 || arginfo[i].dt == vect_external_def)
4156 && bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_VECTOR)
4157 {
4158 tree arg_type = TREE_TYPE (gimple_call_arg (stmt, i));
4159 arginfo[i].vectype = get_vectype_for_scalar_type (vinfo, arg_type,
4160 slp_node);
4161 if (arginfo[i].vectype == NULL
4162 || (simd_clone_subparts (arginfo[i].vectype)
4163 > bestn->simdclone->simdlen))
4164 return false;
4165 }
4166
4167 fndecl = bestn->decl;
4168 nunits = bestn->simdclone->simdlen;
4169 ncopies = vf / nunits;
4170
4171 /* If the function isn't const, only allow it in simd loops where the
4172 user has asserted that at least nunits consecutive iterations can be
4173 performed using SIMD instructions. */
4174 if ((loop == NULL || (unsigned) loop->safelen < nunits)
4175 && gimple_vuse (stmt))
4176 return false;
4177
4178 /* Sanity check: make sure that at least one copy of the vectorized stmt
4179 needs to be generated. */
4180 gcc_assert (ncopies >= 1);
4181
4182 if (!vec_stmt) /* transformation not required. */
4183 {
4184 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (bestn->decl);
4185 for (i = 0; i < nargs; i++)
4186 if ((bestn->simdclone->args[i].arg_type
4187 == SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP)
4188 || (bestn->simdclone->args[i].arg_type
4189 == SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP))
4190 {
4191 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_grow_cleared (i * 3
4192 + 1);
4193 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (arginfo[i].op);
4194 tree lst = POINTER_TYPE_P (TREE_TYPE (arginfo[i].op))
4195 ? size_type_node : TREE_TYPE (arginfo[i].op);
4196 tree ls = build_int_cst (lst, arginfo[i].linear_step);
4197 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (ls);
4198 tree sll = arginfo[i].simd_lane_linear
4199 ? boolean_true_node : boolean_false_node;
4200 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (sll);
4201 }
4202 STMT_VINFO_TYPE (stmt_info) = call_simd_clone_vec_info_type;
4203 DUMP_VECT_SCOPE ("vectorizable_simd_clone_call");
4204 /* vect_model_simple_cost (stmt_info, ncopies, dt, slp_node, cost_vec); */
4205 return true;
4206 }
4207
4208 /* Transform. */
4209
4210 if (dump_enabled_p ())
4211 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
4212
4213 /* Handle def. */
4214 scalar_dest = gimple_call_lhs (stmt);
4215 vec_dest = NULL_TREE;
4216 rtype = NULL_TREE;
4217 ratype = NULL_TREE;
4218 if (scalar_dest)
4219 {
4220 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4221 rtype = TREE_TYPE (TREE_TYPE (fndecl));
4222 if (TREE_CODE (rtype) == ARRAY_TYPE)
4223 {
4224 ratype = rtype;
4225 rtype = TREE_TYPE (ratype);
4226 }
4227 }
4228
4229 prev_stmt_info = NULL;
4230 for (j = 0; j < ncopies; ++j)
4231 {
4232 /* Build argument list for the vectorized call. */
4233 if (j == 0)
4234 vargs.create (nargs);
4235 else
4236 vargs.truncate (0);
4237
4238 for (i = 0; i < nargs; i++)
4239 {
4240 unsigned int k, l, m, o;
4241 tree atype;
4242 op = gimple_call_arg (stmt, i);
4243 switch (bestn->simdclone->args[i].arg_type)
4244 {
4245 case SIMD_CLONE_ARG_TYPE_VECTOR:
4246 atype = bestn->simdclone->args[i].vector_type;
4247 o = nunits / simd_clone_subparts (atype);
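/* The clone may take its vector argument in narrower pieces than
   ARGINFO[I].VECTYPE (extracted below with BIT_FIELD_REFs) or in wider
   chunks (assembled below with a CONSTRUCTOR of K input vectors).  */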
4248 for (m = j * o; m < (j + 1) * o; m++)
4249 {
4250 if (simd_clone_subparts (atype)
4251 < simd_clone_subparts (arginfo[i].vectype))
4252 {
4253 poly_uint64 prec = GET_MODE_BITSIZE (TYPE_MODE (atype));
4254 k = (simd_clone_subparts (arginfo[i].vectype)
4255 / simd_clone_subparts (atype));
4256 gcc_assert ((k & (k - 1)) == 0);
4257 if (m == 0)
4258 vec_oprnd0
4259 = vect_get_vec_def_for_operand (op, stmt_info);
4260 else
4261 {
4262 vec_oprnd0 = arginfo[i].op;
4263 if ((m & (k - 1)) == 0)
4264 vec_oprnd0
4265 = vect_get_vec_def_for_stmt_copy (vinfo,
4266 vec_oprnd0);
4267 }
4268 arginfo[i].op = vec_oprnd0;
4269 vec_oprnd0
4270 = build3 (BIT_FIELD_REF, atype, vec_oprnd0,
4271 bitsize_int (prec),
4272 bitsize_int ((m & (k - 1)) * prec));
4273 gassign *new_stmt
4274 = gimple_build_assign (make_ssa_name (atype),
4275 vec_oprnd0);
4276 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
4277 vargs.safe_push (gimple_assign_lhs (new_stmt));
4278 }
4279 else
4280 {
4281 k = (simd_clone_subparts (atype)
4282 / simd_clone_subparts (arginfo[i].vectype));
4283 gcc_assert ((k & (k - 1)) == 0);
4284 vec<constructor_elt, va_gc> *ctor_elts;
4285 if (k != 1)
4286 vec_alloc (ctor_elts, k);
4287 else
4288 ctor_elts = NULL;
4289 for (l = 0; l < k; l++)
4290 {
4291 if (m == 0 && l == 0)
4292 vec_oprnd0
4293 = vect_get_vec_def_for_operand (op, stmt_info);
4294 else
4295 vec_oprnd0
4296 = vect_get_vec_def_for_stmt_copy (vinfo,
4297 arginfo[i].op);
4298 arginfo[i].op = vec_oprnd0;
4299 if (k == 1)
4300 break;
4301 CONSTRUCTOR_APPEND_ELT (ctor_elts, NULL_TREE,
4302 vec_oprnd0);
4303 }
4304 if (k == 1)
4305 vargs.safe_push (vec_oprnd0);
4306 else
4307 {
4308 vec_oprnd0 = build_constructor (atype, ctor_elts);
4309 gassign *new_stmt
4310 = gimple_build_assign (make_ssa_name (atype),
4311 vec_oprnd0);
4312 vect_finish_stmt_generation (stmt_info, new_stmt,
4313 gsi);
4314 vargs.safe_push (gimple_assign_lhs (new_stmt));
4315 }
4316 }
4317 }
4318 break;
4319 case SIMD_CLONE_ARG_TYPE_UNIFORM:
4320 vargs.safe_push (op);
4321 break;
4322 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
4323 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
4324 if (j == 0)
4325 {
4326 gimple_seq stmts;
4327 arginfo[i].op
4328 = force_gimple_operand (unshare_expr (arginfo[i].op),
4329 &stmts, true, NULL_TREE);
4330 if (stmts != NULL)
4331 {
4332 basic_block new_bb;
4333 edge pe = loop_preheader_edge (loop);
4334 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
4335 gcc_assert (!new_bb);
4336 }
4337 if (arginfo[i].simd_lane_linear)
4338 {
4339 vargs.safe_push (arginfo[i].op);
4340 break;
4341 }
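/* Otherwise build an induction variable for the linear argument: a
   header PHI that starts at the incoming value and is advanced by
   linear_step * ncopies * nunits on each loop iteration; copy number J
   then adds linear_step * J * nunits to it (see the else arm below).  */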
4342 tree phi_res = copy_ssa_name (op);
4343 gphi *new_phi = create_phi_node (phi_res, loop->header);
4344 loop_vinfo->add_stmt (new_phi);
4345 add_phi_arg (new_phi, arginfo[i].op,
4346 loop_preheader_edge (loop), UNKNOWN_LOCATION);
4347 enum tree_code code
4348 = POINTER_TYPE_P (TREE_TYPE (op))
4349 ? POINTER_PLUS_EXPR : PLUS_EXPR;
4350 tree type = POINTER_TYPE_P (TREE_TYPE (op))
4351 ? sizetype : TREE_TYPE (op);
4352 widest_int cst
4353 = wi::mul (bestn->simdclone->args[i].linear_step,
4354 ncopies * nunits);
4355 tree tcst = wide_int_to_tree (type, cst);
4356 tree phi_arg = copy_ssa_name (op);
4357 gassign *new_stmt
4358 = gimple_build_assign (phi_arg, code, phi_res, tcst);
4359 gimple_stmt_iterator si = gsi_after_labels (loop->header);
4360 gsi_insert_after (&si, new_stmt, GSI_NEW_STMT);
4361 loop_vinfo->add_stmt (new_stmt);
4362 add_phi_arg (new_phi, phi_arg, loop_latch_edge (loop),
4363 UNKNOWN_LOCATION);
4364 arginfo[i].op = phi_res;
4365 vargs.safe_push (phi_res);
4366 }
4367 else
4368 {
4369 enum tree_code code
4370 = POINTER_TYPE_P (TREE_TYPE (op))
4371 ? POINTER_PLUS_EXPR : PLUS_EXPR;
4372 tree type = POINTER_TYPE_P (TREE_TYPE (op))
4373 ? sizetype : TREE_TYPE (op);
4374 widest_int cst
4375 = wi::mul (bestn->simdclone->args[i].linear_step,
4376 j * nunits);
4377 tree tcst = wide_int_to_tree (type, cst);
4378 new_temp = make_ssa_name (TREE_TYPE (op));
4379 gassign *new_stmt
4380 = gimple_build_assign (new_temp, code,
4381 arginfo[i].op, tcst);
4382 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
4383 vargs.safe_push (new_temp);
4384 }
4385 break;
4386 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
4387 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP:
4388 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
4389 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP:
4390 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
4391 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
4392 default:
4393 gcc_unreachable ();
4394 }
4395 }
4396
4397 gcall *new_call = gimple_build_call_vec (fndecl, vargs);
4398 if (vec_dest)
4399 {
4400 gcc_assert (ratype || simd_clone_subparts (rtype) == nunits);
4401 if (ratype)
4402 new_temp = create_tmp_var (ratype);
4403 else if (simd_clone_subparts (vectype)
4404 == simd_clone_subparts (rtype))
4405 new_temp = make_ssa_name (vec_dest, new_call);
4406 else
4407 new_temp = make_ssa_name (rtype, new_call);
4408 gimple_call_set_lhs (new_call, new_temp);
4409 }
4410 stmt_vec_info new_stmt_info
4411 = vect_finish_stmt_generation (stmt_info, new_call, gsi);
4412
4413 if (vec_dest)
4414 {
4415 if (simd_clone_subparts (vectype) < nunits)
4416 {
4417 unsigned int k, l;
4418 poly_uint64 prec = GET_MODE_BITSIZE (TYPE_MODE (vectype));
4419 poly_uint64 bytes = GET_MODE_SIZE (TYPE_MODE (vectype));
4420 k = nunits / simd_clone_subparts (vectype);
4421 gcc_assert ((k & (k - 1)) == 0);
4422 for (l = 0; l < k; l++)
4423 {
4424 tree t;
4425 if (ratype)
4426 {
4427 t = build_fold_addr_expr (new_temp);
4428 t = build2 (MEM_REF, vectype, t,
4429 build_int_cst (TREE_TYPE (t), l * bytes));
4430 }
4431 else
4432 t = build3 (BIT_FIELD_REF, vectype, new_temp,
4433 bitsize_int (prec), bitsize_int (l * prec));
4434 gimple *new_stmt
4435 = gimple_build_assign (make_ssa_name (vectype), t);
4436 new_stmt_info
4437 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
4438
4439 if (j == 0 && l == 0)
4440 STMT_VINFO_VEC_STMT (stmt_info)
4441 = *vec_stmt = new_stmt_info;
4442 else
4443 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
4444
4445 prev_stmt_info = new_stmt_info;
4446 }
4447
4448 if (ratype)
4449 vect_clobber_variable (stmt_info, gsi, new_temp);
4450 continue;
4451 }
4452 else if (simd_clone_subparts (vectype) > nunits)
4453 {
4454 unsigned int k = (simd_clone_subparts (vectype)
4455 / simd_clone_subparts (rtype));
4456 gcc_assert ((k & (k - 1)) == 0);
4457 if ((j & (k - 1)) == 0)
4458 vec_alloc (ret_ctor_elts, k);
4459 if (ratype)
4460 {
4461 unsigned int m, o = nunits / simd_clone_subparts (rtype);
4462 for (m = 0; m < o; m++)
4463 {
4464 tree tem = build4 (ARRAY_REF, rtype, new_temp,
4465 size_int (m), NULL_TREE, NULL_TREE);
4466 gimple *new_stmt
4467 = gimple_build_assign (make_ssa_name (rtype), tem);
4468 new_stmt_info
4469 = vect_finish_stmt_generation (stmt_info, new_stmt,
4470 gsi);
4471 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE,
4472 gimple_assign_lhs (new_stmt));
4473 }
4474 vect_clobber_variable (stmt_info, gsi, new_temp);
4475 }
4476 else
4477 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE, new_temp);
4478 if ((j & (k - 1)) != k - 1)
4479 continue;
4480 vec_oprnd0 = build_constructor (vectype, ret_ctor_elts);
4481 gimple *new_stmt
4482 = gimple_build_assign (make_ssa_name (vec_dest), vec_oprnd0);
4483 new_stmt_info
4484 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
4485
4486 if ((unsigned) j == k - 1)
4487 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
4488 else
4489 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
4490
4491 prev_stmt_info = new_stmt_info;
4492 continue;
4493 }
4494 else if (ratype)
4495 {
4496 tree t = build_fold_addr_expr (new_temp);
4497 t = build2 (MEM_REF, vectype, t,
4498 build_int_cst (TREE_TYPE (t), 0));
4499 gimple *new_stmt
4500 = gimple_build_assign (make_ssa_name (vec_dest), t);
4501 new_stmt_info
4502 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
4503 vect_clobber_variable (stmt_info, gsi, new_temp);
4504 }
4505 }
4506
4507 if (j == 0)
4508 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
4509 else
4510 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
4511
4512 prev_stmt_info = new_stmt_info;
4513 }
4514
4515 vargs.release ();
4516
4517 /* The call in STMT might prevent it from being removed in dce.
4518 We however cannot remove it here, due to the way the ssa name
4519 it defines is mapped to the new definition. So just replace the
4520 rhs of the statement with something harmless. */
4521
4522 if (slp_node)
4523 return true;
4524
4525 gimple *new_stmt;
4526 if (scalar_dest)
4527 {
4528 type = TREE_TYPE (scalar_dest);
4529 lhs = gimple_call_lhs (vect_orig_stmt (stmt_info)->stmt);
4530 new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
4531 }
4532 else
4533 new_stmt = gimple_build_nop ();
4534 vinfo->replace_stmt (gsi, vect_orig_stmt (stmt_info), new_stmt);
4535 unlink_stmt_vdef (stmt);
4536
4537 return true;
4538 }
4539
4540
4541 /* Function vect_gen_widened_results_half
4542
4543 Create a vector stmt whose code, number of operands, and result
4544 variable are CODE, OP_TYPE, and VEC_DEST, and whose operands are
4545 VEC_OPRND0 and VEC_OPRND1. The new vector stmt is to be inserted at GSI.
4548 STMT_INFO is the original scalar stmt that we are vectorizing. */
4549
4550 static gimple *
4551 vect_gen_widened_results_half (enum tree_code code,
4552 tree vec_oprnd0, tree vec_oprnd1, int op_type,
4553 tree vec_dest, gimple_stmt_iterator *gsi,
4554 stmt_vec_info stmt_info)
4555 {
4556 gimple *new_stmt;
4557 tree new_temp;
4558
4559 /* Generate half of the widened result: */
4560 gcc_assert (op_type == TREE_CODE_LENGTH (code));
4561 if (op_type != binary_op)
4562 vec_oprnd1 = NULL;
4563 new_stmt = gimple_build_assign (vec_dest, code, vec_oprnd0, vec_oprnd1);
4564 new_temp = make_ssa_name (vec_dest, new_stmt);
4565 gimple_assign_set_lhs (new_stmt, new_temp);
4566 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
4567
4568 return new_stmt;
4569 }
4570
4571
4572 /* Get vectorized definitions for loop-based vectorization of STMT_INFO.
4573 For the first operand we call vect_get_vec_def_for_operand (with OPRND
4574 containing the scalar operand), and for the rest we get a copy with
4575 vect_get_vec_def_for_stmt_copy() using the previous vector definition
4576 (stored in OPRND). See vect_get_vec_def_for_stmt_copy() for details.
4577 The vectors are collected into VEC_OPRNDS. */
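/* For instance, with MULTI_STEP_CVT == 1 this collects four vector
   defs in total: the def for the scalar *OPRND, one stmt copy of it,
   and two further copies from the recursive call.  */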
4578
4579 static void
4580 vect_get_loop_based_defs (tree *oprnd, stmt_vec_info stmt_info,
4581 vec<tree> *vec_oprnds, int multi_step_cvt)
4582 {
4583 vec_info *vinfo = stmt_info->vinfo;
4584 tree vec_oprnd;
4585
4586 /* Get first vector operand. */
4587 /* All the vector operands except the very first one (which is the scalar
4588 operand) are stmt copies. */
4589 if (TREE_CODE (TREE_TYPE (*oprnd)) != VECTOR_TYPE)
4590 vec_oprnd = vect_get_vec_def_for_operand (*oprnd, stmt_info);
4591 else
4592 vec_oprnd = vect_get_vec_def_for_stmt_copy (vinfo, *oprnd);
4593
4594 vec_oprnds->quick_push (vec_oprnd);
4595
4596 /* Get second vector operand. */
4597 vec_oprnd = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd);
4598 vec_oprnds->quick_push (vec_oprnd);
4599
4600 *oprnd = vec_oprnd;
4601
4602 /* For conversion in multiple steps, continue to get operands
4603 recursively. */
4604 if (multi_step_cvt)
4605 vect_get_loop_based_defs (oprnd, stmt_info, vec_oprnds,
4606 multi_step_cvt - 1);
4607 }
4608
4609
4610 /* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
4611 For multi-step conversions store the resulting vectors and call the function
4612 recursively. */
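/* For example, a two-step demotion of eight V4SI operands to V16QI
   results packs pairs into four V8HI vectors at the first level; the
   recursive call then packs those into two V16QI vectors.  */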
4613
4614 static void
4615 vect_create_vectorized_demotion_stmts (vec<tree> *vec_oprnds,
4616 int multi_step_cvt,
4617 stmt_vec_info stmt_info,
4618 vec<tree> vec_dsts,
4619 gimple_stmt_iterator *gsi,
4620 slp_tree slp_node, enum tree_code code,
4621 stmt_vec_info *prev_stmt_info)
4622 {
4623 unsigned int i;
4624 tree vop0, vop1, new_tmp, vec_dest;
4625
4626 vec_dest = vec_dsts.pop ();
4627
4628 for (i = 0; i < vec_oprnds->length (); i += 2)
4629 {
4630 /* Create demotion operation. */
4631 vop0 = (*vec_oprnds)[i];
4632 vop1 = (*vec_oprnds)[i + 1];
4633 gassign *new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
4634 new_tmp = make_ssa_name (vec_dest, new_stmt);
4635 gimple_assign_set_lhs (new_stmt, new_tmp);
4636 stmt_vec_info new_stmt_info
4637 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
4638
4639 if (multi_step_cvt)
4640 /* Store the resulting vector for next recursive call. */
4641 (*vec_oprnds)[i/2] = new_tmp;
4642 else
4643 {
4644 /* This is the last step of the conversion sequence. Store the
4645 vectors in SLP_NODE or in vector info of the scalar statement
4646 (or in STMT_VINFO_RELATED_STMT chain). */
4647 if (slp_node)
4648 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
4649 else
4650 {
4651 if (!*prev_stmt_info)
4652 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt_info;
4653 else
4654 STMT_VINFO_RELATED_STMT (*prev_stmt_info) = new_stmt_info;
4655
4656 *prev_stmt_info = new_stmt_info;
4657 }
4658 }
4659 }
4660
4661 /* For multi-step demotion operations we first generate demotion operations
4662 from the source type to the intermediate types, and then combine the
4663 results (stored in VEC_OPRNDS) with a demotion operation to the destination
4664 type. */
4665 if (multi_step_cvt)
4666 {
4667 /* At each level of recursion we have half of the operands we had at the
4668 previous level. */
4669 vec_oprnds->truncate ((i+1)/2);
4670 vect_create_vectorized_demotion_stmts (vec_oprnds, multi_step_cvt - 1,
4671 stmt_info, vec_dsts, gsi,
4672 slp_node, VEC_PACK_TRUNC_EXPR,
4673 prev_stmt_info);
4674 }
4675
4676 vec_dsts.quick_push (vec_dest);
4677 }
4678
4679
4680 /* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
4681 and VEC_OPRNDS1, for a binary operation associated with scalar statement
4682 STMT_INFO. For multi-step conversions store the resulting vectors and
4683 call the function recursively. */
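/* For example, promoting N V16QI operands with CODE1 == VEC_UNPACK_LO_EXPR
   and CODE2 == VEC_UNPACK_HI_EXPR produces 2 * N V8HI results, which
   replace the contents of VEC_OPRNDS0 for the next step.  */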
4684
4685 static void
4686 vect_create_vectorized_promotion_stmts (vec<tree> *vec_oprnds0,
4687 vec<tree> *vec_oprnds1,
4688 stmt_vec_info stmt_info, tree vec_dest,
4689 gimple_stmt_iterator *gsi,
4690 enum tree_code code1,
4691 enum tree_code code2, int op_type)
4692 {
4693 int i;
4694 tree vop0, vop1, new_tmp1, new_tmp2;
4695 gimple *new_stmt1, *new_stmt2;
4696 vec<tree> vec_tmp = vNULL;
4697
4698 vec_tmp.create (vec_oprnds0->length () * 2);
4699 FOR_EACH_VEC_ELT (*vec_oprnds0, i, vop0)
4700 {
4701 if (op_type == binary_op)
4702 vop1 = (*vec_oprnds1)[i];
4703 else
4704 vop1 = NULL_TREE;
4705
4706 /* Generate the two halves of promotion operation. */
4707 new_stmt1 = vect_gen_widened_results_half (code1, vop0, vop1,
4708 op_type, vec_dest, gsi,
4709 stmt_info);
4710 new_stmt2 = vect_gen_widened_results_half (code2, vop0, vop1,
4711 op_type, vec_dest, gsi,
4712 stmt_info);
4713 if (is_gimple_call (new_stmt1))
4714 {
4715 new_tmp1 = gimple_call_lhs (new_stmt1);
4716 new_tmp2 = gimple_call_lhs (new_stmt2);
4717 }
4718 else
4719 {
4720 new_tmp1 = gimple_assign_lhs (new_stmt1);
4721 new_tmp2 = gimple_assign_lhs (new_stmt2);
4722 }
4723
4724 /* Store the results for the next step. */
4725 vec_tmp.quick_push (new_tmp1);
4726 vec_tmp.quick_push (new_tmp2);
4727 }
4728
4729 vec_oprnds0->release ();
4730 *vec_oprnds0 = vec_tmp;
4731 }
4732
4733
4734 /* Check if STMT_INFO performs a conversion operation that can be vectorized.
4735 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
4736 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
4737 Return true if STMT_INFO is vectorizable in this way. */
4738
4739 static bool
4740 vectorizable_conversion (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
4741 stmt_vec_info *vec_stmt, slp_tree slp_node,
4742 stmt_vector_for_cost *cost_vec)
4743 {
4744 tree vec_dest;
4745 tree scalar_dest;
4746 tree op0, op1 = NULL_TREE;
4747 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
4748 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4749 enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
4750 enum tree_code codecvt1 = ERROR_MARK, codecvt2 = ERROR_MARK;
4751 tree new_temp;
4752 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
4753 int ndts = 2;
4754 stmt_vec_info prev_stmt_info;
4755 poly_uint64 nunits_in;
4756 poly_uint64 nunits_out;
4757 tree vectype_out, vectype_in;
4758 int ncopies, i, j;
4759 tree lhs_type, rhs_type;
4760 enum { NARROW, NONE, WIDEN } modifier;
4761 vec<tree> vec_oprnds0 = vNULL;
4762 vec<tree> vec_oprnds1 = vNULL;
4763 tree vop0;
4764 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
4765 vec_info *vinfo = stmt_info->vinfo;
4766 int multi_step_cvt = 0;
4767 vec<tree> interm_types = vNULL;
4768 tree last_oprnd, intermediate_type, cvt_type = NULL_TREE;
4769 int op_type;
4770 unsigned short fltsz;
4771
4772 /* Is STMT a vectorizable conversion? */
4773
4774 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4775 return false;
4776
4777 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
4778 && ! vec_stmt)
4779 return false;
4780
4781 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
4782 if (!stmt)
4783 return false;
4784
4785 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
4786 return false;
4787
4788 code = gimple_assign_rhs_code (stmt);
4789 if (!CONVERT_EXPR_CODE_P (code)
4790 && code != FIX_TRUNC_EXPR
4791 && code != FLOAT_EXPR
4792 && code != WIDEN_MULT_EXPR
4793 && code != WIDEN_LSHIFT_EXPR)
4794 return false;
4795
4796 op_type = TREE_CODE_LENGTH (code);
4797
4798 /* Check types of lhs and rhs. */
4799 scalar_dest = gimple_assign_lhs (stmt);
4800 lhs_type = TREE_TYPE (scalar_dest);
4801 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
4802
4803 op0 = gimple_assign_rhs1 (stmt);
4804 rhs_type = TREE_TYPE (op0);
4805
4806 if ((code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
4807 && !((INTEGRAL_TYPE_P (lhs_type)
4808 && INTEGRAL_TYPE_P (rhs_type))
4809 || (SCALAR_FLOAT_TYPE_P (lhs_type)
4810 && SCALAR_FLOAT_TYPE_P (rhs_type))))
4811 return false;
4812
4813 if (!VECTOR_BOOLEAN_TYPE_P (vectype_out)
4814 && ((INTEGRAL_TYPE_P (lhs_type)
4815 && !type_has_mode_precision_p (lhs_type))
4816 || (INTEGRAL_TYPE_P (rhs_type)
4817 && !type_has_mode_precision_p (rhs_type))))
4818 {
4819 if (dump_enabled_p ())
4820 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4821 "type conversion to/from bit-precision unsupported."
4822 "\n");
4823 return false;
4824 }
4825
4826 /* Check the operands of the operation. */
4827 if (!vect_is_simple_use (op0, vinfo, &dt[0], &vectype_in))
4828 {
4829 if (dump_enabled_p ())
4830 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4831 "use not simple.\n");
4832 return false;
4833 }
4834 if (op_type == binary_op)
4835 {
4836 bool ok;
4837
4838 op1 = gimple_assign_rhs2 (stmt);
4839 gcc_assert (code == WIDEN_MULT_EXPR || code == WIDEN_LSHIFT_EXPR);
4840 /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
4841 OP1. */
4842 if (CONSTANT_CLASS_P (op0))
4843 ok = vect_is_simple_use (op1, vinfo, &dt[1], &vectype_in);
4844 else
4845 ok = vect_is_simple_use (op1, vinfo, &dt[1]);
4846
4847 if (!ok)
4848 {
4849 if (dump_enabled_p ())
4850 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4851 "use not simple.\n");
4852 return false;
4853 }
4854 }
4855
4856 /* If op0 is an external or constant def, infer the vector type
4857 from the scalar type. */
4858 if (!vectype_in)
4859 vectype_in = get_vectype_for_scalar_type (vinfo, rhs_type, slp_node);
4860 if (vec_stmt)
4861 gcc_assert (vectype_in);
4862 if (!vectype_in)
4863 {
4864 if (dump_enabled_p ())
4865 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4866 "no vectype for scalar type %T\n", rhs_type);
4867
4868 return false;
4869 }
4870
4871 if (VECTOR_BOOLEAN_TYPE_P (vectype_out)
4872 && !VECTOR_BOOLEAN_TYPE_P (vectype_in))
4873 {
4874 if (dump_enabled_p ())
4875 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4876 "can't convert between boolean and non "
4877 "boolean vectors %T\n", rhs_type);
4878
4879 return false;
4880 }
4881
4882 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
4883 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
4884 if (known_eq (nunits_out, nunits_in))
4885 modifier = NONE;
4886 else if (multiple_p (nunits_out, nunits_in))
4887 modifier = NARROW;
4888 else
4889 {
4890 gcc_checking_assert (multiple_p (nunits_in, nunits_out));
4891 modifier = WIDEN;
4892 }
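/* Illustrative examples (actual vector modes are target-dependent):
an int -> float conversion with V4SI in and V4SF out keeps the lane
count, so the modifier is NONE; long -> int with V2DI in and V4SI out
yields more lanes and is NARROW; int -> long with V4SI in and V2DI
out yields fewer lanes and is WIDEN. */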
4893
4894 /* Multiple types in SLP are handled by creating the appropriate number of
4895 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4896 case of SLP. */
4897 if (slp_node)
4898 ncopies = 1;
4899 else if (modifier == NARROW)
4900 ncopies = vect_get_num_copies (loop_vinfo, vectype_out);
4901 else
4902 ncopies = vect_get_num_copies (loop_vinfo, vectype_in);
4903
4904 /* Sanity check: make sure that at least one copy of the vectorized stmt
4905 needs to be generated. */
4906 gcc_assert (ncopies >= 1);
4907
4908 bool found_mode = false;
4909 scalar_mode lhs_mode = SCALAR_TYPE_MODE (lhs_type);
4910 scalar_mode rhs_mode = SCALAR_TYPE_MODE (rhs_type);
4911 opt_scalar_mode rhs_mode_iter;
4912
4913 /* Supportable by target? */
4914 switch (modifier)
4915 {
4916 case NONE:
4917 if (code != FIX_TRUNC_EXPR
4918 && code != FLOAT_EXPR
4919 && !CONVERT_EXPR_CODE_P (code))
4920 return false;
4921 if (supportable_convert_operation (code, vectype_out, vectype_in, &code1))
4922 break;
4923 /* FALLTHRU */
4924 unsupported:
4925 if (dump_enabled_p ())
4926 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4927 "conversion not supported by target.\n");
4928 return false;
4929
4930 case WIDEN:
4931 if (supportable_widening_operation (code, stmt_info, vectype_out,
4932 vectype_in, &code1, &code2,
4933 &multi_step_cvt, &interm_types))
4934 {
4935 /* A binary widening operation can only be supported directly by
4936 the architecture. */
4937 gcc_assert (!(multi_step_cvt && op_type == binary_op));
4938 break;
4939 }
4940
4941 if (code != FLOAT_EXPR
4942 || GET_MODE_SIZE (lhs_mode) <= GET_MODE_SIZE (rhs_mode))
4943 goto unsupported;
4944
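/* A sketch of the search below, assuming a hypothetical target: to
vectorize a short -> double FLOAT_EXPR when only int -> double (or
long -> double) is directly supported, try successively wider integer
modes for the source, e.g. first widen short -> int via NOP_EXPR
(CODE1/CODE2) and then convert int -> double via the FLOAT_EXPR
recorded in CODECVT1. */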
4945 fltsz = GET_MODE_SIZE (lhs_mode);
4946 FOR_EACH_2XWIDER_MODE (rhs_mode_iter, rhs_mode)
4947 {
4948 rhs_mode = rhs_mode_iter.require ();
4949 if (GET_MODE_SIZE (rhs_mode) > fltsz)
4950 break;
4951
4952 cvt_type
4953 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
4954 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
4955 if (cvt_type == NULL_TREE)
4956 goto unsupported;
4957
4958 if (GET_MODE_SIZE (rhs_mode) == fltsz)
4959 {
4960 if (!supportable_convert_operation (code, vectype_out,
4961 cvt_type, &codecvt1))
4962 goto unsupported;
4963 }
4964 else if (!supportable_widening_operation (code, stmt_info,
4965 vectype_out, cvt_type,
4966 &codecvt1, &codecvt2,
4967 &multi_step_cvt,
4968 &interm_types))
4969 continue;
4970 else
4971 gcc_assert (multi_step_cvt == 0);
4972
4973 if (supportable_widening_operation (NOP_EXPR, stmt_info, cvt_type,
4974 vectype_in, &code1, &code2,
4975 &multi_step_cvt, &interm_types))
4976 {
4977 found_mode = true;
4978 break;
4979 }
4980 }
4981
4982 if (!found_mode)
4983 goto unsupported;
4984
4985 if (GET_MODE_SIZE (rhs_mode) == fltsz)
4986 codecvt2 = ERROR_MARK;
4987 else
4988 {
4989 multi_step_cvt++;
4990 interm_types.safe_push (cvt_type);
4991 cvt_type = NULL_TREE;
4992 }
4993 break;
4994
4995 case NARROW:
4996 gcc_assert (op_type == unary_op);
4997 if (supportable_narrowing_operation (code, vectype_out, vectype_in,
4998 &code1, &multi_step_cvt,
4999 &interm_types))
5000 break;
5001
5002 if (code != FIX_TRUNC_EXPR
5003 || GET_MODE_SIZE (lhs_mode) >= GET_MODE_SIZE (rhs_mode))
5004 goto unsupported;
5005
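/* A sketch of the two-step narrowing tried below, assuming a
hypothetical target: a double -> short FIX_TRUNC_EXPR can be done by
first truncating double -> long (an integer type of the same width,
via CODECVT1) and then narrowing long -> short via NOP_EXPR. */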
5006 cvt_type
5007 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
5008 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
5009 if (cvt_type == NULL_TREE)
5010 goto unsupported;
5011 if (!supportable_convert_operation (code, cvt_type, vectype_in,
5012 &codecvt1))
5013 goto unsupported;
5014 if (supportable_narrowing_operation (NOP_EXPR, vectype_out, cvt_type,
5015 &code1, &multi_step_cvt,
5016 &interm_types))
5017 break;
5018 goto unsupported;
5019
5020 default:
5021 gcc_unreachable ();
5022 }
5023
5024 if (!vec_stmt) /* transformation not required. */
5025 {
5026 DUMP_VECT_SCOPE ("vectorizable_conversion");
5027 if (modifier == NONE)
5028 {
5029 STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type;
5030 vect_model_simple_cost (stmt_info, ncopies, dt, ndts, slp_node,
5031 cost_vec);
5032 }
5033 else if (modifier == NARROW)
5034 {
5035 STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type;
5036 /* The final packing step produces one vector result per copy. */
5037 unsigned int nvectors
5038 = (slp_node ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node) : ncopies);
5039 vect_model_promotion_demotion_cost (stmt_info, dt, nvectors,
5040 multi_step_cvt, cost_vec);
5041 }
5042 else
5043 {
5044 STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type;
5045 /* The initial unpacking step produces two vector results
5046 per copy. MULTI_STEP_CVT is 0 for a single conversion,
5047 so >> MULTI_STEP_CVT divides by 2^(number of steps - 1). */
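/* For example, a two-step widening has MULTI_STEP_CVT == 1, so with
8 vector stmts in the SLP node the initial unpacking step is costed
on 8 >> 1 == 4 vectors. */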
5048 unsigned int nvectors
5049 = (slp_node
5050 ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node) >> multi_step_cvt
5051 : ncopies * 2);
5052 vect_model_promotion_demotion_cost (stmt_info, dt, nvectors,
5053 multi_step_cvt, cost_vec);
5054 }
5055 interm_types.release ();
5056 return true;
5057 }
5058
5059 /* Transform. */
5060 if (dump_enabled_p ())
5061 dump_printf_loc (MSG_NOTE, vect_location,
5062 "transform conversion. ncopies = %d.\n", ncopies);
5063
5064 if (op_type == binary_op)
5065 {
5066 if (CONSTANT_CLASS_P (op0))
5067 op0 = fold_convert (TREE_TYPE (op1), op0);
5068 else if (CONSTANT_CLASS_P (op1))
5069 op1 = fold_convert (TREE_TYPE (op0), op1);
5070 }
5071
5072 /* In case of multi-step conversion, we first generate conversion operations
5073 to the intermediate types, and then from those types to the final one.
5074 We create vector destinations for the intermediate types (TYPES) received
5075 from supportable_*_operation, and store them in the correct order
5076 for future use in vect_create_vectorized_*_stmts (). */
5077 auto_vec<tree> vec_dsts (multi_step_cvt + 1);
5078 vec_dest = vect_create_destination_var (scalar_dest,
5079 (cvt_type && modifier == WIDEN)
5080 ? cvt_type : vectype_out);
5081 vec_dsts.quick_push (vec_dest);
5082
5083 if (multi_step_cvt)
5084 {
5085 for (i = interm_types.length () - 1;
5086 interm_types.iterate (i, &intermediate_type); i--)
5087 {
5088 vec_dest = vect_create_destination_var (scalar_dest,
5089 intermediate_type);
5090 vec_dsts.quick_push (vec_dest);
5091 }
5092 }
5093
5094 if (cvt_type)
5095 vec_dest = vect_create_destination_var (scalar_dest,
5096 modifier == WIDEN
5097 ? vectype_out : cvt_type);
5098
5099 if (!slp_node)
5100 {
5101 if (modifier == WIDEN)
5102 {
5103 vec_oprnds0.create (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1);
5104 if (op_type == binary_op)
5105 vec_oprnds1.create (1);
5106 }
5107 else if (modifier == NARROW)
5108 vec_oprnds0.create (
5109 2 * (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1));
5110 }
5111 else if (code == WIDEN_LSHIFT_EXPR)
5112 vec_oprnds1.create (slp_node->vec_stmts_size);
5113
5114 last_oprnd = op0;
5115 prev_stmt_info = NULL;
5116 switch (modifier)
5117 {
5118 case NONE:
5119 for (j = 0; j < ncopies; j++)
5120 {
5121 if (j == 0)
5122 vect_get_vec_defs (op0, NULL, stmt_info, &vec_oprnds0,
5123 NULL, slp_node);
5124 else
5125 vect_get_vec_defs_for_stmt_copy (vinfo, &vec_oprnds0, NULL);
5126
5127 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5128 {
5129 stmt_vec_info new_stmt_info;
5130 /* Arguments are ready, create the new vector stmt. */
5131 gcc_assert (TREE_CODE_LENGTH (code1) == unary_op);
5132 gassign *new_stmt = gimple_build_assign (vec_dest, code1, vop0);
5133 new_temp = make_ssa_name (vec_dest, new_stmt);
5134 gimple_assign_set_lhs (new_stmt, new_temp);
5135 new_stmt_info
5136 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
5137
5138 if (slp_node)
5139 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
5140 else
5141 {
5142 if (!prev_stmt_info)
5143 STMT_VINFO_VEC_STMT (stmt_info)
5144 = *vec_stmt = new_stmt_info;
5145 else
5146 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
5147 prev_stmt_info = new_stmt_info;
5148 }
5149 }
5150 }
5151 break;
5152
5153 case WIDEN:
5154 /* In case the vectorization factor (VF) is bigger than the number
5155 of elements that we can fit in a vectype (nunits), we have to
5156 generate more than one vector stmt, i.e., we need to "unroll"
5157 the vector stmt by a factor VF/nunits. */
5158 for (j = 0; j < ncopies; j++)
5159 {
5160 /* Handle uses. */
5161 if (j == 0)
5162 {
5163 if (slp_node)
5164 {
5165 if (code == WIDEN_LSHIFT_EXPR)
5166 {
5167 unsigned int k;
5168
5169 vec_oprnd1 = op1;
5170 /* Store vec_oprnd1 for every vector stmt to be created
5171 for SLP_NODE. We check during the analysis that all
5172 the shift arguments are the same. */
5173 for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
5174 vec_oprnds1.quick_push (vec_oprnd1);
5175
5176 vect_get_vec_defs (op0, NULL_TREE, stmt_info,
5177 &vec_oprnds0, NULL, slp_node);
5178 }
5179 else
5180 vect_get_vec_defs (op0, op1, stmt_info, &vec_oprnds0,
5181 &vec_oprnds1, slp_node);
5182 }
5183 else
5184 {
5185 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt_info);
5186 vec_oprnds0.quick_push (vec_oprnd0);
5187 if (op_type == binary_op)
5188 {
5189 if (code == WIDEN_LSHIFT_EXPR)
5190 vec_oprnd1 = op1;
5191 else
5192 vec_oprnd1
5193 = vect_get_vec_def_for_operand (op1, stmt_info);
5194 vec_oprnds1.quick_push (vec_oprnd1);
5195 }
5196 }
5197 }
5198 else
5199 {
5200 vec_oprnd0 = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd0);
5201 vec_oprnds0.truncate (0);
5202 vec_oprnds0.quick_push (vec_oprnd0);
5203 if (op_type == binary_op)
5204 {
5205 if (code == WIDEN_LSHIFT_EXPR)
5206 vec_oprnd1 = op1;
5207 else
5208 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (vinfo,
5209 vec_oprnd1);
5210 vec_oprnds1.truncate (0);
5211 vec_oprnds1.quick_push (vec_oprnd1);
5212 }
5213 }
5214
5215 /* Arguments are ready. Create the new vector stmts. */
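/* For instance, for a multi-step promotion such as char -> int
(hypothetically V16QI -> V4SI), the loop below first unpacks the
V16QI operand into two V8HI vectors and then unpacks each of those
into two V4SI vectors, consuming VEC_DSTS from the narrowest
intermediate type upwards. */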
5216 for (i = multi_step_cvt; i >= 0; i--)
5217 {
5218 tree this_dest = vec_dsts[i];
5219 enum tree_code c1 = code1, c2 = code2;
5220 if (i == 0 && codecvt2 != ERROR_MARK)
5221 {
5222 c1 = codecvt1;
5223 c2 = codecvt2;
5224 }
5225 vect_create_vectorized_promotion_stmts (&vec_oprnds0,
5226 &vec_oprnds1, stmt_info,
5227 this_dest, gsi,
5228 c1, c2, op_type);
5229 }
5230
5231 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5232 {
5233 stmt_vec_info new_stmt_info;
5234 if (cvt_type)
5235 {
5236 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
5237 new_temp = make_ssa_name (vec_dest);
5238 gassign *new_stmt
5239 = gimple_build_assign (new_temp, codecvt1, vop0);
5240 new_stmt_info
5241 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
5242 }
5243 else
5244 new_stmt_info = vinfo->lookup_def (vop0);
5245
5246 if (slp_node)
5247 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
5248 else
5249 {
5250 if (!prev_stmt_info)
5251 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt_info;
5252 else
5253 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
5254 prev_stmt_info = new_stmt_info;
5255 }
5256 }
5257 }
5258
5259 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
5260 break;
5261
5262 case NARROW:
5263 /* In case the vectorization factor (VF) is bigger than the number
5264 of elements that we can fit in a vectype (nunits), we have to
5265 generate more than one vector stmt, i.e., we need to "unroll"
5266 the vector stmt by a factor VF/nunits. */
5267 for (j = 0; j < ncopies; j++)
5268 {
5269 /* Handle uses. */
5270 if (slp_node)
5271 vect_get_vec_defs (op0, NULL_TREE, stmt_info, &vec_oprnds0, NULL,
5272 slp_node);
5273 else
5274 {
5275 vec_oprnds0.truncate (0);
5276 vect_get_loop_based_defs (&last_oprnd, stmt_info, &vec_oprnds0,
5277 vect_pow2 (multi_step_cvt) - 1);
5278 }
5279
5280 /* Arguments are ready. Create the new vector stmts. */
5281 if (cvt_type)
5282 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5283 {
5284 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
5285 new_temp = make_ssa_name (vec_dest);
5286 gassign *new_stmt
5287 = gimple_build_assign (new_temp, codecvt1, vop0);
5288 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
5289 vec_oprnds0[i] = new_temp;
5290 }
5291
5292 vect_create_vectorized_demotion_stmts (&vec_oprnds0, multi_step_cvt,
5293 stmt_info, vec_dsts, gsi,
5294 slp_node, code1,
5295 &prev_stmt_info);
5296 }
5297
5298 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
5299 break;
5300 }
5301
5302 vec_oprnds0.release ();
5303 vec_oprnds1.release ();
5304 interm_types.release ();
5305
5306 return true;
5307 }
5308
5309 /* Return true if we can assume from the scalar form of STMT_INFO that
5310 neither the scalar nor the vector forms will generate code. STMT_INFO
5311 is known not to involve a data reference. */
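/* For instance, a VIEW_CONVERT_EXPR, or a cast between int and
unsigned int (same mode and precision), is expected to generate no
code in either the scalar or the vector form. */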
5312
5313 bool
5314 vect_nop_conversion_p (stmt_vec_info stmt_info)
5315 {
5316 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
5317 if (!stmt)
5318 return false;
5319
5320 tree lhs = gimple_assign_lhs (stmt);
5321 tree_code code = gimple_assign_rhs_code (stmt);
5322 tree rhs = gimple_assign_rhs1 (stmt);
5323
5324 if (code == SSA_NAME || code == VIEW_CONVERT_EXPR)
5325 return true;
5326
5327 if (CONVERT_EXPR_CODE_P (code))
5328 return tree_nop_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs));
5329
5330 return false;
5331 }
5332
5333 /* Function vectorizable_assignment.
5334
5335 Check if STMT_INFO performs an assignment (copy) that can be vectorized.
5336 If VEC_STMT is also passed, vectorize the STMT_INFO: create a vectorized
5337 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
5338 Return true if STMT_INFO is vectorizable in this way. */
5339
5340 static bool
5341 vectorizable_assignment (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
5342 stmt_vec_info *vec_stmt, slp_tree slp_node,
5343 stmt_vector_for_cost *cost_vec)
5344 {
5345 tree vec_dest;
5346 tree scalar_dest;
5347 tree op;
5348 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
5349 tree new_temp;
5350 enum vect_def_type dt[1] = {vect_unknown_def_type};
5351 int ndts = 1;
5352 int ncopies;
5353 int i, j;
5354 vec<tree> vec_oprnds = vNULL;
5355 tree vop;
5356 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
5357 vec_info *vinfo = stmt_info->vinfo;
5358 stmt_vec_info prev_stmt_info = NULL;
5359 enum tree_code code;
5360 tree vectype_in;
5361
5362 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5363 return false;
5364
5365 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5366 && ! vec_stmt)
5367 return false;
5368
5369 /* Is vectorizable assignment? */
5370 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
5371 if (!stmt)
5372 return false;
5373
5374 scalar_dest = gimple_assign_lhs (stmt);
5375 if (TREE_CODE (scalar_dest) != SSA_NAME)
5376 return false;
5377
5378 code = gimple_assign_rhs_code (stmt);
5379 if (gimple_assign_single_p (stmt)
5380 || code == PAREN_EXPR
5381 || CONVERT_EXPR_CODE_P (code))
5382 op = gimple_assign_rhs1 (stmt);
5383 else
5384 return false;
5385
5386 if (code == VIEW_CONVERT_EXPR)
5387 op = TREE_OPERAND (op, 0);
5388
5389 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
5390 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
5391
5392 /* Multiple types in SLP are handled by creating the appropriate number of
5393 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5394 case of SLP. */
5395 if (slp_node)
5396 ncopies = 1;
5397 else
5398 ncopies = vect_get_num_copies (loop_vinfo, vectype);
5399
5400 gcc_assert (ncopies >= 1);
5401
5402 if (!vect_is_simple_use (op, vinfo, &dt[0], &vectype_in))
5403 {
5404 if (dump_enabled_p ())
5405 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5406 "use not simple.\n");
5407 return false;
5408 }
5409
5410 /* We can handle NOP_EXPR conversions that do not change the number
5411 of elements or the vector size. */
5412 if ((CONVERT_EXPR_CODE_P (code)
5413 || code == VIEW_CONVERT_EXPR)
5414 && (!vectype_in
5415 || maybe_ne (TYPE_VECTOR_SUBPARTS (vectype_in), nunits)
5416 || maybe_ne (GET_MODE_SIZE (TYPE_MODE (vectype)),
5417 GET_MODE_SIZE (TYPE_MODE (vectype_in)))))
5418 return false;
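/* For example, a cast between int and unsigned int keeps both the
lane count and the vector size and is handled here as a simple copy,
whereas an int -> long cast changes the lane count and is left to
vectorizable_conversion instead. */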
5419
5420 /* We do not handle bit-precision changes. */
5421 if ((CONVERT_EXPR_CODE_P (code)
5422 || code == VIEW_CONVERT_EXPR)
5423 && INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
5424 && (!type_has_mode_precision_p (TREE_TYPE (scalar_dest))
5425 || !type_has_mode_precision_p (TREE_TYPE (op)))
5426 /* But a conversion that does not change the bit-pattern is ok. */
5427 && !((TYPE_PRECISION (TREE_TYPE (scalar_dest))
5428 > TYPE_PRECISION (TREE_TYPE (op)))
5429 && TYPE_UNSIGNED (TREE_TYPE (op)))
5430 /* Conversion between boolean types of different sizes is
5431 a simple assignment in case their vectypes are the same
5432 boolean vector type. */
5433 && (!VECTOR_BOOLEAN_TYPE_P (vectype)
5434 || !VECTOR_BOOLEAN_TYPE_P (vectype_in)))
5435 {
5436 if (dump_enabled_p ())
5437 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5438 "type conversion to/from bit-precision "
5439 "unsupported.\n");
5440 return false;
5441 }
5442
5443 if (!vec_stmt) /* transformation not required. */
5444 {
5445 STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
5446 DUMP_VECT_SCOPE ("vectorizable_assignment");
5447 if (!vect_nop_conversion_p (stmt_info))
5448 vect_model_simple_cost (stmt_info, ncopies, dt, ndts, slp_node,
5449 cost_vec);
5450 return true;
5451 }
5452
5453 /* Transform. */
5454 if (dump_enabled_p ())
5455 dump_printf_loc (MSG_NOTE, vect_location, "transform assignment.\n");
5456
5457 /* Handle def. */
5458 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5459
5460 /* Handle use. */
5461 for (j = 0; j < ncopies; j++)
5462 {
5463 /* Handle uses. */
5464 if (j == 0)
5465 vect_get_vec_defs (op, NULL, stmt_info, &vec_oprnds, NULL, slp_node);
5466 else
5467 vect_get_vec_defs_for_stmt_copy (vinfo, &vec_oprnds, NULL);
5468
5469 /* Arguments are ready. Create the new vector stmt. */
5470 stmt_vec_info new_stmt_info = NULL;
5471 FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
5472 {
5473 if (CONVERT_EXPR_CODE_P (code)
5474 || code == VIEW_CONVERT_EXPR)
5475 vop = build1 (VIEW_CONVERT_EXPR, vectype, vop);
5476 gassign *new_stmt = gimple_build_assign (vec_dest, vop);
5477 new_temp = make_ssa_name (vec_dest, new_stmt);
5478 gimple_assign_set_lhs (new_stmt, new_temp);
5479 new_stmt_info
5480 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
5481 if (slp_node)
5482 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
5483 }
5484
5485 if (slp_node)
5486 continue;
5487
5488 if (j == 0)
5489 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
5490 else
5491 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
5492
5493 prev_stmt_info = new_stmt_info;
5494 }
5495
5496 vec_oprnds.release ();
5497 return true;
5498 }
5499
5500
5501 /* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE
5502 either as a shift by a scalar or as a shift by a vector. */
5503
5504 bool
5505 vect_supportable_shift (vec_info *vinfo, enum tree_code code, tree scalar_type)
5506 {
5508 machine_mode vec_mode;
5509 optab optab;
5510 int icode;
5511 tree vectype;
5512
5513 vectype = get_vectype_for_scalar_type (vinfo, scalar_type);
5514 if (!vectype)
5515 return false;
5516
5517 optab = optab_for_tree_code (code, vectype, optab_scalar);
5518 if (!optab
5519 || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
5520 {
5521 optab = optab_for_tree_code (code, vectype, optab_vector);
5522 if (!optab
5523 || (optab_handler (optab, TYPE_MODE (vectype))
5524 == CODE_FOR_nothing))
5525 return false;
5526 }
5527
5528 vec_mode = TYPE_MODE (vectype);
5529 icode = (int) optab_handler (optab, vec_mode);
5530 if (icode == CODE_FOR_nothing)
5531 return false;
5532
5533 return true;
5534 }
5535
5536
5537 /* Function vectorizable_shift.
5538
5539 Check if STMT_INFO performs a shift operation that can be vectorized.
5540 If VEC_STMT is also passed, vectorize the STMT_INFO: create a vectorized
5541 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
5542 Return true if STMT_INFO is vectorizable in this way. */
5543
5544 static bool
5545 vectorizable_shift (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
5546 stmt_vec_info *vec_stmt, slp_tree slp_node,
5547 stmt_vector_for_cost *cost_vec)
5548 {
5549 tree vec_dest;
5550 tree scalar_dest;
5551 tree op0, op1 = NULL;
5552 tree vec_oprnd1 = NULL_TREE;
5553 tree vectype;
5554 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
5555 enum tree_code code;
5556 machine_mode vec_mode;
5557 tree new_temp;
5558 optab optab;
5559 int icode;
5560 machine_mode optab_op2_mode;
5561 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
5562 int ndts = 2;
5563 stmt_vec_info prev_stmt_info;
5564 poly_uint64 nunits_in;
5565 poly_uint64 nunits_out;
5566 tree vectype_out;
5567 tree op1_vectype;
5568 int ncopies;
5569 int j, i;
5570 vec<tree> vec_oprnds0 = vNULL;
5571 vec<tree> vec_oprnds1 = vNULL;
5572 tree vop0, vop1;
5573 unsigned int k;
5574 bool scalar_shift_arg = true;
5575 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
5576 vec_info *vinfo = stmt_info->vinfo;
5577 bool incompatible_op1_vectype_p = false;
5578
5579 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5580 return false;
5581
5582 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5583 && STMT_VINFO_DEF_TYPE (stmt_info) != vect_nested_cycle
5584 && ! vec_stmt)
5585 return false;
5586
5587 /* Is STMT a vectorizable binary/unary operation? */
5588 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
5589 if (!stmt)
5590 return false;
5591
5592 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
5593 return false;
5594
5595 code = gimple_assign_rhs_code (stmt);
5596
5597 if (!(code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
5598 || code == RROTATE_EXPR))
5599 return false;
5600
5601 scalar_dest = gimple_assign_lhs (stmt);
5602 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
5603 if (!type_has_mode_precision_p (TREE_TYPE (scalar_dest)))
5604 {
5605 if (dump_enabled_p ())
5606 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5607 "bit-precision shifts not supported.\n");
5608 return false;
5609 }
5610
5611 op0 = gimple_assign_rhs1 (stmt);
5612 if (!vect_is_simple_use (op0, vinfo, &dt[0], &vectype))
5613 {
5614 if (dump_enabled_p ())
5615 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5616 "use not simple.\n");
5617 return false;
5618 }
5619 /* If op0 is an external or constant def, infer the vector type
5620 from the scalar type. */
5621 if (!vectype)
5622 vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op0), slp_node);
5623 if (vec_stmt)
5624 gcc_assert (vectype);
5625 if (!vectype)
5626 {
5627 if (dump_enabled_p ())
5628 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5629 "no vectype for scalar type\n");
5630 return false;
5631 }
5632
5633 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
5634 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
5635 if (maybe_ne (nunits_out, nunits_in))
5636 return false;
5637
5638 op1 = gimple_assign_rhs2 (stmt);
5639 stmt_vec_info op1_def_stmt_info;
5640 if (!vect_is_simple_use (op1, vinfo, &dt[1], &op1_vectype,
5641 &op1_def_stmt_info))
5642 {
5643 if (dump_enabled_p ())
5644 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5645 "use not simple.\n");
5646 return false;
5647 }
5648
5649 /* Multiple types in SLP are handled by creating the appropriate number of
5650 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5651 case of SLP. */
5652 if (slp_node)
5653 ncopies = 1;
5654 else
5655 ncopies = vect_get_num_copies (loop_vinfo, vectype);
5656
5657 gcc_assert (ncopies >= 1);
5658
5659 /* Determine whether the shift amount is a vector or a scalar. If the
5660 shift/rotate amount is a vector, use the vector/vector shift optabs. */
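/* For example, for "x[i] << n" with loop-invariant N the amount stays
scalar and a vector-shifted-by-scalar optab can be used, whereas for
"x[i] << y[i]" the amount is itself vectorized (vect_internal_def)
and a vector-shifted-by-vector optab is required. */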
5661
5662 if ((dt[1] == vect_internal_def
5663 || dt[1] == vect_induction_def
5664 || dt[1] == vect_nested_cycle)
5665 && !slp_node)
5666 scalar_shift_arg = false;
5667 else if (dt[1] == vect_constant_def
5668 || dt[1] == vect_external_def
5669 || dt[1] == vect_internal_def)
5670 {
5671 /* In SLP, we need to check whether the shift count is the same
5672 for all statements; in loops, if it is a constant or invariant,
5673 it is always a scalar shift. */
5674 if (slp_node)
5675 {
5676 vec<stmt_vec_info> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
5677 stmt_vec_info slpstmt_info;
5678
5679 FOR_EACH_VEC_ELT (stmts, k, slpstmt_info)
5680 {
5681 gassign *slpstmt = as_a <gassign *> (slpstmt_info->stmt);
5682 if (!operand_equal_p (gimple_assign_rhs2 (slpstmt), op1, 0))
5683 scalar_shift_arg = false;
5684 }
5685
5686 /* For internal SLP defs we have to make sure we see scalar stmts
5687 for all vector elements.
5688 ??? For different vectors we could resort to a different
5689 scalar shift operand but code-generation below simply always
5690 takes the first. */
5691 if (dt[1] == vect_internal_def
5692 && maybe_ne (nunits_out * SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node),
5693 stmts.length ()))
5694 scalar_shift_arg = false;
5695 }
5696
5697 /* If the shift amount is computed by a pattern stmt we cannot
5698 use the scalar amount directly thus give up and use a vector
5699 shift. */
5700 if (op1_def_stmt_info && is_pattern_stmt_p (op1_def_stmt_info))
5701 scalar_shift_arg = false;
5702 }
5703 else
5704 {
5705 if (dump_enabled_p ())
5706 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5707 "operand mode requires invariant argument.\n");
5708 return false;
5709 }
5710
5711 /* Vector shifted by vector. */
5712 bool was_scalar_shift_arg = scalar_shift_arg;
5713 if (!scalar_shift_arg)
5714 {
5715 optab = optab_for_tree_code (code, vectype, optab_vector);
5716 if (dump_enabled_p ())
5717 dump_printf_loc (MSG_NOTE, vect_location,
5718 "vector/vector shift/rotate found.\n");
5719
5720 if (!op1_vectype)
5721 op1_vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op1),
5722 slp_node);
5723 incompatible_op1_vectype_p
5724 = (op1_vectype == NULL_TREE
5725 || maybe_ne (TYPE_VECTOR_SUBPARTS (op1_vectype),
5726 TYPE_VECTOR_SUBPARTS (vectype))
5727 || TYPE_MODE (op1_vectype) != TYPE_MODE (vectype));
5728 if (incompatible_op1_vectype_p
5729 && (!slp_node
5730 || SLP_TREE_DEF_TYPE
5731 (SLP_TREE_CHILDREN (slp_node)[1]) != vect_constant_def))
5732 {
5733 if (dump_enabled_p ())
5734 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5735 "unusable type for last operand in"
5736 " vector/vector shift/rotate.\n");
5737 return false;
5738 }
5739 }
5740 /* See if the machine has a vector shifted by scalar insn and if not
5741 then see if it has a vector shifted by vector insn. */
5742 else
5743 {
5744 optab = optab_for_tree_code (code, vectype, optab_scalar);
5745 if (optab
5746 && optab_handler (optab, TYPE_MODE (vectype)) != CODE_FOR_nothing)
5747 {
5748 if (dump_enabled_p ())
5749 dump_printf_loc (MSG_NOTE, vect_location,
5750 "vector/scalar shift/rotate found.\n");
5751 }
5752 else
5753 {
5754 optab = optab_for_tree_code (code, vectype, optab_vector);
5755 if (optab
5756 && (optab_handler (optab, TYPE_MODE (vectype))
5757 != CODE_FOR_nothing))
5758 {
5759 scalar_shift_arg = false;
5760
5761 if (dump_enabled_p ())
5762 dump_printf_loc (MSG_NOTE, vect_location,
5763 "vector/vector shift/rotate found.\n");
5764
5765 /* Unlike the other binary operators, shifts/rotates have
5766 an rhs of type int rather than of the same type as the lhs,
5767 so make sure the scalar has the right type if we are
5768 dealing with vectors of long long/long/short/char. */
5769 incompatible_op1_vectype_p
5770 = !tree_nop_conversion_p (TREE_TYPE (vectype),
5771 TREE_TYPE (op1));
5772 }
5773 }
5774 }
5775
5776 /* Supportable by target? */
5777 if (!optab)
5778 {
5779 if (dump_enabled_p ())
5780 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5781 "no optab.\n");
5782 return false;
5783 }
5784 vec_mode = TYPE_MODE (vectype);
5785 icode = (int) optab_handler (optab, vec_mode);
5786 if (icode == CODE_FOR_nothing)
5787 {
5788 if (dump_enabled_p ())
5789 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5790 "op not supported by target.\n");
5791 /* Check only during analysis. */
5792 if (maybe_ne (GET_MODE_SIZE (vec_mode), UNITS_PER_WORD)
5793 || (!vec_stmt
5794 && !vect_worthwhile_without_simd_p (vinfo, code)))
5795 return false;
5796 if (dump_enabled_p ())
5797 dump_printf_loc (MSG_NOTE, vect_location,
5798 "proceeding using word mode.\n");
5799 }
5800
5801 /* Worthwhile without SIMD support? Check only during analysis. */
5802 if (!vec_stmt
5803 && !VECTOR_MODE_P (TYPE_MODE (vectype))
5804 && !vect_worthwhile_without_simd_p (vinfo, code))
5805 {
5806 if (dump_enabled_p ())
5807 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5808 "not worthwhile without SIMD support.\n");
5809 return false;
5810 }
5811
5812 if (!vec_stmt) /* transformation not required. */
5813 {
5814 STMT_VINFO_TYPE (stmt_info) = shift_vec_info_type;
5815 DUMP_VECT_SCOPE ("vectorizable_shift");
5816 vect_model_simple_cost (stmt_info, ncopies, dt,
5817 scalar_shift_arg ? 1 : ndts, slp_node, cost_vec);
5818 return true;
5819 }
5820
5821 /* Transform. */
5822
5823 if (dump_enabled_p ())
5824 dump_printf_loc (MSG_NOTE, vect_location,
5825 "transform binary/unary operation.\n");
5826
5827 if (incompatible_op1_vectype_p && !slp_node)
5828 {
5829 op1 = fold_convert (TREE_TYPE (vectype), op1);
5830 if (dt[1] != vect_constant_def)
5831 op1 = vect_init_vector (stmt_info, op1,
5832 TREE_TYPE (vectype), NULL);
5833 }
5834
5835 /* Handle def. */
5836 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5837
5838 prev_stmt_info = NULL;
5839 for (j = 0; j < ncopies; j++)
5840 {
5841 /* Handle uses. */
5842 if (j == 0)
5843 {
5844 if (scalar_shift_arg)
5845 {
5846 /* Vector shl and shr insn patterns can be defined with scalar
5847 operand 2 (shift operand). In this case, use constant or loop
5848 invariant op1 directly, without extending it to vector mode
5849 first. */
5850 optab_op2_mode = insn_data[icode].operand[2].mode;
5851 if (!VECTOR_MODE_P (optab_op2_mode))
5852 {
5853 if (dump_enabled_p ())
5854 dump_printf_loc (MSG_NOTE, vect_location,
5855 "operand 1 using scalar mode.\n");
5856 vec_oprnd1 = op1;
5857 vec_oprnds1.create (slp_node ? slp_node->vec_stmts_size : 1);
5858 vec_oprnds1.quick_push (vec_oprnd1);
5859 if (slp_node)
5860 {
5861 /* Store vec_oprnd1 for every vector stmt to be created
5862 for SLP_NODE. We check during the analysis that all
5863 the shift arguments are the same.
5864 TODO: Allow different constants for different vector
5865 stmts generated for an SLP instance. */
5866 for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
5867 vec_oprnds1.quick_push (vec_oprnd1);
5868 }
5869 }
5870 }
5871 else if (slp_node && incompatible_op1_vectype_p)
5872 {
5873 if (was_scalar_shift_arg)
5874 {
5875 /* If the argument was the same in all lanes, create
5876 the correctly typed vector shift amount directly. */
5877 op1 = fold_convert (TREE_TYPE (vectype), op1);
5878 op1 = vect_init_vector (stmt_info, op1, TREE_TYPE (vectype),
5879 !loop_vinfo ? gsi : NULL);
5880 vec_oprnd1 = vect_init_vector (stmt_info, op1, vectype,
5881 !loop_vinfo ? gsi : NULL);
5882 vec_oprnds1.create (slp_node->vec_stmts_size);
5883 for (k = 0; k < slp_node->vec_stmts_size; k++)
5884 vec_oprnds1.quick_push (vec_oprnd1);
5885 }
5886 else if (dt[1] == vect_constant_def)
5887 {
5888 /* Convert the scalar constant shift amounts in-place. */
5889 slp_tree shift = SLP_TREE_CHILDREN (slp_node)[1];
5890 gcc_assert (SLP_TREE_DEF_TYPE (shift) == vect_constant_def);
5891 for (unsigned i = 0;
5892 i < SLP_TREE_SCALAR_OPS (shift).length (); ++i)
5893 {
5894 SLP_TREE_SCALAR_OPS (shift)[i]
5895 = fold_convert (TREE_TYPE (vectype),
5896 SLP_TREE_SCALAR_OPS (shift)[i]);
5897 gcc_assert ((TREE_CODE (SLP_TREE_SCALAR_OPS (shift)[i])
5898 == INTEGER_CST));
5899 }
5900 }
5901 else
5902 gcc_assert (TYPE_MODE (op1_vectype) == TYPE_MODE (vectype));
5903 }
5904
5905 /* vec_oprnd1 is available if operand 1 should be of a scalar type
5906 (a special case for certain kinds of vector shifts); otherwise,
5907 operand 1 should be of a vector type (the usual case). */
5908 if (vec_oprnd1)
5909 vect_get_vec_defs (op0, NULL_TREE, stmt_info, &vec_oprnds0, NULL,
5910 slp_node);
5911 else
5912 vect_get_vec_defs (op0, op1, stmt_info, &vec_oprnds0, &vec_oprnds1,
5913 slp_node);
5914 }
5915 else
5916 vect_get_vec_defs_for_stmt_copy (vinfo, &vec_oprnds0, &vec_oprnds1);
5917
5918 /* Arguments are ready. Create the new vector stmt. */
5919 stmt_vec_info new_stmt_info = NULL;
5920 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5921 {
5922 vop1 = vec_oprnds1[i];
5923 gassign *new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
5924 new_temp = make_ssa_name (vec_dest, new_stmt);
5925 gimple_assign_set_lhs (new_stmt, new_temp);
5926 new_stmt_info
5927 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
5928 if (slp_node)
5929 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
5930 }
5931
5932 if (slp_node)
5933 continue;
5934
5935 if (j == 0)
5936 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
5937 else
5938 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
5939 prev_stmt_info = new_stmt_info;
5940 }
5941
5942 vec_oprnds0.release ();
5943 vec_oprnds1.release ();
5944
5945 return true;
5946 }
5947
5948
5949 /* Function vectorizable_operation.
5950
5951 Check if STMT_INFO performs a binary, unary or ternary operation that can
5952 be vectorized.
5953 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
5954 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
5955 Return true if STMT_INFO is vectorizable in this way. */
5956
5957 static bool
5958 vectorizable_operation (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
5959 stmt_vec_info *vec_stmt, slp_tree slp_node,
5960 stmt_vector_for_cost *cost_vec)
5961 {
5962 tree vec_dest;
5963 tree scalar_dest;
5964 tree op0, op1 = NULL_TREE, op2 = NULL_TREE;
5965 tree vectype;
5966 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
5967 enum tree_code code, orig_code;
5968 machine_mode vec_mode;
5969 tree new_temp;
5970 int op_type;
5971 optab optab;
5972 bool target_support_p;
5973 enum vect_def_type dt[3]
5974 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
5975 int ndts = 3;
5976 stmt_vec_info prev_stmt_info;
5977 poly_uint64 nunits_in;
5978 poly_uint64 nunits_out;
5979 tree vectype_out;
5980 int ncopies, vec_num;
5981 int j, i;
5982 vec<tree> vec_oprnds0 = vNULL;
5983 vec<tree> vec_oprnds1 = vNULL;
5984 vec<tree> vec_oprnds2 = vNULL;
5985 tree vop0, vop1, vop2;
5986 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
5987 vec_info *vinfo = stmt_info->vinfo;
5988
5989 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5990 return false;
5991
5992 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5993 && ! vec_stmt)
5994 return false;
5995
5996 /* Is STMT a vectorizable binary/unary operation? */
5997 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
5998 if (!stmt)
5999 return false;
6000
6001 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
6002 return false;
6003
6004 orig_code = code = gimple_assign_rhs_code (stmt);
6005
6006 /* Shifts are handled in vectorizable_shift. */
6007 if (code == LSHIFT_EXPR
6008 || code == RSHIFT_EXPR
6009 || code == LROTATE_EXPR
6010 || code == RROTATE_EXPR)
6011 return false;
6012
6013 /* Comparisons are handled in vectorizable_comparison. */
6014 if (TREE_CODE_CLASS (code) == tcc_comparison)
6015 return false;
6016
6017 /* Conditions are handled in vectorizable_condition. */
6018 if (code == COND_EXPR)
6019 return false;
6020
6021 /* For pointer addition and subtraction, we should use the normal
6022 plus and minus for the vector operation. */
6023 if (code == POINTER_PLUS_EXPR)
6024 code = PLUS_EXPR;
6025 if (code == POINTER_DIFF_EXPR)
6026 code = MINUS_EXPR;
6027
6028 /* Support only unary or binary operations. */
6029 op_type = TREE_CODE_LENGTH (code);
6030 if (op_type != unary_op && op_type != binary_op && op_type != ternary_op)
6031 {
6032 if (dump_enabled_p ())
6033 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6034 "num. args = %d (not unary/binary/ternary op).\n",
6035 op_type);
6036 return false;
6037 }
6038
6039 scalar_dest = gimple_assign_lhs (stmt);
6040 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
6041
6042 /* Most operations cannot handle bit-precision types without extra
6043 truncations. */
6044 bool mask_op_p = VECTOR_BOOLEAN_TYPE_P (vectype_out);
6045 if (!mask_op_p
6046 && !type_has_mode_precision_p (TREE_TYPE (scalar_dest))
6047 /* Exception are bitwise binary operations. */
6048 && code != BIT_IOR_EXPR
6049 && code != BIT_XOR_EXPR
6050 && code != BIT_AND_EXPR)
6051 {
6052 if (dump_enabled_p ())
6053 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6054 "bit-precision arithmetic not supported.\n");
6055 return false;
6056 }
6057
6058 op0 = gimple_assign_rhs1 (stmt);
6059 if (!vect_is_simple_use (op0, vinfo, &dt[0], &vectype))
6060 {
6061 if (dump_enabled_p ())
6062 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6063 "use not simple.\n");
6064 return false;
6065 }
6066 /* If op0 is an external or constant def, infer the vector type
6067 from the scalar type. */
6068 if (!vectype)
6069 {
6070 /* For a boolean type we cannot determine the vectype from an
6071 invariant value (we don't know whether it is a vector
6072 of booleans or a vector of integers). We use the output
6073 vectype because operations on booleans don't change the
6074 type. */
6075 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op0)))
6076 {
6077 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (scalar_dest)))
6078 {
6079 if (dump_enabled_p ())
6080 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6081 "not supported operation on bool value.\n");
6082 return false;
6083 }
6084 vectype = vectype_out;
6085 }
6086 else
6087 vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op0),
6088 slp_node);
6089 }
6090 if (vec_stmt)
6091 gcc_assert (vectype);
6092 if (!vectype)
6093 {
6094 if (dump_enabled_p ())
6095 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6096 "no vectype for scalar type %T\n",
6097 TREE_TYPE (op0));
6098
6099 return false;
6100 }
6101
6102 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
6103 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
6104 if (maybe_ne (nunits_out, nunits_in))
6105 return false;
6106
6107 tree vectype2 = NULL_TREE, vectype3 = NULL_TREE;
6108 if (op_type == binary_op || op_type == ternary_op)
6109 {
6110 op1 = gimple_assign_rhs2 (stmt);
6111 if (!vect_is_simple_use (op1, vinfo, &dt[1], &vectype2))
6112 {
6113 if (dump_enabled_p ())
6114 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6115 "use not simple.\n");
6116 return false;
6117 }
6118 }
6119 if (op_type == ternary_op)
6120 {
6121 op2 = gimple_assign_rhs3 (stmt);
6122 if (!vect_is_simple_use (op2, vinfo, &dt[2], &vectype3))
6123 {
6124 if (dump_enabled_p ())
6125 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6126 "use not simple.\n");
6127 return false;
6128 }
6129 }
6130
6131 /* Multiple types in SLP are handled by creating the appropriate number of
6132 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
6133 case of SLP. */
6134 if (slp_node)
6135 {
6136 ncopies = 1;
6137 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
6138 }
6139 else
6140 {
6141 ncopies = vect_get_num_copies (loop_vinfo, vectype);
6142 vec_num = 1;
6143 }
6144
6145 gcc_assert (ncopies >= 1);
6146
6147 /* Reject attempts to combine mask types with nonmask types, e.g. if
6148 we have an AND between a (nonmask) boolean loaded from memory and
6149 a (mask) boolean result of a comparison.
6150
6151 TODO: We could easily fix these cases up using pattern statements. */
6152 if (VECTOR_BOOLEAN_TYPE_P (vectype) != mask_op_p
6153 || (vectype2 && VECTOR_BOOLEAN_TYPE_P (vectype2) != mask_op_p)
6154 || (vectype3 && VECTOR_BOOLEAN_TYPE_P (vectype3) != mask_op_p))
6155 {
6156 if (dump_enabled_p ())
6157 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6158 "mixed mask and nonmask vector types\n");
6159 return false;
6160 }
6161
6162 /* Supportable by target? */
6163
6164 vec_mode = TYPE_MODE (vectype);
6165 if (code == MULT_HIGHPART_EXPR)
6166 target_support_p = can_mult_highpart_p (vec_mode, TYPE_UNSIGNED (vectype));
6167 else
6168 {
6169 optab = optab_for_tree_code (code, vectype, optab_default);
6170 if (!optab)
6171 {
6172 if (dump_enabled_p ())
6173 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6174 "no optab.\n");
6175 return false;
6176 }
6177 target_support_p = (optab_handler (optab, vec_mode)
6178 != CODE_FOR_nothing);
6179 }
6180
6181 if (!target_support_p)
6182 {
6183 if (dump_enabled_p ())
6184 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6185 "op not supported by target.\n");
6186 /* Check only during analysis. */
6187 if (maybe_ne (GET_MODE_SIZE (vec_mode), UNITS_PER_WORD)
6188 || (!vec_stmt && !vect_worthwhile_without_simd_p (vinfo, code)))
6189 return false;
6190 if (dump_enabled_p ())
6191 dump_printf_loc (MSG_NOTE, vect_location,
6192 "proceeding using word mode.\n");
6193 }
6194
6195 /* Worthwhile without SIMD support? Check only during analysis. */
6196 if (!VECTOR_MODE_P (vec_mode)
6197 && !vec_stmt
6198 && !vect_worthwhile_without_simd_p (vinfo, code))
6199 {
6200 if (dump_enabled_p ())
6201 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6202 "not worthwhile without SIMD support.\n");
6203 return false;
6204 }
6205
6206 int reduc_idx = STMT_VINFO_REDUC_IDX (stmt_info);
6207 vec_loop_masks *masks = (loop_vinfo ? &LOOP_VINFO_MASKS (loop_vinfo) : NULL);
6208 internal_fn cond_fn = get_conditional_internal_fn (code);
6209
6210 if (!vec_stmt) /* transformation not required. */
6211 {
6212 /* If this operation is part of a reduction, a fully-masked loop
6213 should only change the active lanes of the reduction chain,
6214 keeping the inactive lanes as-is. */
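/* For instance, for an addition get_conditional_internal_fn yields
IFN_COND_ADD, where .COND_ADD (MASK, A, B, ELSE) computes A + B in
the lanes where MASK is true and takes the ELSE value (here the
reduction chain input) in the inactive lanes. */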
6215 if (loop_vinfo
6216 && LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo)
6217 && reduc_idx >= 0)
6218 {
6219 if (cond_fn == IFN_LAST
6220 || !direct_internal_fn_supported_p (cond_fn, vectype,
6221 OPTIMIZE_FOR_SPEED))
6222 {
6223 if (dump_enabled_p ())
6224 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6225 "can't use a fully-masked loop because no"
6226 " conditional operation is available.\n");
6227 LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
6228 }
6229 else
6230 vect_record_loop_mask (loop_vinfo, masks, ncopies * vec_num,
6231 vectype, NULL);
6232 }
6233
6234 STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
6235 DUMP_VECT_SCOPE ("vectorizable_operation");
6236 vect_model_simple_cost (stmt_info, ncopies, dt, ndts, slp_node, cost_vec);
6237 return true;
6238 }
6239
6240 /* Transform. */
6241
6242 if (dump_enabled_p ())
6243 dump_printf_loc (MSG_NOTE, vect_location,
6244 "transform binary/unary operation.\n");
6245
6246 bool masked_loop_p = loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo);
6247
6248 /* POINTER_DIFF_EXPR has pointer arguments which are vectorized as
6249 vectors with unsigned elements, but the result is signed. So, we
6250 need to compute the MINUS_EXPR into vectype temporary and
6251 VIEW_CONVERT_EXPR it into the final vectype_out result. */
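/* A sketch of the sequence generated below for this case:
vect_tmp = vect_p - vect_q; <-- MINUS_EXPR on the unsigned VECTYPE
vect_res = VIEW_CONVERT_EXPR <vectype_out> (vect_tmp); */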
6252 tree vec_cvt_dest = NULL_TREE;
6253 if (orig_code == POINTER_DIFF_EXPR)
6254 {
6255 vec_dest = vect_create_destination_var (scalar_dest, vectype);
6256 vec_cvt_dest = vect_create_destination_var (scalar_dest, vectype_out);
6257 }
6258 /* Handle def. */
6259 else
6260 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
6261
6262 /* In case the vectorization factor (VF) is bigger than the number
6263 of elements that we can fit in a vectype (nunits), we have to generate
6264 more than one vector stmt, i.e., we need to "unroll" the
6265 vector stmt by a factor VF/nunits. In doing so, we record a pointer
6266 from one copy of the vector stmt to the next, in the field
6267 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
6268 stages to find the correct vector defs to be used when vectorizing
6269 stmts that use the defs of the current stmt. The example below
6270 illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
6271 we need to create 4 vectorized stmts):
6272
6273 before vectorization:
6274 RELATED_STMT VEC_STMT
6275 S1: x = memref - -
6276 S2: z = x + 1 - -
6277
6278 step 1: vectorize stmt S1 (done in vectorizable_load. See more details
6279 there):
6280 RELATED_STMT VEC_STMT
6281 VS1_0: vx0 = memref0 VS1_1 -
6282 VS1_1: vx1 = memref1 VS1_2 -
6283 VS1_2: vx2 = memref2 VS1_3 -
6284 VS1_3: vx3 = memref3 - -
6285 S1: x = load - VS1_0
6286 S2: z = x + 1 - -
6287
6288 step2: vectorize stmt S2 (done here):
6289 To vectorize stmt S2 we first need to find the relevant vector
6290 def for the first operand 'x'. This is, as usual, obtained from
6291 the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
6292 that defines 'x' (S1). This way we find the stmt VS1_0, and the
6293 relevant vector def 'vx0'. Having found 'vx0' we can generate
6294 the vector stmt VS2_0, and as usual, record it in the
6295 STMT_VINFO_VEC_STMT of stmt S2.
6296 When creating the second copy (VS2_1), we obtain the relevant vector
6297 def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
6298 stmt VS1_0. This way we find the stmt VS1_1 and the relevant
6299 vector def 'vx1'. Using 'vx1' we create stmt VS2_1 and record a
6300 pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
6301 Similarly when creating stmts VS2_2 and VS2_3. This is the resulting
6302 chain of stmts and pointers:
6303 RELATED_STMT VEC_STMT
6304 VS1_0: vx0 = memref0 VS1_1 -
6305 VS1_1: vx1 = memref1 VS1_2 -
6306 VS1_2: vx2 = memref2 VS1_3 -
6307 VS1_3: vx3 = memref3 - -
6308 S1: x = load - VS1_0
6309 VS2_0: vz0 = vx0 + v1 VS2_1 -
6310 VS2_1: vz1 = vx1 + v1 VS2_2 -
6311 VS2_2: vz2 = vx2 + v1 VS2_3 -
6312 VS2_3: vz3 = vx3 + v1 - -
6313 S2: z = x + 1 - VS2_0 */
6314
6315 prev_stmt_info = NULL;
6316 for (j = 0; j < ncopies; j++)
6317 {
6318 /* Handle uses. */
6319 if (j == 0)
6320 {
6321 if (op_type == binary_op)
6322 vect_get_vec_defs (op0, op1, stmt_info, &vec_oprnds0, &vec_oprnds1,
6323 slp_node);
6324 else if (op_type == ternary_op)
6325 {
6326 if (slp_node)
6327 {
6328 auto_vec<vec<tree> > vec_defs(3);
6329 vect_get_slp_defs (slp_node, &vec_defs);
6330 vec_oprnds0 = vec_defs[0];
6331 vec_oprnds1 = vec_defs[1];
6332 vec_oprnds2 = vec_defs[2];
6333 }
6334 else
6335 {
6336 vect_get_vec_defs (op0, op1, stmt_info, &vec_oprnds0,
6337 &vec_oprnds1, NULL);
6338 vect_get_vec_defs (op2, NULL_TREE, stmt_info, &vec_oprnds2,
6339 NULL, NULL);
6340 }
6341 }
6342 else
6343 vect_get_vec_defs (op0, NULL_TREE, stmt_info, &vec_oprnds0, NULL,
6344 slp_node);
6345 }
6346 else
6347 {
6348 vect_get_vec_defs_for_stmt_copy (vinfo, &vec_oprnds0, &vec_oprnds1);
6349 if (op_type == ternary_op)
6350 {
6351 tree vec_oprnd = vec_oprnds2.pop ();
6352 vec_oprnds2.quick_push (vect_get_vec_def_for_stmt_copy (vinfo,
6353 vec_oprnd));
6354 }
6355 }
6356
6357 /* Arguments are ready. Create the new vector stmt. */
6358 stmt_vec_info new_stmt_info = NULL;
6359 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
6360 {
6361 vop1 = ((op_type == binary_op || op_type == ternary_op)
6362 ? vec_oprnds1[i] : NULL_TREE);
6363 vop2 = ((op_type == ternary_op)
6364 ? vec_oprnds2[i] : NULL_TREE);
6365 if (masked_loop_p && reduc_idx >= 0)
6366 {
6367 /* Perform the operation on active elements only and take
6368 inactive elements from the reduction chain input. */
6369 gcc_assert (!vop2);
6370 vop2 = reduc_idx == 1 ? vop1 : vop0;
6371 tree mask = vect_get_loop_mask (gsi, masks, vec_num * ncopies,
6372 vectype, i * ncopies + j);
6373 gcall *call = gimple_build_call_internal (cond_fn, 4, mask,
6374 vop0, vop1, vop2);
6375 new_temp = make_ssa_name (vec_dest, call);
6376 gimple_call_set_lhs (call, new_temp);
6377 gimple_call_set_nothrow (call, true);
6378 new_stmt_info
6379 = vect_finish_stmt_generation (stmt_info, call, gsi);
6380 }
6381 else
6382 {
6383 gassign *new_stmt = gimple_build_assign (vec_dest, code,
6384 vop0, vop1, vop2);
6385 new_temp = make_ssa_name (vec_dest, new_stmt);
6386 gimple_assign_set_lhs (new_stmt, new_temp);
6387 new_stmt_info
6388 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
6389 if (vec_cvt_dest)
6390 {
6391 new_temp = build1 (VIEW_CONVERT_EXPR, vectype_out, new_temp);
6392 gassign *new_stmt
6393 = gimple_build_assign (vec_cvt_dest, VIEW_CONVERT_EXPR,
6394 new_temp);
6395 new_temp = make_ssa_name (vec_cvt_dest, new_stmt);
6396 gimple_assign_set_lhs (new_stmt, new_temp);
6397 new_stmt_info
6398 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
6399 }
6400 }
6401 if (slp_node)
6402 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
6403 }
6404
6405 if (slp_node)
6406 continue;
6407
6408 if (j == 0)
6409 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
6410 else
6411 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
6412 prev_stmt_info = new_stmt_info;
6413 }
6414
6415 vec_oprnds0.release ();
6416 vec_oprnds1.release ();
6417 vec_oprnds2.release ();
6418
6419 return true;
6420 }
6421
6422 /* A helper function to ensure data reference DR_INFO's base alignment. */
6423
6424 static void
6425 ensure_base_align (dr_vec_info *dr_info)
6426 {
6427 if (dr_info->misalignment == DR_MISALIGNMENT_UNINITIALIZED)
6428 return;
6429
6430 if (dr_info->base_misaligned)
6431 {
6432 tree base_decl = dr_info->base_decl;
6433
6434 // We should only be able to increase the alignment of a base object if
6435 // we know what its new alignment should be at compile time.
6436 unsigned HOST_WIDE_INT align_base_to =
6437 DR_TARGET_ALIGNMENT (dr_info).to_constant () * BITS_PER_UNIT;
6438
6439 if (decl_in_symtab_p (base_decl))
6440 symtab_node::get (base_decl)->increase_alignment (align_base_to);
6441 else if (DECL_ALIGN (base_decl) < align_base_to)
6442 {
6443 SET_DECL_ALIGN (base_decl, align_base_to);
6444 DECL_USER_ALIGN (base_decl) = 1;
6445 }
6446 dr_info->base_misaligned = false;
6447 }
6448 }
6449
6450
6451 /* Function get_group_alias_ptr_type.
6452
6453 Return the alias type for the group starting at FIRST_STMT_INFO. */
6454
6455 static tree
6456 get_group_alias_ptr_type (stmt_vec_info first_stmt_info)
6457 {
6458 struct data_reference *first_dr, *next_dr;
6459
6460 first_dr = STMT_VINFO_DATA_REF (first_stmt_info);
6461 stmt_vec_info next_stmt_info = DR_GROUP_NEXT_ELEMENT (first_stmt_info);
6462 while (next_stmt_info)
6463 {
6464 next_dr = STMT_VINFO_DATA_REF (next_stmt_info);
6465 if (get_alias_set (DR_REF (first_dr))
6466 != get_alias_set (DR_REF (next_dr)))
6467 {
6468 if (dump_enabled_p ())
6469 dump_printf_loc (MSG_NOTE, vect_location,
6470 "conflicting alias set types.\n");
6471 return ptr_type_node;
6472 }
6473 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
6474 }
6475 return reference_alias_ptr_type (DR_REF (first_dr));
6476 }
6477
6478
6479 /* Function scan_operand_equal_p.
6480
6481 Helper function for check_scan_store. Compare two references
6482 with .GOMP_SIMD_LANE bases. */
6483
6484 static bool
6485 scan_operand_equal_p (tree ref1, tree ref2)
6486 {
6487 tree ref[2] = { ref1, ref2 };
6488 poly_int64 bitsize[2], bitpos[2];
6489 tree offset[2], base[2];
6490 for (int i = 0; i < 2; ++i)
6491 {
6492 machine_mode mode;
6493 int unsignedp, reversep, volatilep = 0;
6494 base[i] = get_inner_reference (ref[i], &bitsize[i], &bitpos[i],
6495 &offset[i], &mode, &unsignedp,
6496 &reversep, &volatilep);
6497 if (reversep || volatilep || maybe_ne (bitpos[i], 0))
6498 return false;
6499 if (TREE_CODE (base[i]) == MEM_REF
6500 && offset[i] == NULL_TREE
6501 && TREE_CODE (TREE_OPERAND (base[i], 0)) == SSA_NAME)
6502 {
6503 gimple *def_stmt = SSA_NAME_DEF_STMT (TREE_OPERAND (base[i], 0));
6504 if (is_gimple_assign (def_stmt)
6505 && gimple_assign_rhs_code (def_stmt) == POINTER_PLUS_EXPR
6506 && TREE_CODE (gimple_assign_rhs1 (def_stmt)) == ADDR_EXPR
6507 && TREE_CODE (gimple_assign_rhs2 (def_stmt)) == SSA_NAME)
6508 {
6509 if (maybe_ne (mem_ref_offset (base[i]), 0))
6510 return false;
6511 base[i] = TREE_OPERAND (gimple_assign_rhs1 (def_stmt), 0);
6512 offset[i] = gimple_assign_rhs2 (def_stmt);
6513 }
6514 }
6515 }
6516
6517 if (!operand_equal_p (base[0], base[1], 0))
6518 return false;
6519 if (maybe_ne (bitsize[0], bitsize[1]))
6520 return false;
6521 if (offset[0] != offset[1])
6522 {
6523 if (!offset[0] || !offset[1])
6524 return false;
6525 if (!operand_equal_p (offset[0], offset[1], 0))
6526 {
6527 tree step[2];
6528 for (int i = 0; i < 2; ++i)
6529 {
6530 step[i] = integer_one_node;
6531 if (TREE_CODE (offset[i]) == SSA_NAME)
6532 {
6533 gimple *def_stmt = SSA_NAME_DEF_STMT (offset[i]);
6534 if (is_gimple_assign (def_stmt)
6535 && gimple_assign_rhs_code (def_stmt) == MULT_EXPR
6536 && (TREE_CODE (gimple_assign_rhs2 (def_stmt))
6537 == INTEGER_CST))
6538 {
6539 step[i] = gimple_assign_rhs2 (def_stmt);
6540 offset[i] = gimple_assign_rhs1 (def_stmt);
6541 }
6542 }
6543 else if (TREE_CODE (offset[i]) == MULT_EXPR)
6544 {
6545 step[i] = TREE_OPERAND (offset[i], 1);
6546 offset[i] = TREE_OPERAND (offset[i], 0);
6547 }
6548 tree rhs1 = NULL_TREE;
6549 if (TREE_CODE (offset[i]) == SSA_NAME)
6550 {
6551 gimple *def_stmt = SSA_NAME_DEF_STMT (offset[i]);
6552 if (gimple_assign_cast_p (def_stmt))
6553 rhs1 = gimple_assign_rhs1 (def_stmt);
6554 }
6555 else if (CONVERT_EXPR_P (offset[i]))
6556 rhs1 = TREE_OPERAND (offset[i], 0);
6557 if (rhs1
6558 && INTEGRAL_TYPE_P (TREE_TYPE (rhs1))
6559 && INTEGRAL_TYPE_P (TREE_TYPE (offset[i]))
6560 && (TYPE_PRECISION (TREE_TYPE (offset[i]))
6561 >= TYPE_PRECISION (TREE_TYPE (rhs1))))
6562 offset[i] = rhs1;
6563 }
6564 if (!operand_equal_p (offset[0], offset[1], 0)
6565 || !operand_equal_p (step[0], step[1], 0))
6566 return false;
6567 }
6568 }
6569 return true;
6570 }
6571
6572
6573 enum scan_store_kind {
6574 /* Normal permutation. */
6575 scan_store_kind_perm,
6576
6577 /* Whole vector left shift permutation with zero init. */
6578 scan_store_kind_lshift_zero,
6579
6580 /* Whole vector left shift permutation and VEC_COND_EXPR. */
6581 scan_store_kind_lshift_cond
6582 };
6583
6584 /* Function scan_store_can_perm_p.
6585
6586 Verify whether we can perform the needed permutations or whole vector
6587 shifts. Return -1 on failure, otherwise the exact log2 of VECTYPE's nunits.
6588 If nonnull, USE_WHOLE_VECTOR is filled with the enum scan_store_kind
6589 operation to perform at each step. */
6590
6591 static int
6592 scan_store_can_perm_p (tree vectype, tree init,
6593 vec<enum scan_store_kind> *use_whole_vector = NULL)
6594 {
6595 enum machine_mode vec_mode = TYPE_MODE (vectype);
6596 unsigned HOST_WIDE_INT nunits;
6597 if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits))
6598 return -1;
6599 int units_log2 = exact_log2 (nunits);
6600 if (units_log2 <= 0)
6601 return -1;
6602
6603 int i;
6604 enum scan_store_kind whole_vector_shift_kind = scan_store_kind_perm;
6605 for (i = 0; i <= units_log2; ++i)
6606 {
6607 unsigned HOST_WIDE_INT j, k;
6608 enum scan_store_kind kind = scan_store_kind_perm;
6609 vec_perm_builder sel (nunits, nunits, 1);
6610 sel.quick_grow (nunits);
6611 if (i == units_log2)
6612 {
6613 for (j = 0; j < nunits; ++j)
6614 sel[j] = nunits - 1;
6615 }
6616 else
6617 {
6618 for (j = 0; j < (HOST_WIDE_INT_1U << i); ++j)
6619 sel[j] = j;
6620 for (k = 0; j < nunits; ++j, ++k)
6621 sel[j] = nunits + k;
6622 }
6623 vec_perm_indices indices (sel, i == units_log2 ? 1 : 2, nunits);
6624 if (!can_vec_perm_const_p (vec_mode, indices))
6625 {
6626 if (i == units_log2)
6627 return -1;
6628
6629 if (whole_vector_shift_kind == scan_store_kind_perm)
6630 {
6631 if (optab_handler (vec_shl_optab, vec_mode) == CODE_FOR_nothing)
6632 return -1;
6633 whole_vector_shift_kind = scan_store_kind_lshift_zero;
6634 /* Whole vector shifts shift in zeros, so if INIT is an all-zero
6635 constant, there is no need to do anything further. */
6636 if ((TREE_CODE (init) != INTEGER_CST
6637 && TREE_CODE (init) != REAL_CST)
6638 || !initializer_zerop (init))
6639 {
6640 tree masktype = truth_type_for (vectype);
6641 if (!expand_vec_cond_expr_p (vectype, masktype, VECTOR_CST))
6642 return -1;
6643 whole_vector_shift_kind = scan_store_kind_lshift_cond;
6644 }
6645 }
6646 kind = whole_vector_shift_kind;
6647 }
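/* USE_WHOLE_VECTOR stays empty as long as every step can use a plain
   permutation; once a whole vector shift is needed, safe_grow_cleared
   backfills the earlier steps with scan_store_kind_perm (value 0), and
   from then on one entry is pushed per step.  */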
6648 if (use_whole_vector)
6649 {
6650 if (kind != scan_store_kind_perm && use_whole_vector->is_empty ())
6651 use_whole_vector->safe_grow_cleared (i);
6652 if (kind != scan_store_kind_perm || !use_whole_vector->is_empty ())
6653 use_whole_vector->safe_push (kind);
6654 }
6655 }
6656
6657 return units_log2;
6658 }
6659
6660
6661 /* Function check_scan_store.
6662
6663 Check magic stores for #pragma omp scan {in,ex}clusive reductions. */
6664
6665 static bool
6666 check_scan_store (stmt_vec_info stmt_info, tree vectype,
6667 enum vect_def_type rhs_dt, bool slp, tree mask,
6668 vect_memory_access_type memory_access_type)
6669 {
6670 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
6671 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
6672 tree ref_type;
6673
6674 gcc_assert (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) > 1);
6675 if (slp
6676 || mask
6677 || memory_access_type != VMAT_CONTIGUOUS
6678 || TREE_CODE (DR_BASE_ADDRESS (dr_info->dr)) != ADDR_EXPR
6679 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0))
6680 || loop_vinfo == NULL
6681 || LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
6682 || STMT_VINFO_GROUPED_ACCESS (stmt_info)
6683 || !integer_zerop (get_dr_vinfo_offset (dr_info))
6684 || !integer_zerop (DR_INIT (dr_info->dr))
6685 || !(ref_type = reference_alias_ptr_type (DR_REF (dr_info->dr)))
6686 || !alias_sets_conflict_p (get_alias_set (vectype),
6687 get_alias_set (TREE_TYPE (ref_type))))
6688 {
6689 if (dump_enabled_p ())
6690 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6691 "unsupported OpenMP scan store.\n");
6692 return false;
6693 }
6694
6695 /* We need to pattern match code built by OpenMP lowering and simplified
6696 by subsequent optimizations into something we can handle.
6697 #pragma omp simd reduction(inscan,+:r)
6698 for (...)
6699 {
6700 r += something ();
6701 #pragma omp scan inclusive (r)
6702 use (r);
6703 }
6704 shall have body with:
6705 // Initialization for input phase, store the reduction initializer:
6706 _20 = .GOMP_SIMD_LANE (simduid.3_14(D), 0);
6707 _21 = .GOMP_SIMD_LANE (simduid.3_14(D), 1);
6708 D.2042[_21] = 0;
6709 // Actual input phase:
6710 ...
6711 r.0_5 = D.2042[_20];
6712 _6 = _4 + r.0_5;
6713 D.2042[_20] = _6;
6714 // Initialization for scan phase:
6715 _25 = .GOMP_SIMD_LANE (simduid.3_14(D), 2);
6716 _26 = D.2043[_25];
6717 _27 = D.2042[_25];
6718 _28 = _26 + _27;
6719 D.2043[_25] = _28;
6720 D.2042[_25] = _28;
6721 // Actual scan phase:
6722 ...
6723 r.1_8 = D.2042[_20];
6724 ...
6725 The "omp simd array" variable D.2042 holds the privatized copy used
6726 inside of the loop and D.2043 is another one that holds copies of
6727 the current original list item. The separate GOMP_SIMD_LANE ifn
6728 kinds are there to allow optimizing the initializer store and
6729 combiner sequence, e.g. if it is originally some C++-ish user
6730 defined reduction, while still letting the vectorizer pattern
6731 recognize it and turn it into the appropriate vectorized scan.
6732
6733 For exclusive scan, this is slightly different:
6734 #pragma omp simd reduction(inscan,+:r)
6735 for (...)
6736 {
6737 use (r);
6738 #pragma omp scan exclusive (r)
6739 r += something ();
6740 }
6741 shall have body with:
6742 // Initialization for input phase, store the reduction initializer:
6743 _20 = .GOMP_SIMD_LANE (simduid.3_14(D), 0);
6744 _21 = .GOMP_SIMD_LANE (simduid.3_14(D), 1);
6745 D.2042[_21] = 0;
6746 // Actual input phase:
6747 ...
6748 r.0_5 = D.2042[_20];
6749 _6 = _4 + r.0_5;
6750 D.2042[_20] = _6;
6751 // Initialization for scan phase:
6752 _25 = .GOMP_SIMD_LANE (simduid.3_14(D), 3);
6753 _26 = D.2043[_25];
6754 D.2044[_25] = _26;
6755 _27 = D.2042[_25];
6756 _28 = _26 + _27;
6757 D.2043[_25] = _28;
6758 // Actual scan phase:
6759 ...
6760 r.1_8 = D.2044[_20];
6761 ... */
6762
6763 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 2)
6764 {
6765 /* Match the D.2042[_21] = 0; store above. Just require that
6766 it is a constant or external definition store. */
6767 if (rhs_dt != vect_constant_def && rhs_dt != vect_external_def)
6768 {
6769 fail_init:
6770 if (dump_enabled_p ())
6771 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6772 "unsupported OpenMP scan initializer store.\n");
6773 return false;
6774 }
6775
6776 if (! loop_vinfo->scan_map)
6777 loop_vinfo->scan_map = new hash_map<tree, tree>;
6778 tree var = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
6779 tree &cached = loop_vinfo->scan_map->get_or_insert (var);
6780 if (cached)
6781 goto fail_init;
6782 cached = gimple_assign_rhs1 (STMT_VINFO_STMT (stmt_info));
6783
6784 /* These stores can be vectorized normally. */
6785 return true;
6786 }
6787
6788 if (rhs_dt != vect_internal_def)
6789 {
6790 fail:
6791 if (dump_enabled_p ())
6792 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6793 "unsupported OpenMP scan combiner pattern.\n");
6794 return false;
6795 }
6796
6797 gimple *stmt = STMT_VINFO_STMT (stmt_info);
6798 tree rhs = gimple_assign_rhs1 (stmt);
6799 if (TREE_CODE (rhs) != SSA_NAME)
6800 goto fail;
6801
6802 gimple *other_store_stmt = NULL;
6803 tree var = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
6804 bool inscan_var_store
6805 = lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var)) != NULL;
6806
6807 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4)
6808 {
6809 if (!inscan_var_store)
6810 {
6811 use_operand_p use_p;
6812 imm_use_iterator iter;
6813 FOR_EACH_IMM_USE_FAST (use_p, iter, rhs)
6814 {
6815 gimple *use_stmt = USE_STMT (use_p);
6816 if (use_stmt == stmt || is_gimple_debug (use_stmt))
6817 continue;
6818 if (gimple_bb (use_stmt) != gimple_bb (stmt)
6819 || !is_gimple_assign (use_stmt)
6820 || gimple_assign_rhs_class (use_stmt) != GIMPLE_BINARY_RHS
6821 || other_store_stmt
6822 || TREE_CODE (gimple_assign_lhs (use_stmt)) != SSA_NAME)
6823 goto fail;
6824 other_store_stmt = use_stmt;
6825 }
6826 if (other_store_stmt == NULL)
6827 goto fail;
6828 rhs = gimple_assign_lhs (other_store_stmt);
6829 if (!single_imm_use (rhs, &use_p, &other_store_stmt))
6830 goto fail;
6831 }
6832 }
6833 else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 3)
6834 {
6835 use_operand_p use_p;
6836 imm_use_iterator iter;
6837 FOR_EACH_IMM_USE_FAST (use_p, iter, rhs)
6838 {
6839 gimple *use_stmt = USE_STMT (use_p);
6840 if (use_stmt == stmt || is_gimple_debug (use_stmt))
6841 continue;
6842 if (other_store_stmt)
6843 goto fail;
6844 other_store_stmt = use_stmt;
6845 }
6846 }
6847 else
6848 goto fail;
6849
6850 gimple *def_stmt = SSA_NAME_DEF_STMT (rhs);
6851 if (gimple_bb (def_stmt) != gimple_bb (stmt)
6852 || !is_gimple_assign (def_stmt)
6853 || gimple_assign_rhs_class (def_stmt) != GIMPLE_BINARY_RHS)
6854 goto fail;
6855
6856 enum tree_code code = gimple_assign_rhs_code (def_stmt);
6857 /* For pointer addition, we should use the normal plus for the vector
6858 operation. */
6859 switch (code)
6860 {
6861 case POINTER_PLUS_EXPR:
6862 code = PLUS_EXPR;
6863 break;
6864 case MULT_HIGHPART_EXPR:
6865 goto fail;
6866 default:
6867 break;
6868 }
6869 if (TREE_CODE_LENGTH (code) != binary_op || !commutative_tree_code (code))
6870 goto fail;
6871
6872 tree rhs1 = gimple_assign_rhs1 (def_stmt);
6873 tree rhs2 = gimple_assign_rhs2 (def_stmt);
6874 if (TREE_CODE (rhs1) != SSA_NAME || TREE_CODE (rhs2) != SSA_NAME)
6875 goto fail;
6876
6877 gimple *load1_stmt = SSA_NAME_DEF_STMT (rhs1);
6878 gimple *load2_stmt = SSA_NAME_DEF_STMT (rhs2);
6879 if (gimple_bb (load1_stmt) != gimple_bb (stmt)
6880 || !gimple_assign_load_p (load1_stmt)
6881 || gimple_bb (load2_stmt) != gimple_bb (stmt)
6882 || !gimple_assign_load_p (load2_stmt))
6883 goto fail;
6884
6885 stmt_vec_info load1_stmt_info = loop_vinfo->lookup_stmt (load1_stmt);
6886 stmt_vec_info load2_stmt_info = loop_vinfo->lookup_stmt (load2_stmt);
6887 if (load1_stmt_info == NULL
6888 || load2_stmt_info == NULL
6889 || (STMT_VINFO_SIMD_LANE_ACCESS_P (load1_stmt_info)
6890 != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info))
6891 || (STMT_VINFO_SIMD_LANE_ACCESS_P (load2_stmt_info)
6892 != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info)))
6893 goto fail;
6894
6895 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && inscan_var_store)
6896 {
6897 dr_vec_info *load1_dr_info = STMT_VINFO_DR_INFO (load1_stmt_info);
6898 if (TREE_CODE (DR_BASE_ADDRESS (load1_dr_info->dr)) != ADDR_EXPR
6899 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info->dr), 0)))
6900 goto fail;
6901 tree var1 = TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info->dr), 0);
6902 tree lrhs;
6903 if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1)))
6904 lrhs = rhs1;
6905 else
6906 lrhs = rhs2;
6907 use_operand_p use_p;
6908 imm_use_iterator iter;
6909 FOR_EACH_IMM_USE_FAST (use_p, iter, lrhs)
6910 {
6911 gimple *use_stmt = USE_STMT (use_p);
6912 if (use_stmt == def_stmt || is_gimple_debug (use_stmt))
6913 continue;
6914 if (other_store_stmt)
6915 goto fail;
6916 other_store_stmt = use_stmt;
6917 }
6918 }
6919
6920 if (other_store_stmt == NULL)
6921 goto fail;
6922 if (gimple_bb (other_store_stmt) != gimple_bb (stmt)
6923 || !gimple_store_p (other_store_stmt))
6924 goto fail;
6925
6926 stmt_vec_info other_store_stmt_info
6927 = loop_vinfo->lookup_stmt (other_store_stmt);
6928 if (other_store_stmt_info == NULL
6929 || (STMT_VINFO_SIMD_LANE_ACCESS_P (other_store_stmt_info)
6930 != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info)))
6931 goto fail;
6932
6933 gimple *stmt1 = stmt;
6934 gimple *stmt2 = other_store_stmt;
6935 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && !inscan_var_store)
6936 std::swap (stmt1, stmt2);
6937 if (scan_operand_equal_p (gimple_assign_lhs (stmt1),
6938 gimple_assign_rhs1 (load2_stmt)))
6939 {
6940 std::swap (rhs1, rhs2);
6941 std::swap (load1_stmt, load2_stmt);
6942 std::swap (load1_stmt_info, load2_stmt_info);
6943 }
6944 if (!scan_operand_equal_p (gimple_assign_lhs (stmt1),
6945 gimple_assign_rhs1 (load1_stmt)))
6946 goto fail;
6947
6948 tree var3 = NULL_TREE;
6949 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 3
6950 && !scan_operand_equal_p (gimple_assign_lhs (stmt2),
6951 gimple_assign_rhs1 (load2_stmt)))
6952 goto fail;
6953 else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4)
6954 {
6955 dr_vec_info *load2_dr_info = STMT_VINFO_DR_INFO (load2_stmt_info);
6956 if (TREE_CODE (DR_BASE_ADDRESS (load2_dr_info->dr)) != ADDR_EXPR
6957 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info->dr), 0)))
6958 goto fail;
6959 var3 = TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info->dr), 0);
6960 if (!lookup_attribute ("omp simd array", DECL_ATTRIBUTES (var3))
6961 || lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var3))
6962 || lookup_attribute ("omp simd inscan exclusive",
6963 DECL_ATTRIBUTES (var3)))
6964 goto fail;
6965 }
6966
6967 dr_vec_info *other_dr_info = STMT_VINFO_DR_INFO (other_store_stmt_info);
6968 if (TREE_CODE (DR_BASE_ADDRESS (other_dr_info->dr)) != ADDR_EXPR
6969 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (other_dr_info->dr), 0)))
6970 goto fail;
6971
6972 tree var1 = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
6973 tree var2 = TREE_OPERAND (DR_BASE_ADDRESS (other_dr_info->dr), 0);
6974 if (!lookup_attribute ("omp simd array", DECL_ATTRIBUTES (var1))
6975 || !lookup_attribute ("omp simd array", DECL_ATTRIBUTES (var2))
6976 || (!lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1)))
6977 == (!lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var2))))
6978 goto fail;
6979
6980 if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1)))
6981 std::swap (var1, var2);
6982
6983 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4)
6984 {
6985 if (!lookup_attribute ("omp simd inscan exclusive",
6986 DECL_ATTRIBUTES (var1)))
6987 goto fail;
6988 var1 = var3;
6989 }
6990
6991 if (loop_vinfo->scan_map == NULL)
6992 goto fail;
6993 tree *init = loop_vinfo->scan_map->get (var1);
6994 if (init == NULL)
6995 goto fail;
6996
6997 /* The IL is as expected, now check if we can actually vectorize it.
6998 Inclusive scan:
6999 _26 = D.2043[_25];
7000 _27 = D.2042[_25];
7001 _28 = _26 + _27;
7002 D.2043[_25] = _28;
7003 D.2042[_25] = _28;
7004 should be vectorized as (where _40 is the vectorized rhs
7005 from the D.2042[_21] = 0; store):
7006 _30 = MEM <vector(8) int> [(int *)&D.2043];
7007 _31 = MEM <vector(8) int> [(int *)&D.2042];
7008 _32 = VEC_PERM_EXPR <_40, _31, { 0, 8, 9, 10, 11, 12, 13, 14 }>;
7009 _33 = _31 + _32;
7010 // _33 = { _31[0], _31[0]+_31[1], _31[1]+_31[2], ..., _31[6]+_31[7] };
7011 _34 = VEC_PERM_EXPR <_40, _33, { 0, 1, 8, 9, 10, 11, 12, 13 }>;
7012 _35 = _33 + _34;
7013 // _35 = { _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
7014 // _31[1]+.._31[4], ... _31[4]+.._31[7] };
7015 _36 = VEC_PERM_EXPR <_40, _35, { 0, 1, 2, 3, 8, 9, 10, 11 }>;
7016 _37 = _35 + _36;
7017 // _37 = { _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
7018 // _31[0]+.._31[4], ... _31[0]+.._31[7] };
7019 _38 = _30 + _37;
7020 _39 = VEC_PERM_EXPR <_38, _38, { 7, 7, 7, 7, 7, 7, 7, 7 }>;
7021 MEM <vector(8) int> [(int *)&D.2043] = _39;
7022 MEM <vector(8) int> [(int *)&D.2042] = _38;
7023 Exclusive scan:
7024 _26 = D.2043[_25];
7025 D.2044[_25] = _26;
7026 _27 = D.2042[_25];
7027 _28 = _26 + _27;
7028 D.2043[_25] = _28;
7029 should be vectorized as (where _40 is the vectorized rhs
7030 from the D.2042[_21] = 0; store):
7031 _30 = MEM <vector(8) int> [(int *)&D.2043];
7032 _31 = MEM <vector(8) int> [(int *)&D.2042];
7033 _32 = VEC_PERM_EXPR <_40, _31, { 0, 8, 9, 10, 11, 12, 13, 14 }>;
7034 _33 = VEC_PERM_EXPR <_40, _32, { 0, 8, 9, 10, 11, 12, 13, 14 }>;
7035 _34 = _32 + _33;
7036 // _34 = { 0, _31[0], _31[0]+_31[1], _31[1]+_31[2], _31[2]+_31[3],
7037 // _31[3]+_31[4], ... _31[5]+.._31[6] };
7038 _35 = VEC_PERM_EXPR <_40, _34, { 0, 1, 8, 9, 10, 11, 12, 13 }>;
7039 _36 = _34 + _35;
7040 // _36 = { 0, _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
7041 // _31[1]+.._31[4], ... _31[3]+.._31[6] };
7042 _37 = VEC_PERM_EXPR <_40, _36, { 0, 1, 2, 3, 8, 9, 10, 11 }>;
7043 _38 = _36 + _37;
7044 // _38 = { 0, _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
7045 // _31[0]+.._31[4], ... _31[0]+.._31[6] };
7046 _39 = _30 + _38;
7047 _50 = _31 + _39;
7048 _51 = VEC_PERM_EXPR <_50, _50, { 7, 7, 7, 7, 7, 7, 7, 7 }>;
7049 MEM <vector(8) int> [(int *)&D.2044] = _39;
7050 MEM <vector(8) int> [(int *)&D.2042] = _51; */
7051 enum machine_mode vec_mode = TYPE_MODE (vectype);
7052 optab optab = optab_for_tree_code (code, vectype, optab_default);
7053 if (!optab || optab_handler (optab, vec_mode) == CODE_FOR_nothing)
7054 goto fail;
7055
7056 int units_log2 = scan_store_can_perm_p (vectype, *init);
7057 if (units_log2 == -1)
7058 goto fail;
7059
7060 return true;
7061 }
7062
7063
7064 /* Function vectorizable_scan_store.
7065
7066    Helper of vectorizable_store, arguments as for vectorizable_store.
7067    Handle only the transformation; checking is done in check_scan_store. */
7068
7069 static bool
7070 vectorizable_scan_store (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
7071 stmt_vec_info *vec_stmt, int ncopies)
7072 {
7073 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
7074 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
7075 tree ref_type = reference_alias_ptr_type (DR_REF (dr_info->dr));
7076 vec_info *vinfo = stmt_info->vinfo;
7077 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
7078
7079 if (dump_enabled_p ())
7080 dump_printf_loc (MSG_NOTE, vect_location,
7081 "transform scan store. ncopies = %d\n", ncopies);
7082
7083 gimple *stmt = STMT_VINFO_STMT (stmt_info);
7084 tree rhs = gimple_assign_rhs1 (stmt);
7085 gcc_assert (TREE_CODE (rhs) == SSA_NAME);
7086
7087 tree var = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
7088 bool inscan_var_store
7089 = lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var)) != NULL;
7090
7091 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && !inscan_var_store)
7092 {
7093 use_operand_p use_p;
7094 imm_use_iterator iter;
7095 FOR_EACH_IMM_USE_FAST (use_p, iter, rhs)
7096 {
7097 gimple *use_stmt = USE_STMT (use_p);
7098 if (use_stmt == stmt || is_gimple_debug (use_stmt))
7099 continue;
7100 rhs = gimple_assign_lhs (use_stmt);
7101 break;
7102 }
7103 }
7104
7105 gimple *def_stmt = SSA_NAME_DEF_STMT (rhs);
7106 enum tree_code code = gimple_assign_rhs_code (def_stmt);
7107 if (code == POINTER_PLUS_EXPR)
7108 code = PLUS_EXPR;
7109 gcc_assert (TREE_CODE_LENGTH (code) == binary_op
7110 && commutative_tree_code (code));
7111 tree rhs1 = gimple_assign_rhs1 (def_stmt);
7112 tree rhs2 = gimple_assign_rhs2 (def_stmt);
7113 gcc_assert (TREE_CODE (rhs1) == SSA_NAME && TREE_CODE (rhs2) == SSA_NAME);
7114 gimple *load1_stmt = SSA_NAME_DEF_STMT (rhs1);
7115 gimple *load2_stmt = SSA_NAME_DEF_STMT (rhs2);
7116 stmt_vec_info load1_stmt_info = loop_vinfo->lookup_stmt (load1_stmt);
7117 stmt_vec_info load2_stmt_info = loop_vinfo->lookup_stmt (load2_stmt);
7118 dr_vec_info *load1_dr_info = STMT_VINFO_DR_INFO (load1_stmt_info);
7119 dr_vec_info *load2_dr_info = STMT_VINFO_DR_INFO (load2_stmt_info);
7120 tree var1 = TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info->dr), 0);
7121 tree var2 = TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info->dr), 0);
7122
7123 if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1)))
7124 {
7125 std::swap (rhs1, rhs2);
7126 std::swap (var1, var2);
7127 std::swap (load1_dr_info, load2_dr_info);
7128 }
7129
7130 tree *init = loop_vinfo->scan_map->get (var1);
7131 gcc_assert (init);
7132
7133 unsigned HOST_WIDE_INT nunits;
7134 if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits))
7135 gcc_unreachable ();
7136 auto_vec<enum scan_store_kind, 16> use_whole_vector;
7137 int units_log2 = scan_store_can_perm_p (vectype, *init, &use_whole_vector);
7138 gcc_assert (units_log2 > 0);
7139 auto_vec<tree, 16> perms;
7140 perms.quick_grow (units_log2 + 1);
7141 tree zero_vec = NULL_TREE, masktype = NULL_TREE;
7142 for (int i = 0; i <= units_log2; ++i)
7143 {
7144 unsigned HOST_WIDE_INT j, k;
7145 vec_perm_builder sel (nunits, nunits, 1);
7146 sel.quick_grow (nunits);
7147 if (i == units_log2)
7148 for (j = 0; j < nunits; ++j)
7149 sel[j] = nunits - 1;
7150 else
7151 {
7152 for (j = 0; j < (HOST_WIDE_INT_1U << i); ++j)
7153 sel[j] = j;
7154 for (k = 0; j < nunits; ++j, ++k)
7155 sel[j] = nunits + k;
7156 }
7157 vec_perm_indices indices (sel, i == units_log2 ? 1 : 2, nunits);
7158 if (!use_whole_vector.is_empty ()
7159 && use_whole_vector[i] != scan_store_kind_perm)
7160 {
7161 if (zero_vec == NULL_TREE)
7162 zero_vec = build_zero_cst (vectype);
7163 if (masktype == NULL_TREE
7164 && use_whole_vector[i] == scan_store_kind_lshift_cond)
7165 masktype = truth_type_for (vectype);
7166 perms[i] = vect_gen_perm_mask_any (vectype, indices);
7167 }
7168 else
7169 perms[i] = vect_gen_perm_mask_checked (vectype, indices);
7170 }
7171
7172 stmt_vec_info prev_stmt_info = NULL;
7173 tree vec_oprnd1 = NULL_TREE;
7174 tree vec_oprnd2 = NULL_TREE;
7175 tree vec_oprnd3 = NULL_TREE;
7176 tree dataref_ptr = DR_BASE_ADDRESS (dr_info->dr);
7177 tree dataref_offset = build_int_cst (ref_type, 0);
7178 tree bump = vect_get_data_ptr_increment (dr_info, vectype, VMAT_CONTIGUOUS);
7179 tree ldataref_ptr = NULL_TREE;
7180 tree orig = NULL_TREE;
7181 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && !inscan_var_store)
7182 ldataref_ptr = DR_BASE_ADDRESS (load1_dr_info->dr);
7183 for (int j = 0; j < ncopies; j++)
7184 {
7185 stmt_vec_info new_stmt_info;
7186 if (j == 0)
7187 {
7188 vec_oprnd1 = vect_get_vec_def_for_operand (*init, stmt_info);
7189 if (ldataref_ptr == NULL)
7190 vec_oprnd2 = vect_get_vec_def_for_operand (rhs1, stmt_info);
7191 vec_oprnd3 = vect_get_vec_def_for_operand (rhs2, stmt_info);
7192 orig = vec_oprnd3;
7193 }
7194 else
7195 {
7196 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd1);
7197 if (ldataref_ptr == NULL)
7198 vec_oprnd2 = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd2);
7199 vec_oprnd3 = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd3);
7200 if (!inscan_var_store)
7201 dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset, bump);
7202 }
7203
7204 if (ldataref_ptr)
7205 {
7206 vec_oprnd2 = make_ssa_name (vectype);
7207 tree data_ref = fold_build2 (MEM_REF, vectype,
7208 unshare_expr (ldataref_ptr),
7209 dataref_offset);
7210 vect_copy_ref_info (data_ref, DR_REF (load1_dr_info->dr));
7211 gimple *g = gimple_build_assign (vec_oprnd2, data_ref);
7212 new_stmt_info = vect_finish_stmt_generation (stmt_info, g, gsi);
7213 if (prev_stmt_info == NULL)
7214 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
7215 else
7216 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
7217 prev_stmt_info = new_stmt_info;
7218 }
7219
7220 tree v = vec_oprnd2;
7221 for (int i = 0; i < units_log2; ++i)
7222 {
7223 tree new_temp = make_ssa_name (vectype);
7224 gimple *g = gimple_build_assign (new_temp, VEC_PERM_EXPR,
7225 (zero_vec
7226 && (use_whole_vector[i]
7227 != scan_store_kind_perm))
7228 ? zero_vec : vec_oprnd1, v,
7229 perms[i]);
7230 new_stmt_info = vect_finish_stmt_generation (stmt_info, g, gsi);
7231 if (prev_stmt_info == NULL)
7232 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
7233 else
7234 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
7235 prev_stmt_info = new_stmt_info;
7236
7237 if (zero_vec && use_whole_vector[i] == scan_store_kind_lshift_cond)
7238 {
7239 /* The whole vector shift shifted in zero bits, but if *init
7240 is not an all-zeros constant, we need to replace those
7241 elements with the corresponding elements of vec_oprnd1. */
7242 tree_vector_builder vb (masktype, nunits, 1);
7243 for (unsigned HOST_WIDE_INT k = 0; k < nunits; ++k)
7244 vb.quick_push (k < (HOST_WIDE_INT_1U << i)
7245 ? boolean_false_node : boolean_true_node);
7246
7247 tree new_temp2 = make_ssa_name (vectype);
7248 g = gimple_build_assign (new_temp2, VEC_COND_EXPR, vb.build (),
7249 new_temp, vec_oprnd1);
7250 new_stmt_info = vect_finish_stmt_generation (stmt_info, g, gsi);
7251 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
7252 prev_stmt_info = new_stmt_info;
7253 new_temp = new_temp2;
7254 }
7255
7256 /* For exclusive scan, perform the perms[i] permutation once
7257 more. */
7258 if (i == 0
7259 && STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4
7260 && v == vec_oprnd2)
7261 {
7262 v = new_temp;
7263 --i;
7264 continue;
7265 }
7266
7267 tree new_temp2 = make_ssa_name (vectype);
7268 g = gimple_build_assign (new_temp2, code, v, new_temp);
7269 new_stmt_info = vect_finish_stmt_generation (stmt_info, g, gsi);
7270 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
7271 prev_stmt_info = new_stmt_info;
7272
7273 v = new_temp2;
7274 }
7275
7276 tree new_temp = make_ssa_name (vectype);
7277 gimple *g = gimple_build_assign (new_temp, code, orig, v);
7278 new_stmt_info = vect_finish_stmt_generation (stmt_info, g, gsi);
7279 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
7280 prev_stmt_info = new_stmt_info;
7281
7282 tree last_perm_arg = new_temp;
7283 /* For exclusive scan, new_temp computed above is the exclusive scan
7284 prefix sum. Turn it into inclusive prefix sum for the broadcast
7285 of the last element into orig. */
7286 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4)
7287 {
7288 last_perm_arg = make_ssa_name (vectype);
7289 g = gimple_build_assign (last_perm_arg, code, new_temp, vec_oprnd2);
7290 new_stmt_info = vect_finish_stmt_generation (stmt_info, g, gsi);
7291 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
7292 prev_stmt_info = new_stmt_info;
7293 }
7294
7295 orig = make_ssa_name (vectype);
7296 g = gimple_build_assign (orig, VEC_PERM_EXPR, last_perm_arg,
7297 last_perm_arg, perms[units_log2]);
7298 new_stmt_info = vect_finish_stmt_generation (stmt_info, g, gsi);
7299 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
7300 prev_stmt_info = new_stmt_info;
7301
7302 if (!inscan_var_store)
7303 {
7304 tree data_ref = fold_build2 (MEM_REF, vectype,
7305 unshare_expr (dataref_ptr),
7306 dataref_offset);
7307 vect_copy_ref_info (data_ref, DR_REF (dr_info->dr));
7308 g = gimple_build_assign (data_ref, new_temp);
7309 new_stmt_info = vect_finish_stmt_generation (stmt_info, g, gsi);
7310 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
7311 prev_stmt_info = new_stmt_info;
7312 }
7313 }
7314
7315 if (inscan_var_store)
7316 for (int j = 0; j < ncopies; j++)
7317 {
7318 if (j != 0)
7319 dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset, bump);
7320
7321 tree data_ref = fold_build2 (MEM_REF, vectype,
7322 unshare_expr (dataref_ptr),
7323 dataref_offset);
7324 vect_copy_ref_info (data_ref, DR_REF (dr_info->dr));
7325 gimple *g = gimple_build_assign (data_ref, orig);
7326 stmt_vec_info new_stmt_info
7327 = vect_finish_stmt_generation (stmt_info, g, gsi);
7328 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
7329 prev_stmt_info = new_stmt_info;
7330 }
7331 return true;
7332 }
7333
7334
7335 /* Function vectorizable_store.
7336
7337 Check if STMT_INFO defines a non scalar data-ref (array/pointer/structure)
7338 that can be vectorized.
7339 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
7340 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
7341 Return true if STMT_INFO is vectorizable in this way. */
7342
7343 static bool
7344 vectorizable_store (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
7345 stmt_vec_info *vec_stmt, slp_tree slp_node,
7346 stmt_vector_for_cost *cost_vec)
7347 {
7348 tree data_ref;
7349 tree op;
7350 tree vec_oprnd = NULL_TREE;
7351 tree elem_type;
7352 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
7353 class loop *loop = NULL;
7354 machine_mode vec_mode;
7355 tree dummy;
7356 enum dr_alignment_support alignment_support_scheme;
7357 enum vect_def_type rhs_dt = vect_unknown_def_type;
7358 enum vect_def_type mask_dt = vect_unknown_def_type;
7359 stmt_vec_info prev_stmt_info = NULL;
7360 tree dataref_ptr = NULL_TREE;
7361 tree dataref_offset = NULL_TREE;
7362 gimple *ptr_incr = NULL;
7363 int ncopies;
7364 int j;
7365 stmt_vec_info first_stmt_info;
7366 bool grouped_store;
7367 unsigned int group_size, i;
7368 vec<tree> oprnds = vNULL;
7369 vec<tree> result_chain = vNULL;
7370 tree offset = NULL_TREE;
7371 vec<tree> vec_oprnds = vNULL;
7372 bool slp = (slp_node != NULL);
7373 unsigned int vec_num;
7374 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
7375 vec_info *vinfo = stmt_info->vinfo;
7376 tree aggr_type;
7377 gather_scatter_info gs_info;
7378 poly_uint64 vf;
7379 vec_load_store_type vls_type;
7380 tree ref_type;
7381
7382 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
7383 return false;
7384
7385 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
7386 && ! vec_stmt)
7387 return false;
7388
7389 /* Is vectorizable store? */
7390
7391 tree mask = NULL_TREE, mask_vectype = NULL_TREE;
7392 if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
7393 {
7394 tree scalar_dest = gimple_assign_lhs (assign);
7395 if (TREE_CODE (scalar_dest) == VIEW_CONVERT_EXPR
7396 && is_pattern_stmt_p (stmt_info))
7397 scalar_dest = TREE_OPERAND (scalar_dest, 0);
7398 if (TREE_CODE (scalar_dest) != ARRAY_REF
7399 && TREE_CODE (scalar_dest) != BIT_FIELD_REF
7400 && TREE_CODE (scalar_dest) != INDIRECT_REF
7401 && TREE_CODE (scalar_dest) != COMPONENT_REF
7402 && TREE_CODE (scalar_dest) != IMAGPART_EXPR
7403 && TREE_CODE (scalar_dest) != REALPART_EXPR
7404 && TREE_CODE (scalar_dest) != MEM_REF)
7405 return false;
7406 }
7407 else
7408 {
7409 gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
7410 if (!call || !gimple_call_internal_p (call))
7411 return false;
7412
7413 internal_fn ifn = gimple_call_internal_fn (call);
7414 if (!internal_store_fn_p (ifn))
7415 return false;
7416
7417 if (slp_node != NULL)
7418 {
7419 if (dump_enabled_p ())
7420 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7421 "SLP of masked stores not supported.\n");
7422 return false;
7423 }
7424
7425 int mask_index = internal_fn_mask_index (ifn);
7426 if (mask_index >= 0)
7427 {
7428 mask = gimple_call_arg (call, mask_index);
7429 if (!vect_check_scalar_mask (stmt_info, mask, &mask_dt,
7430 &mask_vectype))
7431 return false;
7432 }
7433 }
7434
7435 op = vect_get_store_rhs (stmt_info);
7436
7437 /* Cannot have hybrid store SLP -- that would mean storing to the
7438 same location twice. */
7439 gcc_assert (slp == PURE_SLP_STMT (stmt_info));
7440
7441 tree vectype = STMT_VINFO_VECTYPE (stmt_info), rhs_vectype = NULL_TREE;
7442 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
7443
7444 if (loop_vinfo)
7445 {
7446 loop = LOOP_VINFO_LOOP (loop_vinfo);
7447 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
7448 }
7449 else
7450 vf = 1;
7451
7452 /* Multiple types in SLP are handled by creating the appropriate number of
7453 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
7454 case of SLP. */
7455 if (slp)
7456 ncopies = 1;
7457 else
7458 ncopies = vect_get_num_copies (loop_vinfo, vectype);
7459
7460 gcc_assert (ncopies >= 1);
7461
7462 /* FORNOW. This restriction should be relaxed. */
7463 if (loop && nested_in_vect_loop_p (loop, stmt_info) && ncopies > 1)
7464 {
7465 if (dump_enabled_p ())
7466 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7467 "multiple types in nested loop.\n");
7468 return false;
7469 }
7470
7471 if (!vect_check_store_rhs (stmt_info, op, &rhs_dt, &rhs_vectype, &vls_type))
7472 return false;
7473
7474 elem_type = TREE_TYPE (vectype);
7475 vec_mode = TYPE_MODE (vectype);
7476
7477 if (!STMT_VINFO_DATA_REF (stmt_info))
7478 return false;
7479
7480 vect_memory_access_type memory_access_type;
7481 if (!get_load_store_type (stmt_info, vectype, slp, mask, vls_type, ncopies,
7482 &memory_access_type, &gs_info))
7483 return false;
7484
7485 if (mask)
7486 {
7487 if (memory_access_type == VMAT_CONTIGUOUS)
7488 {
7489 if (!VECTOR_MODE_P (vec_mode)
7490 || !can_vec_mask_load_store_p (vec_mode,
7491 TYPE_MODE (mask_vectype), false))
7492 return false;
7493 }
7494 else if (memory_access_type != VMAT_LOAD_STORE_LANES
7495 && (memory_access_type != VMAT_GATHER_SCATTER
7496 || (gs_info.decl && !VECTOR_BOOLEAN_TYPE_P (mask_vectype))))
7497 {
7498 if (dump_enabled_p ())
7499 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7500 "unsupported access type for masked store.\n");
7501 return false;
7502 }
7503 }
7504 else
7505 {
7506 /* FORNOW. In some cases we can vectorize even if the data-type is
7507 not supported (e.g. array initialization with 0). */
7508 if (optab_handler (mov_optab, vec_mode) == CODE_FOR_nothing)
7509 return false;
7510 }
7511
7512 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info), *first_dr_info = NULL;
7513 grouped_store = (STMT_VINFO_GROUPED_ACCESS (stmt_info)
7514 && memory_access_type != VMAT_GATHER_SCATTER
7515 && (slp || memory_access_type != VMAT_CONTIGUOUS));
7516 if (grouped_store)
7517 {
7518 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
7519 first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
7520 group_size = DR_GROUP_SIZE (first_stmt_info);
7521 }
7522 else
7523 {
7524 first_stmt_info = stmt_info;
7525 first_dr_info = dr_info;
7526 group_size = vec_num = 1;
7527 }
7528
7529 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) > 1 && !vec_stmt)
7530 {
7531 if (!check_scan_store (stmt_info, vectype, rhs_dt, slp, mask,
7532 memory_access_type))
7533 return false;
7534 }
7535
7536 if (!vec_stmt) /* transformation not required. */
7537 {
7538 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;
7539
7540 if (loop_vinfo
7541 && LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo))
7542 check_load_store_masking (loop_vinfo, vectype, vls_type, group_size,
7543 memory_access_type, &gs_info, mask);
7544
7545 STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
7546 vect_model_store_cost (stmt_info, ncopies, rhs_dt, memory_access_type,
7547 vls_type, slp_node, cost_vec);
7548 return true;
7549 }
7550 gcc_assert (memory_access_type == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info));
7551
7552 /* Transform. */
7553
7554 ensure_base_align (dr_info);
7555
7556 if (memory_access_type == VMAT_GATHER_SCATTER && gs_info.decl)
7557 {
7558 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE, src;
7559 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info.decl));
7560 tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
7561 tree ptr, var, scale, vec_mask;
7562 tree mask_arg = NULL_TREE, mask_op = NULL_TREE, perm_mask = NULL_TREE;
7563 tree mask_halfvectype = mask_vectype;
7564 edge pe = loop_preheader_edge (loop);
7565 gimple_seq seq;
7566 basic_block new_bb;
7567 enum { NARROW, NONE, WIDEN } modifier;
7568 poly_uint64 scatter_off_nunits
7569 = TYPE_VECTOR_SUBPARTS (gs_info.offset_vectype);
7570
7571 if (known_eq (nunits, scatter_off_nunits))
7572 modifier = NONE;
7573 else if (known_eq (nunits * 2, scatter_off_nunits))
7574 {
7575 modifier = WIDEN;
7576
7577 /* Currently gathers and scatters are only supported for
7578 fixed-length vectors. */
7579 unsigned int count = scatter_off_nunits.to_constant ();
7580 vec_perm_builder sel (count, count, 1);
7581 for (i = 0; i < (unsigned int) count; ++i)
7582 sel.quick_push (i | (count / 2));
7583
7584 vec_perm_indices indices (sel, 1, count);
7585 perm_mask = vect_gen_perm_mask_checked (gs_info.offset_vectype,
7586 indices);
7587 gcc_assert (perm_mask != NULL_TREE);
7588 }
7589 else if (known_eq (nunits, scatter_off_nunits * 2))
7590 {
7591 modifier = NARROW;
7592
7593 /* Currently gathers and scatters are only supported for
7594 fixed-length vectors. */
7595 unsigned int count = nunits.to_constant ();
7596 vec_perm_builder sel (count, count, 1);
7597 for (i = 0; i < (unsigned int) count; ++i)
7598 sel.quick_push (i | (count / 2));
7599
7600 vec_perm_indices indices (sel, 2, count);
7601 perm_mask = vect_gen_perm_mask_checked (vectype, indices);
7602 gcc_assert (perm_mask != NULL_TREE);
7603 ncopies *= 2;
7604
7605 if (mask)
7606 mask_halfvectype = truth_type_for (gs_info.offset_vectype);
7607 }
7608 else
7609 gcc_unreachable ();
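
/* As a concrete illustration: with V8SI data and four scatter offsets
   per offset vector, the NARROW case above builds the selector
   { 4, 5, 6, 7, 4, 5, 6, 7 }, which the odd copies use to move the high
   half of the data vector into the low lanes; the WIDEN case builds the
   same selector but applies it to the offset vector instead.  */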
7610
7611 rettype = TREE_TYPE (TREE_TYPE (gs_info.decl));
7612 ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
7613 masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
7614 idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
7615 srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
7616 scaletype = TREE_VALUE (arglist);
7617
7618 gcc_checking_assert (TREE_CODE (masktype) == INTEGER_TYPE
7619 && TREE_CODE (rettype) == VOID_TYPE);
7620
7621 ptr = fold_convert (ptrtype, gs_info.base);
7622 if (!is_gimple_min_invariant (ptr))
7623 {
7624 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
7625 new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
7626 gcc_assert (!new_bb);
7627 }
7628
7629 if (mask == NULL_TREE)
7630 {
7631 mask_arg = build_int_cst (masktype, -1);
7632 mask_arg = vect_init_vector (stmt_info, mask_arg, masktype, NULL);
7633 }
7634
7635 scale = build_int_cst (scaletype, gs_info.scale);
7636
7637 prev_stmt_info = NULL;
7638 for (j = 0; j < ncopies; ++j)
7639 {
7640 if (j == 0)
7641 {
7642 src = vec_oprnd1 = vect_get_vec_def_for_operand (op, stmt_info);
7643 op = vec_oprnd0 = vect_get_vec_def_for_operand (gs_info.offset,
7644 stmt_info);
7645 if (mask)
7646 {
7647 tree mask_vectype = truth_type_for (vectype);
7648 mask_op = vec_mask
7649 = vect_get_vec_def_for_operand (mask,
7650 stmt_info, mask_vectype);
7651 }
7652 }
7653 else if (modifier != NONE && (j & 1))
7654 {
7655 if (modifier == WIDEN)
7656 {
7657 src
7658 = vec_oprnd1 = vect_get_vec_def_for_stmt_copy (vinfo,
7659 vec_oprnd1);
7660 op = permute_vec_elements (vec_oprnd0, vec_oprnd0, perm_mask,
7661 stmt_info, gsi);
7662 if (mask)
7663 mask_op
7664 = vec_mask = vect_get_vec_def_for_stmt_copy (vinfo,
7665 vec_mask);
7666 }
7667 else if (modifier == NARROW)
7668 {
7669 src = permute_vec_elements (vec_oprnd1, vec_oprnd1, perm_mask,
7670 stmt_info, gsi);
7671 op = vec_oprnd0 = vect_get_vec_def_for_stmt_copy (vinfo,
7672 vec_oprnd0);
7673 }
7674 else
7675 gcc_unreachable ();
7676 }
7677 else
7678 {
7679 src = vec_oprnd1 = vect_get_vec_def_for_stmt_copy (vinfo,
7680 vec_oprnd1);
7681 op = vec_oprnd0 = vect_get_vec_def_for_stmt_copy (vinfo,
7682 vec_oprnd0);
7683 if (mask)
7684 mask_op = vec_mask = vect_get_vec_def_for_stmt_copy (vinfo,
7685 vec_mask);
7686 }
7687
7688 if (!useless_type_conversion_p (srctype, TREE_TYPE (src)))
7689 {
7690 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (src)),
7691 TYPE_VECTOR_SUBPARTS (srctype)));
7692 var = vect_get_new_ssa_name (srctype, vect_simple_var);
7693 src = build1 (VIEW_CONVERT_EXPR, srctype, src);
7694 gassign *new_stmt
7695 = gimple_build_assign (var, VIEW_CONVERT_EXPR, src);
7696 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
7697 src = var;
7698 }
7699
7700 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
7701 {
7702 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op)),
7703 TYPE_VECTOR_SUBPARTS (idxtype)));
7704 var = vect_get_new_ssa_name (idxtype, vect_simple_var);
7705 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
7706 gassign *new_stmt
7707 = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
7708 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
7709 op = var;
7710 }
7711
7712 if (mask)
7713 {
7714 tree utype;
7715 mask_arg = mask_op;
7716 if (modifier == NARROW)
7717 {
7718 var = vect_get_new_ssa_name (mask_halfvectype,
7719 vect_simple_var);
7720 gassign *new_stmt
7721 = gimple_build_assign (var, (j & 1) ? VEC_UNPACK_HI_EXPR
7722 : VEC_UNPACK_LO_EXPR,
7723 mask_op);
7724 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
7725 mask_arg = var;
7726 }
7727 tree optype = TREE_TYPE (mask_arg);
7728 if (TYPE_MODE (masktype) == TYPE_MODE (optype))
7729 utype = masktype;
7730 else
7731 utype = lang_hooks.types.type_for_mode (TYPE_MODE (optype), 1);
7732 var = vect_get_new_ssa_name (utype, vect_scalar_var);
7733 mask_arg = build1 (VIEW_CONVERT_EXPR, utype, mask_arg);
7734 gassign *new_stmt
7735 = gimple_build_assign (var, VIEW_CONVERT_EXPR, mask_arg);
7736 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
7737 mask_arg = var;
7738 if (!useless_type_conversion_p (masktype, utype))
7739 {
7740 gcc_assert (TYPE_PRECISION (utype)
7741 <= TYPE_PRECISION (masktype));
7742 var = vect_get_new_ssa_name (masktype, vect_scalar_var);
7743 new_stmt = gimple_build_assign (var, NOP_EXPR, mask_arg);
7744 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
7745 mask_arg = var;
7746 }
7747 }
7748
7749 gcall *new_stmt
7750 = gimple_build_call (gs_info.decl, 5, ptr, mask_arg, op, src, scale);
7751 stmt_vec_info new_stmt_info
7752 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
7753
7754 if (prev_stmt_info == NULL)
7755 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
7756 else
7757 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
7758 prev_stmt_info = new_stmt_info;
7759 }
7760 return true;
7761 }
7762 else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) >= 3)
7763 return vectorizable_scan_store (stmt_info, gsi, vec_stmt, ncopies);
7764
7765 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
7766 DR_GROUP_STORE_COUNT (DR_GROUP_FIRST_ELEMENT (stmt_info))++;
7767
7768 if (grouped_store)
7769 {
7770 /* FORNOW */
7771 gcc_assert (!loop || !nested_in_vect_loop_p (loop, stmt_info));
7772
7773 /* We vectorize all the stmts of the interleaving group when we
7774 reach the last stmt in the group. */
7775 if (DR_GROUP_STORE_COUNT (first_stmt_info)
7776 < DR_GROUP_SIZE (first_stmt_info)
7777 && !slp)
7778 {
7779 *vec_stmt = NULL;
7780 return true;
7781 }
7782
7783 if (slp)
7784 {
7785 grouped_store = false;
7786 /* VEC_NUM is the number of vect stmts to be created for this
7787 group. */
7788 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
7789 first_stmt_info = SLP_TREE_SCALAR_STMTS (slp_node)[0];
7790 gcc_assert (DR_GROUP_FIRST_ELEMENT (first_stmt_info)
7791 == first_stmt_info);
7792 first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
7793 op = vect_get_store_rhs (first_stmt_info);
7794 }
7795 else
7796 /* VEC_NUM is the number of vect stmts to be created for this
7797 group. */
7798 vec_num = group_size;
7799
7800 ref_type = get_group_alias_ptr_type (first_stmt_info);
7801 }
7802 else
7803 ref_type = reference_alias_ptr_type (DR_REF (first_dr_info->dr));
7804
7805 if (dump_enabled_p ())
7806 dump_printf_loc (MSG_NOTE, vect_location,
7807 "transform store. ncopies = %d\n", ncopies);
7808
7809 if (memory_access_type == VMAT_ELEMENTWISE
7810 || memory_access_type == VMAT_STRIDED_SLP)
7811 {
7812 gimple_stmt_iterator incr_gsi;
7813 bool insert_after;
7814 gimple *incr;
7815 tree offvar;
7816 tree ivstep;
7817 tree running_off;
7818 tree stride_base, stride_step, alias_off;
7819 tree vec_oprnd;
7820 tree dr_offset;
7821 unsigned int g;
7822 /* Checked by get_load_store_type. */
7823 unsigned int const_nunits = nunits.to_constant ();
7824
7825 gcc_assert (!LOOP_VINFO_FULLY_MASKED_P (loop_vinfo));
7826 gcc_assert (!nested_in_vect_loop_p (loop, stmt_info));
7827
7828 dr_offset = get_dr_vinfo_offset (first_dr_info);
7829 stride_base
7830 = fold_build_pointer_plus
7831 (DR_BASE_ADDRESS (first_dr_info->dr),
7832 size_binop (PLUS_EXPR,
7833 convert_to_ptrofftype (dr_offset),
7834 convert_to_ptrofftype (DR_INIT (first_dr_info->dr))));
7835 stride_step = fold_convert (sizetype, DR_STEP (first_dr_info->dr));
7836
7837 /* For a store with loop-invariant (but other than power-of-2)
7838 stride (i.e. not a grouped access) like so:
7839
7840 for (i = 0; i < n; i += stride)
7841 array[i] = ...;
7842
7843 we generate a new induction variable and new stores from
7844 the components of the (vectorized) rhs:
7845
7846 for (j = 0; ; j += VF*stride)
7847 vectemp = ...;
7848 tmp1 = vectemp[0];
7849 array[j] = tmp1;
7850 tmp2 = vectemp[1];
7851 array[j + stride] = tmp2;
7852 ...
7853 */
7854
7855 unsigned nstores = const_nunits;
7856 unsigned lnel = 1;
7857 tree ltype = elem_type;
7858 tree lvectype = vectype;
7859 if (slp)
7860 {
7861 if (group_size < const_nunits
7862 && const_nunits % group_size == 0)
7863 {
7864 nstores = const_nunits / group_size;
7865 lnel = group_size;
7866 ltype = build_vector_type (elem_type, group_size);
7867 lvectype = vectype;
7868
7869 /* First check whether the vec_extract optab can extract the
7870 group-sized vector elts directly; if not, try the fallbacks below. */
7871 scalar_mode elmode = SCALAR_TYPE_MODE (elem_type);
7872 machine_mode vmode;
7873 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
7874 || !related_vector_mode (TYPE_MODE (vectype), elmode,
7875 group_size).exists (&vmode)
7876 || (convert_optab_handler (vec_extract_optab,
7877 TYPE_MODE (vectype), vmode)
7878 == CODE_FOR_nothing))
7879 {
7880 /* Try to avoid emitting an extract of vector elements
7881 by performing the extracts using an integer type of the
7882 same size, extracting from a vector of those and then
7883 re-interpreting it as the original vector type if
7884 supported. */
7885 unsigned lsize
7886 = group_size * GET_MODE_BITSIZE (elmode);
7887 unsigned int lnunits = const_nunits / group_size;
7888 /* If we can't construct such a vector fall back to
7889 element extracts from the original vector type and
7890 element size stores. */
7891 if (int_mode_for_size (lsize, 0).exists (&elmode)
7892 && VECTOR_MODE_P (TYPE_MODE (vectype))
7893 && related_vector_mode (TYPE_MODE (vectype), elmode,
7894 lnunits).exists (&vmode)
7895 && (convert_optab_handler (vec_extract_optab,
7896 vmode, elmode)
7897 != CODE_FOR_nothing))
7898 {
7899 nstores = lnunits;
7900 lnel = group_size;
7901 ltype = build_nonstandard_integer_type (lsize, 1);
7902 lvectype = build_vector_type (ltype, nstores);
7903 }
7904 /* Else fall back to vector extraction anyway.
7905 Fewer stores are more important than avoiding spilling
7906 of the vector we extract from. Compared to the
7907 construction case in vectorizable_load, no store-forwarding
7908 issue exists here for reasonable archs. */
7909 }
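
/* Concretely, for V8SI and group_size == 2 the code above computes
   lsize == 64 and lnunits == 4, i.e. four DImode extracts from a V4DI
   view of the vector instead of eight SImode extracts, halving the
   number of stores.  */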
7910 }
7911 else if (group_size >= const_nunits
7912 && group_size % const_nunits == 0)
7913 {
7914 nstores = 1;
7915 lnel = const_nunits;
7916 ltype = vectype;
7917 lvectype = vectype;
7918 }
7919 ltype = build_aligned_type (ltype, TYPE_ALIGN (elem_type));
7920 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
7921 }
7922
7923 ivstep = stride_step;
7924 ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (ivstep), ivstep,
7925 build_int_cst (TREE_TYPE (ivstep), vf));
7926
7927 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
7928
7929 stride_base = cse_and_gimplify_to_preheader (loop_vinfo, stride_base);
7930 ivstep = cse_and_gimplify_to_preheader (loop_vinfo, ivstep);
7931 create_iv (stride_base, ivstep, NULL,
7932 loop, &incr_gsi, insert_after,
7933 &offvar, NULL);
7934 incr = gsi_stmt (incr_gsi);
7935 loop_vinfo->add_stmt (incr);
7936
7937 stride_step = cse_and_gimplify_to_preheader (loop_vinfo, stride_step);
7938
7939 prev_stmt_info = NULL;
7940 alias_off = build_int_cst (ref_type, 0);
7941 stmt_vec_info next_stmt_info = first_stmt_info;
7942 for (g = 0; g < group_size; g++)
7943 {
7944 running_off = offvar;
7945 if (g)
7946 {
7947 tree size = TYPE_SIZE_UNIT (ltype);
7948 tree pos = fold_build2 (MULT_EXPR, sizetype, size_int (g),
7949 size);
7950 tree newoff = copy_ssa_name (running_off, NULL);
7951 incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
7952 running_off, pos);
7953 vect_finish_stmt_generation (stmt_info, incr, gsi);
7954 running_off = newoff;
7955 }
7956 unsigned int group_el = 0;
7957 unsigned HOST_WIDE_INT
7958 elsz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
7959 for (j = 0; j < ncopies; j++)
7960 {
7961 /* We've set OP and RHS_DT above, from vect_get_store_rhs
7962 and vect_check_store_rhs, and first_stmt_info == stmt_info. */
7963 if (j == 0)
7964 {
7965 if (slp)
7966 {
7967 vect_get_vec_defs (op, NULL_TREE, stmt_info,
7968 &vec_oprnds, NULL, slp_node);
7969 vec_oprnd = vec_oprnds[0];
7970 }
7971 else
7972 {
7973 op = vect_get_store_rhs (next_stmt_info);
7974 vec_oprnd = vect_get_vec_def_for_operand
7975 (op, next_stmt_info);
7976 }
7977 }
7978 else
7979 {
7980 if (slp)
7981 vec_oprnd = vec_oprnds[j];
7982 else
7983 vec_oprnd = vect_get_vec_def_for_stmt_copy (vinfo,
7984 vec_oprnd);
7985 }
7986 /* Pun the vector to extract from if necessary. */
7987 if (lvectype != vectype)
7988 {
7989 tree tem = make_ssa_name (lvectype);
7990 gimple *pun
7991 = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
7992 lvectype, vec_oprnd));
7993 vect_finish_stmt_generation (stmt_info, pun, gsi);
7994 vec_oprnd = tem;
7995 }
7996 for (i = 0; i < nstores; i++)
7997 {
7998 tree newref, newoff;
7999 gimple *incr, *assign;
8000 tree size = TYPE_SIZE (ltype);
8001 /* Extract the i'th component. */
8002 tree pos = fold_build2 (MULT_EXPR, bitsizetype,
8003 bitsize_int (i), size);
8004 tree elem = fold_build3 (BIT_FIELD_REF, ltype, vec_oprnd,
8005 size, pos);
8006
8007 elem = force_gimple_operand_gsi (gsi, elem, true,
8008 NULL_TREE, true,
8009 GSI_SAME_STMT);
8010
8011 tree this_off = build_int_cst (TREE_TYPE (alias_off),
8012 group_el * elsz);
8013 newref = build2 (MEM_REF, ltype,
8014 running_off, this_off);
8015 vect_copy_ref_info (newref, DR_REF (first_dr_info->dr));
8016
8017 /* And store it to *running_off. */
8018 assign = gimple_build_assign (newref, elem);
8019 stmt_vec_info assign_info
8020 = vect_finish_stmt_generation (stmt_info, assign, gsi);
8021
8022 group_el += lnel;
8023 if (! slp
8024 || group_el == group_size)
8025 {
8026 newoff = copy_ssa_name (running_off, NULL);
8027 incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
8028 running_off, stride_step);
8029 vect_finish_stmt_generation (stmt_info, incr, gsi);
8030
8031 running_off = newoff;
8032 group_el = 0;
8033 }
8034 if (g == group_size - 1
8035 && !slp)
8036 {
8037 if (j == 0 && i == 0)
8038 STMT_VINFO_VEC_STMT (stmt_info)
8039 = *vec_stmt = assign_info;
8040 else
8041 STMT_VINFO_RELATED_STMT (prev_stmt_info) = assign_info;
8042 prev_stmt_info = assign_info;
8043 }
8044 }
8045 }
8046 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
8047 if (slp)
8048 break;
8049 }
8050
8051 vec_oprnds.release ();
8052 return true;
8053 }
8054
8055 auto_vec<tree> dr_chain (group_size);
8056 oprnds.create (group_size);
8057
8058 /* Gather-scatter accesses perform only component accesses; alignment
8059 is irrelevant for them. */
8060 if (memory_access_type == VMAT_GATHER_SCATTER)
8061 alignment_support_scheme = dr_unaligned_supported;
8062 else
8063 alignment_support_scheme
8064 = vect_supportable_dr_alignment (first_dr_info, false);
8065
8066 gcc_assert (alignment_support_scheme);
8067 vec_loop_masks *loop_masks
8068 = (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
8069 ? &LOOP_VINFO_MASKS (loop_vinfo)
8070 : NULL);
8071 /* Targets with store-lane instructions must not require explicit
8072 realignment. vect_supportable_dr_alignment always returns either
8073 dr_aligned or dr_unaligned_supported for masked operations. */
8074 gcc_assert ((memory_access_type != VMAT_LOAD_STORE_LANES
8075 && !mask
8076 && !loop_masks)
8077 || alignment_support_scheme == dr_aligned
8078 || alignment_support_scheme == dr_unaligned_supported);
8079
8080 if (memory_access_type == VMAT_CONTIGUOUS_DOWN
8081 || memory_access_type == VMAT_CONTIGUOUS_REVERSE)
8082 offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
8083
8084 tree bump;
8085 tree vec_offset = NULL_TREE;
8086 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
8087 {
8088 aggr_type = NULL_TREE;
8089 bump = NULL_TREE;
8090 }
8091 else if (memory_access_type == VMAT_GATHER_SCATTER)
8092 {
8093 aggr_type = elem_type;
8094 vect_get_strided_load_store_ops (stmt_info, loop_vinfo, &gs_info,
8095 &bump, &vec_offset);
8096 }
8097 else
8098 {
8099 if (memory_access_type == VMAT_LOAD_STORE_LANES)
8100 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
8101 else
8102 aggr_type = vectype;
8103 bump = vect_get_data_ptr_increment (dr_info, aggr_type,
8104 memory_access_type);
8105 }
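
/* E.g. for VMAT_LOAD_STORE_LANES with V4SI vectors and vec_num == 2,
   AGGR_TYPE is int[8], so the pointer bump covers the whole two-vector
   group at once; for ordinary contiguous stores AGGR_TYPE is just the
   vector type and the bump is one vector's worth of bytes.  */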
8106
8107 if (mask)
8108 LOOP_VINFO_HAS_MASK_STORE (loop_vinfo) = true;
8109
8110 /* In case the vectorization factor (VF) is bigger than the number
8111 of elements that we can fit in a vectype (nunits), we have to generate
8112 more than one vector stmt, i.e. we need to "unroll" the
8113 vector stmt by a factor of VF/nunits. For more details see the
8114 documentation of vect_get_vec_def_for_copy_stmt. */
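
/* E.g. with VF == 16 and V4SI vectors (nunits == 4), each scalar store
   here becomes ncopies == 4 vector stores.  */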
8115
8116 /* In case of interleaving (non-unit grouped access):
8117
8118 S1: &base + 2 = x2
8119 S2: &base = x0
8120 S3: &base + 1 = x1
8121 S4: &base + 3 = x3
8122
8123 We create vectorized stores starting from base address (the access of the
8124 first stmt in the chain (S2 in the above example), when the last store stmt
8125 of the chain (S4) is reached:
8126
8127 VS1: &base = vx2
8128 VS2: &base + vec_size*1 = vx0
8129 VS3: &base + vec_size*2 = vx1
8130 VS4: &base + vec_size*3 = vx3
8131
8132 Then permutation statements are generated:
8133
8134 VS5: vx5 = VEC_PERM_EXPR < vx0, vx3, {0, 8, 1, 9, 2, 10, 3, 11} >
8135 VS6: vx6 = VEC_PERM_EXPR < vx0, vx3, {4, 12, 5, 13, 6, 14, 7, 15} >
8136 ...
8137
8138 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
8139 (the order of the data-refs in the output of vect_permute_store_chain
8140 corresponds to the order of scalar stmts in the interleaving chain - see
8141 the documentation of vect_permute_store_chain()).
8142
8143 In case of both multiple types and interleaving, above vector stores and
8144 permutation stmts are created for every copy. The result vector stmts are
8145 put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
8146 STMT_VINFO_RELATED_STMT for the next copies.
8147 */
8148
8149 prev_stmt_info = NULL;
8150 tree vec_mask = NULL_TREE;
8151 for (j = 0; j < ncopies; j++)
8152 {
8153 stmt_vec_info new_stmt_info;
8154 if (j == 0)
8155 {
8156 if (slp)
8157 {
8158 /* Get vectorized arguments for SLP_NODE. */
8159 vect_get_vec_defs (op, NULL_TREE, stmt_info, &vec_oprnds,
8160 NULL, slp_node);
8161
8162 vec_oprnd = vec_oprnds[0];
8163 }
8164 else
8165 {
8166 /* For interleaved stores we collect vectorized defs for all the
8167 stores in the group in DR_CHAIN and OPRNDS. DR_CHAIN is then
8168 used as an input to vect_permute_store_chain(), and OPRNDS as
8169 an input to vect_get_vec_def_for_stmt_copy() for the next copy.
8170
8171 If the store is not grouped, DR_GROUP_SIZE is 1, and DR_CHAIN and
8172 OPRNDS are of size 1. */
8173 stmt_vec_info next_stmt_info = first_stmt_info;
8174 for (i = 0; i < group_size; i++)
8175 {
8176 /* Since gaps are not supported for interleaved stores,
8177 DR_GROUP_SIZE is the exact number of stmts in the chain.
8178 Therefore, NEXT_STMT_INFO can't be NULL. In case there
8179 is no interleaving, DR_GROUP_SIZE is 1, and only one
8180 iteration of the loop will be executed. */
8181 op = vect_get_store_rhs (next_stmt_info);
8182 vec_oprnd = vect_get_vec_def_for_operand
8183 (op, next_stmt_info);
8184 dr_chain.quick_push (vec_oprnd);
8185 oprnds.quick_push (vec_oprnd);
8186 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
8187 }
8188 if (mask)
8189 vec_mask = vect_get_vec_def_for_operand (mask, stmt_info,
8190 mask_vectype);
8191 }
8192
8193 /* We should have caught mismatched types earlier. */
8194 gcc_assert (useless_type_conversion_p (vectype,
8195 TREE_TYPE (vec_oprnd)));
8196 bool simd_lane_access_p
8197 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) != 0;
8198 if (simd_lane_access_p
8199 && !loop_masks
8200 && TREE_CODE (DR_BASE_ADDRESS (first_dr_info->dr)) == ADDR_EXPR
8201 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr_info->dr), 0))
8202 && integer_zerop (get_dr_vinfo_offset (first_dr_info))
8203 && integer_zerop (DR_INIT (first_dr_info->dr))
8204 && alias_sets_conflict_p (get_alias_set (aggr_type),
8205 get_alias_set (TREE_TYPE (ref_type))))
8206 {
8207 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr_info->dr));
8208 dataref_offset = build_int_cst (ref_type, 0);
8209 }
8210 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
8211 vect_get_gather_scatter_ops (loop, stmt_info, &gs_info,
8212 &dataref_ptr, &vec_offset);
8213 else
8214 dataref_ptr
8215 = vect_create_data_ref_ptr (first_stmt_info, aggr_type,
8216 simd_lane_access_p ? loop : NULL,
8217 offset, &dummy, gsi, &ptr_incr,
8218 simd_lane_access_p, NULL_TREE, bump);
8219 }
8220 else
8221 {
8222 /* For interleaved stores we created vectorized defs for all the
8223 defs stored in OPRNDS in the previous iteration (previous copy).
8224 DR_CHAIN is then used as an input to vect_permute_store_chain(),
8225 and OPRNDS as an input to vect_get_vec_def_for_stmt_copy() for the
8226 next copy.
8227 If the store is not grouped, DR_GROUP_SIZE is 1, and DR_CHAIN and
8228 OPRNDS are of size 1. */
8229 for (i = 0; i < group_size; i++)
8230 {
8231 op = oprnds[i];
8232 vec_oprnd = vect_get_vec_def_for_stmt_copy (vinfo, op);
8233 dr_chain[i] = vec_oprnd;
8234 oprnds[i] = vec_oprnd;
8235 }
8236 if (mask)
8237 vec_mask = vect_get_vec_def_for_stmt_copy (vinfo, vec_mask);
8238 if (dataref_offset)
8239 dataref_offset
8240 = int_const_binop (PLUS_EXPR, dataref_offset, bump);
8241 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
8242 vec_offset = vect_get_vec_def_for_stmt_copy (vinfo, vec_offset);
8243 else
8244 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
8245 stmt_info, bump);
8246 }
8247
8248 if (memory_access_type == VMAT_LOAD_STORE_LANES)
8249 {
8250 tree vec_array;
8251
8252 /* Get an array into which we can store the individual vectors. */
8253 vec_array = create_vector_array (vectype, vec_num);
8254
8255 /* Invalidate the current contents of VEC_ARRAY. This should
8256 become an RTL clobber too, which prevents the vector registers
8257 from being upward-exposed. */
8258 vect_clobber_variable (stmt_info, gsi, vec_array);
8259
8260 /* Store the individual vectors into the array. */
8261 for (i = 0; i < vec_num; i++)
8262 {
8263 vec_oprnd = dr_chain[i];
8264 write_vector_array (stmt_info, gsi, vec_oprnd, vec_array, i);
8265 }
8266
8267 tree final_mask = NULL;
8268 if (loop_masks)
8269 final_mask = vect_get_loop_mask (gsi, loop_masks, ncopies,
8270 vectype, j);
8271 if (vec_mask)
8272 final_mask = prepare_load_store_mask (mask_vectype, final_mask,
8273 vec_mask, gsi);
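	  /* FINAL_MASK is now the loop mask, the user-supplied mask, or
	     (via prepare_load_store_mask) their bitwise AND, so lanes
	     disabled by either mask are never stored.  */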
8274
8275 gcall *call;
8276 if (final_mask)
8277 {
8278 /* Emit:
8279 MASK_STORE_LANES (DATAREF_PTR, ALIAS_PTR, VEC_MASK,
8280 VEC_ARRAY). */
8281 unsigned int align = TYPE_ALIGN_UNIT (TREE_TYPE (vectype));
8282 tree alias_ptr = build_int_cst (ref_type, align);
8283 call = gimple_build_call_internal (IFN_MASK_STORE_LANES, 4,
8284 dataref_ptr, alias_ptr,
8285 final_mask, vec_array);
8286 }
8287 else
8288 {
8289 /* Emit:
8290 MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY). */
8291 data_ref = create_array_ref (aggr_type, dataref_ptr, ref_type);
8292 call = gimple_build_call_internal (IFN_STORE_LANES, 1,
8293 vec_array);
8294 gimple_call_set_lhs (call, data_ref);
8295 }
8296 gimple_call_set_nothrow (call, true);
8297 new_stmt_info = vect_finish_stmt_generation (stmt_info, call, gsi);
8298
8299 /* Record that VEC_ARRAY is now dead. */
8300 vect_clobber_variable (stmt_info, gsi, vec_array);
8301 }
8302 else
8303 {
8304 new_stmt_info = NULL;
8305 if (grouped_store)
8306 {
8307 if (j == 0)
8308 result_chain.create (group_size);
8309 /* Permute. */
8310 vect_permute_store_chain (dr_chain, group_size, stmt_info, gsi,
8311 &result_chain);
8312 }
8313
8314 stmt_vec_info next_stmt_info = first_stmt_info;
8315 for (i = 0; i < vec_num; i++)
8316 {
8317 unsigned misalign;
8318 unsigned HOST_WIDE_INT align;
8319
8320 tree final_mask = NULL_TREE;
8321 if (loop_masks)
8322 final_mask = vect_get_loop_mask (gsi, loop_masks,
8323 vec_num * ncopies,
8324 vectype, vec_num * j + i);
8325 if (vec_mask)
8326 final_mask = prepare_load_store_mask (mask_vectype, final_mask,
8327 vec_mask, gsi);
8328
8329 if (memory_access_type == VMAT_GATHER_SCATTER)
8330 {
8331 tree scale = size_int (gs_info.scale);
8332 gcall *call;
8333 if (loop_masks)
8334 call = gimple_build_call_internal
8335 (IFN_MASK_SCATTER_STORE, 5, dataref_ptr, vec_offset,
8336 scale, vec_oprnd, final_mask);
8337 else
8338 call = gimple_build_call_internal
8339 (IFN_SCATTER_STORE, 4, dataref_ptr, vec_offset,
8340 scale, vec_oprnd);
8341 gimple_call_set_nothrow (call, true);
8342 new_stmt_info
8343 = vect_finish_stmt_generation (stmt_info, call, gsi);
8344 break;
8345 }
8346
8347 if (i > 0)
8348 /* Bump the vector pointer. */
8349 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
8350 stmt_info, bump);
8351
8352 if (slp)
8353 vec_oprnd = vec_oprnds[i];
8354 else if (grouped_store)
8355 /* For grouped stores vectorized defs are interleaved in
8356 vect_permute_store_chain(). */
8357 vec_oprnd = result_chain[i];
8358
8359 align = known_alignment (DR_TARGET_ALIGNMENT (first_dr_info));
8360 if (aligned_access_p (first_dr_info))
8361 misalign = 0;
8362 else if (DR_MISALIGNMENT (first_dr_info) == -1)
8363 {
8364 align = dr_alignment (vect_dr_behavior (first_dr_info));
8365 misalign = 0;
8366 }
8367 else
8368 misalign = DR_MISALIGNMENT (first_dr_info);
8369 if (dataref_offset == NULL_TREE
8370 && TREE_CODE (dataref_ptr) == SSA_NAME)
8371 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
8372 misalign);
8373
8374 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
8375 {
8376 tree perm_mask = perm_mask_for_reverse (vectype);
8377 tree perm_dest = vect_create_destination_var
8378 (vect_get_store_rhs (stmt_info), vectype);
8379 tree new_temp = make_ssa_name (perm_dest);
8380
8381 /* Generate the permute statement. */
8382 gimple *perm_stmt
8383 = gimple_build_assign (new_temp, VEC_PERM_EXPR, vec_oprnd,
8384 vec_oprnd, perm_mask);
8385 vect_finish_stmt_generation (stmt_info, perm_stmt, gsi);
8386
8387 perm_stmt = SSA_NAME_DEF_STMT (new_temp);
8388 vec_oprnd = new_temp;
8389 }
8390
8391 /* Arguments are ready. Create the new vector stmt. */
8392 if (final_mask)
8393 {
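	      /* least_bit_hwi (misalign | align) below is the largest
		 power of two known to divide the access address: e.g.
		 ALIGN 16 with MISALIGN 4 only guarantees 4-byte
		 alignment.  */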
8394 align = least_bit_hwi (misalign | align);
8395 tree ptr = build_int_cst (ref_type, align);
8396 gcall *call
8397 = gimple_build_call_internal (IFN_MASK_STORE, 4,
8398 dataref_ptr, ptr,
8399 final_mask, vec_oprnd);
8400 gimple_call_set_nothrow (call, true);
8401 new_stmt_info
8402 = vect_finish_stmt_generation (stmt_info, call, gsi);
8403 }
8404 else
8405 {
8406 data_ref = fold_build2 (MEM_REF, vectype,
8407 dataref_ptr,
8408 dataref_offset
8409 ? dataref_offset
8410 : build_int_cst (ref_type, 0));
8411 if (aligned_access_p (first_dr_info))
8412 ;
8413 else if (DR_MISALIGNMENT (first_dr_info) == -1)
8414 TREE_TYPE (data_ref)
8415 = build_aligned_type (TREE_TYPE (data_ref),
8416 align * BITS_PER_UNIT);
8417 else
8418 TREE_TYPE (data_ref)
8419 = build_aligned_type (TREE_TYPE (data_ref),
8420 TYPE_ALIGN (elem_type));
8421 vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
8422 gassign *new_stmt
8423 = gimple_build_assign (data_ref, vec_oprnd);
8424 new_stmt_info
8425 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
8426 }
8427
8428 if (slp)
8429 continue;
8430
8431 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
8432 if (!next_stmt_info)
8433 break;
8434 }
8435 }
8436 if (!slp)
8437 {
8438 if (j == 0)
8439 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
8440 else
8441 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
8442 prev_stmt_info = new_stmt_info;
8443 }
8444 }
8445
8446 oprnds.release ();
8447 result_chain.release ();
8448 vec_oprnds.release ();
8449
8450 return true;
8451 }
8452
8453 /* Given a vector type VECTYPE, turns permutation SEL into the equivalent
8454 VECTOR_CST mask. No checks are made that the target platform supports the
8455 mask, so callers may wish to test can_vec_perm_const_p separately, or use
8456 vect_gen_perm_mask_checked. */
8457
8458 tree
8459 vect_gen_perm_mask_any (tree vectype, const vec_perm_indices &sel)
8460 {
8461 tree mask_type;
8462
8463 poly_uint64 nunits = sel.length ();
8464 gcc_assert (known_eq (nunits, TYPE_VECTOR_SUBPARTS (vectype)));
8465
8466 mask_type = build_vector_type (ssizetype, nunits);
8467 return vec_perm_indices_to_tree (mask_type, sel);
8468 }
8469
8470 /* Checked version of vect_gen_perm_mask_any. Asserts can_vec_perm_const_p,
8471 i.e. that the target supports the pattern _for arbitrary input vectors_. */
8472
8473 tree
8474 vect_gen_perm_mask_checked (tree vectype, const vec_perm_indices &sel)
8475 {
8476 gcc_assert (can_vec_perm_const_p (TYPE_MODE (vectype), sel));
8477 return vect_gen_perm_mask_any (vectype, sel);
8478 }
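
/* Example usage (a sketch, following perm_mask_for_reverse elsewhere in
   this file): building a lane-reversing permutation mask for VECTYPE.

     poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
     vec_perm_builder sel (nunits, 1, 3);
     for (int i = 0; i < 3; ++i)
       sel.quick_push (nunits - 1 - i);
     vec_perm_indices indices (sel, 1, nunits);
     if (can_vec_perm_const_p (TYPE_MODE (vectype), indices))
       mask = vect_gen_perm_mask_checked (vectype, indices);

   The encoding uses a single stepped pattern, so it also describes
   variable-length vectors.  */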
8479
8480 /* Given vector variables X and Y that were generated for the scalar
8481    statement STMT_INFO, generate instructions to permute the vector
8482    elements of X and Y using the permutation mask MASK_VEC, insert them
8483    at *GSI, and return the permuted vector variable.  */
8484
8485 static tree
8486 permute_vec_elements (tree x, tree y, tree mask_vec, stmt_vec_info stmt_info,
8487 gimple_stmt_iterator *gsi)
8488 {
8489 tree vectype = TREE_TYPE (x);
8490 tree perm_dest, data_ref;
8491 gimple *perm_stmt;
8492
8493 tree scalar_dest = gimple_get_lhs (stmt_info->stmt);
8494 if (scalar_dest && TREE_CODE (scalar_dest) == SSA_NAME)
8495 perm_dest = vect_create_destination_var (scalar_dest, vectype);
8496 else
8497 perm_dest = vect_get_new_vect_var (vectype, vect_simple_var, NULL);
8498 data_ref = make_ssa_name (perm_dest);
8499
8500 /* Generate the permute statement. */
8501 perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, x, y, mask_vec);
8502 vect_finish_stmt_generation (stmt_info, perm_stmt, gsi);
8503
8504 return data_ref;
8505 }
8506
8507 /* Hoist the definitions of all SSA uses on STMT_INFO out of the loop LOOP,
8508    inserting them on the loop's preheader edge.  Returns true if we
8509    were successful in doing so (and thus STMT_INFO can then be moved),
8510    otherwise returns false.  */
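
/* For example (illustrative):

     a_1 = b_2 + 4;		<- feeds the address below
     x_3 = MEM[(int *) a_1];	<- STMT_INFO

   If b_2 is defined outside LOOP, the statement defining a_1 is moved
   to the preheader, after which STMT_INFO depends only on defs outside
   LOOP and can itself be hoisted.  */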
8511
8512 static bool
8513 hoist_defs_of_uses (stmt_vec_info stmt_info, class loop *loop)
8514 {
8515 ssa_op_iter i;
8516 tree op;
8517 bool any = false;
8518
8519 FOR_EACH_SSA_TREE_OPERAND (op, stmt_info->stmt, i, SSA_OP_USE)
8520 {
8521 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
8522 if (!gimple_nop_p (def_stmt)
8523 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
8524 {
8525 	  /* Make sure we don't need to recurse.  While we could do so in
8526 	     simple cases, when there are more complex use webs we don't
8527 	     have an easy way to preserve stmt order to fulfil
8528 	     dependencies within them.  */
8529 tree op2;
8530 ssa_op_iter i2;
8531 if (gimple_code (def_stmt) == GIMPLE_PHI)
8532 return false;
8533 FOR_EACH_SSA_TREE_OPERAND (op2, def_stmt, i2, SSA_OP_USE)
8534 {
8535 gimple *def_stmt2 = SSA_NAME_DEF_STMT (op2);
8536 if (!gimple_nop_p (def_stmt2)
8537 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt2)))
8538 return false;
8539 }
8540 any = true;
8541 }
8542 }
8543
8544 if (!any)
8545 return true;
8546
8547 FOR_EACH_SSA_TREE_OPERAND (op, stmt_info->stmt, i, SSA_OP_USE)
8548 {
8549 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
8550 if (!gimple_nop_p (def_stmt)
8551 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
8552 {
8553 gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
8554 gsi_remove (&gsi, false);
8555 gsi_insert_on_edge_immediate (loop_preheader_edge (loop), def_stmt);
8556 }
8557 }
8558
8559 return true;
8560 }
8561
8562 /* vectorizable_load.
8563
8564    Check if STMT_INFO reads a non-scalar data-ref (array/pointer/structure)
8565 that can be vectorized.
8566 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
8567 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
8568 Return true if STMT_INFO is vectorizable in this way. */
8569
8570 static bool
8571 vectorizable_load (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
8572 stmt_vec_info *vec_stmt, slp_tree slp_node,
8573 slp_instance slp_node_instance,
8574 stmt_vector_for_cost *cost_vec)
8575 {
8576 tree scalar_dest;
8577 tree vec_dest = NULL;
8578 tree data_ref = NULL;
8579 stmt_vec_info prev_stmt_info;
8580 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
8581 class loop *loop = NULL;
8582 class loop *containing_loop = gimple_bb (stmt_info->stmt)->loop_father;
8583 bool nested_in_vect_loop = false;
8584 tree elem_type;
8585 tree new_temp;
8586 machine_mode mode;
8587 tree dummy;
8588 enum dr_alignment_support alignment_support_scheme;
8589 tree dataref_ptr = NULL_TREE;
8590 tree dataref_offset = NULL_TREE;
8591 gimple *ptr_incr = NULL;
8592 int ncopies;
8593 int i, j;
8594 unsigned int group_size;
8595 poly_uint64 group_gap_adj;
8596 tree msq = NULL_TREE, lsq;
8597 tree offset = NULL_TREE;
8598 tree byte_offset = NULL_TREE;
8599 tree realignment_token = NULL_TREE;
8600 gphi *phi = NULL;
8601 vec<tree> dr_chain = vNULL;
8602 bool grouped_load = false;
8603 stmt_vec_info first_stmt_info;
8604 stmt_vec_info first_stmt_info_for_drptr = NULL;
8605 bool compute_in_loop = false;
8606 class loop *at_loop;
8607 int vec_num;
8608 bool slp = (slp_node != NULL);
8609 bool slp_perm = false;
8610 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
8611 poly_uint64 vf;
8612 tree aggr_type;
8613 gather_scatter_info gs_info;
8614 vec_info *vinfo = stmt_info->vinfo;
8615 tree ref_type;
8616 enum vect_def_type mask_dt = vect_unknown_def_type;
8617
8618 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
8619 return false;
8620
8621 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
8622 && ! vec_stmt)
8623 return false;
8624
8625 tree mask = NULL_TREE, mask_vectype = NULL_TREE;
8626 if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
8627 {
8628 scalar_dest = gimple_assign_lhs (assign);
8629 if (TREE_CODE (scalar_dest) != SSA_NAME)
8630 return false;
8631
8632 tree_code code = gimple_assign_rhs_code (assign);
8633 if (code != ARRAY_REF
8634 && code != BIT_FIELD_REF
8635 && code != INDIRECT_REF
8636 && code != COMPONENT_REF
8637 && code != IMAGPART_EXPR
8638 && code != REALPART_EXPR
8639 && code != MEM_REF
8640 && TREE_CODE_CLASS (code) != tcc_declaration)
8641 return false;
8642 }
8643 else
8644 {
8645 gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
8646 if (!call || !gimple_call_internal_p (call))
8647 return false;
8648
8649 internal_fn ifn = gimple_call_internal_fn (call);
8650 if (!internal_load_fn_p (ifn))
8651 return false;
8652
8653 scalar_dest = gimple_call_lhs (call);
8654 if (!scalar_dest)
8655 return false;
8656
8657 int mask_index = internal_fn_mask_index (ifn);
8658 if (mask_index >= 0)
8659 {
8660 mask = gimple_call_arg (call, mask_index);
8661 if (!vect_check_scalar_mask (stmt_info, mask, &mask_dt,
8662 &mask_vectype))
8663 return false;
8664 }
8665 }
8666
8667 if (!STMT_VINFO_DATA_REF (stmt_info))
8668 return false;
8669
8670 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
8671 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
8672
8673 if (loop_vinfo)
8674 {
8675 loop = LOOP_VINFO_LOOP (loop_vinfo);
8676 nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt_info);
8677 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
8678 }
8679 else
8680 vf = 1;
8681
8682 /* Multiple types in SLP are handled by creating the appropriate number of
8683 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
8684 case of SLP. */
8685 if (slp)
8686 ncopies = 1;
8687 else
8688 ncopies = vect_get_num_copies (loop_vinfo, vectype);
8689
8690 gcc_assert (ncopies >= 1);
8691
8692 /* FORNOW. This restriction should be relaxed. */
8693 if (nested_in_vect_loop && ncopies > 1)
8694 {
8695 if (dump_enabled_p ())
8696 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8697 "multiple types in nested loop.\n");
8698 return false;
8699 }
8700
8701 /* Invalidate assumptions made by dependence analysis when vectorization
8702 on the unrolled body effectively re-orders stmts. */
8703 if (ncopies > 1
8704 && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
8705 && maybe_gt (LOOP_VINFO_VECT_FACTOR (loop_vinfo),
8706 STMT_VINFO_MIN_NEG_DIST (stmt_info)))
8707 {
8708 if (dump_enabled_p ())
8709 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8710 "cannot perform implicit CSE when unrolling "
8711 "with negative dependence distance\n");
8712 return false;
8713 }
8714
8715 elem_type = TREE_TYPE (vectype);
8716 mode = TYPE_MODE (vectype);
8717
8718   /* FORNOW.  In some cases we can vectorize even if the data type is
8719      not supported (e.g. data copies).  */
8720 if (optab_handler (mov_optab, mode) == CODE_FOR_nothing)
8721 {
8722 if (dump_enabled_p ())
8723 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8724 "Aligned load, but unsupported type.\n");
8725 return false;
8726 }
8727
8728 /* Check if the load is a part of an interleaving chain. */
8729 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
8730 {
8731 grouped_load = true;
8732 /* FORNOW */
8733 gcc_assert (!nested_in_vect_loop);
8734 gcc_assert (!STMT_VINFO_GATHER_SCATTER_P (stmt_info));
8735
8736 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
8737 group_size = DR_GROUP_SIZE (first_stmt_info);
8738
8739 /* Refuse non-SLP vectorization of SLP-only groups. */
8740 if (!slp && STMT_VINFO_SLP_VECT_ONLY (first_stmt_info))
8741 {
8742 if (dump_enabled_p ())
8743 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8744 "cannot vectorize load in non-SLP mode.\n");
8745 return false;
8746 }
8747
8748 if (slp && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
8749 slp_perm = true;
8750
8751 /* Invalidate assumptions made by dependence analysis when vectorization
8752 on the unrolled body effectively re-orders stmts. */
8753 if (!PURE_SLP_STMT (stmt_info)
8754 && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
8755 && maybe_gt (LOOP_VINFO_VECT_FACTOR (loop_vinfo),
8756 STMT_VINFO_MIN_NEG_DIST (stmt_info)))
8757 {
8758 if (dump_enabled_p ())
8759 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8760 "cannot perform implicit CSE when performing "
8761 "group loads with negative dependence distance\n");
8762 return false;
8763 }
8764 }
8765 else
8766 group_size = 1;
8767
8768 vect_memory_access_type memory_access_type;
8769 if (!get_load_store_type (stmt_info, vectype, slp, mask, VLS_LOAD, ncopies,
8770 &memory_access_type, &gs_info))
8771 return false;
8772
8773 if (mask)
8774 {
8775 if (memory_access_type == VMAT_CONTIGUOUS)
8776 {
8777 machine_mode vec_mode = TYPE_MODE (vectype);
8778 if (!VECTOR_MODE_P (vec_mode)
8779 || !can_vec_mask_load_store_p (vec_mode,
8780 TYPE_MODE (mask_vectype), true))
8781 return false;
8782 }
8783 else if (memory_access_type != VMAT_LOAD_STORE_LANES
8784 && memory_access_type != VMAT_GATHER_SCATTER)
8785 {
8786 if (dump_enabled_p ())
8787 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8788 "unsupported access type for masked load.\n");
8789 return false;
8790 }
8791 }
8792
8793 if (!vec_stmt) /* transformation not required. */
8794 {
8795 if (!slp)
8796 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;
8797
8798 if (loop_vinfo
8799 && LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo))
8800 check_load_store_masking (loop_vinfo, vectype, VLS_LOAD, group_size,
8801 memory_access_type, &gs_info, mask);
8802
8803 STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
8804 vect_model_load_cost (stmt_info, ncopies, memory_access_type,
8805 slp_node_instance, slp_node, cost_vec);
8806 return true;
8807 }
8808
8809 if (!slp)
8810 gcc_assert (memory_access_type
8811 == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info));
8812
8813 if (dump_enabled_p ())
8814 dump_printf_loc (MSG_NOTE, vect_location,
8815 "transform load. ncopies = %d\n", ncopies);
8816
8817 /* Transform. */
8818
8819 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info), *first_dr_info = NULL;
8820 ensure_base_align (dr_info);
8821
8822 if (memory_access_type == VMAT_GATHER_SCATTER && gs_info.decl)
8823 {
8824 vect_build_gather_load_calls (stmt_info, gsi, vec_stmt, &gs_info, mask);
8825 return true;
8826 }
8827
8828 if (memory_access_type == VMAT_INVARIANT)
8829 {
8830 gcc_assert (!grouped_load && !mask && !bb_vinfo);
8831 /* If we have versioned for aliasing or the loop doesn't
8832 have any data dependencies that would preclude this,
8833 then we are sure this is a loop invariant load and
8834 thus we can insert it on the preheader edge. */
8835 bool hoist_p = (LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo)
8836 && !nested_in_vect_loop
8837 && hoist_defs_of_uses (stmt_info, loop));
8838 if (hoist_p)
8839 {
8840 gassign *stmt = as_a <gassign *> (stmt_info->stmt);
8841 if (dump_enabled_p ())
8842 dump_printf_loc (MSG_NOTE, vect_location,
8843 "hoisting out of the vectorized loop: %G", stmt);
8844 scalar_dest = copy_ssa_name (scalar_dest);
8845 tree rhs = unshare_expr (gimple_assign_rhs1 (stmt));
8846 gsi_insert_on_edge_immediate
8847 (loop_preheader_edge (loop),
8848 gimple_build_assign (scalar_dest, rhs));
8849 }
8850 /* These copies are all equivalent, but currently the representation
8851 requires a separate STMT_VINFO_VEC_STMT for each one. */
8852 prev_stmt_info = NULL;
8853 gimple_stmt_iterator gsi2 = *gsi;
8854 gsi_next (&gsi2);
8855 for (j = 0; j < ncopies; j++)
8856 {
8857 stmt_vec_info new_stmt_info;
8858 if (hoist_p)
8859 {
8860 new_temp = vect_init_vector (stmt_info, scalar_dest,
8861 vectype, NULL);
8862 gimple *new_stmt = SSA_NAME_DEF_STMT (new_temp);
8863 new_stmt_info = vinfo->add_stmt (new_stmt);
8864 }
8865 else
8866 {
8867 new_temp = vect_init_vector (stmt_info, scalar_dest,
8868 vectype, &gsi2);
8869 new_stmt_info = vinfo->lookup_def (new_temp);
8870 }
8871 if (slp)
8872 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
8873 else if (j == 0)
8874 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
8875 else
8876 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
8877 prev_stmt_info = new_stmt_info;
8878 }
8879 return true;
8880 }
8881
8882 if (memory_access_type == VMAT_ELEMENTWISE
8883 || memory_access_type == VMAT_STRIDED_SLP)
8884 {
8885 gimple_stmt_iterator incr_gsi;
8886 bool insert_after;
8887 gimple *incr;
8888 tree offvar;
8889 tree ivstep;
8890 tree running_off;
8891 vec<constructor_elt, va_gc> *v = NULL;
8892 tree stride_base, stride_step, alias_off;
8893 /* Checked by get_load_store_type. */
8894 unsigned int const_nunits = nunits.to_constant ();
8895 unsigned HOST_WIDE_INT cst_offset = 0;
8896 tree dr_offset;
8897
8898 gcc_assert (!LOOP_VINFO_FULLY_MASKED_P (loop_vinfo));
8899 gcc_assert (!nested_in_vect_loop);
8900
8901 if (grouped_load)
8902 {
8903 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
8904 first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
8905 }
8906 else
8907 {
8908 first_stmt_info = stmt_info;
8909 first_dr_info = dr_info;
8910 }
8911 if (slp && grouped_load)
8912 {
8913 group_size = DR_GROUP_SIZE (first_stmt_info);
8914 ref_type = get_group_alias_ptr_type (first_stmt_info);
8915 }
8916 else
8917 {
8918 if (grouped_load)
8919 cst_offset
8920 = (tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)))
8921 * vect_get_place_in_interleaving_chain (stmt_info,
8922 first_stmt_info));
8923 group_size = 1;
8924 ref_type = reference_alias_ptr_type (DR_REF (dr_info->dr));
8925 }
8926
8927 dr_offset = get_dr_vinfo_offset (first_dr_info);
8928 stride_base
8929 = fold_build_pointer_plus
8930 (DR_BASE_ADDRESS (first_dr_info->dr),
8931 size_binop (PLUS_EXPR,
8932 convert_to_ptrofftype (dr_offset),
8933 convert_to_ptrofftype (DR_INIT (first_dr_info->dr))));
8934 stride_step = fold_convert (sizetype, DR_STEP (first_dr_info->dr));
8935
8936       /* For a load with a loop-invariant stride other than a power
8937 	 of 2 (i.e. not a grouped access), like so:
8938
8939 for (i = 0; i < n; i += stride)
8940 ... = array[i];
8941
8942 we generate a new induction variable and new accesses to
8943 form a new vector (or vectors, depending on ncopies):
8944
8945 for (j = 0; ; j += VF*stride)
8946 tmp1 = array[j];
8947 tmp2 = array[j + stride];
8948 ...
8949 vectemp = {tmp1, tmp2, ...}
8950 */
8951
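      /* IVSTEP is the scalar step times the vectorization factor, e.g. a
	 DR_STEP of 12 bytes with VF 4 advances the IV by 48 bytes per
	 vector loop iteration.  */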
8952 ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (stride_step), stride_step,
8953 build_int_cst (TREE_TYPE (stride_step), vf));
8954
8955 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
8956
8957 stride_base = cse_and_gimplify_to_preheader (loop_vinfo, stride_base);
8958 ivstep = cse_and_gimplify_to_preheader (loop_vinfo, ivstep);
8959 create_iv (stride_base, ivstep, NULL,
8960 loop, &incr_gsi, insert_after,
8961 &offvar, NULL);
8962 incr = gsi_stmt (incr_gsi);
8963 loop_vinfo->add_stmt (incr);
8964
8965 stride_step = cse_and_gimplify_to_preheader (loop_vinfo, stride_step);
8966
8967 prev_stmt_info = NULL;
8968 running_off = offvar;
8969 alias_off = build_int_cst (ref_type, 0);
8970 int nloads = const_nunits;
8971 int lnel = 1;
8972 tree ltype = TREE_TYPE (vectype);
8973 tree lvectype = vectype;
8974 auto_vec<tree> dr_chain;
8975 if (memory_access_type == VMAT_STRIDED_SLP)
8976 {
8977 if (group_size < const_nunits)
8978 {
8979 /* First check if vec_init optab supports construction from vector
8980 elts directly. Otherwise avoid emitting a constructor of
8981 vector elements by performing the loads using an integer type
8982 of the same size, constructing a vector of those and then
8983 re-interpreting it as the original vector type. This avoids a
8984 huge runtime penalty due to the general inability to perform
8985 store forwarding from smaller stores to a larger load. */
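	      /* E.g. (illustrative): for a V8SI vectype with group_size 2,
		 vector_vector_composition_type can return V4DI with piece
		 type DI, so we emit four DI loads, build a V4DI
		 CONSTRUCTOR from them and VIEW_CONVERT the result back to
		 V8SI.  */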
8986 tree ptype;
8987 tree vtype
8988 = vector_vector_composition_type (vectype,
8989 const_nunits / group_size,
8990 &ptype);
8991 if (vtype != NULL_TREE)
8992 {
8993 nloads = const_nunits / group_size;
8994 lnel = group_size;
8995 lvectype = vtype;
8996 ltype = ptype;
8997 }
8998 }
8999 else
9000 {
9001 nloads = 1;
9002 lnel = const_nunits;
9003 ltype = vectype;
9004 }
9005 ltype = build_aligned_type (ltype, TYPE_ALIGN (TREE_TYPE (vectype)));
9006 }
9007   /* Load vector(1) scalar_type if the vectype holds a single element.  */
9008 else if (nloads == 1)
9009 ltype = vectype;
9010
9011 if (slp)
9012 {
9013 /* For SLP permutation support we need to load the whole group,
9014 not only the number of vector stmts the permutation result
9015 fits in. */
9016 if (slp_perm)
9017 {
9018 /* We don't yet generate SLP_TREE_LOAD_PERMUTATIONs for
9019 variable VF. */
9020 unsigned int const_vf = vf.to_constant ();
9021 ncopies = CEIL (group_size * const_vf, const_nunits);
9022 dr_chain.create (ncopies);
9023 }
9024 else
9025 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
9026 }
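  /* E.g. (illustrative): group_size 3, const_vf 4 and const_nunits 4
     give ncopies = CEIL (3 * 4, 4) = 3, i.e. three vectors covering all
     twelve group elements that the permutation then selects from.  */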
9027 unsigned int group_el = 0;
9028 unsigned HOST_WIDE_INT
9029 elsz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
9030 for (j = 0; j < ncopies; j++)
9031 {
9032 if (nloads > 1)
9033 vec_alloc (v, nloads);
9034 stmt_vec_info new_stmt_info = NULL;
9035 for (i = 0; i < nloads; i++)
9036 {
9037 tree this_off = build_int_cst (TREE_TYPE (alias_off),
9038 group_el * elsz + cst_offset);
9039 tree data_ref = build2 (MEM_REF, ltype, running_off, this_off);
9040 vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
9041 gassign *new_stmt
9042 = gimple_build_assign (make_ssa_name (ltype), data_ref);
9043 new_stmt_info
9044 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
9045 if (nloads > 1)
9046 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
9047 gimple_assign_lhs (new_stmt));
9048
9049 group_el += lnel;
9050 if (! slp
9051 || group_el == group_size)
9052 {
9053 tree newoff = copy_ssa_name (running_off);
9054 gimple *incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
9055 running_off, stride_step);
9056 vect_finish_stmt_generation (stmt_info, incr, gsi);
9057
9058 running_off = newoff;
9059 group_el = 0;
9060 }
9061 }
9062 if (nloads > 1)
9063 {
9064 tree vec_inv = build_constructor (lvectype, v);
9065 new_temp = vect_init_vector (stmt_info, vec_inv, lvectype, gsi);
9066 new_stmt_info = vinfo->lookup_def (new_temp);
9067 if (lvectype != vectype)
9068 {
9069 gassign *new_stmt
9070 = gimple_build_assign (make_ssa_name (vectype),
9071 VIEW_CONVERT_EXPR,
9072 build1 (VIEW_CONVERT_EXPR,
9073 vectype, new_temp));
9074 new_stmt_info
9075 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
9076 }
9077 }
9078
9079 if (slp)
9080 {
9081 if (slp_perm)
9082 dr_chain.quick_push (gimple_assign_lhs (new_stmt_info->stmt));
9083 else
9084 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
9085 }
9086 else
9087 {
9088 if (j == 0)
9089 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
9090 else
9091 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
9092 prev_stmt_info = new_stmt_info;
9093 }
9094 }
9095 if (slp_perm)
9096 {
9097 unsigned n_perms;
9098 vect_transform_slp_perm_load (slp_node, dr_chain, gsi, vf,
9099 slp_node_instance, false, &n_perms);
9100 }
9101 return true;
9102 }
9103
9104 if (memory_access_type == VMAT_GATHER_SCATTER
9105 || (!slp && memory_access_type == VMAT_CONTIGUOUS))
9106 grouped_load = false;
9107
9108 if (grouped_load)
9109 {
9110 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
9111 group_size = DR_GROUP_SIZE (first_stmt_info);
9112 /* For SLP vectorization we directly vectorize a subchain
9113 without permutation. */
9114 if (slp && ! SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
9115 first_stmt_info = SLP_TREE_SCALAR_STMTS (slp_node)[0];
9116 /* For BB vectorization always use the first stmt to base
9117 the data ref pointer on. */
9118 if (bb_vinfo)
9119 first_stmt_info_for_drptr = SLP_TREE_SCALAR_STMTS (slp_node)[0];
9120
9121 /* Check if the chain of loads is already vectorized. */
9122 if (STMT_VINFO_VEC_STMT (first_stmt_info)
9123 /* For SLP we would need to copy over SLP_TREE_VEC_STMTS.
9124 ??? But we can only do so if there is exactly one
9125 as we have no way to get at the rest. Leave the CSE
9126 opportunity alone.
9127 ??? With the group load eventually participating
9128 in multiple different permutations (having multiple
9129 slp nodes which refer to the same group) the CSE
9130 is even wrong code. See PR56270. */
9131 && !slp)
9132 {
9133 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
9134 return true;
9135 }
9136 first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
9137 group_gap_adj = 0;
9138
9139 /* VEC_NUM is the number of vect stmts to be created for this group. */
9140 if (slp)
9141 {
9142 grouped_load = false;
9143 /* If an SLP permutation is from N elements to N elements,
9144 and if one vector holds a whole number of N, we can load
9145 the inputs to the permutation in the same way as an
9146 unpermuted sequence. In other cases we need to load the
9147 whole group, not only the number of vector stmts the
9148 permutation result fits in. */
9149 if (slp_perm
9150 && (group_size != SLP_INSTANCE_GROUP_SIZE (slp_node_instance)
9151 || !multiple_p (nunits, group_size)))
9152 {
9153 /* We don't yet generate such SLP_TREE_LOAD_PERMUTATIONs for
9154 variable VF; see vect_transform_slp_perm_load. */
9155 unsigned int const_vf = vf.to_constant ();
9156 unsigned int const_nunits = nunits.to_constant ();
9157 vec_num = CEIL (group_size * const_vf, const_nunits);
9158 group_gap_adj = vf * group_size - nunits * vec_num;
9159 }
9160 else
9161 {
9162 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
9163 group_gap_adj
9164 = group_size - SLP_INSTANCE_GROUP_SIZE (slp_node_instance);
9165 }
9166 }
9167 else
9168 vec_num = group_size;
9169
9170 ref_type = get_group_alias_ptr_type (first_stmt_info);
9171 }
9172 else
9173 {
9174 first_stmt_info = stmt_info;
9175 first_dr_info = dr_info;
9176 group_size = vec_num = 1;
9177 group_gap_adj = 0;
9178 ref_type = reference_alias_ptr_type (DR_REF (first_dr_info->dr));
9179 }
9180
9181   /* Gather-scatter accesses perform only component accesses; alignment
9182      is irrelevant for them.  */
9183 if (memory_access_type == VMAT_GATHER_SCATTER)
9184 alignment_support_scheme = dr_unaligned_supported;
9185 else
9186 alignment_support_scheme
9187 = vect_supportable_dr_alignment (first_dr_info, false);
9188
9189 gcc_assert (alignment_support_scheme);
9190 vec_loop_masks *loop_masks
9191 = (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
9192 ? &LOOP_VINFO_MASKS (loop_vinfo)
9193 : NULL);
9194   /* Targets with load-lanes instructions must not require explicit
9195      realignment.  vect_supportable_dr_alignment always returns either
9196      dr_aligned or dr_unaligned_supported for masked operations.  */
9197 gcc_assert ((memory_access_type != VMAT_LOAD_STORE_LANES
9198 && !mask
9199 && !loop_masks)
9200 || alignment_support_scheme == dr_aligned
9201 || alignment_support_scheme == dr_unaligned_supported);
9202
9203 /* In case the vectorization factor (VF) is bigger than the number
9204 of elements that we can fit in a vectype (nunits), we have to generate
9205      more than one vector stmt - i.e. we need to "unroll" the
9206 vector stmt by a factor VF/nunits. In doing so, we record a pointer
9207 from one copy of the vector stmt to the next, in the field
9208 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
9209 stages to find the correct vector defs to be used when vectorizing
9210 stmts that use the defs of the current stmt. The example below
9211 illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
9212 need to create 4 vectorized stmts):
9213
9214 before vectorization:
9215 RELATED_STMT VEC_STMT
9216 S1: x = memref - -
9217 S2: z = x + 1 - -
9218
9219 step 1: vectorize stmt S1:
9220 We first create the vector stmt VS1_0, and, as usual, record a
9221 pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
9222 Next, we create the vector stmt VS1_1, and record a pointer to
9223 it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
9224 Similarly, for VS1_2 and VS1_3. This is the resulting chain of
9225 stmts and pointers:
9226 RELATED_STMT VEC_STMT
9227 VS1_0: vx0 = memref0 VS1_1 -
9228 VS1_1: vx1 = memref1 VS1_2 -
9229 VS1_2: vx2 = memref2 VS1_3 -
9230 VS1_3: vx3 = memref3 - -
9231 S1: x = load - VS1_0
9232 S2: z = x + 1 - -
9233
9234      See the documentation of vect_get_vec_def_for_stmt_copy for how the
9235      information we recorded in the RELATED_STMT field is used to
9236      vectorize stmt S2.  */
9237
9238 /* In case of interleaving (non-unit grouped access):
9239
9240 S1: x2 = &base + 2
9241 S2: x0 = &base
9242 S3: x1 = &base + 1
9243 S4: x3 = &base + 3
9244
9245 Vectorized loads are created in the order of memory accesses
9246 starting from the access of the first stmt of the chain:
9247
9248 VS1: vx0 = &base
9249 VS2: vx1 = &base + vec_size*1
9250 VS3: vx3 = &base + vec_size*2
9251 VS4: vx4 = &base + vec_size*3
9252
9253 Then permutation statements are generated:
9254
9255 VS5: vx5 = VEC_PERM_EXPR < vx0, vx1, { 0, 2, ..., i*2 } >
9256 VS6: vx6 = VEC_PERM_EXPR < vx0, vx1, { 1, 3, ..., i*2+1 } >
9257 ...
9258
9259 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
9260 (the order of the data-refs in the output of vect_permute_load_chain
9261 corresponds to the order of scalar stmts in the interleaving chain - see
9262 the documentation of vect_permute_load_chain()).
9263 The generation of permutation stmts and recording them in
9264 STMT_VINFO_VEC_STMT is done in vect_transform_grouped_load().
9265
9266 In case of both multiple types and interleaving, the vector loads and
9267 permutation stmts above are created for every copy. The result vector
9268 stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
9269 corresponding STMT_VINFO_RELATED_STMT for the next copies. */
9270
9271 /* If the data reference is aligned (dr_aligned) or potentially unaligned
9272 on a target that supports unaligned accesses (dr_unaligned_supported)
9273 we generate the following code:
9274 p = initial_addr;
9275 indx = 0;
9276 loop {
9277 p = p + indx * vectype_size;
9278 vec_dest = *(p);
9279 indx = indx + 1;
9280 }
9281
9282 Otherwise, the data reference is potentially unaligned on a target that
9283 does not support unaligned accesses (dr_explicit_realign_optimized) -
9284 then generate the following code, in which the data in each iteration is
9285 obtained by two vector loads, one from the previous iteration, and one
9286 from the current iteration:
9287 p1 = initial_addr;
9288 msq_init = *(floor(p1))
9289 p2 = initial_addr + VS - 1;
9290 realignment_token = call target_builtin;
9291 indx = 0;
9292 loop {
9293 p2 = p2 + indx * vectype_size
9294 lsq = *(floor(p2))
9295 vec_dest = realign_load (msq, lsq, realignment_token)
9296 indx = indx + 1;
9297 msq = lsq;
9298 } */
9299
9300 /* If the misalignment remains the same throughout the execution of the
9301 loop, we can create the init_addr and permutation mask at the loop
9302 preheader. Otherwise, it needs to be created inside the loop.
9303 This can only occur when vectorizing memory accesses in the inner-loop
9304 nested within an outer-loop that is being vectorized. */
9305
9306 if (nested_in_vect_loop
9307 && !multiple_p (DR_STEP_ALIGNMENT (dr_info->dr),
9308 GET_MODE_SIZE (TYPE_MODE (vectype))))
9309 {
9310 gcc_assert (alignment_support_scheme != dr_explicit_realign_optimized);
9311 compute_in_loop = true;
9312 }
9313
9314 bool diff_first_stmt_info
9315 = first_stmt_info_for_drptr && first_stmt_info != first_stmt_info_for_drptr;
9316
9317 if ((alignment_support_scheme == dr_explicit_realign_optimized
9318 || alignment_support_scheme == dr_explicit_realign)
9319 && !compute_in_loop)
9320 {
9321 	 /* If we have a different first_stmt_info, we can't set up the
9322 	    realignment here, since we can't guarantee that first_stmt_info's
9323 	    DR has been initialized yet; instead use first_stmt_info_for_drptr's
9324 	    DR, bumping by the distance from first_stmt_info's DR, as below.  */
9325 if (!diff_first_stmt_info)
9326 msq = vect_setup_realignment (first_stmt_info, gsi, &realignment_token,
9327 alignment_support_scheme, NULL_TREE,
9328 &at_loop);
9329 if (alignment_support_scheme == dr_explicit_realign_optimized)
9330 {
9331 phi = as_a <gphi *> (SSA_NAME_DEF_STMT (msq));
9332 byte_offset = size_binop (MINUS_EXPR, TYPE_SIZE_UNIT (vectype),
9333 size_one_node);
9334 gcc_assert (!first_stmt_info_for_drptr);
9335 }
9336 }
9337 else
9338 at_loop = loop;
9339
9340 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
9341 offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
9342
9343 tree bump;
9344 tree vec_offset = NULL_TREE;
9345 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
9346 {
9347 aggr_type = NULL_TREE;
9348 bump = NULL_TREE;
9349 }
9350 else if (memory_access_type == VMAT_GATHER_SCATTER)
9351 {
9352 aggr_type = elem_type;
9353 vect_get_strided_load_store_ops (stmt_info, loop_vinfo, &gs_info,
9354 &bump, &vec_offset);
9355 }
9356 else
9357 {
9358 if (memory_access_type == VMAT_LOAD_STORE_LANES)
9359 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
9360 else
9361 aggr_type = vectype;
9362 bump = vect_get_data_ptr_increment (dr_info, aggr_type,
9363 memory_access_type);
9364 }
9365
9366 tree vec_mask = NULL_TREE;
9367 prev_stmt_info = NULL;
9368 poly_uint64 group_elt = 0;
9369 for (j = 0; j < ncopies; j++)
9370 {
9371 stmt_vec_info new_stmt_info = NULL;
9372 /* 1. Create the vector or array pointer update chain. */
9373 if (j == 0)
9374 {
9375 bool simd_lane_access_p
9376 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) != 0;
9377 if (simd_lane_access_p
9378 && TREE_CODE (DR_BASE_ADDRESS (first_dr_info->dr)) == ADDR_EXPR
9379 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr_info->dr), 0))
9380 && integer_zerop (get_dr_vinfo_offset (first_dr_info))
9381 && integer_zerop (DR_INIT (first_dr_info->dr))
9382 && alias_sets_conflict_p (get_alias_set (aggr_type),
9383 get_alias_set (TREE_TYPE (ref_type)))
9384 && (alignment_support_scheme == dr_aligned
9385 || alignment_support_scheme == dr_unaligned_supported))
9386 {
9387 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr_info->dr));
9388 dataref_offset = build_int_cst (ref_type, 0);
9389 }
9390 else if (diff_first_stmt_info)
9391 {
9392 dataref_ptr
9393 = vect_create_data_ref_ptr (first_stmt_info_for_drptr,
9394 aggr_type, at_loop, offset, &dummy,
9395 gsi, &ptr_incr, simd_lane_access_p,
9396 byte_offset, bump);
9397 /* Adjust the pointer by the difference to first_stmt. */
9398 data_reference_p ptrdr
9399 = STMT_VINFO_DATA_REF (first_stmt_info_for_drptr);
9400 tree diff
9401 = fold_convert (sizetype,
9402 size_binop (MINUS_EXPR,
9403 DR_INIT (first_dr_info->dr),
9404 DR_INIT (ptrdr)));
9405 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
9406 stmt_info, diff);
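	      /* E.g. (illustrative): if FIRST_DR_INFO's DR_INIT is 8 and
		 PTRDR's is 0, the pointer based on
		 FIRST_STMT_INFO_FOR_DRPTR is bumped by 8 bytes so that it
		 addresses the group leader's element.  */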
9407 if (alignment_support_scheme == dr_explicit_realign)
9408 {
9409 msq = vect_setup_realignment (first_stmt_info_for_drptr, gsi,
9410 &realignment_token,
9411 alignment_support_scheme,
9412 dataref_ptr, &at_loop);
9413 gcc_assert (!compute_in_loop);
9414 }
9415 }
9416 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
9417 vect_get_gather_scatter_ops (loop, stmt_info, &gs_info,
9418 &dataref_ptr, &vec_offset);
9419 else
9420 dataref_ptr
9421 = vect_create_data_ref_ptr (first_stmt_info, aggr_type, at_loop,
9422 offset, &dummy, gsi, &ptr_incr,
9423 simd_lane_access_p,
9424 byte_offset, bump);
9425 if (mask)
9426 {
9427 if (slp_node)
9428 {
9429 auto_vec<vec<tree> > vec_defs (1);
9430 vect_get_slp_defs (slp_node, &vec_defs);
9431 vec_mask = vec_defs[0][0];
9432 }
9433 else
9434 vec_mask = vect_get_vec_def_for_operand (mask, stmt_info,
9435 mask_vectype);
9436 }
9437 }
9438 else
9439 {
9440 if (dataref_offset)
9441 dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset,
9442 bump);
9443 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
9444 vec_offset = vect_get_vec_def_for_stmt_copy (vinfo, vec_offset);
9445 else
9446 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
9447 stmt_info, bump);
9448 if (mask)
9449 vec_mask = vect_get_vec_def_for_stmt_copy (vinfo, vec_mask);
9450 }
9451
9452 if (grouped_load || slp_perm)
9453 dr_chain.create (vec_num);
9454
9455 if (memory_access_type == VMAT_LOAD_STORE_LANES)
9456 {
9457 tree vec_array;
9458
9459 vec_array = create_vector_array (vectype, vec_num);
9460
9461 tree final_mask = NULL_TREE;
9462 if (loop_masks)
9463 final_mask = vect_get_loop_mask (gsi, loop_masks, ncopies,
9464 vectype, j);
9465 if (vec_mask)
9466 final_mask = prepare_load_store_mask (mask_vectype, final_mask,
9467 vec_mask, gsi);
9468
9469 gcall *call;
9470 if (final_mask)
9471 {
9472 /* Emit:
9473 VEC_ARRAY = MASK_LOAD_LANES (DATAREF_PTR, ALIAS_PTR,
9474 VEC_MASK). */
9475 unsigned int align = TYPE_ALIGN_UNIT (TREE_TYPE (vectype));
9476 tree alias_ptr = build_int_cst (ref_type, align);
9477 call = gimple_build_call_internal (IFN_MASK_LOAD_LANES, 3,
9478 dataref_ptr, alias_ptr,
9479 final_mask);
9480 }
9481 else
9482 {
9483 /* Emit:
9484 VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]). */
9485 data_ref = create_array_ref (aggr_type, dataref_ptr, ref_type);
9486 call = gimple_build_call_internal (IFN_LOAD_LANES, 1, data_ref);
9487 }
9488 gimple_call_set_lhs (call, vec_array);
9489 gimple_call_set_nothrow (call, true);
9490 new_stmt_info = vect_finish_stmt_generation (stmt_info, call, gsi);
9491
9492 /* Extract each vector into an SSA_NAME. */
9493 for (i = 0; i < vec_num; i++)
9494 {
9495 new_temp = read_vector_array (stmt_info, gsi, scalar_dest,
9496 vec_array, i);
9497 dr_chain.quick_push (new_temp);
9498 }
9499
9500 /* Record the mapping between SSA_NAMEs and statements. */
9501 vect_record_grouped_load_vectors (stmt_info, dr_chain);
9502
9503 /* Record that VEC_ARRAY is now dead. */
9504 vect_clobber_variable (stmt_info, gsi, vec_array);
9505 }
9506 else
9507 {
9508 for (i = 0; i < vec_num; i++)
9509 {
9510 tree final_mask = NULL_TREE;
9511 if (loop_masks
9512 && memory_access_type != VMAT_INVARIANT)
9513 final_mask = vect_get_loop_mask (gsi, loop_masks,
9514 vec_num * ncopies,
9515 vectype, vec_num * j + i);
9516 if (vec_mask)
9517 final_mask = prepare_load_store_mask (mask_vectype, final_mask,
9518 vec_mask, gsi);
9519
9520 if (i > 0)
9521 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
9522 stmt_info, bump);
9523
9524 /* 2. Create the vector-load in the loop. */
9525 gimple *new_stmt = NULL;
9526 switch (alignment_support_scheme)
9527 {
9528 case dr_aligned:
9529 case dr_unaligned_supported:
9530 {
9531 unsigned int misalign;
9532 unsigned HOST_WIDE_INT align;
9533
9534 if (memory_access_type == VMAT_GATHER_SCATTER)
9535 {
9536 tree zero = build_zero_cst (vectype);
9537 tree scale = size_int (gs_info.scale);
9538 gcall *call;
9539 if (loop_masks)
9540 call = gimple_build_call_internal
9541 (IFN_MASK_GATHER_LOAD, 5, dataref_ptr,
9542 vec_offset, scale, zero, final_mask);
9543 else
9544 call = gimple_build_call_internal
9545 (IFN_GATHER_LOAD, 4, dataref_ptr,
9546 vec_offset, scale, zero);
9547 gimple_call_set_nothrow (call, true);
9548 new_stmt = call;
9549 data_ref = NULL_TREE;
9550 break;
9551 }
9552
9553 align =
9554 known_alignment (DR_TARGET_ALIGNMENT (first_dr_info));
9555 if (alignment_support_scheme == dr_aligned)
9556 {
9557 gcc_assert (aligned_access_p (first_dr_info));
9558 misalign = 0;
9559 }
9560 else if (DR_MISALIGNMENT (first_dr_info) == -1)
9561 {
9562 align = dr_alignment
9563 (vect_dr_behavior (first_dr_info));
9564 misalign = 0;
9565 }
9566 else
9567 misalign = DR_MISALIGNMENT (first_dr_info);
9568 if (dataref_offset == NULL_TREE
9569 && TREE_CODE (dataref_ptr) == SSA_NAME)
9570 set_ptr_info_alignment (get_ptr_info (dataref_ptr),
9571 align, misalign);
9572
9573 if (final_mask)
9574 {
9575 align = least_bit_hwi (misalign | align);
9576 tree ptr = build_int_cst (ref_type, align);
9577 gcall *call
9578 = gimple_build_call_internal (IFN_MASK_LOAD, 3,
9579 dataref_ptr, ptr,
9580 final_mask);
9581 gimple_call_set_nothrow (call, true);
9582 new_stmt = call;
9583 data_ref = NULL_TREE;
9584 }
9585 else
9586 {
9587 tree ltype = vectype;
9588 tree new_vtype = NULL_TREE;
9589 		      /* If there's no peeling for gaps but we have a gap
9590 			 with SLP loads, then load only the lower half of
9591 			 the vector.  See get_group_load_store_type for
9592 			 when we apply this optimization.  */
9593 if (slp
9594 && loop_vinfo
9595 && !LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)
9596 && DR_GROUP_GAP (first_stmt_info) != 0
9597 && known_eq (nunits,
9598 (group_size
9599 - DR_GROUP_GAP (first_stmt_info)) * 2)
9600 && known_eq (nunits, group_size))
9601 {
9602 tree half_vtype;
9603 new_vtype
9604 = vector_vector_composition_type (vectype, 2,
9605 &half_vtype);
9606 if (new_vtype != NULL_TREE)
9607 ltype = half_vtype;
9608 }
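		      /* E.g. (illustrative): a V4SI group of size 4 with
			 DR_GROUP_GAP 2 loads only a V2SI-sized chunk; the
			 CONSTRUCTOR built below fills the remaining lanes
			 with zeros.  */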
9609 tree offset
9610 = (dataref_offset ? dataref_offset
9611 : build_int_cst (ref_type, 0));
9612 if (ltype != vectype
9613 && memory_access_type == VMAT_CONTIGUOUS_REVERSE)
9614 {
9615 unsigned HOST_WIDE_INT gap
9616 = DR_GROUP_GAP (first_stmt_info);
9617 gap *= tree_to_uhwi (TYPE_SIZE_UNIT (elem_type));
9618 tree gapcst = build_int_cst (ref_type, gap);
9619 offset = size_binop (PLUS_EXPR, offset, gapcst);
9620 }
9621 data_ref
9622 = fold_build2 (MEM_REF, ltype, dataref_ptr, offset);
9623 if (alignment_support_scheme == dr_aligned)
9624 ;
9625 else if (DR_MISALIGNMENT (first_dr_info) == -1)
9626 TREE_TYPE (data_ref)
9627 = build_aligned_type (TREE_TYPE (data_ref),
9628 align * BITS_PER_UNIT);
9629 else
9630 TREE_TYPE (data_ref)
9631 = build_aligned_type (TREE_TYPE (data_ref),
9632 TYPE_ALIGN (elem_type));
9633 if (ltype != vectype)
9634 {
9635 vect_copy_ref_info (data_ref,
9636 DR_REF (first_dr_info->dr));
9637 tree tem = make_ssa_name (ltype);
9638 new_stmt = gimple_build_assign (tem, data_ref);
9639 vect_finish_stmt_generation (stmt_info, new_stmt,
9640 gsi);
9641 data_ref = NULL;
9642 vec<constructor_elt, va_gc> *v;
9643 vec_alloc (v, 2);
9644 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
9645 {
9646 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
9647 build_zero_cst (ltype));
9648 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, tem);
9649 }
9650 else
9651 {
9652 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, tem);
9653 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
9654 build_zero_cst (ltype));
9655 }
9656 gcc_assert (new_vtype != NULL_TREE);
9657 if (new_vtype == vectype)
9658 new_stmt = gimple_build_assign (
9659 vec_dest, build_constructor (vectype, v));
9660 else
9661 {
9662 tree new_vname = make_ssa_name (new_vtype);
9663 new_stmt = gimple_build_assign (
9664 new_vname, build_constructor (new_vtype, v));
9665 vect_finish_stmt_generation (stmt_info,
9666 new_stmt, gsi);
9667 new_stmt = gimple_build_assign (
9668 vec_dest, build1 (VIEW_CONVERT_EXPR, vectype,
9669 new_vname));
9670 }
9671 }
9672 }
9673 break;
9674 }
9675 case dr_explicit_realign:
9676 {
9677 tree ptr, bump;
9678
9679 tree vs = size_int (TYPE_VECTOR_SUBPARTS (vectype));
9680
9681 if (compute_in_loop)
9682 msq = vect_setup_realignment (first_stmt_info, gsi,
9683 &realignment_token,
9684 dr_explicit_realign,
9685 dataref_ptr, NULL);
9686
9687 if (TREE_CODE (dataref_ptr) == SSA_NAME)
9688 ptr = copy_ssa_name (dataref_ptr);
9689 else
9690 ptr = make_ssa_name (TREE_TYPE (dataref_ptr));
9691 // For explicit realign the target alignment should be
9692 // known at compile time.
9693 unsigned HOST_WIDE_INT align =
9694 DR_TARGET_ALIGNMENT (first_dr_info).to_constant ();
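	      // E.g. with a 16-byte target alignment, ANDing the address
	      // with -16 (0x...f0) rounds it down to the start of the
	      // containing aligned chunk.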
9695 new_stmt = gimple_build_assign
9696 (ptr, BIT_AND_EXPR, dataref_ptr,
9697 build_int_cst
9698 (TREE_TYPE (dataref_ptr),
9699 -(HOST_WIDE_INT) align));
9700 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
9701 data_ref
9702 = build2 (MEM_REF, vectype, ptr,
9703 build_int_cst (ref_type, 0));
9704 vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
9705 vec_dest = vect_create_destination_var (scalar_dest,
9706 vectype);
9707 new_stmt = gimple_build_assign (vec_dest, data_ref);
9708 new_temp = make_ssa_name (vec_dest, new_stmt);
9709 gimple_assign_set_lhs (new_stmt, new_temp);
9710 gimple_move_vops (new_stmt, stmt_info->stmt);
9711 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
9712 msq = new_temp;
9713
9714 bump = size_binop (MULT_EXPR, vs,
9715 TYPE_SIZE_UNIT (elem_type));
9716 bump = size_binop (MINUS_EXPR, bump, size_one_node);
9717 ptr = bump_vector_ptr (dataref_ptr, NULL, gsi,
9718 stmt_info, bump);
9719 new_stmt = gimple_build_assign
9720 (NULL_TREE, BIT_AND_EXPR, ptr,
9721 build_int_cst
9722 (TREE_TYPE (ptr), -(HOST_WIDE_INT) align));
9723 ptr = copy_ssa_name (ptr, new_stmt);
9724 gimple_assign_set_lhs (new_stmt, ptr);
9725 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
9726 data_ref
9727 = build2 (MEM_REF, vectype, ptr,
9728 build_int_cst (ref_type, 0));
9729 break;
9730 }
9731 case dr_explicit_realign_optimized:
9732 {
9733 if (TREE_CODE (dataref_ptr) == SSA_NAME)
9734 new_temp = copy_ssa_name (dataref_ptr);
9735 else
9736 new_temp = make_ssa_name (TREE_TYPE (dataref_ptr));
9737 // We should only be doing this if we know the target
9738 // alignment at compile time.
9739 unsigned HOST_WIDE_INT align =
9740 DR_TARGET_ALIGNMENT (first_dr_info).to_constant ();
9741 new_stmt = gimple_build_assign
9742 (new_temp, BIT_AND_EXPR, dataref_ptr,
9743 build_int_cst (TREE_TYPE (dataref_ptr),
9744 -(HOST_WIDE_INT) align));
9745 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
9746 data_ref
9747 = build2 (MEM_REF, vectype, new_temp,
9748 build_int_cst (ref_type, 0));
9749 break;
9750 }
9751 default:
9752 gcc_unreachable ();
9753 }
9754 vec_dest = vect_create_destination_var (scalar_dest, vectype);
9755 /* DATA_REF is null if we've already built the statement. */
9756 if (data_ref)
9757 {
9758 vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
9759 new_stmt = gimple_build_assign (vec_dest, data_ref);
9760 }
9761 new_temp = make_ssa_name (vec_dest, new_stmt);
9762 gimple_set_lhs (new_stmt, new_temp);
9763 new_stmt_info
9764 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
9765
9766 /* 3. Handle explicit realignment if necessary/supported.
9767 Create in loop:
9768 vec_dest = realign_load (msq, lsq, realignment_token) */
9769 if (alignment_support_scheme == dr_explicit_realign_optimized
9770 || alignment_support_scheme == dr_explicit_realign)
9771 {
9772 lsq = gimple_assign_lhs (new_stmt);
9773 if (!realignment_token)
9774 realignment_token = dataref_ptr;
9775 vec_dest = vect_create_destination_var (scalar_dest, vectype);
9776 new_stmt = gimple_build_assign (vec_dest, REALIGN_LOAD_EXPR,
9777 msq, lsq, realignment_token);
9778 new_temp = make_ssa_name (vec_dest, new_stmt);
9779 gimple_assign_set_lhs (new_stmt, new_temp);
9780 new_stmt_info
9781 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
9782
9783 if (alignment_support_scheme == dr_explicit_realign_optimized)
9784 {
9785 gcc_assert (phi);
9786 if (i == vec_num - 1 && j == ncopies - 1)
9787 add_phi_arg (phi, lsq,
9788 loop_latch_edge (containing_loop),
9789 UNKNOWN_LOCATION);
9790 msq = lsq;
9791 }
9792 }
9793
9794 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
9795 {
9796 tree perm_mask = perm_mask_for_reverse (vectype);
9797 new_temp = permute_vec_elements (new_temp, new_temp,
9798 perm_mask, stmt_info, gsi);
9799 new_stmt_info = vinfo->lookup_def (new_temp);
9800 }
9801
9802 /* Collect vector loads and later create their permutation in
9803 vect_transform_grouped_load (). */
9804 if (grouped_load || slp_perm)
9805 dr_chain.quick_push (new_temp);
9806
9807 /* Store vector loads in the corresponding SLP_NODE. */
9808 if (slp && !slp_perm)
9809 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
9810
9811 	  /* With an SLP permutation we load the gaps as well; without
9812 	     one we need to skip the gaps after we manage to fully load
9813 	     all elements.  group_gap_adj is DR_GROUP_SIZE here.  */
9814 group_elt += nunits;
9815 if (maybe_ne (group_gap_adj, 0U)
9816 && !slp_perm
9817 && known_eq (group_elt, group_size - group_gap_adj))
9818 {
9819 poly_wide_int bump_val
9820 = (wi::to_wide (TYPE_SIZE_UNIT (elem_type))
9821 * group_gap_adj);
9822 tree bump = wide_int_to_tree (sizetype, bump_val);
9823 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
9824 stmt_info, bump);
9825 group_elt = 0;
9826 }
9827 }
9828 /* Bump the vector pointer to account for a gap or for excess
9829 elements loaded for a permuted SLP load. */
9830 if (maybe_ne (group_gap_adj, 0U) && slp_perm)
9831 {
9832 poly_wide_int bump_val
9833 = (wi::to_wide (TYPE_SIZE_UNIT (elem_type))
9834 * group_gap_adj);
9835 tree bump = wide_int_to_tree (sizetype, bump_val);
9836 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
9837 stmt_info, bump);
9838 }
9839 }
9840
9841 if (slp && !slp_perm)
9842 continue;
9843
9844 if (slp_perm)
9845 {
9846 unsigned n_perms;
9847 if (!vect_transform_slp_perm_load (slp_node, dr_chain, gsi, vf,
9848 slp_node_instance, false,
9849 &n_perms))
9850 {
9851 dr_chain.release ();
9852 return false;
9853 }
9854 }
9855 else
9856 {
9857 if (grouped_load)
9858 {
9859 if (memory_access_type != VMAT_LOAD_STORE_LANES)
9860 vect_transform_grouped_load (stmt_info, dr_chain,
9861 group_size, gsi);
9862 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
9863 }
9864 else
9865 {
9866 if (j == 0)
9867 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
9868 else
9869 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
9870 prev_stmt_info = new_stmt_info;
9871 }
9872 }
9873 dr_chain.release ();
9874 }
9875
9876 return true;
9877 }
9878
9879 /* Function vect_is_simple_cond.
9880
9881 Input:
9882 LOOP - the loop that is being vectorized.
9883 COND - Condition that is checked for simple use.
9884
9885 Output:
9886 *COMP_VECTYPE - the vector type for the comparison.
9887    *DTS - the def types for the arguments of the comparison.
9888
9889    Returns whether a COND can be vectorized.  Checks whether the
9890    condition operands are supportable using vect_is_simple_use.  */
9891
9892 static bool
9893 vect_is_simple_cond (tree cond, vec_info *vinfo, slp_tree slp_node,
9894 tree *comp_vectype, enum vect_def_type *dts,
9895 tree vectype)
9896 {
9897 tree lhs, rhs;
9898 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
9899
9900 /* Mask case. */
9901 if (TREE_CODE (cond) == SSA_NAME
9902 && VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (cond)))
9903 {
9904 if (!vect_is_simple_use (cond, vinfo, &dts[0], comp_vectype)
9905 || !*comp_vectype
9906 || !VECTOR_BOOLEAN_TYPE_P (*comp_vectype))
9907 return false;
9908 return true;
9909 }
9910
9911 if (!COMPARISON_CLASS_P (cond))
9912 return false;
9913
9914 lhs = TREE_OPERAND (cond, 0);
9915 rhs = TREE_OPERAND (cond, 1);
9916
9917 if (TREE_CODE (lhs) == SSA_NAME)
9918 {
9919 if (!vect_is_simple_use (lhs, vinfo, &dts[0], &vectype1))
9920 return false;
9921 }
9922 else if (TREE_CODE (lhs) == INTEGER_CST || TREE_CODE (lhs) == REAL_CST
9923 || TREE_CODE (lhs) == FIXED_CST)
9924 dts[0] = vect_constant_def;
9925 else
9926 return false;
9927
9928 if (TREE_CODE (rhs) == SSA_NAME)
9929 {
9930 if (!vect_is_simple_use (rhs, vinfo, &dts[1], &vectype2))
9931 return false;
9932 }
9933 else if (TREE_CODE (rhs) == INTEGER_CST || TREE_CODE (rhs) == REAL_CST
9934 || TREE_CODE (rhs) == FIXED_CST)
9935 dts[1] = vect_constant_def;
9936 else
9937 return false;
9938
9939 if (vectype1 && vectype2
9940 && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1),
9941 TYPE_VECTOR_SUBPARTS (vectype2)))
9942 return false;
9943
9944 *comp_vectype = vectype1 ? vectype1 : vectype2;
9945 /* Invariant comparison. */
9946 if (! *comp_vectype)
9947 {
9948 tree scalar_type = TREE_TYPE (lhs);
9949 if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type))
9950 *comp_vectype = truth_type_for (vectype);
9951 else
9952 {
9953 /* If we can widen the comparison to match vectype do so. */
9954 if (INTEGRAL_TYPE_P (scalar_type)
9955 && !slp_node
9956 && tree_int_cst_lt (TYPE_SIZE (scalar_type),
9957 TYPE_SIZE (TREE_TYPE (vectype))))
9958 scalar_type = build_nonstandard_integer_type
9959 (tree_to_uhwi (TYPE_SIZE (TREE_TYPE (vectype))),
9960 TYPE_UNSIGNED (scalar_type));
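	  /* E.g. (illustrative): a comparison of two chars feeding a
	     V4SI COND_EXPR is widened to a 32-bit comparison so that the
	     comparison vectype has the same number of units as
	     VECTYPE.  */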
9961 *comp_vectype = get_vectype_for_scalar_type (vinfo, scalar_type,
9962 slp_node);
9963 }
9964 }
9965
9966 return true;
9967 }
9968
9969 /* vectorizable_condition.
9970
9971    Check if STMT_INFO is a conditional modify expression that can be vectorized.
9972 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
9973 stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
9974 at GSI.
9975
9976 When STMT_INFO is vectorized as a nested cycle, for_reduction is true.
9977
9978 Return true if STMT_INFO is vectorizable in this way. */
9979
9980 static bool
9981 vectorizable_condition (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
9982 stmt_vec_info *vec_stmt,
9983 slp_tree slp_node, stmt_vector_for_cost *cost_vec)
9984 {
9985 vec_info *vinfo = stmt_info->vinfo;
9986 tree scalar_dest = NULL_TREE;
9987 tree vec_dest = NULL_TREE;
9988 tree cond_expr, cond_expr0 = NULL_TREE, cond_expr1 = NULL_TREE;
9989 tree then_clause, else_clause;
9990 tree comp_vectype = NULL_TREE;
9991 tree vec_cond_lhs = NULL_TREE, vec_cond_rhs = NULL_TREE;
9992 tree vec_then_clause = NULL_TREE, vec_else_clause = NULL_TREE;
9993 tree vec_compare;
9994 tree new_temp;
9995 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
9996 enum vect_def_type dts[4]
9997 = {vect_unknown_def_type, vect_unknown_def_type,
9998 vect_unknown_def_type, vect_unknown_def_type};
9999 int ndts = 4;
10000 int ncopies;
10001 int vec_num;
10002 enum tree_code code, cond_code, bitop1 = NOP_EXPR, bitop2 = NOP_EXPR;
10003 stmt_vec_info prev_stmt_info = NULL;
10004 int i, j;
10005 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
10006 vec<tree> vec_oprnds0 = vNULL;
10007 vec<tree> vec_oprnds1 = vNULL;
10008 vec<tree> vec_oprnds2 = vNULL;
10009 vec<tree> vec_oprnds3 = vNULL;
10010 tree vec_cmp_type;
10011 bool masked = false;
10012
10013 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
10014 return false;
10015
10016   /* Is this a vectorizable conditional operation?  */
10017 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
10018 if (!stmt)
10019 return false;
10020
10021 code = gimple_assign_rhs_code (stmt);
10022 if (code != COND_EXPR)
10023 return false;
10024
10025 stmt_vec_info reduc_info = NULL;
10026 int reduc_index = -1;
10027 vect_reduction_type reduction_type = TREE_CODE_REDUCTION;
10028 bool for_reduction
10029 = STMT_VINFO_REDUC_DEF (vect_orig_stmt (stmt_info)) != NULL;
10030 if (for_reduction)
10031 {
10032 if (STMT_SLP_TYPE (stmt_info))
10033 return false;
10034 reduc_info = info_for_reduction (stmt_info);
10035 reduction_type = STMT_VINFO_REDUC_TYPE (reduc_info);
10036 reduc_index = STMT_VINFO_REDUC_IDX (stmt_info);
10037 gcc_assert (reduction_type != EXTRACT_LAST_REDUCTION
10038 || reduc_index != -1);
10039 }
10040 else
10041 {
10042 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
10043 return false;
10044
10045 /* FORNOW: only supported as part of a reduction. */
10046 if (STMT_VINFO_LIVE_P (stmt_info))
10047 {
10048 if (dump_enabled_p ())
10049 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10050 "value used after loop.\n");
10051 return false;
10052 }
10053 }
10054
10055 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
10056 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
10057
10058 if (slp_node)
10059 {
10060 ncopies = 1;
10061 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
10062 }
10063 else
10064 {
10065 ncopies = vect_get_num_copies (loop_vinfo, vectype);
10066 vec_num = 1;
10067 }
10068
10069 gcc_assert (ncopies >= 1);
10070 if (for_reduction && ncopies > 1)
10071 return false; /* FORNOW */
10072
10073 cond_expr = gimple_assign_rhs1 (stmt);
10074 then_clause = gimple_assign_rhs2 (stmt);
10075 else_clause = gimple_assign_rhs3 (stmt);
10076
10077 if (!vect_is_simple_cond (cond_expr, stmt_info->vinfo, slp_node,
10078 &comp_vectype, &dts[0], vectype)
10079 || !comp_vectype)
10080 return false;
10081
10082 if (!vect_is_simple_use (then_clause, stmt_info->vinfo, &dts[2], &vectype1))
10083 return false;
10084 if (!vect_is_simple_use (else_clause, stmt_info->vinfo, &dts[3], &vectype2))
10085 return false;
10086
10087 if (vectype1 && !useless_type_conversion_p (vectype, vectype1))
10088 return false;
10089
10090 if (vectype2 && !useless_type_conversion_p (vectype, vectype2))
10091 return false;
10092
10093 masked = !COMPARISON_CLASS_P (cond_expr);
10094 vec_cmp_type = truth_type_for (comp_vectype);
10095
10096 if (vec_cmp_type == NULL_TREE)
10097 return false;
10098
10099 cond_code = TREE_CODE (cond_expr);
10100 if (!masked)
10101 {
10102 cond_expr0 = TREE_OPERAND (cond_expr, 0);
10103 cond_expr1 = TREE_OPERAND (cond_expr, 1);
10104 }
10105
10106 /* For conditional reductions, the "then" value needs to be the candidate
10107 value calculated by this iteration while the "else" value needs to be
10108 the result carried over from previous iterations. If the COND_EXPR
10109 is the other way around, we need to swap it. */
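  /* For example (hypothetical scalar code), in the conditional reduction

         r = a[i] < b[i] ? r : a[i];

     the carried value R sits in the "then" position (reduc_index == 1),
     so we invert the comparison to a[i] >= b[i] (or, if the comparison
     cannot be inverted, mark the mask for inversion) and swap the two
     clauses.  */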
10110 bool must_invert_cmp_result = false;
10111 if (reduction_type == EXTRACT_LAST_REDUCTION && reduc_index == 1)
10112 {
10113 if (masked)
10114 must_invert_cmp_result = true;
10115 else
10116 {
10117 bool honor_nans = HONOR_NANS (TREE_TYPE (cond_expr0));
10118 tree_code new_code = invert_tree_comparison (cond_code, honor_nans);
10119 if (new_code == ERROR_MARK)
10120 must_invert_cmp_result = true;
10121 else
10122 {
10123 cond_code = new_code;
10124 /* Make sure we don't accidentally use the old condition. */
10125 cond_expr = NULL_TREE;
10126 }
10127 }
10128 std::swap (then_clause, else_clause);
10129 }
10130
10131 if (!masked && VECTOR_BOOLEAN_TYPE_P (comp_vectype))
10132 {
10133 /* Boolean values may have another representation in vectors
10134 and therefore we prefer bit operations over comparison for
10135 them (which also works for scalar masks). We store opcodes
10136 to use in bitop1 and bitop2. Statement is vectorized as
10137 BITOP2 (rhs1 BITOP1 rhs2) or rhs1 BITOP2 (BITOP1 rhs2)
10138 depending on bitop1 and bitop2 arity. */
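      /* A concrete sketch, assuming single-bit boolean elements A and B:
         A > B holds iff A is true and B is false, so GT is rewritten as
         A & ~B (bitop1 = BIT_NOT_EXPR applied to B, bitop2 = BIT_AND_EXPR),
         and NE is simply A ^ B.  */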
10139 switch (cond_code)
10140 {
10141 case GT_EXPR:
10142 bitop1 = BIT_NOT_EXPR;
10143 bitop2 = BIT_AND_EXPR;
10144 break;
10145 case GE_EXPR:
10146 bitop1 = BIT_NOT_EXPR;
10147 bitop2 = BIT_IOR_EXPR;
10148 break;
10149 case LT_EXPR:
10150 bitop1 = BIT_NOT_EXPR;
10151 bitop2 = BIT_AND_EXPR;
10152 std::swap (cond_expr0, cond_expr1);
10153 break;
10154 case LE_EXPR:
10155 bitop1 = BIT_NOT_EXPR;
10156 bitop2 = BIT_IOR_EXPR;
10157 std::swap (cond_expr0, cond_expr1);
10158 break;
10159 case NE_EXPR:
10160 bitop1 = BIT_XOR_EXPR;
10161 break;
10162 case EQ_EXPR:
10163 bitop1 = BIT_XOR_EXPR;
10164 bitop2 = BIT_NOT_EXPR;
10165 break;
10166 default:
10167 return false;
10168 }
10169 cond_code = SSA_NAME;
10170 }
10171
10172 if (TREE_CODE_CLASS (cond_code) == tcc_comparison
10173 && reduction_type == EXTRACT_LAST_REDUCTION
10174 && !expand_vec_cmp_expr_p (comp_vectype, vec_cmp_type, cond_code))
10175 {
10176 if (dump_enabled_p ())
10177 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10178 "reduction comparison operation not supported.\n");
10179 return false;
10180 }
10181
10182 if (!vec_stmt)
10183 {
10184 if (bitop1 != NOP_EXPR)
10185 {
10186 machine_mode mode = TYPE_MODE (comp_vectype);
10187 optab optab;
10188
10189 optab = optab_for_tree_code (bitop1, comp_vectype, optab_default);
10190 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
10191 return false;
10192
10193 if (bitop2 != NOP_EXPR)
10194 {
10195 optab = optab_for_tree_code (bitop2, comp_vectype,
10196 optab_default);
10197 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
10198 return false;
10199 }
10200 }
10201
10202 if (loop_vinfo
10203 && LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo)
10204 && reduction_type == EXTRACT_LAST_REDUCTION)
10205 vect_record_loop_mask (loop_vinfo, &LOOP_VINFO_MASKS (loop_vinfo),
10206 ncopies * vec_num, vectype, NULL);
10207
10208 vect_cost_for_stmt kind = vector_stmt;
10209 if (reduction_type == EXTRACT_LAST_REDUCTION)
10210 /* Count one reduction-like operation per vector. */
10211 kind = vec_to_scalar;
10212 else if (!expand_vec_cond_expr_p (vectype, comp_vectype, cond_code))
10213 return false;
10214
10215 STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type;
10216 vect_model_simple_cost (stmt_info, ncopies, dts, ndts, slp_node,
10217 cost_vec, kind);
10218 return true;
10219 }
10220
10221 /* Transform. */
10222
10223 if (!slp_node)
10224 {
10225 vec_oprnds0.create (1);
10226 vec_oprnds1.create (1);
10227 vec_oprnds2.create (1);
10228 vec_oprnds3.create (1);
10229 }
10230
10231 /* Handle def. */
10232 scalar_dest = gimple_assign_lhs (stmt);
10233 if (reduction_type != EXTRACT_LAST_REDUCTION)
10234 vec_dest = vect_create_destination_var (scalar_dest, vectype);
10235
10236 /* Handle cond expr. */
10237 for (j = 0; j < ncopies; j++)
10238 {
10239 bool swap_cond_operands = false;
10240
10241 /* See whether another part of the vectorized code applies a loop
10242 mask to the condition, or to its inverse. */
10243
10244 vec_loop_masks *masks = NULL;
10245 if (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo))
10246 {
10247 if (reduction_type == EXTRACT_LAST_REDUCTION)
10248 masks = &LOOP_VINFO_MASKS (loop_vinfo);
10249 else
10250 {
10251 scalar_cond_masked_key cond (cond_expr, ncopies);
10252 if (loop_vinfo->scalar_cond_masked_set.contains (cond))
10253 masks = &LOOP_VINFO_MASKS (loop_vinfo);
10254 else
10255 {
10256 bool honor_nans = HONOR_NANS (TREE_TYPE (cond.op0));
10257 cond.code = invert_tree_comparison (cond.code, honor_nans);
10258 if (loop_vinfo->scalar_cond_masked_set.contains (cond))
10259 {
10260 masks = &LOOP_VINFO_MASKS (loop_vinfo);
10261 cond_code = cond.code;
10262 swap_cond_operands = true;
10263 }
10264 }
10265 }
10266 }
10267
10268 stmt_vec_info new_stmt_info = NULL;
10269 if (j == 0)
10270 {
10271 if (slp_node)
10272 {
10273 auto_vec<vec<tree>, 4> vec_defs;
10274 vect_get_slp_defs (slp_node, &vec_defs);
10275 vec_oprnds3 = vec_defs.pop ();
10276 vec_oprnds2 = vec_defs.pop ();
10277 if (!masked)
10278 vec_oprnds1 = vec_defs.pop ();
10279 vec_oprnds0 = vec_defs.pop ();
10280 }
10281 else
10282 {
10283 if (masked)
10284 {
10285 vec_cond_lhs
10286 = vect_get_vec_def_for_operand (cond_expr, stmt_info,
10287 comp_vectype);
10288 }
10289 else
10290 {
10291 vec_cond_lhs
10292 = vect_get_vec_def_for_operand (cond_expr0,
10293 stmt_info, comp_vectype);
10294 vec_cond_rhs
10295 = vect_get_vec_def_for_operand (cond_expr1,
10296 stmt_info, comp_vectype);
10297 }
10298 vec_then_clause = vect_get_vec_def_for_operand (then_clause,
10299 stmt_info);
10300 if (reduction_type != EXTRACT_LAST_REDUCTION)
10301 vec_else_clause = vect_get_vec_def_for_operand (else_clause,
10302 stmt_info);
10303 }
10304 }
10305 else
10306 {
10307 vec_cond_lhs
10308 = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnds0.pop ());
10309 if (!masked)
10310 vec_cond_rhs
10311 = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnds1.pop ());
10312
10313 vec_then_clause = vect_get_vec_def_for_stmt_copy (vinfo,
10314 vec_oprnds2.pop ());
10315 vec_else_clause = vect_get_vec_def_for_stmt_copy (vinfo,
10316 vec_oprnds3.pop ());
10317 }
10318
10319 if (!slp_node)
10320 {
10321 vec_oprnds0.quick_push (vec_cond_lhs);
10322 if (!masked)
10323 vec_oprnds1.quick_push (vec_cond_rhs);
10324 vec_oprnds2.quick_push (vec_then_clause);
10325 vec_oprnds3.quick_push (vec_else_clause);
10326 }
10327
10328 /* Arguments are ready. Create the new vector stmt. */
10329 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_cond_lhs)
10330 {
10331 vec_then_clause = vec_oprnds2[i];
10332 vec_else_clause = vec_oprnds3[i];
10333
10334 if (swap_cond_operands)
10335 std::swap (vec_then_clause, vec_else_clause);
10336
10337 if (masked)
10338 vec_compare = vec_cond_lhs;
10339 else
10340 {
10341 vec_cond_rhs = vec_oprnds1[i];
10342 if (bitop1 == NOP_EXPR)
10343 vec_compare = build2 (cond_code, vec_cmp_type,
10344 vec_cond_lhs, vec_cond_rhs);
10345 else
10346 {
10347 new_temp = make_ssa_name (vec_cmp_type);
10348 gassign *new_stmt;
10349 if (bitop1 == BIT_NOT_EXPR)
10350 new_stmt = gimple_build_assign (new_temp, bitop1,
10351 vec_cond_rhs);
10352 else
10353 new_stmt
10354 = gimple_build_assign (new_temp, bitop1, vec_cond_lhs,
10355 vec_cond_rhs);
10356 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
10357 if (bitop2 == NOP_EXPR)
10358 vec_compare = new_temp;
10359 else if (bitop2 == BIT_NOT_EXPR)
10360 {
10361 /* Instead of doing ~x ? y : z do x ? z : y. */
10362 vec_compare = new_temp;
10363 std::swap (vec_then_clause, vec_else_clause);
10364 }
10365 else
10366 {
10367 vec_compare = make_ssa_name (vec_cmp_type);
10368 new_stmt
10369 = gimple_build_assign (vec_compare, bitop2,
10370 vec_cond_lhs, new_temp);
10371 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
10372 }
10373 }
10374 }
10375
10376 /* If we decided to apply a loop mask to the result of the vector
10377 comparison, AND the comparison with the mask now. Later passes
10378      should then be able to reuse the AND results between multiple
10379 vector statements.
10380
10381 For example:
10382 for (int i = 0; i < 100; ++i)
10383 x[i] = y[i] ? z[i] : 10;
10384
10385 results in following optimized GIMPLE:
10386
10387 mask__35.8_43 = vect__4.7_41 != { 0, ... };
10388 vec_mask_and_46 = loop_mask_40 & mask__35.8_43;
10389 _19 = &MEM[base: z_12(D), index: ivtmp_56, step: 4, offset: 0B];
10390 vect_iftmp.11_47 = .MASK_LOAD (_19, 4B, vec_mask_and_46);
10391 vect_iftmp.12_52 = VEC_COND_EXPR <vec_mask_and_46,
10392 vect_iftmp.11_47, { 10, ... }>;
10393
10394      instead of using masked and unmasked forms of
10395 vec != { 0, ... } (masked in the MASK_LOAD,
10396 unmasked in the VEC_COND_EXPR). */
10397
10398 /* Force vec_compare to be an SSA_NAME rather than a comparison,
10399 in cases where that's necessary. */
10400
10401 if (masks || reduction_type == EXTRACT_LAST_REDUCTION)
10402 {
10403 if (!is_gimple_val (vec_compare))
10404 {
10405 tree vec_compare_name = make_ssa_name (vec_cmp_type);
10406 gassign *new_stmt = gimple_build_assign (vec_compare_name,
10407 vec_compare);
10408 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
10409 vec_compare = vec_compare_name;
10410 }
10411
10412 if (must_invert_cmp_result)
10413 {
10414 tree vec_compare_name = make_ssa_name (vec_cmp_type);
10415 gassign *new_stmt = gimple_build_assign (vec_compare_name,
10416 BIT_NOT_EXPR,
10417 vec_compare);
10418 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
10419 vec_compare = vec_compare_name;
10420 }
10421
10422 if (masks)
10423 {
10424 unsigned vec_num = vec_oprnds0.length ();
10425 tree loop_mask
10426 = vect_get_loop_mask (gsi, masks, vec_num * ncopies,
10427 vectype, vec_num * j + i);
10428 tree tmp2 = make_ssa_name (vec_cmp_type);
10429 gassign *g
10430 = gimple_build_assign (tmp2, BIT_AND_EXPR, vec_compare,
10431 loop_mask);
10432 vect_finish_stmt_generation (stmt_info, g, gsi);
10433 vec_compare = tmp2;
10434 }
10435 }
10436
10437 if (reduction_type == EXTRACT_LAST_REDUCTION)
10438 {
10439 gimple *old_stmt = vect_orig_stmt (stmt_info)->stmt;
10440 tree lhs = gimple_get_lhs (old_stmt);
10441 gcall *new_stmt = gimple_build_call_internal
10442 (IFN_FOLD_EXTRACT_LAST, 3, else_clause, vec_compare,
10443 vec_then_clause);
10444 gimple_call_set_lhs (new_stmt, lhs);
10445 SSA_NAME_DEF_STMT (lhs) = new_stmt;
10446 if (old_stmt == gsi_stmt (*gsi))
10447 new_stmt_info = vect_finish_replace_stmt (stmt_info, new_stmt);
10448 else
10449 {
10450 /* In this case we're moving the definition to later in the
10451 block. That doesn't matter because the only uses of the
10452 lhs are in phi statements. */
10453 gimple_stmt_iterator old_gsi = gsi_for_stmt (old_stmt);
10454 gsi_remove (&old_gsi, true);
10455 new_stmt_info
10456 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
10457 }
10458 }
10459 else
10460 {
10461 new_temp = make_ssa_name (vec_dest);
10462 gassign *new_stmt
10463 = gimple_build_assign (new_temp, VEC_COND_EXPR, vec_compare,
10464 vec_then_clause, vec_else_clause);
10465 new_stmt_info
10466 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
10467 }
10468 if (slp_node)
10469 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
10470 }
10471
10472 if (slp_node)
10473 continue;
10474
10475 if (j == 0)
10476 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
10477 else
10478 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
10479
10480 prev_stmt_info = new_stmt_info;
10481 }
10482
10483 vec_oprnds0.release ();
10484 vec_oprnds1.release ();
10485 vec_oprnds2.release ();
10486 vec_oprnds3.release ();
10487
10488 return true;
10489 }
10490
10491 /* vectorizable_comparison.
10492
10493    Check if STMT_INFO is a comparison expression that can be vectorized.
10494 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
10495 comparison, put it in VEC_STMT, and insert it at GSI.
10496
10497 Return true if STMT_INFO is vectorizable in this way. */
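
/* For instance (a made-up example), the scalar mask definition

       m = a != b;

   is vectorized into a comparison producing a boolean vector

       vm = va != vb;

   whose type is the vector mask type for the operand vector type.  */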
10498
10499 static bool
10500 vectorizable_comparison (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
10501 stmt_vec_info *vec_stmt,
10502 slp_tree slp_node, stmt_vector_for_cost *cost_vec)
10503 {
10504 vec_info *vinfo = stmt_info->vinfo;
10505 tree lhs, rhs1, rhs2;
10506 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
10507 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
10508 tree vec_rhs1 = NULL_TREE, vec_rhs2 = NULL_TREE;
10509 tree new_temp;
10510 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
10511 enum vect_def_type dts[2] = {vect_unknown_def_type, vect_unknown_def_type};
10512 int ndts = 2;
10513 poly_uint64 nunits;
10514 int ncopies;
10515 enum tree_code code, bitop1 = NOP_EXPR, bitop2 = NOP_EXPR;
10516 stmt_vec_info prev_stmt_info = NULL;
10517 int i, j;
10518 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
10519 vec<tree> vec_oprnds0 = vNULL;
10520 vec<tree> vec_oprnds1 = vNULL;
10521 tree mask_type;
10522 tree mask;
10523
10524 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
10525 return false;
10526
10527 if (!vectype || !VECTOR_BOOLEAN_TYPE_P (vectype))
10528 return false;
10529
10530 mask_type = vectype;
10531 nunits = TYPE_VECTOR_SUBPARTS (vectype);
10532
10533 if (slp_node)
10534 ncopies = 1;
10535 else
10536 ncopies = vect_get_num_copies (loop_vinfo, vectype);
10537
10538 gcc_assert (ncopies >= 1);
10539 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
10540 return false;
10541
10542 if (STMT_VINFO_LIVE_P (stmt_info))
10543 {
10544 if (dump_enabled_p ())
10545 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10546 "value used after loop.\n");
10547 return false;
10548 }
10549
10550 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
10551 if (!stmt)
10552 return false;
10553
10554 code = gimple_assign_rhs_code (stmt);
10555
10556 if (TREE_CODE_CLASS (code) != tcc_comparison)
10557 return false;
10558
10559 rhs1 = gimple_assign_rhs1 (stmt);
10560 rhs2 = gimple_assign_rhs2 (stmt);
10561
10562 if (!vect_is_simple_use (rhs1, stmt_info->vinfo, &dts[0], &vectype1))
10563 return false;
10564
10565 if (!vect_is_simple_use (rhs2, stmt_info->vinfo, &dts[1], &vectype2))
10566 return false;
10567
10568 if (vectype1 && vectype2
10569 && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1),
10570 TYPE_VECTOR_SUBPARTS (vectype2)))
10571 return false;
10572
10573 vectype = vectype1 ? vectype1 : vectype2;
10574
10575 /* Invariant comparison. */
10576 if (!vectype)
10577 {
10578 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs1)))
10579 vectype = mask_type;
10580 else
10581 vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (rhs1),
10582 slp_node);
10583 if (!vectype || maybe_ne (TYPE_VECTOR_SUBPARTS (vectype), nunits))
10584 return false;
10585 }
10586 else if (maybe_ne (nunits, TYPE_VECTOR_SUBPARTS (vectype)))
10587 return false;
10588
10589 /* Can't compare mask and non-mask types. */
10590 if (vectype1 && vectype2
10591 && (VECTOR_BOOLEAN_TYPE_P (vectype1) ^ VECTOR_BOOLEAN_TYPE_P (vectype2)))
10592 return false;
10593
10594 /* Boolean values may have another representation in vectors
10595 and therefore we prefer bit operations over comparison for
10596 them (which also works for scalar masks). We store opcodes
10597 to use in bitop1 and bitop2. Statement is vectorized as
10598 BITOP2 (rhs1 BITOP1 rhs2) or
10599 rhs1 BITOP2 (BITOP1 rhs2)
10600 depending on bitop1 and bitop2 arity. */
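  /* E.g., assuming boolean elements A and B (illustration only): A >= B
     fails only when A is false and B is true, so GE becomes A | ~B
     (bitop1 = BIT_NOT_EXPR, bitop2 = BIT_IOR_EXPR); LT and LE reuse the
     GT and GE patterns with the operands swapped (SWAP_P).  */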
10601 bool swap_p = false;
10602 if (VECTOR_BOOLEAN_TYPE_P (vectype))
10603 {
10604 if (code == GT_EXPR)
10605 {
10606 bitop1 = BIT_NOT_EXPR;
10607 bitop2 = BIT_AND_EXPR;
10608 }
10609 else if (code == GE_EXPR)
10610 {
10611 bitop1 = BIT_NOT_EXPR;
10612 bitop2 = BIT_IOR_EXPR;
10613 }
10614 else if (code == LT_EXPR)
10615 {
10616 bitop1 = BIT_NOT_EXPR;
10617 bitop2 = BIT_AND_EXPR;
10618 swap_p = true;
10619 }
10620 else if (code == LE_EXPR)
10621 {
10622 bitop1 = BIT_NOT_EXPR;
10623 bitop2 = BIT_IOR_EXPR;
10624 swap_p = true;
10625 }
10626 else
10627 {
10628 bitop1 = BIT_XOR_EXPR;
10629 if (code == EQ_EXPR)
10630 bitop2 = BIT_NOT_EXPR;
10631 }
10632 }
10633
10634 if (!vec_stmt)
10635 {
10636 if (bitop1 == NOP_EXPR)
10637 {
10638 if (!expand_vec_cmp_expr_p (vectype, mask_type, code))
10639 return false;
10640 }
10641 else
10642 {
10643 machine_mode mode = TYPE_MODE (vectype);
10644 optab optab;
10645
10646 optab = optab_for_tree_code (bitop1, vectype, optab_default);
10647 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
10648 return false;
10649
10650 if (bitop2 != NOP_EXPR)
10651 {
10652 optab = optab_for_tree_code (bitop2, vectype, optab_default);
10653 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
10654 return false;
10655 }
10656 }
10657
10658 STMT_VINFO_TYPE (stmt_info) = comparison_vec_info_type;
10659 vect_model_simple_cost (stmt_info, ncopies * (1 + (bitop2 != NOP_EXPR)),
10660 dts, ndts, slp_node, cost_vec);
10661 return true;
10662 }
10663
10664 /* Transform. */
10665 if (!slp_node)
10666 {
10667 vec_oprnds0.create (1);
10668 vec_oprnds1.create (1);
10669 }
10670
10671 /* Handle def. */
10672 lhs = gimple_assign_lhs (stmt);
10673 mask = vect_create_destination_var (lhs, mask_type);
10674
10675 /* Handle cmp expr. */
10676 for (j = 0; j < ncopies; j++)
10677 {
10678 stmt_vec_info new_stmt_info = NULL;
10679 if (j == 0)
10680 {
10681 if (slp_node)
10682 {
10683 auto_vec<vec<tree>, 2> vec_defs;
10684 vect_get_slp_defs (slp_node, &vec_defs);
10685 vec_oprnds1 = vec_defs.pop ();
10686 vec_oprnds0 = vec_defs.pop ();
10687 if (swap_p)
10688 std::swap (vec_oprnds0, vec_oprnds1);
10689 }
10690 else
10691 {
10692 vec_rhs1 = vect_get_vec_def_for_operand (rhs1, stmt_info,
10693 vectype);
10694 vec_rhs2 = vect_get_vec_def_for_operand (rhs2, stmt_info,
10695 vectype);
10696 }
10697 }
10698 else
10699 {
10700 vec_rhs1 = vect_get_vec_def_for_stmt_copy (vinfo,
10701 vec_oprnds0.pop ());
10702 vec_rhs2 = vect_get_vec_def_for_stmt_copy (vinfo,
10703 vec_oprnds1.pop ());
10704 }
10705
10706 if (!slp_node)
10707 {
10708 if (swap_p && j == 0)
10709 std::swap (vec_rhs1, vec_rhs2);
10710 vec_oprnds0.quick_push (vec_rhs1);
10711 vec_oprnds1.quick_push (vec_rhs2);
10712 }
10713
10714 /* Arguments are ready. Create the new vector stmt. */
10715 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_rhs1)
10716 {
10717 vec_rhs2 = vec_oprnds1[i];
10718
10719 new_temp = make_ssa_name (mask);
10720 if (bitop1 == NOP_EXPR)
10721 {
10722 gassign *new_stmt = gimple_build_assign (new_temp, code,
10723 vec_rhs1, vec_rhs2);
10724 new_stmt_info
10725 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
10726 }
10727 else
10728 {
10729 gassign *new_stmt;
10730 if (bitop1 == BIT_NOT_EXPR)
10731 new_stmt = gimple_build_assign (new_temp, bitop1, vec_rhs2);
10732 else
10733 new_stmt = gimple_build_assign (new_temp, bitop1, vec_rhs1,
10734 vec_rhs2);
10735 new_stmt_info
10736 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
10737 if (bitop2 != NOP_EXPR)
10738 {
10739 tree res = make_ssa_name (mask);
10740 if (bitop2 == BIT_NOT_EXPR)
10741 new_stmt = gimple_build_assign (res, bitop2, new_temp);
10742 else
10743 new_stmt = gimple_build_assign (res, bitop2, vec_rhs1,
10744 new_temp);
10745 new_stmt_info
10746 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
10747 }
10748 }
10749 if (slp_node)
10750 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
10751 }
10752
10753 if (slp_node)
10754 continue;
10755
10756 if (j == 0)
10757 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
10758 else
10759 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
10760
10761 prev_stmt_info = new_stmt_info;
10762 }
10763
10764 vec_oprnds0.release ();
10765 vec_oprnds1.release ();
10766
10767 return true;
10768 }
10769
10770 /* If SLP_NODE is nonnull, return true if vectorizable_live_operation
10771 can handle all live statements in the node. Otherwise return true
10772 if STMT_INFO is not live or if vectorizable_live_operation can handle it.
10773 GSI and VEC_STMT_P are as for vectorizable_live_operation. */
10774
10775 static bool
10776 can_vectorize_live_stmts (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
10777 slp_tree slp_node, slp_instance slp_node_instance,
10778 bool vec_stmt_p,
10779 stmt_vector_for_cost *cost_vec)
10780 {
10781 if (slp_node)
10782 {
10783 stmt_vec_info slp_stmt_info;
10784 unsigned int i;
10785 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (slp_node), i, slp_stmt_info)
10786 {
10787 if (STMT_VINFO_LIVE_P (slp_stmt_info)
10788 && !vectorizable_live_operation (slp_stmt_info, gsi, slp_node,
10789 slp_node_instance, i,
10790 vec_stmt_p, cost_vec))
10791 return false;
10792 }
10793 }
10794 else if (STMT_VINFO_LIVE_P (stmt_info)
10795 && !vectorizable_live_operation (stmt_info, gsi, slp_node,
10796 slp_node_instance, -1,
10797 vec_stmt_p, cost_vec))
10798 return false;
10799
10800 return true;
10801 }
10802
10803 /* Make sure the statement is vectorizable. */
10804
10805 opt_result
10806 vect_analyze_stmt (stmt_vec_info stmt_info, bool *need_to_vectorize,
10807 slp_tree node, slp_instance node_instance,
10808 stmt_vector_for_cost *cost_vec)
10809 {
10810 vec_info *vinfo = stmt_info->vinfo;
10811 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
10812 enum vect_relevant relevance = STMT_VINFO_RELEVANT (stmt_info);
10813 bool ok;
10814 gimple_seq pattern_def_seq;
10815
10816 if (dump_enabled_p ())
10817 dump_printf_loc (MSG_NOTE, vect_location, "==> examining statement: %G",
10818 stmt_info->stmt);
10819
10820 if (gimple_has_volatile_ops (stmt_info->stmt))
10821 return opt_result::failure_at (stmt_info->stmt,
10822 "not vectorized:"
10823 " stmt has volatile operands: %G\n",
10824 stmt_info->stmt);
10825
10826 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
10827 && node == NULL
10828 && (pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info)))
10829 {
10830 gimple_stmt_iterator si;
10831
10832 for (si = gsi_start (pattern_def_seq); !gsi_end_p (si); gsi_next (&si))
10833 {
10834 stmt_vec_info pattern_def_stmt_info
10835 = vinfo->lookup_stmt (gsi_stmt (si));
10836 if (STMT_VINFO_RELEVANT_P (pattern_def_stmt_info)
10837 || STMT_VINFO_LIVE_P (pattern_def_stmt_info))
10838 {
10839 /* Analyze def stmt of STMT if it's a pattern stmt. */
10840 if (dump_enabled_p ())
10841 dump_printf_loc (MSG_NOTE, vect_location,
10842 "==> examining pattern def statement: %G",
10843 pattern_def_stmt_info->stmt);
10844
10845 opt_result res
10846 = vect_analyze_stmt (pattern_def_stmt_info,
10847 need_to_vectorize, node, node_instance,
10848 cost_vec);
10849 if (!res)
10850 return res;
10851 }
10852 }
10853 }
10854
10855 /* Skip stmts that do not need to be vectorized. In loops this is expected
10856 to include:
10857 - the COND_EXPR which is the loop exit condition
10858 - any LABEL_EXPRs in the loop
10859 - computations that are used only for array indexing or loop control.
10860    In basic blocks we only analyze statements that are a part of some SLP
10861    instance; therefore, all the statements are relevant.
10862
10863    A pattern statement needs to be analyzed instead of the original statement
10864    if the original statement is not relevant.  Otherwise, we analyze both
10865    statements.  In basic blocks we are called from some SLP instance
10866    traversal, so don't analyze pattern stmts instead; the pattern stmts
10867    will already be part of an SLP instance.  */
10868
10869 stmt_vec_info pattern_stmt_info = STMT_VINFO_RELATED_STMT (stmt_info);
10870 if (!STMT_VINFO_RELEVANT_P (stmt_info)
10871 && !STMT_VINFO_LIVE_P (stmt_info))
10872 {
10873 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
10874 && pattern_stmt_info
10875 && (STMT_VINFO_RELEVANT_P (pattern_stmt_info)
10876 || STMT_VINFO_LIVE_P (pattern_stmt_info)))
10877 {
10878 /* Analyze PATTERN_STMT instead of the original stmt. */
10879 stmt_info = pattern_stmt_info;
10880 if (dump_enabled_p ())
10881 dump_printf_loc (MSG_NOTE, vect_location,
10882 "==> examining pattern statement: %G",
10883 stmt_info->stmt);
10884 }
10885 else
10886 {
10887 if (dump_enabled_p ())
10888 dump_printf_loc (MSG_NOTE, vect_location, "irrelevant.\n");
10889
10890 return opt_result::success ();
10891 }
10892 }
10893 else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
10894 && node == NULL
10895 && pattern_stmt_info
10896 && (STMT_VINFO_RELEVANT_P (pattern_stmt_info)
10897 || STMT_VINFO_LIVE_P (pattern_stmt_info)))
10898 {
10899 /* Analyze PATTERN_STMT too. */
10900 if (dump_enabled_p ())
10901 dump_printf_loc (MSG_NOTE, vect_location,
10902 "==> examining pattern statement: %G",
10903 pattern_stmt_info->stmt);
10904
10905 opt_result res
10906 = vect_analyze_stmt (pattern_stmt_info, need_to_vectorize, node,
10907 node_instance, cost_vec);
10908 if (!res)
10909 return res;
10910 }
10911
10912 switch (STMT_VINFO_DEF_TYPE (stmt_info))
10913 {
10914 case vect_internal_def:
10915 break;
10916
10917 case vect_reduction_def:
10918 case vect_nested_cycle:
10919 gcc_assert (!bb_vinfo
10920 && (relevance == vect_used_in_outer
10921 || relevance == vect_used_in_outer_by_reduction
10922 || relevance == vect_used_by_reduction
10923 || relevance == vect_unused_in_scope
10924 || relevance == vect_used_only_live));
10925 break;
10926
10927 case vect_induction_def:
10928 gcc_assert (!bb_vinfo);
10929 break;
10930
10931 case vect_constant_def:
10932 case vect_external_def:
10933 case vect_unknown_def_type:
10934 default:
10935 gcc_unreachable ();
10936 }
10937
10938 if (STMT_VINFO_RELEVANT_P (stmt_info))
10939 {
10940 tree type = gimple_expr_type (stmt_info->stmt);
10941 gcc_assert (!VECTOR_MODE_P (TYPE_MODE (type)));
10942 gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
10943 gcc_assert (STMT_VINFO_VECTYPE (stmt_info)
10944 || (call && gimple_call_lhs (call) == NULL_TREE));
10945 *need_to_vectorize = true;
10946 }
10947
10948 if (PURE_SLP_STMT (stmt_info) && !node)
10949 {
10950 if (dump_enabled_p ())
10951 dump_printf_loc (MSG_NOTE, vect_location,
10952 "handled only by SLP analysis\n");
10953 return opt_result::success ();
10954 }
10955
10956 ok = true;
10957 if (!bb_vinfo
10958 && (STMT_VINFO_RELEVANT_P (stmt_info)
10959 || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def))
10960 /* Prefer vectorizable_call over vectorizable_simd_clone_call so
10961 -mveclibabi= takes preference over library functions with
10962 the simd attribute. */
10963 ok = (vectorizable_call (stmt_info, NULL, NULL, node, cost_vec)
10964 || vectorizable_simd_clone_call (stmt_info, NULL, NULL, node,
10965 cost_vec)
10966 || vectorizable_conversion (stmt_info, NULL, NULL, node, cost_vec)
10967 || vectorizable_operation (stmt_info, NULL, NULL, node, cost_vec)
10968 || vectorizable_assignment (stmt_info, NULL, NULL, node, cost_vec)
10969 || vectorizable_load (stmt_info, NULL, NULL, node, node_instance,
10970 cost_vec)
10971 || vectorizable_store (stmt_info, NULL, NULL, node, cost_vec)
10972 || vectorizable_reduction (stmt_info, node, node_instance, cost_vec)
10973 || vectorizable_induction (stmt_info, NULL, NULL, node, cost_vec)
10974 || vectorizable_shift (stmt_info, NULL, NULL, node, cost_vec)
10975 || vectorizable_condition (stmt_info, NULL, NULL, node, cost_vec)
10976 || vectorizable_comparison (stmt_info, NULL, NULL, node,
10977 cost_vec)
10978 || vectorizable_lc_phi (stmt_info, NULL, node));
10979 else
10980 {
10981 if (bb_vinfo)
10982 ok = (vectorizable_call (stmt_info, NULL, NULL, node, cost_vec)
10983 || vectorizable_simd_clone_call (stmt_info, NULL, NULL, node,
10984 cost_vec)
10985 || vectorizable_conversion (stmt_info, NULL, NULL, node,
10986 cost_vec)
10987 || vectorizable_shift (stmt_info, NULL, NULL, node, cost_vec)
10988 || vectorizable_operation (stmt_info, NULL, NULL, node, cost_vec)
10989 || vectorizable_assignment (stmt_info, NULL, NULL, node,
10990 cost_vec)
10991 || vectorizable_load (stmt_info, NULL, NULL, node, node_instance,
10992 cost_vec)
10993 || vectorizable_store (stmt_info, NULL, NULL, node, cost_vec)
10994 || vectorizable_condition (stmt_info, NULL, NULL, node, cost_vec)
10995 || vectorizable_comparison (stmt_info, NULL, NULL, node,
10996 cost_vec));
10997 }
10998
10999 if (!ok)
11000 return opt_result::failure_at (stmt_info->stmt,
11001 "not vectorized:"
11002 " relevant stmt not supported: %G",
11003 stmt_info->stmt);
11004
11005   /* Stmts that are (also) "live" (i.e., used out of the loop)
11006 need extra handling, except for vectorizable reductions. */
11007 if (!bb_vinfo
11008 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
11009 && STMT_VINFO_TYPE (stmt_info) != lc_phi_info_type
11010 && !can_vectorize_live_stmts (stmt_info, NULL, node, node_instance,
11011 false, cost_vec))
11012 return opt_result::failure_at (stmt_info->stmt,
11013 "not vectorized:"
11014 " live stmt not supported: %G",
11015 stmt_info->stmt);
11016
11017 return opt_result::success ();
11018 }
11019
11020
11021 /* Function vect_transform_stmt.
11022
11023 Create a vectorized stmt to replace STMT_INFO, and insert it at GSI. */
11024
11025 bool
11026 vect_transform_stmt (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
11027 slp_tree slp_node, slp_instance slp_node_instance)
11028 {
11029 vec_info *vinfo = stmt_info->vinfo;
11030 bool is_store = false;
11031 stmt_vec_info vec_stmt = NULL;
11032 bool done;
11033
11034 gcc_assert (slp_node || !PURE_SLP_STMT (stmt_info));
11035 stmt_vec_info old_vec_stmt_info = STMT_VINFO_VEC_STMT (stmt_info);
11036
11037 bool nested_p = (STMT_VINFO_LOOP_VINFO (stmt_info)
11038 && nested_in_vect_loop_p
11039 (LOOP_VINFO_LOOP (STMT_VINFO_LOOP_VINFO (stmt_info)),
11040 stmt_info));
11041
11042 gimple *stmt = stmt_info->stmt;
11043 switch (STMT_VINFO_TYPE (stmt_info))
11044 {
11045 case type_demotion_vec_info_type:
11046 case type_promotion_vec_info_type:
11047 case type_conversion_vec_info_type:
11048 done = vectorizable_conversion (stmt_info, gsi, &vec_stmt, slp_node,
11049 NULL);
11050 gcc_assert (done);
11051 break;
11052
11053 case induc_vec_info_type:
11054 done = vectorizable_induction (stmt_info, gsi, &vec_stmt, slp_node,
11055 NULL);
11056 gcc_assert (done);
11057 break;
11058
11059 case shift_vec_info_type:
11060 done = vectorizable_shift (stmt_info, gsi, &vec_stmt, slp_node, NULL);
11061 gcc_assert (done);
11062 break;
11063
11064 case op_vec_info_type:
11065 done = vectorizable_operation (stmt_info, gsi, &vec_stmt, slp_node,
11066 NULL);
11067 gcc_assert (done);
11068 break;
11069
11070 case assignment_vec_info_type:
11071 done = vectorizable_assignment (stmt_info, gsi, &vec_stmt, slp_node,
11072 NULL);
11073 gcc_assert (done);
11074 break;
11075
11076 case load_vec_info_type:
11077 done = vectorizable_load (stmt_info, gsi, &vec_stmt, slp_node,
11078 slp_node_instance, NULL);
11079 gcc_assert (done);
11080 break;
11081
11082 case store_vec_info_type:
11083 done = vectorizable_store (stmt_info, gsi, &vec_stmt, slp_node, NULL);
11084 gcc_assert (done);
11085 if (STMT_VINFO_GROUPED_ACCESS (stmt_info) && !slp_node)
11086 {
11087 /* In case of interleaving, the whole chain is vectorized when the
11088 last store in the chain is reached. Store stmts before the last
11089      one are skipped, and their vec_stmt_info shouldn't be freed
11090 meanwhile. */
11091 stmt_vec_info group_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
11092 if (DR_GROUP_STORE_COUNT (group_info) == DR_GROUP_SIZE (group_info))
11093 is_store = true;
11094 }
11095 else
11096 is_store = true;
11097 break;
11098
11099 case condition_vec_info_type:
11100 done = vectorizable_condition (stmt_info, gsi, &vec_stmt, slp_node, NULL);
11101 gcc_assert (done);
11102 break;
11103
11104 case comparison_vec_info_type:
11105 done = vectorizable_comparison (stmt_info, gsi, &vec_stmt,
11106 slp_node, NULL);
11107 gcc_assert (done);
11108 break;
11109
11110 case call_vec_info_type:
11111 done = vectorizable_call (stmt_info, gsi, &vec_stmt, slp_node, NULL);
11112 stmt = gsi_stmt (*gsi);
11113 break;
11114
11115 case call_simd_clone_vec_info_type:
11116 done = vectorizable_simd_clone_call (stmt_info, gsi, &vec_stmt,
11117 slp_node, NULL);
11118 stmt = gsi_stmt (*gsi);
11119 break;
11120
11121 case reduc_vec_info_type:
11122 done = vect_transform_reduction (stmt_info, gsi, &vec_stmt, slp_node);
11123 gcc_assert (done);
11124 break;
11125
11126 case cycle_phi_info_type:
11127 done = vect_transform_cycle_phi (stmt_info, &vec_stmt, slp_node,
11128 slp_node_instance);
11129 gcc_assert (done);
11130 break;
11131
11132 case lc_phi_info_type:
11133 done = vectorizable_lc_phi (stmt_info, &vec_stmt, slp_node);
11134 gcc_assert (done);
11135 break;
11136
11137 default:
11138 if (!STMT_VINFO_LIVE_P (stmt_info))
11139 {
11140 if (dump_enabled_p ())
11141 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
11142 "stmt not supported.\n");
11143 gcc_unreachable ();
11144 }
11145 }
11146
11147 /* Verify SLP vectorization doesn't mess with STMT_VINFO_VEC_STMT.
11148 This would break hybrid SLP vectorization. */
11149 if (slp_node)
11150 gcc_assert (!vec_stmt
11151 && STMT_VINFO_VEC_STMT (stmt_info) == old_vec_stmt_info);
11152
11153 /* Handle inner-loop stmts whose DEF is used in the loop-nest that
11154 is being vectorized, but outside the immediately enclosing loop. */
11155 if (vec_stmt
11156 && nested_p
11157 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
11158 && (STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_outer
11159 || STMT_VINFO_RELEVANT (stmt_info) ==
11160 vect_used_in_outer_by_reduction))
11161 {
11162 class loop *innerloop = LOOP_VINFO_LOOP (
11163 STMT_VINFO_LOOP_VINFO (stmt_info))->inner;
11164 imm_use_iterator imm_iter;
11165 use_operand_p use_p;
11166 tree scalar_dest;
11167
11168 if (dump_enabled_p ())
11169 dump_printf_loc (MSG_NOTE, vect_location,
11170 "Record the vdef for outer-loop vectorization.\n");
11171
11172       /* Find the relevant loop-exit phi-node, and record the vec_stmt there
11173 (to be used when vectorizing outer-loop stmts that use the DEF of
11174 STMT). */
11175 if (gimple_code (stmt) == GIMPLE_PHI)
11176 scalar_dest = PHI_RESULT (stmt);
11177 else
11178 scalar_dest = gimple_get_lhs (stmt);
11179
11180 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
11181 if (!flow_bb_inside_loop_p (innerloop, gimple_bb (USE_STMT (use_p))))
11182 {
11183 stmt_vec_info exit_phi_info
11184 = vinfo->lookup_stmt (USE_STMT (use_p));
11185 STMT_VINFO_VEC_STMT (exit_phi_info) = vec_stmt;
11186 }
11187 }
11188
11189 if (vec_stmt)
11190 STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;
11191
11192 if (STMT_VINFO_TYPE (stmt_info) == store_vec_info_type)
11193 return is_store;
11194
11195 /* If this stmt defines a value used on a backedge, update the
11196 vectorized PHIs. */
11197 stmt_vec_info orig_stmt_info = vect_orig_stmt (stmt_info);
11198 stmt_vec_info reduc_info;
11199 if (STMT_VINFO_REDUC_DEF (orig_stmt_info)
11200 && vect_stmt_to_vectorize (orig_stmt_info) == stmt_info
11201 && (reduc_info = info_for_reduction (orig_stmt_info))
11202 && STMT_VINFO_REDUC_TYPE (reduc_info) != FOLD_LEFT_REDUCTION
11203 && STMT_VINFO_REDUC_TYPE (reduc_info) != EXTRACT_LAST_REDUCTION)
11204 {
11205 gphi *phi;
11206 edge e;
11207 if (!slp_node
11208 && (phi = dyn_cast <gphi *>
11209 (STMT_VINFO_REDUC_DEF (orig_stmt_info)->stmt))
11210 && dominated_by_p (CDI_DOMINATORS,
11211 gimple_bb (orig_stmt_info->stmt), gimple_bb (phi))
11212 && (e = loop_latch_edge (gimple_bb (phi)->loop_father))
11213 && (PHI_ARG_DEF_FROM_EDGE (phi, e)
11214 == gimple_get_lhs (orig_stmt_info->stmt)))
11215 {
11216 stmt_vec_info phi_info
11217 = STMT_VINFO_VEC_STMT (STMT_VINFO_REDUC_DEF (orig_stmt_info));
11218 stmt_vec_info vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
11219 do
11220 {
11221 add_phi_arg (as_a <gphi *> (phi_info->stmt),
11222 gimple_get_lhs (vec_stmt->stmt), e,
11223 gimple_phi_arg_location (phi, e->dest_idx));
11224 phi_info = STMT_VINFO_RELATED_STMT (phi_info);
11225 vec_stmt = STMT_VINFO_RELATED_STMT (vec_stmt);
11226 }
11227 while (phi_info);
11228 gcc_assert (!vec_stmt);
11229 }
11230 else if (slp_node
11231 && slp_node != slp_node_instance->reduc_phis)
11232 {
11233 slp_tree phi_node = slp_node_instance->reduc_phis;
11234 gphi *phi = as_a <gphi *> (SLP_TREE_SCALAR_STMTS (phi_node)[0]->stmt);
11235 e = loop_latch_edge (gimple_bb (phi)->loop_father);
11236 gcc_assert (SLP_TREE_VEC_STMTS (phi_node).length ()
11237 == SLP_TREE_VEC_STMTS (slp_node).length ());
11238 for (unsigned i = 0; i < SLP_TREE_VEC_STMTS (phi_node).length (); ++i)
11239 add_phi_arg (as_a <gphi *> (SLP_TREE_VEC_STMTS (phi_node)[i]->stmt),
11240 gimple_get_lhs (SLP_TREE_VEC_STMTS (slp_node)[i]->stmt),
11241 e, gimple_phi_arg_location (phi, e->dest_idx));
11242 }
11243 }
11244
11245 /* Handle stmts whose DEF is used outside the loop-nest that is
11246 being vectorized. */
11247 done = can_vectorize_live_stmts (stmt_info, gsi, slp_node,
11248 slp_node_instance, true, NULL);
11249 gcc_assert (done);
11250
11251 return false;
11252 }
11253
11254
11255 /* Remove a group of stores (for SLP or interleaving), free their
11256 stmt_vec_info. */
11257
11258 void
11259 vect_remove_stores (stmt_vec_info first_stmt_info)
11260 {
11261 vec_info *vinfo = first_stmt_info->vinfo;
11262 stmt_vec_info next_stmt_info = first_stmt_info;
11263
11264 while (next_stmt_info)
11265 {
11266 stmt_vec_info tmp = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
11267 next_stmt_info = vect_orig_stmt (next_stmt_info);
11268 /* Free the attached stmt_vec_info and remove the stmt. */
11269 vinfo->remove_stmt (next_stmt_info);
11270 next_stmt_info = tmp;
11271 }
11272 }
11273
11274 /* If NUNITS is nonzero, return a vector type that contains NUNITS
11275 elements of type SCALAR_TYPE, or null if the target doesn't support
11276 such a type.
11277
11278 If NUNITS is zero, return a vector type that contains elements of
11279 type SCALAR_TYPE, choosing whichever vector size the target prefers.
11280
11281 If PREVAILING_MODE is VOIDmode, we have not yet chosen a vector mode
11282 for this vectorization region and want to "autodetect" the best choice.
11283 Otherwise, PREVAILING_MODE is a previously-chosen vector TYPE_MODE
11284 and we want the new type to be interoperable with it. PREVAILING_MODE
11285 in this case can be a scalar integer mode or a vector mode; when it
11286 is a vector mode, the function acts like a tree-level version of
11287 related_vector_mode. */
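
/* For example (target-dependent, so only a sketch): on a target whose
   related 2-byte-element mode for V4SImode is V8HImode, calling this
   with PREVAILING_MODE == V4SImode, SCALAR_TYPE == short and NUNITS == 0
   yields a vector type of 8 shorts.  */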
11288
11289 tree
11290 get_related_vectype_for_scalar_type (machine_mode prevailing_mode,
11291 tree scalar_type, poly_uint64 nunits)
11292 {
11293 tree orig_scalar_type = scalar_type;
11294 scalar_mode inner_mode;
11295 machine_mode simd_mode;
11296 tree vectype;
11297
11298 if (!is_int_mode (TYPE_MODE (scalar_type), &inner_mode)
11299 && !is_float_mode (TYPE_MODE (scalar_type), &inner_mode))
11300 return NULL_TREE;
11301
11302 unsigned int nbytes = GET_MODE_SIZE (inner_mode);
11303
11304   /* For vector types of elements whose mode precision doesn't
11305      match their type's precision we use an element type of mode
11306 precision. The vectorization routines will have to make sure
11307 they support the proper result truncation/extension.
11308 We also make sure to build vector types with INTEGER_TYPE
11309 component type only. */
11310 if (INTEGRAL_TYPE_P (scalar_type)
11311 && (GET_MODE_BITSIZE (inner_mode) != TYPE_PRECISION (scalar_type)
11312 || TREE_CODE (scalar_type) != INTEGER_TYPE))
11313 scalar_type = build_nonstandard_integer_type (GET_MODE_BITSIZE (inner_mode),
11314 TYPE_UNSIGNED (scalar_type));
11315
11316 /* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
11317      When the component mode passes the above test, simply use a type
11318 corresponding to that mode. The theory is that any use that
11319 would cause problems with this will disable vectorization anyway. */
11320 else if (!SCALAR_FLOAT_TYPE_P (scalar_type)
11321 && !INTEGRAL_TYPE_P (scalar_type))
11322 scalar_type = lang_hooks.types.type_for_mode (inner_mode, 1);
11323
11324 /* We can't build a vector type of elements with alignment bigger than
11325 their size. */
11326 else if (nbytes < TYPE_ALIGN_UNIT (scalar_type))
11327 scalar_type = lang_hooks.types.type_for_mode (inner_mode,
11328 TYPE_UNSIGNED (scalar_type));
11329
11330   /* If we fell back to using the mode, fail if there was
11331      no scalar type for it.  */
11332 if (scalar_type == NULL_TREE)
11333 return NULL_TREE;
11334
11335 /* If no prevailing mode was supplied, use the mode the target prefers.
11336 Otherwise lookup a vector mode based on the prevailing mode. */
11337 if (prevailing_mode == VOIDmode)
11338 {
11339 gcc_assert (known_eq (nunits, 0U));
11340 simd_mode = targetm.vectorize.preferred_simd_mode (inner_mode);
11341 if (SCALAR_INT_MODE_P (simd_mode))
11342 {
11343 /* Traditional behavior is not to take the integer mode
11344 literally, but simply to use it as a way of determining
11345 the vector size. It is up to mode_for_vector to decide
11346 what the TYPE_MODE should be.
11347
11348 Note that nunits == 1 is allowed in order to support single
11349 element vector types. */
11350 if (!multiple_p (GET_MODE_SIZE (simd_mode), nbytes, &nunits)
11351 || !mode_for_vector (inner_mode, nunits).exists (&simd_mode))
11352 return NULL_TREE;
11353 }
11354 }
11355 else if (SCALAR_INT_MODE_P (prevailing_mode)
11356 || !related_vector_mode (prevailing_mode,
11357 inner_mode, nunits).exists (&simd_mode))
11358 {
11359 /* Fall back to using mode_for_vector, mostly in the hope of being
11360 able to use an integer mode. */
11361 if (known_eq (nunits, 0U)
11362 && !multiple_p (GET_MODE_SIZE (prevailing_mode), nbytes, &nunits))
11363 return NULL_TREE;
11364
11365 if (!mode_for_vector (inner_mode, nunits).exists (&simd_mode))
11366 return NULL_TREE;
11367 }
11368
11369 vectype = build_vector_type_for_mode (scalar_type, simd_mode);
11370
11371 /* In cases where the mode was chosen by mode_for_vector, check that
11372 the target actually supports the chosen mode, or that it at least
11373 allows the vector mode to be replaced by a like-sized integer. */
11374 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
11375 && !INTEGRAL_MODE_P (TYPE_MODE (vectype)))
11376 return NULL_TREE;
11377
11378 /* Re-attach the address-space qualifier if we canonicalized the scalar
11379 type. */
11380 if (TYPE_ADDR_SPACE (orig_scalar_type) != TYPE_ADDR_SPACE (vectype))
11381 return build_qualified_type
11382 (vectype, KEEP_QUAL_ADDR_SPACE (TYPE_QUALS (orig_scalar_type)));
11383
11384 return vectype;
11385 }
11386
11387 /* Function get_vectype_for_scalar_type.
11388
11389 Returns the vector type corresponding to SCALAR_TYPE as supported
11390 by the target. If GROUP_SIZE is nonzero and we're performing BB
11391 vectorization, make sure that the number of elements in the vector
11392 is no bigger than GROUP_SIZE. */
11393
11394 tree
11395 get_vectype_for_scalar_type (vec_info *vinfo, tree scalar_type,
11396 unsigned int group_size)
11397 {
11398 /* For BB vectorization, we should always have a group size once we've
11399 constructed the SLP tree; the only valid uses of zero GROUP_SIZEs
11400 are tentative requests during things like early data reference
11401 analysis and pattern recognition. */
11402 if (is_a <bb_vec_info> (vinfo))
11403 gcc_assert (vinfo->slp_instances.is_empty () || group_size != 0);
11404 else
11405 group_size = 0;
11406
11407 tree vectype = get_related_vectype_for_scalar_type (vinfo->vector_mode,
11408 scalar_type);
11409 if (vectype && vinfo->vector_mode == VOIDmode)
11410 vinfo->vector_mode = TYPE_MODE (vectype);
11411
11412 /* Register the natural choice of vector type, before the group size
11413 has been applied. */
11414 if (vectype)
11415 vinfo->used_vector_modes.add (TYPE_MODE (vectype));
11416
11417 /* If the natural choice of vector type doesn't satisfy GROUP_SIZE,
11418 try again with an explicit number of elements. */
11419 if (vectype
11420 && group_size
11421 && maybe_ge (TYPE_VECTOR_SUBPARTS (vectype), group_size))
11422 {
11423 /* Start with the biggest number of units that fits within
11424 GROUP_SIZE and halve it until we find a valid vector type.
11425 Usually either the first attempt will succeed or all will
11426 fail (in the latter case because GROUP_SIZE is too small
11427 for the target), but it's possible that a target could have
11428 a hole between supported vector types.
11429
11430 If GROUP_SIZE is not a power of 2, this has the effect of
11431 trying the largest power of 2 that fits within the group,
11432 even though the group is not a multiple of that vector size.
11433 The BB vectorizer will then try to carve up the group into
11434 smaller pieces. */
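      /* E.g. (sketch) a GROUP_SIZE of 6 tries 4-element vectors first,
         then 2-element vectors.  */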
11435 unsigned int nunits = 1 << floor_log2 (group_size);
11436 do
11437 {
11438 vectype = get_related_vectype_for_scalar_type (vinfo->vector_mode,
11439 scalar_type, nunits);
11440 nunits /= 2;
11441 }
11442 while (nunits > 1 && !vectype);
11443 }
11444
11445 return vectype;
11446 }
11447
11448 /* Return the vector type corresponding to SCALAR_TYPE as supported
11449 by the target. NODE, if nonnull, is the SLP tree node that will
11450 use the returned vector type. */
11451
11452 tree
11453 get_vectype_for_scalar_type (vec_info *vinfo, tree scalar_type, slp_tree node)
11454 {
11455 unsigned int group_size = 0;
11456 if (node)
11457 {
11458 group_size = SLP_TREE_SCALAR_OPS (node).length ();
11459 if (group_size == 0)
11460 group_size = SLP_TREE_SCALAR_STMTS (node).length ();
11461 }
11462 return get_vectype_for_scalar_type (vinfo, scalar_type, group_size);
11463 }
11464
11465 /* Function get_mask_type_for_scalar_type.
11466
11467 Returns the mask type corresponding to a result of comparison
11468    of vectors of the specified SCALAR_TYPE, as supported by the target.
11469 If GROUP_SIZE is nonzero and we're performing BB vectorization,
11470 make sure that the number of elements in the vector is no bigger
11471 than GROUP_SIZE. */
11472
11473 tree
11474 get_mask_type_for_scalar_type (vec_info *vinfo, tree scalar_type,
11475 unsigned int group_size)
11476 {
11477 tree vectype = get_vectype_for_scalar_type (vinfo, scalar_type, group_size);
11478
11479 if (!vectype)
11480 return NULL;
11481
11482 return truth_type_for (vectype);
11483 }
11484
11485 /* Function get_same_sized_vectype
11486
11487 Returns a vector type corresponding to SCALAR_TYPE of size
11488 VECTOR_TYPE if supported by the target. */
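
/* E.g. (illustrative only): for VECTOR_TYPE V4SI (16 bytes) and
   SCALAR_TYPE double (8 bytes), the result is a 2-element double
   vector, provided the target supports such a mode.  */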
11489
11490 tree
11491 get_same_sized_vectype (tree scalar_type, tree vector_type)
11492 {
11493 if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type))
11494 return truth_type_for (vector_type);
11495
11496 poly_uint64 nunits;
11497 if (!multiple_p (GET_MODE_SIZE (TYPE_MODE (vector_type)),
11498 GET_MODE_SIZE (TYPE_MODE (scalar_type)), &nunits))
11499 return NULL_TREE;
11500
11501 return get_related_vectype_for_scalar_type (TYPE_MODE (vector_type),
11502 scalar_type, nunits);
11503 }
11504
11505 /* Return true if replacing LOOP_VINFO->vector_mode with VECTOR_MODE
11506 would not change the chosen vector modes. */
11507
11508 bool
11509 vect_chooses_same_modes_p (vec_info *vinfo, machine_mode vector_mode)
11510 {
11511 for (vec_info::mode_set::iterator i = vinfo->used_vector_modes.begin ();
11512 i != vinfo->used_vector_modes.end (); ++i)
11513 if (!VECTOR_MODE_P (*i)
11514 || related_vector_mode (vector_mode, GET_MODE_INNER (*i), 0) != *i)
11515 return false;
11516 return true;
11517 }
11518
11519 /* Function vect_is_simple_use.
11520
11521 Input:
11522 VINFO - the vect info of the loop or basic block that is being vectorized.
11523 OPERAND - operand in the loop or bb.
11524 Output:
11525 DEF_STMT_INFO_OUT (optional) - information about the defining stmt in
11526 case OPERAND is an SSA_NAME that is defined in the vectorizable region
11527 DEF_STMT_OUT (optional) - the defining stmt in case OPERAND is an SSA_NAME;
11528 the definition could be anywhere in the function
11529 DT - the type of definition
11530
11531 Returns whether a stmt with OPERAND can be vectorized.
11532 For loops, supportable operands are constants, loop invariants, and operands
11533 that are defined by the current iteration of the loop. Unsupportable
11534 operands are those that are defined by a previous iteration of the loop (as
11535 is the case in reduction/induction computations).
11536 For basic blocks, supportable operands are constants and bb invariants.
11537 For now, operands defined outside the basic block are not supported. */
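
/* For example (hypothetical operands): the constant 5 yields
   vect_constant_def, a default-definition SSA name such as a function
   parameter yields vect_external_def, and an SSA name defined by a
   statement inside the vectorizable region yields that statement's
   def type, typically vect_internal_def.  */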
11538
11539 bool
11540 vect_is_simple_use (tree operand, vec_info *vinfo, enum vect_def_type *dt,
11541 stmt_vec_info *def_stmt_info_out, gimple **def_stmt_out)
11542 {
11543 if (def_stmt_info_out)
11544 *def_stmt_info_out = NULL;
11545 if (def_stmt_out)
11546 *def_stmt_out = NULL;
11547 *dt = vect_unknown_def_type;
11548
11549 if (dump_enabled_p ())
11550 {
11551 dump_printf_loc (MSG_NOTE, vect_location,
11552 "vect_is_simple_use: operand ");
11553 if (TREE_CODE (operand) == SSA_NAME
11554 && !SSA_NAME_IS_DEFAULT_DEF (operand))
11555 dump_gimple_expr (MSG_NOTE, TDF_SLIM, SSA_NAME_DEF_STMT (operand), 0);
11556 else
11557 dump_generic_expr (MSG_NOTE, TDF_SLIM, operand);
11558 }
11559
11560 if (CONSTANT_CLASS_P (operand))
11561 *dt = vect_constant_def;
11562 else if (is_gimple_min_invariant (operand))
11563 *dt = vect_external_def;
11564 else if (TREE_CODE (operand) != SSA_NAME)
11565 *dt = vect_unknown_def_type;
11566 else if (SSA_NAME_IS_DEFAULT_DEF (operand))
11567 *dt = vect_external_def;
11568 else
11569 {
11570 gimple *def_stmt = SSA_NAME_DEF_STMT (operand);
11571 stmt_vec_info stmt_vinfo = vinfo->lookup_def (operand);
11572 if (!stmt_vinfo)
11573 *dt = vect_external_def;
11574 else
11575 {
11576 stmt_vinfo = vect_stmt_to_vectorize (stmt_vinfo);
11577 def_stmt = stmt_vinfo->stmt;
11578 switch (gimple_code (def_stmt))
11579 {
11580 case GIMPLE_PHI:
11581 case GIMPLE_ASSIGN:
11582 case GIMPLE_CALL:
11583 *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
11584 break;
11585 default:
11586 *dt = vect_unknown_def_type;
11587 break;
11588 }
11589 if (def_stmt_info_out)
11590 *def_stmt_info_out = stmt_vinfo;
11591 }
11592 if (def_stmt_out)
11593 *def_stmt_out = def_stmt;
11594 }
11595
11596 if (dump_enabled_p ())
11597 {
11598 dump_printf (MSG_NOTE, ", type of def: ");
11599 switch (*dt)
11600 {
11601 case vect_uninitialized_def:
11602 dump_printf (MSG_NOTE, "uninitialized\n");
11603 break;
11604 case vect_constant_def:
11605 dump_printf (MSG_NOTE, "constant\n");
11606 break;
11607 case vect_external_def:
11608 dump_printf (MSG_NOTE, "external\n");
11609 break;
11610 case vect_internal_def:
11611 dump_printf (MSG_NOTE, "internal\n");
11612 break;
11613 case vect_induction_def:
11614 dump_printf (MSG_NOTE, "induction\n");
11615 break;
11616 case vect_reduction_def:
11617 dump_printf (MSG_NOTE, "reduction\n");
11618 break;
11619 case vect_double_reduction_def:
11620 dump_printf (MSG_NOTE, "double reduction\n");
11621 break;
11622 case vect_nested_cycle:
11623 dump_printf (MSG_NOTE, "nested cycle\n");
11624 break;
11625 case vect_unknown_def_type:
11626 dump_printf (MSG_NOTE, "unknown\n");
11627 break;
11628 }
11629 }
11630
11631 if (*dt == vect_unknown_def_type)
11632 {
11633 if (dump_enabled_p ())
11634 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
11635 "Unsupported pattern.\n");
11636 return false;
11637 }
11638
11639 return true;
11640 }
11641
11642 /* Function vect_is_simple_use.
11643
11644 Same as vect_is_simple_use but also determines the vector operand
11645 type of OPERAND and stores it to *VECTYPE. If the definition of
11646    OPERAND is vect_uninitialized_def, vect_constant_def or
11647    vect_external_def, *VECTYPE will be set to NULL_TREE and the
11648    caller is responsible for computing the best-suited vector type
11649    for the scalar operand.  */
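/* A typical caller might look like the following sketch (assuming the
   NULL/zero default arguments declared for these routines in
   tree-vectorizer.h):

     enum vect_def_type dt;
     tree vectype;
     if (!vect_is_simple_use (op, vinfo, &dt, &vectype))
       return false;
     if (vectype == NULL_TREE)
       vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op));  */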
11650
11651 bool
11652 vect_is_simple_use (tree operand, vec_info *vinfo, enum vect_def_type *dt,
11653 tree *vectype, stmt_vec_info *def_stmt_info_out,
11654 gimple **def_stmt_out)
11655 {
11656 stmt_vec_info def_stmt_info;
11657 gimple *def_stmt;
11658 if (!vect_is_simple_use (operand, vinfo, dt, &def_stmt_info, &def_stmt))
11659 return false;
11660
11661 if (def_stmt_out)
11662 *def_stmt_out = def_stmt;
11663 if (def_stmt_info_out)
11664 *def_stmt_info_out = def_stmt_info;
11665
11666 /* Now get a vector type if the def is internal, otherwise supply
11667 NULL_TREE and leave it up to the caller to figure out a proper
11668 type for the use stmt. */
11669 if (*dt == vect_internal_def
11670 || *dt == vect_induction_def
11671 || *dt == vect_reduction_def
11672 || *dt == vect_double_reduction_def
11673 || *dt == vect_nested_cycle)
11674 {
11675 *vectype = STMT_VINFO_VECTYPE (def_stmt_info);
11676 gcc_assert (*vectype != NULL_TREE);
11677 if (dump_enabled_p ())
11678 dump_printf_loc (MSG_NOTE, vect_location,
11679 "vect_is_simple_use: vectype %T\n", *vectype);
11680 }
11681 else if (*dt == vect_uninitialized_def
11682 || *dt == vect_constant_def
11683 || *dt == vect_external_def)
11684 *vectype = NULL_TREE;
11685 else
11686 gcc_unreachable ();
11687
11688 return true;
11689 }
11690
11691
11692 /* Function supportable_widening_operation
11693
11694 Check whether an operation represented by the code CODE is a
11695 widening operation that is supported by the target platform in
11696 vector form (i.e., when operating on arguments of type VECTYPE_IN
11697 producing a result of type VECTYPE_OUT).
11698
11699 Widening operations we currently support are NOP (CONVERT), FLOAT,
11700 FIX_TRUNC and WIDEN_MULT. This function checks if these operations
11701 are supported by the target platform either directly (via vector
11702 tree-codes), or via target builtins.
11703
11704 Output:
11705 - CODE1 and CODE2 are codes of vector operations to be used when
11706 vectorizing the operation, if available.
11707 - MULTI_STEP_CVT determines the number of required intermediate steps in
11708 case of multi-step conversion (like char->short->int - in that case
11709 MULTI_STEP_CVT will be 1).
11710 - INTERM_TYPES contains the intermediate type required to perform the
11711 widening operation (short in the above example). */
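/* As a concrete sketch, a two-step widening conversion such as
   char->short->int maps onto pairs of VEC_UNPACK_LO/HI_EXPR
   (pseudo-gimple, for illustration only):

     short_lo = VEC_UNPACK_LO_EXPR <char_vec>;
     short_hi = VEC_UNPACK_HI_EXPR <char_vec>;
     int_0 = VEC_UNPACK_LO_EXPR <short_lo>;
     int_1 = VEC_UNPACK_HI_EXPR <short_lo>;
     ...

   with MULTI_STEP_CVT == 1 and INTERM_TYPES holding the intermediate
   vector short type.  */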
11712
11713 bool
11714 supportable_widening_operation (enum tree_code code, stmt_vec_info stmt_info,
11715 tree vectype_out, tree vectype_in,
11716 enum tree_code *code1, enum tree_code *code2,
11717 int *multi_step_cvt,
11718 vec<tree> *interm_types)
11719 {
11720 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
11721 class loop *vect_loop = NULL;
11722 machine_mode vec_mode;
11723 enum insn_code icode1, icode2;
11724 optab optab1, optab2;
11725 tree vectype = vectype_in;
11726 tree wide_vectype = vectype_out;
11727 enum tree_code c1, c2;
11728 int i;
11729 tree prev_type, intermediate_type;
11730 machine_mode intermediate_mode, prev_mode;
11731 optab optab3, optab4;
11732
11733 *multi_step_cvt = 0;
11734 if (loop_info)
11735 vect_loop = LOOP_VINFO_LOOP (loop_info);
11736
11737 switch (code)
11738 {
11739 case WIDEN_MULT_EXPR:
11740 /* The result of a vectorized widening operation usually requires
11741 two vectors (because the widened results do not fit into one vector).
11742 The generated vector results would normally be expected to be
11743 generated in the same order as in the original scalar computation,
11744 i.e. if 8 results are generated in each vector iteration, they are
11745 to be organized as follows:
11746 vect1: [res1,res2,res3,res4],
11747 vect2: [res5,res6,res7,res8].
11748
11749 However, in the special case that the result of the widening
11750 operation is used in a reduction computation only, the order doesn't
11751 matter (because when vectorizing a reduction we change the order of
11752 the computation). Some targets can take advantage of this and
11753 	 generate more efficient code.  For example, targets like Altivec,
11754 	 which support widen_mult using a sequence of {mult_even,mult_odd},
11755 	 generate the following vectors:
11756 vect1: [res1,res3,res5,res7],
11757 vect2: [res2,res4,res6,res8].
11758
11759 When vectorizing outer-loops, we execute the inner-loop sequentially
11760 (each vectorized inner-loop iteration contributes to VF outer-loop
11761 	 iterations in parallel).  We therefore don't allow changing the
11762 	 order of the computation in the inner-loop during outer-loop
11763 	 vectorization.  */
11764 /* TODO: Another case in which order doesn't *really* matter is when we
11765 widen and then contract again, e.g. (short)((int)x * y >> 8).
11766 Normally, pack_trunc performs an even/odd permute, whereas the
11767 repack from an even/odd expansion would be an interleave, which
11768 would be significantly simpler for e.g. AVX2. */
11769 /* In any case, in order to avoid duplicating the code below, recurse
11770 on VEC_WIDEN_MULT_EVEN_EXPR. If it succeeds, all the return values
11771 are properly set up for the caller. If we fail, we'll continue with
11772 a VEC_WIDEN_MULT_LO/HI_EXPR check. */
11773 if (vect_loop
11774 && STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction
11775 && !nested_in_vect_loop_p (vect_loop, stmt_info)
11776 && supportable_widening_operation (VEC_WIDEN_MULT_EVEN_EXPR,
11777 stmt_info, vectype_out,
11778 vectype_in, code1, code2,
11779 multi_step_cvt, interm_types))
11780 {
11781 /* Elements in a vector with vect_used_by_reduction property cannot
11782 be reordered if the use chain with this property does not have the
11783 	     same operation.  One such example is s += a * b, where elements
11784 in a and b cannot be reordered. Here we check if the vector defined
11785 by STMT is only directly used in the reduction statement. */
11786 tree lhs = gimple_assign_lhs (stmt_info->stmt);
11787 stmt_vec_info use_stmt_info = loop_info->lookup_single_use (lhs);
11788 if (use_stmt_info
11789 && STMT_VINFO_DEF_TYPE (use_stmt_info) == vect_reduction_def)
11790 return true;
11791 }
11792 c1 = VEC_WIDEN_MULT_LO_EXPR;
11793 c2 = VEC_WIDEN_MULT_HI_EXPR;
11794 break;
11795
11796 case DOT_PROD_EXPR:
11797 c1 = DOT_PROD_EXPR;
11798 c2 = DOT_PROD_EXPR;
11799 break;
11800
11801 case SAD_EXPR:
11802 c1 = SAD_EXPR;
11803 c2 = SAD_EXPR;
11804 break;
11805
11806 case VEC_WIDEN_MULT_EVEN_EXPR:
11807 /* Support the recursion induced just above. */
11808 c1 = VEC_WIDEN_MULT_EVEN_EXPR;
11809 c2 = VEC_WIDEN_MULT_ODD_EXPR;
11810 break;
11811
11812 case WIDEN_LSHIFT_EXPR:
11813 c1 = VEC_WIDEN_LSHIFT_LO_EXPR;
11814 c2 = VEC_WIDEN_LSHIFT_HI_EXPR;
11815 break;
11816
11817 CASE_CONVERT:
11818 c1 = VEC_UNPACK_LO_EXPR;
11819 c2 = VEC_UNPACK_HI_EXPR;
11820 break;
11821
11822 case FLOAT_EXPR:
11823 c1 = VEC_UNPACK_FLOAT_LO_EXPR;
11824 c2 = VEC_UNPACK_FLOAT_HI_EXPR;
11825 break;
11826
11827 case FIX_TRUNC_EXPR:
11828 c1 = VEC_UNPACK_FIX_TRUNC_LO_EXPR;
11829 c2 = VEC_UNPACK_FIX_TRUNC_HI_EXPR;
11830 break;
11831
11832 default:
11833 gcc_unreachable ();
11834 }
11835
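  /* On big-endian targets the _LO and _HI halves correspond to the
     opposite register lanes compared with little-endian, so the two
     codes are swapped; the even/odd variants are lane-order neutral
     and need no swap.  */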
11836 if (BYTES_BIG_ENDIAN && c1 != VEC_WIDEN_MULT_EVEN_EXPR)
11837 std::swap (c1, c2);
11838
11839 if (code == FIX_TRUNC_EXPR)
11840 {
11841       /* The signedness is determined from the output operand.  */
11842 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
11843 optab2 = optab_for_tree_code (c2, vectype_out, optab_default);
11844 }
11845 else if (CONVERT_EXPR_CODE_P (code)
11846 && VECTOR_BOOLEAN_TYPE_P (wide_vectype)
11847 && VECTOR_BOOLEAN_TYPE_P (vectype)
11848 && TYPE_MODE (wide_vectype) == TYPE_MODE (vectype)
11849 && SCALAR_INT_MODE_P (TYPE_MODE (vectype)))
11850 {
11851 /* If the input and result modes are the same, a different optab
11852 is needed where we pass in the number of units in vectype. */
11853 optab1 = vec_unpacks_sbool_lo_optab;
11854 optab2 = vec_unpacks_sbool_hi_optab;
11855 }
11856 else
11857 {
11858 optab1 = optab_for_tree_code (c1, vectype, optab_default);
11859 optab2 = optab_for_tree_code (c2, vectype, optab_default);
11860 }
11861
11862 if (!optab1 || !optab2)
11863 return false;
11864
11865 vec_mode = TYPE_MODE (vectype);
11866 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing
11867 || (icode2 = optab_handler (optab2, vec_mode)) == CODE_FOR_nothing)
11868 return false;
11869
11870 *code1 = c1;
11871 *code2 = c2;
11872
11873 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
11874 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
11875 {
11876 if (!VECTOR_BOOLEAN_TYPE_P (vectype))
11877 return true;
11878       /* For scalar masks we may have different boolean
11879 	 vector types having the same QImode.  Thus we
11880 	 add an additional check on the number of elements.  */
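      /* E.g. with AVX-512-style scalar masks, boolean vectors of two,
	 four and eight elements can all have QImode, so the mode check
	 alone cannot distinguish them (a target-specific
	 illustration).  */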
11881 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype),
11882 TYPE_VECTOR_SUBPARTS (wide_vectype) * 2))
11883 return true;
11884 }
11885
11886 /* Check if it's a multi-step conversion that can be done using intermediate
11887 types. */
11888
11889 prev_type = vectype;
11890 prev_mode = vec_mode;
11891
11892 if (!CONVERT_EXPR_CODE_P (code))
11893 return false;
11894
11895   /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
11896      intermediate steps in the promotion sequence.  We try
11897      MAX_INTERM_CVT_STEPS to get to WIDE_VECTYPE, and fail if we do
11898      not.  */
11899 interm_types->create (MAX_INTERM_CVT_STEPS);
11900 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
11901 {
11902 intermediate_mode = insn_data[icode1].operand[0].mode;
11903 if (VECTOR_BOOLEAN_TYPE_P (prev_type))
11904 intermediate_type
11905 = vect_halve_mask_nunits (prev_type, intermediate_mode);
11906 else
11907 intermediate_type
11908 = lang_hooks.types.type_for_mode (intermediate_mode,
11909 TYPE_UNSIGNED (prev_type));
11910
11911 if (VECTOR_BOOLEAN_TYPE_P (intermediate_type)
11912 && VECTOR_BOOLEAN_TYPE_P (prev_type)
11913 && intermediate_mode == prev_mode
11914 && SCALAR_INT_MODE_P (prev_mode))
11915 {
11916 /* If the input and result modes are the same, a different optab
11917 is needed where we pass in the number of units in vectype. */
11918 optab3 = vec_unpacks_sbool_lo_optab;
11919 optab4 = vec_unpacks_sbool_hi_optab;
11920 }
11921 else
11922 {
11923 optab3 = optab_for_tree_code (c1, intermediate_type, optab_default);
11924 optab4 = optab_for_tree_code (c2, intermediate_type, optab_default);
11925 }
11926
11927 if (!optab3 || !optab4
11928 || (icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing
11929 || insn_data[icode1].operand[0].mode != intermediate_mode
11930 || (icode2 = optab_handler (optab2, prev_mode)) == CODE_FOR_nothing
11931 || insn_data[icode2].operand[0].mode != intermediate_mode
11932 || ((icode1 = optab_handler (optab3, intermediate_mode))
11933 == CODE_FOR_nothing)
11934 || ((icode2 = optab_handler (optab4, intermediate_mode))
11935 == CODE_FOR_nothing))
11936 break;
11937
11938 interm_types->quick_push (intermediate_type);
11939 (*multi_step_cvt)++;
11940
11941 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
11942 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
11943 {
11944 if (!VECTOR_BOOLEAN_TYPE_P (vectype))
11945 return true;
11946 if (known_eq (TYPE_VECTOR_SUBPARTS (intermediate_type),
11947 TYPE_VECTOR_SUBPARTS (wide_vectype) * 2))
11948 return true;
11949 }
11950
11951 prev_type = intermediate_type;
11952 prev_mode = intermediate_mode;
11953 }
11954
11955 interm_types->release ();
11956 return false;
11957 }
11958
11959
11960 /* Function supportable_narrowing_operation
11961
11962 Check whether an operation represented by the code CODE is a
11963 narrowing operation that is supported by the target platform in
11964 vector form (i.e., when operating on arguments of type VECTYPE_IN
11965 and producing a result of type VECTYPE_OUT).
11966
11967 Narrowing operations we currently support are NOP (CONVERT), FIX_TRUNC
11968 and FLOAT. This function checks if these operations are supported by
11969 the target platform directly via vector tree-codes.
11970
11971 Output:
11972 - CODE1 is the code of a vector operation to be used when
11973 vectorizing the operation, if available.
11974 - MULTI_STEP_CVT determines the number of required intermediate steps in
11975 case of multi-step conversion (like int->short->char - in that case
11976 MULTI_STEP_CVT will be 1).
11977 - INTERM_TYPES contains the intermediate type required to perform the
11978 narrowing operation (short in the above example). */
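/* As a concrete sketch, a two-step narrowing conversion such as
   int->short->char maps onto VEC_PACK_TRUNC_EXPRs (pseudo-gimple,
   for illustration only):

     short_vec0 = VEC_PACK_TRUNC_EXPR <int_vec0, int_vec1>;
     short_vec1 = VEC_PACK_TRUNC_EXPR <int_vec2, int_vec3>;
     char_vec = VEC_PACK_TRUNC_EXPR <short_vec0, short_vec1>;

   with MULTI_STEP_CVT == 1 and INTERM_TYPES holding the intermediate
   vector short type.  */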
11979
11980 bool
11981 supportable_narrowing_operation (enum tree_code code,
11982 tree vectype_out, tree vectype_in,
11983 enum tree_code *code1, int *multi_step_cvt,
11984 vec<tree> *interm_types)
11985 {
11986 machine_mode vec_mode;
11987 enum insn_code icode1;
11988 optab optab1, interm_optab;
11989 tree vectype = vectype_in;
11990 tree narrow_vectype = vectype_out;
11991 enum tree_code c1;
11992 tree intermediate_type, prev_type;
11993 machine_mode intermediate_mode, prev_mode;
11994 int i;
11995 bool uns;
11996
11997 *multi_step_cvt = 0;
11998 switch (code)
11999 {
12000 CASE_CONVERT:
12001 c1 = VEC_PACK_TRUNC_EXPR;
12002 if (VECTOR_BOOLEAN_TYPE_P (narrow_vectype)
12003 && VECTOR_BOOLEAN_TYPE_P (vectype)
12004 && TYPE_MODE (narrow_vectype) == TYPE_MODE (vectype)
12005 && SCALAR_INT_MODE_P (TYPE_MODE (vectype)))
12006 optab1 = vec_pack_sbool_trunc_optab;
12007 else
12008 optab1 = optab_for_tree_code (c1, vectype, optab_default);
12009 break;
12010
12011 case FIX_TRUNC_EXPR:
12012 c1 = VEC_PACK_FIX_TRUNC_EXPR;
12013       /* The signedness is determined from the output operand.  */
12014 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
12015 break;
12016
12017 case FLOAT_EXPR:
12018 c1 = VEC_PACK_FLOAT_EXPR;
12019 optab1 = optab_for_tree_code (c1, vectype, optab_default);
12020 break;
12021
12022 default:
12023 gcc_unreachable ();
12024 }
12025
12026 if (!optab1)
12027 return false;
12028
12029 vec_mode = TYPE_MODE (vectype);
12030 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing)
12031 return false;
12032
12033 *code1 = c1;
12034
12035 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
12036 {
12037 if (!VECTOR_BOOLEAN_TYPE_P (vectype))
12038 return true;
12039       /* For scalar masks we may have different boolean
12040 	 vector types having the same QImode.  Thus we
12041 	 add an additional check on the number of elements.  */
12042 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype) * 2,
12043 TYPE_VECTOR_SUBPARTS (narrow_vectype)))
12044 return true;
12045 }
12046
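  /* Multi-step conversions via intermediate types are not attempted
     for VEC_PACK_FLOAT_EXPR; only the single-step form checked above
     is supported.  */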
12047 if (code == FLOAT_EXPR)
12048 return false;
12049
12050 /* Check if it's a multi-step conversion that can be done using intermediate
12051 types. */
12052 prev_mode = vec_mode;
12053 prev_type = vectype;
12054 if (code == FIX_TRUNC_EXPR)
12055 uns = TYPE_UNSIGNED (vectype_out);
12056 else
12057 uns = TYPE_UNSIGNED (vectype);
12058
12059   /* For multi-step FIX_TRUNC_EXPR prefer signed floating-point to integer
12060      conversion over unsigned, as unsigned FIX_TRUNC_EXPR is often more
12061      costly than signed.  */
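  /* E.g. a multi-step float -> unsigned short conversion can go
     through a signed int intermediate, since the final truncation
     keeps only the low bits and so the signedness of the intermediate
     does not matter for in-range values (an illustrative sketch).  */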
12062 if (code == FIX_TRUNC_EXPR && uns)
12063 {
12064 enum insn_code icode2;
12065
12066 intermediate_type
12067 = lang_hooks.types.type_for_mode (TYPE_MODE (vectype_out), 0);
12068 interm_optab
12069 = optab_for_tree_code (c1, intermediate_type, optab_default);
12070 if (interm_optab != unknown_optab
12071 && (icode2 = optab_handler (optab1, vec_mode)) != CODE_FOR_nothing
12072 && insn_data[icode1].operand[0].mode
12073 == insn_data[icode2].operand[0].mode)
12074 {
12075 uns = false;
12076 optab1 = interm_optab;
12077 icode1 = icode2;
12078 }
12079 }
12080
12081   /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
12082      intermediate steps in the demotion sequence.  We try
12083      MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do not.  */
12084 interm_types->create (MAX_INTERM_CVT_STEPS);
12085 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
12086 {
12087 intermediate_mode = insn_data[icode1].operand[0].mode;
12088 if (VECTOR_BOOLEAN_TYPE_P (prev_type))
12089 intermediate_type
12090 = vect_double_mask_nunits (prev_type, intermediate_mode);
12091 else
12092 intermediate_type
12093 = lang_hooks.types.type_for_mode (intermediate_mode, uns);
12094 if (VECTOR_BOOLEAN_TYPE_P (intermediate_type)
12095 && VECTOR_BOOLEAN_TYPE_P (prev_type)
12096 && intermediate_mode == prev_mode
12097 && SCALAR_INT_MODE_P (prev_mode))
12098 interm_optab = vec_pack_sbool_trunc_optab;
12099 else
12100 interm_optab
12101 = optab_for_tree_code (VEC_PACK_TRUNC_EXPR, intermediate_type,
12102 optab_default);
12103 if (!interm_optab
12104 || ((icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing)
12105 || insn_data[icode1].operand[0].mode != intermediate_mode
12106 || ((icode1 = optab_handler (interm_optab, intermediate_mode))
12107 == CODE_FOR_nothing))
12108 break;
12109
12110 interm_types->quick_push (intermediate_type);
12111 (*multi_step_cvt)++;
12112
12113 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
12114 {
12115 if (!VECTOR_BOOLEAN_TYPE_P (vectype))
12116 return true;
12117 if (known_eq (TYPE_VECTOR_SUBPARTS (intermediate_type) * 2,
12118 TYPE_VECTOR_SUBPARTS (narrow_vectype)))
12119 return true;
12120 }
12121
12122 prev_mode = intermediate_mode;
12123 prev_type = intermediate_type;
12124 optab1 = interm_optab;
12125 }
12126
12127 interm_types->release ();
12128 return false;
12129 }
12130
12131 /* Generate and return a statement that sets vector mask MASK such that
12132 MASK[I] is true iff J + START_INDEX < END_INDEX for all J <= I. */
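/* A sketch of the generated gimple, with START_INDEX i and
   END_INDEX n:

     mask = .WHILE_ULT (i, n, { 0, ... });

   so lane I of MASK is set exactly when i + J < n holds for every
   J <= I.  */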
12133
12134 gcall *
12135 vect_gen_while (tree mask, tree start_index, tree end_index)
12136 {
12137 tree cmp_type = TREE_TYPE (start_index);
12138 tree mask_type = TREE_TYPE (mask);
12139 gcc_checking_assert (direct_internal_fn_supported_p (IFN_WHILE_ULT,
12140 cmp_type, mask_type,
12141 OPTIMIZE_FOR_SPEED));
12142 gcall *call = gimple_build_call_internal (IFN_WHILE_ULT, 3,
12143 start_index, end_index,
12144 build_zero_cst (mask_type));
12145 gimple_call_set_lhs (call, mask);
12146 return call;
12147 }
12148
12149 /* Generate a vector mask of type MASK_TYPE for which index I is false iff
12150 J + START_INDEX < END_INDEX for all J <= I. Add the statements to SEQ. */
12151
12152 tree
12153 vect_gen_while_not (gimple_seq *seq, tree mask_type, tree start_index,
12154 tree end_index)
12155 {
12156 tree tmp = make_ssa_name (mask_type);
12157 gcall *call = vect_gen_while (tmp, start_index, end_index);
12158 gimple_seq_add_stmt (seq, call);
12159 return gimple_build (seq, BIT_NOT_EXPR, mask_type, tmp);
12160 }
12161
12162 /* Try to compute the vector types required to vectorize STMT_INFO,
12163 returning true on success and false if vectorization isn't possible.
12164    If GROUP_SIZE is nonzero and we're performing BB vectorization,
12165    make sure that the number of elements in the vectors is no bigger
12166    than GROUP_SIZE.
12167
12168 On success:
12169
12170 - Set *STMT_VECTYPE_OUT to:
12171 - NULL_TREE if the statement doesn't need to be vectorized;
12172 - the equivalent of STMT_VINFO_VECTYPE otherwise.
12173
12174 - Set *NUNITS_VECTYPE_OUT to the vector type that contains the maximum
12175 number of units needed to vectorize STMT_INFO, or NULL_TREE if the
12176 statement does not help to determine the overall number of units. */
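/* For instance (a sketch), given a widening statement such as
   int_x = (int) short_y, *STMT_VECTYPE_OUT is the vector int type
   while *NUNITS_VECTYPE_OUT is derived from the smallest scalar type
   (short) and therefore contains more units; the vectorization
   factor must be large enough for the latter.  */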
12177
12178 opt_result
12179 vect_get_vector_types_for_stmt (stmt_vec_info stmt_info,
12180 tree *stmt_vectype_out,
12181 tree *nunits_vectype_out,
12182 unsigned int group_size)
12183 {
12184 vec_info *vinfo = stmt_info->vinfo;
12185 gimple *stmt = stmt_info->stmt;
12186
12187 /* For BB vectorization, we should always have a group size once we've
12188 constructed the SLP tree; the only valid uses of zero GROUP_SIZEs
12189 are tentative requests during things like early data reference
12190 analysis and pattern recognition. */
12191 if (is_a <bb_vec_info> (vinfo))
12192 gcc_assert (vinfo->slp_instances.is_empty () || group_size != 0);
12193 else
12194 group_size = 0;
12195
12196 *stmt_vectype_out = NULL_TREE;
12197 *nunits_vectype_out = NULL_TREE;
12198
12199 if (gimple_get_lhs (stmt) == NULL_TREE
12200 /* MASK_STORE has no lhs, but is ok. */
12201 && !gimple_call_internal_p (stmt, IFN_MASK_STORE))
12202 {
12203 if (is_a <gcall *> (stmt))
12204 {
12205 /* Ignore calls with no lhs. These must be calls to
12206 	     #pragma omp simd functions, and the vectorization factor
12207 	     they really need can't be determined until
12208 vectorizable_simd_clone_call. */
12209 if (dump_enabled_p ())
12210 dump_printf_loc (MSG_NOTE, vect_location,
12211 "defer to SIMD clone analysis.\n");
12212 return opt_result::success ();
12213 }
12214
12215 return opt_result::failure_at (stmt,
12216 "not vectorized: irregular stmt.%G", stmt);
12217 }
12218
12219 if (VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt))))
12220 return opt_result::failure_at (stmt,
12221 "not vectorized: vector stmt in loop:%G",
12222 stmt);
12223
12224 tree vectype;
12225 tree scalar_type = NULL_TREE;
12226 if (group_size == 0 && STMT_VINFO_VECTYPE (stmt_info))
12227 {
12228 vectype = STMT_VINFO_VECTYPE (stmt_info);
12229 if (dump_enabled_p ())
12230 dump_printf_loc (MSG_NOTE, vect_location,
12231 "precomputed vectype: %T\n", vectype);
12232 }
12233 else if (vect_use_mask_type_p (stmt_info))
12234 {
12235 unsigned int precision = stmt_info->mask_precision;
12236 scalar_type = build_nonstandard_integer_type (precision, 1);
12237 vectype = get_mask_type_for_scalar_type (vinfo, scalar_type, group_size);
12238 if (!vectype)
12239 return opt_result::failure_at (stmt, "not vectorized: unsupported"
12240 " data-type %T\n", scalar_type);
12241 if (dump_enabled_p ())
12242 dump_printf_loc (MSG_NOTE, vect_location, "vectype: %T\n", vectype);
12243 }
12244 else
12245 {
12246 if (data_reference *dr = STMT_VINFO_DATA_REF (stmt_info))
12247 scalar_type = TREE_TYPE (DR_REF (dr));
12248 else if (gimple_call_internal_p (stmt, IFN_MASK_STORE))
12249 scalar_type = TREE_TYPE (gimple_call_arg (stmt, 3));
12250 else
12251 scalar_type = TREE_TYPE (gimple_get_lhs (stmt));
12252
12253 if (dump_enabled_p ())
12254 {
12255 if (group_size)
12256 dump_printf_loc (MSG_NOTE, vect_location,
12257 "get vectype for scalar type (group size %d):"
12258 " %T\n", group_size, scalar_type);
12259 else
12260 dump_printf_loc (MSG_NOTE, vect_location,
12261 "get vectype for scalar type: %T\n", scalar_type);
12262 }
12263 vectype = get_vectype_for_scalar_type (vinfo, scalar_type, group_size);
12264 if (!vectype)
12265 return opt_result::failure_at (stmt,
12266 "not vectorized:"
12267 " unsupported data-type %T\n",
12268 scalar_type);
12269
12270 if (dump_enabled_p ())
12271 dump_printf_loc (MSG_NOTE, vect_location, "vectype: %T\n", vectype);
12272 }
12273 *stmt_vectype_out = vectype;
12274
12275 /* Don't try to compute scalar types if the stmt produces a boolean
12276 vector; use the existing vector type instead. */
12277 tree nunits_vectype = vectype;
12278 if (!VECTOR_BOOLEAN_TYPE_P (vectype))
12279 {
12280 /* The number of units is set according to the smallest scalar
12281 type (or the largest vector size, but we only support one
12282 vector size per vectorization). */
12283 HOST_WIDE_INT dummy;
12284 scalar_type = vect_get_smallest_scalar_type (stmt_info, &dummy, &dummy);
12285 if (scalar_type != TREE_TYPE (vectype))
12286 {
12287 if (dump_enabled_p ())
12288 dump_printf_loc (MSG_NOTE, vect_location,
12289 "get vectype for smallest scalar type: %T\n",
12290 scalar_type);
12291 nunits_vectype = get_vectype_for_scalar_type (vinfo, scalar_type,
12292 group_size);
12293 if (!nunits_vectype)
12294 return opt_result::failure_at
12295 (stmt, "not vectorized: unsupported data-type %T\n",
12296 scalar_type);
12297 if (dump_enabled_p ())
12298 dump_printf_loc (MSG_NOTE, vect_location, "nunits vectype: %T\n",
12299 nunits_vectype);
12300 }
12301 }
12302
12303 gcc_assert (multiple_p (TYPE_VECTOR_SUBPARTS (nunits_vectype),
12304 TYPE_VECTOR_SUBPARTS (*stmt_vectype_out)));
12305
12306 if (dump_enabled_p ())
12307 {
12308 dump_printf_loc (MSG_NOTE, vect_location, "nunits = ");
12309 dump_dec (MSG_NOTE, TYPE_VECTOR_SUBPARTS (nunits_vectype));
12310 dump_printf (MSG_NOTE, "\n");
12311 }
12312
12313 *nunits_vectype_out = nunits_vectype;
12314 return opt_result::success ();
12315 }
12316