/* Statement Analysis and Transformation for Vectorization
   Copyright (C) 2003-2022 Free Software Foundation, Inc.
   Contributed by Dorit Naishlos <dorit@il.ibm.com>
   and Ira Rosen <irar@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "target.h"
#include "rtl.h"
#include "tree.h"
#include "gimple.h"
#include "ssa.h"
#include "optabs-tree.h"
#include "insn-config.h"
#include "recog.h"		/* FIXME: for insn_data */
#include "cgraph.h"
#include "dumpfile.h"
#include "alias.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "tree-eh.h"
#include "gimplify.h"
#include "gimple-iterator.h"
#include "gimplify-me.h"
#include "tree-cfg.h"
#include "tree-ssa-loop-manip.h"
#include "cfgloop.h"
#include "explow.h"
#include "tree-ssa-loop.h"
#include "tree-scalar-evolution.h"
#include "tree-vectorizer.h"
#include "builtins.h"
#include "internal-fn.h"
#include "tree-vector-builder.h"
#include "vec-perm-indices.h"
#include "tree-ssa-loop-niter.h"
#include "gimple-fold.h"
#include "regs.h"
#include "attribs.h"

/* For lang_hooks.types.type_for_mode.  */
#include "langhooks.h"

62 /* Return the vectorized type for the given statement. */
63
64 tree
stmt_vectype (class _stmt_vec_info *stmt_info)
66 {
67 return STMT_VINFO_VECTYPE (stmt_info);
68 }
69
70 /* Return TRUE iff the given statement is in an inner loop relative to
71 the loop being vectorized. */
72 bool
stmt_in_inner_loop_p (vec_info *vinfo, class _stmt_vec_info *stmt_info)
74 {
75 gimple *stmt = STMT_VINFO_STMT (stmt_info);
76 basic_block bb = gimple_bb (stmt);
77 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
78 class loop* loop;
79
80 if (!loop_vinfo)
81 return false;
82
83 loop = LOOP_VINFO_LOOP (loop_vinfo);
84
85 return (bb->loop_father == loop->inner);
86 }
87
88 /* Record the cost of a statement, either by directly informing the
89 target model or by saving it in a vector for later processing.
90 Return a preliminary estimate of the statement's cost. */
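
/* For illustration (placeholder variable names), a caller typically uses one
   of the overloads below as

     unsigned cost = record_stmt_cost (cost_vec, ncopies, vector_stmt,
				       stmt_info, vectype, 0, vect_body);

   which pushes a stmt_info_for_cost entry onto the vector and returns a
   rough builtin_vectorization_cost-based estimate; the final cost is
   computed later, when the recorded vector is handed to the target's
   vector cost model.  */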
91
92 static unsigned
record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
94 enum vect_cost_for_stmt kind,
95 stmt_vec_info stmt_info, slp_tree node,
96 tree vectype, int misalign,
97 enum vect_cost_model_location where)
98 {
99 if ((kind == vector_load || kind == unaligned_load)
100 && (stmt_info && STMT_VINFO_GATHER_SCATTER_P (stmt_info)))
101 kind = vector_gather_load;
102 if ((kind == vector_store || kind == unaligned_store)
103 && (stmt_info && STMT_VINFO_GATHER_SCATTER_P (stmt_info)))
104 kind = vector_scatter_store;
105
106 stmt_info_for_cost si
107 = { count, kind, where, stmt_info, node, vectype, misalign };
108 body_cost_vec->safe_push (si);
109
110 return (unsigned)
111 (builtin_vectorization_cost (kind, vectype, misalign) * count);
112 }
113
114 unsigned
record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
116 enum vect_cost_for_stmt kind, stmt_vec_info stmt_info,
117 tree vectype, int misalign,
118 enum vect_cost_model_location where)
119 {
120 return record_stmt_cost (body_cost_vec, count, kind, stmt_info, NULL,
121 vectype, misalign, where);
122 }
123
124 unsigned
record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
126 enum vect_cost_for_stmt kind, slp_tree node,
127 tree vectype, int misalign,
128 enum vect_cost_model_location where)
129 {
130 return record_stmt_cost (body_cost_vec, count, kind, NULL, node,
131 vectype, misalign, where);
132 }
133
134 unsigned
record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
136 enum vect_cost_for_stmt kind,
137 enum vect_cost_model_location where)
138 {
139 gcc_assert (kind == cond_branch_taken || kind == cond_branch_not_taken
140 || kind == scalar_stmt);
141 return record_stmt_cost (body_cost_vec, count, kind, NULL, NULL,
142 NULL_TREE, 0, where);
143 }
144
145 /* Return a variable of type ELEM_TYPE[NELEMS]. */
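
/* For example, for a group of two V4SF vectors this creates a temporary
   declared roughly as

     vector(4) float vect_array[2];

   which the load/store-lanes support below accesses through
   read_vector_array and write_vector_array.  */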
146
147 static tree
create_vector_array (tree elem_type, unsigned HOST_WIDE_INT nelems)
149 {
150 return create_tmp_var (build_array_type_nelts (elem_type, nelems),
151 "vect_array");
152 }
153
154 /* ARRAY is an array of vectors created by create_vector_array.
155 Return an SSA_NAME for the vector in index N. The reference
156 is part of the vectorization of STMT_INFO and the vector is associated
157 with scalar destination SCALAR_DEST. */
158
159 static tree
read_vector_array (vec_info *vinfo,
161 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
162 tree scalar_dest, tree array, unsigned HOST_WIDE_INT n)
163 {
164 tree vect_type, vect, vect_name, array_ref;
165 gimple *new_stmt;
166
167 gcc_assert (TREE_CODE (TREE_TYPE (array)) == ARRAY_TYPE);
168 vect_type = TREE_TYPE (TREE_TYPE (array));
169 vect = vect_create_destination_var (scalar_dest, vect_type);
170 array_ref = build4 (ARRAY_REF, vect_type, array,
171 build_int_cst (size_type_node, n),
172 NULL_TREE, NULL_TREE);
173
174 new_stmt = gimple_build_assign (vect, array_ref);
175 vect_name = make_ssa_name (vect, new_stmt);
176 gimple_assign_set_lhs (new_stmt, vect_name);
177 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
178
179 return vect_name;
180 }
181
182 /* ARRAY is an array of vectors created by create_vector_array.
183 Emit code to store SSA_NAME VECT in index N of the array.
184 The store is part of the vectorization of STMT_INFO. */
185
186 static void
write_vector_array (vec_info *vinfo,
188 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
189 tree vect, tree array, unsigned HOST_WIDE_INT n)
190 {
191 tree array_ref;
192 gimple *new_stmt;
193
194 array_ref = build4 (ARRAY_REF, TREE_TYPE (vect), array,
195 build_int_cst (size_type_node, n),
196 NULL_TREE, NULL_TREE);
197
198 new_stmt = gimple_build_assign (array_ref, vect);
199 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
200 }
201
202 /* PTR is a pointer to an array of type TYPE. Return a representation
203 of *PTR. The memory reference replaces those in FIRST_DR
204 (and its group). */
205
206 static tree
create_array_ref (tree type, tree ptr, tree alias_ptr_type)
208 {
209 tree mem_ref;
210
211 mem_ref = build2 (MEM_REF, type, ptr, build_int_cst (alias_ptr_type, 0));
212 /* Arrays have the same alignment as their type. */
213 set_ptr_info_alignment (get_ptr_info (ptr), TYPE_ALIGN_UNIT (type), 0);
214 return mem_ref;
215 }
216
217 /* Add a clobber of variable VAR to the vectorization of STMT_INFO.
218 Emit the clobber before *GSI. */
219
220 static void
vect_clobber_variable (vec_info *vinfo, stmt_vec_info stmt_info,
222 gimple_stmt_iterator *gsi, tree var)
223 {
224 tree clobber = build_clobber (TREE_TYPE (var));
225 gimple *new_stmt = gimple_build_assign (var, clobber);
226 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
227 }
228
229 /* Utility functions used by vect_mark_stmts_to_be_vectorized. */
230
231 /* Function vect_mark_relevant.
232
233 Mark STMT_INFO as "relevant for vectorization" and add it to WORKLIST. */
234
235 static void
vect_mark_relevant (vec<stmt_vec_info> *worklist, stmt_vec_info stmt_info,
237 enum vect_relevant relevant, bool live_p)
238 {
239 enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
240 bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);
241
242 if (dump_enabled_p ())
243 dump_printf_loc (MSG_NOTE, vect_location,
244 "mark relevant %d, live %d: %G", relevant, live_p,
245 stmt_info->stmt);
246
247 /* If this stmt is an original stmt in a pattern, we might need to mark its
248 related pattern stmt instead of the original stmt. However, such stmts
249 may have their own uses that are not in any pattern, in such cases the
250 stmt itself should be marked. */
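  /* For example (a hypothetical widening-multiply pattern), if

       S:  x = (int) a_short * (int) b_short;

     was replaced by the pattern statement

       S': x' = WIDEN_MULT_EXPR <a_short, b_short>;

     then a request to mark S relevant is redirected to S' below, unless
     S also has uses of its own outside the pattern.  */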
251 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
252 {
253 /* This is the last stmt in a sequence that was detected as a
254 pattern that can potentially be vectorized. Don't mark the stmt
255 as relevant/live because it's not going to be vectorized.
256 Instead mark the pattern-stmt that replaces it. */
257
258 if (dump_enabled_p ())
259 dump_printf_loc (MSG_NOTE, vect_location,
260 "last stmt in pattern. don't mark"
261 " relevant/live.\n");
262 stmt_vec_info old_stmt_info = stmt_info;
263 stmt_info = STMT_VINFO_RELATED_STMT (stmt_info);
264 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == old_stmt_info);
265 save_relevant = STMT_VINFO_RELEVANT (stmt_info);
266 save_live_p = STMT_VINFO_LIVE_P (stmt_info);
267 }
268
269 STMT_VINFO_LIVE_P (stmt_info) |= live_p;
270 if (relevant > STMT_VINFO_RELEVANT (stmt_info))
271 STMT_VINFO_RELEVANT (stmt_info) = relevant;
272
273 if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
274 && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
275 {
276 if (dump_enabled_p ())
277 dump_printf_loc (MSG_NOTE, vect_location,
278 "already marked relevant/live.\n");
279 return;
280 }
281
282 worklist->safe_push (stmt_info);
283 }
284
285
286 /* Function is_simple_and_all_uses_invariant
287
288 Return true if STMT_INFO is simple and all uses of it are invariant. */
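
/* For instance (hypothetical SSA names), an assignment

     x_3 = a_1 + b_2;

   qualifies when a_1 and b_2 are defined outside the loop or are
   constants, i.e. every operand is a vect_external_def or
   vect_constant_def.  */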
289
290 bool
is_simple_and_all_uses_invariant (stmt_vec_info stmt_info,
292 loop_vec_info loop_vinfo)
293 {
294 tree op;
295 ssa_op_iter iter;
296
297 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
298 if (!stmt)
299 return false;
300
301 FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_USE)
302 {
303 enum vect_def_type dt = vect_uninitialized_def;
304
305 if (!vect_is_simple_use (op, loop_vinfo, &dt))
306 {
307 if (dump_enabled_p ())
308 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
309 "use not simple.\n");
310 return false;
311 }
312
313 if (dt != vect_external_def && dt != vect_constant_def)
314 return false;
315 }
316 return true;
317 }
318
319 /* Function vect_stmt_relevant_p.
320
321 Return true if STMT_INFO, in the loop that is represented by LOOP_VINFO,
322 is "relevant for vectorization".
323
   A stmt is considered "relevant for vectorization" if:
   - it has uses outside the loop.
   - it has vdefs (it alters memory).
   - it is a control stmt in the loop (except for the loop exit condition).

   CHECKME: what other side effects would the vectorizer allow?  */
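
/* For example (hypothetical SSA names), a statement

     last_5 = a_3 + 1;

   whose result is used only in a loop-exit PHI (with a_3 defined inside
   the loop) has no relevant use inside the loop, but *LIVE_P is set, and
   since it is not invariant *RELEVANT is then raised to
   vect_used_only_live.  */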
330
331 static bool
vect_stmt_relevant_p (stmt_vec_info stmt_info, loop_vec_info loop_vinfo,
333 enum vect_relevant *relevant, bool *live_p)
334 {
335 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
336 ssa_op_iter op_iter;
337 imm_use_iterator imm_iter;
338 use_operand_p use_p;
339 def_operand_p def_p;
340
341 *relevant = vect_unused_in_scope;
342 *live_p = false;
343
344 /* cond stmt other than loop exit cond. */
345 if (is_ctrl_stmt (stmt_info->stmt)
346 && STMT_VINFO_TYPE (stmt_info) != loop_exit_ctrl_vec_info_type)
347 *relevant = vect_used_in_scope;
348
349 /* changing memory. */
350 if (gimple_code (stmt_info->stmt) != GIMPLE_PHI)
351 if (gimple_vdef (stmt_info->stmt)
352 && !gimple_clobber_p (stmt_info->stmt))
353 {
354 if (dump_enabled_p ())
355 dump_printf_loc (MSG_NOTE, vect_location,
356 "vec_stmt_relevant_p: stmt has vdefs.\n");
357 *relevant = vect_used_in_scope;
358 }
359
360 /* uses outside the loop. */
361 FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt_info->stmt, op_iter, SSA_OP_DEF)
362 {
363 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
364 {
365 basic_block bb = gimple_bb (USE_STMT (use_p));
366 if (!flow_bb_inside_loop_p (loop, bb))
367 {
368 if (is_gimple_debug (USE_STMT (use_p)))
369 continue;
370
371 if (dump_enabled_p ())
372 dump_printf_loc (MSG_NOTE, vect_location,
373 "vec_stmt_relevant_p: used out of loop.\n");
374
375 /* We expect all such uses to be in the loop exit phis
376 (because of loop closed form) */
377 gcc_assert (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI);
378 gcc_assert (bb == single_exit (loop)->dest);
379
380 *live_p = true;
381 }
382 }
383 }
384
385 if (*live_p && *relevant == vect_unused_in_scope
386 && !is_simple_and_all_uses_invariant (stmt_info, loop_vinfo))
387 {
388 if (dump_enabled_p ())
389 dump_printf_loc (MSG_NOTE, vect_location,
390 "vec_stmt_relevant_p: stmt live but not relevant.\n");
391 *relevant = vect_used_only_live;
392 }
393
394 return (*live_p || *relevant);
395 }
396
397
398 /* Function exist_non_indexing_operands_for_use_p
399
400 USE is one of the uses attached to STMT_INFO. Check if USE is
401 used in STMT_INFO for anything other than indexing an array. */
402
403 static bool
exist_non_indexing_operands_for_use_p (tree use, stmt_vec_info stmt_info)
405 {
406 tree operand;
407
408 /* USE corresponds to some operand in STMT. If there is no data
409 reference in STMT, then any operand that corresponds to USE
410 is not indexing an array. */
411 if (!STMT_VINFO_DATA_REF (stmt_info))
412 return true;
413
  /* STMT has a data_ref.  FORNOW this means that it's of one of
     the following forms:
     -1- ARRAY_REF = var
     -2- var = ARRAY_REF
     (This should have been verified in analyze_data_refs).

     'var' in the second case corresponds to a def, not a use,
     so USE cannot correspond to any operands that are not used
     for array indexing.

     Therefore, all we need to check is if STMT falls into the
     first case, and whether var corresponds to USE.  */
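
  /* For example, given a store like

       a[i_1] = x_2;

     the use i_1 only feeds the array index, so this function returns
     false for it, while for x_2 (the stored value) it returns true.  */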
426
427 gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
428 if (!assign || !gimple_assign_copy_p (assign))
429 {
430 gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
431 if (call && gimple_call_internal_p (call))
432 {
433 internal_fn ifn = gimple_call_internal_fn (call);
434 int mask_index = internal_fn_mask_index (ifn);
435 if (mask_index >= 0
436 && use == gimple_call_arg (call, mask_index))
437 return true;
438 int stored_value_index = internal_fn_stored_value_index (ifn);
439 if (stored_value_index >= 0
440 && use == gimple_call_arg (call, stored_value_index))
441 return true;
442 if (internal_gather_scatter_fn_p (ifn)
443 && use == gimple_call_arg (call, 1))
444 return true;
445 }
446 return false;
447 }
448
449 if (TREE_CODE (gimple_assign_lhs (assign)) == SSA_NAME)
450 return false;
451 operand = gimple_assign_rhs1 (assign);
452 if (TREE_CODE (operand) != SSA_NAME)
453 return false;
454
455 if (operand == use)
456 return true;
457
458 return false;
459 }
460
461
462 /*
463 Function process_use.
464
465 Inputs:
466 - a USE in STMT_VINFO in a loop represented by LOOP_VINFO
467 - RELEVANT - enum value to be set in the STMT_VINFO of the stmt
468 that defined USE. This is done by calling mark_relevant and passing it
469 the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
470 - FORCE is true if exist_non_indexing_operands_for_use_p check shouldn't
471 be performed.
472
473 Outputs:
474 Generally, LIVE_P and RELEVANT are used to define the liveness and
475 relevance info of the DEF_STMT of this USE:
476 STMT_VINFO_LIVE_P (DEF_stmt_vinfo) <-- live_p
477 STMT_VINFO_RELEVANT (DEF_stmt_vinfo) <-- relevant
478 Exceptions:
479 - case 1: If USE is used only for address computations (e.g. array indexing),
480 which does not need to be directly vectorized, then the liveness/relevance
481 of the respective DEF_STMT is left unchanged.
   - case 2: If STMT_VINFO is a reduction phi and DEF_STMT is a reduction stmt,
     we skip DEF_STMT because it has already been processed.
484 - case 3: If DEF_STMT and STMT_VINFO are in different nests, then
485 "relevant" will be modified accordingly.
486
487 Return true if everything is as expected. Return false otherwise. */
488
489 static opt_result
process_use (stmt_vec_info stmt_vinfo, tree use, loop_vec_info loop_vinfo,
491 enum vect_relevant relevant, vec<stmt_vec_info> *worklist,
492 bool force)
493 {
494 stmt_vec_info dstmt_vinfo;
495 enum vect_def_type dt;
496
497 /* case 1: we are only interested in uses that need to be vectorized. Uses
498 that are used for address computation are not considered relevant. */
499 if (!force && !exist_non_indexing_operands_for_use_p (use, stmt_vinfo))
500 return opt_result::success ();
501
502 if (!vect_is_simple_use (use, loop_vinfo, &dt, &dstmt_vinfo))
503 return opt_result::failure_at (stmt_vinfo->stmt,
504 "not vectorized:"
505 " unsupported use in stmt.\n");
506
507 if (!dstmt_vinfo)
508 return opt_result::success ();
509
510 basic_block def_bb = gimple_bb (dstmt_vinfo->stmt);
511 basic_block bb = gimple_bb (stmt_vinfo->stmt);
512
513 /* case 2: A reduction phi (STMT) defined by a reduction stmt (DSTMT_VINFO).
514 We have to force the stmt live since the epilogue loop needs it to
515 continue computing the reduction. */
516 if (gimple_code (stmt_vinfo->stmt) == GIMPLE_PHI
517 && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
518 && gimple_code (dstmt_vinfo->stmt) != GIMPLE_PHI
519 && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def
520 && bb->loop_father == def_bb->loop_father)
521 {
522 if (dump_enabled_p ())
523 dump_printf_loc (MSG_NOTE, vect_location,
524 "reduc-stmt defining reduc-phi in the same nest.\n");
525 vect_mark_relevant (worklist, dstmt_vinfo, relevant, true);
526 return opt_result::success ();
527 }
528
529 /* case 3a: outer-loop stmt defining an inner-loop stmt:
530 outer-loop-header-bb:
531 d = dstmt_vinfo
532 inner-loop:
533 stmt # use (d)
534 outer-loop-tail-bb:
535 ... */
536 if (flow_loop_nested_p (def_bb->loop_father, bb->loop_father))
537 {
538 if (dump_enabled_p ())
539 dump_printf_loc (MSG_NOTE, vect_location,
540 "outer-loop def-stmt defining inner-loop stmt.\n");
541
542 switch (relevant)
543 {
544 case vect_unused_in_scope:
545 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_nested_cycle) ?
546 vect_used_in_scope : vect_unused_in_scope;
547 break;
548
549 case vect_used_in_outer_by_reduction:
550 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
551 relevant = vect_used_by_reduction;
552 break;
553
554 case vect_used_in_outer:
555 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
556 relevant = vect_used_in_scope;
557 break;
558
559 case vect_used_in_scope:
560 break;
561
562 default:
563 gcc_unreachable ();
564 }
565 }
566
567 /* case 3b: inner-loop stmt defining an outer-loop stmt:
568 outer-loop-header-bb:
569 ...
570 inner-loop:
571 d = dstmt_vinfo
572 outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
573 stmt # use (d) */
574 else if (flow_loop_nested_p (bb->loop_father, def_bb->loop_father))
575 {
576 if (dump_enabled_p ())
577 dump_printf_loc (MSG_NOTE, vect_location,
578 "inner-loop def-stmt defining outer-loop stmt.\n");
579
580 switch (relevant)
581 {
582 case vect_unused_in_scope:
583 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
584 || STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_double_reduction_def) ?
585 vect_used_in_outer_by_reduction : vect_unused_in_scope;
586 break;
587
588 case vect_used_by_reduction:
589 case vect_used_only_live:
590 relevant = vect_used_in_outer_by_reduction;
591 break;
592
593 case vect_used_in_scope:
594 relevant = vect_used_in_outer;
595 break;
596
597 default:
598 gcc_unreachable ();
599 }
600 }
601 /* We are also not interested in uses on loop PHI backedges that are
602 inductions. Otherwise we'll needlessly vectorize the IV increment
603 and cause hybrid SLP for SLP inductions. Unless the PHI is live
604 of course. */
605 else if (gimple_code (stmt_vinfo->stmt) == GIMPLE_PHI
606 && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_induction_def
607 && ! STMT_VINFO_LIVE_P (stmt_vinfo)
608 && (PHI_ARG_DEF_FROM_EDGE (stmt_vinfo->stmt,
609 loop_latch_edge (bb->loop_father))
610 == use))
611 {
612 if (dump_enabled_p ())
613 dump_printf_loc (MSG_NOTE, vect_location,
614 "induction value on backedge.\n");
615 return opt_result::success ();
616 }
617
618
619 vect_mark_relevant (worklist, dstmt_vinfo, relevant, false);
620 return opt_result::success ();
621 }
622
623
624 /* Function vect_mark_stmts_to_be_vectorized.
625
626 Not all stmts in the loop need to be vectorized. For example:
627
628 for i...
629 for j...
630 1. T0 = i + j
631 2. T1 = a[T0]
632
633 3. j = j + 1
634
   Stmts 1 and 3 do not need to be vectorized, because loop control and
   addressing of vectorized data-refs are handled differently.
637
638 This pass detects such stmts. */
639
640 opt_result
vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo, bool *fatal)
642 {
643 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
644 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
645 unsigned int nbbs = loop->num_nodes;
646 gimple_stmt_iterator si;
647 unsigned int i;
648 basic_block bb;
649 bool live_p;
650 enum vect_relevant relevant;
651
652 DUMP_VECT_SCOPE ("vect_mark_stmts_to_be_vectorized");
653
654 auto_vec<stmt_vec_info, 64> worklist;
655
656 /* 1. Init worklist. */
657 for (i = 0; i < nbbs; i++)
658 {
659 bb = bbs[i];
660 for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
661 {
662 stmt_vec_info phi_info = loop_vinfo->lookup_stmt (gsi_stmt (si));
663 if (dump_enabled_p ())
664 dump_printf_loc (MSG_NOTE, vect_location, "init: phi relevant? %G",
665 phi_info->stmt);
666
667 if (vect_stmt_relevant_p (phi_info, loop_vinfo, &relevant, &live_p))
668 vect_mark_relevant (&worklist, phi_info, relevant, live_p);
669 }
670 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
671 {
672 if (is_gimple_debug (gsi_stmt (si)))
673 continue;
674 stmt_vec_info stmt_info = loop_vinfo->lookup_stmt (gsi_stmt (si));
675 if (dump_enabled_p ())
676 dump_printf_loc (MSG_NOTE, vect_location,
677 "init: stmt relevant? %G", stmt_info->stmt);
678
679 if (vect_stmt_relevant_p (stmt_info, loop_vinfo, &relevant, &live_p))
680 vect_mark_relevant (&worklist, stmt_info, relevant, live_p);
681 }
682 }
683
684 /* 2. Process_worklist */
685 while (worklist.length () > 0)
686 {
687 use_operand_p use_p;
688 ssa_op_iter iter;
689
690 stmt_vec_info stmt_vinfo = worklist.pop ();
691 if (dump_enabled_p ())
692 dump_printf_loc (MSG_NOTE, vect_location,
693 "worklist: examine stmt: %G", stmt_vinfo->stmt);
694
695 /* Examine the USEs of STMT. For each USE, mark the stmt that defines it
696 (DEF_STMT) as relevant/irrelevant according to the relevance property
697 of STMT. */
698 relevant = STMT_VINFO_RELEVANT (stmt_vinfo);
699
700 /* Generally, the relevance property of STMT (in STMT_VINFO_RELEVANT) is
701 propagated as is to the DEF_STMTs of its USEs.
702
703 One exception is when STMT has been identified as defining a reduction
704 variable; in this case we set the relevance to vect_used_by_reduction.
705 This is because we distinguish between two kinds of relevant stmts -
706 those that are used by a reduction computation, and those that are
707 (also) used by a regular computation. This allows us later on to
708 identify stmts that are used solely by a reduction, and therefore the
709 order of the results that they produce does not have to be kept. */
710
711 switch (STMT_VINFO_DEF_TYPE (stmt_vinfo))
712 {
713 case vect_reduction_def:
714 gcc_assert (relevant != vect_unused_in_scope);
715 if (relevant != vect_unused_in_scope
716 && relevant != vect_used_in_scope
717 && relevant != vect_used_by_reduction
718 && relevant != vect_used_only_live)
719 return opt_result::failure_at
720 (stmt_vinfo->stmt, "unsupported use of reduction.\n");
721 break;
722
723 case vect_nested_cycle:
724 if (relevant != vect_unused_in_scope
725 && relevant != vect_used_in_outer_by_reduction
726 && relevant != vect_used_in_outer)
727 return opt_result::failure_at
728 (stmt_vinfo->stmt, "unsupported use of nested cycle.\n");
729 break;
730
731 case vect_double_reduction_def:
732 if (relevant != vect_unused_in_scope
733 && relevant != vect_used_by_reduction
734 && relevant != vect_used_only_live)
735 return opt_result::failure_at
736 (stmt_vinfo->stmt, "unsupported use of double reduction.\n");
737 break;
738
739 default:
740 break;
741 }
742
743 if (is_pattern_stmt_p (stmt_vinfo))
744 {
745 /* Pattern statements are not inserted into the code, so
746 FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
747 have to scan the RHS or function arguments instead. */
748 if (gassign *assign = dyn_cast <gassign *> (stmt_vinfo->stmt))
749 {
750 enum tree_code rhs_code = gimple_assign_rhs_code (assign);
751 tree op = gimple_assign_rhs1 (assign);
752
753 i = 1;
754 if (rhs_code == COND_EXPR && COMPARISON_CLASS_P (op))
755 {
756 opt_result res
757 = process_use (stmt_vinfo, TREE_OPERAND (op, 0),
758 loop_vinfo, relevant, &worklist, false);
759 if (!res)
760 return res;
761 res = process_use (stmt_vinfo, TREE_OPERAND (op, 1),
762 loop_vinfo, relevant, &worklist, false);
763 if (!res)
764 return res;
765 i = 2;
766 }
767 for (; i < gimple_num_ops (assign); i++)
768 {
769 op = gimple_op (assign, i);
770 if (TREE_CODE (op) == SSA_NAME)
771 {
772 opt_result res
773 = process_use (stmt_vinfo, op, loop_vinfo, relevant,
774 &worklist, false);
775 if (!res)
776 return res;
777 }
778 }
779 }
780 else if (gcall *call = dyn_cast <gcall *> (stmt_vinfo->stmt))
781 {
782 for (i = 0; i < gimple_call_num_args (call); i++)
783 {
784 tree arg = gimple_call_arg (call, i);
785 opt_result res
786 = process_use (stmt_vinfo, arg, loop_vinfo, relevant,
787 &worklist, false);
788 if (!res)
789 return res;
790 }
791 }
792 }
793 else
794 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt_vinfo->stmt, iter, SSA_OP_USE)
795 {
796 tree op = USE_FROM_PTR (use_p);
797 opt_result res
798 = process_use (stmt_vinfo, op, loop_vinfo, relevant,
799 &worklist, false);
800 if (!res)
801 return res;
802 }
803
804 if (STMT_VINFO_GATHER_SCATTER_P (stmt_vinfo))
805 {
806 gather_scatter_info gs_info;
807 if (!vect_check_gather_scatter (stmt_vinfo, loop_vinfo, &gs_info))
808 gcc_unreachable ();
809 opt_result res
810 = process_use (stmt_vinfo, gs_info.offset, loop_vinfo, relevant,
811 &worklist, true);
812 if (!res)
813 {
814 if (fatal)
815 *fatal = false;
816 return res;
817 }
818 }
819 } /* while worklist */
820
821 return opt_result::success ();
822 }
823
824 /* Function vect_model_simple_cost.
825
826 Models cost for simple operations, i.e. those that only emit ncopies of a
827 single op. Right now, this does not account for multiple insns that could
828 be generated for the single vector op. We will handle that shortly. */
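
/* For illustration, a simple vector add with one loop-invariant operand
   and NCOPIES == 2 would record one scalar_to_vec prologue cost for the
   invariant operand plus two vector_stmt body costs (assuming no SLP
   node; with a node, NCOPIES is taken from the node instead and the
   prologue cost is not recorded here).  */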
829
830 static void
vect_model_simple_cost (vec_info *,
832 stmt_vec_info stmt_info, int ncopies,
833 enum vect_def_type *dt,
834 int ndts,
835 slp_tree node,
836 stmt_vector_for_cost *cost_vec,
837 vect_cost_for_stmt kind = vector_stmt)
838 {
839 int inside_cost = 0, prologue_cost = 0;
840
841 gcc_assert (cost_vec != NULL);
842
843 /* ??? Somehow we need to fix this at the callers. */
844 if (node)
845 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (node);
846
847 if (!node)
    /* Cost the "broadcast" of a scalar operand into a vector operand.
       Use scalar_to_vec to cost the broadcast, as elsewhere in the vector
       cost model.  */
851 for (int i = 0; i < ndts; i++)
852 if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
853 prologue_cost += record_stmt_cost (cost_vec, 1, scalar_to_vec,
854 stmt_info, 0, vect_prologue);
855
856 /* Pass the inside-of-loop statements to the target-specific cost model. */
857 inside_cost += record_stmt_cost (cost_vec, ncopies, kind,
858 stmt_info, 0, vect_body);
859
860 if (dump_enabled_p ())
861 dump_printf_loc (MSG_NOTE, vect_location,
862 "vect_model_simple_cost: inside_cost = %d, "
863 "prologue_cost = %d .\n", inside_cost, prologue_cost);
864 }
865
866
867 /* Model cost for type demotion and promotion operations. PWR is
868 normally zero for single-step promotions and demotions. It will be
869 one if two-step promotion/demotion is required, and so on. NCOPIES
870 is the number of vector results (and thus number of instructions)
871 for the narrowest end of the operation chain. Each additional
872 step doubles the number of instructions required. If WIDEN_ARITH
873 is true the stmt is doing widening arithmetic. */
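
/* For example, a two-step promotion (PWR == 1) with NCOPIES == 1 records
   1 + 2 = 3 vec_promote_demote (or vector_stmt, for widening arithmetic)
   statements in the loop body, since each extra step doubles NCOPIES.  */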
874
875 static void
vect_model_promotion_demotion_cost (stmt_vec_info stmt_info,
877 enum vect_def_type *dt,
878 unsigned int ncopies, int pwr,
879 stmt_vector_for_cost *cost_vec,
880 bool widen_arith)
881 {
882 int i;
883 int inside_cost = 0, prologue_cost = 0;
884
885 for (i = 0; i < pwr + 1; i++)
886 {
887 inside_cost += record_stmt_cost (cost_vec, ncopies,
888 widen_arith
889 ? vector_stmt : vec_promote_demote,
890 stmt_info, 0, vect_body);
891 ncopies *= 2;
892 }
893
894 /* FORNOW: Assuming maximum 2 args per stmts. */
895 for (i = 0; i < 2; i++)
896 if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
897 prologue_cost += record_stmt_cost (cost_vec, 1, vector_stmt,
898 stmt_info, 0, vect_prologue);
899
900 if (dump_enabled_p ())
901 dump_printf_loc (MSG_NOTE, vect_location,
902 "vect_model_promotion_demotion_cost: inside_cost = %d, "
903 "prologue_cost = %d .\n", inside_cost, prologue_cost);
904 }
905
906 /* Returns true if the current function returns DECL. */
907
908 static bool
cfun_returns (tree decl)
910 {
911 edge_iterator ei;
912 edge e;
913 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
914 {
915 greturn *ret = safe_dyn_cast <greturn *> (last_stmt (e->src));
916 if (!ret)
917 continue;
918 if (gimple_return_retval (ret) == decl)
919 return true;
920 /* We often end up with an aggregate copy to the result decl,
921 handle that case as well. First skip intermediate clobbers
922 though. */
923 gimple *def = ret;
924 do
925 {
926 def = SSA_NAME_DEF_STMT (gimple_vuse (def));
927 }
928 while (gimple_clobber_p (def));
929 if (is_a <gassign *> (def)
930 && gimple_assign_lhs (def) == gimple_return_retval (ret)
931 && gimple_assign_rhs1 (def) == decl)
932 return true;
933 }
934 return false;
935 }
936
937 /* Function vect_model_store_cost
938
939 Models cost for stores. In the case of grouped accesses, one access
940 has the overhead of the grouped access attributed to it. */
941
942 static void
vect_model_store_cost (vec_info *vinfo, stmt_vec_info stmt_info, int ncopies,
944 vect_memory_access_type memory_access_type,
945 dr_alignment_support alignment_support_scheme,
946 int misalignment,
947 vec_load_store_type vls_type, slp_tree slp_node,
948 stmt_vector_for_cost *cost_vec)
949 {
950 unsigned int inside_cost = 0, prologue_cost = 0;
951 stmt_vec_info first_stmt_info = stmt_info;
952 bool grouped_access_p = STMT_VINFO_GROUPED_ACCESS (stmt_info);
953
954 /* ??? Somehow we need to fix this at the callers. */
955 if (slp_node)
956 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
957
958 if (vls_type == VLS_STORE_INVARIANT)
959 {
960 if (!slp_node)
961 prologue_cost += record_stmt_cost (cost_vec, 1, scalar_to_vec,
962 stmt_info, 0, vect_prologue);
963 }
964
965 /* Grouped stores update all elements in the group at once,
966 so we want the DR for the first statement. */
967 if (!slp_node && grouped_access_p)
968 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
969
970 /* True if we should include any once-per-group costs as well as
971 the cost of the statement itself. For SLP we only get called
972 once per group anyhow. */
973 bool first_stmt_p = (first_stmt_info == stmt_info);
974
975 /* We assume that the cost of a single store-lanes instruction is
976 equivalent to the cost of DR_GROUP_SIZE separate stores. If a grouped
977 access is instead being provided by a permute-and-store operation,
978 include the cost of the permutes. */
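
  /* E.g. for a group of size 4 and NCOPIES == 2 the permute cost below
     amounts to 2 * ceil_log2 (4) * 4 == 16 vec_perm statements.  */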
979 if (first_stmt_p
980 && memory_access_type == VMAT_CONTIGUOUS_PERMUTE)
981 {
      /* Uses high and low interleave or shuffle operations for each
         needed permute.  */
984 int group_size = DR_GROUP_SIZE (first_stmt_info);
985 int nstmts = ncopies * ceil_log2 (group_size) * group_size;
986 inside_cost = record_stmt_cost (cost_vec, nstmts, vec_perm,
987 stmt_info, 0, vect_body);
988
989 if (dump_enabled_p ())
990 dump_printf_loc (MSG_NOTE, vect_location,
991 "vect_model_store_cost: strided group_size = %d .\n",
992 group_size);
993 }
994
995 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
996 /* Costs of the stores. */
997 if (memory_access_type == VMAT_ELEMENTWISE
998 || memory_access_type == VMAT_GATHER_SCATTER)
999 {
1000 /* N scalar stores plus extracting the elements. */
1001 unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
1002 inside_cost += record_stmt_cost (cost_vec,
1003 ncopies * assumed_nunits,
1004 scalar_store, stmt_info, 0, vect_body);
1005 }
1006 else
1007 vect_get_store_cost (vinfo, stmt_info, ncopies, alignment_support_scheme,
1008 misalignment, &inside_cost, cost_vec);
1009
1010 if (memory_access_type == VMAT_ELEMENTWISE
1011 || memory_access_type == VMAT_STRIDED_SLP)
1012 {
1013 /* N scalar stores plus extracting the elements. */
1014 unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
1015 inside_cost += record_stmt_cost (cost_vec,
1016 ncopies * assumed_nunits,
1017 vec_to_scalar, stmt_info, 0, vect_body);
1018 }
1019
1020 /* When vectorizing a store into the function result assign
1021 a penalty if the function returns in a multi-register location.
1022 In this case we assume we'll end up with having to spill the
1023 vector result and do piecewise loads as a conservative estimate. */
1024 tree base = get_base_address (STMT_VINFO_DATA_REF (stmt_info)->ref);
1025 if (base
1026 && (TREE_CODE (base) == RESULT_DECL
1027 || (DECL_P (base) && cfun_returns (base)))
1028 && !aggregate_value_p (base, cfun->decl))
1029 {
1030 rtx reg = hard_function_value (TREE_TYPE (base), cfun->decl, 0, 1);
1031 /* ??? Handle PARALLEL in some way. */
1032 if (REG_P (reg))
1033 {
1034 int nregs = hard_regno_nregs (REGNO (reg), GET_MODE (reg));
1035 /* Assume that a single reg-reg move is possible and cheap,
1036 do not account for vector to gp register move cost. */
1037 if (nregs > 1)
1038 {
1039 /* Spill. */
1040 prologue_cost += record_stmt_cost (cost_vec, ncopies,
1041 vector_store,
1042 stmt_info, 0, vect_epilogue);
1043 /* Loads. */
1044 prologue_cost += record_stmt_cost (cost_vec, ncopies * nregs,
1045 scalar_load,
1046 stmt_info, 0, vect_epilogue);
1047 }
1048 }
1049 }
1050
1051 if (dump_enabled_p ())
1052 dump_printf_loc (MSG_NOTE, vect_location,
1053 "vect_model_store_cost: inside_cost = %d, "
1054 "prologue_cost = %d .\n", inside_cost, prologue_cost);
1055 }
1056
1057
1058 /* Calculate cost of DR's memory access. */
1059 void
vect_get_store_cost (vec_info *, stmt_vec_info stmt_info, int ncopies,
1061 dr_alignment_support alignment_support_scheme,
1062 int misalignment,
1063 unsigned int *inside_cost,
1064 stmt_vector_for_cost *body_cost_vec)
1065 {
1066 switch (alignment_support_scheme)
1067 {
1068 case dr_aligned:
1069 {
1070 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1071 vector_store, stmt_info, 0,
1072 vect_body);
1073
1074 if (dump_enabled_p ())
1075 dump_printf_loc (MSG_NOTE, vect_location,
1076 "vect_model_store_cost: aligned.\n");
1077 break;
1078 }
1079
1080 case dr_unaligned_supported:
1081 {
1082 /* Here, we assign an additional cost for the unaligned store. */
1083 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1084 unaligned_store, stmt_info,
1085 misalignment, vect_body);
1086 if (dump_enabled_p ())
1087 dump_printf_loc (MSG_NOTE, vect_location,
1088 "vect_model_store_cost: unaligned supported by "
1089 "hardware.\n");
1090 break;
1091 }
1092
1093 case dr_unaligned_unsupported:
1094 {
1095 *inside_cost = VECT_MAX_COST;
1096
1097 if (dump_enabled_p ())
1098 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1099 "vect_model_store_cost: unsupported access.\n");
1100 break;
1101 }
1102
1103 default:
1104 gcc_unreachable ();
1105 }
1106 }
1107
1108
1109 /* Function vect_model_load_cost
1110
1111 Models cost for loads. In the case of grouped accesses, one access has
1112 the overhead of the grouped access attributed to it. Since unaligned
1113 accesses are supported for loads, we also account for the costs of the
1114 access scheme chosen. */
1115
1116 static void
vect_model_load_cost (vec_info *vinfo,
1118 stmt_vec_info stmt_info, unsigned ncopies, poly_uint64 vf,
1119 vect_memory_access_type memory_access_type,
1120 dr_alignment_support alignment_support_scheme,
1121 int misalignment,
1122 gather_scatter_info *gs_info,
1123 slp_tree slp_node,
1124 stmt_vector_for_cost *cost_vec)
1125 {
1126 unsigned int inside_cost = 0, prologue_cost = 0;
1127 bool grouped_access_p = STMT_VINFO_GROUPED_ACCESS (stmt_info);
1128
1129 gcc_assert (cost_vec);
1130
1131 /* ??? Somehow we need to fix this at the callers. */
1132 if (slp_node)
1133 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
1134
1135 if (slp_node && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
1136 {
1137 /* If the load is permuted then the alignment is determined by
1138 the first group element not by the first scalar stmt DR. */
1139 stmt_vec_info first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
1140 /* Record the cost for the permutation. */
1141 unsigned n_perms, n_loads;
1142 vect_transform_slp_perm_load (vinfo, slp_node, vNULL, NULL,
1143 vf, true, &n_perms, &n_loads);
1144 inside_cost += record_stmt_cost (cost_vec, n_perms, vec_perm,
1145 first_stmt_info, 0, vect_body);
1146
1147 /* And adjust the number of loads performed. This handles
1148 redundancies as well as loads that are later dead. */
1149 ncopies = n_loads;
1150 }
1151
1152 /* Grouped loads read all elements in the group at once,
1153 so we want the DR for the first statement. */
1154 stmt_vec_info first_stmt_info = stmt_info;
1155 if (!slp_node && grouped_access_p)
1156 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
1157
1158 /* True if we should include any once-per-group costs as well as
1159 the cost of the statement itself. For SLP we only get called
1160 once per group anyhow. */
1161 bool first_stmt_p = (first_stmt_info == stmt_info);
1162
1163 /* An IFN_LOAD_LANES will load all its vector results, regardless of which
1164 ones we actually need. Account for the cost of unused results. */
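  /* E.g. if DR_GROUP_SIZE is 4 but only three loads of the group remain,
     one vector result per copy is unused and its load cost is still
     accounted for below.  */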
1165 if (first_stmt_p && !slp_node && memory_access_type == VMAT_LOAD_STORE_LANES)
1166 {
1167 unsigned int gaps = DR_GROUP_SIZE (first_stmt_info);
1168 stmt_vec_info next_stmt_info = first_stmt_info;
1169 do
1170 {
1171 gaps -= 1;
1172 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
1173 }
1174 while (next_stmt_info);
1175 if (gaps)
1176 {
1177 if (dump_enabled_p ())
1178 dump_printf_loc (MSG_NOTE, vect_location,
1179 "vect_model_load_cost: %d unused vectors.\n",
1180 gaps);
1181 vect_get_load_cost (vinfo, stmt_info, ncopies * gaps,
1182 alignment_support_scheme, misalignment, false,
1183 &inside_cost, &prologue_cost,
1184 cost_vec, cost_vec, true);
1185 }
1186 }
1187
1188 /* We assume that the cost of a single load-lanes instruction is
1189 equivalent to the cost of DR_GROUP_SIZE separate loads. If a grouped
1190 access is instead being provided by a load-and-permute operation,
1191 include the cost of the permutes. */
1192 if (first_stmt_p
1193 && memory_access_type == VMAT_CONTIGUOUS_PERMUTE)
1194 {
      /* Uses even and odd extract operations or shuffle operations
         for each needed permute.  */
1197 int group_size = DR_GROUP_SIZE (first_stmt_info);
1198 int nstmts = ncopies * ceil_log2 (group_size) * group_size;
1199 inside_cost += record_stmt_cost (cost_vec, nstmts, vec_perm,
1200 stmt_info, 0, vect_body);
1201
1202 if (dump_enabled_p ())
1203 dump_printf_loc (MSG_NOTE, vect_location,
1204 "vect_model_load_cost: strided group_size = %d .\n",
1205 group_size);
1206 }
1207
1208 /* The loads themselves. */
1209 if (memory_access_type == VMAT_ELEMENTWISE
1210 || memory_access_type == VMAT_GATHER_SCATTER)
1211 {
1212 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1213 unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
1214 if (memory_access_type == VMAT_GATHER_SCATTER
1215 && gs_info->ifn == IFN_LAST && !gs_info->decl)
1216 /* For emulated gathers N offset vector element extracts
1217 (we assume the scalar scaling and ptr + offset add is consumed by
1218 the load). */
1219 inside_cost += record_stmt_cost (cost_vec, ncopies * assumed_nunits,
1220 vec_to_scalar, stmt_info, 0,
1221 vect_body);
1222 /* N scalar loads plus gathering them into a vector. */
1223 inside_cost += record_stmt_cost (cost_vec,
1224 ncopies * assumed_nunits,
1225 scalar_load, stmt_info, 0, vect_body);
1226 }
1227 else if (memory_access_type == VMAT_INVARIANT)
1228 {
1229 /* Invariant loads will ideally be hoisted and splat to a vector. */
1230 prologue_cost += record_stmt_cost (cost_vec, 1,
1231 scalar_load, stmt_info, 0,
1232 vect_prologue);
1233 prologue_cost += record_stmt_cost (cost_vec, 1,
1234 scalar_to_vec, stmt_info, 0,
1235 vect_prologue);
1236 }
1237 else
1238 vect_get_load_cost (vinfo, stmt_info, ncopies,
1239 alignment_support_scheme, misalignment, first_stmt_p,
1240 &inside_cost, &prologue_cost,
1241 cost_vec, cost_vec, true);
1242 if (memory_access_type == VMAT_ELEMENTWISE
1243 || memory_access_type == VMAT_STRIDED_SLP
1244 || (memory_access_type == VMAT_GATHER_SCATTER
1245 && gs_info->ifn == IFN_LAST && !gs_info->decl))
1246 inside_cost += record_stmt_cost (cost_vec, ncopies, vec_construct,
1247 stmt_info, 0, vect_body);
1248
1249 if (dump_enabled_p ())
1250 dump_printf_loc (MSG_NOTE, vect_location,
1251 "vect_model_load_cost: inside_cost = %d, "
1252 "prologue_cost = %d .\n", inside_cost, prologue_cost);
1253 }
1254
1255
1256 /* Calculate cost of DR's memory access. */
1257 void
vect_get_load_cost (vec_info *, stmt_vec_info stmt_info, int ncopies,
1259 dr_alignment_support alignment_support_scheme,
1260 int misalignment,
1261 bool add_realign_cost, unsigned int *inside_cost,
1262 unsigned int *prologue_cost,
1263 stmt_vector_for_cost *prologue_cost_vec,
1264 stmt_vector_for_cost *body_cost_vec,
1265 bool record_prologue_costs)
1266 {
1267 switch (alignment_support_scheme)
1268 {
1269 case dr_aligned:
1270 {
1271 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1272 stmt_info, 0, vect_body);
1273
1274 if (dump_enabled_p ())
1275 dump_printf_loc (MSG_NOTE, vect_location,
1276 "vect_model_load_cost: aligned.\n");
1277
1278 break;
1279 }
1280 case dr_unaligned_supported:
1281 {
1282 /* Here, we assign an additional cost for the unaligned load. */
1283 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1284 unaligned_load, stmt_info,
1285 misalignment, vect_body);
1286
1287 if (dump_enabled_p ())
1288 dump_printf_loc (MSG_NOTE, vect_location,
1289 "vect_model_load_cost: unaligned supported by "
1290 "hardware.\n");
1291
1292 break;
1293 }
1294 case dr_explicit_realign:
1295 {
1296 *inside_cost += record_stmt_cost (body_cost_vec, ncopies * 2,
1297 vector_load, stmt_info, 0, vect_body);
1298 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1299 vec_perm, stmt_info, 0, vect_body);
1300
1301 /* FIXME: If the misalignment remains fixed across the iterations of
1302 the containing loop, the following cost should be added to the
1303 prologue costs. */
1304 if (targetm.vectorize.builtin_mask_for_load)
1305 *inside_cost += record_stmt_cost (body_cost_vec, 1, vector_stmt,
1306 stmt_info, 0, vect_body);
1307
1308 if (dump_enabled_p ())
1309 dump_printf_loc (MSG_NOTE, vect_location,
1310 "vect_model_load_cost: explicit realign\n");
1311
1312 break;
1313 }
1314 case dr_explicit_realign_optimized:
1315 {
1316 if (dump_enabled_p ())
1317 dump_printf_loc (MSG_NOTE, vect_location,
1318 "vect_model_load_cost: unaligned software "
1319 "pipelined.\n");
1320
1321 /* Unaligned software pipeline has a load of an address, an initial
1322 load, and possibly a mask operation to "prime" the loop. However,
1323 if this is an access in a group of loads, which provide grouped
1324 access, then the above cost should only be considered for one
1325 access in the group. Inside the loop, there is a load op
1326 and a realignment op. */
1327
1328 if (add_realign_cost && record_prologue_costs)
1329 {
1330 *prologue_cost += record_stmt_cost (prologue_cost_vec, 2,
1331 vector_stmt, stmt_info,
1332 0, vect_prologue);
1333 if (targetm.vectorize.builtin_mask_for_load)
1334 *prologue_cost += record_stmt_cost (prologue_cost_vec, 1,
1335 vector_stmt, stmt_info,
1336 0, vect_prologue);
1337 }
1338
1339 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1340 stmt_info, 0, vect_body);
1341 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_perm,
1342 stmt_info, 0, vect_body);
1343
1344 if (dump_enabled_p ())
1345 dump_printf_loc (MSG_NOTE, vect_location,
1346 "vect_model_load_cost: explicit realign optimized"
1347 "\n");
1348
1349 break;
1350 }
1351
1352 case dr_unaligned_unsupported:
1353 {
1354 *inside_cost = VECT_MAX_COST;
1355
1356 if (dump_enabled_p ())
1357 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1358 "vect_model_load_cost: unsupported access.\n");
1359 break;
1360 }
1361
1362 default:
1363 gcc_unreachable ();
1364 }
1365 }
1366
1367 /* Insert the new stmt NEW_STMT at *GSI or at the appropriate place in
1368 the loop preheader for the vectorized stmt STMT_VINFO. */
1369
1370 static void
vect_init_vector_1 (vec_info *vinfo, stmt_vec_info stmt_vinfo, gimple *new_stmt,
1372 gimple_stmt_iterator *gsi)
1373 {
1374 if (gsi)
1375 vect_finish_stmt_generation (vinfo, stmt_vinfo, new_stmt, gsi);
1376 else
1377 vinfo->insert_on_entry (stmt_vinfo, new_stmt);
1378
1379 if (dump_enabled_p ())
1380 dump_printf_loc (MSG_NOTE, vect_location,
1381 "created new init_stmt: %G", new_stmt);
1382 }
1383
1384 /* Function vect_init_vector.
1385
1386 Insert a new stmt (INIT_STMT) that initializes a new variable of type
1387 TYPE with the value VAL. If TYPE is a vector type and VAL does not have
1388 vector type a vector with all elements equal to VAL is created first.
1389 Place the initialization at GSI if it is not NULL. Otherwise, place the
1390 initialization at the loop preheader.
1391 Return the DEF of INIT_STMT.
1392 It will be used in the vectorization of STMT_INFO. */
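
/* For example, initializing a V4SI vector from the scalar constant 5
   emits something like

     cst_1 = { 5, 5, 5, 5 };

   in the loop preheader (when GSI is NULL) and returns cst_1.  */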
1393
1394 tree
vect_init_vector (vec_info *vinfo, stmt_vec_info stmt_info, tree val, tree type,
1396 gimple_stmt_iterator *gsi)
1397 {
1398 gimple *init_stmt;
1399 tree new_temp;
1400
  /* We abuse this function to push something to an SSA name with initial 'val'.  */
1402 if (! useless_type_conversion_p (type, TREE_TYPE (val)))
1403 {
1404 gcc_assert (TREE_CODE (type) == VECTOR_TYPE);
1405 if (! types_compatible_p (TREE_TYPE (type), TREE_TYPE (val)))
1406 {
1407 /* Scalar boolean value should be transformed into
1408 all zeros or all ones value before building a vector. */
1409 if (VECTOR_BOOLEAN_TYPE_P (type))
1410 {
1411 tree true_val = build_all_ones_cst (TREE_TYPE (type));
1412 tree false_val = build_zero_cst (TREE_TYPE (type));
1413
1414 if (CONSTANT_CLASS_P (val))
1415 val = integer_zerop (val) ? false_val : true_val;
1416 else
1417 {
1418 new_temp = make_ssa_name (TREE_TYPE (type));
1419 init_stmt = gimple_build_assign (new_temp, COND_EXPR,
1420 val, true_val, false_val);
1421 vect_init_vector_1 (vinfo, stmt_info, init_stmt, gsi);
1422 val = new_temp;
1423 }
1424 }
1425 else
1426 {
1427 gimple_seq stmts = NULL;
1428 if (! INTEGRAL_TYPE_P (TREE_TYPE (val)))
1429 val = gimple_build (&stmts, VIEW_CONVERT_EXPR,
1430 TREE_TYPE (type), val);
1431 else
1432 /* ??? Condition vectorization expects us to do
1433 promotion of invariant/external defs. */
1434 val = gimple_convert (&stmts, TREE_TYPE (type), val);
1435 for (gimple_stmt_iterator gsi2 = gsi_start (stmts);
1436 !gsi_end_p (gsi2); )
1437 {
1438 init_stmt = gsi_stmt (gsi2);
1439 gsi_remove (&gsi2, false);
1440 vect_init_vector_1 (vinfo, stmt_info, init_stmt, gsi);
1441 }
1442 }
1443 }
1444 val = build_vector_from_val (type, val);
1445 }
1446
1447 new_temp = vect_get_new_ssa_name (type, vect_simple_var, "cst_");
1448 init_stmt = gimple_build_assign (new_temp, val);
1449 vect_init_vector_1 (vinfo, stmt_info, init_stmt, gsi);
1450 return new_temp;
1451 }
1452
1453
1454 /* Function vect_get_vec_defs_for_operand.
1455
1456 OP is an operand in STMT_VINFO. This function returns a vector of
1457 NCOPIES defs that will be used in the vectorized stmts for STMT_VINFO.
1458
1459 In the case that OP is an SSA_NAME which is defined in the loop, then
1460 STMT_VINFO_VEC_STMTS of the defining stmt holds the relevant defs.
1461
1462 In case OP is an invariant or constant, a new stmt that creates a vector def
1463 needs to be introduced. VECTYPE may be used to specify a required type for
1464 vector invariant. */
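
/* For example, with NCOPIES == 2 and OP being the constant 3, the same
   splatted vector def is pushed twice onto *VEC_OPRNDS; for an OP defined
   by a vectorized statement in the loop, the LHS of each of its NCOPIES
   vector statements is pushed instead.  */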
1465
1466 void
vect_get_vec_defs_for_operand (vec_info *vinfo, stmt_vec_info stmt_vinfo,
1468 unsigned ncopies,
1469 tree op, vec<tree> *vec_oprnds, tree vectype)
1470 {
1471 gimple *def_stmt;
1472 enum vect_def_type dt;
1473 bool is_simple_use;
1474 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
1475
1476 if (dump_enabled_p ())
1477 dump_printf_loc (MSG_NOTE, vect_location,
1478 "vect_get_vec_defs_for_operand: %T\n", op);
1479
1480 stmt_vec_info def_stmt_info;
1481 is_simple_use = vect_is_simple_use (op, loop_vinfo, &dt,
1482 &def_stmt_info, &def_stmt);
1483 gcc_assert (is_simple_use);
1484 if (def_stmt && dump_enabled_p ())
1485 dump_printf_loc (MSG_NOTE, vect_location, " def_stmt = %G", def_stmt);
1486
1487 vec_oprnds->create (ncopies);
1488 if (dt == vect_constant_def || dt == vect_external_def)
1489 {
1490 tree stmt_vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
1491 tree vector_type;
1492
1493 if (vectype)
1494 vector_type = vectype;
1495 else if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op))
1496 && VECTOR_BOOLEAN_TYPE_P (stmt_vectype))
1497 vector_type = truth_type_for (stmt_vectype);
1498 else
1499 vector_type = get_vectype_for_scalar_type (loop_vinfo, TREE_TYPE (op));
1500
1501 gcc_assert (vector_type);
1502 tree vop = vect_init_vector (vinfo, stmt_vinfo, op, vector_type, NULL);
1503 while (ncopies--)
1504 vec_oprnds->quick_push (vop);
1505 }
1506 else
1507 {
1508 def_stmt_info = vect_stmt_to_vectorize (def_stmt_info);
1509 gcc_assert (STMT_VINFO_VEC_STMTS (def_stmt_info).length () == ncopies);
1510 for (unsigned i = 0; i < ncopies; ++i)
1511 vec_oprnds->quick_push (gimple_get_lhs
1512 (STMT_VINFO_VEC_STMTS (def_stmt_info)[i]));
1513 }
1514 }
1515
1516
1517 /* Get vectorized definitions for OP0 and OP1. */
1518
1519 void
vect_get_vec_defs (vec_info *vinfo, stmt_vec_info stmt_info, slp_tree slp_node,
1521 unsigned ncopies,
1522 tree op0, vec<tree> *vec_oprnds0, tree vectype0,
1523 tree op1, vec<tree> *vec_oprnds1, tree vectype1,
1524 tree op2, vec<tree> *vec_oprnds2, tree vectype2,
1525 tree op3, vec<tree> *vec_oprnds3, tree vectype3)
1526 {
1527 if (slp_node)
1528 {
1529 if (op0)
1530 vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[0], vec_oprnds0);
1531 if (op1)
1532 vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[1], vec_oprnds1);
1533 if (op2)
1534 vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[2], vec_oprnds2);
1535 if (op3)
1536 vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[3], vec_oprnds3);
1537 }
1538 else
1539 {
1540 if (op0)
1541 vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
1542 op0, vec_oprnds0, vectype0);
1543 if (op1)
1544 vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
1545 op1, vec_oprnds1, vectype1);
1546 if (op2)
1547 vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
1548 op2, vec_oprnds2, vectype2);
1549 if (op3)
1550 vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
1551 op3, vec_oprnds3, vectype3);
1552 }
1553 }
1554
1555 void
vect_get_vec_defs (vec_info *vinfo, stmt_vec_info stmt_info, slp_tree slp_node,
1557 unsigned ncopies,
1558 tree op0, vec<tree> *vec_oprnds0,
1559 tree op1, vec<tree> *vec_oprnds1,
1560 tree op2, vec<tree> *vec_oprnds2,
1561 tree op3, vec<tree> *vec_oprnds3)
1562 {
1563 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
1564 op0, vec_oprnds0, NULL_TREE,
1565 op1, vec_oprnds1, NULL_TREE,
1566 op2, vec_oprnds2, NULL_TREE,
1567 op3, vec_oprnds3, NULL_TREE);
1568 }
1569
/* Helper function called by vect_finish_replace_stmt and
   vect_finish_stmt_generation.  Set the location of the new
   statement and, if it can throw, add it to the EH region of the
   original scalar statement.  */
1573
1574 static void
vect_finish_stmt_generation_1 (vec_info *,
1576 stmt_vec_info stmt_info, gimple *vec_stmt)
1577 {
1578 if (dump_enabled_p ())
1579 dump_printf_loc (MSG_NOTE, vect_location, "add new stmt: %G", vec_stmt);
1580
1581 if (stmt_info)
1582 {
1583 gimple_set_location (vec_stmt, gimple_location (stmt_info->stmt));
1584
1585 /* While EH edges will generally prevent vectorization, stmt might
1586 e.g. be in a must-not-throw region. Ensure newly created stmts
1587 that could throw are part of the same region. */
1588 int lp_nr = lookup_stmt_eh_lp (stmt_info->stmt);
1589 if (lp_nr != 0 && stmt_could_throw_p (cfun, vec_stmt))
1590 add_stmt_to_eh_lp (vec_stmt, lp_nr);
1591 }
1592 else
1593 gcc_assert (!stmt_could_throw_p (cfun, vec_stmt));
1594 }
1595
1596 /* Replace the scalar statement STMT_INFO with a new vector statement VEC_STMT,
1597 which sets the same scalar result as STMT_INFO did, and propagate
1598 STMT_INFO's location and EH information to VEC_STMT. */
1599
1600 void
1601 vect_finish_replace_stmt (vec_info *vinfo,
1602 stmt_vec_info stmt_info, gimple *vec_stmt)
1603 {
1604 gimple *scalar_stmt = vect_orig_stmt (stmt_info)->stmt;
1605 gcc_assert (gimple_get_lhs (scalar_stmt) == gimple_get_lhs (vec_stmt));
1606
1607 gimple_stmt_iterator gsi = gsi_for_stmt (scalar_stmt);
1608 gsi_replace (&gsi, vec_stmt, true);
1609
1610 vect_finish_stmt_generation_1 (vinfo, stmt_info, vec_stmt);
1611 }
1612
1613 /* Add VEC_STMT to the vectorized implementation of STMT_INFO and insert it
1614 before *GSI, updating virtual SSA form and EH information as required. */
1615
1616 void
1617 vect_finish_stmt_generation (vec_info *vinfo,
1618 stmt_vec_info stmt_info, gimple *vec_stmt,
1619 gimple_stmt_iterator *gsi)
1620 {
1621 gcc_assert (!stmt_info || gimple_code (stmt_info->stmt) != GIMPLE_LABEL);
1622
1623 if (!gsi_end_p (*gsi)
1624 && gimple_has_mem_ops (vec_stmt))
1625 {
1626 gimple *at_stmt = gsi_stmt (*gsi);
1627 tree vuse = gimple_vuse (at_stmt);
1628 if (vuse && TREE_CODE (vuse) == SSA_NAME)
1629 {
1630 tree vdef = gimple_vdef (at_stmt);
1631 gimple_set_vuse (vec_stmt, gimple_vuse (at_stmt));
1632 gimple_set_modified (vec_stmt, true);
1633 /* If we have an SSA vuse and insert a store, update virtual
1634 SSA form to avoid triggering the renamer. Do so only
1635 if we can easily see all uses - which is what almost always
1636 happens with the way vectorized stmts are inserted. */
1637 if ((vdef && TREE_CODE (vdef) == SSA_NAME)
1638 && ((is_gimple_assign (vec_stmt)
1639 && !is_gimple_reg (gimple_assign_lhs (vec_stmt)))
1640 || (is_gimple_call (vec_stmt)
1641 && !(gimple_call_flags (vec_stmt)
1642 & (ECF_CONST|ECF_PURE|ECF_NOVOPS)))))
1643 {
1644 tree new_vdef = copy_ssa_name (vuse, vec_stmt);
1645 gimple_set_vdef (vec_stmt, new_vdef);
1646 SET_USE (gimple_vuse_op (at_stmt), new_vdef);
1647 }
1648 }
1649 }
1650 gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);
1651 vect_finish_stmt_generation_1 (vinfo, stmt_info, vec_stmt);
1652 }
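/* Illustrative effect of the virtual-operand handling above (SSA names
   invented for the example): inserting a vector store immediately before
   the scalar store it is derived from turns

     # .MEM_6 = VDEF <.MEM_5>
     a[i_2] = x_3;

   into roughly

     # .MEM_7 = VDEF <.MEM_5>
     MEM <vector(4) int> [(int *)vectp_1] = vect_x_4;
     # .MEM_6 = VDEF <.MEM_7>
     a[i_2] = x_3;

   keeping the virtual SSA chain valid without running the renamer.  */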
1653
1654 /* We want to vectorize a call to combined function CFN with function
1655 decl FNDECL, using VECTYPE_OUT as the type of the output and VECTYPE_IN
1656 as the types of all inputs. Check whether this is possible using
1657 an internal function, returning its code if so or IFN_LAST if not. */
1658
1659 static internal_fn
1660 vectorizable_internal_function (combined_fn cfn, tree fndecl,
1661 tree vectype_out, tree vectype_in)
1662 {
1663 internal_fn ifn;
1664 if (internal_fn_p (cfn))
1665 ifn = as_internal_fn (cfn);
1666 else
1667 ifn = associated_internal_fn (fndecl);
1668 if (ifn != IFN_LAST && direct_internal_fn_p (ifn))
1669 {
1670 const direct_internal_fn_info &info = direct_internal_fn (ifn);
1671 if (info.vectorizable)
1672 {
1673 tree type0 = (info.type0 < 0 ? vectype_out : vectype_in);
1674 tree type1 = (info.type1 < 0 ? vectype_out : vectype_in);
1675 if (direct_internal_fn_supported_p (ifn, tree_pair (type0, type1),
1676 OPTIMIZE_FOR_SPEED))
1677 return ifn;
1678 }
1679 }
1680 return IFN_LAST;
1681 }
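/* For example (an illustrative case that depends on target support): a call
   folding to CFN_SQRT with VECTYPE_OUT == VECTYPE_IN == vector(2) double
   maps to IFN_SQRT, which is returned here provided
   direct_internal_fn_supported_p reports that the target implements the
   corresponding vector optab for that mode.  */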
1682
1683
1684 static tree permute_vec_elements (vec_info *, tree, tree, tree, stmt_vec_info,
1685 gimple_stmt_iterator *);
1686
1687 /* Check whether a load or store statement in the loop described by
1688 LOOP_VINFO is possible in a loop using partial vectors. This is
1689 testing whether the vectorizer pass has the appropriate support,
1690 as well as whether the target does.
1691
1692 VLS_TYPE says whether the statement is a load or store and VECTYPE
1693 is the type of the vector being loaded or stored. SLP_NODE is the SLP
1694 node that contains the statement, or null if none. MEMORY_ACCESS_TYPE
1695 says how the load or store is going to be implemented and GROUP_SIZE
1696 is the number of load or store statements in the containing group.
1697 If the access is a gather load or scatter store, GS_INFO describes
1698 its arguments. If the load or store is conditional, SCALAR_MASK is the
1699 condition under which it occurs.
1700
1701 Clear LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P if a loop using partial
1702 vectors is not supported, otherwise record the required rgroup control
1703 types. */
1704
1705 static void
1706 check_load_store_for_partial_vectors (loop_vec_info loop_vinfo, tree vectype,
1707 slp_tree slp_node,
1708 vec_load_store_type vls_type,
1709 int group_size,
1710 vect_memory_access_type
1711 memory_access_type,
1712 gather_scatter_info *gs_info,
1713 tree scalar_mask)
1714 {
1715 /* Invariant loads need no special support. */
1716 if (memory_access_type == VMAT_INVARIANT)
1717 return;
1718
1719 unsigned int nvectors;
1720 if (slp_node)
1721 nvectors = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
1722 else
1723 nvectors = vect_get_num_copies (loop_vinfo, vectype);
1724
1725 vec_loop_masks *masks = &LOOP_VINFO_MASKS (loop_vinfo);
1726 machine_mode vecmode = TYPE_MODE (vectype);
1727 bool is_load = (vls_type == VLS_LOAD);
1728 if (memory_access_type == VMAT_LOAD_STORE_LANES)
1729 {
1730 if (is_load
1731 ? !vect_load_lanes_supported (vectype, group_size, true)
1732 : !vect_store_lanes_supported (vectype, group_size, true))
1733 {
1734 if (dump_enabled_p ())
1735 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1736 "can't operate on partial vectors because"
1737 " the target doesn't have an appropriate"
1738 " load/store-lanes instruction.\n");
1739 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
1740 return;
1741 }
1742 vect_record_loop_mask (loop_vinfo, masks, nvectors, vectype,
1743 scalar_mask);
1744 return;
1745 }
1746
1747 if (memory_access_type == VMAT_GATHER_SCATTER)
1748 {
1749 internal_fn ifn = (is_load
1750 ? IFN_MASK_GATHER_LOAD
1751 : IFN_MASK_SCATTER_STORE);
1752 if (!internal_gather_scatter_fn_supported_p (ifn, vectype,
1753 gs_info->memory_type,
1754 gs_info->offset_vectype,
1755 gs_info->scale))
1756 {
1757 if (dump_enabled_p ())
1758 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1759 "can't operate on partial vectors because"
1760 " the target doesn't have an appropriate"
1761 " gather load or scatter store instruction.\n");
1762 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
1763 return;
1764 }
1765 vect_record_loop_mask (loop_vinfo, masks, nvectors, vectype,
1766 scalar_mask);
1767 return;
1768 }
1769
1770 if (memory_access_type != VMAT_CONTIGUOUS
1771 && memory_access_type != VMAT_CONTIGUOUS_PERMUTE)
1772 {
1773 /* Element X of the data must come from iteration i * VF + X of the
1774 scalar loop. We need more work to support other mappings. */
1775 if (dump_enabled_p ())
1776 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1777 "can't operate on partial vectors because an"
1778 " access isn't contiguous.\n");
1779 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
1780 return;
1781 }
1782
1783 if (!VECTOR_MODE_P (vecmode))
1784 {
1785 if (dump_enabled_p ())
1786 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1787 "can't operate on partial vectors when emulating"
1788 " vector operations.\n");
1789 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
1790 return;
1791 }
1792
1793 /* We might load more scalars than we need for permuting SLP loads.
1794 We checked in get_group_load_store_type that the extra elements
1795 don't leak into a new vector. */
1796 auto group_memory_nvectors = [](poly_uint64 size, poly_uint64 nunits)
1797 {
1798 unsigned int nvectors;
1799 if (can_div_away_from_zero_p (size, nunits, &nvectors))
1800 return nvectors;
1801 gcc_unreachable ();
1802 };
1803
1804 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
1805 poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1806 machine_mode mask_mode;
1807 bool using_partial_vectors_p = false;
1808 if (targetm.vectorize.get_mask_mode (vecmode).exists (&mask_mode)
1809 && can_vec_mask_load_store_p (vecmode, mask_mode, is_load))
1810 {
1811 nvectors = group_memory_nvectors (group_size * vf, nunits);
1812 vect_record_loop_mask (loop_vinfo, masks, nvectors, vectype, scalar_mask);
1813 using_partial_vectors_p = true;
1814 }
1815
1816 machine_mode vmode;
1817 if (get_len_load_store_mode (vecmode, is_load).exists (&vmode))
1818 {
1819 nvectors = group_memory_nvectors (group_size * vf, nunits);
1820 vec_loop_lens *lens = &LOOP_VINFO_LENS (loop_vinfo);
1821 unsigned factor = (vecmode == vmode) ? 1 : GET_MODE_UNIT_SIZE (vecmode);
1822 vect_record_loop_len (loop_vinfo, lens, nvectors, vectype, factor);
1823 using_partial_vectors_p = true;
1824 }
1825
1826 if (!using_partial_vectors_p)
1827 {
1828 if (dump_enabled_p ())
1829 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1830 "can't operate on partial vectors because the"
1831 " target doesn't have the appropriate partial"
1832 " vectorization load or store.\n");
1833 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
1834 }
1835 }
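/* Worked example for the contiguous case above (numbers chosen purely for
   illustration): with GROUP_SIZE == 2, a vectorization factor of 4 and V8HI
   vectors (8 units each), group_memory_nvectors (2 * 4, 8) == 1, so a
   single rgroup mask (or length) covering 8 units is recorded per loop
   iteration, provided the target supports the masked or length-based
   access.  */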
1836
1837 /* Return the mask input to a masked load or store. VEC_MASK is the vectorized
1838 form of the scalar mask condition and LOOP_MASK, if nonnull, is the mask
1839 that needs to be applied to all loads and stores in a vectorized loop.
1840 Return VEC_MASK if LOOP_MASK is null or if VEC_MASK is already masked,
1841 otherwise return VEC_MASK & LOOP_MASK.
1842
1843 MASK_TYPE is the type of both masks. If new statements are needed,
1844 insert them before GSI. */
1845
1846 static tree
1847 prepare_vec_mask (loop_vec_info loop_vinfo, tree mask_type, tree loop_mask,
1848 tree vec_mask, gimple_stmt_iterator *gsi)
1849 {
1850 gcc_assert (useless_type_conversion_p (mask_type, TREE_TYPE (vec_mask)));
1851 if (!loop_mask)
1852 return vec_mask;
1853
1854 gcc_assert (TREE_TYPE (loop_mask) == mask_type);
1855
1856 if (loop_vinfo->vec_cond_masked_set.contains ({ vec_mask, loop_mask }))
1857 return vec_mask;
1858
1859 tree and_res = make_temp_ssa_name (mask_type, NULL, "vec_mask_and");
1860 gimple *and_stmt = gimple_build_assign (and_res, BIT_AND_EXPR,
1861 vec_mask, loop_mask);
1862
1863 gsi_insert_before (gsi, and_stmt, GSI_SAME_STMT);
1864 return and_res;
1865 }
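/* For example (SSA names invented for illustration), when LOOP_MASK is
   nonnull and the pair is not already cached in vec_cond_masked_set, the
   code above emits

     vec_mask_and_10 = vec_mask_4 & loop_mask_8;

   before GSI and returns vec_mask_and_10; otherwise VEC_MASK is returned
   unchanged.  */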
1866
1867 /* Determine whether we can use a gather load or scatter store to vectorize
1868 strided load or store STMT_INFO by truncating the current offset to a
1869 smaller width. We need to be able to construct an offset vector:
1870
1871 { 0, X, X*2, X*3, ... }
1872
1873 without loss of precision, where X is STMT_INFO's DR_STEP.
1874
1875 Return true if this is possible, describing the gather load or scatter
1876 store in GS_INFO. MASKED_P is true if the load or store is conditional. */
1877
1878 static bool
1879 vect_truncate_gather_scatter_offset (stmt_vec_info stmt_info,
1880 loop_vec_info loop_vinfo, bool masked_p,
1881 gather_scatter_info *gs_info)
1882 {
1883 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
1884 data_reference *dr = dr_info->dr;
1885 tree step = DR_STEP (dr);
1886 if (TREE_CODE (step) != INTEGER_CST)
1887 {
1888 /* ??? Perhaps we could use range information here? */
1889 if (dump_enabled_p ())
1890 dump_printf_loc (MSG_NOTE, vect_location,
1891 "cannot truncate variable step.\n");
1892 return false;
1893 }
1894
1895 /* Get the number of bits in an element. */
1896 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1897 scalar_mode element_mode = SCALAR_TYPE_MODE (TREE_TYPE (vectype));
1898 unsigned int element_bits = GET_MODE_BITSIZE (element_mode);
1899
1900 /* Set COUNT to the upper limit on the number of elements - 1.
1901 Start with the maximum vectorization factor. */
1902 unsigned HOST_WIDE_INT count = vect_max_vf (loop_vinfo) - 1;
1903
1904 /* Try lowering COUNT to the number of scalar latch iterations. */
1905 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1906 widest_int max_iters;
1907 if (max_loop_iterations (loop, &max_iters)
1908 && max_iters < count)
1909 count = max_iters.to_shwi ();
1910
1911 /* Try scales of 1 and the element size. */
1912 int scales[] = { 1, vect_get_scalar_dr_size (dr_info) };
1913 wi::overflow_type overflow = wi::OVF_NONE;
1914 for (int i = 0; i < 2; ++i)
1915 {
1916 int scale = scales[i];
1917 widest_int factor;
1918 if (!wi::multiple_of_p (wi::to_widest (step), scale, SIGNED, &factor))
1919 continue;
1920
1921 /* Determine the minimum precision of (COUNT - 1) * STEP / SCALE. */
1922 widest_int range = wi::mul (count, factor, SIGNED, &overflow);
1923 if (overflow)
1924 continue;
1925 signop sign = range >= 0 ? UNSIGNED : SIGNED;
1926 unsigned int min_offset_bits = wi::min_precision (range, sign);
1927
1928 /* Find the narrowest viable offset type. */
1929 unsigned int offset_bits = 1U << ceil_log2 (min_offset_bits);
1930 tree offset_type = build_nonstandard_integer_type (offset_bits,
1931 sign == UNSIGNED);
1932
1933 /* See whether the target supports the operation with an offset
1934 no narrower than OFFSET_TYPE. */
1935 tree memory_type = TREE_TYPE (DR_REF (dr));
1936 if (!vect_gather_scatter_fn_p (loop_vinfo, DR_IS_READ (dr), masked_p,
1937 vectype, memory_type, offset_type, scale,
1938 &gs_info->ifn, &gs_info->offset_vectype)
1939 || gs_info->ifn == IFN_LAST)
1940 continue;
1941
1942 gs_info->decl = NULL_TREE;
1943 /* Logically the sum of DR_BASE_ADDRESS, DR_INIT and DR_OFFSET,
1944 but we don't need to store that here. */
1945 gs_info->base = NULL_TREE;
1946 gs_info->element_type = TREE_TYPE (vectype);
1947 gs_info->offset = fold_convert (offset_type, step);
1948 gs_info->offset_dt = vect_constant_def;
1949 gs_info->scale = scale;
1950 gs_info->memory_type = memory_type;
1951 return true;
1952 }
1953
1954 if (overflow && dump_enabled_p ())
1955 dump_printf_loc (MSG_NOTE, vect_location,
1956 "truncating gather/scatter offset to %d bits"
1957 " might change its value.\n", element_bits);
1958
1959 return false;
1960 }
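/* Worked example for the search above (values invented for illustration):
   for 32-bit elements with DR_STEP 4 and at most 255 latch iterations,
   COUNT is 255.  Scale 1 gives a range of 255 * 4 == 1020, needing 10 bits
   and hence a 16-bit offset type; scale 4 (the element size) gives a range
   of 255, which fits an 8-bit offset type.  The first of these that the
   target supports as a gather/scatter offset is used.  */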
1961
1962 /* Return true if we can use gather/scatter internal functions to
1963 vectorize STMT_INFO, which is a grouped or strided load or store.
1964 MASKED_P is true if load or store is conditional. When returning
1965 true, fill in GS_INFO with the information required to perform the
1966 operation. */
1967
1968 static bool
1969 vect_use_strided_gather_scatters_p (stmt_vec_info stmt_info,
1970 loop_vec_info loop_vinfo, bool masked_p,
1971 gather_scatter_info *gs_info)
1972 {
1973 if (!vect_check_gather_scatter (stmt_info, loop_vinfo, gs_info)
1974 || gs_info->ifn == IFN_LAST)
1975 return vect_truncate_gather_scatter_offset (stmt_info, loop_vinfo,
1976 masked_p, gs_info);
1977
1978 tree old_offset_type = TREE_TYPE (gs_info->offset);
1979 tree new_offset_type = TREE_TYPE (gs_info->offset_vectype);
1980
1981 gcc_assert (TYPE_PRECISION (new_offset_type)
1982 >= TYPE_PRECISION (old_offset_type));
1983 gs_info->offset = fold_convert (new_offset_type, gs_info->offset);
1984
1985 if (dump_enabled_p ())
1986 dump_printf_loc (MSG_NOTE, vect_location,
1987 "using gather/scatter for strided/grouped access,"
1988 " scale = %d\n", gs_info->scale);
1989
1990 return true;
1991 }
1992
1993 /* STMT_INFO is a non-strided load or store, meaning that it accesses
1994 elements with a known constant step. Return -1 if that step
1995 is negative, 0 if it is zero, and 1 if it is greater than zero. */
1996
1997 static int
1998 compare_step_with_zero (vec_info *vinfo, stmt_vec_info stmt_info)
1999 {
2000 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
2001 return tree_int_cst_compare (vect_dr_behavior (vinfo, dr_info)->step,
2002 size_zero_node);
2003 }
2004
2005 /* If the target supports a permute mask that reverses the elements in
2006 a vector of type VECTYPE, return that mask, otherwise return null. */
2007
2008 static tree
2009 perm_mask_for_reverse (tree vectype)
2010 {
2011 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2012
2013 /* The encoding has a single stepped pattern. */
2014 vec_perm_builder sel (nunits, 1, 3);
2015 for (int i = 0; i < 3; ++i)
2016 sel.quick_push (nunits - 1 - i);
2017
2018 vec_perm_indices indices (sel, 1, nunits);
2019 if (!can_vec_perm_const_p (TYPE_MODE (vectype), indices))
2020 return NULL_TREE;
2021 return vect_gen_perm_mask_checked (vectype, indices);
2022 }
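/* For example, for V4SI the stepped encoding above expands to the
   permutation { 3, 2, 1, 0 }, i.e. a full element reversal; the mask is
   returned only if the target can perform that constant permutation.  */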
2023
2024 /* A subroutine of get_load_store_type, with a subset of the same
2025 arguments. Handle the case where STMT_INFO is a load or store that
2026 accesses consecutive elements with a negative step. Sets *POFFSET
2027 to the offset to be applied to the DR for the first access. */
2028
2029 static vect_memory_access_type
2030 get_negative_load_store_type (vec_info *vinfo,
2031 stmt_vec_info stmt_info, tree vectype,
2032 vec_load_store_type vls_type,
2033 unsigned int ncopies, poly_int64 *poffset)
2034 {
2035 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
2036 dr_alignment_support alignment_support_scheme;
2037
2038 if (ncopies > 1)
2039 {
2040 if (dump_enabled_p ())
2041 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2042 "multiple types with negative step.\n");
2043 return VMAT_ELEMENTWISE;
2044 }
2045
2046 /* For backward running DRs the first access in vectype actually is
2047 N-1 elements before the address of the DR. */
2048 *poffset = ((-TYPE_VECTOR_SUBPARTS (vectype) + 1)
2049 * TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (vectype))));
2050
2051 int misalignment = dr_misalignment (dr_info, vectype, *poffset);
2052 alignment_support_scheme
2053 = vect_supportable_dr_alignment (vinfo, dr_info, vectype, misalignment);
2054 if (alignment_support_scheme != dr_aligned
2055 && alignment_support_scheme != dr_unaligned_supported)
2056 {
2057 if (dump_enabled_p ())
2058 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2059 "negative step but alignment required.\n");
2060 *poffset = 0;
2061 return VMAT_ELEMENTWISE;
2062 }
2063
2064 if (vls_type == VLS_STORE_INVARIANT)
2065 {
2066 if (dump_enabled_p ())
2067 dump_printf_loc (MSG_NOTE, vect_location,
2068 "negative step with invariant source;"
2069 " no permute needed.\n");
2070 return VMAT_CONTIGUOUS_DOWN;
2071 }
2072
2073 if (!perm_mask_for_reverse (vectype))
2074 {
2075 if (dump_enabled_p ())
2076 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2077 "negative step and reversing not supported.\n");
2078 *poffset = 0;
2079 return VMAT_ELEMENTWISE;
2080 }
2081
2082 return VMAT_CONTIGUOUS_REVERSE;
2083 }
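/* For example (sizes chosen for illustration), with V4SI and a
   downward-running access the first vector covers the element at the DR
   address and the three elements below it, so *POFFSET is set to
   (-4 + 1) * 4 == -12 bytes and the loaded or stored vector is reversed
   (VMAT_CONTIGUOUS_REVERSE), unless the stored value is invariant or the
   target cannot reverse the elements.  */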
2084
2085 /* STMT_INFO is either a masked or unconditional store. Return the value
2086 being stored. */
2087
2088 tree
2089 vect_get_store_rhs (stmt_vec_info stmt_info)
2090 {
2091 if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
2092 {
2093 gcc_assert (gimple_assign_single_p (assign));
2094 return gimple_assign_rhs1 (assign);
2095 }
2096 if (gcall *call = dyn_cast <gcall *> (stmt_info->stmt))
2097 {
2098 internal_fn ifn = gimple_call_internal_fn (call);
2099 int index = internal_fn_stored_value_index (ifn);
2100 gcc_assert (index >= 0);
2101 return gimple_call_arg (call, index);
2102 }
2103 gcc_unreachable ();
2104 }
2105
2106 /* Function VECTOR_VECTOR_COMPOSITION_TYPE
2107
2108 This function returns a vector type which can be composed of NELTS pieces,
2109 whose type is recorded in PTYPE.  VTYPE should be a vector type and has the
2110 same vector size as the returned vector.  It first checks whether the target
2111 supports a piece-sized vector mode for the construction; if not, it then
2112 checks whether a piece-sized scalar mode can be used instead.  It returns
2113 NULL_TREE if no suitable composition is found.
2114
2115 For example, for (vtype=V16QI, nelts=4), we can probably get:
2116 - V16QI with PTYPE V4QI.
2117 - V4SI with PTYPE SI.
2118 - NULL_TREE. */
2119
2120 static tree
2121 vector_vector_composition_type (tree vtype, poly_uint64 nelts, tree *ptype)
2122 {
2123 gcc_assert (VECTOR_TYPE_P (vtype));
2124 gcc_assert (known_gt (nelts, 0U));
2125
2126 machine_mode vmode = TYPE_MODE (vtype);
2127 if (!VECTOR_MODE_P (vmode))
2128 return NULL_TREE;
2129
2130 poly_uint64 vbsize = GET_MODE_BITSIZE (vmode);
2131 unsigned int pbsize;
2132 if (constant_multiple_p (vbsize, nelts, &pbsize))
2133 {
2134 /* First check if vec_init optab supports construction from
2135 vector pieces directly. */
2136 scalar_mode elmode = SCALAR_TYPE_MODE (TREE_TYPE (vtype));
2137 poly_uint64 inelts = pbsize / GET_MODE_BITSIZE (elmode);
2138 machine_mode rmode;
2139 if (related_vector_mode (vmode, elmode, inelts).exists (&rmode)
2140 && (convert_optab_handler (vec_init_optab, vmode, rmode)
2141 != CODE_FOR_nothing))
2142 {
2143 *ptype = build_vector_type (TREE_TYPE (vtype), inelts);
2144 return vtype;
2145 }
2146
2147 /* Otherwise check whether an integer type of the same piece size exists
2148 and whether the vec_init optab supports construction from it directly. */
2149 if (int_mode_for_size (pbsize, 0).exists (&elmode)
2150 && related_vector_mode (vmode, elmode, nelts).exists (&rmode)
2151 && (convert_optab_handler (vec_init_optab, rmode, elmode)
2152 != CODE_FOR_nothing))
2153 {
2154 *ptype = build_nonstandard_integer_type (pbsize, 1);
2155 return build_vector_type (*ptype, nelts);
2156 }
2157 }
2158
2159 return NULL_TREE;
2160 }
2161
2162 /* A subroutine of get_load_store_type, with a subset of the same
2163 arguments. Handle the case where STMT_INFO is part of a grouped load
2164 or store.
2165
2166 For stores, the statements in the group are all consecutive
2167 and there is no gap at the end. For loads, the statements in the
2168 group might not be consecutive; there can be gaps between statements
2169 as well as at the end. */
2170
2171 static bool
2172 get_group_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info,
2173 tree vectype, slp_tree slp_node,
2174 bool masked_p, vec_load_store_type vls_type,
2175 vect_memory_access_type *memory_access_type,
2176 poly_int64 *poffset,
2177 dr_alignment_support *alignment_support_scheme,
2178 int *misalignment,
2179 gather_scatter_info *gs_info)
2180 {
2181 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
2182 class loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
2183 stmt_vec_info first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
2184 dr_vec_info *first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
2185 unsigned int group_size = DR_GROUP_SIZE (first_stmt_info);
2186 bool single_element_p = (stmt_info == first_stmt_info
2187 && !DR_GROUP_NEXT_ELEMENT (stmt_info));
2188 unsigned HOST_WIDE_INT gap = DR_GROUP_GAP (first_stmt_info);
2189 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2190
2191 /* True if the vectorized statements would access beyond the last
2192 statement in the group. */
2193 bool overrun_p = false;
2194
2195 /* True if we can cope with such overrun by peeling for gaps, so that
2196 there is at least one final scalar iteration after the vector loop. */
2197 bool can_overrun_p = (!masked_p
2198 && vls_type == VLS_LOAD
2199 && loop_vinfo
2200 && !loop->inner);
2201
2202 /* There can only be a gap at the end of the group if the stride is
2203 known at compile time. */
2204 gcc_assert (!STMT_VINFO_STRIDED_P (first_stmt_info) || gap == 0);
2205
2206 /* Stores can't yet have gaps. */
2207 gcc_assert (slp_node || vls_type == VLS_LOAD || gap == 0);
2208
2209 if (slp_node)
2210 {
2211 /* For SLP vectorization we directly vectorize a subchain
2212 without permutation. */
2213 if (! SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
2214 first_dr_info
2215 = STMT_VINFO_DR_INFO (SLP_TREE_SCALAR_STMTS (slp_node)[0]);
2216 if (STMT_VINFO_STRIDED_P (first_stmt_info))
2217 {
2218 /* Try to use consecutive accesses of DR_GROUP_SIZE elements,
2219 separated by the stride, until we have a complete vector.
2220 Fall back to scalar accesses if that isn't possible. */
2221 if (multiple_p (nunits, group_size))
2222 *memory_access_type = VMAT_STRIDED_SLP;
2223 else
2224 *memory_access_type = VMAT_ELEMENTWISE;
2225 }
2226 else
2227 {
2228 overrun_p = loop_vinfo && gap != 0;
2229 if (overrun_p && vls_type != VLS_LOAD)
2230 {
2231 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2232 "Grouped store with gaps requires"
2233 " non-consecutive accesses\n");
2234 return false;
2235 }
2236 /* An overrun is fine if the trailing elements are smaller
2237 than the alignment boundary B. Every vector access will
2238 be a multiple of B and so we are guaranteed to access a
2239 non-gap element in the same B-sized block. */
2240 if (overrun_p
2241 && gap < (vect_known_alignment_in_bytes (first_dr_info,
2242 vectype)
2243 / vect_get_scalar_dr_size (first_dr_info)))
2244 overrun_p = false;
2245
2246 /* If the gap splits the vector in half and the target
2247 can do half-vector operations avoid the epilogue peeling
2248 by simply loading half of the vector only. Usually
2249 the construction with an upper zero half will be elided. */
2250 dr_alignment_support alss;
2251 int misalign = dr_misalignment (first_dr_info, vectype);
2252 tree half_vtype;
2253 if (overrun_p
2254 && !masked_p
2255 && (((alss = vect_supportable_dr_alignment (vinfo, first_dr_info,
2256 vectype, misalign)))
2257 == dr_aligned
2258 || alss == dr_unaligned_supported)
2259 && known_eq (nunits, (group_size - gap) * 2)
2260 && known_eq (nunits, group_size)
2261 && (vector_vector_composition_type (vectype, 2, &half_vtype)
2262 != NULL_TREE))
2263 overrun_p = false;
2264
2265 if (overrun_p && !can_overrun_p)
2266 {
2267 if (dump_enabled_p ())
2268 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2269 "Peeling for outer loop is not supported\n");
2270 return false;
2271 }
2272 int cmp = compare_step_with_zero (vinfo, stmt_info);
2273 if (cmp < 0)
2274 {
2275 if (single_element_p)
2276 /* ??? The VMAT_CONTIGUOUS_REVERSE code generation is
2277 only correct for single element "interleaving" SLP. */
2278 *memory_access_type = get_negative_load_store_type
2279 (vinfo, stmt_info, vectype, vls_type, 1, poffset);
2280 else
2281 {
2282 /* Try to use consecutive accesses of DR_GROUP_SIZE elements,
2283 separated by the stride, until we have a complete vector.
2284 Fall back to scalar accesses if that isn't possible. */
2285 if (multiple_p (nunits, group_size))
2286 *memory_access_type = VMAT_STRIDED_SLP;
2287 else
2288 *memory_access_type = VMAT_ELEMENTWISE;
2289 }
2290 }
2291 else
2292 {
2293 gcc_assert (!loop_vinfo || cmp > 0);
2294 *memory_access_type = VMAT_CONTIGUOUS;
2295 }
2296
2297 /* When we have a contiguous access across loop iterations
2298 but the access in the loop doesn't cover the full vector
2299 we can end up with no gap recorded but still excess
2300 elements accessed, see PR103116. Make sure we peel for
2301 gaps if necessary and sufficient and give up if not. */
2302 if (loop_vinfo
2303 && *memory_access_type == VMAT_CONTIGUOUS
2304 && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ()
2305 && !multiple_p (group_size * LOOP_VINFO_VECT_FACTOR (loop_vinfo),
2306 nunits))
2307 {
2308 unsigned HOST_WIDE_INT cnunits, cvf;
2309 if (!can_overrun_p
2310 || !nunits.is_constant (&cnunits)
2311 || !LOOP_VINFO_VECT_FACTOR (loop_vinfo).is_constant (&cvf)
2312 /* Peeling for gaps assumes that a single scalar iteration
2313 is enough to make sure the last vector iteration doesn't
2314 access excess elements.
2315 ??? Enhancements include peeling multiple iterations
2316 or using masked loads with a static mask. */
2317 || (group_size * cvf) % cnunits + group_size < cnunits)
2318 {
2319 if (dump_enabled_p ())
2320 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2321 "peeling for gaps insufficient for "
2322 "access\n");
2323 return false;
2324 }
2325 overrun_p = true;
2326 }
2327 }
2328 }
2329 else
2330 {
2331 /* We can always handle this case using elementwise accesses,
2332 but see if something more efficient is available. */
2333 *memory_access_type = VMAT_ELEMENTWISE;
2334
2335 /* If there is a gap at the end of the group then these optimizations
2336 would access excess elements in the last iteration. */
2337 bool would_overrun_p = (gap != 0);
2338 /* An overrun is fine if the trailing elements are smaller than the
2339 alignment boundary B. Every vector access will be a multiple of B
2340 and so we are guaranteed to access a non-gap element in the
2341 same B-sized block. */
2342 if (would_overrun_p
2343 && !masked_p
2344 && gap < (vect_known_alignment_in_bytes (first_dr_info, vectype)
2345 / vect_get_scalar_dr_size (first_dr_info)))
2346 would_overrun_p = false;
2347
2348 if (!STMT_VINFO_STRIDED_P (first_stmt_info)
2349 && (can_overrun_p || !would_overrun_p)
2350 && compare_step_with_zero (vinfo, stmt_info) > 0)
2351 {
2352 /* First cope with the degenerate case of a single-element
2353 vector. */
2354 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype), 1U))
2355 ;
2356
2357 /* Otherwise try using LOAD/STORE_LANES. */
2358 else if (vls_type == VLS_LOAD
2359 ? vect_load_lanes_supported (vectype, group_size, masked_p)
2360 : vect_store_lanes_supported (vectype, group_size,
2361 masked_p))
2362 {
2363 *memory_access_type = VMAT_LOAD_STORE_LANES;
2364 overrun_p = would_overrun_p;
2365 }
2366
2367 /* If that fails, try using permuting loads. */
2368 else if (vls_type == VLS_LOAD
2369 ? vect_grouped_load_supported (vectype, single_element_p,
2370 group_size)
2371 : vect_grouped_store_supported (vectype, group_size))
2372 {
2373 *memory_access_type = VMAT_CONTIGUOUS_PERMUTE;
2374 overrun_p = would_overrun_p;
2375 }
2376 }
2377
2378 /* As a last resort, try using a gather load or scatter store.
2379
2380 ??? Although the code can handle all group sizes correctly,
2381 it probably isn't a win to use separate strided accesses based
2382 on nearby locations. Or, even if it's a win over scalar code,
2383 it might not be a win over vectorizing at a lower VF, if that
2384 allows us to use contiguous accesses. */
2385 if (*memory_access_type == VMAT_ELEMENTWISE
2386 && single_element_p
2387 && loop_vinfo
2388 && vect_use_strided_gather_scatters_p (stmt_info, loop_vinfo,
2389 masked_p, gs_info))
2390 *memory_access_type = VMAT_GATHER_SCATTER;
2391 }
2392
2393 if (*memory_access_type == VMAT_GATHER_SCATTER
2394 || *memory_access_type == VMAT_ELEMENTWISE)
2395 {
2396 *alignment_support_scheme = dr_unaligned_supported;
2397 *misalignment = DR_MISALIGNMENT_UNKNOWN;
2398 }
2399 else
2400 {
2401 *misalignment = dr_misalignment (first_dr_info, vectype, *poffset);
2402 *alignment_support_scheme
2403 = vect_supportable_dr_alignment (vinfo, first_dr_info, vectype,
2404 *misalignment);
2405 }
2406
2407 if (vls_type != VLS_LOAD && first_stmt_info == stmt_info)
2408 {
2409 /* STMT is the leader of the group. Check the operands of all the
2410 stmts of the group. */
2411 stmt_vec_info next_stmt_info = DR_GROUP_NEXT_ELEMENT (stmt_info);
2412 while (next_stmt_info)
2413 {
2414 tree op = vect_get_store_rhs (next_stmt_info);
2415 enum vect_def_type dt;
2416 if (!vect_is_simple_use (op, vinfo, &dt))
2417 {
2418 if (dump_enabled_p ())
2419 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2420 "use not simple.\n");
2421 return false;
2422 }
2423 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
2424 }
2425 }
2426
2427 if (overrun_p)
2428 {
2429 gcc_assert (can_overrun_p);
2430 if (dump_enabled_p ())
2431 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2432 "Data access with gaps requires scalar "
2433 "epilogue loop\n");
2434 LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) = true;
2435 }
2436
2437 return true;
2438 }
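/* Worked example for the overrun handling above (numbers chosen for
   illustration): a load group of 3 elements with a trailing gap of 1,
   vectorized with V4SI, reads one element past the group in each vector
   access.  If the known alignment of the first access is 16 bytes, the gap
   (1) is smaller than 16 / 4 scalar elements, so the extra element lies in
   the same 16-byte block as real data and no peeling for gaps is needed.  */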
2439
2440 /* Analyze load or store statement STMT_INFO of type VLS_TYPE. Return true
2441 if there is a memory access type that the vectorized form can use,
2442 storing it in *MEMORY_ACCESS_TYPE if so. If we decide to use gathers
2443 or scatters, fill in GS_INFO accordingly. In addition
2444 *ALIGNMENT_SUPPORT_SCHEME is filled out and false is returned if
2445 the target does not support the alignment scheme. *MISALIGNMENT
2446 is set according to the alignment of the access (including
2447 DR_MISALIGNMENT_UNKNOWN when it is unknown).
2448
2449 SLP_NODE is the SLP node containing the statement, or null for non-SLP vectorization.
2450 MASKED_P is true if the statement is conditional on a vectorized mask.
2451 VECTYPE is the vector type that the vectorized statements will use.
2452 NCOPIES is the number of vector statements that will be needed. */
2453
2454 static bool
2455 get_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info,
2456 tree vectype, slp_tree slp_node,
2457 bool masked_p, vec_load_store_type vls_type,
2458 unsigned int ncopies,
2459 vect_memory_access_type *memory_access_type,
2460 poly_int64 *poffset,
2461 dr_alignment_support *alignment_support_scheme,
2462 int *misalignment,
2463 gather_scatter_info *gs_info)
2464 {
2465 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
2466 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2467 *misalignment = DR_MISALIGNMENT_UNKNOWN;
2468 *poffset = 0;
2469 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
2470 {
2471 *memory_access_type = VMAT_GATHER_SCATTER;
2472 if (!vect_check_gather_scatter (stmt_info, loop_vinfo, gs_info))
2473 gcc_unreachable ();
2474 else if (!vect_is_simple_use (gs_info->offset, vinfo,
2475 &gs_info->offset_dt,
2476 &gs_info->offset_vectype))
2477 {
2478 if (dump_enabled_p ())
2479 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2480 "%s index use not simple.\n",
2481 vls_type == VLS_LOAD ? "gather" : "scatter");
2482 return false;
2483 }
2484 else if (gs_info->ifn == IFN_LAST && !gs_info->decl)
2485 {
2486 if (vls_type != VLS_LOAD)
2487 {
2488 if (dump_enabled_p ())
2489 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2490 "unsupported emulated scatter.\n");
2491 return false;
2492 }
2493 else if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant ()
2494 || !TYPE_VECTOR_SUBPARTS
2495 (gs_info->offset_vectype).is_constant ()
2496 || !constant_multiple_p (TYPE_VECTOR_SUBPARTS
2497 (gs_info->offset_vectype),
2498 TYPE_VECTOR_SUBPARTS (vectype)))
2499 {
2500 if (dump_enabled_p ())
2501 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2502 "unsupported vector types for emulated "
2503 "gather.\n");
2504 return false;
2505 }
2506 }
2507 /* Gather-scatter accesses perform only component accesses, alignment
2508 is irrelevant for them. */
2509 *alignment_support_scheme = dr_unaligned_supported;
2510 }
2511 else if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
2512 {
2513 if (!get_group_load_store_type (vinfo, stmt_info, vectype, slp_node,
2514 masked_p,
2515 vls_type, memory_access_type, poffset,
2516 alignment_support_scheme,
2517 misalignment, gs_info))
2518 return false;
2519 }
2520 else if (STMT_VINFO_STRIDED_P (stmt_info))
2521 {
2522 gcc_assert (!slp_node);
2523 if (loop_vinfo
2524 && vect_use_strided_gather_scatters_p (stmt_info, loop_vinfo,
2525 masked_p, gs_info))
2526 *memory_access_type = VMAT_GATHER_SCATTER;
2527 else
2528 *memory_access_type = VMAT_ELEMENTWISE;
2529 /* Alignment is irrelevant here. */
2530 *alignment_support_scheme = dr_unaligned_supported;
2531 }
2532 else
2533 {
2534 int cmp = compare_step_with_zero (vinfo, stmt_info);
2535 if (cmp == 0)
2536 {
2537 gcc_assert (vls_type == VLS_LOAD);
2538 *memory_access_type = VMAT_INVARIANT;
2539 /* Invariant accesses perform only component accesses, alignment
2540 is irrelevant for them. */
2541 *alignment_support_scheme = dr_unaligned_supported;
2542 }
2543 else
2544 {
2545 if (cmp < 0)
2546 *memory_access_type = get_negative_load_store_type
2547 (vinfo, stmt_info, vectype, vls_type, ncopies, poffset);
2548 else
2549 *memory_access_type = VMAT_CONTIGUOUS;
2550 *misalignment = dr_misalignment (STMT_VINFO_DR_INFO (stmt_info),
2551 vectype, *poffset);
2552 *alignment_support_scheme
2553 = vect_supportable_dr_alignment (vinfo,
2554 STMT_VINFO_DR_INFO (stmt_info),
2555 vectype, *misalignment);
2556 }
2557 }
2558
2559 if ((*memory_access_type == VMAT_ELEMENTWISE
2560 || *memory_access_type == VMAT_STRIDED_SLP)
2561 && !nunits.is_constant ())
2562 {
2563 if (dump_enabled_p ())
2564 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2565 "Not using elementwise accesses due to variable "
2566 "vectorization factor.\n");
2567 return false;
2568 }
2569
2570 if (*alignment_support_scheme == dr_unaligned_unsupported)
2571 {
2572 if (dump_enabled_p ())
2573 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2574 "unsupported unaligned access\n");
2575 return false;
2576 }
2577
2578 /* FIXME: At the moment the cost model seems to underestimate the
2579 cost of using elementwise accesses. This check preserves the
2580 traditional behavior until that can be fixed. */
2581 stmt_vec_info first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
2582 if (!first_stmt_info)
2583 first_stmt_info = stmt_info;
2584 if (*memory_access_type == VMAT_ELEMENTWISE
2585 && !STMT_VINFO_STRIDED_P (first_stmt_info)
2586 && !(stmt_info == DR_GROUP_FIRST_ELEMENT (stmt_info)
2587 && !DR_GROUP_NEXT_ELEMENT (stmt_info)
2588 && !pow2p_hwi (DR_GROUP_SIZE (stmt_info))))
2589 {
2590 if (dump_enabled_p ())
2591 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2592 "not falling back to elementwise accesses\n");
2593 return false;
2594 }
2595 return true;
2596 }
2597
2598 /* Return true if boolean argument at MASK_INDEX is suitable for vectorizing
2599 conditional operation STMT_INFO. When returning true, store the mask
2600 in *MASK, the type of its definition in *MASK_DT_OUT, the type of the
2601 vectorized mask in *MASK_VECTYPE_OUT and the SLP node corresponding
2602 to the mask in *MASK_NODE if MASK_NODE is not NULL. */
2603
2604 static bool
2605 vect_check_scalar_mask (vec_info *vinfo, stmt_vec_info stmt_info,
2606 slp_tree slp_node, unsigned mask_index,
2607 tree *mask, slp_tree *mask_node,
2608 vect_def_type *mask_dt_out, tree *mask_vectype_out)
2609 {
2610 enum vect_def_type mask_dt;
2611 tree mask_vectype;
2612 slp_tree mask_node_1;
2613 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, mask_index,
2614 mask, &mask_node_1, &mask_dt, &mask_vectype))
2615 {
2616 if (dump_enabled_p ())
2617 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2618 "mask use not simple.\n");
2619 return false;
2620 }
2621
2622 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (*mask)))
2623 {
2624 if (dump_enabled_p ())
2625 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2626 "mask argument is not a boolean.\n");
2627 return false;
2628 }
2629
2630 /* If the caller is not prepared for adjusting an external/constant
2631 SLP mask vector type fail. */
2632 if (slp_node
2633 && !mask_node
2634 && SLP_TREE_DEF_TYPE (mask_node_1) != vect_internal_def)
2635 {
2636 if (dump_enabled_p ())
2637 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2638 "SLP mask argument is not vectorized.\n");
2639 return false;
2640 }
2641
2642 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2643 if (!mask_vectype)
2644 mask_vectype = get_mask_type_for_scalar_type (vinfo, TREE_TYPE (vectype));
2645
2646 if (!mask_vectype || !VECTOR_BOOLEAN_TYPE_P (mask_vectype))
2647 {
2648 if (dump_enabled_p ())
2649 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2650 "could not find an appropriate vector mask type.\n");
2651 return false;
2652 }
2653
2654 if (maybe_ne (TYPE_VECTOR_SUBPARTS (mask_vectype),
2655 TYPE_VECTOR_SUBPARTS (vectype)))
2656 {
2657 if (dump_enabled_p ())
2658 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2659 "vector mask type %T"
2660 " does not match vector data type %T.\n",
2661 mask_vectype, vectype);
2662
2663 return false;
2664 }
2665
2666 *mask_dt_out = mask_dt;
2667 *mask_vectype_out = mask_vectype;
2668 if (mask_node)
2669 *mask_node = mask_node_1;
2670 return true;
2671 }
2672
2673 /* Return true if stored value RHS is suitable for vectorizing store
2674 statement STMT_INFO. When returning true, store the type of the
2675 definition in *RHS_DT_OUT, the type of the vectorized store value in
2676 *RHS_VECTYPE_OUT and the type of the store in *VLS_TYPE_OUT. */
2677
2678 static bool
2679 vect_check_store_rhs (vec_info *vinfo, stmt_vec_info stmt_info,
2680 slp_tree slp_node, tree rhs,
2681 vect_def_type *rhs_dt_out, tree *rhs_vectype_out,
2682 vec_load_store_type *vls_type_out)
2683 {
2684 /* In the case this is a store from a constant make sure
2685 native_encode_expr can handle it. */
2686 if (CONSTANT_CLASS_P (rhs) && native_encode_expr (rhs, NULL, 64) == 0)
2687 {
2688 if (dump_enabled_p ())
2689 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2690 "cannot encode constant as a byte sequence.\n");
2691 return false;
2692 }
2693
2694 unsigned op_no = 0;
2695 if (gcall *call = dyn_cast <gcall *> (stmt_info->stmt))
2696 {
2697 if (gimple_call_internal_p (call)
2698 && internal_store_fn_p (gimple_call_internal_fn (call)))
2699 op_no = internal_fn_stored_value_index (gimple_call_internal_fn (call));
2700 }
2701
2702 enum vect_def_type rhs_dt;
2703 tree rhs_vectype;
2704 slp_tree slp_op;
2705 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, op_no,
2706 &rhs, &slp_op, &rhs_dt, &rhs_vectype))
2707 {
2708 if (dump_enabled_p ())
2709 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2710 "use not simple.\n");
2711 return false;
2712 }
2713
2714 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2715 if (rhs_vectype && !useless_type_conversion_p (vectype, rhs_vectype))
2716 {
2717 if (dump_enabled_p ())
2718 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2719 "incompatible vector types.\n");
2720 return false;
2721 }
2722
2723 *rhs_dt_out = rhs_dt;
2724 *rhs_vectype_out = rhs_vectype;
2725 if (rhs_dt == vect_constant_def || rhs_dt == vect_external_def)
2726 *vls_type_out = VLS_STORE_INVARIANT;
2727 else
2728 *vls_type_out = VLS_STORE;
2729 return true;
2730 }
2731
2732 /* Build an all-ones vector mask of type MASKTYPE while vectorizing STMT_INFO.
2733 Note that we support masks with floating-point type, in which case the
2734 floats are interpreted as a bitmask. */
2735
2736 static tree
2737 vect_build_all_ones_mask (vec_info *vinfo,
2738 stmt_vec_info stmt_info, tree masktype)
2739 {
2740 if (TREE_CODE (masktype) == INTEGER_TYPE)
2741 return build_int_cst (masktype, -1);
2742 else if (TREE_CODE (TREE_TYPE (masktype)) == INTEGER_TYPE)
2743 {
2744 tree mask = build_int_cst (TREE_TYPE (masktype), -1);
2745 mask = build_vector_from_val (masktype, mask);
2746 return vect_init_vector (vinfo, stmt_info, mask, masktype, NULL);
2747 }
2748 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (masktype)))
2749 {
2750 REAL_VALUE_TYPE r;
2751 long tmp[6];
2752 for (int j = 0; j < 6; ++j)
2753 tmp[j] = -1;
2754 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (masktype)));
2755 tree mask = build_real (TREE_TYPE (masktype), r);
2756 mask = build_vector_from_val (masktype, mask);
2757 return vect_init_vector (vinfo, stmt_info, mask, masktype, NULL);
2758 }
2759 gcc_unreachable ();
2760 }
2761
2762 /* Build an all-zero merge value of type VECTYPE while vectorizing
2763 STMT_INFO as a gather load. */
2764
2765 static tree
2766 vect_build_zero_merge_argument (vec_info *vinfo,
2767 stmt_vec_info stmt_info, tree vectype)
2768 {
2769 tree merge;
2770 if (TREE_CODE (TREE_TYPE (vectype)) == INTEGER_TYPE)
2771 merge = build_int_cst (TREE_TYPE (vectype), 0);
2772 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (vectype)))
2773 {
2774 REAL_VALUE_TYPE r;
2775 long tmp[6];
2776 for (int j = 0; j < 6; ++j)
2777 tmp[j] = 0;
2778 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (vectype)));
2779 merge = build_real (TREE_TYPE (vectype), r);
2780 }
2781 else
2782 gcc_unreachable ();
2783 merge = build_vector_from_val (vectype, merge);
2784 return vect_init_vector (vinfo, stmt_info, merge, vectype, NULL);
2785 }
2786
2787 /* Build a gather load call while vectorizing STMT_INFO. Insert new
2788 instructions before GSI and add them to VEC_STMT. GS_INFO describes
2789 the gather load operation. If the load is conditional, MASK is the
2790 unvectorized condition and MASK_DT is its definition type, otherwise
2791 MASK is null. */
2792
2793 static void
2794 vect_build_gather_load_calls (vec_info *vinfo, stmt_vec_info stmt_info,
2795 gimple_stmt_iterator *gsi,
2796 gimple **vec_stmt,
2797 gather_scatter_info *gs_info,
2798 tree mask)
2799 {
2800 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
2801 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2802 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2803 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2804 int ncopies = vect_get_num_copies (loop_vinfo, vectype);
2805 edge pe = loop_preheader_edge (loop);
2806 enum { NARROW, NONE, WIDEN } modifier;
2807 poly_uint64 gather_off_nunits
2808 = TYPE_VECTOR_SUBPARTS (gs_info->offset_vectype);
2809
2810 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info->decl));
2811 tree rettype = TREE_TYPE (TREE_TYPE (gs_info->decl));
2812 tree srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2813 tree ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2814 tree idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2815 tree masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2816 tree scaletype = TREE_VALUE (arglist);
2817 tree real_masktype = masktype;
2818 gcc_checking_assert (types_compatible_p (srctype, rettype)
2819 && (!mask
2820 || TREE_CODE (masktype) == INTEGER_TYPE
2821 || types_compatible_p (srctype, masktype)));
2822 if (mask)
2823 masktype = truth_type_for (srctype);
2824
2825 tree mask_halftype = masktype;
2826 tree perm_mask = NULL_TREE;
2827 tree mask_perm_mask = NULL_TREE;
2828 if (known_eq (nunits, gather_off_nunits))
2829 modifier = NONE;
2830 else if (known_eq (nunits * 2, gather_off_nunits))
2831 {
2832 modifier = WIDEN;
2833
2834 /* Currently widening gathers and scatters are only supported for
2835 fixed-length vectors. */
2836 int count = gather_off_nunits.to_constant ();
2837 vec_perm_builder sel (count, count, 1);
2838 for (int i = 0; i < count; ++i)
2839 sel.quick_push (i | (count / 2));
2840
2841 vec_perm_indices indices (sel, 1, count);
2842 perm_mask = vect_gen_perm_mask_checked (gs_info->offset_vectype,
2843 indices);
2844 }
2845 else if (known_eq (nunits, gather_off_nunits * 2))
2846 {
2847 modifier = NARROW;
2848
2849 /* Currently narrowing gathers and scatters are only supported for
2850 fixed-length vectors. */
2851 int count = nunits.to_constant ();
2852 vec_perm_builder sel (count, count, 1);
2853 sel.quick_grow (count);
2854 for (int i = 0; i < count; ++i)
2855 sel[i] = i < count / 2 ? i : i + count / 2;
2856 vec_perm_indices indices (sel, 2, count);
2857 perm_mask = vect_gen_perm_mask_checked (vectype, indices);
2858
2859 ncopies *= 2;
2860
2861 if (mask && VECTOR_TYPE_P (real_masktype))
2862 {
2863 for (int i = 0; i < count; ++i)
2864 sel[i] = i | (count / 2);
2865 indices.new_vector (sel, 2, count);
2866 mask_perm_mask = vect_gen_perm_mask_checked (masktype, indices);
2867 }
2868 else if (mask)
2869 mask_halftype = truth_type_for (gs_info->offset_vectype);
2870 }
2871 else
2872 gcc_unreachable ();
2873
2874 tree scalar_dest = gimple_get_lhs (stmt_info->stmt);
2875 tree vec_dest = vect_create_destination_var (scalar_dest, vectype);
2876
2877 tree ptr = fold_convert (ptrtype, gs_info->base);
2878 if (!is_gimple_min_invariant (ptr))
2879 {
2880 gimple_seq seq;
2881 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
2882 basic_block new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
2883 gcc_assert (!new_bb);
2884 }
2885
2886 tree scale = build_int_cst (scaletype, gs_info->scale);
2887
2888 tree vec_oprnd0 = NULL_TREE;
2889 tree vec_mask = NULL_TREE;
2890 tree src_op = NULL_TREE;
2891 tree mask_op = NULL_TREE;
2892 tree prev_res = NULL_TREE;
2893
2894 if (!mask)
2895 {
2896 src_op = vect_build_zero_merge_argument (vinfo, stmt_info, rettype);
2897 mask_op = vect_build_all_ones_mask (vinfo, stmt_info, masktype);
2898 }
2899
2900 auto_vec<tree> vec_oprnds0;
2901 auto_vec<tree> vec_masks;
2902 vect_get_vec_defs_for_operand (vinfo, stmt_info,
2903 modifier == WIDEN ? ncopies / 2 : ncopies,
2904 gs_info->offset, &vec_oprnds0);
2905 if (mask)
2906 vect_get_vec_defs_for_operand (vinfo, stmt_info,
2907 modifier == NARROW ? ncopies / 2 : ncopies,
2908 mask, &vec_masks, masktype);
2909 for (int j = 0; j < ncopies; ++j)
2910 {
2911 tree op, var;
2912 if (modifier == WIDEN && (j & 1))
2913 op = permute_vec_elements (vinfo, vec_oprnd0, vec_oprnd0,
2914 perm_mask, stmt_info, gsi);
2915 else
2916 op = vec_oprnd0 = vec_oprnds0[modifier == WIDEN ? j / 2 : j];
2917
2918 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
2919 {
2920 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op)),
2921 TYPE_VECTOR_SUBPARTS (idxtype)));
2922 var = vect_get_new_ssa_name (idxtype, vect_simple_var);
2923 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
2924 gassign *new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
2925 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2926 op = var;
2927 }
2928
2929 if (mask)
2930 {
2931 if (mask_perm_mask && (j & 1))
2932 mask_op = permute_vec_elements (vinfo, mask_op, mask_op,
2933 mask_perm_mask, stmt_info, gsi);
2934 else
2935 {
2936 if (modifier == NARROW)
2937 {
2938 if ((j & 1) == 0)
2939 vec_mask = vec_masks[j / 2];
2940 }
2941 else
2942 vec_mask = vec_masks[j];
2943
2944 mask_op = vec_mask;
2945 if (!useless_type_conversion_p (masktype, TREE_TYPE (vec_mask)))
2946 {
2947 poly_uint64 sub1 = TYPE_VECTOR_SUBPARTS (TREE_TYPE (mask_op));
2948 poly_uint64 sub2 = TYPE_VECTOR_SUBPARTS (masktype);
2949 gcc_assert (known_eq (sub1, sub2));
2950 var = vect_get_new_ssa_name (masktype, vect_simple_var);
2951 mask_op = build1 (VIEW_CONVERT_EXPR, masktype, mask_op);
2952 gassign *new_stmt
2953 = gimple_build_assign (var, VIEW_CONVERT_EXPR, mask_op);
2954 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2955 mask_op = var;
2956 }
2957 }
2958 if (modifier == NARROW && !VECTOR_TYPE_P (real_masktype))
2959 {
2960 var = vect_get_new_ssa_name (mask_halftype, vect_simple_var);
2961 gassign *new_stmt
2962 = gimple_build_assign (var, (j & 1) ? VEC_UNPACK_HI_EXPR
2963 : VEC_UNPACK_LO_EXPR,
2964 mask_op);
2965 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2966 mask_op = var;
2967 }
2968 src_op = mask_op;
2969 }
2970
2971 tree mask_arg = mask_op;
2972 if (masktype != real_masktype)
2973 {
2974 tree utype, optype = TREE_TYPE (mask_op);
2975 if (VECTOR_TYPE_P (real_masktype)
2976 || TYPE_MODE (real_masktype) == TYPE_MODE (optype))
2977 utype = real_masktype;
2978 else
2979 utype = lang_hooks.types.type_for_mode (TYPE_MODE (optype), 1);
2980 var = vect_get_new_ssa_name (utype, vect_scalar_var);
2981 mask_arg = build1 (VIEW_CONVERT_EXPR, utype, mask_op);
2982 gassign *new_stmt
2983 = gimple_build_assign (var, VIEW_CONVERT_EXPR, mask_arg);
2984 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2985 mask_arg = var;
2986 if (!useless_type_conversion_p (real_masktype, utype))
2987 {
2988 gcc_assert (TYPE_PRECISION (utype)
2989 <= TYPE_PRECISION (real_masktype));
2990 var = vect_get_new_ssa_name (real_masktype, vect_scalar_var);
2991 new_stmt = gimple_build_assign (var, NOP_EXPR, mask_arg);
2992 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2993 mask_arg = var;
2994 }
2995 src_op = build_zero_cst (srctype);
2996 }
2997 gimple *new_stmt = gimple_build_call (gs_info->decl, 5, src_op, ptr, op,
2998 mask_arg, scale);
2999
3000 if (!useless_type_conversion_p (vectype, rettype))
3001 {
3002 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (vectype),
3003 TYPE_VECTOR_SUBPARTS (rettype)));
3004 op = vect_get_new_ssa_name (rettype, vect_simple_var);
3005 gimple_call_set_lhs (new_stmt, op);
3006 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3007 var = make_ssa_name (vec_dest);
3008 op = build1 (VIEW_CONVERT_EXPR, vectype, op);
3009 new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
3010 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3011 }
3012 else
3013 {
3014 var = make_ssa_name (vec_dest, new_stmt);
3015 gimple_call_set_lhs (new_stmt, var);
3016 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3017 }
3018
3019 if (modifier == NARROW)
3020 {
3021 if ((j & 1) == 0)
3022 {
3023 prev_res = var;
3024 continue;
3025 }
3026 var = permute_vec_elements (vinfo, prev_res, var, perm_mask,
3027 stmt_info, gsi);
3028 new_stmt = SSA_NAME_DEF_STMT (var);
3029 }
3030
3031 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
3032 }
3033 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
3034 }
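/* For example (an illustrative WIDEN case), with V2DF data and a V4SI
   offset vector, GATHER_OFF_NUNITS is 4 and the permutation built above is
   { 2, 3, 2, 3 }, so odd-numbered copies gather using the high half of the
   offset vector while even-numbered copies use it unchanged.  */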
3035
3036 /* Prepare the base and offset in GS_INFO for vectorization.
3037 Set *DATAREF_PTR to the loop-invariant base address and *VEC_OFFSET
3038 to the vectorized offset argument for the first copy of STMT_INFO.
3039 STMT_INFO is the statement described by GS_INFO and LOOP is the
3040 containing loop. */
3041
3042 static void
3043 vect_get_gather_scatter_ops (loop_vec_info loop_vinfo,
3044 class loop *loop, stmt_vec_info stmt_info,
3045 slp_tree slp_node, gather_scatter_info *gs_info,
3046 tree *dataref_ptr, vec<tree> *vec_offset)
3047 {
3048 gimple_seq stmts = NULL;
3049 *dataref_ptr = force_gimple_operand (gs_info->base, &stmts, true, NULL_TREE);
3050 if (stmts != NULL)
3051 {
3052 basic_block new_bb;
3053 edge pe = loop_preheader_edge (loop);
3054 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
3055 gcc_assert (!new_bb);
3056 }
3057 if (slp_node)
3058 vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[0], vec_offset);
3059 else
3060 {
3061 unsigned ncopies
3062 = vect_get_num_copies (loop_vinfo, gs_info->offset_vectype);
3063 vect_get_vec_defs_for_operand (loop_vinfo, stmt_info, ncopies,
3064 gs_info->offset, vec_offset,
3065 gs_info->offset_vectype);
3066 }
3067 }
3068
3069 /* Prepare to implement a grouped or strided load or store using
3070 the gather load or scatter store operation described by GS_INFO.
3071 STMT_INFO is the load or store statement.
3072
3073 Set *DATAREF_BUMP to the amount that should be added to the base
3074 address after each copy of the vectorized statement. Set *VEC_OFFSET
3075 to an invariant offset vector in which element I has the value
3076 I * DR_STEP / SCALE. */
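/* Illustrative example (values invented for this comment): with
   DR_STEP = 16 bytes, SCALE = 4 and a 4-element offset vector,
   X = 16 / 4 = 4, so *VEC_OFFSET is the invariant vector { 0, 4, 8, 12 }
   and *DATAREF_BUMP is 16 * TYPE_VECTOR_SUBPARTS (vectype) bytes.  */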
3077
3078 static void
3079 vect_get_strided_load_store_ops (stmt_vec_info stmt_info,
3080 loop_vec_info loop_vinfo,
3081 gather_scatter_info *gs_info,
3082 tree *dataref_bump, tree *vec_offset)
3083 {
3084 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
3085 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3086
3087 tree bump = size_binop (MULT_EXPR,
3088 fold_convert (sizetype, unshare_expr (DR_STEP (dr))),
3089 size_int (TYPE_VECTOR_SUBPARTS (vectype)));
3090 *dataref_bump = cse_and_gimplify_to_preheader (loop_vinfo, bump);
3091
3092 /* The offset given in GS_INFO can have pointer type, so use the element
3093 type of the vector instead. */
3094 tree offset_type = TREE_TYPE (gs_info->offset_vectype);
3095
3096 /* Calculate X = DR_STEP / SCALE and convert it to the appropriate type. */
3097 tree step = size_binop (EXACT_DIV_EXPR, unshare_expr (DR_STEP (dr)),
3098 ssize_int (gs_info->scale));
3099 step = fold_convert (offset_type, step);
3100
3101 /* Create {0, X, X*2, X*3, ...}. */
3102 tree offset = fold_build2 (VEC_SERIES_EXPR, gs_info->offset_vectype,
3103 build_zero_cst (offset_type), step);
3104 *vec_offset = cse_and_gimplify_to_preheader (loop_vinfo, offset);
3105 }
3106
3107 /* Return the amount that should be added to a vector pointer to move
3108 to the next or previous copy of AGGR_TYPE. DR_INFO is the data reference
3109 being vectorized and MEMORY_ACCESS_TYPE describes the type of
3110 vectorization. */
3111
3112 static tree
3113 vect_get_data_ptr_increment (vec_info *vinfo,
3114 dr_vec_info *dr_info, tree aggr_type,
3115 vect_memory_access_type memory_access_type)
3116 {
3117 if (memory_access_type == VMAT_INVARIANT)
3118 return size_zero_node;
3119
3120 tree iv_step = TYPE_SIZE_UNIT (aggr_type);
3121 tree step = vect_dr_behavior (vinfo, dr_info)->step;
3122 if (tree_int_cst_sgn (step) == -1)
3123 iv_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (iv_step), iv_step);
3124 return iv_step;
3125 }
3126
3127 /* Check and perform vectorization of BUILT_IN_BSWAP{16,32,64,128}. */
3128
3129 static bool
3130 vectorizable_bswap (vec_info *vinfo,
3131 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
3132 gimple **vec_stmt, slp_tree slp_node,
3133 slp_tree *slp_op,
3134 tree vectype_in, stmt_vector_for_cost *cost_vec)
3135 {
3136 tree op, vectype;
3137 gcall *stmt = as_a <gcall *> (stmt_info->stmt);
3138 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
3139 unsigned ncopies;
3140
3141 op = gimple_call_arg (stmt, 0);
3142 vectype = STMT_VINFO_VECTYPE (stmt_info);
3143 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
3144
3145 /* Multiple types in SLP are handled by creating the appropriate number of
3146 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
3147 case of SLP. */
3148 if (slp_node)
3149 ncopies = 1;
3150 else
3151 ncopies = vect_get_num_copies (loop_vinfo, vectype);
3152
3153 gcc_assert (ncopies >= 1);
3154
3155 tree char_vectype = get_same_sized_vectype (char_type_node, vectype_in);
3156 if (! char_vectype)
3157 return false;
3158
3159 poly_uint64 num_bytes = TYPE_VECTOR_SUBPARTS (char_vectype);
3160 unsigned word_bytes;
3161 if (!constant_multiple_p (num_bytes, nunits, &word_bytes))
3162 return false;
3163
3164 /* The encoding uses one stepped pattern for each byte in the word. */
3165 vec_perm_builder elts (num_bytes, word_bytes, 3);
3166 for (unsigned i = 0; i < 3; ++i)
3167 for (unsigned j = 0; j < word_bytes; ++j)
3168 elts.quick_push ((i + 1) * word_bytes - j - 1);
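/* Illustrative example (not taken from the source comments): for
   __builtin_bswap32 on a 16-byte vector, WORD_BYTES is 4 and the encoded
   permutation selects bytes { 3, 2, 1, 0, 7, 6, 5, 4, ... }, i.e. it
   reverses the bytes within each 4-byte word.  */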
3169
3170 vec_perm_indices indices (elts, 1, num_bytes);
3171 if (!can_vec_perm_const_p (TYPE_MODE (char_vectype), indices))
3172 return false;
3173
3174 if (! vec_stmt)
3175 {
3176 if (slp_node
3177 && !vect_maybe_update_slp_op_vectype (slp_op[0], vectype_in))
3178 {
3179 if (dump_enabled_p ())
3180 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3181 "incompatible vector types for invariants\n");
3182 return false;
3183 }
3184
3185 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
3186 DUMP_VECT_SCOPE ("vectorizable_bswap");
3187 record_stmt_cost (cost_vec,
3188 1, vector_stmt, stmt_info, 0, vect_prologue);
3189 record_stmt_cost (cost_vec,
3190 slp_node
3191 ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node) : ncopies,
3192 vec_perm, stmt_info, 0, vect_body);
3193 return true;
3194 }
3195
3196 tree bswap_vconst = vec_perm_indices_to_tree (char_vectype, indices);
3197
3198 /* Transform. */
3199 vec<tree> vec_oprnds = vNULL;
3200 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
3201 op, &vec_oprnds);
3202 /* Arguments are ready. Create the new vector stmt. */
3203 unsigned i;
3204 tree vop;
3205 FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
3206 {
3207 gimple *new_stmt;
3208 tree tem = make_ssa_name (char_vectype);
3209 new_stmt = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
3210 char_vectype, vop));
3211 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3212 tree tem2 = make_ssa_name (char_vectype);
3213 new_stmt = gimple_build_assign (tem2, VEC_PERM_EXPR,
3214 tem, tem, bswap_vconst);
3215 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3216 tem = make_ssa_name (vectype);
3217 new_stmt = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
3218 vectype, tem2));
3219 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3220 if (slp_node)
3221 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
3222 else
3223 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
3224 }
3225
3226 if (!slp_node)
3227 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
3228
3229 vec_oprnds.release ();
3230 return true;
3231 }
3232
3233 /* Return true if vector types VECTYPE_IN and VECTYPE_OUT have
3234 integer elements and if we can narrow VECTYPE_IN to VECTYPE_OUT
3235 in a single step. On success, store the binary pack code in
3236 *CONVERT_CODE. */
3237
3238 static bool
3239 simple_integer_narrowing (tree vectype_out, tree vectype_in,
3240 tree_code *convert_code)
3241 {
3242 if (!INTEGRAL_TYPE_P (TREE_TYPE (vectype_out))
3243 || !INTEGRAL_TYPE_P (TREE_TYPE (vectype_in)))
3244 return false;
3245
3246 tree_code code;
3247 int multi_step_cvt = 0;
3248 auto_vec <tree, 8> interm_types;
3249 if (!supportable_narrowing_operation (NOP_EXPR, vectype_out, vectype_in,
3250 &code, &multi_step_cvt, &interm_types)
3251 || multi_step_cvt)
3252 return false;
3253
3254 *convert_code = code;
3255 return true;
3256 }
3257
3258 /* Function vectorizable_call.
3259
3260 Check if STMT_INFO performs a function call that can be vectorized.
3261 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
3262 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
3263 Return true if STMT_INFO is vectorizable in this way. */
3264
3265 static bool
3266 vectorizable_call (vec_info *vinfo,
3267 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
3268 gimple **vec_stmt, slp_tree slp_node,
3269 stmt_vector_for_cost *cost_vec)
3270 {
3271 gcall *stmt;
3272 tree vec_dest;
3273 tree scalar_dest;
3274 tree op;
3275 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
3276 tree vectype_out, vectype_in;
3277 poly_uint64 nunits_in;
3278 poly_uint64 nunits_out;
3279 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
3280 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
3281 tree fndecl, new_temp, rhs_type;
3282 enum vect_def_type dt[4]
3283 = { vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type,
3284 vect_unknown_def_type };
3285 tree vectypes[ARRAY_SIZE (dt)] = {};
3286 slp_tree slp_op[ARRAY_SIZE (dt)] = {};
3287 int ndts = ARRAY_SIZE (dt);
3288 int ncopies, j;
3289 auto_vec<tree, 8> vargs;
3290 enum { NARROW, NONE, WIDEN } modifier;
3291 size_t i, nargs;
3292 tree lhs;
3293
3294 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3295 return false;
3296
3297 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
3298 && ! vec_stmt)
3299 return false;
3300
3301 /* Is STMT_INFO a vectorizable call? */
3302 stmt = dyn_cast <gcall *> (stmt_info->stmt);
3303 if (!stmt)
3304 return false;
3305
3306 if (gimple_call_internal_p (stmt)
3307 && (internal_load_fn_p (gimple_call_internal_fn (stmt))
3308 || internal_store_fn_p (gimple_call_internal_fn (stmt))))
3309 /* Handled by vectorizable_load and vectorizable_store. */
3310 return false;
3311
3312 if (gimple_call_lhs (stmt) == NULL_TREE
3313 || TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
3314 return false;
3315
3316 gcc_checking_assert (!stmt_can_throw_internal (cfun, stmt));
3317
3318 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
3319
3320 /* Process function arguments. */
3321 rhs_type = NULL_TREE;
3322 vectype_in = NULL_TREE;
3323 nargs = gimple_call_num_args (stmt);
3324
3325 /* Bail out if the function has more than four arguments; we do not have
3326 interesting builtin functions to vectorize with more than two arguments
3327 except for fma. Calls with no arguments are not interesting either. */
3328 if (nargs == 0 || nargs > 4)
3329 return false;
3330
3331 /* Ignore the arguments of IFN_GOMP_SIMD_LANE; they are magic. */
3332 combined_fn cfn = gimple_call_combined_fn (stmt);
3333 if (cfn == CFN_GOMP_SIMD_LANE)
3334 {
3335 nargs = 0;
3336 rhs_type = unsigned_type_node;
3337 }
3338
3339 int mask_opno = -1;
3340 if (internal_fn_p (cfn))
3341 mask_opno = internal_fn_mask_index (as_internal_fn (cfn));
3342
3343 for (i = 0; i < nargs; i++)
3344 {
3345 if ((int) i == mask_opno)
3346 {
3347 if (!vect_check_scalar_mask (vinfo, stmt_info, slp_node, mask_opno,
3348 &op, &slp_op[i], &dt[i], &vectypes[i]))
3349 return false;
3350 continue;
3351 }
3352
3353 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
3354 i, &op, &slp_op[i], &dt[i], &vectypes[i]))
3355 {
3356 if (dump_enabled_p ())
3357 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3358 "use not simple.\n");
3359 return false;
3360 }
3361
3362 /* We can only handle calls with arguments of the same type. */
3363 if (rhs_type
3364 && !types_compatible_p (rhs_type, TREE_TYPE (op)))
3365 {
3366 if (dump_enabled_p ())
3367 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3368 "argument types differ.\n");
3369 return false;
3370 }
3371 if (!rhs_type)
3372 rhs_type = TREE_TYPE (op);
3373
3374 if (!vectype_in)
3375 vectype_in = vectypes[i];
3376 else if (vectypes[i]
3377 && !types_compatible_p (vectypes[i], vectype_in))
3378 {
3379 if (dump_enabled_p ())
3380 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3381 "argument vector types differ.\n");
3382 return false;
3383 }
3384 }
3385 /* If all arguments are external or constant defs, infer the vector type
3386 from the scalar type. */
3387 if (!vectype_in)
3388 vectype_in = get_vectype_for_scalar_type (vinfo, rhs_type, slp_node);
3389 if (vec_stmt)
3390 gcc_assert (vectype_in);
3391 if (!vectype_in)
3392 {
3393 if (dump_enabled_p ())
3394 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3395 "no vectype for scalar type %T\n", rhs_type);
3396
3397 return false;
3398 }
3399 /* FORNOW: we don't yet support mixtures of vector sizes for calls,
3400 just mixtures of nunits. E.g. DI->SI versions of __builtin_ctz*
3401 are traditionally vectorized as two VnDI->VnDI IFN_CTZs followed
3402 by a pack of the two vectors into an SI vector. We would need
3403 separate code to handle direct VnDI->VnSI IFN_CTZs. */
3404 if (TYPE_SIZE (vectype_in) != TYPE_SIZE (vectype_out))
3405 {
3406 if (dump_enabled_p ())
3407 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3408 "mismatched vector sizes %T and %T\n",
3409 vectype_in, vectype_out);
3410 return false;
3411 }
3412
3413 if (VECTOR_BOOLEAN_TYPE_P (vectype_out)
3414 != VECTOR_BOOLEAN_TYPE_P (vectype_in))
3415 {
3416 if (dump_enabled_p ())
3417 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3418 "mixed mask and nonmask vector types\n");
3419 return false;
3420 }
3421
3422 if (vect_emulated_vector_p (vectype_in) || vect_emulated_vector_p (vectype_out))
3423 {
3424 if (dump_enabled_p ())
3425 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3426 "use emulated vector type for call\n");
3427 return false;
3428 }
3429
3430 /* FORNOW */
3431 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
3432 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
3433 if (known_eq (nunits_in * 2, nunits_out))
3434 modifier = NARROW;
3435 else if (known_eq (nunits_out, nunits_in))
3436 modifier = NONE;
3437 else if (known_eq (nunits_out * 2, nunits_in))
3438 modifier = WIDEN;
3439 else
3440 return false;
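/* Illustrative example (types invented for this comment): a call whose
   input vector type is V4DI and whose output vector type is V8SI has
   nunits_in = 4 and nunits_out = 8 and is therefore classified as
   NARROW; the reverse combination would be classified as WIDEN.  */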
3441
3442 /* We only handle functions that do not read or clobber memory. */
3443 if (gimple_vuse (stmt))
3444 {
3445 if (dump_enabled_p ())
3446 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3447 "function reads from or writes to memory.\n");
3448 return false;
3449 }
3450
3451 /* For now, we only vectorize functions if a target specific builtin
3452 is available. TODO -- in some cases, it might be profitable to
3453 insert the calls for pieces of the vector, in order to be able
3454 to vectorize other operations in the loop. */
3455 fndecl = NULL_TREE;
3456 internal_fn ifn = IFN_LAST;
3457 tree callee = gimple_call_fndecl (stmt);
3458
3459 /* First try using an internal function. */
3460 tree_code convert_code = ERROR_MARK;
3461 if (cfn != CFN_LAST
3462 && (modifier == NONE
3463 || (modifier == NARROW
3464 && simple_integer_narrowing (vectype_out, vectype_in,
3465 &convert_code))))
3466 ifn = vectorizable_internal_function (cfn, callee, vectype_out,
3467 vectype_in);
3468
3469 /* If that fails, try asking for a target-specific built-in function. */
3470 if (ifn == IFN_LAST)
3471 {
3472 if (cfn != CFN_LAST)
3473 fndecl = targetm.vectorize.builtin_vectorized_function
3474 (cfn, vectype_out, vectype_in);
3475 else if (callee && fndecl_built_in_p (callee, BUILT_IN_MD))
3476 fndecl = targetm.vectorize.builtin_md_vectorized_function
3477 (callee, vectype_out, vectype_in);
3478 }
3479
3480 if (ifn == IFN_LAST && !fndecl)
3481 {
3482 if (cfn == CFN_GOMP_SIMD_LANE
3483 && !slp_node
3484 && loop_vinfo
3485 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
3486 && TREE_CODE (gimple_call_arg (stmt, 0)) == SSA_NAME
3487 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
3488 == SSA_NAME_VAR (gimple_call_arg (stmt, 0)))
3489 {
3490 /* We can handle IFN_GOMP_SIMD_LANE by returning a
3491 { 0, 1, 2, ... vf - 1 } vector. */
3492 gcc_assert (nargs == 0);
3493 }
3494 else if (modifier == NONE
3495 && (gimple_call_builtin_p (stmt, BUILT_IN_BSWAP16)
3496 || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP32)
3497 || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP64)
3498 || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP128)))
3499 return vectorizable_bswap (vinfo, stmt_info, gsi, vec_stmt, slp_node,
3500 slp_op, vectype_in, cost_vec);
3501 else
3502 {
3503 if (dump_enabled_p ())
3504 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3505 "function is not vectorizable.\n");
3506 return false;
3507 }
3508 }
3509
3510 if (slp_node)
3511 ncopies = 1;
3512 else if (modifier == NARROW && ifn == IFN_LAST)
3513 ncopies = vect_get_num_copies (loop_vinfo, vectype_out);
3514 else
3515 ncopies = vect_get_num_copies (loop_vinfo, vectype_in);
3516
3517 /* Sanity check: make sure that at least one copy of the vectorized stmt
3518 needs to be generated. */
3519 gcc_assert (ncopies >= 1);
3520
3521 int reduc_idx = STMT_VINFO_REDUC_IDX (stmt_info);
3522 internal_fn cond_fn = get_conditional_internal_fn (ifn);
3523 vec_loop_masks *masks = (loop_vinfo ? &LOOP_VINFO_MASKS (loop_vinfo) : NULL);
3524 if (!vec_stmt) /* transformation not required. */
3525 {
3526 if (slp_node)
3527 for (i = 0; i < nargs; ++i)
3528 if (!vect_maybe_update_slp_op_vectype (slp_op[i],
3529 vectypes[i]
3530 ? vectypes[i] : vectype_in))
3531 {
3532 if (dump_enabled_p ())
3533 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3534 "incompatible vector types for invariants\n");
3535 return false;
3536 }
3537 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
3538 DUMP_VECT_SCOPE ("vectorizable_call");
3539 vect_model_simple_cost (vinfo, stmt_info,
3540 ncopies, dt, ndts, slp_node, cost_vec);
3541 if (ifn != IFN_LAST && modifier == NARROW && !slp_node)
3542 record_stmt_cost (cost_vec, ncopies / 2,
3543 vec_promote_demote, stmt_info, 0, vect_body);
3544
3545 if (loop_vinfo
3546 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo)
3547 && (reduc_idx >= 0 || mask_opno >= 0))
3548 {
3549 if (reduc_idx >= 0
3550 && (cond_fn == IFN_LAST
3551 || !direct_internal_fn_supported_p (cond_fn, vectype_out,
3552 OPTIMIZE_FOR_SPEED)))
3553 {
3554 if (dump_enabled_p ())
3555 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3556 "can't use a fully-masked loop because no"
3557 " conditional operation is available.\n");
3558 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
3559 }
3560 else
3561 {
3562 unsigned int nvectors
3563 = (slp_node
3564 ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node)
3565 : ncopies);
3566 tree scalar_mask = NULL_TREE;
3567 if (mask_opno >= 0)
3568 scalar_mask = gimple_call_arg (stmt_info->stmt, mask_opno);
3569 vect_record_loop_mask (loop_vinfo, masks, nvectors,
3570 vectype_out, scalar_mask);
3571 }
3572 }
3573 return true;
3574 }
3575
3576 /* Transform. */
3577
3578 if (dump_enabled_p ())
3579 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
3580
3581 /* Handle def. */
3582 scalar_dest = gimple_call_lhs (stmt);
3583 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
3584
3585 bool masked_loop_p = loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo);
3586 unsigned int vect_nargs = nargs;
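/* For a call in a fully-masked loop that is part of a reduction, use the
   conditional internal function instead; it takes the loop mask as an
   extra first argument and an "else" value (the vectorized reduction
   operand, selected via REDUC_IDX below) as an extra last argument.  */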
3587 if (masked_loop_p && reduc_idx >= 0)
3588 {
3589 ifn = cond_fn;
3590 vect_nargs += 2;
3591 }
3592
3593 if (modifier == NONE || ifn != IFN_LAST)
3594 {
3595 tree prev_res = NULL_TREE;
3596 vargs.safe_grow (vect_nargs, true);
3597 auto_vec<vec<tree> > vec_defs (nargs);
3598 for (j = 0; j < ncopies; ++j)
3599 {
3600 /* Build argument list for the vectorized call. */
3601 if (slp_node)
3602 {
3603 vec<tree> vec_oprnds0;
3604
3605 vect_get_slp_defs (vinfo, slp_node, &vec_defs);
3606 vec_oprnds0 = vec_defs[0];
3607
3608 /* Arguments are ready. Create the new vector stmt. */
3609 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_oprnd0)
3610 {
3611 int varg = 0;
3612 if (masked_loop_p && reduc_idx >= 0)
3613 {
3614 unsigned int vec_num = vec_oprnds0.length ();
3615 /* Always true for SLP. */
3616 gcc_assert (ncopies == 1);
3617 vargs[varg++] = vect_get_loop_mask (gsi, masks, vec_num,
3618 vectype_out, i);
3619 }
3620 size_t k;
3621 for (k = 0; k < nargs; k++)
3622 {
3623 vec<tree> vec_oprndsk = vec_defs[k];
3624 vargs[varg++] = vec_oprndsk[i];
3625 }
3626 if (masked_loop_p && reduc_idx >= 0)
3627 vargs[varg++] = vargs[reduc_idx + 1];
3628 gimple *new_stmt;
3629 if (modifier == NARROW)
3630 {
3631 /* We don't define any narrowing conditional functions
3632 at present. */
3633 gcc_assert (mask_opno < 0);
3634 tree half_res = make_ssa_name (vectype_in);
3635 gcall *call
3636 = gimple_build_call_internal_vec (ifn, vargs);
3637 gimple_call_set_lhs (call, half_res);
3638 gimple_call_set_nothrow (call, true);
3639 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
3640 if ((i & 1) == 0)
3641 {
3642 prev_res = half_res;
3643 continue;
3644 }
3645 new_temp = make_ssa_name (vec_dest);
3646 new_stmt = gimple_build_assign (new_temp, convert_code,
3647 prev_res, half_res);
3648 vect_finish_stmt_generation (vinfo, stmt_info,
3649 new_stmt, gsi);
3650 }
3651 else
3652 {
3653 if (mask_opno >= 0 && masked_loop_p)
3654 {
3655 unsigned int vec_num = vec_oprnds0.length ();
3656 /* Always true for SLP. */
3657 gcc_assert (ncopies == 1);
3658 tree mask = vect_get_loop_mask (gsi, masks, vec_num,
3659 vectype_out, i);
3660 vargs[mask_opno] = prepare_vec_mask
3661 (loop_vinfo, TREE_TYPE (mask), mask,
3662 vargs[mask_opno], gsi);
3663 }
3664
3665 gcall *call;
3666 if (ifn != IFN_LAST)
3667 call = gimple_build_call_internal_vec (ifn, vargs);
3668 else
3669 call = gimple_build_call_vec (fndecl, vargs);
3670 new_temp = make_ssa_name (vec_dest, call);
3671 gimple_call_set_lhs (call, new_temp);
3672 gimple_call_set_nothrow (call, true);
3673 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
3674 new_stmt = call;
3675 }
3676 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
3677 }
3678 continue;
3679 }
3680
3681 int varg = 0;
3682 if (masked_loop_p && reduc_idx >= 0)
3683 vargs[varg++] = vect_get_loop_mask (gsi, masks, ncopies,
3684 vectype_out, j);
3685 for (i = 0; i < nargs; i++)
3686 {
3687 op = gimple_call_arg (stmt, i);
3688 if (j == 0)
3689 {
3690 vec_defs.quick_push (vNULL);
3691 vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
3692 op, &vec_defs[i],
3693 vectypes[i]);
3694 }
3695 vargs[varg++] = vec_defs[i][j];
3696 }
3697 if (masked_loop_p && reduc_idx >= 0)
3698 vargs[varg++] = vargs[reduc_idx + 1];
3699
3700 if (mask_opno >= 0 && masked_loop_p)
3701 {
3702 tree mask = vect_get_loop_mask (gsi, masks, ncopies,
3703 vectype_out, j);
3704 vargs[mask_opno]
3705 = prepare_vec_mask (loop_vinfo, TREE_TYPE (mask), mask,
3706 vargs[mask_opno], gsi);
3707 }
3708
3709 gimple *new_stmt;
3710 if (cfn == CFN_GOMP_SIMD_LANE)
3711 {
3712 tree cst = build_index_vector (vectype_out, j * nunits_out, 1);
3713 tree new_var
3714 = vect_get_new_ssa_name (vectype_out, vect_simple_var, "cst_");
3715 gimple *init_stmt = gimple_build_assign (new_var, cst);
3716 vect_init_vector_1 (vinfo, stmt_info, init_stmt, NULL);
3717 new_temp = make_ssa_name (vec_dest);
3718 new_stmt = gimple_build_assign (new_temp, new_var);
3719 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3720 }
3721 else if (modifier == NARROW)
3722 {
3723 /* We don't define any narrowing conditional functions at
3724 present. */
3725 gcc_assert (mask_opno < 0);
3726 tree half_res = make_ssa_name (vectype_in);
3727 gcall *call = gimple_build_call_internal_vec (ifn, vargs);
3728 gimple_call_set_lhs (call, half_res);
3729 gimple_call_set_nothrow (call, true);
3730 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
3731 if ((j & 1) == 0)
3732 {
3733 prev_res = half_res;
3734 continue;
3735 }
3736 new_temp = make_ssa_name (vec_dest);
3737 new_stmt = gimple_build_assign (new_temp, convert_code,
3738 prev_res, half_res);
3739 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3740 }
3741 else
3742 {
3743 gcall *call;
3744 if (ifn != IFN_LAST)
3745 call = gimple_build_call_internal_vec (ifn, vargs);
3746 else
3747 call = gimple_build_call_vec (fndecl, vargs);
3748 new_temp = make_ssa_name (vec_dest, call);
3749 gimple_call_set_lhs (call, new_temp);
3750 gimple_call_set_nothrow (call, true);
3751 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
3752 new_stmt = call;
3753 }
3754
3755 if (j == (modifier == NARROW ? 1 : 0))
3756 *vec_stmt = new_stmt;
3757 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
3758 }
3759 for (i = 0; i < nargs; i++)
3760 {
3761 vec<tree> vec_oprndsi = vec_defs[i];
3762 vec_oprndsi.release ();
3763 }
3764 }
3765 else if (modifier == NARROW)
3766 {
3767 auto_vec<vec<tree> > vec_defs (nargs);
3768 /* We don't define any narrowing conditional functions at present. */
3769 gcc_assert (mask_opno < 0);
3770 for (j = 0; j < ncopies; ++j)
3771 {
3772 /* Build argument list for the vectorized call. */
3773 if (j == 0)
3774 vargs.create (nargs * 2);
3775 else
3776 vargs.truncate (0);
3777
3778 if (slp_node)
3779 {
3780 vec<tree> vec_oprnds0;
3781
3782 vect_get_slp_defs (vinfo, slp_node, &vec_defs);
3783 vec_oprnds0 = vec_defs[0];
3784
3785 /* Arguments are ready. Create the new vector stmt. */
3786 for (i = 0; vec_oprnds0.iterate (i, &vec_oprnd0); i += 2)
3787 {
3788 size_t k;
3789 vargs.truncate (0);
3790 for (k = 0; k < nargs; k++)
3791 {
3792 vec<tree> vec_oprndsk = vec_defs[k];
3793 vargs.quick_push (vec_oprndsk[i]);
3794 vargs.quick_push (vec_oprndsk[i + 1]);
3795 }
3796 gcall *call;
3797 if (ifn != IFN_LAST)
3798 call = gimple_build_call_internal_vec (ifn, vargs);
3799 else
3800 call = gimple_build_call_vec (fndecl, vargs);
3801 new_temp = make_ssa_name (vec_dest, call);
3802 gimple_call_set_lhs (call, new_temp);
3803 gimple_call_set_nothrow (call, true);
3804 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
3805 SLP_TREE_VEC_STMTS (slp_node).quick_push (call);
3806 }
3807 continue;
3808 }
3809
3810 for (i = 0; i < nargs; i++)
3811 {
3812 op = gimple_call_arg (stmt, i);
3813 if (j == 0)
3814 {
3815 vec_defs.quick_push (vNULL);
3816 vect_get_vec_defs_for_operand (vinfo, stmt_info, 2 * ncopies,
3817 op, &vec_defs[i], vectypes[i]);
3818 }
3819 vec_oprnd0 = vec_defs[i][2*j];
3820 vec_oprnd1 = vec_defs[i][2*j+1];
3821
3822 vargs.quick_push (vec_oprnd0);
3823 vargs.quick_push (vec_oprnd1);
3824 }
3825
3826 gcall *new_stmt = gimple_build_call_vec (fndecl, vargs);
3827 new_temp = make_ssa_name (vec_dest, new_stmt);
3828 gimple_call_set_lhs (new_stmt, new_temp);
3829 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3830
3831 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
3832 }
3833
3834 if (!slp_node)
3835 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
3836
3837 for (i = 0; i < nargs; i++)
3838 {
3839 vec<tree> vec_oprndsi = vec_defs[i];
3840 vec_oprndsi.release ();
3841 }
3842 }
3843 else
3844 /* No current target implements this case. */
3845 return false;
3846
3847 vargs.release ();
3848
3849 /* The call in STMT might prevent it from being removed in dce.
3850 We however cannot remove it here, due to the way the ssa name
3851 it defines is mapped to the new definition. So just replace
3852 the rhs of the statement with something harmless. */
3853
3854 if (slp_node)
3855 return true;
3856
3857 stmt_info = vect_orig_stmt (stmt_info);
3858 lhs = gimple_get_lhs (stmt_info->stmt);
3859
3860 gassign *new_stmt
3861 = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
3862 vinfo->replace_stmt (gsi, stmt_info, new_stmt);
3863
3864 return true;
3865 }
3866
3867
3868 struct simd_call_arg_info
3869 {
3870 tree vectype;
3871 tree op;
3872 HOST_WIDE_INT linear_step;
3873 enum vect_def_type dt;
3874 unsigned int align;
3875 bool simd_lane_linear;
3876 };
3877
3878 /* Helper function of vectorizable_simd_clone_call. If OP, an SSA_NAME,
3879 is linear within simd lane (but not within whole loop), note it in
3880 *ARGINFO. */
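/* Illustrative GIMPLE shape recognized here (SSA names invented for this
   comment):

     _1 = GOMP_SIMD_LANE (simduid.0_5);
     _2 = _1 * 4;
     op_3 = &a + _2;

   which records &a as the base and 4 as the linear step within the
   simd lane.  */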
3881
3882 static void
3883 vect_simd_lane_linear (tree op, class loop *loop,
3884 struct simd_call_arg_info *arginfo)
3885 {
3886 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
3887
3888 if (!is_gimple_assign (def_stmt)
3889 || gimple_assign_rhs_code (def_stmt) != POINTER_PLUS_EXPR
3890 || !is_gimple_min_invariant (gimple_assign_rhs1 (def_stmt)))
3891 return;
3892
3893 tree base = gimple_assign_rhs1 (def_stmt);
3894 HOST_WIDE_INT linear_step = 0;
3895 tree v = gimple_assign_rhs2 (def_stmt);
3896 while (TREE_CODE (v) == SSA_NAME)
3897 {
3898 tree t;
3899 def_stmt = SSA_NAME_DEF_STMT (v);
3900 if (is_gimple_assign (def_stmt))
3901 switch (gimple_assign_rhs_code (def_stmt))
3902 {
3903 case PLUS_EXPR:
3904 t = gimple_assign_rhs2 (def_stmt);
3905 if (linear_step || TREE_CODE (t) != INTEGER_CST)
3906 return;
3907 base = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (base), base, t);
3908 v = gimple_assign_rhs1 (def_stmt);
3909 continue;
3910 case MULT_EXPR:
3911 t = gimple_assign_rhs2 (def_stmt);
3912 if (linear_step || !tree_fits_shwi_p (t) || integer_zerop (t))
3913 return;
3914 linear_step = tree_to_shwi (t);
3915 v = gimple_assign_rhs1 (def_stmt);
3916 continue;
3917 CASE_CONVERT:
3918 t = gimple_assign_rhs1 (def_stmt);
3919 if (TREE_CODE (TREE_TYPE (t)) != INTEGER_TYPE
3920 || (TYPE_PRECISION (TREE_TYPE (v))
3921 < TYPE_PRECISION (TREE_TYPE (t))))
3922 return;
3923 if (!linear_step)
3924 linear_step = 1;
3925 v = t;
3926 continue;
3927 default:
3928 return;
3929 }
3930 else if (gimple_call_internal_p (def_stmt, IFN_GOMP_SIMD_LANE)
3931 && loop->simduid
3932 && TREE_CODE (gimple_call_arg (def_stmt, 0)) == SSA_NAME
3933 && (SSA_NAME_VAR (gimple_call_arg (def_stmt, 0))
3934 == loop->simduid))
3935 {
3936 if (!linear_step)
3937 linear_step = 1;
3938 arginfo->linear_step = linear_step;
3939 arginfo->op = base;
3940 arginfo->simd_lane_linear = true;
3941 return;
3942 }
3943 }
3944 }
3945
3946 /* Return the number of elements in vector type VECTYPE, which is associated
3947 with a SIMD clone. At present these vectors always have a constant
3948 length. */
3949
3950 static unsigned HOST_WIDE_INT
3951 simd_clone_subparts (tree vectype)
3952 {
3953 return TYPE_VECTOR_SUBPARTS (vectype).to_constant ();
3954 }
3955
3956 /* Function vectorizable_simd_clone_call.
3957
3958 Check if STMT_INFO performs a function call that can be vectorized
3959 by calling a simd clone of the function.
3960 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
3961 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
3962 Return true if STMT_INFO is vectorizable in this way. */
3963
3964 static bool
3965 vectorizable_simd_clone_call (vec_info *vinfo, stmt_vec_info stmt_info,
3966 gimple_stmt_iterator *gsi,
3967 gimple **vec_stmt, slp_tree slp_node,
3968 stmt_vector_for_cost *)
3969 {
3970 tree vec_dest;
3971 tree scalar_dest;
3972 tree op, type;
3973 tree vec_oprnd0 = NULL_TREE;
3974 tree vectype;
3975 poly_uint64 nunits;
3976 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
3977 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
3978 class loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
3979 tree fndecl, new_temp;
3980 int ncopies, j;
3981 auto_vec<simd_call_arg_info> arginfo;
3982 vec<tree> vargs = vNULL;
3983 size_t i, nargs;
3984 tree lhs, rtype, ratype;
3985 vec<constructor_elt, va_gc> *ret_ctor_elts = NULL;
3986
3987 /* Is STMT a vectorizable call? */
3988 gcall *stmt = dyn_cast <gcall *> (stmt_info->stmt);
3989 if (!stmt)
3990 return false;
3991
3992 fndecl = gimple_call_fndecl (stmt);
3993 if (fndecl == NULL_TREE)
3994 return false;
3995
3996 struct cgraph_node *node = cgraph_node::get (fndecl);
3997 if (node == NULL || node->simd_clones == NULL)
3998 return false;
3999
4000 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4001 return false;
4002
4003 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
4004 && ! vec_stmt)
4005 return false;
4006
4007 if (gimple_call_lhs (stmt)
4008 && TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
4009 return false;
4010
4011 gcc_checking_assert (!stmt_can_throw_internal (cfun, stmt));
4012
4013 vectype = STMT_VINFO_VECTYPE (stmt_info);
4014
4015 if (loop_vinfo && nested_in_vect_loop_p (loop, stmt_info))
4016 return false;
4017
4018 /* FORNOW */
4019 if (slp_node)
4020 return false;
4021
4022 /* Process function arguments. */
4023 nargs = gimple_call_num_args (stmt);
4024
4025 /* Bail out if the function has zero arguments. */
4026 if (nargs == 0)
4027 return false;
4028
4029 arginfo.reserve (nargs, true);
4030
4031 for (i = 0; i < nargs; i++)
4032 {
4033 simd_call_arg_info thisarginfo;
4034 affine_iv iv;
4035
4036 thisarginfo.linear_step = 0;
4037 thisarginfo.align = 0;
4038 thisarginfo.op = NULL_TREE;
4039 thisarginfo.simd_lane_linear = false;
4040
4041 op = gimple_call_arg (stmt, i);
4042 if (!vect_is_simple_use (op, vinfo, &thisarginfo.dt,
4043 &thisarginfo.vectype)
4044 || thisarginfo.dt == vect_uninitialized_def)
4045 {
4046 if (dump_enabled_p ())
4047 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4048 "use not simple.\n");
4049 return false;
4050 }
4051
4052 if (thisarginfo.dt == vect_constant_def
4053 || thisarginfo.dt == vect_external_def)
4054 gcc_assert (thisarginfo.vectype == NULL_TREE);
4055 else
4056 {
4057 gcc_assert (thisarginfo.vectype != NULL_TREE);
4058 if (VECTOR_BOOLEAN_TYPE_P (thisarginfo.vectype))
4059 {
4060 if (dump_enabled_p ())
4061 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4062 "vector mask arguments are not supported\n");
4063 return false;
4064 }
4065 }
4066
4067 /* For linear arguments, the analyze phase should have saved
4068 the base and step in STMT_VINFO_SIMD_CLONE_INFO. */
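/* Layout of STMT_VINFO_SIMD_CLONE_INFO, as filled in by the analysis
   code further down: entry 0 is the chosen clone's decl and, for each
   linear argument I, entry I*3+1 is the base, entry I*3+2 the linear
   step and entry I*3+3 a boolean that is true if the argument is only
   linear within the simd lane.  */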
4069 if (i * 3 + 4 <= STMT_VINFO_SIMD_CLONE_INFO (stmt_info).length ()
4070 && STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2])
4071 {
4072 gcc_assert (vec_stmt);
4073 thisarginfo.linear_step
4074 = tree_to_shwi (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2]);
4075 thisarginfo.op
4076 = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 1];
4077 thisarginfo.simd_lane_linear
4078 = (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 3]
4079 == boolean_true_node);
4080 /* If the loop has been peeled for alignment, we need to adjust the base accordingly. */
4081 tree n1 = LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo);
4082 tree n2 = LOOP_VINFO_NITERS (loop_vinfo);
4083 if (n1 != n2 && !thisarginfo.simd_lane_linear)
4084 {
4085 tree bias = fold_build2 (MINUS_EXPR, TREE_TYPE (n1), n1, n2);
4086 tree step = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2];
4087 tree opt = TREE_TYPE (thisarginfo.op);
4088 bias = fold_convert (TREE_TYPE (step), bias);
4089 bias = fold_build2 (MULT_EXPR, TREE_TYPE (step), bias, step);
4090 thisarginfo.op
4091 = fold_build2 (POINTER_TYPE_P (opt)
4092 ? POINTER_PLUS_EXPR : PLUS_EXPR, opt,
4093 thisarginfo.op, bias);
4094 }
4095 }
4096 else if (!vec_stmt
4097 && thisarginfo.dt != vect_constant_def
4098 && thisarginfo.dt != vect_external_def
4099 && loop_vinfo
4100 && TREE_CODE (op) == SSA_NAME
4101 && simple_iv (loop, loop_containing_stmt (stmt), op,
4102 &iv, false)
4103 && tree_fits_shwi_p (iv.step))
4104 {
4105 thisarginfo.linear_step = tree_to_shwi (iv.step);
4106 thisarginfo.op = iv.base;
4107 }
4108 else if ((thisarginfo.dt == vect_constant_def
4109 || thisarginfo.dt == vect_external_def)
4110 && POINTER_TYPE_P (TREE_TYPE (op)))
4111 thisarginfo.align = get_pointer_alignment (op) / BITS_PER_UNIT;
4112 /* Addresses of array elements indexed by GOMP_SIMD_LANE are
4113 linear too. */
4114 if (POINTER_TYPE_P (TREE_TYPE (op))
4115 && !thisarginfo.linear_step
4116 && !vec_stmt
4117 && thisarginfo.dt != vect_constant_def
4118 && thisarginfo.dt != vect_external_def
4119 && loop_vinfo
4120 && !slp_node
4121 && TREE_CODE (op) == SSA_NAME)
4122 vect_simd_lane_linear (op, loop, &thisarginfo);
4123
4124 arginfo.quick_push (thisarginfo);
4125 }
4126
4127 poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
4128 if (!vf.is_constant ())
4129 {
4130 if (dump_enabled_p ())
4131 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4132 "not considering SIMD clones; not yet supported"
4133 " for variable-width vectors.\n");
4134 return false;
4135 }
4136
4137 unsigned int badness = 0;
4138 struct cgraph_node *bestn = NULL;
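/* If the analysis phase already chose a clone, it is recorded as entry 0
   of STMT_VINFO_SIMD_CLONE_INFO.  Otherwise pick among NODE's simd clones
   using a "badness" score: clones that need more than one call per loop
   vector iteration, inbranch clones, clones the target rates as less
   suitable and clones whose vector arguments would have to be built from
   invariants or linear values are penalized, and the clone with the
   lowest score wins.  */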
4139 if (STMT_VINFO_SIMD_CLONE_INFO (stmt_info).exists ())
4140 bestn = cgraph_node::get (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[0]);
4141 else
4142 for (struct cgraph_node *n = node->simd_clones; n != NULL;
4143 n = n->simdclone->next_clone)
4144 {
4145 unsigned int this_badness = 0;
4146 unsigned int num_calls;
4147 if (!constant_multiple_p (vf, n->simdclone->simdlen, &num_calls)
4148 || n->simdclone->nargs != nargs)
4149 continue;
4150 if (num_calls != 1)
4151 this_badness += exact_log2 (num_calls) * 4096;
4152 if (n->simdclone->inbranch)
4153 this_badness += 8192;
4154 int target_badness = targetm.simd_clone.usable (n);
4155 if (target_badness < 0)
4156 continue;
4157 this_badness += target_badness * 512;
4158 /* FORNOW: Have to add code to add the mask argument. */
4159 if (n->simdclone->inbranch)
4160 continue;
4161 for (i = 0; i < nargs; i++)
4162 {
4163 switch (n->simdclone->args[i].arg_type)
4164 {
4165 case SIMD_CLONE_ARG_TYPE_VECTOR:
4166 if (!useless_type_conversion_p
4167 (n->simdclone->args[i].orig_type,
4168 TREE_TYPE (gimple_call_arg (stmt, i))))
4169 i = -1;
4170 else if (arginfo[i].dt == vect_constant_def
4171 || arginfo[i].dt == vect_external_def
4172 || arginfo[i].linear_step)
4173 this_badness += 64;
4174 break;
4175 case SIMD_CLONE_ARG_TYPE_UNIFORM:
4176 if (arginfo[i].dt != vect_constant_def
4177 && arginfo[i].dt != vect_external_def)
4178 i = -1;
4179 break;
4180 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
4181 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
4182 if (arginfo[i].dt == vect_constant_def
4183 || arginfo[i].dt == vect_external_def
4184 || (arginfo[i].linear_step
4185 != n->simdclone->args[i].linear_step))
4186 i = -1;
4187 break;
4188 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
4189 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
4190 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP:
4191 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP:
4192 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
4193 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
4194 /* FORNOW */
4195 i = -1;
4196 break;
4197 case SIMD_CLONE_ARG_TYPE_MASK:
4198 gcc_unreachable ();
4199 }
4200 if (i == (size_t) -1)
4201 break;
4202 if (n->simdclone->args[i].alignment > arginfo[i].align)
4203 {
4204 i = -1;
4205 break;
4206 }
4207 if (arginfo[i].align)
4208 this_badness += (exact_log2 (arginfo[i].align)
4209 - exact_log2 (n->simdclone->args[i].alignment));
4210 }
4211 if (i == (size_t) -1)
4212 continue;
4213 if (bestn == NULL || this_badness < badness)
4214 {
4215 bestn = n;
4216 badness = this_badness;
4217 }
4218 }
4219
4220 if (bestn == NULL)
4221 return false;
4222
4223 for (i = 0; i < nargs; i++)
4224 if ((arginfo[i].dt == vect_constant_def
4225 || arginfo[i].dt == vect_external_def)
4226 && bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_VECTOR)
4227 {
4228 tree arg_type = TREE_TYPE (gimple_call_arg (stmt, i));
4229 arginfo[i].vectype = get_vectype_for_scalar_type (vinfo, arg_type,
4230 slp_node);
4231 if (arginfo[i].vectype == NULL
4232 || !constant_multiple_p (bestn->simdclone->simdlen,
4233 simd_clone_subparts (arginfo[i].vectype)))
4234 return false;
4235 }
4236
4237 fndecl = bestn->decl;
4238 nunits = bestn->simdclone->simdlen;
4239 ncopies = vector_unroll_factor (vf, nunits);
4240
4241 /* If the function isn't const, only allow it in simd loops where the user
4242 has asserted that at least nunits consecutive iterations can be
4243 performed using SIMD instructions. */
4244 if ((loop == NULL || maybe_lt ((unsigned) loop->safelen, nunits))
4245 && gimple_vuse (stmt))
4246 return false;
4247
4248 /* Sanity check: make sure that at least one copy of the vectorized stmt
4249 needs to be generated. */
4250 gcc_assert (ncopies >= 1);
4251
4252 if (!vec_stmt) /* transformation not required. */
4253 {
4254 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (bestn->decl);
4255 for (i = 0; i < nargs; i++)
4256 if ((bestn->simdclone->args[i].arg_type
4257 == SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP)
4258 || (bestn->simdclone->args[i].arg_type
4259 == SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP))
4260 {
4261 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_grow_cleared (i * 3
4262 + 1,
4263 true);
4264 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (arginfo[i].op);
4265 tree lst = POINTER_TYPE_P (TREE_TYPE (arginfo[i].op))
4266 ? size_type_node : TREE_TYPE (arginfo[i].op);
4267 tree ls = build_int_cst (lst, arginfo[i].linear_step);
4268 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (ls);
4269 tree sll = arginfo[i].simd_lane_linear
4270 ? boolean_true_node : boolean_false_node;
4271 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (sll);
4272 }
4273 STMT_VINFO_TYPE (stmt_info) = call_simd_clone_vec_info_type;
4274 DUMP_VECT_SCOPE ("vectorizable_simd_clone_call");
4275 /* vect_model_simple_cost (vinfo, stmt_info, ncopies,
4276 dt, slp_node, cost_vec); */
4277 return true;
4278 }
4279
4280 /* Transform. */
4281
4282 if (dump_enabled_p ())
4283 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
4284
4285 /* Handle def. */
4286 scalar_dest = gimple_call_lhs (stmt);
4287 vec_dest = NULL_TREE;
4288 rtype = NULL_TREE;
4289 ratype = NULL_TREE;
4290 if (scalar_dest)
4291 {
4292 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4293 rtype = TREE_TYPE (TREE_TYPE (fndecl));
4294 if (TREE_CODE (rtype) == ARRAY_TYPE)
4295 {
4296 ratype = rtype;
4297 rtype = TREE_TYPE (ratype);
4298 }
4299 }
4300
4301 auto_vec<vec<tree> > vec_oprnds;
4302 auto_vec<unsigned> vec_oprnds_i;
4303 vec_oprnds.safe_grow_cleared (nargs, true);
4304 vec_oprnds_i.safe_grow_cleared (nargs, true);
4305 for (j = 0; j < ncopies; ++j)
4306 {
4307 /* Build argument list for the vectorized call. */
4308 if (j == 0)
4309 vargs.create (nargs);
4310 else
4311 vargs.truncate (0);
4312
4313 for (i = 0; i < nargs; i++)
4314 {
4315 unsigned int k, l, m, o;
4316 tree atype;
4317 op = gimple_call_arg (stmt, i);
4318 switch (bestn->simdclone->args[i].arg_type)
4319 {
4320 case SIMD_CLONE_ARG_TYPE_VECTOR:
4321 atype = bestn->simdclone->args[i].vector_type;
4322 o = vector_unroll_factor (nunits,
4323 simd_clone_subparts (atype));
4324 for (m = j * o; m < (j + 1) * o; m++)
4325 {
4326 if (simd_clone_subparts (atype)
4327 < simd_clone_subparts (arginfo[i].vectype))
4328 {
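/* The clone's vector argument type has fewer lanes than the loop's
   vector type for this operand, so split each loop vector into
   K = subparts (arginfo[i].vectype) / subparts (atype) pieces and pass
   each piece to the clone via a BIT_FIELD_REF.  */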
4329 poly_uint64 prec = GET_MODE_BITSIZE (TYPE_MODE (atype));
4330 k = (simd_clone_subparts (arginfo[i].vectype)
4331 / simd_clone_subparts (atype));
4332 gcc_assert ((k & (k - 1)) == 0);
4333 if (m == 0)
4334 {
4335 vect_get_vec_defs_for_operand (vinfo, stmt_info,
4336 ncopies * o / k, op,
4337 &vec_oprnds[i]);
4338 vec_oprnds_i[i] = 0;
4339 vec_oprnd0 = vec_oprnds[i][vec_oprnds_i[i]++];
4340 }
4341 else
4342 {
4343 vec_oprnd0 = arginfo[i].op;
4344 if ((m & (k - 1)) == 0)
4345 vec_oprnd0 = vec_oprnds[i][vec_oprnds_i[i]++];
4346 }
4347 arginfo[i].op = vec_oprnd0;
4348 vec_oprnd0
4349 = build3 (BIT_FIELD_REF, atype, vec_oprnd0,
4350 bitsize_int (prec),
4351 bitsize_int ((m & (k - 1)) * prec));
4352 gassign *new_stmt
4353 = gimple_build_assign (make_ssa_name (atype),
4354 vec_oprnd0);
4355 vect_finish_stmt_generation (vinfo, stmt_info,
4356 new_stmt, gsi);
4357 vargs.safe_push (gimple_assign_lhs (new_stmt));
4358 }
4359 else
4360 {
4361 k = (simd_clone_subparts (atype)
4362 / simd_clone_subparts (arginfo[i].vectype));
4363 gcc_assert ((k & (k - 1)) == 0);
4364 vec<constructor_elt, va_gc> *ctor_elts;
4365 if (k != 1)
4366 vec_alloc (ctor_elts, k);
4367 else
4368 ctor_elts = NULL;
4369 for (l = 0; l < k; l++)
4370 {
4371 if (m == 0 && l == 0)
4372 {
4373 vect_get_vec_defs_for_operand (vinfo, stmt_info,
4374 k * o * ncopies,
4375 op,
4376 &vec_oprnds[i]);
4377 vec_oprnds_i[i] = 0;
4378 vec_oprnd0 = vec_oprnds[i][vec_oprnds_i[i]++];
4379 }
4380 else
4381 vec_oprnd0 = vec_oprnds[i][vec_oprnds_i[i]++];
4382 arginfo[i].op = vec_oprnd0;
4383 if (k == 1)
4384 break;
4385 CONSTRUCTOR_APPEND_ELT (ctor_elts, NULL_TREE,
4386 vec_oprnd0);
4387 }
4388 if (k == 1)
4389 if (!useless_type_conversion_p (TREE_TYPE (vec_oprnd0),
4390 atype))
4391 {
4392 vec_oprnd0
4393 = build1 (VIEW_CONVERT_EXPR, atype, vec_oprnd0);
4394 gassign *new_stmt
4395 = gimple_build_assign (make_ssa_name (atype),
4396 vec_oprnd0);
4397 vect_finish_stmt_generation (vinfo, stmt_info,
4398 new_stmt, gsi);
4399 vargs.safe_push (gimple_assign_lhs (new_stmt));
4400 }
4401 else
4402 vargs.safe_push (vec_oprnd0);
4403 else
4404 {
4405 vec_oprnd0 = build_constructor (atype, ctor_elts);
4406 gassign *new_stmt
4407 = gimple_build_assign (make_ssa_name (atype),
4408 vec_oprnd0);
4409 vect_finish_stmt_generation (vinfo, stmt_info,
4410 new_stmt, gsi);
4411 vargs.safe_push (gimple_assign_lhs (new_stmt));
4412 }
4413 }
4414 }
4415 break;
4416 case SIMD_CLONE_ARG_TYPE_UNIFORM:
4417 vargs.safe_push (op);
4418 break;
4419 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
4420 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
4421 if (j == 0)
4422 {
4423 gimple_seq stmts;
4424 arginfo[i].op
4425 = force_gimple_operand (unshare_expr (arginfo[i].op),
4426 &stmts, true, NULL_TREE);
4427 if (stmts != NULL)
4428 {
4429 basic_block new_bb;
4430 edge pe = loop_preheader_edge (loop);
4431 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
4432 gcc_assert (!new_bb);
4433 }
4434 if (arginfo[i].simd_lane_linear)
4435 {
4436 vargs.safe_push (arginfo[i].op);
4437 break;
4438 }
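/* Otherwise materialize the linear argument as an induction variable:
   a PHI in the loop header that starts at the base and is advanced by
   linear_step * ncopies * nunits on each loop iteration; later copies
   within one iteration add j * nunits * linear_step in the else branch
   below.  */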
4439 tree phi_res = copy_ssa_name (op);
4440 gphi *new_phi = create_phi_node (phi_res, loop->header);
4441 add_phi_arg (new_phi, arginfo[i].op,
4442 loop_preheader_edge (loop), UNKNOWN_LOCATION);
4443 enum tree_code code
4444 = POINTER_TYPE_P (TREE_TYPE (op))
4445 ? POINTER_PLUS_EXPR : PLUS_EXPR;
4446 tree type = POINTER_TYPE_P (TREE_TYPE (op))
4447 ? sizetype : TREE_TYPE (op);
4448 poly_widest_int cst
4449 = wi::mul (bestn->simdclone->args[i].linear_step,
4450 ncopies * nunits);
4451 tree tcst = wide_int_to_tree (type, cst);
4452 tree phi_arg = copy_ssa_name (op);
4453 gassign *new_stmt
4454 = gimple_build_assign (phi_arg, code, phi_res, tcst);
4455 gimple_stmt_iterator si = gsi_after_labels (loop->header);
4456 gsi_insert_after (&si, new_stmt, GSI_NEW_STMT);
4457 add_phi_arg (new_phi, phi_arg, loop_latch_edge (loop),
4458 UNKNOWN_LOCATION);
4459 arginfo[i].op = phi_res;
4460 vargs.safe_push (phi_res);
4461 }
4462 else
4463 {
4464 enum tree_code code
4465 = POINTER_TYPE_P (TREE_TYPE (op))
4466 ? POINTER_PLUS_EXPR : PLUS_EXPR;
4467 tree type = POINTER_TYPE_P (TREE_TYPE (op))
4468 ? sizetype : TREE_TYPE (op);
4469 poly_widest_int cst
4470 = wi::mul (bestn->simdclone->args[i].linear_step,
4471 j * nunits);
4472 tree tcst = wide_int_to_tree (type, cst);
4473 new_temp = make_ssa_name (TREE_TYPE (op));
4474 gassign *new_stmt
4475 = gimple_build_assign (new_temp, code,
4476 arginfo[i].op, tcst);
4477 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4478 vargs.safe_push (new_temp);
4479 }
4480 break;
4481 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
4482 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP:
4483 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
4484 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP:
4485 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
4486 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
4487 default:
4488 gcc_unreachable ();
4489 }
4490 }
4491
4492 gcall *new_call = gimple_build_call_vec (fndecl, vargs);
4493 if (vec_dest)
4494 {
4495 gcc_assert (ratype
4496 || known_eq (simd_clone_subparts (rtype), nunits));
4497 if (ratype)
4498 new_temp = create_tmp_var (ratype);
4499 else if (useless_type_conversion_p (vectype, rtype))
4500 new_temp = make_ssa_name (vec_dest, new_call);
4501 else
4502 new_temp = make_ssa_name (rtype, new_call);
4503 gimple_call_set_lhs (new_call, new_temp);
4504 }
4505 vect_finish_stmt_generation (vinfo, stmt_info, new_call, gsi);
4506 gimple *new_stmt = new_call;
4507
4508 if (vec_dest)
4509 {
4510 if (!multiple_p (simd_clone_subparts (vectype), nunits))
4511 {
4512 unsigned int k, l;
4513 poly_uint64 prec = GET_MODE_BITSIZE (TYPE_MODE (vectype));
4514 poly_uint64 bytes = GET_MODE_SIZE (TYPE_MODE (vectype));
4515 k = vector_unroll_factor (nunits,
4516 simd_clone_subparts (vectype));
4517 gcc_assert ((k & (k - 1)) == 0);
4518 for (l = 0; l < k; l++)
4519 {
4520 tree t;
4521 if (ratype)
4522 {
4523 t = build_fold_addr_expr (new_temp);
4524 t = build2 (MEM_REF, vectype, t,
4525 build_int_cst (TREE_TYPE (t), l * bytes));
4526 }
4527 else
4528 t = build3 (BIT_FIELD_REF, vectype, new_temp,
4529 bitsize_int (prec), bitsize_int (l * prec));
4530 new_stmt = gimple_build_assign (make_ssa_name (vectype), t);
4531 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4532
4533 if (j == 0 && l == 0)
4534 *vec_stmt = new_stmt;
4535 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
4536 }
4537
4538 if (ratype)
4539 vect_clobber_variable (vinfo, stmt_info, gsi, new_temp);
4540 continue;
4541 }
4542 else if (!multiple_p (nunits, simd_clone_subparts (vectype)))
4543 {
4544 unsigned int k = (simd_clone_subparts (vectype)
4545 / simd_clone_subparts (rtype));
4546 gcc_assert ((k & (k - 1)) == 0);
4547 if ((j & (k - 1)) == 0)
4548 vec_alloc (ret_ctor_elts, k);
4549 if (ratype)
4550 {
4551 unsigned int m, o;
4552 o = vector_unroll_factor (nunits,
4553 simd_clone_subparts (rtype));
4554 for (m = 0; m < o; m++)
4555 {
4556 tree tem = build4 (ARRAY_REF, rtype, new_temp,
4557 size_int (m), NULL_TREE, NULL_TREE);
4558 new_stmt = gimple_build_assign (make_ssa_name (rtype),
4559 tem);
4560 vect_finish_stmt_generation (vinfo, stmt_info,
4561 new_stmt, gsi);
4562 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE,
4563 gimple_assign_lhs (new_stmt));
4564 }
4565 vect_clobber_variable (vinfo, stmt_info, gsi, new_temp);
4566 }
4567 else
4568 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE, new_temp);
4569 if ((j & (k - 1)) != k - 1)
4570 continue;
4571 vec_oprnd0 = build_constructor (vectype, ret_ctor_elts);
4572 new_stmt
4573 = gimple_build_assign (make_ssa_name (vec_dest), vec_oprnd0);
4574 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4575
4576 if ((unsigned) j == k - 1)
4577 *vec_stmt = new_stmt;
4578 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
4579 continue;
4580 }
4581 else if (ratype)
4582 {
4583 tree t = build_fold_addr_expr (new_temp);
4584 t = build2 (MEM_REF, vectype, t,
4585 build_int_cst (TREE_TYPE (t), 0));
4586 new_stmt = gimple_build_assign (make_ssa_name (vec_dest), t);
4587 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4588 vect_clobber_variable (vinfo, stmt_info, gsi, new_temp);
4589 }
4590 else if (!useless_type_conversion_p (vectype, rtype))
4591 {
4592 vec_oprnd0 = build1 (VIEW_CONVERT_EXPR, vectype, new_temp);
4593 new_stmt
4594 = gimple_build_assign (make_ssa_name (vec_dest), vec_oprnd0);
4595 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4596 }
4597 }
4598
4599 if (j == 0)
4600 *vec_stmt = new_stmt;
4601 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
4602 }
4603
4604 for (i = 0; i < nargs; ++i)
4605 {
4606 vec<tree> oprndsi = vec_oprnds[i];
4607 oprndsi.release ();
4608 }
4609 vargs.release ();
4610
4611 /* The call in STMT might prevent it from being removed in dce.
4612 We however cannot remove it here, due to the way the ssa name
4613 it defines is mapped to the new definition. So just replace
4614 the rhs of the statement with something harmless. */
4615
4616 if (slp_node)
4617 return true;
4618
4619 gimple *new_stmt;
4620 if (scalar_dest)
4621 {
4622 type = TREE_TYPE (scalar_dest);
4623 lhs = gimple_call_lhs (vect_orig_stmt (stmt_info)->stmt);
4624 new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
4625 }
4626 else
4627 new_stmt = gimple_build_nop ();
4628 vinfo->replace_stmt (gsi, vect_orig_stmt (stmt_info), new_stmt);
4629 unlink_stmt_vdef (stmt);
4630
4631 return true;
4632 }
4633
4634
4635 /* Function vect_gen_widened_results_half
4636
4637 Create a vector stmt whose code, number of operands, and result
4638 variable are CODE, OP_TYPE, and VEC_DEST, and whose operands are
4639 VEC_OPRND0 and VEC_OPRND1 (VEC_OPRND1 is ignored unless CODE is a
4640 binary operation). The new vector stmt is an assignment built from
4641 CODE and is inserted at GSI.
4642 STMT_INFO is the original scalar stmt that we are vectorizing. */
4643
4644 static gimple *
4645 vect_gen_widened_results_half (vec_info *vinfo, enum tree_code code,
4646 tree vec_oprnd0, tree vec_oprnd1, int op_type,
4647 tree vec_dest, gimple_stmt_iterator *gsi,
4648 stmt_vec_info stmt_info)
4649 {
4650 gimple *new_stmt;
4651 tree new_temp;
4652
4653 /* Generate half of the widened result: */
4654 gcc_assert (op_type == TREE_CODE_LENGTH (code));
4655 if (op_type != binary_op)
4656 vec_oprnd1 = NULL;
4657 new_stmt = gimple_build_assign (vec_dest, code, vec_oprnd0, vec_oprnd1);
4658 new_temp = make_ssa_name (vec_dest, new_stmt);
4659 gimple_assign_set_lhs (new_stmt, new_temp);
4660 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4661
4662 return new_stmt;
4663 }
4664
4665
4666 /* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
4667 For multi-step conversions store the resulting vectors and call the function
4668 recursively. */
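/* Illustrative example (types invented for this comment): demoting four
   V4SI vectors to a single V16QI result takes two steps; the first level
   packs pairs of V4SI into two V8HI vectors and the recursive call packs
   those into the final V16QI vector using VEC_PACK_TRUNC_EXPR.  */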
4669
4670 static void
4671 vect_create_vectorized_demotion_stmts (vec_info *vinfo, vec<tree> *vec_oprnds,
4672 int multi_step_cvt,
4673 stmt_vec_info stmt_info,
4674 vec<tree> &vec_dsts,
4675 gimple_stmt_iterator *gsi,
4676 slp_tree slp_node, enum tree_code code)
4677 {
4678 unsigned int i;
4679 tree vop0, vop1, new_tmp, vec_dest;
4680
4681 vec_dest = vec_dsts.pop ();
4682
4683 for (i = 0; i < vec_oprnds->length (); i += 2)
4684 {
4685 /* Create demotion operation. */
4686 vop0 = (*vec_oprnds)[i];
4687 vop1 = (*vec_oprnds)[i + 1];
4688 gassign *new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
4689 new_tmp = make_ssa_name (vec_dest, new_stmt);
4690 gimple_assign_set_lhs (new_stmt, new_tmp);
4691 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4692
4693 if (multi_step_cvt)
4694 /* Store the resulting vector for next recursive call. */
4695 (*vec_oprnds)[i/2] = new_tmp;
4696 else
4697 {
4698 /* This is the last step of the conversion sequence. Store the
4699 vectors in SLP_NODE or in vector info of the scalar statement
4700 (or in STMT_VINFO_RELATED_STMT chain). */
4701 if (slp_node)
4702 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4703 else
4704 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
4705 }
4706 }
4707
4708 /* For multi-step demotion operations we first generate demotion operations
4709 from the source type to the intermediate types, and then combine the
4710 results (stored in VEC_OPRNDS) with a further demotion operation to the destination
4711 type. */
4712 if (multi_step_cvt)
4713 {
4714 /* At each level of recursion we have half of the operands we had at the
4715 previous level. */
4716 vec_oprnds->truncate ((i+1)/2);
4717 vect_create_vectorized_demotion_stmts (vinfo, vec_oprnds,
4718 multi_step_cvt - 1,
4719 stmt_info, vec_dsts, gsi,
4720 slp_node, VEC_PACK_TRUNC_EXPR);
4721 }
4722
4723 vec_dsts.quick_push (vec_dest);
4724 }
4725
4726
4727 /* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
4728 and VEC_OPRNDS1, for a binary operation associated with scalar statement
4729 STMT_INFO. For multi-step conversions store the resulting vectors and
4730 call the function recursively. */
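/* Illustrative example (the codes depend on the caller): widening each
   V8HI input in VEC_OPRNDS0 to SI elements generates two statements per
   input vector, e.g. a VEC_UNPACK_LO_EXPR and a VEC_UNPACK_HI_EXPR for a
   conversion, whose two V4SI results are pushed back into VEC_OPRNDS0
   for any further steps.  */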
4731
4732 static void
4733 vect_create_vectorized_promotion_stmts (vec_info *vinfo,
4734 vec<tree> *vec_oprnds0,
4735 vec<tree> *vec_oprnds1,
4736 stmt_vec_info stmt_info, tree vec_dest,
4737 gimple_stmt_iterator *gsi,
4738 enum tree_code code1,
4739 enum tree_code code2, int op_type)
4740 {
4741 int i;
4742 tree vop0, vop1, new_tmp1, new_tmp2;
4743 gimple *new_stmt1, *new_stmt2;
4744 vec<tree> vec_tmp = vNULL;
4745
4746 vec_tmp.create (vec_oprnds0->length () * 2);
4747 FOR_EACH_VEC_ELT (*vec_oprnds0, i, vop0)
4748 {
4749 if (op_type == binary_op)
4750 vop1 = (*vec_oprnds1)[i];
4751 else
4752 vop1 = NULL_TREE;
4753
4754 /* Generate the two halves of promotion operation. */
4755 new_stmt1 = vect_gen_widened_results_half (vinfo, code1, vop0, vop1,
4756 op_type, vec_dest, gsi,
4757 stmt_info);
4758 new_stmt2 = vect_gen_widened_results_half (vinfo, code2, vop0, vop1,
4759 op_type, vec_dest, gsi,
4760 stmt_info);
4761 if (is_gimple_call (new_stmt1))
4762 {
4763 new_tmp1 = gimple_call_lhs (new_stmt1);
4764 new_tmp2 = gimple_call_lhs (new_stmt2);
4765 }
4766 else
4767 {
4768 new_tmp1 = gimple_assign_lhs (new_stmt1);
4769 new_tmp2 = gimple_assign_lhs (new_stmt2);
4770 }
4771
4772 /* Store the results for the next step. */
4773 vec_tmp.quick_push (new_tmp1);
4774 vec_tmp.quick_push (new_tmp2);
4775 }
4776
4777 vec_oprnds0->release ();
4778 *vec_oprnds0 = vec_tmp;
4779 }
4780
4781 /* Create vectorized promotion stmts for widening stmts using only half the
4782 potential vector size for input. */
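/* For instance (illustrative types only): for a WIDEN_PLUS_EXPR producing
   V4SI from V4HI inputs, each V4HI operand is first extended with a
   NOP_EXPR to V4SI and the addition is then performed on the widened
   vectors, so no unpack-lo/hi pair is needed.  */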
4783 static void
4784 vect_create_half_widening_stmts (vec_info *vinfo,
4785 vec<tree> *vec_oprnds0,
4786 vec<tree> *vec_oprnds1,
4787 stmt_vec_info stmt_info, tree vec_dest,
4788 gimple_stmt_iterator *gsi,
4789 enum tree_code code1,
4790 int op_type)
4791 {
4792 int i;
4793 tree vop0, vop1;
4794 gimple *new_stmt1;
4795 gimple *new_stmt2;
4796 gimple *new_stmt3;
4797 vec<tree> vec_tmp = vNULL;
4798
4799 vec_tmp.create (vec_oprnds0->length ());
4800 FOR_EACH_VEC_ELT (*vec_oprnds0, i, vop0)
4801 {
4802 tree new_tmp1, new_tmp2, new_tmp3, out_type;
4803
4804 gcc_assert (op_type == binary_op);
4805 vop1 = (*vec_oprnds1)[i];
4806
4807 /* Widen the first vector input. */
4808 out_type = TREE_TYPE (vec_dest);
4809 new_tmp1 = make_ssa_name (out_type);
4810 new_stmt1 = gimple_build_assign (new_tmp1, NOP_EXPR, vop0);
4811 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt1, gsi);
4812 if (VECTOR_TYPE_P (TREE_TYPE (vop1)))
4813 {
4814 /* Widen the second vector input. */
4815 new_tmp2 = make_ssa_name (out_type);
4816 new_stmt2 = gimple_build_assign (new_tmp2, NOP_EXPR, vop1);
4817 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt2, gsi);
4818 /* Perform the operation with both vector inputs widened. */
4819 new_stmt3 = gimple_build_assign (vec_dest, code1, new_tmp1, new_tmp2);
4820 }
4821 else
4822 {
4823 /* Perform the operation with the single vector input widened. */
4824 new_stmt3 = gimple_build_assign (vec_dest, code1, new_tmp1, vop1);
4825 }
4826
4827 new_tmp3 = make_ssa_name (vec_dest, new_stmt3);
4828 gimple_assign_set_lhs (new_stmt3, new_tmp3);
4829 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt3, gsi);
4830
4831 /* Store the results for the next step. */
4832 vec_tmp.quick_push (new_tmp3);
4833 }
4834
4835 vec_oprnds0->release ();
4836 *vec_oprnds0 = vec_tmp;
4837 }
4838
4839
4840 /* Check if STMT_INFO performs a conversion operation that can be vectorized.
4841 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
4842 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
4843 Return true if STMT_INFO is vectorizable in this way. */
4844
4845 static bool
4846 vectorizable_conversion (vec_info *vinfo,
4847 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
4848 gimple **vec_stmt, slp_tree slp_node,
4849 stmt_vector_for_cost *cost_vec)
4850 {
4851 tree vec_dest;
4852 tree scalar_dest;
4853 tree op0, op1 = NULL_TREE;
4854 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
4855 enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
4856 enum tree_code codecvt1 = ERROR_MARK, codecvt2 = ERROR_MARK;
4857 tree new_temp;
4858 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
4859 int ndts = 2;
4860 poly_uint64 nunits_in;
4861 poly_uint64 nunits_out;
4862 tree vectype_out, vectype_in;
4863 int ncopies, i;
4864 tree lhs_type, rhs_type;
4865 enum { NARROW, NONE, WIDEN } modifier;
4866 vec<tree> vec_oprnds0 = vNULL;
4867 vec<tree> vec_oprnds1 = vNULL;
4868 tree vop0;
4869 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
4870 int multi_step_cvt = 0;
4871 vec<tree> interm_types = vNULL;
4872 tree intermediate_type, cvt_type = NULL_TREE;
4873 int op_type;
4874 unsigned short fltsz;
4875
4876 /* Is STMT a vectorizable conversion? */
4877
4878 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4879 return false;
4880
4881 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
4882 && ! vec_stmt)
4883 return false;
4884
4885 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
4886 if (!stmt)
4887 return false;
4888
4889 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
4890 return false;
4891
4892 code = gimple_assign_rhs_code (stmt);
4893 if (!CONVERT_EXPR_CODE_P (code)
4894 && code != FIX_TRUNC_EXPR
4895 && code != FLOAT_EXPR
4896 && code != WIDEN_PLUS_EXPR
4897 && code != WIDEN_MINUS_EXPR
4898 && code != WIDEN_MULT_EXPR
4899 && code != WIDEN_LSHIFT_EXPR)
4900 return false;
4901
4902 bool widen_arith = (code == WIDEN_PLUS_EXPR
4903 || code == WIDEN_MINUS_EXPR
4904 || code == WIDEN_MULT_EXPR
4905 || code == WIDEN_LSHIFT_EXPR);
4906 op_type = TREE_CODE_LENGTH (code);
4907
4908 /* Check types of lhs and rhs. */
4909 scalar_dest = gimple_assign_lhs (stmt);
4910 lhs_type = TREE_TYPE (scalar_dest);
4911 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
4912
4913 /* Check the operands of the operation. */
4914 slp_tree slp_op0, slp_op1 = NULL;
4915 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
4916 0, &op0, &slp_op0, &dt[0], &vectype_in))
4917 {
4918 if (dump_enabled_p ())
4919 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4920 "use not simple.\n");
4921 return false;
4922 }
4923
4924 rhs_type = TREE_TYPE (op0);
4925 if ((code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
4926 && !((INTEGRAL_TYPE_P (lhs_type)
4927 && INTEGRAL_TYPE_P (rhs_type))
4928 || (SCALAR_FLOAT_TYPE_P (lhs_type)
4929 && SCALAR_FLOAT_TYPE_P (rhs_type))))
4930 return false;
4931
4932 if (!VECTOR_BOOLEAN_TYPE_P (vectype_out)
4933 && ((INTEGRAL_TYPE_P (lhs_type)
4934 && !type_has_mode_precision_p (lhs_type))
4935 || (INTEGRAL_TYPE_P (rhs_type)
4936 && !type_has_mode_precision_p (rhs_type))))
4937 {
4938 if (dump_enabled_p ())
4939 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4940 "type conversion to/from bit-precision unsupported."
4941 "\n");
4942 return false;
4943 }
4944
4945 if (op_type == binary_op)
4946 {
4947 gcc_assert (code == WIDEN_MULT_EXPR || code == WIDEN_LSHIFT_EXPR
4948 || code == WIDEN_PLUS_EXPR || code == WIDEN_MINUS_EXPR);
4949
4950 op1 = gimple_assign_rhs2 (stmt);
4951 tree vectype1_in;
4952 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 1,
4953 &op1, &slp_op1, &dt[1], &vectype1_in))
4954 {
4955 if (dump_enabled_p ())
4956 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4957 "use not simple.\n");
4958 return false;
4959 }
4960 /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
4961 OP1. */
4962 if (!vectype_in)
4963 vectype_in = vectype1_in;
4964 }
4965
4966 /* If op0 is an external or constant def, infer the vector type
4967 from the scalar type. */
4968 if (!vectype_in)
4969 vectype_in = get_vectype_for_scalar_type (vinfo, rhs_type, slp_node);
4970 if (vec_stmt)
4971 gcc_assert (vectype_in);
4972 if (!vectype_in)
4973 {
4974 if (dump_enabled_p ())
4975 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4976 "no vectype for scalar type %T\n", rhs_type);
4977
4978 return false;
4979 }
4980
4981 if (VECTOR_BOOLEAN_TYPE_P (vectype_out)
4982 && !VECTOR_BOOLEAN_TYPE_P (vectype_in))
4983 {
4984 if (dump_enabled_p ())
4985 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4986 "can't convert between boolean and non "
4987 "boolean vectors %T\n", rhs_type);
4988
4989 return false;
4990 }
4991
4992 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
4993 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
4994 if (known_eq (nunits_out, nunits_in))
4995 if (widen_arith)
4996 modifier = WIDEN;
4997 else
4998 modifier = NONE;
4999 else if (multiple_p (nunits_out, nunits_in))
5000 modifier = NARROW;
5001 else
5002 {
5003 gcc_checking_assert (multiple_p (nunits_in, nunits_out));
5004 modifier = WIDEN;
5005 }
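/* In other words: equal element counts mean either a plain conversion
   (NONE) or a half-widening arithmetic operation (WIDEN); more output
   elements than input elements mean the output elements are narrower
   (NARROW); fewer mean they are wider (WIDEN).  */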
5006
5007 /* Multiple types in SLP are handled by creating the appropriate number of
5008 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5009 case of SLP. */
5010 if (slp_node)
5011 ncopies = 1;
5012 else if (modifier == NARROW)
5013 ncopies = vect_get_num_copies (loop_vinfo, vectype_out);
5014 else
5015 ncopies = vect_get_num_copies (loop_vinfo, vectype_in);
5016
5017 /* Sanity check: make sure that at least one copy of the vectorized stmt
5018 needs to be generated. */
5019 gcc_assert (ncopies >= 1);
5020
5021 bool found_mode = false;
5022 scalar_mode lhs_mode = SCALAR_TYPE_MODE (lhs_type);
5023 scalar_mode rhs_mode = SCALAR_TYPE_MODE (rhs_type);
5024 opt_scalar_mode rhs_mode_iter;
5025
5026 /* Supportable by target? */
5027 switch (modifier)
5028 {
5029 case NONE:
5030 if (code != FIX_TRUNC_EXPR
5031 && code != FLOAT_EXPR
5032 && !CONVERT_EXPR_CODE_P (code))
5033 return false;
5034 if (supportable_convert_operation (code, vectype_out, vectype_in, &code1))
5035 break;
5036 /* FALLTHRU */
5037 unsupported:
5038 if (dump_enabled_p ())
5039 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5040 "conversion not supported by target.\n");
5041 return false;
5042
5043 case WIDEN:
5044 if (known_eq (nunits_in, nunits_out))
5045 {
5046 if (!supportable_half_widening_operation (code, vectype_out,
5047 vectype_in, &code1))
5048 goto unsupported;
5049 gcc_assert (!(multi_step_cvt && op_type == binary_op));
5050 break;
5051 }
5052 if (supportable_widening_operation (vinfo, code, stmt_info,
5053 vectype_out, vectype_in, &code1,
5054 &code2, &multi_step_cvt,
5055 &interm_types))
5056 {
5057 /* A binary widening operation can only be supported directly by the
5058 architecture. */
5059 gcc_assert (!(multi_step_cvt && op_type == binary_op));
5060 break;
5061 }
5062
5063 if (code != FLOAT_EXPR
5064 || GET_MODE_SIZE (lhs_mode) <= GET_MODE_SIZE (rhs_mode))
5065 goto unsupported;
5066
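/* Otherwise look for a suitable intermediate integer mode.  For example
   (illustrative), a short -> float conversion can be handled by first
   widening the shorts to ints (CODE1/CODE2) and then converting the
   ints to floats (CODECVT1).  */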
5067 fltsz = GET_MODE_SIZE (lhs_mode);
5068 FOR_EACH_2XWIDER_MODE (rhs_mode_iter, rhs_mode)
5069 {
5070 rhs_mode = rhs_mode_iter.require ();
5071 if (GET_MODE_SIZE (rhs_mode) > fltsz)
5072 break;
5073
5074 cvt_type
5075 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
5076 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
5077 if (cvt_type == NULL_TREE)
5078 goto unsupported;
5079
5080 if (GET_MODE_SIZE (rhs_mode) == fltsz)
5081 {
5082 if (!supportable_convert_operation (code, vectype_out,
5083 cvt_type, &codecvt1))
5084 goto unsupported;
5085 }
5086 else if (!supportable_widening_operation (vinfo, code, stmt_info,
5087 vectype_out, cvt_type,
5088 &codecvt1, &codecvt2,
5089 &multi_step_cvt,
5090 &interm_types))
5091 continue;
5092 else
5093 gcc_assert (multi_step_cvt == 0);
5094
5095 if (supportable_widening_operation (vinfo, NOP_EXPR, stmt_info,
5096 cvt_type,
5097 vectype_in, &code1, &code2,
5098 &multi_step_cvt, &interm_types))
5099 {
5100 found_mode = true;
5101 break;
5102 }
5103 }
5104
5105 if (!found_mode)
5106 goto unsupported;
5107
5108 if (GET_MODE_SIZE (rhs_mode) == fltsz)
5109 codecvt2 = ERROR_MARK;
5110 else
5111 {
5112 multi_step_cvt++;
5113 interm_types.safe_push (cvt_type);
5114 cvt_type = NULL_TREE;
5115 }
5116 break;
5117
5118 case NARROW:
5119 gcc_assert (op_type == unary_op);
5120 if (supportable_narrowing_operation (code, vectype_out, vectype_in,
5121 &code1, &multi_step_cvt,
5122 &interm_types))
5123 break;
5124
5125 if (code != FIX_TRUNC_EXPR
5126 || GET_MODE_SIZE (lhs_mode) >= GET_MODE_SIZE (rhs_mode))
5127 goto unsupported;
5128
5129 cvt_type
5130 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
5131 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
5132 if (cvt_type == NULL_TREE)
5133 goto unsupported;
5134 if (!supportable_convert_operation (code, cvt_type, vectype_in,
5135 &codecvt1))
5136 goto unsupported;
5137 if (supportable_narrowing_operation (NOP_EXPR, vectype_out, cvt_type,
5138 &code1, &multi_step_cvt,
5139 &interm_types))
5140 break;
5141 goto unsupported;
5142
5143 default:
5144 gcc_unreachable ();
5145 }
5146
5147 if (!vec_stmt) /* transformation not required. */
5148 {
5149 if (slp_node
5150 && (!vect_maybe_update_slp_op_vectype (slp_op0, vectype_in)
5151 || !vect_maybe_update_slp_op_vectype (slp_op1, vectype_in)))
5152 {
5153 if (dump_enabled_p ())
5154 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5155 "incompatible vector types for invariants\n");
5156 return false;
5157 }
5158 DUMP_VECT_SCOPE ("vectorizable_conversion");
5159 if (modifier == NONE)
5160 {
5161 STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type;
5162 vect_model_simple_cost (vinfo, stmt_info, ncopies, dt, ndts, slp_node,
5163 cost_vec);
5164 }
5165 else if (modifier == NARROW)
5166 {
5167 STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type;
5168 /* The final packing step produces one vector result per copy. */
5169 unsigned int nvectors
5170 = (slp_node ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node) : ncopies);
5171 vect_model_promotion_demotion_cost (stmt_info, dt, nvectors,
5172 multi_step_cvt, cost_vec,
5173 widen_arith);
5174 }
5175 else
5176 {
5177 STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type;
5178 /* The initial unpacking step produces two vector results
5179 per copy. MULTI_STEP_CVT is 0 for a single conversion,
5180 so >> MULTI_STEP_CVT divides by 2^(number of steps - 1). */
5181 unsigned int nvectors
5182 = (slp_node
5183 ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node) >> multi_step_cvt
5184 : ncopies * 2);
5185 vect_model_promotion_demotion_cost (stmt_info, dt, nvectors,
5186 multi_step_cvt, cost_vec,
5187 widen_arith);
5188 }
5189 interm_types.release ();
5190 return true;
5191 }
5192
5193 /* Transform. */
5194 if (dump_enabled_p ())
5195 dump_printf_loc (MSG_NOTE, vect_location,
5196 "transform conversion. ncopies = %d.\n", ncopies);
5197
5198 if (op_type == binary_op)
5199 {
5200 if (CONSTANT_CLASS_P (op0))
5201 op0 = fold_convert (TREE_TYPE (op1), op0);
5202 else if (CONSTANT_CLASS_P (op1))
5203 op1 = fold_convert (TREE_TYPE (op0), op1);
5204 }
5205
5206 /* In case of multi-step conversion, we first generate conversion operations
5207 to the intermediate types, and then from those types to the final one.
5208 We create vector destinations for the intermediate types (TYPES) received
5209 from supportable_*_operation, and store them in the correct order
5210 for future use in vect_create_vectorized_*_stmts (). */
5211 auto_vec<tree> vec_dsts (multi_step_cvt + 1);
5212 vec_dest = vect_create_destination_var (scalar_dest,
5213 (cvt_type && modifier == WIDEN)
5214 ? cvt_type : vectype_out);
5215 vec_dsts.quick_push (vec_dest);
5216
5217 if (multi_step_cvt)
5218 {
5219 for (i = interm_types.length () - 1;
5220 interm_types.iterate (i, &intermediate_type); i--)
5221 {
5222 vec_dest = vect_create_destination_var (scalar_dest,
5223 intermediate_type);
5224 vec_dsts.quick_push (vec_dest);
5225 }
5226 }
5227
5228 if (cvt_type)
5229 vec_dest = vect_create_destination_var (scalar_dest,
5230 modifier == WIDEN
5231 ? vectype_out : cvt_type);
5232
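/* Number of input vector defs consumed per copy: a (possibly multi-step)
   demotion needs 2^(multi_step_cvt + 1) input vectors for each final
   vector it produces; widening and plain conversions need one.  */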
5233 int ninputs = 1;
5234 if (!slp_node)
5235 {
5236 if (modifier == WIDEN)
5237 ;
5238 else if (modifier == NARROW)
5239 {
5240 if (multi_step_cvt)
5241 ninputs = vect_pow2 (multi_step_cvt);
5242 ninputs *= 2;
5243 }
5244 }
5245
5246 switch (modifier)
5247 {
5248 case NONE:
5249 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
5250 op0, &vec_oprnds0);
5251 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5252 {
5253 /* Arguments are ready, create the new vector stmt. */
5254 gcc_assert (TREE_CODE_LENGTH (code1) == unary_op);
5255 gassign *new_stmt = gimple_build_assign (vec_dest, code1, vop0);
5256 new_temp = make_ssa_name (vec_dest, new_stmt);
5257 gimple_assign_set_lhs (new_stmt, new_temp);
5258 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
5259
5260 if (slp_node)
5261 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
5262 else
5263 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
5264 }
5265 break;
5266
5267 case WIDEN:
5268 /* In case the vectorization factor (VF) is bigger than the number
5269 of elements that we can fit in a vectype (nunits), we have to
5270 generate more than one vector stmt, i.e., we need to "unroll"
5271 the vector stmt by a factor of VF/nunits. */
5272 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies * ninputs,
5273 op0, &vec_oprnds0,
5274 code == WIDEN_LSHIFT_EXPR ? NULL_TREE : op1,
5275 &vec_oprnds1);
5276 if (code == WIDEN_LSHIFT_EXPR)
5277 {
5278 int oprnds_size = vec_oprnds0.length ();
5279 vec_oprnds1.create (oprnds_size);
5280 for (i = 0; i < oprnds_size; ++i)
5281 vec_oprnds1.quick_push (op1);
5282 }
5283 /* Arguments are ready. Create the new vector stmts. */
5284 for (i = multi_step_cvt; i >= 0; i--)
5285 {
5286 tree this_dest = vec_dsts[i];
5287 enum tree_code c1 = code1, c2 = code2;
5288 if (i == 0 && codecvt2 != ERROR_MARK)
5289 {
5290 c1 = codecvt1;
5291 c2 = codecvt2;
5292 }
5293 if (known_eq (nunits_out, nunits_in))
5294 vect_create_half_widening_stmts (vinfo, &vec_oprnds0,
5295 &vec_oprnds1, stmt_info,
5296 this_dest, gsi,
5297 c1, op_type);
5298 else
5299 vect_create_vectorized_promotion_stmts (vinfo, &vec_oprnds0,
5300 &vec_oprnds1, stmt_info,
5301 this_dest, gsi,
5302 c1, c2, op_type);
5303 }
5304
5305 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5306 {
5307 gimple *new_stmt;
5308 if (cvt_type)
5309 {
5310 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
5311 new_temp = make_ssa_name (vec_dest);
5312 new_stmt = gimple_build_assign (new_temp, codecvt1, vop0);
5313 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
5314 }
5315 else
5316 new_stmt = SSA_NAME_DEF_STMT (vop0);
5317
5318 if (slp_node)
5319 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
5320 else
5321 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
5322 }
5323 break;
5324
5325 case NARROW:
5326 /* In case the vectorization factor (VF) is bigger than the number
5327 of elements that we can fit in a vectype (nunits), we have to
5328 generate more than one vector stmt, i.e., we need to "unroll"
5329 the vector stmt by a factor of VF/nunits. */
5330 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies * ninputs,
5331 op0, &vec_oprnds0);
5332 /* Arguments are ready. Create the new vector stmts. */
5333 if (cvt_type)
5334 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5335 {
5336 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
5337 new_temp = make_ssa_name (vec_dest);
5338 gassign *new_stmt
5339 = gimple_build_assign (new_temp, codecvt1, vop0);
5340 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
5341 vec_oprnds0[i] = new_temp;
5342 }
5343
5344 vect_create_vectorized_demotion_stmts (vinfo, &vec_oprnds0,
5345 multi_step_cvt,
5346 stmt_info, vec_dsts, gsi,
5347 slp_node, code1);
5348 break;
5349 }
5350 if (!slp_node)
5351 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
5352
5353 vec_oprnds0.release ();
5354 vec_oprnds1.release ();
5355 interm_types.release ();
5356
5357 return true;
5358 }
5359
5360 /* Return true if we can assume from the scalar form of STMT_INFO that
5361 neither the scalar nor the vector forms will generate code. STMT_INFO
5362 is known not to involve a data reference. */
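/* For example (illustrative), a cast between int and unsigned int, or a
   VIEW_CONVERT_EXPR between same-sized types, is such a no-op.  */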
5363
5364 bool
5365 vect_nop_conversion_p (stmt_vec_info stmt_info)
5366 {
5367 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
5368 if (!stmt)
5369 return false;
5370
5371 tree lhs = gimple_assign_lhs (stmt);
5372 tree_code code = gimple_assign_rhs_code (stmt);
5373 tree rhs = gimple_assign_rhs1 (stmt);
5374
5375 if (code == SSA_NAME || code == VIEW_CONVERT_EXPR)
5376 return true;
5377
5378 if (CONVERT_EXPR_CODE_P (code))
5379 return tree_nop_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs));
5380
5381 return false;
5382 }
5383
5384 /* Function vectorizable_assignment.
5385
5386 Check if STMT_INFO performs an assignment (copy) that can be vectorized.
5387 If VEC_STMT is also passed, vectorize the STMT_INFO: create a vectorized
5388 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
5389 Return true if STMT_INFO is vectorizable in this way. */
5390
5391 static bool
5392 vectorizable_assignment (vec_info *vinfo,
5393 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
5394 gimple **vec_stmt, slp_tree slp_node,
5395 stmt_vector_for_cost *cost_vec)
5396 {
5397 tree vec_dest;
5398 tree scalar_dest;
5399 tree op;
5400 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
5401 tree new_temp;
5402 enum vect_def_type dt[1] = {vect_unknown_def_type};
5403 int ndts = 1;
5404 int ncopies;
5405 int i;
5406 vec<tree> vec_oprnds = vNULL;
5407 tree vop;
5408 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
5409 enum tree_code code;
5410 tree vectype_in;
5411
5412 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5413 return false;
5414
5415 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5416 && ! vec_stmt)
5417 return false;
5418
5419 /* Is vectorizable assignment? */
5420 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
5421 if (!stmt)
5422 return false;
5423
5424 scalar_dest = gimple_assign_lhs (stmt);
5425 if (TREE_CODE (scalar_dest) != SSA_NAME)
5426 return false;
5427
5428 if (STMT_VINFO_DATA_REF (stmt_info))
5429 return false;
5430
5431 code = gimple_assign_rhs_code (stmt);
5432 if (!(gimple_assign_single_p (stmt)
5433 || code == PAREN_EXPR
5434 || CONVERT_EXPR_CODE_P (code)))
5435 return false;
5436
5437 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
5438 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
5439
5440 /* Multiple types in SLP are handled by creating the appropriate number of
5441 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5442 case of SLP. */
5443 if (slp_node)
5444 ncopies = 1;
5445 else
5446 ncopies = vect_get_num_copies (loop_vinfo, vectype);
5447
5448 gcc_assert (ncopies >= 1);
5449
5450 slp_tree slp_op;
5451 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 0, &op, &slp_op,
5452 &dt[0], &vectype_in))
5453 {
5454 if (dump_enabled_p ())
5455 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5456 "use not simple.\n");
5457 return false;
5458 }
5459 if (!vectype_in)
5460 vectype_in = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op), slp_node);
5461
5462 /* We can handle NOP_EXPR and VIEW_CONVERT_EXPR conversions that do not
5463 change the number of elements or the vector size. */
5464 if ((CONVERT_EXPR_CODE_P (code)
5465 || code == VIEW_CONVERT_EXPR)
5466 && (!vectype_in
5467 || maybe_ne (TYPE_VECTOR_SUBPARTS (vectype_in), nunits)
5468 || maybe_ne (GET_MODE_SIZE (TYPE_MODE (vectype)),
5469 GET_MODE_SIZE (TYPE_MODE (vectype_in)))))
5470 return false;
5471
5472 if (VECTOR_BOOLEAN_TYPE_P (vectype) != VECTOR_BOOLEAN_TYPE_P (vectype_in))
5473 {
5474 if (dump_enabled_p ())
5475 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5476 "can't convert between boolean and non "
5477 "boolean vectors %T\n", TREE_TYPE (op));
5478
5479 return false;
5480 }
5481
5482 /* We do not handle bit-precision changes. */
5483 if ((CONVERT_EXPR_CODE_P (code)
5484 || code == VIEW_CONVERT_EXPR)
5485 && INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
5486 && (!type_has_mode_precision_p (TREE_TYPE (scalar_dest))
5487 || !type_has_mode_precision_p (TREE_TYPE (op)))
5488 /* But a conversion that does not change the bit-pattern is ok. */
5489 && !((TYPE_PRECISION (TREE_TYPE (scalar_dest))
5490 > TYPE_PRECISION (TREE_TYPE (op)))
5491 && TYPE_UNSIGNED (TREE_TYPE (op))))
5492 {
5493 if (dump_enabled_p ())
5494 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5495 "type conversion to/from bit-precision "
5496 "unsupported.\n");
5497 return false;
5498 }
5499
5500 if (!vec_stmt) /* transformation not required. */
5501 {
5502 if (slp_node
5503 && !vect_maybe_update_slp_op_vectype (slp_op, vectype_in))
5504 {
5505 if (dump_enabled_p ())
5506 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5507 "incompatible vector types for invariants\n");
5508 return false;
5509 }
5510 STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
5511 DUMP_VECT_SCOPE ("vectorizable_assignment");
5512 if (!vect_nop_conversion_p (stmt_info))
5513 vect_model_simple_cost (vinfo, stmt_info, ncopies, dt, ndts, slp_node,
5514 cost_vec);
5515 return true;
5516 }
5517
5518 /* Transform. */
5519 if (dump_enabled_p ())
5520 dump_printf_loc (MSG_NOTE, vect_location, "transform assignment.\n");
5521
5522 /* Handle def. */
5523 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5524
5525 /* Handle use. */
5526 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies, op, &vec_oprnds);
5527
5528 /* Arguments are ready. Create the new vector stmt. */
5529 FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
5530 {
5531 if (CONVERT_EXPR_CODE_P (code)
5532 || code == VIEW_CONVERT_EXPR)
5533 vop = build1 (VIEW_CONVERT_EXPR, vectype, vop);
5534 gassign *new_stmt = gimple_build_assign (vec_dest, vop);
5535 new_temp = make_ssa_name (vec_dest, new_stmt);
5536 gimple_assign_set_lhs (new_stmt, new_temp);
5537 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
5538 if (slp_node)
5539 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
5540 else
5541 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
5542 }
5543 if (!slp_node)
5544 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
5545
5546 vec_oprnds.release ();
5547 return true;
5548 }
5549
5550
5551 /* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE
5552 either as shift by a scalar or by a vector. */
5553
5554 bool
5555 vect_supportable_shift (vec_info *vinfo, enum tree_code code, tree scalar_type)
5556 {
5557
5558 machine_mode vec_mode;
5559 optab optab;
5560 int icode;
5561 tree vectype;
5562
5563 vectype = get_vectype_for_scalar_type (vinfo, scalar_type);
5564 if (!vectype)
5565 return false;
5566
5567 optab = optab_for_tree_code (code, vectype, optab_scalar);
5568 if (!optab
5569 || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
5570 {
5571 optab = optab_for_tree_code (code, vectype, optab_vector);
5572 if (!optab
5573 || (optab_handler (optab, TYPE_MODE (vectype))
5574 == CODE_FOR_nothing))
5575 return false;
5576 }
5577
5578 vec_mode = TYPE_MODE (vectype);
5579 icode = (int) optab_handler (optab, vec_mode);
5580 if (icode == CODE_FOR_nothing)
5581 return false;
5582
5583 return true;
5584 }
5585
5586
5587 /* Function vectorizable_shift.
5588
5589 Check if STMT_INFO performs a shift operation that can be vectorized.
5590 If VEC_STMT is also passed, vectorize the STMT_INFO: create a vectorized
5591 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
5592 Return true if STMT_INFO is vectorizable in this way. */
5593
5594 static bool
5595 vectorizable_shift (vec_info *vinfo,
5596 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
5597 gimple **vec_stmt, slp_tree slp_node,
5598 stmt_vector_for_cost *cost_vec)
5599 {
5600 tree vec_dest;
5601 tree scalar_dest;
5602 tree op0, op1 = NULL;
5603 tree vec_oprnd1 = NULL_TREE;
5604 tree vectype;
5605 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
5606 enum tree_code code;
5607 machine_mode vec_mode;
5608 tree new_temp;
5609 optab optab;
5610 int icode;
5611 machine_mode optab_op2_mode;
5612 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
5613 int ndts = 2;
5614 poly_uint64 nunits_in;
5615 poly_uint64 nunits_out;
5616 tree vectype_out;
5617 tree op1_vectype;
5618 int ncopies;
5619 int i;
5620 vec<tree> vec_oprnds0 = vNULL;
5621 vec<tree> vec_oprnds1 = vNULL;
5622 tree vop0, vop1;
5623 unsigned int k;
5624 bool scalar_shift_arg = true;
5625 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
5626 bool incompatible_op1_vectype_p = false;
5627
5628 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5629 return false;
5630
5631 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5632 && STMT_VINFO_DEF_TYPE (stmt_info) != vect_nested_cycle
5633 && ! vec_stmt)
5634 return false;
5635
5636 /* Is STMT a vectorizable binary/unary operation? */
5637 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
5638 if (!stmt)
5639 return false;
5640
5641 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
5642 return false;
5643
5644 code = gimple_assign_rhs_code (stmt);
5645
5646 if (!(code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
5647 || code == RROTATE_EXPR))
5648 return false;
5649
5650 scalar_dest = gimple_assign_lhs (stmt);
5651 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
5652 if (!type_has_mode_precision_p (TREE_TYPE (scalar_dest)))
5653 {
5654 if (dump_enabled_p ())
5655 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5656 "bit-precision shifts not supported.\n");
5657 return false;
5658 }
5659
5660 slp_tree slp_op0;
5661 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
5662 0, &op0, &slp_op0, &dt[0], &vectype))
5663 {
5664 if (dump_enabled_p ())
5665 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5666 "use not simple.\n");
5667 return false;
5668 }
5669 /* If op0 is an external or constant def, infer the vector type
5670 from the scalar type. */
5671 if (!vectype)
5672 vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op0), slp_node);
5673 if (vec_stmt)
5674 gcc_assert (vectype);
5675 if (!vectype)
5676 {
5677 if (dump_enabled_p ())
5678 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5679 "no vectype for scalar type\n");
5680 return false;
5681 }
5682
5683 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
5684 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
5685 if (maybe_ne (nunits_out, nunits_in))
5686 return false;
5687
5688 stmt_vec_info op1_def_stmt_info;
5689 slp_tree slp_op1;
5690 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 1, &op1, &slp_op1,
5691 &dt[1], &op1_vectype, &op1_def_stmt_info))
5692 {
5693 if (dump_enabled_p ())
5694 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5695 "use not simple.\n");
5696 return false;
5697 }
5698
5699 /* Multiple types in SLP are handled by creating the appropriate number of
5700 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5701 case of SLP. */
5702 if (slp_node)
5703 ncopies = 1;
5704 else
5705 ncopies = vect_get_num_copies (loop_vinfo, vectype);
5706
5707 gcc_assert (ncopies >= 1);
5708
5709 /* Determine whether the shift amount is a vector or a scalar. If the
5710 shift/rotate amount is a vector, use the vector/vector shift optabs. */
5711
5712 if ((dt[1] == vect_internal_def
5713 || dt[1] == vect_induction_def
5714 || dt[1] == vect_nested_cycle)
5715 && !slp_node)
5716 scalar_shift_arg = false;
5717 else if (dt[1] == vect_constant_def
5718 || dt[1] == vect_external_def
5719 || dt[1] == vect_internal_def)
5720 {
5721 /* In SLP, we need to check whether the shift count is the same in all
5722 stmts; in loops, if it is a constant or invariant, it is always
5723 a scalar shift. */
5724 if (slp_node)
5725 {
5726 vec<stmt_vec_info> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
5727 stmt_vec_info slpstmt_info;
5728
5729 FOR_EACH_VEC_ELT (stmts, k, slpstmt_info)
5730 {
5731 gassign *slpstmt = as_a <gassign *> (slpstmt_info->stmt);
5732 if (!operand_equal_p (gimple_assign_rhs2 (slpstmt), op1, 0))
5733 scalar_shift_arg = false;
5734 }
5735
5736 /* For internal SLP defs we have to make sure we see scalar stmts
5737 for all vector elements.
5738 ??? For different vectors we could resort to a different
5739 scalar shift operand but code-generation below simply always
5740 takes the first. */
5741 if (dt[1] == vect_internal_def
5742 && maybe_ne (nunits_out * SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node),
5743 stmts.length ()))
5744 scalar_shift_arg = false;
5745 }
5746
5747 /* If the shift amount is computed by a pattern stmt we cannot
5748 use the scalar amount directly, so give up and use a vector
5749 shift. */
5750 if (op1_def_stmt_info && is_pattern_stmt_p (op1_def_stmt_info))
5751 scalar_shift_arg = false;
5752 }
5753 else
5754 {
5755 if (dump_enabled_p ())
5756 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5757 "operand mode requires invariant argument.\n");
5758 return false;
5759 }
5760
5761 /* Vector shifted by vector. */
5762 bool was_scalar_shift_arg = scalar_shift_arg;
5763 if (!scalar_shift_arg)
5764 {
5765 optab = optab_for_tree_code (code, vectype, optab_vector);
5766 if (dump_enabled_p ())
5767 dump_printf_loc (MSG_NOTE, vect_location,
5768 "vector/vector shift/rotate found.\n");
5769
5770 if (!op1_vectype)
5771 op1_vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op1),
5772 slp_op1);
5773 incompatible_op1_vectype_p
5774 = (op1_vectype == NULL_TREE
5775 || maybe_ne (TYPE_VECTOR_SUBPARTS (op1_vectype),
5776 TYPE_VECTOR_SUBPARTS (vectype))
5777 || TYPE_MODE (op1_vectype) != TYPE_MODE (vectype));
5778 if (incompatible_op1_vectype_p
5779 && (!slp_node
5780 || SLP_TREE_DEF_TYPE (slp_op1) != vect_constant_def
5781 || slp_op1->refcnt != 1))
5782 {
5783 if (dump_enabled_p ())
5784 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5785 "unusable type for last operand in"
5786 " vector/vector shift/rotate.\n");
5787 return false;
5788 }
5789 }
5790 /* See if the machine has a vector-shifted-by-scalar insn and, if not,
5791 see if it has a vector-shifted-by-vector insn. */
5792 else
5793 {
5794 optab = optab_for_tree_code (code, vectype, optab_scalar);
5795 if (optab
5796 && optab_handler (optab, TYPE_MODE (vectype)) != CODE_FOR_nothing)
5797 {
5798 if (dump_enabled_p ())
5799 dump_printf_loc (MSG_NOTE, vect_location,
5800 "vector/scalar shift/rotate found.\n");
5801 }
5802 else
5803 {
5804 optab = optab_for_tree_code (code, vectype, optab_vector);
5805 if (optab
5806 && (optab_handler (optab, TYPE_MODE (vectype))
5807 != CODE_FOR_nothing))
5808 {
5809 scalar_shift_arg = false;
5810
5811 if (dump_enabled_p ())
5812 dump_printf_loc (MSG_NOTE, vect_location,
5813 "vector/vector shift/rotate found.\n");
5814
5815 if (!op1_vectype)
5816 op1_vectype = get_vectype_for_scalar_type (vinfo,
5817 TREE_TYPE (op1),
5818 slp_op1);
5819
5820 /* Unlike the other binary operators, shifts/rotates have
5821 the rhs being int, instead of the same type as the lhs,
5822 so make sure the scalar is the right type if we are
5823 dealing with vectors of long long/long/short/char. */
5824 incompatible_op1_vectype_p
5825 = (!op1_vectype
5826 || !tree_nop_conversion_p (TREE_TYPE (vectype),
5827 TREE_TYPE (op1)));
5828 if (incompatible_op1_vectype_p
5829 && dt[1] == vect_internal_def)
5830 {
5831 if (dump_enabled_p ())
5832 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5833 "unusable type for last operand in"
5834 " vector/vector shift/rotate.\n");
5835 return false;
5836 }
5837 }
5838 }
5839 }
5840
5841 /* Supportable by target? */
5842 if (!optab)
5843 {
5844 if (dump_enabled_p ())
5845 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5846 "no optab.\n");
5847 return false;
5848 }
5849 vec_mode = TYPE_MODE (vectype);
5850 icode = (int) optab_handler (optab, vec_mode);
5851 if (icode == CODE_FOR_nothing)
5852 {
5853 if (dump_enabled_p ())
5854 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5855 "op not supported by target.\n");
5856 return false;
5857 }
5858 /* Vector lowering cannot optimize vector shifts using word arithmetic. */
5859 if (vect_emulated_vector_p (vectype))
5860 return false;
5861
5862 if (!vec_stmt) /* transformation not required. */
5863 {
5864 if (slp_node
5865 && (!vect_maybe_update_slp_op_vectype (slp_op0, vectype)
5866 || ((!scalar_shift_arg || dt[1] == vect_internal_def)
5867 && (!incompatible_op1_vectype_p
5868 || dt[1] == vect_constant_def)
5869 && !vect_maybe_update_slp_op_vectype
5870 (slp_op1,
5871 incompatible_op1_vectype_p ? vectype : op1_vectype))))
5872 {
5873 if (dump_enabled_p ())
5874 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5875 "incompatible vector types for invariants\n");
5876 return false;
5877 }
5878 /* Now adjust the constant shift amount in place. */
5879 if (slp_node
5880 && incompatible_op1_vectype_p
5881 && dt[1] == vect_constant_def)
5882 {
5883 for (unsigned i = 0;
5884 i < SLP_TREE_SCALAR_OPS (slp_op1).length (); ++i)
5885 {
5886 SLP_TREE_SCALAR_OPS (slp_op1)[i]
5887 = fold_convert (TREE_TYPE (vectype),
5888 SLP_TREE_SCALAR_OPS (slp_op1)[i]);
5889 gcc_assert ((TREE_CODE (SLP_TREE_SCALAR_OPS (slp_op1)[i])
5890 == INTEGER_CST));
5891 }
5892 }
5893 STMT_VINFO_TYPE (stmt_info) = shift_vec_info_type;
5894 DUMP_VECT_SCOPE ("vectorizable_shift");
5895 vect_model_simple_cost (vinfo, stmt_info, ncopies, dt,
5896 scalar_shift_arg ? 1 : ndts, slp_node, cost_vec);
5897 return true;
5898 }
5899
5900 /* Transform. */
5901
5902 if (dump_enabled_p ())
5903 dump_printf_loc (MSG_NOTE, vect_location,
5904 "transform binary/unary operation.\n");
5905
5906 if (incompatible_op1_vectype_p && !slp_node)
5907 {
5908 gcc_assert (!scalar_shift_arg && was_scalar_shift_arg);
5909 op1 = fold_convert (TREE_TYPE (vectype), op1);
5910 if (dt[1] != vect_constant_def)
5911 op1 = vect_init_vector (vinfo, stmt_info, op1,
5912 TREE_TYPE (vectype), NULL);
5913 }
5914
5915 /* Handle def. */
5916 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5917
5918 if (scalar_shift_arg && dt[1] != vect_internal_def)
5919 {
5920 /* Vector shl and shr insn patterns can be defined with scalar
5921 operand 2 (shift operand). In this case, use constant or loop
5922 invariant op1 directly, without extending it to vector mode
5923 first. */
5924 optab_op2_mode = insn_data[icode].operand[2].mode;
5925 if (!VECTOR_MODE_P (optab_op2_mode))
5926 {
5927 if (dump_enabled_p ())
5928 dump_printf_loc (MSG_NOTE, vect_location,
5929 "operand 1 using scalar mode.\n");
5930 vec_oprnd1 = op1;
5931 vec_oprnds1.create (slp_node ? slp_node->vec_stmts_size : ncopies);
5932 vec_oprnds1.quick_push (vec_oprnd1);
5933 /* Store vec_oprnd1 for every vector stmt to be created.
5934 We check during the analysis that all the shift arguments
5935 are the same.
5936 TODO: Allow different constants for different vector
5937 stmts generated for an SLP instance. */
5938 for (k = 0;
5939 k < (slp_node ? slp_node->vec_stmts_size - 1 : ncopies - 1); k++)
5940 vec_oprnds1.quick_push (vec_oprnd1);
5941 }
5942 }
5943 else if (!scalar_shift_arg && slp_node && incompatible_op1_vectype_p)
5944 {
5945 if (was_scalar_shift_arg)
5946 {
5947 /* If the argument was the same in all lanes, create
5948 the correctly typed vector shift amount directly. */
5949 op1 = fold_convert (TREE_TYPE (vectype), op1);
5950 op1 = vect_init_vector (vinfo, stmt_info, op1, TREE_TYPE (vectype),
5951 !loop_vinfo ? gsi : NULL);
5952 vec_oprnd1 = vect_init_vector (vinfo, stmt_info, op1, vectype,
5953 !loop_vinfo ? gsi : NULL);
5954 vec_oprnds1.create (slp_node->vec_stmts_size);
5955 for (k = 0; k < slp_node->vec_stmts_size; k++)
5956 vec_oprnds1.quick_push (vec_oprnd1);
5957 }
5958 else if (dt[1] == vect_constant_def)
5959 /* The constant shift amount has been adjusted in place. */
5960 ;
5961 else
5962 gcc_assert (TYPE_MODE (op1_vectype) == TYPE_MODE (vectype));
5963 }
5964
5965 /* vec_oprnd1 is available if operand 1 should be of a scalar type
5966 (a special case for certain kinds of vector shifts); otherwise,
5967 operand 1 should be of a vector type (the usual case). */
5968 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
5969 op0, &vec_oprnds0,
5970 vec_oprnd1 ? NULL_TREE : op1, &vec_oprnds1);
5971
5972 /* Arguments are ready. Create the new vector stmt. */
5973 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5974 {
5975 /* For internal defs where we need to use a scalar shift arg,
5976 extract the first lane. */
5977 if (scalar_shift_arg && dt[1] == vect_internal_def)
5978 {
5979 vop1 = vec_oprnds1[0];
5980 new_temp = make_ssa_name (TREE_TYPE (TREE_TYPE (vop1)));
5981 gassign *new_stmt
5982 = gimple_build_assign (new_temp,
5983 build3 (BIT_FIELD_REF, TREE_TYPE (new_temp),
5984 vop1,
5985 TYPE_SIZE (TREE_TYPE (new_temp)),
5986 bitsize_zero_node));
5987 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
5988 vop1 = new_temp;
5989 }
5990 else
5991 vop1 = vec_oprnds1[i];
5992 gassign *new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
5993 new_temp = make_ssa_name (vec_dest, new_stmt);
5994 gimple_assign_set_lhs (new_stmt, new_temp);
5995 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
5996 if (slp_node)
5997 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
5998 else
5999 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
6000 }
6001
6002 if (!slp_node)
6003 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
6004
6005 vec_oprnds0.release ();
6006 vec_oprnds1.release ();
6007
6008 return true;
6009 }
6010
6011
6012 /* Function vectorizable_operation.
6013
6014 Check if STMT_INFO performs a binary, unary or ternary operation that can
6015 be vectorized.
6016 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
6017 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
6018 Return true if STMT_INFO is vectorizable in this way. */
6019
6020 static bool
6021 vectorizable_operation (vec_info *vinfo,
6022 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
6023 gimple **vec_stmt, slp_tree slp_node,
6024 stmt_vector_for_cost *cost_vec)
6025 {
6026 tree vec_dest;
6027 tree scalar_dest;
6028 tree op0, op1 = NULL_TREE, op2 = NULL_TREE;
6029 tree vectype;
6030 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
6031 enum tree_code code, orig_code;
6032 machine_mode vec_mode;
6033 tree new_temp;
6034 int op_type;
6035 optab optab;
6036 bool target_support_p;
6037 enum vect_def_type dt[3]
6038 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
6039 int ndts = 3;
6040 poly_uint64 nunits_in;
6041 poly_uint64 nunits_out;
6042 tree vectype_out;
6043 int ncopies, vec_num;
6044 int i;
6045 vec<tree> vec_oprnds0 = vNULL;
6046 vec<tree> vec_oprnds1 = vNULL;
6047 vec<tree> vec_oprnds2 = vNULL;
6048 tree vop0, vop1, vop2;
6049 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
6050
6051 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
6052 return false;
6053
6054 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
6055 && ! vec_stmt)
6056 return false;
6057
6058 /* Is STMT a vectorizable binary/unary operation? */
6059 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
6060 if (!stmt)
6061 return false;
6062
6063 /* Loads and stores are handled in vectorizable_{load,store}. */
6064 if (STMT_VINFO_DATA_REF (stmt_info))
6065 return false;
6066
6067 orig_code = code = gimple_assign_rhs_code (stmt);
6068
6069 /* Shifts are handled in vectorizable_shift. */
6070 if (code == LSHIFT_EXPR
6071 || code == RSHIFT_EXPR
6072 || code == LROTATE_EXPR
6073 || code == RROTATE_EXPR)
6074 return false;
6075
6076 /* Comparisons are handled in vectorizable_comparison. */
6077 if (TREE_CODE_CLASS (code) == tcc_comparison)
6078 return false;
6079
6080 /* Conditions are handled in vectorizable_condition. */
6081 if (code == COND_EXPR)
6082 return false;
6083
6084 /* For pointer addition and subtraction, we should use the normal
6085 plus and minus for the vector operation. */
6086 if (code == POINTER_PLUS_EXPR)
6087 code = PLUS_EXPR;
6088 if (code == POINTER_DIFF_EXPR)
6089 code = MINUS_EXPR;
6090
6091 /* Support only unary, binary or ternary operations. */
6092 op_type = TREE_CODE_LENGTH (code);
6093 if (op_type != unary_op && op_type != binary_op && op_type != ternary_op)
6094 {
6095 if (dump_enabled_p ())
6096 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6097 "num. args = %d (not unary/binary/ternary op).\n",
6098 op_type);
6099 return false;
6100 }
6101
6102 scalar_dest = gimple_assign_lhs (stmt);
6103 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
6104
6105 /* Most operations cannot handle bit-precision types without extra
6106 truncations. */
6107 bool mask_op_p = VECTOR_BOOLEAN_TYPE_P (vectype_out);
6108 if (!mask_op_p
6109 && !type_has_mode_precision_p (TREE_TYPE (scalar_dest))
6110 /* The exceptions are bitwise binary operations. */
6111 && code != BIT_IOR_EXPR
6112 && code != BIT_XOR_EXPR
6113 && code != BIT_AND_EXPR)
6114 {
6115 if (dump_enabled_p ())
6116 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6117 "bit-precision arithmetic not supported.\n");
6118 return false;
6119 }
6120
6121 slp_tree slp_op0;
6122 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
6123 0, &op0, &slp_op0, &dt[0], &vectype))
6124 {
6125 if (dump_enabled_p ())
6126 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6127 "use not simple.\n");
6128 return false;
6129 }
6130 bool is_invariant = (dt[0] == vect_external_def
6131 || dt[0] == vect_constant_def);
6132 /* If op0 is an external or constant def, infer the vector type
6133 from the scalar type. */
6134 if (!vectype)
6135 {
6136 /* For a boolean type we cannot determine the vectype from an
6137 invariant value (we don't know whether it is a vector
6138 of booleans or a vector of integers). We use the output
6139 vectype because operations on booleans don't change
6140 the type. */
6141 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op0)))
6142 {
6143 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (scalar_dest)))
6144 {
6145 if (dump_enabled_p ())
6146 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6147 "not supported operation on bool value.\n");
6148 return false;
6149 }
6150 vectype = vectype_out;
6151 }
6152 else
6153 vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op0),
6154 slp_node);
6155 }
6156 if (vec_stmt)
6157 gcc_assert (vectype);
6158 if (!vectype)
6159 {
6160 if (dump_enabled_p ())
6161 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6162 "no vectype for scalar type %T\n",
6163 TREE_TYPE (op0));
6164
6165 return false;
6166 }
6167
6168 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
6169 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
6170 if (maybe_ne (nunits_out, nunits_in))
6171 return false;
6172
6173 tree vectype2 = NULL_TREE, vectype3 = NULL_TREE;
6174 slp_tree slp_op1 = NULL, slp_op2 = NULL;
6175 if (op_type == binary_op || op_type == ternary_op)
6176 {
6177 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
6178 1, &op1, &slp_op1, &dt[1], &vectype2))
6179 {
6180 if (dump_enabled_p ())
6181 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6182 "use not simple.\n");
6183 return false;
6184 }
6185 is_invariant &= (dt[1] == vect_external_def
6186 || dt[1] == vect_constant_def);
6187 if (vectype2
6188 && maybe_ne (nunits_out, TYPE_VECTOR_SUBPARTS (vectype2)))
6189 return false;
6190 }
6191 if (op_type == ternary_op)
6192 {
6193 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
6194 2, &op2, &slp_op2, &dt[2], &vectype3))
6195 {
6196 if (dump_enabled_p ())
6197 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6198 "use not simple.\n");
6199 return false;
6200 }
6201 is_invariant &= (dt[2] == vect_external_def
6202 || dt[2] == vect_constant_def);
6203 if (vectype3
6204 && maybe_ne (nunits_out, TYPE_VECTOR_SUBPARTS (vectype3)))
6205 return false;
6206 }
6207
6208 /* Multiple types in SLP are handled by creating the appropriate number of
6209 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
6210 case of SLP. */
6211 if (slp_node)
6212 {
6213 ncopies = 1;
6214 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
6215 }
6216 else
6217 {
6218 ncopies = vect_get_num_copies (loop_vinfo, vectype);
6219 vec_num = 1;
6220 }
6221
6222 gcc_assert (ncopies >= 1);
6223
6224 /* Reject attempts to combine mask types with nonmask types, e.g. if
6225 we have an AND between a (nonmask) boolean loaded from memory and
6226 a (mask) boolean result of a comparison.
6227
6228 TODO: We could easily fix these cases up using pattern statements. */
6229 if (VECTOR_BOOLEAN_TYPE_P (vectype) != mask_op_p
6230 || (vectype2 && VECTOR_BOOLEAN_TYPE_P (vectype2) != mask_op_p)
6231 || (vectype3 && VECTOR_BOOLEAN_TYPE_P (vectype3) != mask_op_p))
6232 {
6233 if (dump_enabled_p ())
6234 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6235 "mixed mask and nonmask vector types\n");
6236 return false;
6237 }
6238
6239 /* Supportable by target? */
6240
6241 vec_mode = TYPE_MODE (vectype);
6242 if (code == MULT_HIGHPART_EXPR)
6243 target_support_p = can_mult_highpart_p (vec_mode, TYPE_UNSIGNED (vectype));
6244 else
6245 {
6246 optab = optab_for_tree_code (code, vectype, optab_default);
6247 if (!optab)
6248 {
6249 if (dump_enabled_p ())
6250 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6251 "no optab.\n");
6252 return false;
6253 }
6254 target_support_p = (optab_handler (optab, vec_mode)
6255 != CODE_FOR_nothing);
6256 }
6257
6258 bool using_emulated_vectors_p = vect_emulated_vector_p (vectype);
6259 if (!target_support_p)
6260 {
6261 if (dump_enabled_p ())
6262 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6263 "op not supported by target.\n");
6264 /* Check only during analysis. */
6265 if (maybe_ne (GET_MODE_SIZE (vec_mode), UNITS_PER_WORD)
6266 || (!vec_stmt && !vect_can_vectorize_without_simd_p (code)))
6267 return false;
6268 if (dump_enabled_p ())
6269 dump_printf_loc (MSG_NOTE, vect_location,
6270 "proceeding using word mode.\n");
6271 using_emulated_vectors_p = true;
6272 }
6273
6274 if (using_emulated_vectors_p
6275 && !vect_can_vectorize_without_simd_p (code))
6276 {
6277 if (dump_enabled_p ())
6278 dump_printf (MSG_NOTE, "using word mode not possible.\n");
6279 return false;
6280 }
6281
6282 /* ??? We should expand the operations here, instead of
6283 relying on vector lowering, which has a hard cap on the number
6284 of vector elements below which it performs elementwise operations. */
6285 if (using_emulated_vectors_p
6286 && (code == PLUS_EXPR || code == MINUS_EXPR || code == NEGATE_EXPR)
6287 && ((BITS_PER_WORD / vector_element_bits (vectype)) < 4
6288 || maybe_lt (nunits_out, 4U)))
6289 {
6290 if (dump_enabled_p ())
6291 dump_printf (MSG_NOTE, "not using word mode for +- and less than "
6292 "four vector elements\n");
6293 return false;
6294 }
6295
6296 int reduc_idx = STMT_VINFO_REDUC_IDX (stmt_info);
6297 vec_loop_masks *masks = (loop_vinfo ? &LOOP_VINFO_MASKS (loop_vinfo) : NULL);
6298 internal_fn cond_fn = get_conditional_internal_fn (code);
6299
6300 /* If operating on inactive elements could generate spurious traps,
6301 we need to restrict the operation to active lanes. Note that this
6302 specifically doesn't apply to unhoisted invariants, since they
6303 operate on the same value for every lane.
6304
6305 Similarly, if this operation is part of a reduction, a fully-masked
6306 loop should only change the active lanes of the reduction chain,
6307 keeping the inactive lanes as-is. */
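/* For example (illustrative), a TRUNC_DIV_EXPR whose divisor is not a
   nonzero constant could trap on the inactive lanes of a partial final
   iteration, so it has to be emitted as a conditional division
   (e.g. IFN_COND_DIV) under the loop mask.  */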
6308 bool mask_out_inactive = ((!is_invariant && gimple_could_trap_p (stmt))
6309 || reduc_idx >= 0);
6310
6311 if (!vec_stmt) /* transformation not required. */
6312 {
6313 if (loop_vinfo
6314 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo)
6315 && mask_out_inactive)
6316 {
6317 if (cond_fn == IFN_LAST
6318 || !direct_internal_fn_supported_p (cond_fn, vectype,
6319 OPTIMIZE_FOR_SPEED))
6320 {
6321 if (dump_enabled_p ())
6322 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6323 "can't use a fully-masked loop because no"
6324 " conditional operation is available.\n");
6325 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
6326 }
6327 else
6328 vect_record_loop_mask (loop_vinfo, masks, ncopies * vec_num,
6329 vectype, NULL);
6330 }
6331
6332 /* Put types on constant and invariant SLP children. */
6333 if (slp_node
6334 && (!vect_maybe_update_slp_op_vectype (slp_op0, vectype)
6335 || !vect_maybe_update_slp_op_vectype (slp_op1, vectype)
6336 || !vect_maybe_update_slp_op_vectype (slp_op2, vectype)))
6337 {
6338 if (dump_enabled_p ())
6339 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6340 "incompatible vector types for invariants\n");
6341 return false;
6342 }
6343
6344 STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
6345 DUMP_VECT_SCOPE ("vectorizable_operation");
6346 vect_model_simple_cost (vinfo, stmt_info,
6347 ncopies, dt, ndts, slp_node, cost_vec);
6348 if (using_emulated_vectors_p)
6349 {
6350 /* The above vect_model_simple_cost call handles constants
6351 in the prologue and (mis-)costs one of the stmts as
6352 vector stmt. See tree-vect-generic.cc:do_plus_minus/do_negate
6353 for the actual lowering that will be applied. */
6354 unsigned n
6355 = slp_node ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node) : ncopies;
6356 switch (code)
6357 {
6358 case PLUS_EXPR:
6359 n *= 5;
6360 break;
6361 case MINUS_EXPR:
6362 n *= 6;
6363 break;
6364 case NEGATE_EXPR:
6365 n *= 4;
6366 break;
6367 default:;
6368 }
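/* The multipliers above roughly match the number of scalar stmts the
   generic lowering emits per vector stmt for word-mode addition,
   subtraction and negation.  */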
6369 record_stmt_cost (cost_vec, n, scalar_stmt, stmt_info, 0, vect_body);
6370 }
6371 return true;
6372 }
6373
6374 /* Transform. */
6375
6376 if (dump_enabled_p ())
6377 dump_printf_loc (MSG_NOTE, vect_location,
6378 "transform binary/unary operation.\n");
6379
6380 bool masked_loop_p = loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo);
6381
6382 /* POINTER_DIFF_EXPR has pointer arguments which are vectorized as
6383 vectors with unsigned elements, but the result is signed. So, we
6384 need to compute the MINUS_EXPR into vectype temporary and
6385 VIEW_CONVERT_EXPR it into the final vectype_out result. */
6386 tree vec_cvt_dest = NULL_TREE;
6387 if (orig_code == POINTER_DIFF_EXPR)
6388 {
6389 vec_dest = vect_create_destination_var (scalar_dest, vectype);
6390 vec_cvt_dest = vect_create_destination_var (scalar_dest, vectype_out);
6391 }
6392 /* Handle def. */
6393 else
6394 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
6395
6396 /* In case the vectorization factor (VF) is bigger than the number
6397 of elements that we can fit in a vectype (nunits), we have to generate
6398 more than one vector stmt, i.e., we need to "unroll" the
6399 vector stmt by a factor of VF/nunits. In doing so, we record a pointer
6400 from one copy of the vector stmt to the next, in the field
6401 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
6402 stages to find the correct vector defs to be used when vectorizing
6403 stmts that use the defs of the current stmt. The example below
6404 illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
6405 we need to create 4 vectorized stmts):
6406
6407 before vectorization:
6408 RELATED_STMT VEC_STMT
6409 S1: x = memref - -
6410 S2: z = x + 1 - -
6411
6412 step 1: vectorize stmt S1 (done in vectorizable_load. See more details
6413 there):
6414 RELATED_STMT VEC_STMT
6415 VS1_0: vx0 = memref0 VS1_1 -
6416 VS1_1: vx1 = memref1 VS1_2 -
6417 VS1_2: vx2 = memref2 VS1_3 -
6418 VS1_3: vx3 = memref3 - -
6419 S1: x = load - VS1_0
6420 S2: z = x + 1 - -
6421
6422 step2: vectorize stmt S2 (done here):
6423 To vectorize stmt S2 we first need to find the relevant vector
6424 def for the first operand 'x'. This is, as usual, obtained from
6425 the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
6426 that defines 'x' (S1). This way we find the stmt VS1_0, and the
6427 relevant vector def 'vx0'. Having found 'vx0' we can generate
6428 the vector stmt VS2_0, and as usual, record it in the
6429 STMT_VINFO_VEC_STMT of stmt S2.
6430 When creating the second copy (VS2_1), we obtain the relevant vector
6431 def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
6432 stmt VS1_0. This way we find the stmt VS1_1 and the relevant
6433 vector def 'vx1'. Using 'vx1' we create stmt VS2_1 and record a
6434 pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
6435 Similarly when creating stmts VS2_2 and VS2_3. This is the resulting
6436 chain of stmts and pointers:
6437 RELATED_STMT VEC_STMT
6438 VS1_0: vx0 = memref0 VS1_1 -
6439 VS1_1: vx1 = memref1 VS1_2 -
6440 VS1_2: vx2 = memref2 VS1_3 -
6441 VS1_3: vx3 = memref3 - -
6442 S1: x = load - VS1_0
6443 VS2_0: vz0 = vx0 + v1 VS2_1 -
6444 VS2_1: vz1 = vx1 + v1 VS2_2 -
6445 VS2_2: vz2 = vx2 + v1 VS2_3 -
6446 VS2_3: vz3 = vx3 + v1 - -
6447 S2: z = x + 1 - VS2_0 */
6448
6449 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
6450 op0, &vec_oprnds0, op1, &vec_oprnds1, op2, &vec_oprnds2);
6451 /* Arguments are ready. Create the new vector stmt. */
6452 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
6453 {
6454 gimple *new_stmt = NULL;
6455 vop1 = ((op_type == binary_op || op_type == ternary_op)
6456 ? vec_oprnds1[i] : NULL_TREE);
6457 vop2 = ((op_type == ternary_op) ? vec_oprnds2[i] : NULL_TREE);
6458 if (masked_loop_p && mask_out_inactive)
6459 {
6460 tree mask = vect_get_loop_mask (gsi, masks, vec_num * ncopies,
6461 vectype, i);
6462 auto_vec<tree> vops (5);
6463 vops.quick_push (mask);
6464 vops.quick_push (vop0);
6465 if (vop1)
6466 vops.quick_push (vop1);
6467 if (vop2)
6468 vops.quick_push (vop2);
6469 if (reduc_idx >= 0)
6470 {
6471 /* Perform the operation on active elements only and take
6472 inactive elements from the reduction chain input. */
6473 gcc_assert (!vop2);
6474 vops.quick_push (reduc_idx == 1 ? vop1 : vop0);
6475 }
6476 else
6477 {
6478 auto else_value = targetm.preferred_else_value
6479 (cond_fn, vectype, vops.length () - 1, &vops[1]);
6480 vops.quick_push (else_value);
6481 }
6482 gcall *call = gimple_build_call_internal_vec (cond_fn, vops);
6483 new_temp = make_ssa_name (vec_dest, call);
6484 gimple_call_set_lhs (call, new_temp);
6485 gimple_call_set_nothrow (call, true);
6486 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
6487 new_stmt = call;
6488 }
6489 else
6490 {
6491 tree mask = NULL_TREE;
6492 /* When combining two masks, check whether either of them is elsewhere
6493 combined with a loop mask; if that is the case, we can mark that the
6494 new combined mask doesn't need to be combined with a loop mask again. */
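/* For illustration only (hypothetical IL, invented SSA names): if the
   scalar condition feeding op0 is known to be used under a loop mask
   elsewhere, we emit
     vec_mask_and_34 = loop_mask_21 & mask__5;
     mask__9 = vec_mask_and_34 & mask__7;
   and record { mask__9, loop_mask_21 } in vec_cond_masked_set below so
   that mask__9 is not ANDed with loop_mask_21 again.  */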
6495 if (masked_loop_p
6496 && code == BIT_AND_EXPR
6497 && VECTOR_BOOLEAN_TYPE_P (vectype))
6498 {
6499 if (loop_vinfo->scalar_cond_masked_set.contains ({ op0,
6500 ncopies}))
6501 {
6502 mask = vect_get_loop_mask (gsi, masks, vec_num * ncopies,
6503 vectype, i);
6504
6505 vop0 = prepare_vec_mask (loop_vinfo, TREE_TYPE (mask), mask,
6506 vop0, gsi);
6507 }
6508
6509 if (loop_vinfo->scalar_cond_masked_set.contains ({ op1,
6510 ncopies }))
6511 {
6512 mask = vect_get_loop_mask (gsi, masks, vec_num * ncopies,
6513 vectype, i);
6514
6515 vop1 = prepare_vec_mask (loop_vinfo, TREE_TYPE (mask), mask,
6516 vop1, gsi);
6517 }
6518 }
6519
6520 new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1, vop2);
6521 new_temp = make_ssa_name (vec_dest, new_stmt);
6522 gimple_assign_set_lhs (new_stmt, new_temp);
6523 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
6524 if (using_emulated_vectors_p)
6525 suppress_warning (new_stmt, OPT_Wvector_operation_performance);
6526
6527 /* Enter the combined value into the vector cond hash so we don't
6528 AND it with a loop mask again. */
6529 if (mask)
6530 loop_vinfo->vec_cond_masked_set.add ({ new_temp, mask });
6531
6532 if (vec_cvt_dest)
6533 {
6534 new_temp = build1 (VIEW_CONVERT_EXPR, vectype_out, new_temp);
6535 new_stmt = gimple_build_assign (vec_cvt_dest, VIEW_CONVERT_EXPR,
6536 new_temp);
6537 new_temp = make_ssa_name (vec_cvt_dest, new_stmt);
6538 gimple_assign_set_lhs (new_stmt, new_temp);
6539 vect_finish_stmt_generation (vinfo, stmt_info,
6540 new_stmt, gsi);
6541 }
6542 }
6543 if (slp_node)
6544 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
6545 else
6546 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
6547 }
6548
6549 if (!slp_node)
6550 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
6551
6552 vec_oprnds0.release ();
6553 vec_oprnds1.release ();
6554 vec_oprnds2.release ();
6555
6556 return true;
6557 }
6558
6559 /* A helper function to ensure data reference DR_INFO's base alignment. */
6560
6561 static void
6562 ensure_base_align (dr_vec_info *dr_info)
6563 {
6564 /* Alignment is only analyzed for the first element of a DR group;
6565 use that element to determine the base alignment we need to enforce. */
6566 if (STMT_VINFO_GROUPED_ACCESS (dr_info->stmt))
6567 dr_info = STMT_VINFO_DR_INFO (DR_GROUP_FIRST_ELEMENT (dr_info->stmt));
6568
6569 gcc_assert (dr_info->misalignment != DR_MISALIGNMENT_UNINITIALIZED);
6570
6571 if (dr_info->base_misaligned)
6572 {
6573 tree base_decl = dr_info->base_decl;
6574
6575 // We should only be able to increase the alignment of a base object if
6576 // we know what its new alignment should be at compile time.
6577 unsigned HOST_WIDE_INT align_base_to =
6578 DR_TARGET_ALIGNMENT (dr_info).to_constant () * BITS_PER_UNIT;
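/* For example (hypothetical numbers): with a 16-byte DR_TARGET_ALIGNMENT
   this is 16 * BITS_PER_UNIT == 128 bits, and a base decl with a smaller
   DECL_ALIGN is bumped to that value by one of the two branches below.  */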
6579
6580 if (decl_in_symtab_p (base_decl))
6581 symtab_node::get (base_decl)->increase_alignment (align_base_to);
6582 else if (DECL_ALIGN (base_decl) < align_base_to)
6583 {
6584 SET_DECL_ALIGN (base_decl, align_base_to);
6585 DECL_USER_ALIGN (base_decl) = 1;
6586 }
6587 dr_info->base_misaligned = false;
6588 }
6589 }
6590
6591
6592 /* Function get_group_alias_ptr_type.
6593
6594 Return the alias type for the group starting at FIRST_STMT_INFO. */
6595
6596 static tree
6597 get_group_alias_ptr_type (stmt_vec_info first_stmt_info)
6598 {
6599 struct data_reference *first_dr, *next_dr;
6600
6601 first_dr = STMT_VINFO_DATA_REF (first_stmt_info);
6602 stmt_vec_info next_stmt_info = DR_GROUP_NEXT_ELEMENT (first_stmt_info);
6603 while (next_stmt_info)
6604 {
6605 next_dr = STMT_VINFO_DATA_REF (next_stmt_info);
6606 if (get_alias_set (DR_REF (first_dr))
6607 != get_alias_set (DR_REF (next_dr)))
6608 {
6609 if (dump_enabled_p ())
6610 dump_printf_loc (MSG_NOTE, vect_location,
6611 "conflicting alias set types.\n");
6612 return ptr_type_node;
6613 }
6614 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
6615 }
6616 return reference_alias_ptr_type (DR_REF (first_dr));
6617 }
6618
6619
6620 /* Function scan_operand_equal_p.
6621
6622 Helper function for check_scan_store. Compare two references
6623 with .GOMP_SIMD_LANE bases. */
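/* For illustration only (hypothetical IL): this is meant to let a store
   such as D.2042[_25] from the scan sequence compare equal to the load
   D.2042[_25] feeding the combiner, even when one offset appears as
   e.g. _25 * 4 and the other as a widening conversion of _25, by
   stripping the constant step and widening casts before comparing.  */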
6624
6625 static bool
6626 scan_operand_equal_p (tree ref1, tree ref2)
6627 {
6628 tree ref[2] = { ref1, ref2 };
6629 poly_int64 bitsize[2], bitpos[2];
6630 tree offset[2], base[2];
6631 for (int i = 0; i < 2; ++i)
6632 {
6633 machine_mode mode;
6634 int unsignedp, reversep, volatilep = 0;
6635 base[i] = get_inner_reference (ref[i], &bitsize[i], &bitpos[i],
6636 &offset[i], &mode, &unsignedp,
6637 &reversep, &volatilep);
6638 if (reversep || volatilep || maybe_ne (bitpos[i], 0))
6639 return false;
6640 if (TREE_CODE (base[i]) == MEM_REF
6641 && offset[i] == NULL_TREE
6642 && TREE_CODE (TREE_OPERAND (base[i], 0)) == SSA_NAME)
6643 {
6644 gimple *def_stmt = SSA_NAME_DEF_STMT (TREE_OPERAND (base[i], 0));
6645 if (is_gimple_assign (def_stmt)
6646 && gimple_assign_rhs_code (def_stmt) == POINTER_PLUS_EXPR
6647 && TREE_CODE (gimple_assign_rhs1 (def_stmt)) == ADDR_EXPR
6648 && TREE_CODE (gimple_assign_rhs2 (def_stmt)) == SSA_NAME)
6649 {
6650 if (maybe_ne (mem_ref_offset (base[i]), 0))
6651 return false;
6652 base[i] = TREE_OPERAND (gimple_assign_rhs1 (def_stmt), 0);
6653 offset[i] = gimple_assign_rhs2 (def_stmt);
6654 }
6655 }
6656 }
6657
6658 if (!operand_equal_p (base[0], base[1], 0))
6659 return false;
6660 if (maybe_ne (bitsize[0], bitsize[1]))
6661 return false;
6662 if (offset[0] != offset[1])
6663 {
6664 if (!offset[0] || !offset[1])
6665 return false;
6666 if (!operand_equal_p (offset[0], offset[1], 0))
6667 {
6668 tree step[2];
6669 for (int i = 0; i < 2; ++i)
6670 {
6671 step[i] = integer_one_node;
6672 if (TREE_CODE (offset[i]) == SSA_NAME)
6673 {
6674 gimple *def_stmt = SSA_NAME_DEF_STMT (offset[i]);
6675 if (is_gimple_assign (def_stmt)
6676 && gimple_assign_rhs_code (def_stmt) == MULT_EXPR
6677 && (TREE_CODE (gimple_assign_rhs2 (def_stmt))
6678 == INTEGER_CST))
6679 {
6680 step[i] = gimple_assign_rhs2 (def_stmt);
6681 offset[i] = gimple_assign_rhs1 (def_stmt);
6682 }
6683 }
6684 else if (TREE_CODE (offset[i]) == MULT_EXPR)
6685 {
6686 step[i] = TREE_OPERAND (offset[i], 1);
6687 offset[i] = TREE_OPERAND (offset[i], 0);
6688 }
6689 tree rhs1 = NULL_TREE;
6690 if (TREE_CODE (offset[i]) == SSA_NAME)
6691 {
6692 gimple *def_stmt = SSA_NAME_DEF_STMT (offset[i]);
6693 if (gimple_assign_cast_p (def_stmt))
6694 rhs1 = gimple_assign_rhs1 (def_stmt);
6695 }
6696 else if (CONVERT_EXPR_P (offset[i]))
6697 rhs1 = TREE_OPERAND (offset[i], 0);
6698 if (rhs1
6699 && INTEGRAL_TYPE_P (TREE_TYPE (rhs1))
6700 && INTEGRAL_TYPE_P (TREE_TYPE (offset[i]))
6701 && (TYPE_PRECISION (TREE_TYPE (offset[i]))
6702 >= TYPE_PRECISION (TREE_TYPE (rhs1))))
6703 offset[i] = rhs1;
6704 }
6705 if (!operand_equal_p (offset[0], offset[1], 0)
6706 || !operand_equal_p (step[0], step[1], 0))
6707 return false;
6708 }
6709 }
6710 return true;
6711 }
6712
6713
6714 enum scan_store_kind {
6715 /* Normal permutation. */
6716 scan_store_kind_perm,
6717
6718 /* Whole vector left shift permutation with zero init. */
6719 scan_store_kind_lshift_zero,
6720
6721 /* Whole vector left shift permutation and VEC_COND_EXPR. */
6722 scan_store_kind_lshift_cond
6723 };
6724
6725 /* Function scan_store_can_perm_p.
6726 
6727 Verify whether we can perform the needed permutations or whole vector shifts.
6728 Return -1 on failure, otherwise the exact log2 of vectype's nunits.
6729 If non-NULL, USE_WHOLE_VECTOR is filled with one enum scan_store_kind
6730 per step, telling which operation to perform at that step. */
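/* For example, with an 8-element vectype (units_log2 == 3) the
   permutations checked are, in order:
     step 0:  { 0, 8, 9, 10, 11, 12, 13, 14 }
     step 1:  { 0, 1, 8, 9, 10, 11, 12, 13 }
     step 2:  { 0, 1, 2, 3, 8, 9, 10, 11 }
     final:   { 7, 7, 7, 7, 7, 7, 7, 7 }
   i.e. the VEC_PERM_EXPR masks shown in the check_scan_store comment
   further below.  */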
6731
6732 static int
6733 scan_store_can_perm_p (tree vectype, tree init,
6734 vec<enum scan_store_kind> *use_whole_vector = NULL)
6735 {
6736 enum machine_mode vec_mode = TYPE_MODE (vectype);
6737 unsigned HOST_WIDE_INT nunits;
6738 if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits))
6739 return -1;
6740 int units_log2 = exact_log2 (nunits);
6741 if (units_log2 <= 0)
6742 return -1;
6743
6744 int i;
6745 enum scan_store_kind whole_vector_shift_kind = scan_store_kind_perm;
6746 for (i = 0; i <= units_log2; ++i)
6747 {
6748 unsigned HOST_WIDE_INT j, k;
6749 enum scan_store_kind kind = scan_store_kind_perm;
6750 vec_perm_builder sel (nunits, nunits, 1);
6751 sel.quick_grow (nunits);
6752 if (i == units_log2)
6753 {
6754 for (j = 0; j < nunits; ++j)
6755 sel[j] = nunits - 1;
6756 }
6757 else
6758 {
6759 for (j = 0; j < (HOST_WIDE_INT_1U << i); ++j)
6760 sel[j] = j;
6761 for (k = 0; j < nunits; ++j, ++k)
6762 sel[j] = nunits + k;
6763 }
6764 vec_perm_indices indices (sel, i == units_log2 ? 1 : 2, nunits);
6765 if (!can_vec_perm_const_p (vec_mode, indices))
6766 {
6767 if (i == units_log2)
6768 return -1;
6769
6770 if (whole_vector_shift_kind == scan_store_kind_perm)
6771 {
6772 if (optab_handler (vec_shl_optab, vec_mode) == CODE_FOR_nothing)
6773 return -1;
6774 whole_vector_shift_kind = scan_store_kind_lshift_zero;
6775 /* Whole vector shifts shift in zeros, so if init is an all-zero
6776 constant, there is no need to do anything further. */
6777 if ((TREE_CODE (init) != INTEGER_CST
6778 && TREE_CODE (init) != REAL_CST)
6779 || !initializer_zerop (init))
6780 {
6781 tree masktype = truth_type_for (vectype);
6782 if (!expand_vec_cond_expr_p (vectype, masktype, VECTOR_CST))
6783 return -1;
6784 whole_vector_shift_kind = scan_store_kind_lshift_cond;
6785 }
6786 }
6787 kind = whole_vector_shift_kind;
6788 }
6789 if (use_whole_vector)
6790 {
6791 if (kind != scan_store_kind_perm && use_whole_vector->is_empty ())
6792 use_whole_vector->safe_grow_cleared (i, true);
6793 if (kind != scan_store_kind_perm || !use_whole_vector->is_empty ())
6794 use_whole_vector->safe_push (kind);
6795 }
6796 }
6797
6798 return units_log2;
6799 }
6800
6801
6802 /* Function check_scan_store.
6803
6804 Check magic stores for #pragma omp scan {in,ex}clusive reductions. */
6805
6806 static bool
6807 check_scan_store (vec_info *vinfo, stmt_vec_info stmt_info, tree vectype,
6808 enum vect_def_type rhs_dt, bool slp, tree mask,
6809 vect_memory_access_type memory_access_type)
6810 {
6811 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
6812 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
6813 tree ref_type;
6814
6815 gcc_assert (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) > 1);
6816 if (slp
6817 || mask
6818 || memory_access_type != VMAT_CONTIGUOUS
6819 || TREE_CODE (DR_BASE_ADDRESS (dr_info->dr)) != ADDR_EXPR
6820 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0))
6821 || loop_vinfo == NULL
6822 || LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
6823 || STMT_VINFO_GROUPED_ACCESS (stmt_info)
6824 || !integer_zerop (get_dr_vinfo_offset (vinfo, dr_info))
6825 || !integer_zerop (DR_INIT (dr_info->dr))
6826 || !(ref_type = reference_alias_ptr_type (DR_REF (dr_info->dr)))
6827 || !alias_sets_conflict_p (get_alias_set (vectype),
6828 get_alias_set (TREE_TYPE (ref_type))))
6829 {
6830 if (dump_enabled_p ())
6831 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6832 "unsupported OpenMP scan store.\n");
6833 return false;
6834 }
6835
6836 /* We need to pattern match code built by OpenMP lowering and simplified
6837 by subsequent optimizations into something we can handle.
6838 #pragma omp simd reduction(inscan,+:r)
6839 for (...)
6840 {
6841 r += something ();
6842 #pragma omp scan inclusive (r)
6843 use (r);
6844 }
6845 shall have body with:
6846 // Initialization for input phase, store the reduction initializer:
6847 _20 = .GOMP_SIMD_LANE (simduid.3_14(D), 0);
6848 _21 = .GOMP_SIMD_LANE (simduid.3_14(D), 1);
6849 D.2042[_21] = 0;
6850 // Actual input phase:
6851 ...
6852 r.0_5 = D.2042[_20];
6853 _6 = _4 + r.0_5;
6854 D.2042[_20] = _6;
6855 // Initialization for scan phase:
6856 _25 = .GOMP_SIMD_LANE (simduid.3_14(D), 2);
6857 _26 = D.2043[_25];
6858 _27 = D.2042[_25];
6859 _28 = _26 + _27;
6860 D.2043[_25] = _28;
6861 D.2042[_25] = _28;
6862 // Actual scan phase:
6863 ...
6864 r.1_8 = D.2042[_20];
6865 ...
6866 The "omp simd array" variable D.2042 holds the privatized copy used
6867 inside of the loop and D.2043 is another one that holds copies of
6868 the current original list item. The separate GOMP_SIMD_LANE ifn
6869 kinds are there in order to allow optimizing the initializer store
6870 and combiner sequence, e.g. if it is originally some C++ish user
6871 defined reduction, while still allowing the vectorizer to pattern
6872 recognize it and turn it into the appropriate vectorized scan.
6873
6874 For exclusive scan, this is slightly different:
6875 #pragma omp simd reduction(inscan,+:r)
6876 for (...)
6877 {
6878 use (r);
6879 #pragma omp scan exclusive (r)
6880 r += something ();
6881 }
6882 shall have body with:
6883 // Initialization for input phase, store the reduction initializer:
6884 _20 = .GOMP_SIMD_LANE (simduid.3_14(D), 0);
6885 _21 = .GOMP_SIMD_LANE (simduid.3_14(D), 1);
6886 D.2042[_21] = 0;
6887 // Actual input phase:
6888 ...
6889 r.0_5 = D.2042[_20];
6890 _6 = _4 + r.0_5;
6891 D.2042[_20] = _6;
6892 // Initialization for scan phase:
6893 _25 = .GOMP_SIMD_LANE (simduid.3_14(D), 3);
6894 _26 = D.2043[_25];
6895 D.2044[_25] = _26;
6896 _27 = D.2042[_25];
6897 _28 = _26 + _27;
6898 D.2043[_25] = _28;
6899 // Actual scan phase:
6900 ...
6901 r.1_8 = D.2044[_20];
6902 ... */
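/* As a purely illustrative example (values not from the original comment,
   assuming r starts at 0): if something () yields 1, 2, 3, 4 in successive
   iterations, the inclusive scan makes use (r) see 1, 3, 6, 10, whereas the
   exclusive scan makes use (r) see 0, 1, 3, 6.  */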
6903
6904 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 2)
6905 {
6906 /* Match the D.2042[_21] = 0; store above. Just require that
6907 it is a constant or external definition store. */
6908 if (rhs_dt != vect_constant_def && rhs_dt != vect_external_def)
6909 {
6910 fail_init:
6911 if (dump_enabled_p ())
6912 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6913 "unsupported OpenMP scan initializer store.\n");
6914 return false;
6915 }
6916
6917 if (! loop_vinfo->scan_map)
6918 loop_vinfo->scan_map = new hash_map<tree, tree>;
6919 tree var = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
6920 tree &cached = loop_vinfo->scan_map->get_or_insert (var);
6921 if (cached)
6922 goto fail_init;
6923 cached = gimple_assign_rhs1 (STMT_VINFO_STMT (stmt_info));
6924
6925 /* These stores can be vectorized normally. */
6926 return true;
6927 }
6928
6929 if (rhs_dt != vect_internal_def)
6930 {
6931 fail:
6932 if (dump_enabled_p ())
6933 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6934 "unsupported OpenMP scan combiner pattern.\n");
6935 return false;
6936 }
6937
6938 gimple *stmt = STMT_VINFO_STMT (stmt_info);
6939 tree rhs = gimple_assign_rhs1 (stmt);
6940 if (TREE_CODE (rhs) != SSA_NAME)
6941 goto fail;
6942
6943 gimple *other_store_stmt = NULL;
6944 tree var = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
6945 bool inscan_var_store
6946 = lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var)) != NULL;
6947
6948 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4)
6949 {
6950 if (!inscan_var_store)
6951 {
6952 use_operand_p use_p;
6953 imm_use_iterator iter;
6954 FOR_EACH_IMM_USE_FAST (use_p, iter, rhs)
6955 {
6956 gimple *use_stmt = USE_STMT (use_p);
6957 if (use_stmt == stmt || is_gimple_debug (use_stmt))
6958 continue;
6959 if (gimple_bb (use_stmt) != gimple_bb (stmt)
6960 || !is_gimple_assign (use_stmt)
6961 || gimple_assign_rhs_class (use_stmt) != GIMPLE_BINARY_RHS
6962 || other_store_stmt
6963 || TREE_CODE (gimple_assign_lhs (use_stmt)) != SSA_NAME)
6964 goto fail;
6965 other_store_stmt = use_stmt;
6966 }
6967 if (other_store_stmt == NULL)
6968 goto fail;
6969 rhs = gimple_assign_lhs (other_store_stmt);
6970 if (!single_imm_use (rhs, &use_p, &other_store_stmt))
6971 goto fail;
6972 }
6973 }
6974 else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 3)
6975 {
6976 use_operand_p use_p;
6977 imm_use_iterator iter;
6978 FOR_EACH_IMM_USE_FAST (use_p, iter, rhs)
6979 {
6980 gimple *use_stmt = USE_STMT (use_p);
6981 if (use_stmt == stmt || is_gimple_debug (use_stmt))
6982 continue;
6983 if (other_store_stmt)
6984 goto fail;
6985 other_store_stmt = use_stmt;
6986 }
6987 }
6988 else
6989 goto fail;
6990
6991 gimple *def_stmt = SSA_NAME_DEF_STMT (rhs);
6992 if (gimple_bb (def_stmt) != gimple_bb (stmt)
6993 || !is_gimple_assign (def_stmt)
6994 || gimple_assign_rhs_class (def_stmt) != GIMPLE_BINARY_RHS)
6995 goto fail;
6996
6997 enum tree_code code = gimple_assign_rhs_code (def_stmt);
6998 /* For pointer addition, we should use the normal plus for the vector
6999 operation. */
7000 switch (code)
7001 {
7002 case POINTER_PLUS_EXPR:
7003 code = PLUS_EXPR;
7004 break;
7005 case MULT_HIGHPART_EXPR:
7006 goto fail;
7007 default:
7008 break;
7009 }
7010 if (TREE_CODE_LENGTH (code) != binary_op || !commutative_tree_code (code))
7011 goto fail;
7012
7013 tree rhs1 = gimple_assign_rhs1 (def_stmt);
7014 tree rhs2 = gimple_assign_rhs2 (def_stmt);
7015 if (TREE_CODE (rhs1) != SSA_NAME || TREE_CODE (rhs2) != SSA_NAME)
7016 goto fail;
7017
7018 gimple *load1_stmt = SSA_NAME_DEF_STMT (rhs1);
7019 gimple *load2_stmt = SSA_NAME_DEF_STMT (rhs2);
7020 if (gimple_bb (load1_stmt) != gimple_bb (stmt)
7021 || !gimple_assign_load_p (load1_stmt)
7022 || gimple_bb (load2_stmt) != gimple_bb (stmt)
7023 || !gimple_assign_load_p (load2_stmt))
7024 goto fail;
7025
7026 stmt_vec_info load1_stmt_info = loop_vinfo->lookup_stmt (load1_stmt);
7027 stmt_vec_info load2_stmt_info = loop_vinfo->lookup_stmt (load2_stmt);
7028 if (load1_stmt_info == NULL
7029 || load2_stmt_info == NULL
7030 || (STMT_VINFO_SIMD_LANE_ACCESS_P (load1_stmt_info)
7031 != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info))
7032 || (STMT_VINFO_SIMD_LANE_ACCESS_P (load2_stmt_info)
7033 != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info)))
7034 goto fail;
7035
7036 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && inscan_var_store)
7037 {
7038 dr_vec_info *load1_dr_info = STMT_VINFO_DR_INFO (load1_stmt_info);
7039 if (TREE_CODE (DR_BASE_ADDRESS (load1_dr_info->dr)) != ADDR_EXPR
7040 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info->dr), 0)))
7041 goto fail;
7042 tree var1 = TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info->dr), 0);
7043 tree lrhs;
7044 if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1)))
7045 lrhs = rhs1;
7046 else
7047 lrhs = rhs2;
7048 use_operand_p use_p;
7049 imm_use_iterator iter;
7050 FOR_EACH_IMM_USE_FAST (use_p, iter, lrhs)
7051 {
7052 gimple *use_stmt = USE_STMT (use_p);
7053 if (use_stmt == def_stmt || is_gimple_debug (use_stmt))
7054 continue;
7055 if (other_store_stmt)
7056 goto fail;
7057 other_store_stmt = use_stmt;
7058 }
7059 }
7060
7061 if (other_store_stmt == NULL)
7062 goto fail;
7063 if (gimple_bb (other_store_stmt) != gimple_bb (stmt)
7064 || !gimple_store_p (other_store_stmt))
7065 goto fail;
7066
7067 stmt_vec_info other_store_stmt_info
7068 = loop_vinfo->lookup_stmt (other_store_stmt);
7069 if (other_store_stmt_info == NULL
7070 || (STMT_VINFO_SIMD_LANE_ACCESS_P (other_store_stmt_info)
7071 != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info)))
7072 goto fail;
7073
7074 gimple *stmt1 = stmt;
7075 gimple *stmt2 = other_store_stmt;
7076 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && !inscan_var_store)
7077 std::swap (stmt1, stmt2);
7078 if (scan_operand_equal_p (gimple_assign_lhs (stmt1),
7079 gimple_assign_rhs1 (load2_stmt)))
7080 {
7081 std::swap (rhs1, rhs2);
7082 std::swap (load1_stmt, load2_stmt);
7083 std::swap (load1_stmt_info, load2_stmt_info);
7084 }
7085 if (!scan_operand_equal_p (gimple_assign_lhs (stmt1),
7086 gimple_assign_rhs1 (load1_stmt)))
7087 goto fail;
7088
7089 tree var3 = NULL_TREE;
7090 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 3
7091 && !scan_operand_equal_p (gimple_assign_lhs (stmt2),
7092 gimple_assign_rhs1 (load2_stmt)))
7093 goto fail;
7094 else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4)
7095 {
7096 dr_vec_info *load2_dr_info = STMT_VINFO_DR_INFO (load2_stmt_info);
7097 if (TREE_CODE (DR_BASE_ADDRESS (load2_dr_info->dr)) != ADDR_EXPR
7098 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info->dr), 0)))
7099 goto fail;
7100 var3 = TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info->dr), 0);
7101 if (!lookup_attribute ("omp simd array", DECL_ATTRIBUTES (var3))
7102 || lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var3))
7103 || lookup_attribute ("omp simd inscan exclusive",
7104 DECL_ATTRIBUTES (var3)))
7105 goto fail;
7106 }
7107
7108 dr_vec_info *other_dr_info = STMT_VINFO_DR_INFO (other_store_stmt_info);
7109 if (TREE_CODE (DR_BASE_ADDRESS (other_dr_info->dr)) != ADDR_EXPR
7110 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (other_dr_info->dr), 0)))
7111 goto fail;
7112
7113 tree var1 = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
7114 tree var2 = TREE_OPERAND (DR_BASE_ADDRESS (other_dr_info->dr), 0);
7115 if (!lookup_attribute ("omp simd array", DECL_ATTRIBUTES (var1))
7116 || !lookup_attribute ("omp simd array", DECL_ATTRIBUTES (var2))
7117 || (!lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1)))
7118 == (!lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var2))))
7119 goto fail;
7120
7121 if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1)))
7122 std::swap (var1, var2);
7123
7124 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4)
7125 {
7126 if (!lookup_attribute ("omp simd inscan exclusive",
7127 DECL_ATTRIBUTES (var1)))
7128 goto fail;
7129 var1 = var3;
7130 }
7131
7132 if (loop_vinfo->scan_map == NULL)
7133 goto fail;
7134 tree *init = loop_vinfo->scan_map->get (var1);
7135 if (init == NULL)
7136 goto fail;
7137
7138 /* The IL is as expected, now check if we can actually vectorize it.
7139 Inclusive scan:
7140 _26 = D.2043[_25];
7141 _27 = D.2042[_25];
7142 _28 = _26 + _27;
7143 D.2043[_25] = _28;
7144 D.2042[_25] = _28;
7145 should be vectorized as (where _40 is the vectorized rhs
7146 from the D.2042[_21] = 0; store):
7147 _30 = MEM <vector(8) int> [(int *)&D.2043];
7148 _31 = MEM <vector(8) int> [(int *)&D.2042];
7149 _32 = VEC_PERM_EXPR <_40, _31, { 0, 8, 9, 10, 11, 12, 13, 14 }>;
7150 _33 = _31 + _32;
7151 // _33 = { _31[0], _31[0]+_31[1], _31[1]+_31[2], ..., _31[6]+_31[7] };
7152 _34 = VEC_PERM_EXPR <_40, _33, { 0, 1, 8, 9, 10, 11, 12, 13 }>;
7153 _35 = _33 + _34;
7154 // _35 = { _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
7155 // _31[1]+.._31[4], ... _31[4]+.._31[7] };
7156 _36 = VEC_PERM_EXPR <_40, _35, { 0, 1, 2, 3, 8, 9, 10, 11 }>;
7157 _37 = _35 + _36;
7158 // _37 = { _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
7159 // _31[0]+.._31[4], ... _31[0]+.._31[7] };
7160 _38 = _30 + _37;
7161 _39 = VEC_PERM_EXPR <_38, _38, { 7, 7, 7, 7, 7, 7, 7, 7 }>;
7162 MEM <vector(8) int> [(int *)&D.2043] = _39;
7163 MEM <vector(8) int> [(int *)&D.2042] = _38;
7164 Exclusive scan:
7165 _26 = D.2043[_25];
7166 D.2044[_25] = _26;
7167 _27 = D.2042[_25];
7168 _28 = _26 + _27;
7169 D.2043[_25] = _28;
7170 should be vectorized as (where _40 is the vectorized rhs
7171 from the D.2042[_21] = 0; store):
7172 _30 = MEM <vector(8) int> [(int *)&D.2043];
7173 _31 = MEM <vector(8) int> [(int *)&D.2042];
7174 _32 = VEC_PERM_EXPR <_40, _31, { 0, 8, 9, 10, 11, 12, 13, 14 }>;
7175 _33 = VEC_PERM_EXPR <_40, _32, { 0, 8, 9, 10, 11, 12, 13, 14 }>;
7176 _34 = _32 + _33;
7177 // _34 = { 0, _31[0], _31[0]+_31[1], _31[1]+_31[2], _31[2]+_31[3],
7178 // _31[3]+_31[4], ... _31[5]+.._31[6] };
7179 _35 = VEC_PERM_EXPR <_40, _34, { 0, 1, 8, 9, 10, 11, 12, 13 }>;
7180 _36 = _34 + _35;
7181 // _36 = { 0, _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
7182 // _31[1]+.._31[4], ... _31[3]+.._31[6] };
7183 _37 = VEC_PERM_EXPR <_40, _36, { 0, 1, 2, 3, 8, 9, 10, 11 }>;
7184 _38 = _36 + _37;
7185 // _38 = { 0, _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
7186 // _31[0]+.._31[4], ... _31[0]+.._31[6] };
7187 _39 = _30 + _38;
7188 _50 = _31 + _39;
7189 _51 = VEC_PERM_EXPR <_50, _50, { 7, 7, 7, 7, 7, 7, 7, 7 }>;
7190 MEM <vector(8) int> [(int *)&D.2044] = _39;
7191 MEM <vector(8) int> [(int *)&D.2042] = _51; */
7192 enum machine_mode vec_mode = TYPE_MODE (vectype);
7193 optab optab = optab_for_tree_code (code, vectype, optab_default);
7194 if (!optab || optab_handler (optab, vec_mode) == CODE_FOR_nothing)
7195 goto fail;
7196
7197 int units_log2 = scan_store_can_perm_p (vectype, *init);
7198 if (units_log2 == -1)
7199 goto fail;
7200
7201 return true;
7202 }
7203
7204
7205 /* Function vectorizable_scan_store.
7206
7207 Helper of vectorizable_store, arguments as for vectorizable_store.
7208 Handle only the transformation, checking is done in check_scan_store. */
7209
7210 static bool
7211 vectorizable_scan_store (vec_info *vinfo,
7212 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
7213 gimple **vec_stmt, int ncopies)
7214 {
7215 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
7216 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
7217 tree ref_type = reference_alias_ptr_type (DR_REF (dr_info->dr));
7218 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
7219
7220 if (dump_enabled_p ())
7221 dump_printf_loc (MSG_NOTE, vect_location,
7222 "transform scan store. ncopies = %d\n", ncopies);
7223
7224 gimple *stmt = STMT_VINFO_STMT (stmt_info);
7225 tree rhs = gimple_assign_rhs1 (stmt);
7226 gcc_assert (TREE_CODE (rhs) == SSA_NAME);
7227
7228 tree var = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
7229 bool inscan_var_store
7230 = lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var)) != NULL;
7231
7232 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && !inscan_var_store)
7233 {
7234 use_operand_p use_p;
7235 imm_use_iterator iter;
7236 FOR_EACH_IMM_USE_FAST (use_p, iter, rhs)
7237 {
7238 gimple *use_stmt = USE_STMT (use_p);
7239 if (use_stmt == stmt || is_gimple_debug (use_stmt))
7240 continue;
7241 rhs = gimple_assign_lhs (use_stmt);
7242 break;
7243 }
7244 }
7245
7246 gimple *def_stmt = SSA_NAME_DEF_STMT (rhs);
7247 enum tree_code code = gimple_assign_rhs_code (def_stmt);
7248 if (code == POINTER_PLUS_EXPR)
7249 code = PLUS_EXPR;
7250 gcc_assert (TREE_CODE_LENGTH (code) == binary_op
7251 && commutative_tree_code (code));
7252 tree rhs1 = gimple_assign_rhs1 (def_stmt);
7253 tree rhs2 = gimple_assign_rhs2 (def_stmt);
7254 gcc_assert (TREE_CODE (rhs1) == SSA_NAME && TREE_CODE (rhs2) == SSA_NAME);
7255 gimple *load1_stmt = SSA_NAME_DEF_STMT (rhs1);
7256 gimple *load2_stmt = SSA_NAME_DEF_STMT (rhs2);
7257 stmt_vec_info load1_stmt_info = loop_vinfo->lookup_stmt (load1_stmt);
7258 stmt_vec_info load2_stmt_info = loop_vinfo->lookup_stmt (load2_stmt);
7259 dr_vec_info *load1_dr_info = STMT_VINFO_DR_INFO (load1_stmt_info);
7260 dr_vec_info *load2_dr_info = STMT_VINFO_DR_INFO (load2_stmt_info);
7261 tree var1 = TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info->dr), 0);
7262 tree var2 = TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info->dr), 0);
7263
7264 if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1)))
7265 {
7266 std::swap (rhs1, rhs2);
7267 std::swap (var1, var2);
7268 std::swap (load1_dr_info, load2_dr_info);
7269 }
7270
7271 tree *init = loop_vinfo->scan_map->get (var1);
7272 gcc_assert (init);
7273
7274 unsigned HOST_WIDE_INT nunits;
7275 if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits))
7276 gcc_unreachable ();
7277 auto_vec<enum scan_store_kind, 16> use_whole_vector;
7278 int units_log2 = scan_store_can_perm_p (vectype, *init, &use_whole_vector);
7279 gcc_assert (units_log2 > 0);
7280 auto_vec<tree, 16> perms;
7281 perms.quick_grow (units_log2 + 1);
7282 tree zero_vec = NULL_TREE, masktype = NULL_TREE;
7283 for (int i = 0; i <= units_log2; ++i)
7284 {
7285 unsigned HOST_WIDE_INT j, k;
7286 vec_perm_builder sel (nunits, nunits, 1);
7287 sel.quick_grow (nunits);
7288 if (i == units_log2)
7289 for (j = 0; j < nunits; ++j)
7290 sel[j] = nunits - 1;
7291 else
7292 {
7293 for (j = 0; j < (HOST_WIDE_INT_1U << i); ++j)
7294 sel[j] = j;
7295 for (k = 0; j < nunits; ++j, ++k)
7296 sel[j] = nunits + k;
7297 }
7298 vec_perm_indices indices (sel, i == units_log2 ? 1 : 2, nunits);
7299 if (!use_whole_vector.is_empty ()
7300 && use_whole_vector[i] != scan_store_kind_perm)
7301 {
7302 if (zero_vec == NULL_TREE)
7303 zero_vec = build_zero_cst (vectype);
7304 if (masktype == NULL_TREE
7305 && use_whole_vector[i] == scan_store_kind_lshift_cond)
7306 masktype = truth_type_for (vectype);
7307 perms[i] = vect_gen_perm_mask_any (vectype, indices);
7308 }
7309 else
7310 perms[i] = vect_gen_perm_mask_checked (vectype, indices);
7311 }
7312
7313 tree vec_oprnd1 = NULL_TREE;
7314 tree vec_oprnd2 = NULL_TREE;
7315 tree vec_oprnd3 = NULL_TREE;
7316 tree dataref_ptr = DR_BASE_ADDRESS (dr_info->dr);
7317 tree dataref_offset = build_int_cst (ref_type, 0);
7318 tree bump = vect_get_data_ptr_increment (vinfo, dr_info,
7319 vectype, VMAT_CONTIGUOUS);
7320 tree ldataref_ptr = NULL_TREE;
7321 tree orig = NULL_TREE;
7322 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && !inscan_var_store)
7323 ldataref_ptr = DR_BASE_ADDRESS (load1_dr_info->dr);
7324 auto_vec<tree> vec_oprnds1;
7325 auto_vec<tree> vec_oprnds2;
7326 auto_vec<tree> vec_oprnds3;
7327 vect_get_vec_defs (vinfo, stmt_info, NULL, ncopies,
7328 *init, &vec_oprnds1,
7329 ldataref_ptr == NULL ? rhs1 : NULL, &vec_oprnds2,
7330 rhs2, &vec_oprnds3);
7331 for (int j = 0; j < ncopies; j++)
7332 {
7333 vec_oprnd1 = vec_oprnds1[j];
7334 if (ldataref_ptr == NULL)
7335 vec_oprnd2 = vec_oprnds2[j];
7336 vec_oprnd3 = vec_oprnds3[j];
7337 if (j == 0)
7338 orig = vec_oprnd3;
7339 else if (!inscan_var_store)
7340 dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset, bump);
7341
7342 if (ldataref_ptr)
7343 {
7344 vec_oprnd2 = make_ssa_name (vectype);
7345 tree data_ref = fold_build2 (MEM_REF, vectype,
7346 unshare_expr (ldataref_ptr),
7347 dataref_offset);
7348 vect_copy_ref_info (data_ref, DR_REF (load1_dr_info->dr));
7349 gimple *g = gimple_build_assign (vec_oprnd2, data_ref);
7350 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
7351 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
7352 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
7353 }
7354
7355 tree v = vec_oprnd2;
7356 for (int i = 0; i < units_log2; ++i)
7357 {
7358 tree new_temp = make_ssa_name (vectype);
7359 gimple *g = gimple_build_assign (new_temp, VEC_PERM_EXPR,
7360 (zero_vec
7361 && (use_whole_vector[i]
7362 != scan_store_kind_perm))
7363 ? zero_vec : vec_oprnd1, v,
7364 perms[i]);
7365 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
7366 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
7367 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
7368
7369 if (zero_vec && use_whole_vector[i] == scan_store_kind_lshift_cond)
7370 {
7371 /* The whole vector shift shifted in zeros, but if *init
7372 is not initializer_zerop, we need to replace those zero elements
7373 with elements from vec_oprnd1. */
7374 tree_vector_builder vb (masktype, nunits, 1);
7375 for (unsigned HOST_WIDE_INT k = 0; k < nunits; ++k)
7376 vb.quick_push (k < (HOST_WIDE_INT_1U << i)
7377 ? boolean_false_node : boolean_true_node);
7378
7379 tree new_temp2 = make_ssa_name (vectype);
7380 g = gimple_build_assign (new_temp2, VEC_COND_EXPR, vb.build (),
7381 new_temp, vec_oprnd1);
7382 vect_finish_stmt_generation (vinfo, stmt_info,
7383 g, gsi);
7384 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
7385 new_temp = new_temp2;
7386 }
7387
7388 /* For exclusive scan, perform the perms[i] permutation once
7389 more. */
7390 if (i == 0
7391 && STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4
7392 && v == vec_oprnd2)
7393 {
7394 v = new_temp;
7395 --i;
7396 continue;
7397 }
7398
7399 tree new_temp2 = make_ssa_name (vectype);
7400 g = gimple_build_assign (new_temp2, code, v, new_temp);
7401 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
7402 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
7403
7404 v = new_temp2;
7405 }
7406
7407 tree new_temp = make_ssa_name (vectype);
7408 gimple *g = gimple_build_assign (new_temp, code, orig, v);
7409 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
7410 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
7411
7412 tree last_perm_arg = new_temp;
7413 /* For exclusive scan, new_temp computed above is the exclusive scan
7414 prefix sum. Turn it into an inclusive prefix sum for the broadcast
7415 of the last element into orig. */
7416 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4)
7417 {
7418 last_perm_arg = make_ssa_name (vectype);
7419 g = gimple_build_assign (last_perm_arg, code, new_temp, vec_oprnd2);
7420 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
7421 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
7422 }
7423
7424 orig = make_ssa_name (vectype);
7425 g = gimple_build_assign (orig, VEC_PERM_EXPR, last_perm_arg,
7426 last_perm_arg, perms[units_log2]);
7427 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
7428 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
7429
7430 if (!inscan_var_store)
7431 {
7432 tree data_ref = fold_build2 (MEM_REF, vectype,
7433 unshare_expr (dataref_ptr),
7434 dataref_offset);
7435 vect_copy_ref_info (data_ref, DR_REF (dr_info->dr));
7436 g = gimple_build_assign (data_ref, new_temp);
7437 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
7438 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
7439 }
7440 }
7441
7442 if (inscan_var_store)
7443 for (int j = 0; j < ncopies; j++)
7444 {
7445 if (j != 0)
7446 dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset, bump);
7447
7448 tree data_ref = fold_build2 (MEM_REF, vectype,
7449 unshare_expr (dataref_ptr),
7450 dataref_offset);
7451 vect_copy_ref_info (data_ref, DR_REF (dr_info->dr));
7452 gimple *g = gimple_build_assign (data_ref, orig);
7453 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
7454 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
7455 }
7456 return true;
7457 }
7458
7459
7460 /* Function vectorizable_store.
7461
7462 Check if STMT_INFO defines a non-scalar data-ref (array/pointer/structure)
7463 that can be vectorized.
7464 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
7465 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
7466 Return true if STMT_INFO is vectorizable in this way. */
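/* For illustration only (hypothetical IL, invented SSA names): a scalar
   store a[i_3] = x_5 in a loop vectorized with a 4-element int vectype
   is replaced by something like
     MEM <vector(4) int> [(int *)vectp_a.12_45] = vect_x.10_43;
   with the data-ref pointer vectp_a bumped by the vector size for each
   vector statement.  */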
7467
7468 static bool
7469 vectorizable_store (vec_info *vinfo,
7470 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
7471 gimple **vec_stmt, slp_tree slp_node,
7472 stmt_vector_for_cost *cost_vec)
7473 {
7474 tree data_ref;
7475 tree op;
7476 tree vec_oprnd = NULL_TREE;
7477 tree elem_type;
7478 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
7479 class loop *loop = NULL;
7480 machine_mode vec_mode;
7481 tree dummy;
7482 enum vect_def_type rhs_dt = vect_unknown_def_type;
7483 enum vect_def_type mask_dt = vect_unknown_def_type;
7484 tree dataref_ptr = NULL_TREE;
7485 tree dataref_offset = NULL_TREE;
7486 gimple *ptr_incr = NULL;
7487 int ncopies;
7488 int j;
7489 stmt_vec_info first_stmt_info;
7490 bool grouped_store;
7491 unsigned int group_size, i;
7492 vec<tree> oprnds = vNULL;
7493 vec<tree> result_chain = vNULL;
7494 vec<tree> vec_oprnds = vNULL;
7495 bool slp = (slp_node != NULL);
7496 unsigned int vec_num;
7497 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
7498 tree aggr_type;
7499 gather_scatter_info gs_info;
7500 poly_uint64 vf;
7501 vec_load_store_type vls_type;
7502 tree ref_type;
7503
7504 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
7505 return false;
7506
7507 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
7508 && ! vec_stmt)
7509 return false;
7510
7511 /* Is vectorizable store? */
7512
7513 tree mask = NULL_TREE, mask_vectype = NULL_TREE;
7514 if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
7515 {
7516 tree scalar_dest = gimple_assign_lhs (assign);
7517 if (TREE_CODE (scalar_dest) == VIEW_CONVERT_EXPR
7518 && is_pattern_stmt_p (stmt_info))
7519 scalar_dest = TREE_OPERAND (scalar_dest, 0);
7520 if (TREE_CODE (scalar_dest) != ARRAY_REF
7521 && TREE_CODE (scalar_dest) != BIT_FIELD_REF
7522 && TREE_CODE (scalar_dest) != INDIRECT_REF
7523 && TREE_CODE (scalar_dest) != COMPONENT_REF
7524 && TREE_CODE (scalar_dest) != IMAGPART_EXPR
7525 && TREE_CODE (scalar_dest) != REALPART_EXPR
7526 && TREE_CODE (scalar_dest) != MEM_REF)
7527 return false;
7528 }
7529 else
7530 {
7531 gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
7532 if (!call || !gimple_call_internal_p (call))
7533 return false;
7534
7535 internal_fn ifn = gimple_call_internal_fn (call);
7536 if (!internal_store_fn_p (ifn))
7537 return false;
7538
7539 if (slp_node != NULL)
7540 {
7541 if (dump_enabled_p ())
7542 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7543 "SLP of masked stores not supported.\n");
7544 return false;
7545 }
7546
7547 int mask_index = internal_fn_mask_index (ifn);
7548 if (mask_index >= 0
7549 && !vect_check_scalar_mask (vinfo, stmt_info, slp_node, mask_index,
7550 &mask, NULL, &mask_dt, &mask_vectype))
7551 return false;
7552 }
7553
7554 op = vect_get_store_rhs (stmt_info);
7555
7556 /* Cannot have hybrid store SLP -- that would mean storing to the
7557 same location twice. */
7558 gcc_assert (slp == PURE_SLP_STMT (stmt_info));
7559
7560 tree vectype = STMT_VINFO_VECTYPE (stmt_info), rhs_vectype = NULL_TREE;
7561 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
7562
7563 if (loop_vinfo)
7564 {
7565 loop = LOOP_VINFO_LOOP (loop_vinfo);
7566 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
7567 }
7568 else
7569 vf = 1;
7570
7571 /* Multiple types in SLP are handled by creating the appropriate number of
7572 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
7573 case of SLP. */
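/* E.g. (matching the unrolling example earlier in this file): with
   VF == 16 and a 4-element vectype, the non-SLP path below computes
   ncopies == 16 / 4 == 4.  */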
7574 if (slp)
7575 ncopies = 1;
7576 else
7577 ncopies = vect_get_num_copies (loop_vinfo, vectype);
7578
7579 gcc_assert (ncopies >= 1);
7580
7581 /* FORNOW. This restriction should be relaxed. */
7582 if (loop && nested_in_vect_loop_p (loop, stmt_info) && ncopies > 1)
7583 {
7584 if (dump_enabled_p ())
7585 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7586 "multiple types in nested loop.\n");
7587 return false;
7588 }
7589
7590 if (!vect_check_store_rhs (vinfo, stmt_info, slp_node,
7591 op, &rhs_dt, &rhs_vectype, &vls_type))
7592 return false;
7593
7594 elem_type = TREE_TYPE (vectype);
7595 vec_mode = TYPE_MODE (vectype);
7596
7597 if (!STMT_VINFO_DATA_REF (stmt_info))
7598 return false;
7599
7600 vect_memory_access_type memory_access_type;
7601 enum dr_alignment_support alignment_support_scheme;
7602 int misalignment;
7603 poly_int64 poffset;
7604 if (!get_load_store_type (vinfo, stmt_info, vectype, slp_node, mask, vls_type,
7605 ncopies, &memory_access_type, &poffset,
7606 &alignment_support_scheme, &misalignment, &gs_info))
7607 return false;
7608
7609 if (mask)
7610 {
7611 if (memory_access_type == VMAT_CONTIGUOUS)
7612 {
7613 if (!VECTOR_MODE_P (vec_mode)
7614 || !can_vec_mask_load_store_p (vec_mode,
7615 TYPE_MODE (mask_vectype), false))
7616 return false;
7617 }
7618 else if (memory_access_type != VMAT_LOAD_STORE_LANES
7619 && (memory_access_type != VMAT_GATHER_SCATTER
7620 || (gs_info.decl && !VECTOR_BOOLEAN_TYPE_P (mask_vectype))))
7621 {
7622 if (dump_enabled_p ())
7623 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7624 "unsupported access type for masked store.\n");
7625 return false;
7626 }
7627 }
7628 else
7629 {
7630 /* FORNOW. In some cases we can vectorize even if the data-type is not
7631 supported (e.g. array initialization with 0). */
7632 if (optab_handler (mov_optab, vec_mode) == CODE_FOR_nothing)
7633 return false;
7634 }
7635
7636 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info), *first_dr_info = NULL;
7637 grouped_store = (STMT_VINFO_GROUPED_ACCESS (stmt_info)
7638 && memory_access_type != VMAT_GATHER_SCATTER
7639 && (slp || memory_access_type != VMAT_CONTIGUOUS));
7640 if (grouped_store)
7641 {
7642 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
7643 first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
7644 group_size = DR_GROUP_SIZE (first_stmt_info);
7645 }
7646 else
7647 {
7648 first_stmt_info = stmt_info;
7649 first_dr_info = dr_info;
7650 group_size = vec_num = 1;
7651 }
7652
7653 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) > 1 && !vec_stmt)
7654 {
7655 if (!check_scan_store (vinfo, stmt_info, vectype, rhs_dt, slp, mask,
7656 memory_access_type))
7657 return false;
7658 }
7659
7660 if (!vec_stmt) /* transformation not required. */
7661 {
7662 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;
7663
7664 if (loop_vinfo
7665 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo))
7666 check_load_store_for_partial_vectors (loop_vinfo, vectype, slp_node,
7667 vls_type, group_size,
7668 memory_access_type, &gs_info,
7669 mask);
7670
7671 if (slp_node
7672 && !vect_maybe_update_slp_op_vectype (SLP_TREE_CHILDREN (slp_node)[0],
7673 vectype))
7674 {
7675 if (dump_enabled_p ())
7676 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7677 "incompatible vector types for invariants\n");
7678 return false;
7679 }
7680
7681 if (dump_enabled_p ()
7682 && memory_access_type != VMAT_ELEMENTWISE
7683 && memory_access_type != VMAT_GATHER_SCATTER
7684 && alignment_support_scheme != dr_aligned)
7685 dump_printf_loc (MSG_NOTE, vect_location,
7686 "Vectorizing an unaligned access.\n");
7687
7688 STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
7689 vect_model_store_cost (vinfo, stmt_info, ncopies,
7690 memory_access_type, alignment_support_scheme,
7691 misalignment, vls_type, slp_node, cost_vec);
7692 return true;
7693 }
7694 gcc_assert (memory_access_type == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info));
7695
7696 /* Transform. */
7697
7698 ensure_base_align (dr_info);
7699
7700 if (memory_access_type == VMAT_GATHER_SCATTER && gs_info.decl)
7701 {
7702 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE, src;
7703 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info.decl));
7704 tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
7705 tree ptr, var, scale, vec_mask;
7706 tree mask_arg = NULL_TREE, mask_op = NULL_TREE, perm_mask = NULL_TREE;
7707 tree mask_halfvectype = mask_vectype;
7708 edge pe = loop_preheader_edge (loop);
7709 gimple_seq seq;
7710 basic_block new_bb;
7711 enum { NARROW, NONE, WIDEN } modifier;
7712 poly_uint64 scatter_off_nunits
7713 = TYPE_VECTOR_SUBPARTS (gs_info.offset_vectype);
7714
7715 if (known_eq (nunits, scatter_off_nunits))
7716 modifier = NONE;
7717 else if (known_eq (nunits * 2, scatter_off_nunits))
7718 {
7719 modifier = WIDEN;
7720
7721 /* Currently gathers and scatters are only supported for
7722 fixed-length vectors. */
7723 unsigned int count = scatter_off_nunits.to_constant ();
7724 vec_perm_builder sel (count, count, 1);
7725 for (i = 0; i < (unsigned int) count; ++i)
7726 sel.quick_push (i | (count / 2));
7727
7728 vec_perm_indices indices (sel, 1, count);
7729 perm_mask = vect_gen_perm_mask_checked (gs_info.offset_vectype,
7730 indices);
7731 gcc_assert (perm_mask != NULL_TREE);
7732 }
7733 else if (known_eq (nunits, scatter_off_nunits * 2))
7734 {
7735 modifier = NARROW;
7736
7737 /* Currently gathers and scatters are only supported for
7738 fixed-length vectors. */
7739 unsigned int count = nunits.to_constant ();
7740 vec_perm_builder sel (count, count, 1);
7741 for (i = 0; i < (unsigned int) count; ++i)
7742 sel.quick_push (i | (count / 2));
7743
7744 vec_perm_indices indices (sel, 2, count);
7745 perm_mask = vect_gen_perm_mask_checked (vectype, indices);
7746 gcc_assert (perm_mask != NULL_TREE);
7747 ncopies *= 2;
7748
7749 if (mask)
7750 mask_halfvectype = truth_type_for (gs_info.offset_vectype);
7751 }
7752 else
7753 gcc_unreachable ();
7754
7755 rettype = TREE_TYPE (TREE_TYPE (gs_info.decl));
7756 ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
7757 masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
7758 idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
7759 srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
7760 scaletype = TREE_VALUE (arglist);
7761
7762 gcc_checking_assert (TREE_CODE (masktype) == INTEGER_TYPE
7763 && TREE_CODE (rettype) == VOID_TYPE);
7764
7765 ptr = fold_convert (ptrtype, gs_info.base);
7766 if (!is_gimple_min_invariant (ptr))
7767 {
7768 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
7769 new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
7770 gcc_assert (!new_bb);
7771 }
7772
7773 if (mask == NULL_TREE)
7774 {
7775 mask_arg = build_int_cst (masktype, -1);
7776 mask_arg = vect_init_vector (vinfo, stmt_info,
7777 mask_arg, masktype, NULL);
7778 }
7779
7780 scale = build_int_cst (scaletype, gs_info.scale);
7781
7782 auto_vec<tree> vec_oprnds0;
7783 auto_vec<tree> vec_oprnds1;
7784 auto_vec<tree> vec_masks;
7785 if (mask)
7786 {
7787 tree mask_vectype = truth_type_for (vectype);
7788 vect_get_vec_defs_for_operand (vinfo, stmt_info,
7789 modifier == NARROW
7790 ? ncopies / 2 : ncopies,
7791 mask, &vec_masks, mask_vectype);
7792 }
7793 vect_get_vec_defs_for_operand (vinfo, stmt_info,
7794 modifier == WIDEN
7795 ? ncopies / 2 : ncopies,
7796 gs_info.offset, &vec_oprnds0);
7797 vect_get_vec_defs_for_operand (vinfo, stmt_info,
7798 modifier == NARROW
7799 ? ncopies / 2 : ncopies,
7800 op, &vec_oprnds1);
7801 for (j = 0; j < ncopies; ++j)
7802 {
7803 if (modifier == WIDEN)
7804 {
7805 if (j & 1)
7806 op = permute_vec_elements (vinfo, vec_oprnd0, vec_oprnd0,
7807 perm_mask, stmt_info, gsi);
7808 else
7809 op = vec_oprnd0 = vec_oprnds0[j / 2];
7810 src = vec_oprnd1 = vec_oprnds1[j];
7811 if (mask)
7812 mask_op = vec_mask = vec_masks[j];
7813 }
7814 else if (modifier == NARROW)
7815 {
7816 if (j & 1)
7817 src = permute_vec_elements (vinfo, vec_oprnd1, vec_oprnd1,
7818 perm_mask, stmt_info, gsi);
7819 else
7820 src = vec_oprnd1 = vec_oprnds1[j / 2];
7821 op = vec_oprnd0 = vec_oprnds0[j];
7822 if (mask)
7823 mask_op = vec_mask = vec_masks[j / 2];
7824 }
7825 else
7826 {
7827 op = vec_oprnd0 = vec_oprnds0[j];
7828 src = vec_oprnd1 = vec_oprnds1[j];
7829 if (mask)
7830 mask_op = vec_mask = vec_masks[j];
7831 }
7832
7833 if (!useless_type_conversion_p (srctype, TREE_TYPE (src)))
7834 {
7835 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (src)),
7836 TYPE_VECTOR_SUBPARTS (srctype)));
7837 var = vect_get_new_ssa_name (srctype, vect_simple_var);
7838 src = build1 (VIEW_CONVERT_EXPR, srctype, src);
7839 gassign *new_stmt
7840 = gimple_build_assign (var, VIEW_CONVERT_EXPR, src);
7841 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7842 src = var;
7843 }
7844
7845 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
7846 {
7847 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op)),
7848 TYPE_VECTOR_SUBPARTS (idxtype)));
7849 var = vect_get_new_ssa_name (idxtype, vect_simple_var);
7850 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
7851 gassign *new_stmt
7852 = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
7853 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7854 op = var;
7855 }
7856
7857 if (mask)
7858 {
7859 tree utype;
7860 mask_arg = mask_op;
7861 if (modifier == NARROW)
7862 {
7863 var = vect_get_new_ssa_name (mask_halfvectype,
7864 vect_simple_var);
7865 gassign *new_stmt
7866 = gimple_build_assign (var, (j & 1) ? VEC_UNPACK_HI_EXPR
7867 : VEC_UNPACK_LO_EXPR,
7868 mask_op);
7869 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7870 mask_arg = var;
7871 }
7872 tree optype = TREE_TYPE (mask_arg);
7873 if (TYPE_MODE (masktype) == TYPE_MODE (optype))
7874 utype = masktype;
7875 else
7876 utype = lang_hooks.types.type_for_mode (TYPE_MODE (optype), 1);
7877 var = vect_get_new_ssa_name (utype, vect_scalar_var);
7878 mask_arg = build1 (VIEW_CONVERT_EXPR, utype, mask_arg);
7879 gassign *new_stmt
7880 = gimple_build_assign (var, VIEW_CONVERT_EXPR, mask_arg);
7881 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7882 mask_arg = var;
7883 if (!useless_type_conversion_p (masktype, utype))
7884 {
7885 gcc_assert (TYPE_PRECISION (utype)
7886 <= TYPE_PRECISION (masktype));
7887 var = vect_get_new_ssa_name (masktype, vect_scalar_var);
7888 new_stmt = gimple_build_assign (var, NOP_EXPR, mask_arg);
7889 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7890 mask_arg = var;
7891 }
7892 }
7893
7894 gcall *new_stmt
7895 = gimple_build_call (gs_info.decl, 5, ptr, mask_arg, op, src, scale);
7896 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7897
7898 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
7899 }
7900 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
7901 return true;
7902 }
7903 else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) >= 3)
7904 return vectorizable_scan_store (vinfo, stmt_info, gsi, vec_stmt, ncopies);
7905
7906 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
7907 DR_GROUP_STORE_COUNT (DR_GROUP_FIRST_ELEMENT (stmt_info))++;
7908
7909 if (grouped_store)
7910 {
7911 /* FORNOW */
7912 gcc_assert (!loop || !nested_in_vect_loop_p (loop, stmt_info));
7913
7914 /* We vectorize all the stmts of the interleaving group when we
7915 reach the last stmt in the group. */
7916 if (DR_GROUP_STORE_COUNT (first_stmt_info)
7917 < DR_GROUP_SIZE (first_stmt_info)
7918 && !slp)
7919 {
7920 *vec_stmt = NULL;
7921 return true;
7922 }
7923
7924 if (slp)
7925 {
7926 grouped_store = false;
7927 /* VEC_NUM is the number of vect stmts to be created for this
7928 group. */
7929 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
7930 first_stmt_info = SLP_TREE_SCALAR_STMTS (slp_node)[0];
7931 gcc_assert (DR_GROUP_FIRST_ELEMENT (first_stmt_info)
7932 == first_stmt_info);
7933 first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
7934 op = vect_get_store_rhs (first_stmt_info);
7935 }
7936 else
7937 /* VEC_NUM is the number of vect stmts to be created for this
7938 group. */
7939 vec_num = group_size;
7940
7941 ref_type = get_group_alias_ptr_type (first_stmt_info);
7942 }
7943 else
7944 ref_type = reference_alias_ptr_type (DR_REF (first_dr_info->dr));
7945
7946 if (dump_enabled_p ())
7947 dump_printf_loc (MSG_NOTE, vect_location,
7948 "transform store. ncopies = %d\n", ncopies);
7949
7950 if (memory_access_type == VMAT_ELEMENTWISE
7951 || memory_access_type == VMAT_STRIDED_SLP)
7952 {
7953 gimple_stmt_iterator incr_gsi;
7954 bool insert_after;
7955 gimple *incr;
7956 tree offvar;
7957 tree ivstep;
7958 tree running_off;
7959 tree stride_base, stride_step, alias_off;
7960 tree vec_oprnd;
7961 tree dr_offset;
7962 unsigned int g;
7963 /* Checked by get_load_store_type. */
7964 unsigned int const_nunits = nunits.to_constant ();
7965
7966 gcc_assert (!LOOP_VINFO_FULLY_MASKED_P (loop_vinfo));
7967 gcc_assert (!nested_in_vect_loop_p (loop, stmt_info));
7968
7969 dr_offset = get_dr_vinfo_offset (vinfo, first_dr_info);
7970 stride_base
7971 = fold_build_pointer_plus
7972 (DR_BASE_ADDRESS (first_dr_info->dr),
7973 size_binop (PLUS_EXPR,
7974 convert_to_ptrofftype (dr_offset),
7975 convert_to_ptrofftype (DR_INIT (first_dr_info->dr))));
7976 stride_step = fold_convert (sizetype, DR_STEP (first_dr_info->dr));
7977
7978 /* For a store with a loop-invariant stride that is not a power of 2
7979 (i.e. not a grouped access) like so:
7980
7981 for (i = 0; i < n; i += stride)
7982 array[i] = ...;
7983
7984 we generate a new induction variable and new stores from
7985 the components of the (vectorized) rhs:
7986
7987 for (j = 0; ; j += VF*stride)
7988 vectemp = ...;
7989 tmp1 = vectemp[0];
7990 array[j] = tmp1;
7991 tmp2 = vectemp[1];
7992 array[j + stride] = tmp2;
7993 ...
7994 */
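/* For instance (hypothetical values, stride == 3, four elements per
   vector): one vector statement covers scalar iterations i, i+3, i+6
   and i+9, its elements are stored to array[j], array[j + 3],
   array[j + 6] and array[j + 9], and j advances by VF*stride == 12
   per vector iteration.  */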
7995
7996 unsigned nstores = const_nunits;
7997 unsigned lnel = 1;
7998 tree ltype = elem_type;
7999 tree lvectype = vectype;
8000 if (slp)
8001 {
8002 if (group_size < const_nunits
8003 && const_nunits % group_size == 0)
8004 {
8005 nstores = const_nunits / group_size;
8006 lnel = group_size;
8007 ltype = build_vector_type (elem_type, group_size);
8008 lvectype = vectype;
8009
8010 /* First check whether the vec_extract optab supports extracting
8011 the group_size-element pieces directly; if not, try the fallbacks below. */
8012 scalar_mode elmode = SCALAR_TYPE_MODE (elem_type);
8013 machine_mode vmode;
8014 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
8015 || !related_vector_mode (TYPE_MODE (vectype), elmode,
8016 group_size).exists (&vmode)
8017 || (convert_optab_handler (vec_extract_optab,
8018 TYPE_MODE (vectype), vmode)
8019 == CODE_FOR_nothing))
8020 {
8021 /* Try to avoid emitting an extract of vector elements
8022 by performing the extracts using an integer type of the
8023 same size, extracting from a vector of those and then
8024 re-interpreting it as the original vector type if
8025 supported. */
8026 unsigned lsize
8027 = group_size * GET_MODE_BITSIZE (elmode);
8028 unsigned int lnunits = const_nunits / group_size;
8029 /* If we can't construct such a vector fall back to
8030 element extracts from the original vector type and
8031 element size stores. */
8032 if (int_mode_for_size (lsize, 0).exists (&elmode)
8033 && VECTOR_MODE_P (TYPE_MODE (vectype))
8034 && related_vector_mode (TYPE_MODE (vectype), elmode,
8035 lnunits).exists (&vmode)
8036 && (convert_optab_handler (vec_extract_optab,
8037 vmode, elmode)
8038 != CODE_FOR_nothing))
8039 {
8040 nstores = lnunits;
8041 lnel = group_size;
8042 ltype = build_nonstandard_integer_type (lsize, 1);
8043 lvectype = build_vector_type (ltype, nstores);
8044 }
8045 /* Else fall back to vector extraction anyway.
8046 Fewer stores are more important than avoiding spilling
8047 of the vector we extract from. Compared to the
8048 construction case in vectorizable_load no store-forwarding
8049 issue exists here for reasonable archs. */
8050 }
8051 }
8052 else if (group_size >= const_nunits
8053 && group_size % const_nunits == 0)
8054 {
8055 nstores = 1;
8056 lnel = const_nunits;
8057 ltype = vectype;
8058 lvectype = vectype;
8059 }
8060 ltype = build_aligned_type (ltype, TYPE_ALIGN (elem_type));
8061 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
8062 }
8063
8064 ivstep = stride_step;
8065 ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (ivstep), ivstep,
8066 build_int_cst (TREE_TYPE (ivstep), vf));
8067
8068 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
8069
8070 stride_base = cse_and_gimplify_to_preheader (loop_vinfo, stride_base);
8071 ivstep = cse_and_gimplify_to_preheader (loop_vinfo, ivstep);
8072 create_iv (stride_base, ivstep, NULL,
8073 loop, &incr_gsi, insert_after,
8074 &offvar, NULL);
8075 incr = gsi_stmt (incr_gsi);
8076
8077 stride_step = cse_and_gimplify_to_preheader (loop_vinfo, stride_step);
8078
8079 alias_off = build_int_cst (ref_type, 0);
8080 stmt_vec_info next_stmt_info = first_stmt_info;
8081 for (g = 0; g < group_size; g++)
8082 {
8083 running_off = offvar;
8084 if (g)
8085 {
8086 tree size = TYPE_SIZE_UNIT (ltype);
8087 tree pos = fold_build2 (MULT_EXPR, sizetype, size_int (g),
8088 size);
8089 tree newoff = copy_ssa_name (running_off, NULL);
8090 incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
8091 running_off, pos);
8092 vect_finish_stmt_generation (vinfo, stmt_info, incr, gsi);
8093 running_off = newoff;
8094 }
8095 if (!slp)
8096 op = vect_get_store_rhs (next_stmt_info);
8097 vect_get_vec_defs (vinfo, next_stmt_info, slp_node, ncopies,
8098 op, &vec_oprnds);
8099 unsigned int group_el = 0;
8100 unsigned HOST_WIDE_INT
8101 elsz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
8102 for (j = 0; j < ncopies; j++)
8103 {
8104 vec_oprnd = vec_oprnds[j];
8105 /* Pun the vector to extract from if necessary. */
8106 if (lvectype != vectype)
8107 {
8108 tree tem = make_ssa_name (lvectype);
8109 gimple *pun
8110 = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
8111 lvectype, vec_oprnd));
8112 vect_finish_stmt_generation (vinfo, stmt_info, pun, gsi);
8113 vec_oprnd = tem;
8114 }
8115 for (i = 0; i < nstores; i++)
8116 {
8117 tree newref, newoff;
8118 gimple *incr, *assign;
8119 tree size = TYPE_SIZE (ltype);
8120 /* Extract the i'th component. */
8121 tree pos = fold_build2 (MULT_EXPR, bitsizetype,
8122 bitsize_int (i), size);
8123 tree elem = fold_build3 (BIT_FIELD_REF, ltype, vec_oprnd,
8124 size, pos);
8125
8126 elem = force_gimple_operand_gsi (gsi, elem, true,
8127 NULL_TREE, true,
8128 GSI_SAME_STMT);
8129
8130 tree this_off = build_int_cst (TREE_TYPE (alias_off),
8131 group_el * elsz);
8132 newref = build2 (MEM_REF, ltype,
8133 running_off, this_off);
8134 vect_copy_ref_info (newref, DR_REF (first_dr_info->dr));
8135
8136 /* And store it to *running_off. */
8137 assign = gimple_build_assign (newref, elem);
8138 vect_finish_stmt_generation (vinfo, stmt_info, assign, gsi);
8139
8140 group_el += lnel;
8141 if (! slp
8142 || group_el == group_size)
8143 {
8144 newoff = copy_ssa_name (running_off, NULL);
8145 incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
8146 running_off, stride_step);
8147 vect_finish_stmt_generation (vinfo, stmt_info, incr, gsi);
8148
8149 running_off = newoff;
8150 group_el = 0;
8151 }
8152 if (g == group_size - 1
8153 && !slp)
8154 {
8155 if (j == 0 && i == 0)
8156 *vec_stmt = assign;
8157 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (assign);
8158 }
8159 }
8160 }
8161 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
8162 vec_oprnds.release ();
8163 if (slp)
8164 break;
8165 }
8166
8167 return true;
8168 }
8169
8170 auto_vec<tree> dr_chain (group_size);
8171 oprnds.create (group_size);
8172
8173 gcc_assert (alignment_support_scheme);
8174 vec_loop_masks *loop_masks
8175 = (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
8176 ? &LOOP_VINFO_MASKS (loop_vinfo)
8177 : NULL);
8178 vec_loop_lens *loop_lens
8179 = (loop_vinfo && LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo)
8180 ? &LOOP_VINFO_LENS (loop_vinfo)
8181 : NULL);
8182
8183 /* Shouldn't go with length-based approach if fully masked. */
8184 gcc_assert (!loop_lens || !loop_masks);
8185
8186 /* Targets with store-lane instructions must not require explicit
8187 realignment. vect_supportable_dr_alignment always returns either
8188 dr_aligned or dr_unaligned_supported for masked operations. */
8189 gcc_assert ((memory_access_type != VMAT_LOAD_STORE_LANES
8190 && !mask
8191 && !loop_masks)
8192 || alignment_support_scheme == dr_aligned
8193 || alignment_support_scheme == dr_unaligned_supported);
8194
8195 tree offset = NULL_TREE;
8196 if (!known_eq (poffset, 0))
8197 offset = size_int (poffset);
8198
8199 tree bump;
8200 tree vec_offset = NULL_TREE;
8201 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
8202 {
8203 aggr_type = NULL_TREE;
8204 bump = NULL_TREE;
8205 }
8206 else if (memory_access_type == VMAT_GATHER_SCATTER)
8207 {
8208 aggr_type = elem_type;
8209 vect_get_strided_load_store_ops (stmt_info, loop_vinfo, &gs_info,
8210 &bump, &vec_offset);
8211 }
8212 else
8213 {
8214 if (memory_access_type == VMAT_LOAD_STORE_LANES)
8215 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
8216 else
8217 aggr_type = vectype;
8218 bump = vect_get_data_ptr_increment (vinfo, dr_info, aggr_type,
8219 memory_access_type);
8220 }
8221
8222 if (mask)
8223 LOOP_VINFO_HAS_MASK_STORE (loop_vinfo) = true;
8224
8225 /* In case the vectorization factor (VF) is bigger than the number
8226 of elements that we can fit in a vectype (nunits), we have to generate
8227 more than one vector stmt - i.e - we need to "unroll" the
8228 vector stmt by a factor VF/nunits. */
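   /* For example, with VF == 8 and a four-element vectype (nunits == 4),
      NCOPIES == VF/nunits == 2, so two vector stores are emitted for the
      scalar store in the loop body.  */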
8229
8230 /* In case of interleaving (non-unit grouped access):
8231
8232 S1: &base + 2 = x2
8233 S2: &base = x0
8234 S3: &base + 1 = x1
8235 S4: &base + 3 = x3
8236
8237      We create vectorized stores starting from the base address (the access of
8238      the first stmt in the chain, S2 in the above example) when the last store
8239      stmt of the chain (S4) is reached:
8240
8241 VS1: &base = vx2
8242 VS2: &base + vec_size*1 = vx0
8243 VS3: &base + vec_size*2 = vx1
8244 VS4: &base + vec_size*3 = vx3
8245
8246 Then permutation statements are generated:
8247
8248 VS5: vx5 = VEC_PERM_EXPR < vx0, vx3, {0, 8, 1, 9, 2, 10, 3, 11} >
8249 VS6: vx6 = VEC_PERM_EXPR < vx0, vx3, {4, 12, 5, 13, 6, 14, 7, 15} >
8250 ...
8251
8252 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
8253 (the order of the data-refs in the output of vect_permute_store_chain
8254 corresponds to the order of scalar stmts in the interleaving chain - see
8255 the documentation of vect_permute_store_chain()).
8256
8257 In case of both multiple types and interleaving, above vector stores and
8258 permutation stmts are created for every copy. The result vector stmts are
8259 put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
8260 STMT_VINFO_RELATED_STMT for the next copies.
8261 */
8262
8263 auto_vec<tree> vec_masks;
8264 tree vec_mask = NULL;
8265 auto_vec<tree> vec_offsets;
8266 auto_vec<vec<tree> > gvec_oprnds;
8267 gvec_oprnds.safe_grow_cleared (group_size, true);
8268 for (j = 0; j < ncopies; j++)
8269 {
8270 gimple *new_stmt;
8271 if (j == 0)
8272 {
8273 if (slp)
8274 {
8275 /* Get vectorized arguments for SLP_NODE. */
8276 vect_get_vec_defs (vinfo, stmt_info, slp_node, 1,
8277 op, &vec_oprnds);
8278 vec_oprnd = vec_oprnds[0];
8279 }
8280 else
8281 {
8282 /* For interleaved stores we collect vectorized defs for all the
8283 stores in the group in DR_CHAIN and OPRNDS. DR_CHAIN is then
8284 used as an input to vect_permute_store_chain().
8285
8286 If the store is not grouped, DR_GROUP_SIZE is 1, and DR_CHAIN
8287 and OPRNDS are of size 1. */
8288 stmt_vec_info next_stmt_info = first_stmt_info;
8289 for (i = 0; i < group_size; i++)
8290 {
8291 /* Since gaps are not supported for interleaved stores,
8292 DR_GROUP_SIZE is the exact number of stmts in the chain.
8293 		     Therefore, NEXT_STMT_INFO can't be NULL_TREE.  If there
8294 		     is no interleaving, DR_GROUP_SIZE is 1,
8295 and only one iteration of the loop will be executed. */
8296 op = vect_get_store_rhs (next_stmt_info);
8297 vect_get_vec_defs_for_operand (vinfo, next_stmt_info,
8298 ncopies, op, &gvec_oprnds[i]);
8299 vec_oprnd = gvec_oprnds[i][0];
8300 dr_chain.quick_push (gvec_oprnds[i][0]);
8301 oprnds.quick_push (gvec_oprnds[i][0]);
8302 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
8303 }
8304 if (mask)
8305 {
8306 vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
8307 mask, &vec_masks, mask_vectype);
8308 vec_mask = vec_masks[0];
8309 }
8310 }
8311
8312 	  /* We should have caught mismatched types earlier.  */
8313 gcc_assert (useless_type_conversion_p (vectype,
8314 TREE_TYPE (vec_oprnd)));
8315 bool simd_lane_access_p
8316 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) != 0;
8317 if (simd_lane_access_p
8318 && !loop_masks
8319 && TREE_CODE (DR_BASE_ADDRESS (first_dr_info->dr)) == ADDR_EXPR
8320 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr_info->dr), 0))
8321 && integer_zerop (get_dr_vinfo_offset (vinfo, first_dr_info))
8322 && integer_zerop (DR_INIT (first_dr_info->dr))
8323 && alias_sets_conflict_p (get_alias_set (aggr_type),
8324 get_alias_set (TREE_TYPE (ref_type))))
8325 {
8326 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr_info->dr));
8327 dataref_offset = build_int_cst (ref_type, 0);
8328 }
8329 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
8330 {
8331 vect_get_gather_scatter_ops (loop_vinfo, loop, stmt_info,
8332 slp_node, &gs_info, &dataref_ptr,
8333 &vec_offsets);
8334 vec_offset = vec_offsets[0];
8335 }
8336 else
8337 dataref_ptr
8338 = vect_create_data_ref_ptr (vinfo, first_stmt_info, aggr_type,
8339 simd_lane_access_p ? loop : NULL,
8340 offset, &dummy, gsi, &ptr_incr,
8341 simd_lane_access_p, bump);
8342 }
8343 else
8344 {
8345 /* For interleaved stores we created vectorized defs for all the
8346 defs stored in OPRNDS in the previous iteration (previous copy).
8347 DR_CHAIN is then used as an input to vect_permute_store_chain().
8348 If the store is not grouped, DR_GROUP_SIZE is 1, and DR_CHAIN and
8349 OPRNDS are of size 1. */
8350 for (i = 0; i < group_size; i++)
8351 {
8352 vec_oprnd = gvec_oprnds[i][j];
8353 dr_chain[i] = gvec_oprnds[i][j];
8354 oprnds[i] = gvec_oprnds[i][j];
8355 }
8356 if (mask)
8357 vec_mask = vec_masks[j];
8358 if (dataref_offset)
8359 dataref_offset
8360 = int_const_binop (PLUS_EXPR, dataref_offset, bump);
8361 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
8362 vec_offset = vec_offsets[j];
8363 else
8364 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
8365 stmt_info, bump);
8366 }
8367
8368 if (memory_access_type == VMAT_LOAD_STORE_LANES)
8369 {
8370 tree vec_array;
8371
8372 /* Get an array into which we can store the individual vectors. */
8373 vec_array = create_vector_array (vectype, vec_num);
8374
8375 /* Invalidate the current contents of VEC_ARRAY. This should
8376 become an RTL clobber too, which prevents the vector registers
8377 from being upward-exposed. */
8378 vect_clobber_variable (vinfo, stmt_info, gsi, vec_array);
8379
8380 /* Store the individual vectors into the array. */
8381 for (i = 0; i < vec_num; i++)
8382 {
8383 vec_oprnd = dr_chain[i];
8384 write_vector_array (vinfo, stmt_info,
8385 gsi, vec_oprnd, vec_array, i);
8386 }
8387
8388 tree final_mask = NULL;
8389 if (loop_masks)
8390 final_mask = vect_get_loop_mask (gsi, loop_masks, ncopies,
8391 vectype, j);
8392 if (vec_mask)
8393 final_mask = prepare_vec_mask (loop_vinfo, mask_vectype,
8394 final_mask, vec_mask, gsi);
8395
8396 gcall *call;
8397 if (final_mask)
8398 {
8399 /* Emit:
8400 MASK_STORE_LANES (DATAREF_PTR, ALIAS_PTR, VEC_MASK,
8401 VEC_ARRAY). */
8402 unsigned int align = TYPE_ALIGN (TREE_TYPE (vectype));
8403 tree alias_ptr = build_int_cst (ref_type, align);
8404 call = gimple_build_call_internal (IFN_MASK_STORE_LANES, 4,
8405 dataref_ptr, alias_ptr,
8406 final_mask, vec_array);
8407 }
8408 else
8409 {
8410 /* Emit:
8411 MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY). */
8412 data_ref = create_array_ref (aggr_type, dataref_ptr, ref_type);
8413 call = gimple_build_call_internal (IFN_STORE_LANES, 1,
8414 vec_array);
8415 gimple_call_set_lhs (call, data_ref);
8416 }
8417 gimple_call_set_nothrow (call, true);
8418 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
8419 new_stmt = call;
8420
8421 /* Record that VEC_ARRAY is now dead. */
8422 vect_clobber_variable (vinfo, stmt_info, gsi, vec_array);
8423 }
8424 else
8425 {
8426 new_stmt = NULL;
8427 if (grouped_store)
8428 {
8429 if (j == 0)
8430 result_chain.create (group_size);
8431 /* Permute. */
8432 vect_permute_store_chain (vinfo, dr_chain, group_size, stmt_info,
8433 gsi, &result_chain);
8434 }
8435
8436 stmt_vec_info next_stmt_info = first_stmt_info;
8437 for (i = 0; i < vec_num; i++)
8438 {
8439 unsigned misalign;
8440 unsigned HOST_WIDE_INT align;
8441
8442 tree final_mask = NULL_TREE;
8443 if (loop_masks)
8444 final_mask = vect_get_loop_mask (gsi, loop_masks,
8445 vec_num * ncopies,
8446 vectype, vec_num * j + i);
8447 if (vec_mask)
8448 final_mask = prepare_vec_mask (loop_vinfo, mask_vectype,
8449 final_mask, vec_mask, gsi);
8450
8451 if (memory_access_type == VMAT_GATHER_SCATTER)
8452 {
8453 tree scale = size_int (gs_info.scale);
8454 gcall *call;
8455 if (final_mask)
8456 call = gimple_build_call_internal
8457 (IFN_MASK_SCATTER_STORE, 5, dataref_ptr, vec_offset,
8458 scale, vec_oprnd, final_mask);
8459 else
8460 call = gimple_build_call_internal
8461 (IFN_SCATTER_STORE, 4, dataref_ptr, vec_offset,
8462 scale, vec_oprnd);
8463 gimple_call_set_nothrow (call, true);
8464 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
8465 new_stmt = call;
8466 break;
8467 }
8468
8469 if (i > 0)
8470 /* Bump the vector pointer. */
8471 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr,
8472 gsi, stmt_info, bump);
8473
8474 if (slp)
8475 vec_oprnd = vec_oprnds[i];
8476 else if (grouped_store)
8477 /* For grouped stores vectorized defs are interleaved in
8478 vect_permute_store_chain(). */
8479 vec_oprnd = result_chain[i];
8480
8481 align = known_alignment (DR_TARGET_ALIGNMENT (first_dr_info));
8482 if (alignment_support_scheme == dr_aligned)
8483 misalign = 0;
8484 else if (misalignment == DR_MISALIGNMENT_UNKNOWN)
8485 {
8486 align = dr_alignment (vect_dr_behavior (vinfo, first_dr_info));
8487 misalign = 0;
8488 }
8489 else
8490 misalign = misalignment;
8491 if (dataref_offset == NULL_TREE
8492 && TREE_CODE (dataref_ptr) == SSA_NAME)
8493 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
8494 misalign);
8495 align = least_bit_hwi (misalign | align);
8496
8497 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
8498 {
8499 tree perm_mask = perm_mask_for_reverse (vectype);
8500 tree perm_dest = vect_create_destination_var
8501 (vect_get_store_rhs (stmt_info), vectype);
8502 tree new_temp = make_ssa_name (perm_dest);
8503
8504 /* Generate the permute statement. */
8505 gimple *perm_stmt
8506 = gimple_build_assign (new_temp, VEC_PERM_EXPR, vec_oprnd,
8507 vec_oprnd, perm_mask);
8508 vect_finish_stmt_generation (vinfo, stmt_info, perm_stmt, gsi);
8509
8510 perm_stmt = SSA_NAME_DEF_STMT (new_temp);
8511 vec_oprnd = new_temp;
8512 }
8513
8514 /* Arguments are ready. Create the new vector stmt. */
8515 if (final_mask)
8516 {
8517 tree ptr = build_int_cst (ref_type, align * BITS_PER_UNIT);
8518 gcall *call
8519 = gimple_build_call_internal (IFN_MASK_STORE, 4,
8520 dataref_ptr, ptr,
8521 final_mask, vec_oprnd);
8522 gimple_call_set_nothrow (call, true);
8523 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
8524 new_stmt = call;
8525 }
8526 else if (loop_lens)
8527 {
8528 tree final_len
8529 = vect_get_loop_len (loop_vinfo, loop_lens,
8530 vec_num * ncopies, vec_num * j + i);
8531 tree ptr = build_int_cst (ref_type, align * BITS_PER_UNIT);
8532 machine_mode vmode = TYPE_MODE (vectype);
8533 opt_machine_mode new_ovmode
8534 = get_len_load_store_mode (vmode, false);
8535 machine_mode new_vmode = new_ovmode.require ();
8536 /* Need conversion if it's wrapped with VnQI. */
8537 if (vmode != new_vmode)
8538 {
8539 tree new_vtype
8540 = build_vector_type_for_mode (unsigned_intQI_type_node,
8541 new_vmode);
8542 tree var
8543 = vect_get_new_ssa_name (new_vtype, vect_simple_var);
8544 vec_oprnd
8545 = build1 (VIEW_CONVERT_EXPR, new_vtype, vec_oprnd);
8546 gassign *new_stmt
8547 = gimple_build_assign (var, VIEW_CONVERT_EXPR,
8548 vec_oprnd);
8549 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt,
8550 gsi);
8551 vec_oprnd = var;
8552 }
8553
8554 signed char biasval =
8555 LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo);
8556
8557 tree bias = build_int_cst (intQI_type_node, biasval);
8558 gcall *call
8559 = gimple_build_call_internal (IFN_LEN_STORE, 5, dataref_ptr,
8560 ptr, final_len, vec_oprnd,
8561 bias);
8562 gimple_call_set_nothrow (call, true);
8563 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
8564 new_stmt = call;
8565 }
8566 else
8567 {
8568 data_ref = fold_build2 (MEM_REF, vectype,
8569 dataref_ptr,
8570 dataref_offset
8571 ? dataref_offset
8572 : build_int_cst (ref_type, 0));
8573 if (alignment_support_scheme == dr_aligned)
8574 ;
8575 else
8576 TREE_TYPE (data_ref)
8577 = build_aligned_type (TREE_TYPE (data_ref),
8578 align * BITS_PER_UNIT);
8579 vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
8580 new_stmt = gimple_build_assign (data_ref, vec_oprnd);
8581 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
8582 }
8583
8584 if (slp)
8585 continue;
8586
8587 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
8588 if (!next_stmt_info)
8589 break;
8590 }
8591 }
8592 if (!slp)
8593 {
8594 if (j == 0)
8595 *vec_stmt = new_stmt;
8596 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
8597 }
8598 }
8599
8600 for (i = 0; i < group_size; ++i)
8601 {
8602 vec<tree> oprndsi = gvec_oprnds[i];
8603 oprndsi.release ();
8604 }
8605 oprnds.release ();
8606 result_chain.release ();
8607 vec_oprnds.release ();
8608
8609 return true;
8610 }
8611
8612 /* Given a vector type VECTYPE, turns permutation SEL into the equivalent
8613 VECTOR_CST mask. No checks are made that the target platform supports the
8614 mask, so callers may wish to test can_vec_perm_const_p separately, or use
8615 vect_gen_perm_mask_checked. */
8616
8617 tree
8618 vect_gen_perm_mask_any (tree vectype, const vec_perm_indices &sel)
8619 {
8620 tree mask_type;
8621
8622 poly_uint64 nunits = sel.length ();
8623 gcc_assert (known_eq (nunits, TYPE_VECTOR_SUBPARTS (vectype)));
8624
8625 mask_type = build_vector_type (ssizetype, nunits);
8626 return vec_perm_indices_to_tree (mask_type, sel);
8627 }
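/* A typical use, modeled on perm_mask_for_reverse elsewhere in this file
   (shown only as a sketch; the local names are illustrative):

     poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
     vec_perm_builder sel (nunits, 1, 3);
     for (int i = 0; i < 3; ++i)
       sel.quick_push (nunits - 1 - i);
     vec_perm_indices indices (sel, 1, nunits);
     tree mask = NULL_TREE;
     if (can_vec_perm_const_p (TYPE_MODE (vectype), indices))
       mask = vect_gen_perm_mask_checked (vectype, indices);  */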
8628
8629 /* Checked version of vect_gen_perm_mask_any. Asserts can_vec_perm_const_p,
8630 i.e. that the target supports the pattern _for arbitrary input vectors_. */
8631
8632 tree
8633 vect_gen_perm_mask_checked (tree vectype, const vec_perm_indices &sel)
8634 {
8635 gcc_assert (can_vec_perm_const_p (TYPE_MODE (vectype), sel));
8636 return vect_gen_perm_mask_any (vectype, sel);
8637 }
8638
8639 /* Given vector variables X and Y that were generated for the scalar
8640 STMT_INFO, generate instructions to permute the vector elements of X and Y
8641 using permutation mask MASK_VEC, insert them at *GSI and return the
8642 permuted vector variable. */
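/* For reference, the generated statement has the form

     perm_dest = VEC_PERM_EXPR <X, Y, MASK_VEC>;

   where PERM_DEST is a fresh SSA name with the same vector type as X.  */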
8643
8644 static tree
8645 permute_vec_elements (vec_info *vinfo,
8646 tree x, tree y, tree mask_vec, stmt_vec_info stmt_info,
8647 gimple_stmt_iterator *gsi)
8648 {
8649 tree vectype = TREE_TYPE (x);
8650 tree perm_dest, data_ref;
8651 gimple *perm_stmt;
8652
8653 tree scalar_dest = gimple_get_lhs (stmt_info->stmt);
8654 if (scalar_dest && TREE_CODE (scalar_dest) == SSA_NAME)
8655 perm_dest = vect_create_destination_var (scalar_dest, vectype);
8656 else
8657 perm_dest = vect_get_new_vect_var (vectype, vect_simple_var, NULL);
8658 data_ref = make_ssa_name (perm_dest);
8659
8660 /* Generate the permute statement. */
8661 perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, x, y, mask_vec);
8662 vect_finish_stmt_generation (vinfo, stmt_info, perm_stmt, gsi);
8663
8664 return data_ref;
8665 }
8666
8667 /* Hoist the definitions of all SSA uses on STMT_INFO out of the loop LOOP,
8668    inserting them on the loop's preheader edge.  Returns true if we
8669    were successful in doing so (and thus STMT_INFO can then be moved),
8670 otherwise returns false. */
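/* For instance (an illustrative sketch; the SSA names are made up):

     loop:
       addr_1 = base_2 + 16;      <-- definition of a use of STMT_INFO whose
       x_3 = MEM[addr_1];             own operands are defined outside LOOP

   becomes

     preheader:
       addr_1 = base_2 + 16;
     loop:
       x_3 = MEM[addr_1];

   after which STMT_INFO (here the load of x_3) can itself be moved.  */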
8671
8672 static bool
8673 hoist_defs_of_uses (stmt_vec_info stmt_info, class loop *loop)
8674 {
8675 ssa_op_iter i;
8676 tree op;
8677 bool any = false;
8678
8679 FOR_EACH_SSA_TREE_OPERAND (op, stmt_info->stmt, i, SSA_OP_USE)
8680 {
8681 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
8682 if (!gimple_nop_p (def_stmt)
8683 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
8684 {
8685 /* Make sure we don't need to recurse. While we could do
8686 so in simple cases when there are more complex use webs
8687 we don't have an easy way to preserve stmt order to fulfil
8688 dependencies within them. */
8689 tree op2;
8690 ssa_op_iter i2;
8691 if (gimple_code (def_stmt) == GIMPLE_PHI)
8692 return false;
8693 FOR_EACH_SSA_TREE_OPERAND (op2, def_stmt, i2, SSA_OP_USE)
8694 {
8695 gimple *def_stmt2 = SSA_NAME_DEF_STMT (op2);
8696 if (!gimple_nop_p (def_stmt2)
8697 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt2)))
8698 return false;
8699 }
8700 any = true;
8701 }
8702 }
8703
8704 if (!any)
8705 return true;
8706
8707 FOR_EACH_SSA_TREE_OPERAND (op, stmt_info->stmt, i, SSA_OP_USE)
8708 {
8709 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
8710 if (!gimple_nop_p (def_stmt)
8711 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
8712 {
8713 gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
8714 gsi_remove (&gsi, false);
8715 gsi_insert_on_edge_immediate (loop_preheader_edge (loop), def_stmt);
8716 }
8717 }
8718
8719 return true;
8720 }
8721
8722 /* vectorizable_load.
8723
8724    Check if STMT_INFO reads a non-scalar data-ref (array/pointer/structure)
8725 that can be vectorized.
8726 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
8727 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
8728 Return true if STMT_INFO is vectorizable in this way. */
8729
8730 static bool
8731 vectorizable_load (vec_info *vinfo,
8732 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
8733 gimple **vec_stmt, slp_tree slp_node,
8734 stmt_vector_for_cost *cost_vec)
8735 {
8736 tree scalar_dest;
8737 tree vec_dest = NULL;
8738 tree data_ref = NULL;
8739 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
8740 class loop *loop = NULL;
8741 class loop *containing_loop = gimple_bb (stmt_info->stmt)->loop_father;
8742 bool nested_in_vect_loop = false;
8743 tree elem_type;
8744 tree new_temp;
8745 machine_mode mode;
8746 tree dummy;
8747 tree dataref_ptr = NULL_TREE;
8748 tree dataref_offset = NULL_TREE;
8749 gimple *ptr_incr = NULL;
8750 int ncopies;
8751 int i, j;
8752 unsigned int group_size;
8753 poly_uint64 group_gap_adj;
8754 tree msq = NULL_TREE, lsq;
8755 tree realignment_token = NULL_TREE;
8756 gphi *phi = NULL;
8757 vec<tree> dr_chain = vNULL;
8758 bool grouped_load = false;
8759 stmt_vec_info first_stmt_info;
8760 stmt_vec_info first_stmt_info_for_drptr = NULL;
8761 bool compute_in_loop = false;
8762 class loop *at_loop;
8763 int vec_num;
8764 bool slp = (slp_node != NULL);
8765 bool slp_perm = false;
8766 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
8767 poly_uint64 vf;
8768 tree aggr_type;
8769 gather_scatter_info gs_info;
8770 tree ref_type;
8771 enum vect_def_type mask_dt = vect_unknown_def_type;
8772
8773 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
8774 return false;
8775
8776 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
8777 && ! vec_stmt)
8778 return false;
8779
8780 if (!STMT_VINFO_DATA_REF (stmt_info))
8781 return false;
8782
8783 tree mask = NULL_TREE, mask_vectype = NULL_TREE;
8784 int mask_index = -1;
8785 if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
8786 {
8787 scalar_dest = gimple_assign_lhs (assign);
8788 if (TREE_CODE (scalar_dest) != SSA_NAME)
8789 return false;
8790
8791 tree_code code = gimple_assign_rhs_code (assign);
8792 if (code != ARRAY_REF
8793 && code != BIT_FIELD_REF
8794 && code != INDIRECT_REF
8795 && code != COMPONENT_REF
8796 && code != IMAGPART_EXPR
8797 && code != REALPART_EXPR
8798 && code != MEM_REF
8799 && TREE_CODE_CLASS (code) != tcc_declaration)
8800 return false;
8801 }
8802 else
8803 {
8804 gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
8805 if (!call || !gimple_call_internal_p (call))
8806 return false;
8807
8808 internal_fn ifn = gimple_call_internal_fn (call);
8809 if (!internal_load_fn_p (ifn))
8810 return false;
8811
8812 scalar_dest = gimple_call_lhs (call);
8813 if (!scalar_dest)
8814 return false;
8815
8816 mask_index = internal_fn_mask_index (ifn);
8817 /* ??? For SLP the mask operand is always last. */
8818 if (mask_index >= 0 && slp_node)
8819 mask_index = SLP_TREE_CHILDREN (slp_node).length () - 1;
8820 if (mask_index >= 0
8821 && !vect_check_scalar_mask (vinfo, stmt_info, slp_node, mask_index,
8822 &mask, NULL, &mask_dt, &mask_vectype))
8823 return false;
8824 }
8825
8826 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
8827 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
8828
8829 if (loop_vinfo)
8830 {
8831 loop = LOOP_VINFO_LOOP (loop_vinfo);
8832 nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt_info);
8833 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
8834 }
8835 else
8836 vf = 1;
8837
8838 /* Multiple types in SLP are handled by creating the appropriate number of
8839 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
8840 case of SLP. */
8841 if (slp)
8842 ncopies = 1;
8843 else
8844 ncopies = vect_get_num_copies (loop_vinfo, vectype);
8845
8846 gcc_assert (ncopies >= 1);
8847
8848 /* FORNOW. This restriction should be relaxed. */
8849 if (nested_in_vect_loop && ncopies > 1)
8850 {
8851 if (dump_enabled_p ())
8852 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8853 "multiple types in nested loop.\n");
8854 return false;
8855 }
8856
8857 /* Invalidate assumptions made by dependence analysis when vectorization
8858 on the unrolled body effectively re-orders stmts. */
8859 if (ncopies > 1
8860 && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
8861 && maybe_gt (LOOP_VINFO_VECT_FACTOR (loop_vinfo),
8862 STMT_VINFO_MIN_NEG_DIST (stmt_info)))
8863 {
8864 if (dump_enabled_p ())
8865 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8866 "cannot perform implicit CSE when unrolling "
8867 "with negative dependence distance\n");
8868 return false;
8869 }
8870
8871 elem_type = TREE_TYPE (vectype);
8872 mode = TYPE_MODE (vectype);
8873
8874   /* FORNOW.  In some cases we can vectorize even if the data-type is not
8875      supported (e.g. data copies).  */
8876 if (optab_handler (mov_optab, mode) == CODE_FOR_nothing)
8877 {
8878 if (dump_enabled_p ())
8879 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8880 "Aligned load, but unsupported type.\n");
8881 return false;
8882 }
8883
8884 /* Check if the load is a part of an interleaving chain. */
8885 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
8886 {
8887 grouped_load = true;
8888 /* FORNOW */
8889 gcc_assert (!nested_in_vect_loop);
8890 gcc_assert (!STMT_VINFO_GATHER_SCATTER_P (stmt_info));
8891
8892 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
8893 group_size = DR_GROUP_SIZE (first_stmt_info);
8894
8895 /* Refuse non-SLP vectorization of SLP-only groups. */
8896 if (!slp && STMT_VINFO_SLP_VECT_ONLY (first_stmt_info))
8897 {
8898 if (dump_enabled_p ())
8899 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8900 "cannot vectorize load in non-SLP mode.\n");
8901 return false;
8902 }
8903
8904 if (slp && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
8905 {
8906 slp_perm = true;
8907
8908 if (!loop_vinfo)
8909 {
8910 /* In BB vectorization we may not actually use a loaded vector
8911 accessing elements in excess of DR_GROUP_SIZE. */
8912 stmt_vec_info group_info = SLP_TREE_SCALAR_STMTS (slp_node)[0];
8913 group_info = DR_GROUP_FIRST_ELEMENT (group_info);
8914 unsigned HOST_WIDE_INT nunits;
8915 unsigned j, k, maxk = 0;
8916 FOR_EACH_VEC_ELT (SLP_TREE_LOAD_PERMUTATION (slp_node), j, k)
8917 if (k > maxk)
8918 maxk = k;
8919 tree vectype = SLP_TREE_VECTYPE (slp_node);
8920 if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits)
8921 || maxk >= (DR_GROUP_SIZE (group_info) & ~(nunits - 1)))
8922 {
8923 if (dump_enabled_p ())
8924 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8925 "BB vectorization with gaps at the end of "
8926 "a load is not supported\n");
8927 return false;
8928 }
8929 }
8930
8931 auto_vec<tree> tem;
8932 unsigned n_perms;
8933 if (!vect_transform_slp_perm_load (vinfo, slp_node, tem, NULL, vf,
8934 true, &n_perms))
8935 {
8936 if (dump_enabled_p ())
8937 dump_printf_loc (MSG_MISSED_OPTIMIZATION,
8938 vect_location,
8939 "unsupported load permutation\n");
8940 return false;
8941 }
8942 }
8943
8944 /* Invalidate assumptions made by dependence analysis when vectorization
8945 on the unrolled body effectively re-orders stmts. */
8946 if (!PURE_SLP_STMT (stmt_info)
8947 && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
8948 && maybe_gt (LOOP_VINFO_VECT_FACTOR (loop_vinfo),
8949 STMT_VINFO_MIN_NEG_DIST (stmt_info)))
8950 {
8951 if (dump_enabled_p ())
8952 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8953 "cannot perform implicit CSE when performing "
8954 "group loads with negative dependence distance\n");
8955 return false;
8956 }
8957 }
8958 else
8959 group_size = 1;
8960
8961 vect_memory_access_type memory_access_type;
8962 enum dr_alignment_support alignment_support_scheme;
8963 int misalignment;
8964 poly_int64 poffset;
8965 if (!get_load_store_type (vinfo, stmt_info, vectype, slp_node, mask, VLS_LOAD,
8966 ncopies, &memory_access_type, &poffset,
8967 &alignment_support_scheme, &misalignment, &gs_info))
8968 return false;
8969
8970 if (mask)
8971 {
8972 if (memory_access_type == VMAT_CONTIGUOUS)
8973 {
8974 machine_mode vec_mode = TYPE_MODE (vectype);
8975 if (!VECTOR_MODE_P (vec_mode)
8976 || !can_vec_mask_load_store_p (vec_mode,
8977 TYPE_MODE (mask_vectype), true))
8978 return false;
8979 }
8980 else if (memory_access_type != VMAT_LOAD_STORE_LANES
8981 && memory_access_type != VMAT_GATHER_SCATTER)
8982 {
8983 if (dump_enabled_p ())
8984 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8985 "unsupported access type for masked load.\n");
8986 return false;
8987 }
8988 else if (memory_access_type == VMAT_GATHER_SCATTER
8989 && gs_info.ifn == IFN_LAST
8990 && !gs_info.decl)
8991 {
8992 if (dump_enabled_p ())
8993 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8994 "unsupported masked emulated gather.\n");
8995 return false;
8996 }
8997 }
8998
8999 if (!vec_stmt) /* transformation not required. */
9000 {
9001 if (slp_node
9002 && mask
9003 && !vect_maybe_update_slp_op_vectype (SLP_TREE_CHILDREN (slp_node)[0],
9004 mask_vectype))
9005 {
9006 if (dump_enabled_p ())
9007 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9008 "incompatible vector types for invariants\n");
9009 return false;
9010 }
9011
9012 if (!slp)
9013 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;
9014
9015 if (loop_vinfo
9016 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo))
9017 check_load_store_for_partial_vectors (loop_vinfo, vectype, slp_node,
9018 VLS_LOAD, group_size,
9019 memory_access_type, &gs_info,
9020 mask);
9021
9022 if (dump_enabled_p ()
9023 && memory_access_type != VMAT_ELEMENTWISE
9024 && memory_access_type != VMAT_GATHER_SCATTER
9025 && alignment_support_scheme != dr_aligned)
9026 dump_printf_loc (MSG_NOTE, vect_location,
9027 "Vectorizing an unaligned access.\n");
9028
9029 STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
9030 vect_model_load_cost (vinfo, stmt_info, ncopies, vf, memory_access_type,
9031 alignment_support_scheme, misalignment,
9032 &gs_info, slp_node, cost_vec);
9033 return true;
9034 }
9035
9036 if (!slp)
9037 gcc_assert (memory_access_type
9038 == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info));
9039
9040 if (dump_enabled_p ())
9041 dump_printf_loc (MSG_NOTE, vect_location,
9042 "transform load. ncopies = %d\n", ncopies);
9043
9044 /* Transform. */
9045
9046 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info), *first_dr_info = NULL;
9047 ensure_base_align (dr_info);
9048
9049 if (memory_access_type == VMAT_GATHER_SCATTER && gs_info.decl)
9050 {
9051 vect_build_gather_load_calls (vinfo,
9052 stmt_info, gsi, vec_stmt, &gs_info, mask);
9053 return true;
9054 }
9055
9056 if (memory_access_type == VMAT_INVARIANT)
9057 {
9058 gcc_assert (!grouped_load && !mask && !bb_vinfo);
9059 /* If we have versioned for aliasing or the loop doesn't
9060 have any data dependencies that would preclude this,
9061 then we are sure this is a loop invariant load and
9062 thus we can insert it on the preheader edge. */
9063 bool hoist_p = (LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo)
9064 && !nested_in_vect_loop
9065 && hoist_defs_of_uses (stmt_info, loop));
9066 if (hoist_p)
9067 {
9068 gassign *stmt = as_a <gassign *> (stmt_info->stmt);
9069 if (dump_enabled_p ())
9070 dump_printf_loc (MSG_NOTE, vect_location,
9071 "hoisting out of the vectorized loop: %G", stmt);
9072 scalar_dest = copy_ssa_name (scalar_dest);
9073 tree rhs = unshare_expr (gimple_assign_rhs1 (stmt));
9074 gsi_insert_on_edge_immediate
9075 (loop_preheader_edge (loop),
9076 gimple_build_assign (scalar_dest, rhs));
9077 }
9078 /* These copies are all equivalent, but currently the representation
9079 requires a separate STMT_VINFO_VEC_STMT for each one. */
9080 gimple_stmt_iterator gsi2 = *gsi;
9081 gsi_next (&gsi2);
9082 for (j = 0; j < ncopies; j++)
9083 {
9084 if (hoist_p)
9085 new_temp = vect_init_vector (vinfo, stmt_info, scalar_dest,
9086 vectype, NULL);
9087 else
9088 new_temp = vect_init_vector (vinfo, stmt_info, scalar_dest,
9089 vectype, &gsi2);
9090 gimple *new_stmt = SSA_NAME_DEF_STMT (new_temp);
9091 if (slp)
9092 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
9093 else
9094 {
9095 if (j == 0)
9096 *vec_stmt = new_stmt;
9097 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
9098 }
9099 }
9100 return true;
9101 }
9102
9103 if (memory_access_type == VMAT_ELEMENTWISE
9104 || memory_access_type == VMAT_STRIDED_SLP)
9105 {
9106 gimple_stmt_iterator incr_gsi;
9107 bool insert_after;
9108 tree offvar;
9109 tree ivstep;
9110 tree running_off;
9111 vec<constructor_elt, va_gc> *v = NULL;
9112 tree stride_base, stride_step, alias_off;
9113 /* Checked by get_load_store_type. */
9114 unsigned int const_nunits = nunits.to_constant ();
9115 unsigned HOST_WIDE_INT cst_offset = 0;
9116 tree dr_offset;
9117
9118 gcc_assert (!LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo));
9119 gcc_assert (!nested_in_vect_loop);
9120
9121 if (grouped_load)
9122 {
9123 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
9124 first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
9125 }
9126 else
9127 {
9128 first_stmt_info = stmt_info;
9129 first_dr_info = dr_info;
9130 }
9131 if (slp && grouped_load)
9132 {
9133 group_size = DR_GROUP_SIZE (first_stmt_info);
9134 ref_type = get_group_alias_ptr_type (first_stmt_info);
9135 }
9136 else
9137 {
9138 if (grouped_load)
9139 cst_offset
9140 = (tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)))
9141 * vect_get_place_in_interleaving_chain (stmt_info,
9142 first_stmt_info));
9143 group_size = 1;
9144 ref_type = reference_alias_ptr_type (DR_REF (dr_info->dr));
9145 }
9146
9147 dr_offset = get_dr_vinfo_offset (vinfo, first_dr_info);
9148 stride_base
9149 = fold_build_pointer_plus
9150 (DR_BASE_ADDRESS (first_dr_info->dr),
9151 size_binop (PLUS_EXPR,
9152 convert_to_ptrofftype (dr_offset),
9153 convert_to_ptrofftype (DR_INIT (first_dr_info->dr))));
9154 stride_step = fold_convert (sizetype, DR_STEP (first_dr_info->dr));
9155
9156      /* For a load with a loop-invariant stride that is not a power of 2
9157 	(i.e. not a grouped access) like so:
9158
9159 for (i = 0; i < n; i += stride)
9160 ... = array[i];
9161
9162 we generate a new induction variable and new accesses to
9163 form a new vector (or vectors, depending on ncopies):
9164
9165 for (j = 0; ; j += VF*stride)
9166 tmp1 = array[j];
9167 tmp2 = array[j + stride];
9168 ...
9169 vectemp = {tmp1, tmp2, ...}
9170 */
9171
9172 ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (stride_step), stride_step,
9173 build_int_cst (TREE_TYPE (stride_step), vf));
9174
9175 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
9176
9177 stride_base = cse_and_gimplify_to_preheader (loop_vinfo, stride_base);
9178 ivstep = cse_and_gimplify_to_preheader (loop_vinfo, ivstep);
9179 create_iv (stride_base, ivstep, NULL,
9180 loop, &incr_gsi, insert_after,
9181 &offvar, NULL);
9182
9183 stride_step = cse_and_gimplify_to_preheader (loop_vinfo, stride_step);
9184
9185 running_off = offvar;
9186 alias_off = build_int_cst (ref_type, 0);
9187 int nloads = const_nunits;
9188 int lnel = 1;
9189 tree ltype = TREE_TYPE (vectype);
9190 tree lvectype = vectype;
9191 auto_vec<tree> dr_chain;
9192 if (memory_access_type == VMAT_STRIDED_SLP)
9193 {
9194 if (group_size < const_nunits)
9195 {
9196 /* First check if vec_init optab supports construction from vector
9197 elts directly. Otherwise avoid emitting a constructor of
9198 vector elements by performing the loads using an integer type
9199 of the same size, constructing a vector of those and then
9200 re-interpreting it as the original vector type. This avoids a
9201 huge runtime penalty due to the general inability to perform
9202 store forwarding from smaller stores to a larger load. */
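	      /* As an example (assuming a V4SI vectype and a group size of 2;
		 the types are only illustrative), the composition type may be
		 V2DI with PTYPE == DI, giving NLOADS == 2 and LNEL == 2: each
		 DImode load covers two SI elements and the resulting V2DI is
		 view-converted back to V4SI below.  */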
9203 tree ptype;
9204 tree vtype
9205 = vector_vector_composition_type (vectype,
9206 const_nunits / group_size,
9207 &ptype);
9208 if (vtype != NULL_TREE)
9209 {
9210 nloads = const_nunits / group_size;
9211 lnel = group_size;
9212 lvectype = vtype;
9213 ltype = ptype;
9214 }
9215 }
9216 else
9217 {
9218 nloads = 1;
9219 lnel = const_nunits;
9220 ltype = vectype;
9221 }
9222 ltype = build_aligned_type (ltype, TYPE_ALIGN (TREE_TYPE (vectype)));
9223 }
9224       /* Load the vector(1) scalar_type directly if the vectype has just
9224 	 one element.  */
9225 else if (nloads == 1)
9226 ltype = vectype;
9227
9228 if (slp)
9229 {
9230 /* For SLP permutation support we need to load the whole group,
9231 not only the number of vector stmts the permutation result
9232 fits in. */
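	  /* For example, with GROUP_SIZE == 3, VF == 4 and CONST_NUNITS == 4
	     this loads CEIL (3 * 4, 4) == 3 vectors even if the permutation
	     result itself fits in fewer vector stmts.  */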
9233 if (slp_perm)
9234 {
9235 /* We don't yet generate SLP_TREE_LOAD_PERMUTATIONs for
9236 variable VF. */
9237 unsigned int const_vf = vf.to_constant ();
9238 ncopies = CEIL (group_size * const_vf, const_nunits);
9239 dr_chain.create (ncopies);
9240 }
9241 else
9242 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
9243 }
9244 unsigned int group_el = 0;
9245 unsigned HOST_WIDE_INT
9246 elsz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
9247 unsigned int n_groups = 0;
9248 for (j = 0; j < ncopies; j++)
9249 {
9250 if (nloads > 1)
9251 vec_alloc (v, nloads);
9252 gimple *new_stmt = NULL;
9253 for (i = 0; i < nloads; i++)
9254 {
9255 tree this_off = build_int_cst (TREE_TYPE (alias_off),
9256 group_el * elsz + cst_offset);
9257 tree data_ref = build2 (MEM_REF, ltype, running_off, this_off);
9258 vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
9259 new_stmt = gimple_build_assign (make_ssa_name (ltype), data_ref);
9260 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
9261 if (nloads > 1)
9262 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
9263 gimple_assign_lhs (new_stmt));
9264
9265 group_el += lnel;
9266 if (! slp
9267 || group_el == group_size)
9268 {
9269 n_groups++;
9270 		  /* When doing SLP, make sure not to load elements from
9271 		     the next vector iteration; those will not be accessed,
9272 		     so just use the last element again.  See PR107451.  */
9273 if (!slp || known_lt (n_groups, vf))
9274 {
9275 tree newoff = copy_ssa_name (running_off);
9276 gimple *incr
9277 = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
9278 running_off, stride_step);
9279 vect_finish_stmt_generation (vinfo, stmt_info, incr, gsi);
9280 running_off = newoff;
9281 }
9282 group_el = 0;
9283 }
9284 }
9285 if (nloads > 1)
9286 {
9287 tree vec_inv = build_constructor (lvectype, v);
9288 new_temp = vect_init_vector (vinfo, stmt_info,
9289 vec_inv, lvectype, gsi);
9290 new_stmt = SSA_NAME_DEF_STMT (new_temp);
9291 if (lvectype != vectype)
9292 {
9293 new_stmt = gimple_build_assign (make_ssa_name (vectype),
9294 VIEW_CONVERT_EXPR,
9295 build1 (VIEW_CONVERT_EXPR,
9296 vectype, new_temp));
9297 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
9298 }
9299 }
9300
9301 if (slp)
9302 {
9303 if (slp_perm)
9304 dr_chain.quick_push (gimple_assign_lhs (new_stmt));
9305 else
9306 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
9307 }
9308 else
9309 {
9310 if (j == 0)
9311 *vec_stmt = new_stmt;
9312 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
9313 }
9314 }
9315 if (slp_perm)
9316 {
9317 unsigned n_perms;
9318 vect_transform_slp_perm_load (vinfo, slp_node, dr_chain, gsi, vf,
9319 false, &n_perms);
9320 }
9321 return true;
9322 }
9323
9324 if (memory_access_type == VMAT_GATHER_SCATTER
9325 || (!slp && memory_access_type == VMAT_CONTIGUOUS))
9326 grouped_load = false;
9327
9328 if (grouped_load)
9329 {
9330 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
9331 group_size = DR_GROUP_SIZE (first_stmt_info);
9332 /* For SLP vectorization we directly vectorize a subchain
9333 without permutation. */
9334 if (slp && ! SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
9335 first_stmt_info = SLP_TREE_SCALAR_STMTS (slp_node)[0];
9336 /* For BB vectorization always use the first stmt to base
9337 the data ref pointer on. */
9338 if (bb_vinfo)
9339 first_stmt_info_for_drptr
9340 = vect_find_first_scalar_stmt_in_slp (slp_node);
9341
9342 /* Check if the chain of loads is already vectorized. */
9343 if (STMT_VINFO_VEC_STMTS (first_stmt_info).exists ()
9344 /* For SLP we would need to copy over SLP_TREE_VEC_STMTS.
9345 ??? But we can only do so if there is exactly one
9346 as we have no way to get at the rest. Leave the CSE
9347 opportunity alone.
9348 ??? With the group load eventually participating
9349 in multiple different permutations (having multiple
9350 slp nodes which refer to the same group) the CSE
9351 is even wrong code. See PR56270. */
9352 && !slp)
9353 {
9354 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
9355 return true;
9356 }
9357 first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
9358 group_gap_adj = 0;
9359
9360 /* VEC_NUM is the number of vect stmts to be created for this group. */
9361 if (slp)
9362 {
9363 grouped_load = false;
9364 /* If an SLP permutation is from N elements to N elements,
9365 and if one vector holds a whole number of N, we can load
9366 the inputs to the permutation in the same way as an
9367 unpermuted sequence. In other cases we need to load the
9368 whole group, not only the number of vector stmts the
9369 permutation result fits in. */
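	  /* For example, a { 1, 0 } permutation of a two-lane group with a
	     V4SI vectype (nunits is a multiple of the group size) takes the
	     else branch below and loads plain contiguous vectors, whereas a
	     permutation that changes the number of lanes, or whose group
	     size does not divide nunits, loads the whole group.  */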
9370 unsigned scalar_lanes = SLP_TREE_LANES (slp_node);
9371 if (slp_perm
9372 && (group_size != scalar_lanes
9373 || !multiple_p (nunits, group_size)))
9374 {
9375 /* We don't yet generate such SLP_TREE_LOAD_PERMUTATIONs for
9376 variable VF; see vect_transform_slp_perm_load. */
9377 unsigned int const_vf = vf.to_constant ();
9378 unsigned int const_nunits = nunits.to_constant ();
9379 vec_num = CEIL (group_size * const_vf, const_nunits);
9380 group_gap_adj = vf * group_size - nunits * vec_num;
9381 }
9382 else
9383 {
9384 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
9385 group_gap_adj
9386 = group_size - scalar_lanes;
9387 }
9388 }
9389 else
9390 vec_num = group_size;
9391
9392 ref_type = get_group_alias_ptr_type (first_stmt_info);
9393 }
9394 else
9395 {
9396 first_stmt_info = stmt_info;
9397 first_dr_info = dr_info;
9398 group_size = vec_num = 1;
9399 group_gap_adj = 0;
9400 ref_type = reference_alias_ptr_type (DR_REF (first_dr_info->dr));
9401 if (slp)
9402 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
9403 }
9404
9405 gcc_assert (alignment_support_scheme);
9406 vec_loop_masks *loop_masks
9407 = (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
9408 ? &LOOP_VINFO_MASKS (loop_vinfo)
9409 : NULL);
9410 vec_loop_lens *loop_lens
9411 = (loop_vinfo && LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo)
9412 ? &LOOP_VINFO_LENS (loop_vinfo)
9413 : NULL);
9414
9415 /* Shouldn't go with length-based approach if fully masked. */
9416 gcc_assert (!loop_lens || !loop_masks);
9417
9418 /* Targets with store-lane instructions must not require explicit
9419 realignment. vect_supportable_dr_alignment always returns either
9420 dr_aligned or dr_unaligned_supported for masked operations. */
9421 gcc_assert ((memory_access_type != VMAT_LOAD_STORE_LANES
9422 && !mask
9423 && !loop_masks)
9424 || alignment_support_scheme == dr_aligned
9425 || alignment_support_scheme == dr_unaligned_supported);
9426
9427 /* In case the vectorization factor (VF) is bigger than the number
9428 of elements that we can fit in a vectype (nunits), we have to generate
9429 more than one vector stmt - i.e - we need to "unroll" the
9430 vector stmt by a factor VF/nunits. In doing so, we record a pointer
9431 from one copy of the vector stmt to the next, in the field
9432 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
9433 stages to find the correct vector defs to be used when vectorizing
9434 stmts that use the defs of the current stmt. The example below
9435 illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
9436 need to create 4 vectorized stmts):
9437
9438 before vectorization:
9439 RELATED_STMT VEC_STMT
9440 S1: x = memref - -
9441 S2: z = x + 1 - -
9442
9443 step 1: vectorize stmt S1:
9444 We first create the vector stmt VS1_0, and, as usual, record a
9445 pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
9446 Next, we create the vector stmt VS1_1, and record a pointer to
9447 it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
9448 Similarly, for VS1_2 and VS1_3. This is the resulting chain of
9449 stmts and pointers:
9450 RELATED_STMT VEC_STMT
9451 VS1_0: vx0 = memref0 VS1_1 -
9452 VS1_1: vx1 = memref1 VS1_2 -
9453 VS1_2: vx2 = memref2 VS1_3 -
9454 VS1_3: vx3 = memref3 - -
9455 S1: x = load - VS1_0
9456 S2: z = x + 1 - -
9457 */
9458
9459 /* In case of interleaving (non-unit grouped access):
9460
9461 S1: x2 = &base + 2
9462 S2: x0 = &base
9463 S3: x1 = &base + 1
9464 S4: x3 = &base + 3
9465
9466 Vectorized loads are created in the order of memory accesses
9467 starting from the access of the first stmt of the chain:
9468
9469 VS1: vx0 = &base
9470 VS2: vx1 = &base + vec_size*1
9471 VS3: vx3 = &base + vec_size*2
9472 VS4: vx4 = &base + vec_size*3
9473
9474 Then permutation statements are generated:
9475
9476 VS5: vx5 = VEC_PERM_EXPR < vx0, vx1, { 0, 2, ..., i*2 } >
9477 VS6: vx6 = VEC_PERM_EXPR < vx0, vx1, { 1, 3, ..., i*2+1 } >
9478 ...
9479
9480 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
9481 (the order of the data-refs in the output of vect_permute_load_chain
9482 corresponds to the order of scalar stmts in the interleaving chain - see
9483 the documentation of vect_permute_load_chain()).
9484 The generation of permutation stmts and recording them in
9485 STMT_VINFO_VEC_STMT is done in vect_transform_grouped_load().
9486
9487 In case of both multiple types and interleaving, the vector loads and
9488 permutation stmts above are created for every copy. The result vector
9489 stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
9490 corresponding STMT_VINFO_RELATED_STMT for the next copies. */
9491
9492 /* If the data reference is aligned (dr_aligned) or potentially unaligned
9493 on a target that supports unaligned accesses (dr_unaligned_supported)
9494 we generate the following code:
9495 p = initial_addr;
9496 indx = 0;
9497 loop {
9498 p = p + indx * vectype_size;
9499 vec_dest = *(p);
9500 indx = indx + 1;
9501 }
9502
9503 Otherwise, the data reference is potentially unaligned on a target that
9504 does not support unaligned accesses (dr_explicit_realign_optimized) -
9505 then generate the following code, in which the data in each iteration is
9506 obtained by two vector loads, one from the previous iteration, and one
9507 from the current iteration:
9508 p1 = initial_addr;
9509 msq_init = *(floor(p1))
9510 p2 = initial_addr + VS - 1;
9511 realignment_token = call target_builtin;
9512 indx = 0;
9513 loop {
9514 p2 = p2 + indx * vectype_size
9515 lsq = *(floor(p2))
9516 vec_dest = realign_load (msq, lsq, realignment_token)
9517 indx = indx + 1;
9518 msq = lsq;
9519 } */
9520
9521 /* If the misalignment remains the same throughout the execution of the
9522 loop, we can create the init_addr and permutation mask at the loop
9523 preheader. Otherwise, it needs to be created inside the loop.
9524 This can only occur when vectorizing memory accesses in the inner-loop
9525 nested within an outer-loop that is being vectorized. */
9526
9527 if (nested_in_vect_loop
9528 && !multiple_p (DR_STEP_ALIGNMENT (dr_info->dr),
9529 GET_MODE_SIZE (TYPE_MODE (vectype))))
9530 {
9531 gcc_assert (alignment_support_scheme != dr_explicit_realign_optimized);
9532 compute_in_loop = true;
9533 }
9534
9535 bool diff_first_stmt_info
9536 = first_stmt_info_for_drptr && first_stmt_info != first_stmt_info_for_drptr;
9537
9538 tree offset = NULL_TREE;
9539 if ((alignment_support_scheme == dr_explicit_realign_optimized
9540 || alignment_support_scheme == dr_explicit_realign)
9541 && !compute_in_loop)
9542 {
9543       /* If we have a different first_stmt_info, we can't set up realignment
9544 	 here, since we can't guarantee that first_stmt_info's DR has been
9545 	 initialized yet; instead use first_stmt_info_for_drptr's DR, bumping
9546 	 by the distance from first_stmt_info's DR as below.  */
9547 if (!diff_first_stmt_info)
9548 msq = vect_setup_realignment (vinfo,
9549 first_stmt_info, gsi, &realignment_token,
9550 alignment_support_scheme, NULL_TREE,
9551 &at_loop);
9552 if (alignment_support_scheme == dr_explicit_realign_optimized)
9553 {
9554 phi = as_a <gphi *> (SSA_NAME_DEF_STMT (msq));
9555 offset = size_binop (MINUS_EXPR, TYPE_SIZE_UNIT (vectype),
9556 size_one_node);
9557 gcc_assert (!first_stmt_info_for_drptr);
9558 }
9559 }
9560 else
9561 at_loop = loop;
9562
9563 if (!known_eq (poffset, 0))
9564 offset = (offset
9565 ? size_binop (PLUS_EXPR, offset, size_int (poffset))
9566 : size_int (poffset));
9567
9568 tree bump;
9569 tree vec_offset = NULL_TREE;
9570 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
9571 {
9572 aggr_type = NULL_TREE;
9573 bump = NULL_TREE;
9574 }
9575 else if (memory_access_type == VMAT_GATHER_SCATTER)
9576 {
9577 aggr_type = elem_type;
9578 vect_get_strided_load_store_ops (stmt_info, loop_vinfo, &gs_info,
9579 &bump, &vec_offset);
9580 }
9581 else
9582 {
9583 if (memory_access_type == VMAT_LOAD_STORE_LANES)
9584 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
9585 else
9586 aggr_type = vectype;
9587 bump = vect_get_data_ptr_increment (vinfo, dr_info, aggr_type,
9588 memory_access_type);
9589 }
9590
9591 auto_vec<tree> vec_offsets;
9592 auto_vec<tree> vec_masks;
9593 if (mask)
9594 {
9595 if (slp_node)
9596 vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[mask_index],
9597 &vec_masks);
9598 else
9599 vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies, mask,
9600 &vec_masks, mask_vectype);
9601 }
9602 tree vec_mask = NULL_TREE;
9603 poly_uint64 group_elt = 0;
9604 for (j = 0; j < ncopies; j++)
9605 {
9606 /* 1. Create the vector or array pointer update chain. */
9607 if (j == 0)
9608 {
9609 bool simd_lane_access_p
9610 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) != 0;
9611 if (simd_lane_access_p
9612 && TREE_CODE (DR_BASE_ADDRESS (first_dr_info->dr)) == ADDR_EXPR
9613 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr_info->dr), 0))
9614 && integer_zerop (get_dr_vinfo_offset (vinfo, first_dr_info))
9615 && integer_zerop (DR_INIT (first_dr_info->dr))
9616 && alias_sets_conflict_p (get_alias_set (aggr_type),
9617 get_alias_set (TREE_TYPE (ref_type)))
9618 && (alignment_support_scheme == dr_aligned
9619 || alignment_support_scheme == dr_unaligned_supported))
9620 {
9621 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr_info->dr));
9622 dataref_offset = build_int_cst (ref_type, 0);
9623 }
9624 else if (diff_first_stmt_info)
9625 {
9626 dataref_ptr
9627 = vect_create_data_ref_ptr (vinfo, first_stmt_info_for_drptr,
9628 aggr_type, at_loop, offset, &dummy,
9629 gsi, &ptr_incr, simd_lane_access_p,
9630 bump);
9631 /* Adjust the pointer by the difference to first_stmt. */
9632 data_reference_p ptrdr
9633 = STMT_VINFO_DATA_REF (first_stmt_info_for_drptr);
9634 tree diff
9635 = fold_convert (sizetype,
9636 size_binop (MINUS_EXPR,
9637 DR_INIT (first_dr_info->dr),
9638 DR_INIT (ptrdr)));
9639 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
9640 stmt_info, diff);
9641 if (alignment_support_scheme == dr_explicit_realign)
9642 {
9643 msq = vect_setup_realignment (vinfo,
9644 first_stmt_info_for_drptr, gsi,
9645 &realignment_token,
9646 alignment_support_scheme,
9647 dataref_ptr, &at_loop);
9648 gcc_assert (!compute_in_loop);
9649 }
9650 }
9651 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
9652 {
9653 vect_get_gather_scatter_ops (loop_vinfo, loop, stmt_info,
9654 slp_node, &gs_info, &dataref_ptr,
9655 &vec_offsets);
9656 }
9657 else
9658 dataref_ptr
9659 = vect_create_data_ref_ptr (vinfo, first_stmt_info, aggr_type,
9660 at_loop,
9661 offset, &dummy, gsi, &ptr_incr,
9662 simd_lane_access_p, bump);
9663 if (mask)
9664 vec_mask = vec_masks[0];
9665 }
9666 else
9667 {
9668 if (dataref_offset)
9669 dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset,
9670 bump);
9671 else if (!STMT_VINFO_GATHER_SCATTER_P (stmt_info))
9672 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
9673 stmt_info, bump);
9674 if (mask)
9675 vec_mask = vec_masks[j];
9676 }
9677
9678 if (grouped_load || slp_perm)
9679 dr_chain.create (vec_num);
9680
9681 gimple *new_stmt = NULL;
9682 if (memory_access_type == VMAT_LOAD_STORE_LANES)
9683 {
9684 tree vec_array;
9685
9686 vec_array = create_vector_array (vectype, vec_num);
9687
9688 tree final_mask = NULL_TREE;
9689 if (loop_masks)
9690 final_mask = vect_get_loop_mask (gsi, loop_masks, ncopies,
9691 vectype, j);
9692 if (vec_mask)
9693 final_mask = prepare_vec_mask (loop_vinfo, mask_vectype,
9694 final_mask, vec_mask, gsi);
9695
9696 gcall *call;
9697 if (final_mask)
9698 {
9699 /* Emit:
9700 VEC_ARRAY = MASK_LOAD_LANES (DATAREF_PTR, ALIAS_PTR,
9701 VEC_MASK). */
9702 unsigned int align = TYPE_ALIGN (TREE_TYPE (vectype));
9703 tree alias_ptr = build_int_cst (ref_type, align);
9704 call = gimple_build_call_internal (IFN_MASK_LOAD_LANES, 3,
9705 dataref_ptr, alias_ptr,
9706 final_mask);
9707 }
9708 else
9709 {
9710 /* Emit:
9711 VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]). */
9712 data_ref = create_array_ref (aggr_type, dataref_ptr, ref_type);
9713 call = gimple_build_call_internal (IFN_LOAD_LANES, 1, data_ref);
9714 }
9715 gimple_call_set_lhs (call, vec_array);
9716 gimple_call_set_nothrow (call, true);
9717 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
9718 new_stmt = call;
9719
9720 /* Extract each vector into an SSA_NAME. */
9721 for (i = 0; i < vec_num; i++)
9722 {
9723 new_temp = read_vector_array (vinfo, stmt_info, gsi, scalar_dest,
9724 vec_array, i);
9725 dr_chain.quick_push (new_temp);
9726 }
9727
9728 /* Record the mapping between SSA_NAMEs and statements. */
9729 vect_record_grouped_load_vectors (vinfo, stmt_info, dr_chain);
9730
9731 /* Record that VEC_ARRAY is now dead. */
9732 vect_clobber_variable (vinfo, stmt_info, gsi, vec_array);
9733 }
9734 else
9735 {
9736 for (i = 0; i < vec_num; i++)
9737 {
9738 tree final_mask = NULL_TREE;
9739 if (loop_masks
9740 && memory_access_type != VMAT_INVARIANT)
9741 final_mask = vect_get_loop_mask (gsi, loop_masks,
9742 vec_num * ncopies,
9743 vectype, vec_num * j + i);
9744 if (vec_mask)
9745 final_mask = prepare_vec_mask (loop_vinfo, mask_vectype,
9746 final_mask, vec_mask, gsi);
9747
9748 if (i > 0 && !STMT_VINFO_GATHER_SCATTER_P (stmt_info))
9749 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr,
9750 gsi, stmt_info, bump);
9751
9752 /* 2. Create the vector-load in the loop. */
9753 switch (alignment_support_scheme)
9754 {
9755 case dr_aligned:
9756 case dr_unaligned_supported:
9757 {
9758 unsigned int misalign;
9759 unsigned HOST_WIDE_INT align;
9760
9761 if (memory_access_type == VMAT_GATHER_SCATTER
9762 && gs_info.ifn != IFN_LAST)
9763 {
9764 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
9765 vec_offset = vec_offsets[vec_num * j + i];
9766 tree zero = build_zero_cst (vectype);
9767 tree scale = size_int (gs_info.scale);
9768 gcall *call;
9769 if (final_mask)
9770 call = gimple_build_call_internal
9771 (IFN_MASK_GATHER_LOAD, 5, dataref_ptr,
9772 vec_offset, scale, zero, final_mask);
9773 else
9774 call = gimple_build_call_internal
9775 (IFN_GATHER_LOAD, 4, dataref_ptr,
9776 vec_offset, scale, zero);
9777 gimple_call_set_nothrow (call, true);
9778 new_stmt = call;
9779 data_ref = NULL_TREE;
9780 break;
9781 }
9782 else if (memory_access_type == VMAT_GATHER_SCATTER)
9783 {
9784 /* Emulated gather-scatter. */
9785 gcc_assert (!final_mask);
9786 unsigned HOST_WIDE_INT const_nunits
9787 = nunits.to_constant ();
9788 unsigned HOST_WIDE_INT const_offset_nunits
9789 = TYPE_VECTOR_SUBPARTS (gs_info.offset_vectype)
9790 .to_constant ();
9791 vec<constructor_elt, va_gc> *ctor_elts;
9792 vec_alloc (ctor_elts, const_nunits);
9793 gimple_seq stmts = NULL;
9794 /* We support offset vectors with more elements
9795 than the data vector for now. */
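			  /* For example, with a V2DI data vector
			     (CONST_NUNITS == 2) and a V4SI offset vector
			     (CONST_OFFSET_NUNITS == 4), FACTOR == 2: copy J
			     uses vec_offsets[J / 2] starting at offset
			     element (J % 2) * 2.  */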
9796 unsigned HOST_WIDE_INT factor
9797 = const_offset_nunits / const_nunits;
9798 vec_offset = vec_offsets[j / factor];
9799 unsigned elt_offset = (j % factor) * const_nunits;
9800 tree idx_type = TREE_TYPE (TREE_TYPE (vec_offset));
9801 tree scale = size_int (gs_info.scale);
9802 align
9803 = get_object_alignment (DR_REF (first_dr_info->dr));
9804 tree ltype = build_aligned_type (TREE_TYPE (vectype),
9805 align);
9806 for (unsigned k = 0; k < const_nunits; ++k)
9807 {
9808 tree boff = size_binop (MULT_EXPR,
9809 TYPE_SIZE (idx_type),
9810 bitsize_int
9811 (k + elt_offset));
9812 tree idx = gimple_build (&stmts, BIT_FIELD_REF,
9813 idx_type, vec_offset,
9814 TYPE_SIZE (idx_type),
9815 boff);
9816 idx = gimple_convert (&stmts, sizetype, idx);
9817 idx = gimple_build (&stmts, MULT_EXPR,
9818 sizetype, idx, scale);
9819 tree ptr = gimple_build (&stmts, PLUS_EXPR,
9820 TREE_TYPE (dataref_ptr),
9821 dataref_ptr, idx);
9822 ptr = gimple_convert (&stmts, ptr_type_node, ptr);
9823 tree elt = make_ssa_name (TREE_TYPE (vectype));
9824 tree ref = build2 (MEM_REF, ltype, ptr,
9825 build_int_cst (ref_type, 0));
9826 new_stmt = gimple_build_assign (elt, ref);
9827 gimple_seq_add_stmt (&stmts, new_stmt);
9828 CONSTRUCTOR_APPEND_ELT (ctor_elts, NULL_TREE, elt);
9829 }
9830 gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
9831 new_stmt = gimple_build_assign (NULL_TREE,
9832 build_constructor
9833 (vectype, ctor_elts));
9834 data_ref = NULL_TREE;
9835 break;
9836 }
9837
9838 align =
9839 known_alignment (DR_TARGET_ALIGNMENT (first_dr_info));
9840 if (alignment_support_scheme == dr_aligned)
9841 misalign = 0;
9842 else if (misalignment == DR_MISALIGNMENT_UNKNOWN)
9843 {
9844 align = dr_alignment
9845 (vect_dr_behavior (vinfo, first_dr_info));
9846 misalign = 0;
9847 }
9848 else
9849 misalign = misalignment;
9850 if (dataref_offset == NULL_TREE
9851 && TREE_CODE (dataref_ptr) == SSA_NAME)
9852 set_ptr_info_alignment (get_ptr_info (dataref_ptr),
9853 align, misalign);
9854 align = least_bit_hwi (misalign | align);
9855
9856 if (final_mask)
9857 {
9858 tree ptr = build_int_cst (ref_type,
9859 align * BITS_PER_UNIT);
9860 gcall *call
9861 = gimple_build_call_internal (IFN_MASK_LOAD, 3,
9862 dataref_ptr, ptr,
9863 final_mask);
9864 gimple_call_set_nothrow (call, true);
9865 new_stmt = call;
9866 data_ref = NULL_TREE;
9867 }
9868 else if (loop_lens && memory_access_type != VMAT_INVARIANT)
9869 {
9870 tree final_len
9871 = vect_get_loop_len (loop_vinfo, loop_lens,
9872 vec_num * ncopies,
9873 vec_num * j + i);
9874 tree ptr = build_int_cst (ref_type,
9875 align * BITS_PER_UNIT);
9876
9877 machine_mode vmode = TYPE_MODE (vectype);
9878 opt_machine_mode new_ovmode
9879 = get_len_load_store_mode (vmode, true);
9880 machine_mode new_vmode = new_ovmode.require ();
9881 tree qi_type = unsigned_intQI_type_node;
9882
9883 signed char biasval =
9884 LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo);
9885
9886 tree bias = build_int_cst (intQI_type_node, biasval);
9887
9888 gcall *call
9889 = gimple_build_call_internal (IFN_LEN_LOAD, 4,
9890 dataref_ptr, ptr,
9891 final_len, bias);
9892 gimple_call_set_nothrow (call, true);
9893 new_stmt = call;
9894 data_ref = NULL_TREE;
9895
9896 /* Need conversion if it's wrapped with VnQI. */
9897 if (vmode != new_vmode)
9898 {
9899 tree new_vtype
9900 = build_vector_type_for_mode (qi_type, new_vmode);
9901 tree var = vect_get_new_ssa_name (new_vtype,
9902 vect_simple_var);
9903 gimple_set_lhs (call, var);
9904 vect_finish_stmt_generation (vinfo, stmt_info, call,
9905 gsi);
9906 tree op = build1 (VIEW_CONVERT_EXPR, vectype, var);
9907 new_stmt
9908 = gimple_build_assign (vec_dest,
9909 VIEW_CONVERT_EXPR, op);
9910 }
9911 }
9912 else
9913 {
9914 tree ltype = vectype;
9915 tree new_vtype = NULL_TREE;
9916 unsigned HOST_WIDE_INT gap
9917 = DR_GROUP_GAP (first_stmt_info);
9918 unsigned int vect_align
9919 = vect_known_alignment_in_bytes (first_dr_info,
9920 vectype);
9921 unsigned int scalar_dr_size
9922 = vect_get_scalar_dr_size (first_dr_info);
9923 /* If there's no peeling for gaps but we have a gap
9924 with slp loads then load the lower half of the
9925 vector only. See get_group_load_store_type for
9926 when we apply this optimization. */
9927 if (slp
9928 && loop_vinfo
9929 && !LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)
9930 && gap != 0
9931 && known_eq (nunits, (group_size - gap) * 2)
9932 && known_eq (nunits, group_size)
9933 && gap >= (vect_align / scalar_dr_size))
9934 {
9935 tree half_vtype;
9936 new_vtype
9937 = vector_vector_composition_type (vectype, 2,
9938 &half_vtype);
9939 if (new_vtype != NULL_TREE)
9940 ltype = half_vtype;
9941 }
9942 tree offset
9943 = (dataref_offset ? dataref_offset
9944 : build_int_cst (ref_type, 0));
9945 if (ltype != vectype
9946 && memory_access_type == VMAT_CONTIGUOUS_REVERSE)
9947 {
9948 unsigned HOST_WIDE_INT gap_offset
9949 = gap * tree_to_uhwi (TYPE_SIZE_UNIT (elem_type));
9950 tree gapcst = build_int_cst (ref_type, gap_offset);
9951 offset = size_binop (PLUS_EXPR, offset, gapcst);
9952 }
9953 data_ref
9954 = fold_build2 (MEM_REF, ltype, dataref_ptr, offset);
9955 if (alignment_support_scheme == dr_aligned)
9956 ;
9957 else
9958 TREE_TYPE (data_ref)
9959 = build_aligned_type (TREE_TYPE (data_ref),
9960 align * BITS_PER_UNIT);
9961 if (ltype != vectype)
9962 {
9963 vect_copy_ref_info (data_ref,
9964 DR_REF (first_dr_info->dr));
9965 tree tem = make_ssa_name (ltype);
9966 new_stmt = gimple_build_assign (tem, data_ref);
9967 vect_finish_stmt_generation (vinfo, stmt_info,
9968 new_stmt, gsi);
9969 data_ref = NULL;
9970 vec<constructor_elt, va_gc> *v;
9971 vec_alloc (v, 2);
9972 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
9973 {
9974 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
9975 build_zero_cst (ltype));
9976 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, tem);
9977 }
9978 else
9979 {
9980 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, tem);
9981 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
9982 build_zero_cst (ltype));
9983 }
9984 gcc_assert (new_vtype != NULL_TREE);
9985 if (new_vtype == vectype)
9986 new_stmt = gimple_build_assign (
9987 vec_dest, build_constructor (vectype, v));
9988 else
9989 {
9990 tree new_vname = make_ssa_name (new_vtype);
9991 new_stmt = gimple_build_assign (
9992 new_vname, build_constructor (new_vtype, v));
9993 vect_finish_stmt_generation (vinfo, stmt_info,
9994 new_stmt, gsi);
9995 new_stmt = gimple_build_assign (
9996 vec_dest, build1 (VIEW_CONVERT_EXPR, vectype,
9997 new_vname));
9998 }
9999 }
10000 }
10001 break;
10002 }
10003 case dr_explicit_realign:
10004 {
10005 tree ptr, bump;
10006
10007 tree vs = size_int (TYPE_VECTOR_SUBPARTS (vectype));
10008
10009 if (compute_in_loop)
10010 msq = vect_setup_realignment (vinfo, first_stmt_info, gsi,
10011 &realignment_token,
10012 dr_explicit_realign,
10013 dataref_ptr, NULL);
10014
10015 if (TREE_CODE (dataref_ptr) == SSA_NAME)
10016 ptr = copy_ssa_name (dataref_ptr);
10017 else
10018 ptr = make_ssa_name (TREE_TYPE (dataref_ptr));
10019 // For explicit realign the target alignment should be
10020 // known at compile time.
10021 unsigned HOST_WIDE_INT align =
10022 DR_TARGET_ALIGNMENT (first_dr_info).to_constant ();
10023 new_stmt = gimple_build_assign
10024 (ptr, BIT_AND_EXPR, dataref_ptr,
10025 build_int_cst
10026 (TREE_TYPE (dataref_ptr),
10027 -(HOST_WIDE_INT) align));
10028 vect_finish_stmt_generation (vinfo, stmt_info,
10029 new_stmt, gsi);
10030 data_ref
10031 = build2 (MEM_REF, vectype, ptr,
10032 build_int_cst (ref_type, 0));
10033 vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
10034 vec_dest = vect_create_destination_var (scalar_dest,
10035 vectype);
10036 new_stmt = gimple_build_assign (vec_dest, data_ref);
10037 new_temp = make_ssa_name (vec_dest, new_stmt);
10038 gimple_assign_set_lhs (new_stmt, new_temp);
10039 gimple_move_vops (new_stmt, stmt_info->stmt);
10040 vect_finish_stmt_generation (vinfo, stmt_info,
10041 new_stmt, gsi);
10042 msq = new_temp;
10043
10044 bump = size_binop (MULT_EXPR, vs,
10045 TYPE_SIZE_UNIT (elem_type));
10046 bump = size_binop (MINUS_EXPR, bump, size_one_node);
10047 ptr = bump_vector_ptr (vinfo, dataref_ptr, NULL, gsi,
10048 stmt_info, bump);
10049 new_stmt = gimple_build_assign
10050 (NULL_TREE, BIT_AND_EXPR, ptr,
10051 build_int_cst
10052 (TREE_TYPE (ptr), -(HOST_WIDE_INT) align));
10053 ptr = copy_ssa_name (ptr, new_stmt);
10054 gimple_assign_set_lhs (new_stmt, ptr);
10055 vect_finish_stmt_generation (vinfo, stmt_info,
10056 new_stmt, gsi);
10057 data_ref
10058 = build2 (MEM_REF, vectype, ptr,
10059 build_int_cst (ref_type, 0));
10060 break;
10061 }
10062 case dr_explicit_realign_optimized:
10063 {
10064 if (TREE_CODE (dataref_ptr) == SSA_NAME)
10065 new_temp = copy_ssa_name (dataref_ptr);
10066 else
10067 new_temp = make_ssa_name (TREE_TYPE (dataref_ptr));
10068 // We should only be doing this if we know the target
10069 // alignment at compile time.
10070 unsigned HOST_WIDE_INT align =
10071 DR_TARGET_ALIGNMENT (first_dr_info).to_constant ();
10072 new_stmt = gimple_build_assign
10073 (new_temp, BIT_AND_EXPR, dataref_ptr,
10074 build_int_cst (TREE_TYPE (dataref_ptr),
10075 -(HOST_WIDE_INT) align));
10076 vect_finish_stmt_generation (vinfo, stmt_info,
10077 new_stmt, gsi);
10078 data_ref
10079 = build2 (MEM_REF, vectype, new_temp,
10080 build_int_cst (ref_type, 0));
10081 break;
10082 }
10083 default:
10084 gcc_unreachable ();
10085 }
10086 vec_dest = vect_create_destination_var (scalar_dest, vectype);
10087 /* DATA_REF is null if we've already built the statement. */
10088 if (data_ref)
10089 {
10090 vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
10091 new_stmt = gimple_build_assign (vec_dest, data_ref);
10092 }
10093 new_temp = make_ssa_name (vec_dest, new_stmt);
10094 gimple_set_lhs (new_stmt, new_temp);
10095 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
10096
10097 /* 3. Handle explicit realignment if necessary/supported.
10098 Create in loop:
10099 vec_dest = realign_load (msq, lsq, realignment_token) */
10100 if (alignment_support_scheme == dr_explicit_realign_optimized
10101 || alignment_support_scheme == dr_explicit_realign)
10102 {
10103 lsq = gimple_assign_lhs (new_stmt);
10104 if (!realignment_token)
10105 realignment_token = dataref_ptr;
10106 vec_dest = vect_create_destination_var (scalar_dest, vectype);
10107 new_stmt = gimple_build_assign (vec_dest, REALIGN_LOAD_EXPR,
10108 msq, lsq, realignment_token);
10109 new_temp = make_ssa_name (vec_dest, new_stmt);
10110 gimple_assign_set_lhs (new_stmt, new_temp);
10111 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
10112
10113 if (alignment_support_scheme == dr_explicit_realign_optimized)
10114 {
10115 gcc_assert (phi);
10116 if (i == vec_num - 1 && j == ncopies - 1)
10117 add_phi_arg (phi, lsq,
10118 loop_latch_edge (containing_loop),
10119 UNKNOWN_LOCATION);
10120 msq = lsq;
10121 }
10122 }
10123
10124 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
10125 {
10126 tree perm_mask = perm_mask_for_reverse (vectype);
10127 new_temp = permute_vec_elements (vinfo, new_temp, new_temp,
10128 perm_mask, stmt_info, gsi);
10129 new_stmt = SSA_NAME_DEF_STMT (new_temp);
10130 }
10131
10132 /* Collect vector loads and later create their permutation in
10133 vect_transform_grouped_load (). */
10134 if (grouped_load || slp_perm)
10135 dr_chain.quick_push (new_temp);
10136
10137 /* Store vector loads in the corresponding SLP_NODE. */
10138 if (slp && !slp_perm)
10139 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
10140
10141 /* With SLP permutation we load the gaps as well; without
10142 it we need to skip the gaps after we manage to fully load
10143 all elements.  group_gap_adj is DR_GROUP_SIZE here. */
10144 group_elt += nunits;
10145 if (maybe_ne (group_gap_adj, 0U)
10146 && !slp_perm
10147 && known_eq (group_elt, group_size - group_gap_adj))
10148 {
10149 poly_wide_int bump_val
10150 = (wi::to_wide (TYPE_SIZE_UNIT (elem_type))
10151 * group_gap_adj);
10152 if (tree_int_cst_sgn
10153 (vect_dr_behavior (vinfo, dr_info)->step) == -1)
10154 bump_val = -bump_val;
10155 tree bump = wide_int_to_tree (sizetype, bump_val);
10156 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr,
10157 gsi, stmt_info, bump);
10158 group_elt = 0;
10159 }
10160 }
10161 /* Bump the vector pointer to account for a gap or for excess
10162 elements loaded for a permuted SLP load. */
10163 if (maybe_ne (group_gap_adj, 0U) && slp_perm)
10164 {
10165 poly_wide_int bump_val
10166 = (wi::to_wide (TYPE_SIZE_UNIT (elem_type))
10167 * group_gap_adj);
10168 if (tree_int_cst_sgn
10169 (vect_dr_behavior (vinfo, dr_info)->step) == -1)
10170 bump_val = -bump_val;
10171 tree bump = wide_int_to_tree (sizetype, bump_val);
10172 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
10173 stmt_info, bump);
10174 }
10175 }
10176
10177 if (slp && !slp_perm)
10178 continue;
10179
10180 if (slp_perm)
10181 {
10182 unsigned n_perms;
10183 /* For SLP we know we've seen all possible uses of dr_chain so
10184 direct vect_transform_slp_perm_load to DCE the unused parts.
10185 ??? This is a hack to prevent compile-time issues as seen
10186 in PR101120 and friends. */
10187 bool ok = vect_transform_slp_perm_load (vinfo, slp_node, dr_chain,
10188 gsi, vf, false, &n_perms,
10189 nullptr, true);
10190 gcc_assert (ok);
10191 }
10192 else
10193 {
10194 if (grouped_load)
10195 {
10196 if (memory_access_type != VMAT_LOAD_STORE_LANES)
10197 vect_transform_grouped_load (vinfo, stmt_info, dr_chain,
10198 group_size, gsi);
10199 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
10200 }
10201 else
10202 {
10203 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
10204 }
10205 }
10206 dr_chain.release ();
10207 }
10208 if (!slp)
10209 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
10210
10211 return true;
10212 }
10213
10214 /* Function vect_is_simple_cond.
10215
10216 Input:
10217 COND - the condition that is checked for simple use.
10218 VINFO/STMT_INFO/SLP_NODE - the vectorization context for the operands.
10219
10220 Output:
10221 *COMP_VECTYPE - the vector type for the comparison.
10222 *DTS - The def types for the arguments of the comparison
10223
10224 Returns whether COND can be vectorized.  Checks whether the
10225 condition operands are supportable using vect_is_simple_use. */
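/* A minimal illustration (editorial sketch, not part of the original
   comment): for a scalar statement such as

     x_5 = a_2 < b_3 ? c_4 : d_1;

   COND is the tree `a_2 < b_3', *COMP_VECTYPE becomes the vector type
   chosen for the comparison operands and DTS[0]/DTS[1] their def types.
   In the mask case COND is instead a boolean SSA_NAME, as in
   `x_5 = mask_7 ? c_4 : d_1;'.  */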
10226
10227 static bool
10228 vect_is_simple_cond (tree cond, vec_info *vinfo, stmt_vec_info stmt_info,
10229 slp_tree slp_node, tree *comp_vectype,
10230 enum vect_def_type *dts, tree vectype)
10231 {
10232 tree lhs, rhs;
10233 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
10234 slp_tree slp_op;
10235
10236 /* Mask case. */
10237 if (TREE_CODE (cond) == SSA_NAME
10238 && VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (cond)))
10239 {
10240 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 0, &cond,
10241 &slp_op, &dts[0], comp_vectype)
10242 || !*comp_vectype
10243 || !VECTOR_BOOLEAN_TYPE_P (*comp_vectype))
10244 return false;
10245 return true;
10246 }
10247
10248 if (!COMPARISON_CLASS_P (cond))
10249 return false;
10250
10251 lhs = TREE_OPERAND (cond, 0);
10252 rhs = TREE_OPERAND (cond, 1);
10253
10254 if (TREE_CODE (lhs) == SSA_NAME)
10255 {
10256 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 0,
10257 &lhs, &slp_op, &dts[0], &vectype1))
10258 return false;
10259 }
10260 else if (TREE_CODE (lhs) == INTEGER_CST || TREE_CODE (lhs) == REAL_CST
10261 || TREE_CODE (lhs) == FIXED_CST)
10262 dts[0] = vect_constant_def;
10263 else
10264 return false;
10265
10266 if (TREE_CODE (rhs) == SSA_NAME)
10267 {
10268 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 1,
10269 &rhs, &slp_op, &dts[1], &vectype2))
10270 return false;
10271 }
10272 else if (TREE_CODE (rhs) == INTEGER_CST || TREE_CODE (rhs) == REAL_CST
10273 || TREE_CODE (rhs) == FIXED_CST)
10274 dts[1] = vect_constant_def;
10275 else
10276 return false;
10277
10278 if (vectype1 && vectype2
10279 && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1),
10280 TYPE_VECTOR_SUBPARTS (vectype2)))
10281 return false;
10282
10283 *comp_vectype = vectype1 ? vectype1 : vectype2;
10284 /* Invariant comparison. */
10285 if (! *comp_vectype)
10286 {
10287 tree scalar_type = TREE_TYPE (lhs);
10288 if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type))
10289 *comp_vectype = truth_type_for (vectype);
10290 else
10291 {
10292 /* If we can widen the comparison to match vectype do so. */
10293 if (INTEGRAL_TYPE_P (scalar_type)
10294 && !slp_node
10295 && tree_int_cst_lt (TYPE_SIZE (scalar_type),
10296 TYPE_SIZE (TREE_TYPE (vectype))))
10297 scalar_type = build_nonstandard_integer_type
10298 (vector_element_bits (vectype), TYPE_UNSIGNED (scalar_type));
10299 *comp_vectype = get_vectype_for_scalar_type (vinfo, scalar_type,
10300 slp_node);
10301 }
10302 }
10303
10304 return true;
10305 }
10306
10307 /* vectorizable_condition.
10308
10309 Check if STMT_INFO is a conditional modify expression that can be vectorized.
10310 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
10311 stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
10312 at GSI.
10313
10314 When STMT_INFO is vectorized as a nested cycle, for_reduction is true.
10315
10316 Return true if STMT_INFO is vectorizable in this way. */
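/* Illustrative sketch (not from the original sources): a scalar loop body

     r[i] = a[i] < b[i] ? x[i] : y[i];

   is typically vectorized into a vector comparison feeding a
   VEC_COND_EXPR, roughly

     mask = va < vb;
     vr = VEC_COND_EXPR <mask, vx, vy>;

   with MASK possibly ANDed with a loop mask when partial vectors are
   in use (see the comments in the transform phase below).  */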
10317
10318 static bool
10319 vectorizable_condition (vec_info *vinfo,
10320 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
10321 gimple **vec_stmt,
10322 slp_tree slp_node, stmt_vector_for_cost *cost_vec)
10323 {
10324 tree scalar_dest = NULL_TREE;
10325 tree vec_dest = NULL_TREE;
10326 tree cond_expr, cond_expr0 = NULL_TREE, cond_expr1 = NULL_TREE;
10327 tree then_clause, else_clause;
10328 tree comp_vectype = NULL_TREE;
10329 tree vec_cond_lhs = NULL_TREE, vec_cond_rhs = NULL_TREE;
10330 tree vec_then_clause = NULL_TREE, vec_else_clause = NULL_TREE;
10331 tree vec_compare;
10332 tree new_temp;
10333 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
10334 enum vect_def_type dts[4]
10335 = {vect_unknown_def_type, vect_unknown_def_type,
10336 vect_unknown_def_type, vect_unknown_def_type};
10337 int ndts = 4;
10338 int ncopies;
10339 int vec_num;
10340 enum tree_code code, cond_code, bitop1 = NOP_EXPR, bitop2 = NOP_EXPR;
10341 int i;
10342 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
10343 vec<tree> vec_oprnds0 = vNULL;
10344 vec<tree> vec_oprnds1 = vNULL;
10345 vec<tree> vec_oprnds2 = vNULL;
10346 vec<tree> vec_oprnds3 = vNULL;
10347 tree vec_cmp_type;
10348 bool masked = false;
10349
10350 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
10351 return false;
10352
10353 /* Is vectorizable conditional operation? */
10354 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
10355 if (!stmt)
10356 return false;
10357
10358 code = gimple_assign_rhs_code (stmt);
10359 if (code != COND_EXPR)
10360 return false;
10361
10362 stmt_vec_info reduc_info = NULL;
10363 int reduc_index = -1;
10364 vect_reduction_type reduction_type = TREE_CODE_REDUCTION;
10365 bool for_reduction
10366 = STMT_VINFO_REDUC_DEF (vect_orig_stmt (stmt_info)) != NULL;
10367 if (for_reduction)
10368 {
10369 if (slp_node)
10370 return false;
10371 reduc_info = info_for_reduction (vinfo, stmt_info);
10372 reduction_type = STMT_VINFO_REDUC_TYPE (reduc_info);
10373 reduc_index = STMT_VINFO_REDUC_IDX (stmt_info);
10374 gcc_assert (reduction_type != EXTRACT_LAST_REDUCTION
10375 || reduc_index != -1);
10376 }
10377 else
10378 {
10379 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
10380 return false;
10381 }
10382
10383 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
10384 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
10385
10386 if (slp_node)
10387 {
10388 ncopies = 1;
10389 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
10390 }
10391 else
10392 {
10393 ncopies = vect_get_num_copies (loop_vinfo, vectype);
10394 vec_num = 1;
10395 }
10396
10397 gcc_assert (ncopies >= 1);
10398 if (for_reduction && ncopies > 1)
10399 return false; /* FORNOW */
10400
10401 cond_expr = gimple_assign_rhs1 (stmt);
10402
10403 if (!vect_is_simple_cond (cond_expr, vinfo, stmt_info, slp_node,
10404 &comp_vectype, &dts[0], vectype)
10405 || !comp_vectype)
10406 return false;
10407
10408 unsigned op_adjust = COMPARISON_CLASS_P (cond_expr) ? 1 : 0;
10409 slp_tree then_slp_node, else_slp_node;
10410 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 1 + op_adjust,
10411 &then_clause, &then_slp_node, &dts[2], &vectype1))
10412 return false;
10413 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 2 + op_adjust,
10414 &else_clause, &else_slp_node, &dts[3], &vectype2))
10415 return false;
10416
10417 if (vectype1 && !useless_type_conversion_p (vectype, vectype1))
10418 return false;
10419
10420 if (vectype2 && !useless_type_conversion_p (vectype, vectype2))
10421 return false;
10422
10423 masked = !COMPARISON_CLASS_P (cond_expr);
10424 vec_cmp_type = truth_type_for (comp_vectype);
10425
10426 if (vec_cmp_type == NULL_TREE)
10427 return false;
10428
10429 cond_code = TREE_CODE (cond_expr);
10430 if (!masked)
10431 {
10432 cond_expr0 = TREE_OPERAND (cond_expr, 0);
10433 cond_expr1 = TREE_OPERAND (cond_expr, 1);
10434 }
10435
10436 /* For conditional reductions, the "then" value needs to be the candidate
10437 value calculated by this iteration while the "else" value needs to be
10438 the result carried over from previous iterations. If the COND_EXPR
10439 is the other way around, we need to swap it. */
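/* Hypothetical example for illustration: for a conditional reduction
   written as

     last = cond[i] ? last : a[i];

   the carried-over value sits in the "then" position, so the comparison
   is inverted (or the compare result negated when that is not possible)
   and the then/else operands are swapped so that the new candidate a[i]
   ends up in the "then" slot expected by IFN_FOLD_EXTRACT_LAST.  */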
10440 bool must_invert_cmp_result = false;
10441 if (reduction_type == EXTRACT_LAST_REDUCTION && reduc_index == 1)
10442 {
10443 if (masked)
10444 must_invert_cmp_result = true;
10445 else
10446 {
10447 bool honor_nans = HONOR_NANS (TREE_TYPE (cond_expr0));
10448 tree_code new_code = invert_tree_comparison (cond_code, honor_nans);
10449 if (new_code == ERROR_MARK)
10450 must_invert_cmp_result = true;
10451 else
10452 {
10453 cond_code = new_code;
10454 /* Make sure we don't accidentally use the old condition. */
10455 cond_expr = NULL_TREE;
10456 }
10457 }
10458 std::swap (then_clause, else_clause);
10459 }
10460
10461 if (!masked && VECTOR_BOOLEAN_TYPE_P (comp_vectype))
10462 {
10463 /* Boolean values may have another representation in vectors
10464 and therefore we prefer bit operations over comparison for
10465 them (which also works for scalar masks). We store opcodes
10466 to use in bitop1 and bitop2. Statement is vectorized as
10467 BITOP2 (rhs1 BITOP1 rhs2) or rhs1 BITOP2 (BITOP1 rhs2)
10468 depending on bitop1 and bitop2 arity. */
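/* For example (illustration only): on mask operands GT_EXPR becomes
   rhs1 & ~rhs2, GE_EXPR becomes rhs1 | ~rhs2, NE_EXPR becomes
   rhs1 ^ rhs2 and EQ_EXPR becomes ~(rhs1 ^ rhs2); LT_EXPR and LE_EXPR
   are handled by swapping the operands first.  */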
10469 switch (cond_code)
10470 {
10471 case GT_EXPR:
10472 bitop1 = BIT_NOT_EXPR;
10473 bitop2 = BIT_AND_EXPR;
10474 break;
10475 case GE_EXPR:
10476 bitop1 = BIT_NOT_EXPR;
10477 bitop2 = BIT_IOR_EXPR;
10478 break;
10479 case LT_EXPR:
10480 bitop1 = BIT_NOT_EXPR;
10481 bitop2 = BIT_AND_EXPR;
10482 std::swap (cond_expr0, cond_expr1);
10483 break;
10484 case LE_EXPR:
10485 bitop1 = BIT_NOT_EXPR;
10486 bitop2 = BIT_IOR_EXPR;
10487 std::swap (cond_expr0, cond_expr1);
10488 break;
10489 case NE_EXPR:
10490 bitop1 = BIT_XOR_EXPR;
10491 break;
10492 case EQ_EXPR:
10493 bitop1 = BIT_XOR_EXPR;
10494 bitop2 = BIT_NOT_EXPR;
10495 break;
10496 default:
10497 return false;
10498 }
10499 cond_code = SSA_NAME;
10500 }
10501
10502 if (TREE_CODE_CLASS (cond_code) == tcc_comparison
10503 && reduction_type == EXTRACT_LAST_REDUCTION
10504 && !expand_vec_cmp_expr_p (comp_vectype, vec_cmp_type, cond_code))
10505 {
10506 if (dump_enabled_p ())
10507 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10508 "reduction comparison operation not supported.\n");
10509 return false;
10510 }
10511
10512 if (!vec_stmt)
10513 {
10514 if (bitop1 != NOP_EXPR)
10515 {
10516 machine_mode mode = TYPE_MODE (comp_vectype);
10517 optab optab;
10518
10519 optab = optab_for_tree_code (bitop1, comp_vectype, optab_default);
10520 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
10521 return false;
10522
10523 if (bitop2 != NOP_EXPR)
10524 {
10525 optab = optab_for_tree_code (bitop2, comp_vectype,
10526 optab_default);
10527 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
10528 return false;
10529 }
10530 }
10531
10532 vect_cost_for_stmt kind = vector_stmt;
10533 if (reduction_type == EXTRACT_LAST_REDUCTION)
10534 /* Count one reduction-like operation per vector. */
10535 kind = vec_to_scalar;
10536 else if (!expand_vec_cond_expr_p (vectype, comp_vectype, cond_code))
10537 return false;
10538
10539 if (slp_node
10540 && (!vect_maybe_update_slp_op_vectype
10541 (SLP_TREE_CHILDREN (slp_node)[0], comp_vectype)
10542 || (op_adjust == 1
10543 && !vect_maybe_update_slp_op_vectype
10544 (SLP_TREE_CHILDREN (slp_node)[1], comp_vectype))
10545 || !vect_maybe_update_slp_op_vectype (then_slp_node, vectype)
10546 || !vect_maybe_update_slp_op_vectype (else_slp_node, vectype)))
10547 {
10548 if (dump_enabled_p ())
10549 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10550 "incompatible vector types for invariants\n");
10551 return false;
10552 }
10553
10554 if (loop_vinfo && for_reduction
10555 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo))
10556 {
10557 if (reduction_type == EXTRACT_LAST_REDUCTION)
10558 vect_record_loop_mask (loop_vinfo, &LOOP_VINFO_MASKS (loop_vinfo),
10559 ncopies * vec_num, vectype, NULL);
10560 /* Extra inactive lanes should be safe for vect_nested_cycle. */
10561 else if (STMT_VINFO_DEF_TYPE (reduc_info) != vect_nested_cycle)
10562 {
10563 if (dump_enabled_p ())
10564 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10565 "conditional reduction prevents the use"
10566 " of partial vectors.\n");
10567 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
10568 }
10569 }
10570
10571 STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type;
10572 vect_model_simple_cost (vinfo, stmt_info, ncopies, dts, ndts, slp_node,
10573 cost_vec, kind);
10574 return true;
10575 }
10576
10577 /* Transform. */
10578
10579 /* Handle def. */
10580 scalar_dest = gimple_assign_lhs (stmt);
10581 if (reduction_type != EXTRACT_LAST_REDUCTION)
10582 vec_dest = vect_create_destination_var (scalar_dest, vectype);
10583
10584 bool swap_cond_operands = false;
10585
10586 /* See whether another part of the vectorized code applies a loop
10587 mask to the condition, or to its inverse. */
10588
10589 vec_loop_masks *masks = NULL;
10590 if (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo))
10591 {
10592 if (reduction_type == EXTRACT_LAST_REDUCTION)
10593 masks = &LOOP_VINFO_MASKS (loop_vinfo);
10594 else
10595 {
10596 scalar_cond_masked_key cond (cond_expr, ncopies);
10597 if (loop_vinfo->scalar_cond_masked_set.contains (cond))
10598 masks = &LOOP_VINFO_MASKS (loop_vinfo);
10599 else
10600 {
10601 bool honor_nans = HONOR_NANS (TREE_TYPE (cond.op0));
10602 tree_code orig_code = cond.code;
10603 cond.code = invert_tree_comparison (cond.code, honor_nans);
10604 if (!masked && loop_vinfo->scalar_cond_masked_set.contains (cond))
10605 {
10606 masks = &LOOP_VINFO_MASKS (loop_vinfo);
10607 cond_code = cond.code;
10608 swap_cond_operands = true;
10609 }
10610 else
10611 {
10612 /* Try the inverse of the current mask. We check if the
10613 inverse mask is live and if so we generate a negate of
10614 the current mask such that we still honor NaNs. */
10615 cond.inverted_p = true;
10616 cond.code = orig_code;
10617 if (loop_vinfo->scalar_cond_masked_set.contains (cond))
10618 {
10619 masks = &LOOP_VINFO_MASKS (loop_vinfo);
10620 cond_code = cond.code;
10621 swap_cond_operands = true;
10622 must_invert_cmp_result = true;
10623 }
10624 }
10625 }
10626 }
10627 }
10628
10629 /* Handle cond expr. */
10630 if (masked)
10631 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
10632 cond_expr, &vec_oprnds0, comp_vectype,
10633 then_clause, &vec_oprnds2, vectype,
10634 reduction_type != EXTRACT_LAST_REDUCTION
10635 ? else_clause : NULL, &vec_oprnds3, vectype);
10636 else
10637 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
10638 cond_expr0, &vec_oprnds0, comp_vectype,
10639 cond_expr1, &vec_oprnds1, comp_vectype,
10640 then_clause, &vec_oprnds2, vectype,
10641 reduction_type != EXTRACT_LAST_REDUCTION
10642 ? else_clause : NULL, &vec_oprnds3, vectype);
10643
10644 /* Arguments are ready. Create the new vector stmt. */
10645 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_cond_lhs)
10646 {
10647 vec_then_clause = vec_oprnds2[i];
10648 if (reduction_type != EXTRACT_LAST_REDUCTION)
10649 vec_else_clause = vec_oprnds3[i];
10650
10651 if (swap_cond_operands)
10652 std::swap (vec_then_clause, vec_else_clause);
10653
10654 if (masked)
10655 vec_compare = vec_cond_lhs;
10656 else
10657 {
10658 vec_cond_rhs = vec_oprnds1[i];
10659 if (bitop1 == NOP_EXPR)
10660 {
10661 gimple_seq stmts = NULL;
10662 vec_compare = gimple_build (&stmts, cond_code, vec_cmp_type,
10663 vec_cond_lhs, vec_cond_rhs);
10664 gsi_insert_before (gsi, stmts, GSI_SAME_STMT);
10665 }
10666 else
10667 {
10668 new_temp = make_ssa_name (vec_cmp_type);
10669 gassign *new_stmt;
10670 if (bitop1 == BIT_NOT_EXPR)
10671 new_stmt = gimple_build_assign (new_temp, bitop1,
10672 vec_cond_rhs);
10673 else
10674 new_stmt
10675 = gimple_build_assign (new_temp, bitop1, vec_cond_lhs,
10676 vec_cond_rhs);
10677 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
10678 if (bitop2 == NOP_EXPR)
10679 vec_compare = new_temp;
10680 else if (bitop2 == BIT_NOT_EXPR)
10681 {
10682 /* Instead of doing ~x ? y : z do x ? z : y. */
10683 vec_compare = new_temp;
10684 std::swap (vec_then_clause, vec_else_clause);
10685 }
10686 else
10687 {
10688 vec_compare = make_ssa_name (vec_cmp_type);
10689 new_stmt
10690 = gimple_build_assign (vec_compare, bitop2,
10691 vec_cond_lhs, new_temp);
10692 vect_finish_stmt_generation (vinfo, stmt_info,
10693 new_stmt, gsi);
10694 }
10695 }
10696 }
10697
10698 /* If we decided to apply a loop mask to the result of the vector
10699 comparison, AND the comparison with the mask now. Later passes
10700 should then be able to reuse the AND results between multiple
10701 vector statements.
10702
10703 For example:
10704 for (int i = 0; i < 100; ++i)
10705 x[i] = y[i] ? z[i] : 10;
10706
10707 results in following optimized GIMPLE:
10708
10709 mask__35.8_43 = vect__4.7_41 != { 0, ... };
10710 vec_mask_and_46 = loop_mask_40 & mask__35.8_43;
10711 _19 = &MEM[base: z_12(D), index: ivtmp_56, step: 4, offset: 0B];
10712 vect_iftmp.11_47 = .MASK_LOAD (_19, 4B, vec_mask_and_46);
10713 vect_iftmp.12_52 = VEC_COND_EXPR <vec_mask_and_46,
10714 vect_iftmp.11_47, { 10, ... }>;
10715
10716 instead of using both masked and unmasked forms of
10717 vec != { 0, ... } (masked in the MASK_LOAD,
10718 unmasked in the VEC_COND_EXPR). */
10719
10720 /* Force vec_compare to be an SSA_NAME rather than a comparison,
10721 in cases where that's necessary. */
10722
10723 if (masks || reduction_type == EXTRACT_LAST_REDUCTION)
10724 {
10725 if (!is_gimple_val (vec_compare))
10726 {
10727 tree vec_compare_name = make_ssa_name (vec_cmp_type);
10728 gassign *new_stmt = gimple_build_assign (vec_compare_name,
10729 vec_compare);
10730 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
10731 vec_compare = vec_compare_name;
10732 }
10733
10734 if (must_invert_cmp_result)
10735 {
10736 tree vec_compare_name = make_ssa_name (vec_cmp_type);
10737 gassign *new_stmt = gimple_build_assign (vec_compare_name,
10738 BIT_NOT_EXPR,
10739 vec_compare);
10740 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
10741 vec_compare = vec_compare_name;
10742 }
10743
10744 if (masks)
10745 {
10746 tree loop_mask
10747 = vect_get_loop_mask (gsi, masks, vec_num * ncopies,
10748 vectype, i);
10749 tree tmp2 = make_ssa_name (vec_cmp_type);
10750 gassign *g
10751 = gimple_build_assign (tmp2, BIT_AND_EXPR, vec_compare,
10752 loop_mask);
10753 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
10754 vec_compare = tmp2;
10755 }
10756 }
10757
10758 gimple *new_stmt;
10759 if (reduction_type == EXTRACT_LAST_REDUCTION)
10760 {
10761 gimple *old_stmt = vect_orig_stmt (stmt_info)->stmt;
10762 tree lhs = gimple_get_lhs (old_stmt);
10763 new_stmt = gimple_build_call_internal
10764 (IFN_FOLD_EXTRACT_LAST, 3, else_clause, vec_compare,
10765 vec_then_clause);
10766 gimple_call_set_lhs (new_stmt, lhs);
10767 SSA_NAME_DEF_STMT (lhs) = new_stmt;
10768 if (old_stmt == gsi_stmt (*gsi))
10769 vect_finish_replace_stmt (vinfo, stmt_info, new_stmt);
10770 else
10771 {
10772 /* In this case we're moving the definition to later in the
10773 block. That doesn't matter because the only uses of the
10774 lhs are in phi statements. */
10775 gimple_stmt_iterator old_gsi = gsi_for_stmt (old_stmt);
10776 gsi_remove (&old_gsi, true);
10777 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
10778 }
10779 }
10780 else
10781 {
10782 new_temp = make_ssa_name (vec_dest);
10783 new_stmt = gimple_build_assign (new_temp, VEC_COND_EXPR, vec_compare,
10784 vec_then_clause, vec_else_clause);
10785 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
10786 }
10787 if (slp_node)
10788 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
10789 else
10790 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
10791 }
10792
10793 if (!slp_node)
10794 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
10795
10796 vec_oprnds0.release ();
10797 vec_oprnds1.release ();
10798 vec_oprnds2.release ();
10799 vec_oprnds3.release ();
10800
10801 return true;
10802 }
10803
10804 /* vectorizable_comparison.
10805
10806 Check if STMT_INFO is a comparison expression that can be vectorized.
10807 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
10808 comparison, put it in VEC_STMT, and insert it at GSI.
10809
10810 Return true if STMT_INFO is vectorizable in this way. */
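/* Illustrative sketch (editorial note): a scalar statement such as

     mask_6 = a_2 < b_3;

   where mask_6 has a boolean type is vectorized into a vector
   comparison producing a VECTOR_BOOLEAN_TYPE_P mask, roughly

     vmask = va < vb;

   which can then feed masked loads/stores or VEC_COND_EXPRs.  */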
10811
10812 static bool
10813 vectorizable_comparison (vec_info *vinfo,
10814 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
10815 gimple **vec_stmt,
10816 slp_tree slp_node, stmt_vector_for_cost *cost_vec)
10817 {
10818 tree lhs, rhs1, rhs2;
10819 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
10820 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
10821 tree vec_rhs1 = NULL_TREE, vec_rhs2 = NULL_TREE;
10822 tree new_temp;
10823 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
10824 enum vect_def_type dts[2] = {vect_unknown_def_type, vect_unknown_def_type};
10825 int ndts = 2;
10826 poly_uint64 nunits;
10827 int ncopies;
10828 enum tree_code code, bitop1 = NOP_EXPR, bitop2 = NOP_EXPR;
10829 int i;
10830 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
10831 vec<tree> vec_oprnds0 = vNULL;
10832 vec<tree> vec_oprnds1 = vNULL;
10833 tree mask_type;
10834 tree mask;
10835
10836 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
10837 return false;
10838
10839 if (!vectype || !VECTOR_BOOLEAN_TYPE_P (vectype))
10840 return false;
10841
10842 mask_type = vectype;
10843 nunits = TYPE_VECTOR_SUBPARTS (vectype);
10844
10845 if (slp_node)
10846 ncopies = 1;
10847 else
10848 ncopies = vect_get_num_copies (loop_vinfo, vectype);
10849
10850 gcc_assert (ncopies >= 1);
10851 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
10852 return false;
10853
10854 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
10855 if (!stmt)
10856 return false;
10857
10858 code = gimple_assign_rhs_code (stmt);
10859
10860 if (TREE_CODE_CLASS (code) != tcc_comparison)
10861 return false;
10862
10863 slp_tree slp_rhs1, slp_rhs2;
10864 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
10865 0, &rhs1, &slp_rhs1, &dts[0], &vectype1))
10866 return false;
10867
10868 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
10869 1, &rhs2, &slp_rhs2, &dts[1], &vectype2))
10870 return false;
10871
10872 if (vectype1 && vectype2
10873 && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1),
10874 TYPE_VECTOR_SUBPARTS (vectype2)))
10875 return false;
10876
10877 vectype = vectype1 ? vectype1 : vectype2;
10878
10879 /* Invariant comparison. */
10880 if (!vectype)
10881 {
10882 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs1)))
10883 vectype = mask_type;
10884 else
10885 vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (rhs1),
10886 slp_node);
10887 if (!vectype || maybe_ne (TYPE_VECTOR_SUBPARTS (vectype), nunits))
10888 return false;
10889 }
10890 else if (maybe_ne (nunits, TYPE_VECTOR_SUBPARTS (vectype)))
10891 return false;
10892
10893 /* Can't compare mask and non-mask types. */
10894 if (vectype1 && vectype2
10895 && (VECTOR_BOOLEAN_TYPE_P (vectype1) ^ VECTOR_BOOLEAN_TYPE_P (vectype2)))
10896 return false;
10897
10898 /* Boolean values may have another representation in vectors
10899 and therefore we prefer bit operations over comparison for
10900 them (which also works for scalar masks). We store opcodes
10901 to use in bitop1 and bitop2. Statement is vectorized as
10902 BITOP2 (rhs1 BITOP1 rhs2) or
10903 rhs1 BITOP2 (BITOP1 rhs2)
10904 depending on bitop1 and bitop2 arity. */
10905 bool swap_p = false;
10906 if (VECTOR_BOOLEAN_TYPE_P (vectype))
10907 {
10908 if (code == GT_EXPR)
10909 {
10910 bitop1 = BIT_NOT_EXPR;
10911 bitop2 = BIT_AND_EXPR;
10912 }
10913 else if (code == GE_EXPR)
10914 {
10915 bitop1 = BIT_NOT_EXPR;
10916 bitop2 = BIT_IOR_EXPR;
10917 }
10918 else if (code == LT_EXPR)
10919 {
10920 bitop1 = BIT_NOT_EXPR;
10921 bitop2 = BIT_AND_EXPR;
10922 swap_p = true;
10923 }
10924 else if (code == LE_EXPR)
10925 {
10926 bitop1 = BIT_NOT_EXPR;
10927 bitop2 = BIT_IOR_EXPR;
10928 swap_p = true;
10929 }
10930 else
10931 {
10932 bitop1 = BIT_XOR_EXPR;
10933 if (code == EQ_EXPR)
10934 bitop2 = BIT_NOT_EXPR;
10935 }
10936 }
10937
10938 if (!vec_stmt)
10939 {
10940 if (bitop1 == NOP_EXPR)
10941 {
10942 if (!expand_vec_cmp_expr_p (vectype, mask_type, code))
10943 return false;
10944 }
10945 else
10946 {
10947 machine_mode mode = TYPE_MODE (vectype);
10948 optab optab;
10949
10950 optab = optab_for_tree_code (bitop1, vectype, optab_default);
10951 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
10952 return false;
10953
10954 if (bitop2 != NOP_EXPR)
10955 {
10956 optab = optab_for_tree_code (bitop2, vectype, optab_default);
10957 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
10958 return false;
10959 }
10960 }
10961
10962 /* Put types on constant and invariant SLP children. */
10963 if (slp_node
10964 && (!vect_maybe_update_slp_op_vectype (slp_rhs1, vectype)
10965 || !vect_maybe_update_slp_op_vectype (slp_rhs2, vectype)))
10966 {
10967 if (dump_enabled_p ())
10968 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10969 "incompatible vector types for invariants\n");
10970 return false;
10971 }
10972
10973 STMT_VINFO_TYPE (stmt_info) = comparison_vec_info_type;
10974 vect_model_simple_cost (vinfo, stmt_info,
10975 ncopies * (1 + (bitop2 != NOP_EXPR)),
10976 dts, ndts, slp_node, cost_vec);
10977 return true;
10978 }
10979
10980 /* Transform. */
10981
10982 /* Handle def. */
10983 lhs = gimple_assign_lhs (stmt);
10984 mask = vect_create_destination_var (lhs, mask_type);
10985
10986 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
10987 rhs1, &vec_oprnds0, vectype,
10988 rhs2, &vec_oprnds1, vectype);
10989 if (swap_p)
10990 std::swap (vec_oprnds0, vec_oprnds1);
10991
10992 /* Arguments are ready. Create the new vector stmt. */
10993 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_rhs1)
10994 {
10995 gimple *new_stmt;
10996 vec_rhs2 = vec_oprnds1[i];
10997
10998 new_temp = make_ssa_name (mask);
10999 if (bitop1 == NOP_EXPR)
11000 {
11001 new_stmt = gimple_build_assign (new_temp, code,
11002 vec_rhs1, vec_rhs2);
11003 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
11004 }
11005 else
11006 {
11007 if (bitop1 == BIT_NOT_EXPR)
11008 new_stmt = gimple_build_assign (new_temp, bitop1, vec_rhs2);
11009 else
11010 new_stmt = gimple_build_assign (new_temp, bitop1, vec_rhs1,
11011 vec_rhs2);
11012 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
11013 if (bitop2 != NOP_EXPR)
11014 {
11015 tree res = make_ssa_name (mask);
11016 if (bitop2 == BIT_NOT_EXPR)
11017 new_stmt = gimple_build_assign (res, bitop2, new_temp);
11018 else
11019 new_stmt = gimple_build_assign (res, bitop2, vec_rhs1,
11020 new_temp);
11021 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
11022 }
11023 }
11024 if (slp_node)
11025 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
11026 else
11027 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
11028 }
11029
11030 if (!slp_node)
11031 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
11032
11033 vec_oprnds0.release ();
11034 vec_oprnds1.release ();
11035
11036 return true;
11037 }
11038
11039 /* If SLP_NODE is nonnull, return true if vectorizable_live_operation
11040 can handle all live statements in the node. Otherwise return true
11041 if STMT_INFO is not live or if vectorizable_live_operation can handle it.
11042 GSI and VEC_STMT_P are as for vectorizable_live_operation. */
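/* Illustrative note: a statement is "live" when its scalar result is
   used outside the vectorized region, e.g.

     for (i = 0; i < n; ++i)
       last = a[i];
     use (last);

   where the final value of `last' has to be extracted from the last
   vector after the loop.  */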
11043
11044 static bool
11045 can_vectorize_live_stmts (vec_info *vinfo,
11046 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
11047 slp_tree slp_node, slp_instance slp_node_instance,
11048 bool vec_stmt_p,
11049 stmt_vector_for_cost *cost_vec)
11050 {
11051 if (slp_node)
11052 {
11053 stmt_vec_info slp_stmt_info;
11054 unsigned int i;
11055 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (slp_node), i, slp_stmt_info)
11056 {
11057 if (STMT_VINFO_LIVE_P (slp_stmt_info)
11058 && !vectorizable_live_operation (vinfo,
11059 slp_stmt_info, gsi, slp_node,
11060 slp_node_instance, i,
11061 vec_stmt_p, cost_vec))
11062 return false;
11063 }
11064 }
11065 else if (STMT_VINFO_LIVE_P (stmt_info)
11066 && !vectorizable_live_operation (vinfo, stmt_info, gsi,
11067 slp_node, slp_node_instance, -1,
11068 vec_stmt_p, cost_vec))
11069 return false;
11070
11071 return true;
11072 }
11073
11074 /* Make sure the statement is vectorizable. */
11075
11076 opt_result
11077 vect_analyze_stmt (vec_info *vinfo,
11078 stmt_vec_info stmt_info, bool *need_to_vectorize,
11079 slp_tree node, slp_instance node_instance,
11080 stmt_vector_for_cost *cost_vec)
11081 {
11082 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
11083 enum vect_relevant relevance = STMT_VINFO_RELEVANT (stmt_info);
11084 bool ok;
11085 gimple_seq pattern_def_seq;
11086
11087 if (dump_enabled_p ())
11088 dump_printf_loc (MSG_NOTE, vect_location, "==> examining statement: %G",
11089 stmt_info->stmt);
11090
11091 if (gimple_has_volatile_ops (stmt_info->stmt))
11092 return opt_result::failure_at (stmt_info->stmt,
11093 "not vectorized:"
11094 " stmt has volatile operands: %G\n",
11095 stmt_info->stmt);
11096
11097 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
11098 && node == NULL
11099 && (pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info)))
11100 {
11101 gimple_stmt_iterator si;
11102
11103 for (si = gsi_start (pattern_def_seq); !gsi_end_p (si); gsi_next (&si))
11104 {
11105 stmt_vec_info pattern_def_stmt_info
11106 = vinfo->lookup_stmt (gsi_stmt (si));
11107 if (STMT_VINFO_RELEVANT_P (pattern_def_stmt_info)
11108 || STMT_VINFO_LIVE_P (pattern_def_stmt_info))
11109 {
11110 /* Analyze def stmt of STMT if it's a pattern stmt. */
11111 if (dump_enabled_p ())
11112 dump_printf_loc (MSG_NOTE, vect_location,
11113 "==> examining pattern def statement: %G",
11114 pattern_def_stmt_info->stmt);
11115
11116 opt_result res
11117 = vect_analyze_stmt (vinfo, pattern_def_stmt_info,
11118 need_to_vectorize, node, node_instance,
11119 cost_vec);
11120 if (!res)
11121 return res;
11122 }
11123 }
11124 }
11125
11126 /* Skip stmts that do not need to be vectorized. In loops this is expected
11127 to include:
11128 - the COND_EXPR which is the loop exit condition
11129 - any LABEL_EXPRs in the loop
11130 - computations that are used only for array indexing or loop control.
11131 In basic blocks we only analyze statements that are a part of some SLP
11132 instance, therefore, all the statements are relevant.
11133
11134 The pattern statement needs to be analyzed instead of the original
11135 statement if the original statement is not relevant.  Otherwise, we
11136 analyze both statements.  In basic blocks we are called from some SLP
11137 instance traversal; there we don't analyze pattern stmts, since the
11138 pattern stmts will already be part of an SLP instance. */
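/* For instance (illustration only), a widening multiplication such as

     int prod = (int) s1 * (int) s2;   (s1, s2 of type short)

   may be replaced by a pattern statement using WIDEN_MULT_EXPR; when the
   original statement is not relevant it is that pattern statement which
   gets analyzed and later transformed.  */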
11139
11140 stmt_vec_info pattern_stmt_info = STMT_VINFO_RELATED_STMT (stmt_info);
11141 if (!STMT_VINFO_RELEVANT_P (stmt_info)
11142 && !STMT_VINFO_LIVE_P (stmt_info))
11143 {
11144 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
11145 && pattern_stmt_info
11146 && (STMT_VINFO_RELEVANT_P (pattern_stmt_info)
11147 || STMT_VINFO_LIVE_P (pattern_stmt_info)))
11148 {
11149 /* Analyze PATTERN_STMT instead of the original stmt. */
11150 stmt_info = pattern_stmt_info;
11151 if (dump_enabled_p ())
11152 dump_printf_loc (MSG_NOTE, vect_location,
11153 "==> examining pattern statement: %G",
11154 stmt_info->stmt);
11155 }
11156 else
11157 {
11158 if (dump_enabled_p ())
11159 dump_printf_loc (MSG_NOTE, vect_location, "irrelevant.\n");
11160
11161 return opt_result::success ();
11162 }
11163 }
11164 else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
11165 && node == NULL
11166 && pattern_stmt_info
11167 && (STMT_VINFO_RELEVANT_P (pattern_stmt_info)
11168 || STMT_VINFO_LIVE_P (pattern_stmt_info)))
11169 {
11170 /* Analyze PATTERN_STMT too. */
11171 if (dump_enabled_p ())
11172 dump_printf_loc (MSG_NOTE, vect_location,
11173 "==> examining pattern statement: %G",
11174 pattern_stmt_info->stmt);
11175
11176 opt_result res
11177 = vect_analyze_stmt (vinfo, pattern_stmt_info, need_to_vectorize, node,
11178 node_instance, cost_vec);
11179 if (!res)
11180 return res;
11181 }
11182
11183 switch (STMT_VINFO_DEF_TYPE (stmt_info))
11184 {
11185 case vect_internal_def:
11186 break;
11187
11188 case vect_reduction_def:
11189 case vect_nested_cycle:
11190 gcc_assert (!bb_vinfo
11191 && (relevance == vect_used_in_outer
11192 || relevance == vect_used_in_outer_by_reduction
11193 || relevance == vect_used_by_reduction
11194 || relevance == vect_unused_in_scope
11195 || relevance == vect_used_only_live));
11196 break;
11197
11198 case vect_induction_def:
11199 gcc_assert (!bb_vinfo);
11200 break;
11201
11202 case vect_constant_def:
11203 case vect_external_def:
11204 case vect_unknown_def_type:
11205 default:
11206 gcc_unreachable ();
11207 }
11208
11209 tree saved_vectype = STMT_VINFO_VECTYPE (stmt_info);
11210 if (node)
11211 STMT_VINFO_VECTYPE (stmt_info) = SLP_TREE_VECTYPE (node);
11212
11213 if (STMT_VINFO_RELEVANT_P (stmt_info))
11214 {
11215 gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
11216 gcc_assert (STMT_VINFO_VECTYPE (stmt_info)
11217 || (call && gimple_call_lhs (call) == NULL_TREE));
11218 *need_to_vectorize = true;
11219 }
11220
11221 if (PURE_SLP_STMT (stmt_info) && !node)
11222 {
11223 if (dump_enabled_p ())
11224 dump_printf_loc (MSG_NOTE, vect_location,
11225 "handled only by SLP analysis\n");
11226 return opt_result::success ();
11227 }
11228
11229 ok = true;
11230 if (!bb_vinfo
11231 && (STMT_VINFO_RELEVANT_P (stmt_info)
11232 || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def))
11233 /* Prefer vectorizable_call over vectorizable_simd_clone_call so
11234 -mveclibabi= takes preference over library functions with
11235 the simd attribute. */
11236 ok = (vectorizable_call (vinfo, stmt_info, NULL, NULL, node, cost_vec)
11237 || vectorizable_simd_clone_call (vinfo, stmt_info, NULL, NULL, node,
11238 cost_vec)
11239 || vectorizable_conversion (vinfo, stmt_info,
11240 NULL, NULL, node, cost_vec)
11241 || vectorizable_operation (vinfo, stmt_info,
11242 NULL, NULL, node, cost_vec)
11243 || vectorizable_assignment (vinfo, stmt_info,
11244 NULL, NULL, node, cost_vec)
11245 || vectorizable_load (vinfo, stmt_info, NULL, NULL, node, cost_vec)
11246 || vectorizable_store (vinfo, stmt_info, NULL, NULL, node, cost_vec)
11247 || vectorizable_reduction (as_a <loop_vec_info> (vinfo), stmt_info,
11248 node, node_instance, cost_vec)
11249 || vectorizable_induction (as_a <loop_vec_info> (vinfo), stmt_info,
11250 NULL, node, cost_vec)
11251 || vectorizable_shift (vinfo, stmt_info, NULL, NULL, node, cost_vec)
11252 || vectorizable_condition (vinfo, stmt_info,
11253 NULL, NULL, node, cost_vec)
11254 || vectorizable_comparison (vinfo, stmt_info, NULL, NULL, node,
11255 cost_vec)
11256 || vectorizable_lc_phi (as_a <loop_vec_info> (vinfo),
11257 stmt_info, NULL, node));
11258 else
11259 {
11260 if (bb_vinfo)
11261 ok = (vectorizable_call (vinfo, stmt_info, NULL, NULL, node, cost_vec)
11262 || vectorizable_simd_clone_call (vinfo, stmt_info,
11263 NULL, NULL, node, cost_vec)
11264 || vectorizable_conversion (vinfo, stmt_info, NULL, NULL, node,
11265 cost_vec)
11266 || vectorizable_shift (vinfo, stmt_info,
11267 NULL, NULL, node, cost_vec)
11268 || vectorizable_operation (vinfo, stmt_info,
11269 NULL, NULL, node, cost_vec)
11270 || vectorizable_assignment (vinfo, stmt_info, NULL, NULL, node,
11271 cost_vec)
11272 || vectorizable_load (vinfo, stmt_info,
11273 NULL, NULL, node, cost_vec)
11274 || vectorizable_store (vinfo, stmt_info,
11275 NULL, NULL, node, cost_vec)
11276 || vectorizable_condition (vinfo, stmt_info,
11277 NULL, NULL, node, cost_vec)
11278 || vectorizable_comparison (vinfo, stmt_info, NULL, NULL, node,
11279 cost_vec)
11280 || vectorizable_phi (vinfo, stmt_info, NULL, node, cost_vec));
11281 }
11282
11283 if (node)
11284 STMT_VINFO_VECTYPE (stmt_info) = saved_vectype;
11285
11286 if (!ok)
11287 return opt_result::failure_at (stmt_info->stmt,
11288 "not vectorized:"
11289 " relevant stmt not supported: %G",
11290 stmt_info->stmt);
11291
11292 /* Stmts that are (also) "live" (i.e. used out of the loop)
11293 need extra handling, except for vectorizable reductions. */
11294 if (!bb_vinfo
11295 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
11296 && STMT_VINFO_TYPE (stmt_info) != lc_phi_info_type
11297 && !can_vectorize_live_stmts (as_a <loop_vec_info> (vinfo),
11298 stmt_info, NULL, node, node_instance,
11299 false, cost_vec))
11300 return opt_result::failure_at (stmt_info->stmt,
11301 "not vectorized:"
11302 " live stmt not supported: %G",
11303 stmt_info->stmt);
11304
11305 return opt_result::success ();
11306 }
11307
11308
11309 /* Function vect_transform_stmt.
11310
11311 Create a vectorized stmt to replace STMT_INFO, and insert it at GSI. */
11312
11313 bool
11314 vect_transform_stmt (vec_info *vinfo,
11315 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
11316 slp_tree slp_node, slp_instance slp_node_instance)
11317 {
11318 bool is_store = false;
11319 gimple *vec_stmt = NULL;
11320 bool done;
11321
11322 gcc_assert (slp_node || !PURE_SLP_STMT (stmt_info));
11323
11324 tree saved_vectype = STMT_VINFO_VECTYPE (stmt_info);
11325 if (slp_node)
11326 STMT_VINFO_VECTYPE (stmt_info) = SLP_TREE_VECTYPE (slp_node);
11327
11328 switch (STMT_VINFO_TYPE (stmt_info))
11329 {
11330 case type_demotion_vec_info_type:
11331 case type_promotion_vec_info_type:
11332 case type_conversion_vec_info_type:
11333 done = vectorizable_conversion (vinfo, stmt_info,
11334 gsi, &vec_stmt, slp_node, NULL);
11335 gcc_assert (done);
11336 break;
11337
11338 case induc_vec_info_type:
11339 done = vectorizable_induction (as_a <loop_vec_info> (vinfo),
11340 stmt_info, &vec_stmt, slp_node,
11341 NULL);
11342 gcc_assert (done);
11343 break;
11344
11345 case shift_vec_info_type:
11346 done = vectorizable_shift (vinfo, stmt_info,
11347 gsi, &vec_stmt, slp_node, NULL);
11348 gcc_assert (done);
11349 break;
11350
11351 case op_vec_info_type:
11352 done = vectorizable_operation (vinfo, stmt_info, gsi, &vec_stmt, slp_node,
11353 NULL);
11354 gcc_assert (done);
11355 break;
11356
11357 case assignment_vec_info_type:
11358 done = vectorizable_assignment (vinfo, stmt_info,
11359 gsi, &vec_stmt, slp_node, NULL);
11360 gcc_assert (done);
11361 break;
11362
11363 case load_vec_info_type:
11364 done = vectorizable_load (vinfo, stmt_info, gsi, &vec_stmt, slp_node,
11365 NULL);
11366 gcc_assert (done);
11367 break;
11368
11369 case store_vec_info_type:
11370 done = vectorizable_store (vinfo, stmt_info,
11371 gsi, &vec_stmt, slp_node, NULL);
11372 gcc_assert (done);
11373 if (STMT_VINFO_GROUPED_ACCESS (stmt_info) && !slp_node)
11374 {
11375 /* In case of interleaving, the whole chain is vectorized when the
11376 last store in the chain is reached. Store stmts before the last
11377 one are skipped, and their vec_stmt_info shouldn't be freed
11378 meanwhile. */
11379 stmt_vec_info group_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
11380 if (DR_GROUP_STORE_COUNT (group_info) == DR_GROUP_SIZE (group_info))
11381 is_store = true;
11382 }
11383 else
11384 is_store = true;
11385 break;
11386
11387 case condition_vec_info_type:
11388 done = vectorizable_condition (vinfo, stmt_info,
11389 gsi, &vec_stmt, slp_node, NULL);
11390 gcc_assert (done);
11391 break;
11392
11393 case comparison_vec_info_type:
11394 done = vectorizable_comparison (vinfo, stmt_info, gsi, &vec_stmt,
11395 slp_node, NULL);
11396 gcc_assert (done);
11397 break;
11398
11399 case call_vec_info_type:
11400 done = vectorizable_call (vinfo, stmt_info,
11401 gsi, &vec_stmt, slp_node, NULL);
11402 break;
11403
11404 case call_simd_clone_vec_info_type:
11405 done = vectorizable_simd_clone_call (vinfo, stmt_info, gsi, &vec_stmt,
11406 slp_node, NULL);
11407 break;
11408
11409 case reduc_vec_info_type:
11410 done = vect_transform_reduction (as_a <loop_vec_info> (vinfo), stmt_info,
11411 gsi, &vec_stmt, slp_node);
11412 gcc_assert (done);
11413 break;
11414
11415 case cycle_phi_info_type:
11416 done = vect_transform_cycle_phi (as_a <loop_vec_info> (vinfo), stmt_info,
11417 &vec_stmt, slp_node, slp_node_instance);
11418 gcc_assert (done);
11419 break;
11420
11421 case lc_phi_info_type:
11422 done = vectorizable_lc_phi (as_a <loop_vec_info> (vinfo),
11423 stmt_info, &vec_stmt, slp_node);
11424 gcc_assert (done);
11425 break;
11426
11427 case phi_info_type:
11428 done = vectorizable_phi (vinfo, stmt_info, &vec_stmt, slp_node, NULL);
11429 gcc_assert (done);
11430 break;
11431
11432 default:
11433 if (!STMT_VINFO_LIVE_P (stmt_info))
11434 {
11435 if (dump_enabled_p ())
11436 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
11437 "stmt not supported.\n");
11438 gcc_unreachable ();
11439 }
11440 done = true;
11441 }
11442
11443 if (!slp_node && vec_stmt)
11444 gcc_assert (STMT_VINFO_VEC_STMTS (stmt_info).exists ());
11445
11446 if (STMT_VINFO_TYPE (stmt_info) != store_vec_info_type)
11447 {
11448 /* Handle stmts whose DEF is used outside the loop-nest that is
11449 being vectorized. */
11450 done = can_vectorize_live_stmts (vinfo, stmt_info, gsi, slp_node,
11451 slp_node_instance, true, NULL);
11452 gcc_assert (done);
11453 }
11454
11455 if (slp_node)
11456 STMT_VINFO_VECTYPE (stmt_info) = saved_vectype;
11457
11458 return is_store;
11459 }
11460
11461
11462 /* Remove a group of stores (for SLP or interleaving), free their
11463 stmt_vec_info. */
11464
11465 void
11466 vect_remove_stores (vec_info *vinfo, stmt_vec_info first_stmt_info)
11467 {
11468 stmt_vec_info next_stmt_info = first_stmt_info;
11469
11470 while (next_stmt_info)
11471 {
11472 stmt_vec_info tmp = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
11473 next_stmt_info = vect_orig_stmt (next_stmt_info);
11474 /* Free the attached stmt_vec_info and remove the stmt. */
11475 vinfo->remove_stmt (next_stmt_info);
11476 next_stmt_info = tmp;
11477 }
11478 }
11479
11480 /* If NUNITS is nonzero, return a vector type that contains NUNITS
11481 elements of type SCALAR_TYPE, or null if the target doesn't support
11482 such a type.
11483
11484 If NUNITS is zero, return a vector type that contains elements of
11485 type SCALAR_TYPE, choosing whichever vector size the target prefers.
11486
11487 If PREVAILING_MODE is VOIDmode, we have not yet chosen a vector mode
11488 for this vectorization region and want to "autodetect" the best choice.
11489 Otherwise, PREVAILING_MODE is a previously-chosen vector TYPE_MODE
11490 and we want the new type to be interoperable with it. PREVAILING_MODE
11491 in this case can be a scalar integer mode or a vector mode; when it
11492 is a vector mode, the function acts like a tree-level version of
11493 related_vector_mode. */
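/* Illustrative example on a hypothetical target: with PREVAILING_MODE
   V4SImode, SCALAR_TYPE `short' and NUNITS 8 the function acts like
   related_vector_mode and would typically return a vector(8) short
   type occupying the same 128-bit vector size.  */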
11494
11495 tree
11496 get_related_vectype_for_scalar_type (machine_mode prevailing_mode,
11497 tree scalar_type, poly_uint64 nunits)
11498 {
11499 tree orig_scalar_type = scalar_type;
11500 scalar_mode inner_mode;
11501 machine_mode simd_mode;
11502 tree vectype;
11503
11504 if (!is_int_mode (TYPE_MODE (scalar_type), &inner_mode)
11505 && !is_float_mode (TYPE_MODE (scalar_type), &inner_mode))
11506 return NULL_TREE;
11507
11508 unsigned int nbytes = GET_MODE_SIZE (inner_mode);
11509
11510 /* For vector types of elements whose mode precision doesn't
11511 match their type's precision we use an element type of mode
11512 precision. The vectorization routines will have to make sure
11513 they support the proper result truncation/extension.
11514 We also make sure to build vector types with INTEGER_TYPE
11515 component type only. */
11516 if (INTEGRAL_TYPE_P (scalar_type)
11517 && (GET_MODE_BITSIZE (inner_mode) != TYPE_PRECISION (scalar_type)
11518 || TREE_CODE (scalar_type) != INTEGER_TYPE))
11519 scalar_type = build_nonstandard_integer_type (GET_MODE_BITSIZE (inner_mode),
11520 TYPE_UNSIGNED (scalar_type));
11521
11522 /* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
11523 When the component mode passes the above test simply use a type
11524 corresponding to that mode. The theory is that any use that
11525 would cause problems with this will disable vectorization anyway. */
11526 else if (!SCALAR_FLOAT_TYPE_P (scalar_type)
11527 && !INTEGRAL_TYPE_P (scalar_type))
11528 scalar_type = lang_hooks.types.type_for_mode (inner_mode, 1);
11529
11530 /* We can't build a vector type of elements with alignment bigger than
11531 their size. */
11532 else if (nbytes < TYPE_ALIGN_UNIT (scalar_type))
11533 scalar_type = lang_hooks.types.type_for_mode (inner_mode,
11534 TYPE_UNSIGNED (scalar_type));
11535
11536 /* If we fell back to using the mode, fail if there was
11537 no scalar type for it. */
11538 if (scalar_type == NULL_TREE)
11539 return NULL_TREE;
11540
11541 /* If no prevailing mode was supplied, use the mode the target prefers.
11542 Otherwise lookup a vector mode based on the prevailing mode. */
11543 if (prevailing_mode == VOIDmode)
11544 {
11545 gcc_assert (known_eq (nunits, 0U));
11546 simd_mode = targetm.vectorize.preferred_simd_mode (inner_mode);
11547 if (SCALAR_INT_MODE_P (simd_mode))
11548 {
11549 /* Traditional behavior is not to take the integer mode
11550 literally, but simply to use it as a way of determining
11551 the vector size. It is up to mode_for_vector to decide
11552 what the TYPE_MODE should be.
11553
11554 Note that nunits == 1 is allowed in order to support single
11555 element vector types. */
11556 if (!multiple_p (GET_MODE_SIZE (simd_mode), nbytes, &nunits)
11557 || !mode_for_vector (inner_mode, nunits).exists (&simd_mode))
11558 return NULL_TREE;
11559 }
11560 }
11561 else if (SCALAR_INT_MODE_P (prevailing_mode)
11562 || !related_vector_mode (prevailing_mode,
11563 inner_mode, nunits).exists (&simd_mode))
11564 {
11565 /* Fall back to using mode_for_vector, mostly in the hope of being
11566 able to use an integer mode. */
11567 if (known_eq (nunits, 0U)
11568 && !multiple_p (GET_MODE_SIZE (prevailing_mode), nbytes, &nunits))
11569 return NULL_TREE;
11570
11571 if (!mode_for_vector (inner_mode, nunits).exists (&simd_mode))
11572 return NULL_TREE;
11573 }
11574
11575 vectype = build_vector_type_for_mode (scalar_type, simd_mode);
11576
11577 /* In cases where the mode was chosen by mode_for_vector, check that
11578 the target actually supports the chosen mode, or that it at least
11579 allows the vector mode to be replaced by a like-sized integer. */
11580 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
11581 && !INTEGRAL_MODE_P (TYPE_MODE (vectype)))
11582 return NULL_TREE;
11583
11584 /* Re-attach the address-space qualifier if we canonicalized the scalar
11585 type. */
11586 if (TYPE_ADDR_SPACE (orig_scalar_type) != TYPE_ADDR_SPACE (vectype))
11587 return build_qualified_type
11588 (vectype, KEEP_QUAL_ADDR_SPACE (TYPE_QUALS (orig_scalar_type)));
11589
11590 return vectype;
11591 }
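
/* Illustrative sketch only; the helper below is hypothetical and not
   part of the vectorizer.  It shows the "autodetect" form of the
   function above: with a VOIDmode PREVAILING_MODE the target's
   preferred SIMD mode decides the vector size, and NUNITS must then
   be zero.  */

static tree ATTRIBUTE_UNUSED
example_preferred_int_vectype (void)
{
  /* intSI_type_node is the middle end's 32-bit signed integer type;
     a nonzero NUNITS would only be meaningful together with a
     previously chosen prevailing vector mode.  */
  return get_related_vectype_for_scalar_type (VOIDmode, intSI_type_node, 0);
}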
11592
11593 /* Function get_vectype_for_scalar_type.
11594
11595 Returns the vector type corresponding to SCALAR_TYPE as supported
11596 by the target. If GROUP_SIZE is nonzero and we're performing BB
11597 vectorization, make sure that the number of elements in the vector
11598 is no bigger than GROUP_SIZE. */
11599
11600 tree
11601 get_vectype_for_scalar_type (vec_info *vinfo, tree scalar_type,
11602 unsigned int group_size)
11603 {
11604 /* For BB vectorization, we should always have a group size once we've
11605 constructed the SLP tree; the only valid uses of zero GROUP_SIZEs
11606 are tentative requests during things like early data reference
11607 analysis and pattern recognition. */
11608 if (is_a <bb_vec_info> (vinfo))
11609 gcc_assert (vinfo->slp_instances.is_empty () || group_size != 0);
11610 else
11611 group_size = 0;
11612
11613 tree vectype = get_related_vectype_for_scalar_type (vinfo->vector_mode,
11614 scalar_type);
11615 if (vectype && vinfo->vector_mode == VOIDmode)
11616 vinfo->vector_mode = TYPE_MODE (vectype);
11617
11618 /* Register the natural choice of vector type, before the group size
11619 has been applied. */
11620 if (vectype)
11621 vinfo->used_vector_modes.add (TYPE_MODE (vectype));
11622
11623 /* If the natural choice of vector type doesn't satisfy GROUP_SIZE,
11624 try again with an explicit number of elements. */
11625 if (vectype
11626 && group_size
11627 && maybe_ge (TYPE_VECTOR_SUBPARTS (vectype), group_size))
11628 {
11629 /* Start with the biggest number of units that fits within
11630 GROUP_SIZE and halve it until we find a valid vector type.
11631 Usually either the first attempt will succeed or all will
11632 fail (in the latter case because GROUP_SIZE is too small
11633 for the target), but it's possible that a target could have
11634 a hole between supported vector types.
11635
11636 If GROUP_SIZE is not a power of 2, this has the effect of
11637 trying the largest power of 2 that fits within the group,
11638 even though the group is not a multiple of that vector size.
11639 The BB vectorizer will then try to carve up the group into
11640 smaller pieces. */
11641 unsigned int nunits = 1 << floor_log2 (group_size);
11642 do
11643 {
11644 vectype = get_related_vectype_for_scalar_type (vinfo->vector_mode,
11645 scalar_type, nunits);
11646 nunits /= 2;
11647 }
11648 while (nunits > 1 && !vectype);
11649 }
11650
11651 return vectype;
11652 }
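
/* Hypothetical usage sketch, not part of the vectorizer: for BB
   vectorization of a three-element group of ints on a target whose
   natural choice would be a four-element vector, the halving loop
   above retries with two units, so the caller gets (at most) a
   two-element vector type.  */

static tree ATTRIBUTE_UNUSED
example_group_capped_vectype (vec_info *vinfo)
{
  /* A GROUP_SIZE of 3 caps the number of vector elements at 2, the
     largest power of two that fits within the group.  */
  return get_vectype_for_scalar_type (vinfo, intSI_type_node, 3);
}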
11653
11654 /* Return the vector type corresponding to SCALAR_TYPE as supported
11655 by the target. NODE, if nonnull, is the SLP tree node that will
11656 use the returned vector type. */
11657
11658 tree
11659 get_vectype_for_scalar_type (vec_info *vinfo, tree scalar_type, slp_tree node)
11660 {
11661 unsigned int group_size = 0;
11662 if (node)
11663 group_size = SLP_TREE_LANES (node);
11664 return get_vectype_for_scalar_type (vinfo, scalar_type, group_size);
11665 }
11666
11667 /* Function get_mask_type_for_scalar_type.
11668
11669 Returns the mask type corresponding to a result of comparison
11670 of vectors of specified SCALAR_TYPE as supported by target.
11671 If GROUP_SIZE is nonzero and we're performing BB vectorization,
11672 make sure that the number of elements in the vector is no bigger
11673 than GROUP_SIZE. */
11674
11675 tree
11676 get_mask_type_for_scalar_type (vec_info *vinfo, tree scalar_type,
11677 unsigned int group_size)
11678 {
11679 tree vectype = get_vectype_for_scalar_type (vinfo, scalar_type, group_size);
11680
11681 if (!vectype)
11682 return NULL;
11683
11684 return truth_type_for (vectype);
11685 }
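
/* Hypothetical sketch, not part of the vectorizer: build a mask type
   from a mask precision in the same way vect_get_vector_types_for_stmt
   does below, i.e. go through an unsigned nonstandard integer type of
   that precision and take the truth type of its vector type.  */

static tree ATTRIBUTE_UNUSED
example_mask_type_for_precision (vec_info *vinfo, unsigned int precision)
{
  tree scalar_type = build_nonstandard_integer_type (precision, 1);
  return get_mask_type_for_scalar_type (vinfo, scalar_type, 0);
}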
11686
11687 /* Function get_same_sized_vectype
11688
11689 Returns a vector type corresponding to SCALAR_TYPE of size
11690 VECTOR_TYPE if supported by the target. */
11691
11692 tree
11693 get_same_sized_vectype (tree scalar_type, tree vector_type)
11694 {
11695 if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type))
11696 return truth_type_for (vector_type);
11697
11698 poly_uint64 nunits;
11699 if (!multiple_p (GET_MODE_SIZE (TYPE_MODE (vector_type)),
11700 GET_MODE_SIZE (TYPE_MODE (scalar_type)), &nunits))
11701 return NULL_TREE;
11702
11703 return get_related_vectype_for_scalar_type (TYPE_MODE (vector_type),
11704 scalar_type, nunits);
11705 }
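
/* Hypothetical sketch, not part of the vectorizer: given a vector of
   floats, ask for the integer vector type of the same total size.  The
   number of units falls out of the ratio of the two mode sizes, so a
   four-element float vector maps to a four-element 32-bit integer
   vector on typical targets.  */

static tree ATTRIBUTE_UNUSED
example_same_sized_int_vectype (tree float_vectype)
{
  return get_same_sized_vectype (intSI_type_node, float_vectype);
}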
11706
11707 /* Return true if replacing LOOP_VINFO->vector_mode with VECTOR_MODE
11708 would not change the chosen vector modes. */
11709
11710 bool
11711 vect_chooses_same_modes_p (vec_info *vinfo, machine_mode vector_mode)
11712 {
11713 for (vec_info::mode_set::iterator i = vinfo->used_vector_modes.begin ();
11714 i != vinfo->used_vector_modes.end (); ++i)
11715 if (!VECTOR_MODE_P (*i)
11716 || related_vector_mode (vector_mode, GET_MODE_INNER (*i), 0) != *i)
11717 return false;
11718 return true;
11719 }
11720
11721 /* Function vect_is_simple_use.
11722
11723 Input:
11724 VINFO - the vect info of the loop or basic block that is being vectorized.
11725 OPERAND - operand in the loop or bb.
11726 Output:
11727 DEF_STMT_INFO_OUT (optional) - information about the defining stmt in
11728 case OPERAND is an SSA_NAME that is defined in the vectorizable region
11729 DEF_STMT_OUT (optional) - the defining stmt in case OPERAND is an SSA_NAME;
11730 the definition could be anywhere in the function
11731 DT - the type of definition
11732
11733 Returns whether a stmt with OPERAND can be vectorized.
11734 For loops, supportable operands are constants, loop invariants, and operands
11735 that are defined by the current iteration of the loop. Unsupportable
11736 operands are those that are defined by a previous iteration of the loop (as
11737 is the case in reduction/induction computations).
11738 For basic blocks, supportable operands are constants and bb invariants.
11739 For now, operands defined outside the basic block are not supported. */
11740
11741 bool
11742 vect_is_simple_use (tree operand, vec_info *vinfo, enum vect_def_type *dt,
11743 stmt_vec_info *def_stmt_info_out, gimple **def_stmt_out)
11744 {
11745 if (def_stmt_info_out)
11746 *def_stmt_info_out = NULL;
11747 if (def_stmt_out)
11748 *def_stmt_out = NULL;
11749 *dt = vect_unknown_def_type;
11750
11751 if (dump_enabled_p ())
11752 {
11753 dump_printf_loc (MSG_NOTE, vect_location,
11754 "vect_is_simple_use: operand ");
11755 if (TREE_CODE (operand) == SSA_NAME
11756 && !SSA_NAME_IS_DEFAULT_DEF (operand))
11757 dump_gimple_expr (MSG_NOTE, TDF_SLIM, SSA_NAME_DEF_STMT (operand), 0);
11758 else
11759 dump_generic_expr (MSG_NOTE, TDF_SLIM, operand);
11760 }
11761
11762 if (CONSTANT_CLASS_P (operand))
11763 *dt = vect_constant_def;
11764 else if (is_gimple_min_invariant (operand))
11765 *dt = vect_external_def;
11766 else if (TREE_CODE (operand) != SSA_NAME)
11767 *dt = vect_unknown_def_type;
11768 else if (SSA_NAME_IS_DEFAULT_DEF (operand))
11769 *dt = vect_external_def;
11770 else
11771 {
11772 gimple *def_stmt = SSA_NAME_DEF_STMT (operand);
11773 stmt_vec_info stmt_vinfo = vinfo->lookup_def (operand);
11774 if (!stmt_vinfo)
11775 *dt = vect_external_def;
11776 else
11777 {
11778 stmt_vinfo = vect_stmt_to_vectorize (stmt_vinfo);
11779 def_stmt = stmt_vinfo->stmt;
11780 *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
11781 if (def_stmt_info_out)
11782 *def_stmt_info_out = stmt_vinfo;
11783 }
11784 if (def_stmt_out)
11785 *def_stmt_out = def_stmt;
11786 }
11787
11788 if (dump_enabled_p ())
11789 {
11790 dump_printf (MSG_NOTE, ", type of def: ");
11791 switch (*dt)
11792 {
11793 case vect_uninitialized_def:
11794 dump_printf (MSG_NOTE, "uninitialized\n");
11795 break;
11796 case vect_constant_def:
11797 dump_printf (MSG_NOTE, "constant\n");
11798 break;
11799 case vect_external_def:
11800 dump_printf (MSG_NOTE, "external\n");
11801 break;
11802 case vect_internal_def:
11803 dump_printf (MSG_NOTE, "internal\n");
11804 break;
11805 case vect_induction_def:
11806 dump_printf (MSG_NOTE, "induction\n");
11807 break;
11808 case vect_reduction_def:
11809 dump_printf (MSG_NOTE, "reduction\n");
11810 break;
11811 case vect_double_reduction_def:
11812 dump_printf (MSG_NOTE, "double reduction\n");
11813 break;
11814 case vect_nested_cycle:
11815 dump_printf (MSG_NOTE, "nested cycle\n");
11816 break;
11817 case vect_unknown_def_type:
11818 dump_printf (MSG_NOTE, "unknown\n");
11819 break;
11820 }
11821 }
11822
11823 if (*dt == vect_unknown_def_type)
11824 {
11825 if (dump_enabled_p ())
11826 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
11827 "Unsupported pattern.\n");
11828 return false;
11829 }
11830
11831 return true;
11832 }
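
/* Hypothetical sketch, not part of the vectorizer: classify a single
   operand and report whether it needs no vectorized definition of its
   own, mirroring how the analysis routines use the classification
   above.  */

static bool ATTRIBUTE_UNUSED
example_operand_needs_no_vector_def_p (vec_info *vinfo, tree operand)
{
  enum vect_def_type dt;
  if (!vect_is_simple_use (operand, vinfo, &dt))
    return false;
  /* Constants and definitions from outside the vectorized region are
     broadcast or built up from scalars rather than vectorized.  */
  return dt == vect_constant_def || dt == vect_external_def;
}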
11833
11834 /* Function vect_is_simple_use.
11835
11836 Same as vect_is_simple_use but also determines the vector operand
11837 type of OPERAND and stores it to *VECTYPE. If the definition of
11838 OPERAND is vect_uninitialized_def, vect_constant_def or
11839 vect_external_def, *VECTYPE will be set to NULL_TREE and the caller
11840 is responsible for computing the best-suited vector type for the
11841 scalar operand. */
11842
11843 bool
11844 vect_is_simple_use (tree operand, vec_info *vinfo, enum vect_def_type *dt,
11845 tree *vectype, stmt_vec_info *def_stmt_info_out,
11846 gimple **def_stmt_out)
11847 {
11848 stmt_vec_info def_stmt_info;
11849 gimple *def_stmt;
11850 if (!vect_is_simple_use (operand, vinfo, dt, &def_stmt_info, &def_stmt))
11851 return false;
11852
11853 if (def_stmt_out)
11854 *def_stmt_out = def_stmt;
11855 if (def_stmt_info_out)
11856 *def_stmt_info_out = def_stmt_info;
11857
11858 /* Now get a vector type if the def is internal, otherwise supply
11859 NULL_TREE and leave it up to the caller to figure out a proper
11860 type for the use stmt. */
11861 if (*dt == vect_internal_def
11862 || *dt == vect_induction_def
11863 || *dt == vect_reduction_def
11864 || *dt == vect_double_reduction_def
11865 || *dt == vect_nested_cycle)
11866 {
11867 *vectype = STMT_VINFO_VECTYPE (def_stmt_info);
11868 gcc_assert (*vectype != NULL_TREE);
11869 if (dump_enabled_p ())
11870 dump_printf_loc (MSG_NOTE, vect_location,
11871 "vect_is_simple_use: vectype %T\n", *vectype);
11872 }
11873 else if (*dt == vect_uninitialized_def
11874 || *dt == vect_constant_def
11875 || *dt == vect_external_def)
11876 *vectype = NULL_TREE;
11877 else
11878 gcc_unreachable ();
11879
11880 return true;
11881 }
11882
11883 /* Function vect_is_simple_use.
11884
11885 Same as vect_is_simple_use but determines the operand by operand
11886 position OPERAND from either STMT or SLP_NODE, filling in *OP
11887 and *SLP_DEF (when SLP_NODE is not NULL). */
11888
11889 bool
11890 vect_is_simple_use (vec_info *vinfo, stmt_vec_info stmt, slp_tree slp_node,
11891 unsigned operand, tree *op, slp_tree *slp_def,
11892 enum vect_def_type *dt,
11893 tree *vectype, stmt_vec_info *def_stmt_info_out)
11894 {
11895 if (slp_node)
11896 {
11897 slp_tree child = SLP_TREE_CHILDREN (slp_node)[operand];
11898 *slp_def = child;
11899 *vectype = SLP_TREE_VECTYPE (child);
11900 if (SLP_TREE_DEF_TYPE (child) == vect_internal_def)
11901 {
11902 *op = gimple_get_lhs (SLP_TREE_REPRESENTATIVE (child)->stmt);
11903 return vect_is_simple_use (*op, vinfo, dt, def_stmt_info_out);
11904 }
11905 else
11906 {
11907 if (def_stmt_info_out)
11908 *def_stmt_info_out = NULL;
11909 *op = SLP_TREE_SCALAR_OPS (child)[0];
11910 *dt = SLP_TREE_DEF_TYPE (child);
11911 return true;
11912 }
11913 }
11914 else
11915 {
11916 *slp_def = NULL;
11917 if (gassign *ass = dyn_cast <gassign *> (stmt->stmt))
11918 {
11919 if (gimple_assign_rhs_code (ass) == COND_EXPR
11920 && COMPARISON_CLASS_P (gimple_assign_rhs1 (ass)))
11921 {
11922 if (operand < 2)
11923 *op = TREE_OPERAND (gimple_assign_rhs1 (ass), operand);
11924 else
11925 *op = gimple_op (ass, operand);
11926 }
11927 else if (gimple_assign_rhs_code (ass) == VIEW_CONVERT_EXPR)
11928 *op = TREE_OPERAND (gimple_assign_rhs1 (ass), 0);
11929 else
11930 *op = gimple_op (ass, operand + 1);
11931 }
11932 else if (gcall *call = dyn_cast <gcall *> (stmt->stmt))
11933 *op = gimple_call_arg (call, operand);
11934 else
11935 gcc_unreachable ();
11936 return vect_is_simple_use (*op, vinfo, dt, vectype, def_stmt_info_out);
11937 }
11938 }
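
/* Hypothetical sketch, not part of the vectorizer: fetch the second
   source operand (operand position 1) of a non-SLP statement together
   with its definition type and vector type, the way the various
   vectorizable_* routines query their operands.  */

static bool ATTRIBUTE_UNUSED
example_get_second_operand (vec_info *vinfo, stmt_vec_info stmt_info,
                            tree *op, tree *vectype)
{
  enum vect_def_type dt;
  slp_tree slp_op;
  /* Passing a null SLP_NODE selects the plain-statement path above,
     where the operand is read straight from the assignment or call.  */
  return vect_is_simple_use (vinfo, stmt_info, NULL, 1, op, &slp_op,
                             &dt, vectype);
}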
11939
11940 /* If OP is not NULL and is external or constant, update its vector
11941 type with VECTYPE. Returns true if successful or false if not,
11942 for example when conflicting vector types are present. */
11943
11944 bool
11945 vect_maybe_update_slp_op_vectype (slp_tree op, tree vectype)
11946 {
11947 if (!op || SLP_TREE_DEF_TYPE (op) == vect_internal_def)
11948 return true;
11949 if (SLP_TREE_VECTYPE (op))
11950 return types_compatible_p (SLP_TREE_VECTYPE (op), vectype);
11951 SLP_TREE_VECTYPE (op) = vectype;
11952 return true;
11953 }
11954
11955 /* Function supportable_widening_operation
11956
11957 Check whether an operation represented by the code CODE is a
11958 widening operation that is supported by the target platform in
11959 vector form (i.e., when operating on arguments of type VECTYPE_IN
11960 producing a result of type VECTYPE_OUT).
11961
11962 Widening operations we currently support are NOP (CONVERT), FLOAT,
11963 FIX_TRUNC and WIDEN_MULT. This function checks if these operations
11964 are supported by the target platform either directly (via vector
11965 tree-codes), or via target builtins.
11966
11967 Output:
11968 - CODE1 and CODE2 are codes of vector operations to be used when
11969 vectorizing the operation, if available.
11970 - MULTI_STEP_CVT determines the number of required intermediate steps in
11971 case of multi-step conversion (like char->short->int - in that case
11972 MULTI_STEP_CVT will be 1).
11973 - INTERM_TYPES contains the intermediate type required to perform the
11974 widening operation (short in the above example). */
11975
11976 bool
11977 supportable_widening_operation (vec_info *vinfo,
11978 enum tree_code code, stmt_vec_info stmt_info,
11979 tree vectype_out, tree vectype_in,
11980 enum tree_code *code1, enum tree_code *code2,
11981 int *multi_step_cvt,
11982 vec<tree> *interm_types)
11983 {
11984 loop_vec_info loop_info = dyn_cast <loop_vec_info> (vinfo);
11985 class loop *vect_loop = NULL;
11986 machine_mode vec_mode;
11987 enum insn_code icode1, icode2;
11988 optab optab1, optab2;
11989 tree vectype = vectype_in;
11990 tree wide_vectype = vectype_out;
11991 enum tree_code c1, c2;
11992 int i;
11993 tree prev_type, intermediate_type;
11994 machine_mode intermediate_mode, prev_mode;
11995 optab optab3, optab4;
11996
11997 *multi_step_cvt = 0;
11998 if (loop_info)
11999 vect_loop = LOOP_VINFO_LOOP (loop_info);
12000
12001 switch (code)
12002 {
12003 case WIDEN_MULT_EXPR:
12004 /* The result of a vectorized widening operation usually requires
12005 two vectors (because the widened results do not fit into one vector).
12006 The generated vector results would normally be expected to be
12007 generated in the same order as in the original scalar computation,
12008 i.e. if 8 results are generated in each vector iteration, they are
12009 to be organized as follows:
12010 vect1: [res1,res2,res3,res4],
12011 vect2: [res5,res6,res7,res8].
12012
12013 However, in the special case that the result of the widening
12014 operation is used in a reduction computation only, the order doesn't
12015 matter (because when vectorizing a reduction we change the order of
12016 the computation). Some targets can take advantage of this and
12017 generate more efficient code. For example, targets like Altivec,
12018 that support widen_mult using a sequence of {mult_even,mult_odd}
12019 generate the following vectors:
12020 vect1: [res1,res3,res5,res7],
12021 vect2: [res2,res4,res6,res8].
12022
12023 When vectorizing outer-loops, we execute the inner-loop sequentially
12024 (each vectorized inner-loop iteration contributes to VF outer-loop
12025 iterations in parallel). We therefore don't allow changing the
12026 order of the computation in the inner-loop during outer-loop
12027 vectorization. */
12028 /* TODO: Another case in which order doesn't *really* matter is when we
12029 widen and then contract again, e.g. (short)((int)x * y >> 8).
12030 Normally, pack_trunc performs an even/odd permute, whereas the
12031 repack from an even/odd expansion would be an interleave, which
12032 would be significantly simpler for e.g. AVX2. */
12033 /* In any case, in order to avoid duplicating the code below, recurse
12034 on VEC_WIDEN_MULT_EVEN_EXPR. If it succeeds, all the return values
12035 are properly set up for the caller. If we fail, we'll continue with
12036 a VEC_WIDEN_MULT_LO/HI_EXPR check. */
12037 if (vect_loop
12038 && STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction
12039 && !nested_in_vect_loop_p (vect_loop, stmt_info)
12040 && supportable_widening_operation (vinfo, VEC_WIDEN_MULT_EVEN_EXPR,
12041 stmt_info, vectype_out,
12042 vectype_in, code1, code2,
12043 multi_step_cvt, interm_types))
12044 {
12045 /* Elements in a vector with the vect_used_by_reduction property cannot
12046 be reordered if the use chain with this property does not have the
12047 same operation. One such example is s += a * b, where elements
12048 in a and b cannot be reordered. Here we check if the vector defined
12049 by STMT is only directly used in the reduction statement. */
12050 tree lhs = gimple_assign_lhs (stmt_info->stmt);
12051 stmt_vec_info use_stmt_info = loop_info->lookup_single_use (lhs);
12052 if (use_stmt_info
12053 && STMT_VINFO_DEF_TYPE (use_stmt_info) == vect_reduction_def)
12054 return true;
12055 }
12056 c1 = VEC_WIDEN_MULT_LO_EXPR;
12057 c2 = VEC_WIDEN_MULT_HI_EXPR;
12058 break;
12059
12060 case DOT_PROD_EXPR:
12061 c1 = DOT_PROD_EXPR;
12062 c2 = DOT_PROD_EXPR;
12063 break;
12064
12065 case SAD_EXPR:
12066 c1 = SAD_EXPR;
12067 c2 = SAD_EXPR;
12068 break;
12069
12070 case VEC_WIDEN_MULT_EVEN_EXPR:
12071 /* Support the recursion induced just above. */
12072 c1 = VEC_WIDEN_MULT_EVEN_EXPR;
12073 c2 = VEC_WIDEN_MULT_ODD_EXPR;
12074 break;
12075
12076 case WIDEN_LSHIFT_EXPR:
12077 c1 = VEC_WIDEN_LSHIFT_LO_EXPR;
12078 c2 = VEC_WIDEN_LSHIFT_HI_EXPR;
12079 break;
12080
12081 case WIDEN_PLUS_EXPR:
12082 c1 = VEC_WIDEN_PLUS_LO_EXPR;
12083 c2 = VEC_WIDEN_PLUS_HI_EXPR;
12084 break;
12085
12086 case WIDEN_MINUS_EXPR:
12087 c1 = VEC_WIDEN_MINUS_LO_EXPR;
12088 c2 = VEC_WIDEN_MINUS_HI_EXPR;
12089 break;
12090
12091 CASE_CONVERT:
12092 c1 = VEC_UNPACK_LO_EXPR;
12093 c2 = VEC_UNPACK_HI_EXPR;
12094 break;
12095
12096 case FLOAT_EXPR:
12097 c1 = VEC_UNPACK_FLOAT_LO_EXPR;
12098 c2 = VEC_UNPACK_FLOAT_HI_EXPR;
12099 break;
12100
12101 case FIX_TRUNC_EXPR:
12102 c1 = VEC_UNPACK_FIX_TRUNC_LO_EXPR;
12103 c2 = VEC_UNPACK_FIX_TRUNC_HI_EXPR;
12104 break;
12105
12106 default:
12107 gcc_unreachable ();
12108 }
12109
12110 if (BYTES_BIG_ENDIAN && c1 != VEC_WIDEN_MULT_EVEN_EXPR)
12111 std::swap (c1, c2);
12112
12113 if (code == FIX_TRUNC_EXPR)
12114 {
12115 /* The signedness is determined from the output operand. */
12116 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
12117 optab2 = optab_for_tree_code (c2, vectype_out, optab_default);
12118 }
12119 else if (CONVERT_EXPR_CODE_P (code)
12120 && VECTOR_BOOLEAN_TYPE_P (wide_vectype)
12121 && VECTOR_BOOLEAN_TYPE_P (vectype)
12122 && TYPE_MODE (wide_vectype) == TYPE_MODE (vectype)
12123 && SCALAR_INT_MODE_P (TYPE_MODE (vectype)))
12124 {
12125 /* If the input and result modes are the same, a different optab
12126 is needed where we pass in the number of units in vectype. */
12127 optab1 = vec_unpacks_sbool_lo_optab;
12128 optab2 = vec_unpacks_sbool_hi_optab;
12129 }
12130 else
12131 {
12132 optab1 = optab_for_tree_code (c1, vectype, optab_default);
12133 optab2 = optab_for_tree_code (c2, vectype, optab_default);
12134 }
12135
12136 if (!optab1 || !optab2)
12137 return false;
12138
12139 vec_mode = TYPE_MODE (vectype);
12140 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing
12141 || (icode2 = optab_handler (optab2, vec_mode)) == CODE_FOR_nothing)
12142 return false;
12143
12144 *code1 = c1;
12145 *code2 = c2;
12146
12147 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
12148 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
12149 {
12150 if (!VECTOR_BOOLEAN_TYPE_P (vectype))
12151 return true;
12152 /* For scalar masks we may have different boolean
12153 vector types having the same QImode. Thus we
12154 add an additional check on the number of elements. */
12155 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype),
12156 TYPE_VECTOR_SUBPARTS (wide_vectype) * 2))
12157 return true;
12158 }
12159
12160 /* Check if it's a multi-step conversion that can be done using intermediate
12161 types. */
12162
12163 prev_type = vectype;
12164 prev_mode = vec_mode;
12165
12166 if (!CONVERT_EXPR_CODE_P (code))
12167 return false;
12168
12169 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
12170 intermediate steps in the promotion sequence. We try
12171 MAX_INTERM_CVT_STEPS to get to WIDE_VECTYPE, and fail if we do
12172 not. */
12173 interm_types->create (MAX_INTERM_CVT_STEPS);
12174 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
12175 {
12176 intermediate_mode = insn_data[icode1].operand[0].mode;
12177 if (VECTOR_BOOLEAN_TYPE_P (prev_type))
12178 intermediate_type
12179 = vect_halve_mask_nunits (prev_type, intermediate_mode);
12180 else
12181 intermediate_type
12182 = lang_hooks.types.type_for_mode (intermediate_mode,
12183 TYPE_UNSIGNED (prev_type));
12184
12185 if (VECTOR_BOOLEAN_TYPE_P (intermediate_type)
12186 && VECTOR_BOOLEAN_TYPE_P (prev_type)
12187 && intermediate_mode == prev_mode
12188 && SCALAR_INT_MODE_P (prev_mode))
12189 {
12190 /* If the input and result modes are the same, a different optab
12191 is needed where we pass in the number of units in vectype. */
12192 optab3 = vec_unpacks_sbool_lo_optab;
12193 optab4 = vec_unpacks_sbool_hi_optab;
12194 }
12195 else
12196 {
12197 optab3 = optab_for_tree_code (c1, intermediate_type, optab_default);
12198 optab4 = optab_for_tree_code (c2, intermediate_type, optab_default);
12199 }
12200
12201 if (!optab3 || !optab4
12202 || (icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing
12203 || insn_data[icode1].operand[0].mode != intermediate_mode
12204 || (icode2 = optab_handler (optab2, prev_mode)) == CODE_FOR_nothing
12205 || insn_data[icode2].operand[0].mode != intermediate_mode
12206 || ((icode1 = optab_handler (optab3, intermediate_mode))
12207 == CODE_FOR_nothing)
12208 || ((icode2 = optab_handler (optab4, intermediate_mode))
12209 == CODE_FOR_nothing))
12210 break;
12211
12212 interm_types->quick_push (intermediate_type);
12213 (*multi_step_cvt)++;
12214
12215 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
12216 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
12217 {
12218 if (!VECTOR_BOOLEAN_TYPE_P (vectype))
12219 return true;
12220 if (known_eq (TYPE_VECTOR_SUBPARTS (intermediate_type),
12221 TYPE_VECTOR_SUBPARTS (wide_vectype) * 2))
12222 return true;
12223 }
12224
12225 prev_type = intermediate_type;
12226 prev_mode = intermediate_mode;
12227 }
12228
12229 interm_types->release ();
12230 return false;
12231 }
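
/* Hypothetical sketch, not part of the vectorizer: count how many
   unpack steps a widening conversion needs.  For a char -> int
   conversion that goes through an intermediate short vector,
   MULTI_STEP_CVT comes back as 1 and INTERM_TYPES holds the short
   vector type, so one extra step is needed on top of the final
   unpack pair.  */

static int ATTRIBUTE_UNUSED
example_count_widen_steps (vec_info *vinfo, stmt_vec_info stmt_info,
                           tree vectype_out, tree vectype_in)
{
  enum tree_code code1, code2;
  int multi_step_cvt = 0;
  auto_vec<tree> interm_types;
  if (!supportable_widening_operation (vinfo, NOP_EXPR, stmt_info,
                                       vectype_out, vectype_in,
                                       &code1, &code2, &multi_step_cvt,
                                       &interm_types))
    return -1;
  /* One lo/hi pair for the final step plus one pair per intermediate
     type recorded in INTERM_TYPES.  */
  return multi_step_cvt + 1;
}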
12232
12233
12234 /* Function supportable_narrowing_operation
12235
12236 Check whether an operation represented by the code CODE is a
12237 narrowing operation that is supported by the target platform in
12238 vector form (i.e., when operating on arguments of type VECTYPE_IN
12239 and producing a result of type VECTYPE_OUT).
12240
12241 Narrowing operations we currently support are NOP (CONVERT), FIX_TRUNC
12242 and FLOAT. This function checks if these operations are supported by
12243 the target platform directly via vector tree-codes.
12244
12245 Output:
12246 - CODE1 is the code of a vector operation to be used when
12247 vectorizing the operation, if available.
12248 - MULTI_STEP_CVT determines the number of required intermediate steps in
12249 case of multi-step conversion (like int->short->char - in that case
12250 MULTI_STEP_CVT will be 1).
12251 - INTERM_TYPES contains the intermediate type required to perform the
12252 narrowing operation (short in the above example). */
12253
12254 bool
12255 supportable_narrowing_operation (enum tree_code code,
12256 tree vectype_out, tree vectype_in,
12257 enum tree_code *code1, int *multi_step_cvt,
12258 vec<tree> *interm_types)
12259 {
12260 machine_mode vec_mode;
12261 enum insn_code icode1;
12262 optab optab1, interm_optab;
12263 tree vectype = vectype_in;
12264 tree narrow_vectype = vectype_out;
12265 enum tree_code c1;
12266 tree intermediate_type, prev_type;
12267 machine_mode intermediate_mode, prev_mode;
12268 int i;
12269 unsigned HOST_WIDE_INT n_elts;
12270 bool uns;
12271
12272 *multi_step_cvt = 0;
12273 switch (code)
12274 {
12275 CASE_CONVERT:
12276 c1 = VEC_PACK_TRUNC_EXPR;
12277 if (VECTOR_BOOLEAN_TYPE_P (narrow_vectype)
12278 && VECTOR_BOOLEAN_TYPE_P (vectype)
12279 && SCALAR_INT_MODE_P (TYPE_MODE (vectype))
12280 && TYPE_VECTOR_SUBPARTS (vectype).is_constant (&n_elts)
12281 && n_elts < BITS_PER_UNIT)
12282 optab1 = vec_pack_sbool_trunc_optab;
12283 else
12284 optab1 = optab_for_tree_code (c1, vectype, optab_default);
12285 break;
12286
12287 case FIX_TRUNC_EXPR:
12288 c1 = VEC_PACK_FIX_TRUNC_EXPR;
12289 /* The signedness is determined from the output operand. */
12290 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
12291 break;
12292
12293 case FLOAT_EXPR:
12294 c1 = VEC_PACK_FLOAT_EXPR;
12295 optab1 = optab_for_tree_code (c1, vectype, optab_default);
12296 break;
12297
12298 default:
12299 gcc_unreachable ();
12300 }
12301
12302 if (!optab1)
12303 return false;
12304
12305 vec_mode = TYPE_MODE (vectype);
12306 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing)
12307 return false;
12308
12309 *code1 = c1;
12310
12311 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
12312 {
12313 if (!VECTOR_BOOLEAN_TYPE_P (vectype))
12314 return true;
12315 /* For scalar masks we may have different boolean
12316 vector types having the same QImode. Thus we
12317 add an additional check on the number of elements. */
12318 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype) * 2,
12319 TYPE_VECTOR_SUBPARTS (narrow_vectype)))
12320 return true;
12321 }
12322
12323 if (code == FLOAT_EXPR)
12324 return false;
12325
12326 /* Check if it's a multi-step conversion that can be done using intermediate
12327 types. */
12328 prev_mode = vec_mode;
12329 prev_type = vectype;
12330 if (code == FIX_TRUNC_EXPR)
12331 uns = TYPE_UNSIGNED (vectype_out);
12332 else
12333 uns = TYPE_UNSIGNED (vectype);
12334
12335 /* For multi-step FIX_TRUNC_EXPR prefer signed floating to integer
12336 conversion over unsigned, as unsigned FIX_TRUNC_EXPR is often more
12337 costly than signed. */
12338 if (code == FIX_TRUNC_EXPR && uns)
12339 {
12340 enum insn_code icode2;
12341
12342 intermediate_type
12343 = lang_hooks.types.type_for_mode (TYPE_MODE (vectype_out), 0);
12344 interm_optab
12345 = optab_for_tree_code (c1, intermediate_type, optab_default);
12346 if (interm_optab != unknown_optab
12347 && (icode2 = optab_handler (optab1, vec_mode)) != CODE_FOR_nothing
12348 && insn_data[icode1].operand[0].mode
12349 == insn_data[icode2].operand[0].mode)
12350 {
12351 uns = false;
12352 optab1 = interm_optab;
12353 icode1 = icode2;
12354 }
12355 }
12356
12357 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
12358 intermediate steps in the narrowing sequence. We try
12359 MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do not. */
12360 interm_types->create (MAX_INTERM_CVT_STEPS);
12361 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
12362 {
12363 intermediate_mode = insn_data[icode1].operand[0].mode;
12364 if (VECTOR_BOOLEAN_TYPE_P (prev_type))
12365 intermediate_type
12366 = vect_double_mask_nunits (prev_type, intermediate_mode);
12367 else
12368 intermediate_type
12369 = lang_hooks.types.type_for_mode (intermediate_mode, uns);
12370 if (VECTOR_BOOLEAN_TYPE_P (intermediate_type)
12371 && VECTOR_BOOLEAN_TYPE_P (prev_type)
12372 && SCALAR_INT_MODE_P (prev_mode)
12373 && TYPE_VECTOR_SUBPARTS (intermediate_type).is_constant (&n_elts)
12374 && n_elts < BITS_PER_UNIT)
12375 interm_optab = vec_pack_sbool_trunc_optab;
12376 else
12377 interm_optab
12378 = optab_for_tree_code (VEC_PACK_TRUNC_EXPR, intermediate_type,
12379 optab_default);
12380 if (!interm_optab
12381 || ((icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing)
12382 || insn_data[icode1].operand[0].mode != intermediate_mode
12383 || ((icode1 = optab_handler (interm_optab, intermediate_mode))
12384 == CODE_FOR_nothing))
12385 break;
12386
12387 interm_types->quick_push (intermediate_type);
12388 (*multi_step_cvt)++;
12389
12390 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
12391 {
12392 if (!VECTOR_BOOLEAN_TYPE_P (vectype))
12393 return true;
12394 if (known_eq (TYPE_VECTOR_SUBPARTS (intermediate_type) * 2,
12395 TYPE_VECTOR_SUBPARTS (narrow_vectype)))
12396 return true;
12397 }
12398
12399 prev_mode = intermediate_mode;
12400 prev_type = intermediate_type;
12401 optab1 = interm_optab;
12402 }
12403
12404 interm_types->release ();
12405 return false;
12406 }
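
/* Hypothetical sketch, not part of the vectorizer: check whether an
   int -> char conversion can be vectorized as a pack sequence.  With
   an intermediate short vector, MULTI_STEP_CVT comes back as 1 and one
   VEC_PACK_TRUNC-style operation is applied per step.  */

static bool ATTRIBUTE_UNUSED
example_narrowing_supported_p (tree char_vectype, tree int_vectype)
{
  enum tree_code code1;
  int multi_step_cvt = 0;
  auto_vec<tree> interm_types;
  return supportable_narrowing_operation (NOP_EXPR, char_vectype,
                                          int_vectype, &code1,
                                          &multi_step_cvt, &interm_types);
}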
12407
12408 /* Generate and return a vector mask of MASK_TYPE such that
12409 mask[I] is true iff J + START_INDEX < END_INDEX for all J <= I.
12410 Add the statements to SEQ. */
12411
12412 tree
12413 vect_gen_while (gimple_seq *seq, tree mask_type, tree start_index,
12414 tree end_index, const char *name)
12415 {
12416 tree cmp_type = TREE_TYPE (start_index);
12417 gcc_checking_assert (direct_internal_fn_supported_p (IFN_WHILE_ULT,
12418 cmp_type, mask_type,
12419 OPTIMIZE_FOR_SPEED));
12420 gcall *call = gimple_build_call_internal (IFN_WHILE_ULT, 3,
12421 start_index, end_index,
12422 build_zero_cst (mask_type));
12423 tree tmp;
12424 if (name)
12425 tmp = make_temp_ssa_name (mask_type, NULL, name);
12426 else
12427 tmp = make_ssa_name (mask_type);
12428 gimple_call_set_lhs (call, tmp);
12429 gimple_seq_add_stmt (seq, call);
12430 return tmp;
12431 }
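
/* Hypothetical sketch, not part of the vectorizer: build the mask that
   enables lanes [0, NITERS - INDEX) of a masked loop body, the way
   fully-masked loops guard their final, partial iteration.  INDEX and
   NITERS are assumed to be existing SSA names or invariants of the
   same type.  */

static tree ATTRIBUTE_UNUSED
example_build_loop_mask (gimple_seq *seq, tree mask_type,
                         tree index, tree niters)
{
  /* Lane I of the result is true iff INDEX + I < NITERS.  */
  return vect_gen_while (seq, mask_type, index, niters, "loop_mask");
}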
12432
12433 /* Generate a vector mask of type MASK_TYPE for which index I is false iff
12434 J + START_INDEX < END_INDEX for all J <= I. Add the statements to SEQ. */
12435
12436 tree
12437 vect_gen_while_not (gimple_seq *seq, tree mask_type, tree start_index,
12438 tree end_index)
12439 {
12440 tree tmp = vect_gen_while (seq, mask_type, start_index, end_index);
12441 return gimple_build (seq, BIT_NOT_EXPR, mask_type, tmp);
12442 }
12443
12444 /* Try to compute the vector types required to vectorize STMT_INFO,
12445 returning true on success and false if vectorization isn't possible.
12446 If GROUP_SIZE is nonzero and we're performing BB vectorization,
12447 make sure that the number of elements in the vectors is no bigger
12448 than GROUP_SIZE.
12449
12450 On success:
12451
12452 - Set *STMT_VECTYPE_OUT to:
12453 - NULL_TREE if the statement doesn't need to be vectorized;
12454 - the equivalent of STMT_VINFO_VECTYPE otherwise.
12455
12456 - Set *NUNITS_VECTYPE_OUT to the vector type that contains the maximum
12457 number of units needed to vectorize STMT_INFO, or NULL_TREE if the
12458 statement does not help to determine the overall number of units. */
12459
12460 opt_result
12461 vect_get_vector_types_for_stmt (vec_info *vinfo, stmt_vec_info stmt_info,
12462 tree *stmt_vectype_out,
12463 tree *nunits_vectype_out,
12464 unsigned int group_size)
12465 {
12466 gimple *stmt = stmt_info->stmt;
12467
12468 /* For BB vectorization, we should always have a group size once we've
12469 constructed the SLP tree; the only valid uses of zero GROUP_SIZEs
12470 are tentative requests during things like early data reference
12471 analysis and pattern recognition. */
12472 if (is_a <bb_vec_info> (vinfo))
12473 gcc_assert (vinfo->slp_instances.is_empty () || group_size != 0);
12474 else
12475 group_size = 0;
12476
12477 *stmt_vectype_out = NULL_TREE;
12478 *nunits_vectype_out = NULL_TREE;
12479
12480 if (gimple_get_lhs (stmt) == NULL_TREE
12481 /* MASK_STORE has no lhs, but is ok. */
12482 && !gimple_call_internal_p (stmt, IFN_MASK_STORE))
12483 {
12484 if (is_a <gcall *> (stmt))
12485 {
12486 /* Ignore calls with no lhs. These must be calls to
12487 #pragma omp simd functions, and the vectorization factor
12488 they really need can't be determined until
12489 vectorizable_simd_clone_call. */
12490 if (dump_enabled_p ())
12491 dump_printf_loc (MSG_NOTE, vect_location,
12492 "defer to SIMD clone analysis.\n");
12493 return opt_result::success ();
12494 }
12495
12496 return opt_result::failure_at (stmt,
12497 "not vectorized: irregular stmt.%G", stmt);
12498 }
12499
12500 tree vectype;
12501 tree scalar_type = NULL_TREE;
12502 if (group_size == 0 && STMT_VINFO_VECTYPE (stmt_info))
12503 {
12504 vectype = STMT_VINFO_VECTYPE (stmt_info);
12505 if (dump_enabled_p ())
12506 dump_printf_loc (MSG_NOTE, vect_location,
12507 "precomputed vectype: %T\n", vectype);
12508 }
12509 else if (vect_use_mask_type_p (stmt_info))
12510 {
12511 unsigned int precision = stmt_info->mask_precision;
12512 scalar_type = build_nonstandard_integer_type (precision, 1);
12513 vectype = get_mask_type_for_scalar_type (vinfo, scalar_type, group_size);
12514 if (!vectype)
12515 return opt_result::failure_at (stmt, "not vectorized: unsupported"
12516 " data-type %T\n", scalar_type);
12517 if (dump_enabled_p ())
12518 dump_printf_loc (MSG_NOTE, vect_location, "vectype: %T\n", vectype);
12519 }
12520 else
12521 {
12522 if (data_reference *dr = STMT_VINFO_DATA_REF (stmt_info))
12523 scalar_type = TREE_TYPE (DR_REF (dr));
12524 else if (gimple_call_internal_p (stmt, IFN_MASK_STORE))
12525 scalar_type = TREE_TYPE (gimple_call_arg (stmt, 3));
12526 else
12527 scalar_type = TREE_TYPE (gimple_get_lhs (stmt));
12528
12529 if (dump_enabled_p ())
12530 {
12531 if (group_size)
12532 dump_printf_loc (MSG_NOTE, vect_location,
12533 "get vectype for scalar type (group size %d):"
12534 " %T\n", group_size, scalar_type);
12535 else
12536 dump_printf_loc (MSG_NOTE, vect_location,
12537 "get vectype for scalar type: %T\n", scalar_type);
12538 }
12539 vectype = get_vectype_for_scalar_type (vinfo, scalar_type, group_size);
12540 if (!vectype)
12541 return opt_result::failure_at (stmt,
12542 "not vectorized:"
12543 " unsupported data-type %T\n",
12544 scalar_type);
12545
12546 if (dump_enabled_p ())
12547 dump_printf_loc (MSG_NOTE, vect_location, "vectype: %T\n", vectype);
12548 }
12549
12550 if (scalar_type && VECTOR_MODE_P (TYPE_MODE (scalar_type)))
12551 return opt_result::failure_at (stmt,
12552 "not vectorized: vector stmt in loop:%G",
12553 stmt);
12554
12555 *stmt_vectype_out = vectype;
12556
12557 /* Don't try to compute scalar types if the stmt produces a boolean
12558 vector; use the existing vector type instead. */
12559 tree nunits_vectype = vectype;
12560 if (!VECTOR_BOOLEAN_TYPE_P (vectype))
12561 {
12562 /* The number of units is set according to the smallest scalar
12563 type (or the largest vector size, but we only support one
12564 vector size per vectorization). */
12565 scalar_type = vect_get_smallest_scalar_type (stmt_info,
12566 TREE_TYPE (vectype));
12567 if (scalar_type != TREE_TYPE (vectype))
12568 {
12569 if (dump_enabled_p ())
12570 dump_printf_loc (MSG_NOTE, vect_location,
12571 "get vectype for smallest scalar type: %T\n",
12572 scalar_type);
12573 nunits_vectype = get_vectype_for_scalar_type (vinfo, scalar_type,
12574 group_size);
12575 if (!nunits_vectype)
12576 return opt_result::failure_at
12577 (stmt, "not vectorized: unsupported data-type %T\n",
12578 scalar_type);
12579 if (dump_enabled_p ())
12580 dump_printf_loc (MSG_NOTE, vect_location, "nunits vectype: %T\n",
12581 nunits_vectype);
12582 }
12583 }
12584
12585 if (!multiple_p (TYPE_VECTOR_SUBPARTS (nunits_vectype),
12586 TYPE_VECTOR_SUBPARTS (*stmt_vectype_out)))
12587 return opt_result::failure_at (stmt,
12588 "Not vectorized: Incompatible number "
12589 "of vector subparts between %T and %T\n",
12590 nunits_vectype, *stmt_vectype_out);
12591
12592 if (dump_enabled_p ())
12593 {
12594 dump_printf_loc (MSG_NOTE, vect_location, "nunits = ");
12595 dump_dec (MSG_NOTE, TYPE_VECTOR_SUBPARTS (nunits_vectype));
12596 dump_printf (MSG_NOTE, "\n");
12597 }
12598
12599 *nunits_vectype_out = nunits_vectype;
12600 return opt_result::success ();
12601 }
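
/* Hypothetical sketch, not part of the vectorizer: compute both vector
   types for a statement and extract the number of units it needs,
   mirroring how the vectorization-factor analysis consumes the two
   outputs above.  */

static opt_result ATTRIBUTE_UNUSED
example_stmt_nunits (vec_info *vinfo, stmt_vec_info stmt_info,
                     poly_uint64 *nunits_out)
{
  tree stmt_vectype, nunits_vectype;
  opt_result res
    = vect_get_vector_types_for_stmt (vinfo, stmt_info, &stmt_vectype,
                                      &nunits_vectype, 0);
  if (!res)
    return res;
  /* NUNITS_VECTYPE can be null for statements that do not constrain
     the vectorization factor, e.g. deferred SIMD clone calls.  */
  if (nunits_vectype)
    *nunits_out = TYPE_VECTOR_SUBPARTS (nunits_vectype);
  return res;
}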
12602
12603 /* Generate and return a statement sequence that sets the vector length LEN, that is:
12604
12605 min_of_start_and_end = min (START_INDEX, END_INDEX);
12606 left_len = END_INDEX - min_of_start_and_end;
12607 rhs = min (left_len, LEN_LIMIT);
12608 LEN = rhs;
12609
12610 Note: the cost of the code generated by this function is modeled
12611 by vect_estimate_min_profitable_iters, so changes here may need
12612 corresponding changes there. */
12613
12614 gimple_seq
12615 vect_gen_len (tree len, tree start_index, tree end_index, tree len_limit)
12616 {
12617 gimple_seq stmts = NULL;
12618 tree len_type = TREE_TYPE (len);
12619 gcc_assert (TREE_TYPE (start_index) == len_type);
12620
12621 tree min = gimple_build (&stmts, MIN_EXPR, len_type, start_index, end_index);
12622 tree left_len = gimple_build (&stmts, MINUS_EXPR, len_type, end_index, min);
12623 tree rhs = gimple_build (&stmts, MIN_EXPR, len_type, left_len, len_limit);
12624 gimple* stmt = gimple_build_assign (len, rhs);
12625 gimple_seq_add_stmt (&stmts, stmt);
12626
12627 return stmts;
12628 }
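
/* Worked example of the length computation above as a plain scalar
   sketch (hypothetical helper, not used by the vectorizer): for
   START_INDEX 8, END_INDEX 13 and LEN_LIMIT 4 this yields
   min (13 - min (8, 13), 4) = 4, i.e. a full vector; once fewer than
   LEN_LIMIT elements remain, the result drops to the leftover count.  */

static unsigned HOST_WIDE_INT ATTRIBUTE_UNUSED
example_scalar_len (unsigned HOST_WIDE_INT start_index,
                    unsigned HOST_WIDE_INT end_index,
                    unsigned HOST_WIDE_INT len_limit)
{
  unsigned HOST_WIDE_INT lo = MIN (start_index, end_index);
  unsigned HOST_WIDE_INT left_len = end_index - lo;
  return MIN (left_len, len_limit);
}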
12629
12630