/* Statement Analysis and Transformation for Vectorization
   Copyright (C) 2003-2021 Free Software Foundation, Inc.
   Contributed by Dorit Naishlos <dorit@il.ibm.com>
   and Ira Rosen <irar@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "target.h"
#include "rtl.h"
#include "tree.h"
#include "gimple.h"
#include "ssa.h"
#include "optabs-tree.h"
#include "insn-config.h"
#include "recog.h"		/* FIXME: for insn_data */
#include "cgraph.h"
#include "dumpfile.h"
#include "alias.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "tree-eh.h"
#include "gimplify.h"
#include "gimple-iterator.h"
#include "gimplify-me.h"
#include "tree-cfg.h"
#include "tree-ssa-loop-manip.h"
#include "cfgloop.h"
#include "explow.h"
#include "tree-ssa-loop.h"
#include "tree-scalar-evolution.h"
#include "tree-vectorizer.h"
#include "builtins.h"
#include "internal-fn.h"
#include "tree-vector-builder.h"
#include "vec-perm-indices.h"
#include "tree-ssa-loop-niter.h"
#include "gimple-fold.h"
#include "regs.h"
#include "attribs.h"

/* For lang_hooks.types.type_for_mode.  */
#include "langhooks.h"

/* Return the vectorized type for the given statement.  */

tree
stmt_vectype (class _stmt_vec_info *stmt_info)
{
  return STMT_VINFO_VECTYPE (stmt_info);
}

/* Return TRUE iff the given statement is in an inner loop relative to
   the loop being vectorized.  */

bool
stmt_in_inner_loop_p (vec_info *vinfo, class _stmt_vec_info *stmt_info)
{
  gimple *stmt = STMT_VINFO_STMT (stmt_info);
  basic_block bb = gimple_bb (stmt);
  loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
  class loop *loop;

  if (!loop_vinfo)
    return false;

  loop = LOOP_VINFO_LOOP (loop_vinfo);

  return (bb->loop_father == loop->inner);
}

/* Record the cost of a statement, either by directly informing the
   target model or by saving it in a vector for later processing.
   Return a preliminary estimate of the statement's cost.  */

unsigned
record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
		  enum vect_cost_for_stmt kind, stmt_vec_info stmt_info,
		  tree vectype, int misalign,
		  enum vect_cost_model_location where)
{
  if ((kind == vector_load || kind == unaligned_load)
      && (stmt_info && STMT_VINFO_GATHER_SCATTER_P (stmt_info)))
    kind = vector_gather_load;
  if ((kind == vector_store || kind == unaligned_store)
      && (stmt_info && STMT_VINFO_GATHER_SCATTER_P (stmt_info)))
    kind = vector_scatter_store;

  stmt_info_for_cost si = { count, kind, where, stmt_info, vectype, misalign };
  body_cost_vec->safe_push (si);

  return (unsigned)
    (builtin_vectorization_cost (kind, vectype, misalign) * count);
}

/* Return a variable of type ELEM_TYPE[NELEMS].  */

static tree
create_vector_array (tree elem_type, unsigned HOST_WIDE_INT nelems)
{
  return create_tmp_var (build_array_type_nelts (elem_type, nelems),
			 "vect_array");
}

/* ARRAY is an array of vectors created by create_vector_array.
   Return an SSA_NAME for the vector in index N.  The reference
   is part of the vectorization of STMT_INFO and the vector is associated
   with scalar destination SCALAR_DEST.  */

static tree
read_vector_array (vec_info *vinfo,
		   stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
		   tree scalar_dest, tree array, unsigned HOST_WIDE_INT n)
{
  tree vect_type, vect, vect_name, array_ref;
  gimple *new_stmt;

  gcc_assert (TREE_CODE (TREE_TYPE (array)) == ARRAY_TYPE);
  vect_type = TREE_TYPE (TREE_TYPE (array));
  vect = vect_create_destination_var (scalar_dest, vect_type);
  array_ref = build4 (ARRAY_REF, vect_type, array,
		      build_int_cst (size_type_node, n),
		      NULL_TREE, NULL_TREE);

  new_stmt = gimple_build_assign (vect, array_ref);
  vect_name = make_ssa_name (vect, new_stmt);
  gimple_assign_set_lhs (new_stmt, vect_name);
  vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);

  return vect_name;
}

/* ARRAY is an array of vectors created by create_vector_array.
   Emit code to store SSA_NAME VECT in index N of the array.
   The store is part of the vectorization of STMT_INFO.  */

static void
write_vector_array (vec_info *vinfo,
		    stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
		    tree vect, tree array, unsigned HOST_WIDE_INT n)
{
  tree array_ref;
  gimple *new_stmt;

  array_ref = build4 (ARRAY_REF, TREE_TYPE (vect), array,
		      build_int_cst (size_type_node, n),
		      NULL_TREE, NULL_TREE);

  new_stmt = gimple_build_assign (array_ref, vect);
  vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
}
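
/* Together, create_vector_array, write_vector_array and
   read_vector_array implement a small array-of-vectors protocol, used
   e.g. when emitting load/store-lanes sequences.  A hedged sketch of
   the calling sequence (VINFO, STMT_INFO, GSI, VECTYPE, VEC0 and
   SCALAR_DEST are assumed to come from the caller):

     tree array = create_vector_array (vectype, 2);
     write_vector_array (vinfo, stmt_info, gsi, vec0, array, 0);
     ...
     tree v0 = read_vector_array (vinfo, stmt_info, gsi, scalar_dest,
				  array, 0);

   with vect_clobber_variable (defined below) marking the array dead
   afterwards.  */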

/* PTR is a pointer to an array of type TYPE.  Return a representation
   of *PTR, using ALIAS_PTR_TYPE for the alias information.  The memory
   reference replaces those in the data reference group being
   vectorized.  */

static tree
create_array_ref (tree type, tree ptr, tree alias_ptr_type)
{
  tree mem_ref;

  mem_ref = build2 (MEM_REF, type, ptr, build_int_cst (alias_ptr_type, 0));
  /* Arrays have the same alignment as their type.  */
  set_ptr_info_alignment (get_ptr_info (ptr), TYPE_ALIGN_UNIT (type), 0);
  return mem_ref;
}

/* Add a clobber of variable VAR to the vectorization of STMT_INFO.
   Emit the clobber before *GSI.  */

static void
vect_clobber_variable (vec_info *vinfo, stmt_vec_info stmt_info,
		       gimple_stmt_iterator *gsi, tree var)
{
  tree clobber = build_clobber (TREE_TYPE (var));
  gimple *new_stmt = gimple_build_assign (var, clobber);
  vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
}

/* Utility functions used by vect_mark_stmts_to_be_vectorized.  */

/* Function vect_mark_relevant.

   Mark STMT_INFO as "relevant for vectorization" and add it to WORKLIST.  */

static void
vect_mark_relevant (vec<stmt_vec_info> *worklist, stmt_vec_info stmt_info,
		    enum vect_relevant relevant, bool live_p)
{
  enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
  bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "mark relevant %d, live %d: %G", relevant, live_p,
		     stmt_info->stmt);

  /* If this stmt is an original stmt in a pattern, we might need to mark its
     related pattern stmt instead of the original stmt.  However, such stmts
     may have their own uses that are not in any pattern; in such cases the
     stmt itself should be marked.  */
  if (STMT_VINFO_IN_PATTERN_P (stmt_info))
    {
      /* This is the last stmt in a sequence that was detected as a
	 pattern that can potentially be vectorized.  Don't mark the stmt
	 as relevant/live because it's not going to be vectorized.
	 Instead mark the pattern-stmt that replaces it.  */

      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "last stmt in pattern. don't mark"
			 " relevant/live.\n");
      stmt_vec_info old_stmt_info = stmt_info;
      stmt_info = STMT_VINFO_RELATED_STMT (stmt_info);
      gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == old_stmt_info);
      save_relevant = STMT_VINFO_RELEVANT (stmt_info);
      save_live_p = STMT_VINFO_LIVE_P (stmt_info);
    }

  STMT_VINFO_LIVE_P (stmt_info) |= live_p;
  if (relevant > STMT_VINFO_RELEVANT (stmt_info))
    STMT_VINFO_RELEVANT (stmt_info) = relevant;

  if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
      && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "already marked relevant/live.\n");
      return;
    }

  worklist->safe_push (stmt_info);
}


/* Function is_simple_and_all_uses_invariant.

   Return true if STMT_INFO is simple and all uses of it are invariant.  */

bool
is_simple_and_all_uses_invariant (stmt_vec_info stmt_info,
				  loop_vec_info loop_vinfo)
{
  tree op;
  ssa_op_iter iter;

  gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
  if (!stmt)
    return false;

  FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_USE)
    {
      enum vect_def_type dt = vect_uninitialized_def;

      if (!vect_is_simple_use (op, loop_vinfo, &dt))
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "use not simple.\n");
	  return false;
	}

      if (dt != vect_external_def && dt != vect_constant_def)
	return false;
    }
  return true;
}

/* Function vect_stmt_relevant_p.

   Return true if STMT_INFO, in the loop that is represented by LOOP_VINFO,
   is "relevant for vectorization".

   A stmt is considered "relevant for vectorization" if:
   - it has uses outside the loop.
   - it has vdefs (it alters memory).
   - it is a control stmt in the loop (other than the exit condition).

   CHECKME: what other side effects would the vectorizer allow?  */

static bool
vect_stmt_relevant_p (stmt_vec_info stmt_info, loop_vec_info loop_vinfo,
		      enum vect_relevant *relevant, bool *live_p)
{
  class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  ssa_op_iter op_iter;
  imm_use_iterator imm_iter;
  use_operand_p use_p;
  def_operand_p def_p;

  *relevant = vect_unused_in_scope;
  *live_p = false;

  /* cond stmt other than loop exit cond.  */
  if (is_ctrl_stmt (stmt_info->stmt)
      && STMT_VINFO_TYPE (stmt_info) != loop_exit_ctrl_vec_info_type)
    *relevant = vect_used_in_scope;

  /* changing memory.  */
  if (gimple_code (stmt_info->stmt) != GIMPLE_PHI)
    if (gimple_vdef (stmt_info->stmt)
	&& !gimple_clobber_p (stmt_info->stmt))
      {
	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vec_stmt_relevant_p: stmt has vdefs.\n");
	*relevant = vect_used_in_scope;
      }

  /* uses outside the loop.  */
  FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt_info->stmt, op_iter, SSA_OP_DEF)
    {
      FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
	{
	  basic_block bb = gimple_bb (USE_STMT (use_p));
	  if (!flow_bb_inside_loop_p (loop, bb))
	    {
	      if (is_gimple_debug (USE_STMT (use_p)))
		continue;

	      if (dump_enabled_p ())
		dump_printf_loc (MSG_NOTE, vect_location,
				 "vec_stmt_relevant_p: used out of loop.\n");

	      /* We expect all such uses to be in the loop exit phis
		 (because of loop closed form)  */
	      gcc_assert (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI);
	      gcc_assert (bb == single_exit (loop)->dest);

	      *live_p = true;
	    }
	}
    }

  if (*live_p && *relevant == vect_unused_in_scope
      && !is_simple_and_all_uses_invariant (stmt_info, loop_vinfo))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "vec_stmt_relevant_p: stmt live but not relevant.\n");
      *relevant = vect_used_only_live;
    }

  return (*live_p || *relevant);
}
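
/* As a concrete illustration of the classification above (example
   source, not code from this pass): in

     for (i = 0; i < n; i++)
       {
	 a[i] = b[i] + 1;	<-- has a vdef, so relevant
	 s += b[i];		<-- s is used after the loop, so live
       }

   the store is relevant because it alters memory, and the final value
   of s is live because it has a use outside the loop (through the
   loop exit phi).  */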


/* Function exist_non_indexing_operands_for_use_p.

   USE is one of the uses attached to STMT_INFO.  Check if USE is
   used in STMT_INFO for anything other than indexing an array.  */

static bool
exist_non_indexing_operands_for_use_p (tree use, stmt_vec_info stmt_info)
{
  tree operand;

  /* USE corresponds to some operand in STMT.  If there is no data
     reference in STMT, then any operand that corresponds to USE
     is not indexing an array.  */
  if (!STMT_VINFO_DATA_REF (stmt_info))
    return true;

  /* STMT has a data_ref.  FORNOW this means that it is one of
     the following forms:
     -1- ARRAY_REF = var
     -2- var = ARRAY_REF
     (This should have been verified in analyze_data_refs).

     'var' in the second case corresponds to a def, not a use,
     so USE cannot correspond to any operands that are not used
     for array indexing.

     Therefore, all we need to check is if STMT falls into the
     first case, and whether var corresponds to USE.  */

  gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
  if (!assign || !gimple_assign_copy_p (assign))
    {
      gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
      if (call && gimple_call_internal_p (call))
	{
	  internal_fn ifn = gimple_call_internal_fn (call);
	  int mask_index = internal_fn_mask_index (ifn);
	  if (mask_index >= 0
	      && use == gimple_call_arg (call, mask_index))
	    return true;
	  int stored_value_index = internal_fn_stored_value_index (ifn);
	  if (stored_value_index >= 0
	      && use == gimple_call_arg (call, stored_value_index))
	    return true;
	  if (internal_gather_scatter_fn_p (ifn)
	      && use == gimple_call_arg (call, 1))
	    return true;
	}
      return false;
    }

  if (TREE_CODE (gimple_assign_lhs (assign)) == SSA_NAME)
    return false;
  operand = gimple_assign_rhs1 (assign);
  if (TREE_CODE (operand) != SSA_NAME)
    return false;

  if (operand == use)
    return true;

  return false;
}


/*
   Function process_use.

   Inputs:
   - a USE in STMT_VINFO in a loop represented by LOOP_VINFO
   - RELEVANT - enum value to be set in the STMT_VINFO of the stmt
     that defined USE.  This is done by calling mark_relevant and passing it
     the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
   - FORCE is true if the exist_non_indexing_operands_for_use_p check
     shouldn't be performed.

   Outputs:
   Generally, LIVE_P and RELEVANT are used to define the liveness and
   relevance info of the DEF_STMT of this USE:
       STMT_VINFO_LIVE_P (DEF_stmt_vinfo) <-- live_p
       STMT_VINFO_RELEVANT (DEF_stmt_vinfo) <-- relevant
   Exceptions:
   - case 1: If USE is used only for address computations (e.g. array
     indexing), which does not need to be directly vectorized, then the
     liveness/relevance of the respective DEF_STMT is left unchanged.
   - case 2: If STMT_VINFO is a reduction phi and DEF_STMT is a reduction stmt,
     we skip DEF_STMT because it has already been processed.
   - case 3: If DEF_STMT and STMT_VINFO are in different nests, then
     "relevant" will be modified accordingly.

   Return opt_result::success () if everything is as expected;
   return a failure result otherwise.  */

static opt_result
process_use (stmt_vec_info stmt_vinfo, tree use, loop_vec_info loop_vinfo,
	     enum vect_relevant relevant, vec<stmt_vec_info> *worklist,
	     bool force)
{
  stmt_vec_info dstmt_vinfo;
  enum vect_def_type dt;

  /* case 1: we are only interested in uses that need to be vectorized.  Uses
     that are used for address computation are not considered relevant.  */
  if (!force && !exist_non_indexing_operands_for_use_p (use, stmt_vinfo))
    return opt_result::success ();

  if (!vect_is_simple_use (use, loop_vinfo, &dt, &dstmt_vinfo))
    return opt_result::failure_at (stmt_vinfo->stmt,
				   "not vectorized:"
				   " unsupported use in stmt.\n");

  if (!dstmt_vinfo)
    return opt_result::success ();

  basic_block def_bb = gimple_bb (dstmt_vinfo->stmt);
  basic_block bb = gimple_bb (stmt_vinfo->stmt);

  /* case 2: A reduction phi (STMT) defined by a reduction stmt (DSTMT_VINFO).
     We have to force the stmt live since the epilogue loop needs it to
     continue computing the reduction.  */
  if (gimple_code (stmt_vinfo->stmt) == GIMPLE_PHI
      && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
      && gimple_code (dstmt_vinfo->stmt) != GIMPLE_PHI
      && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def
      && bb->loop_father == def_bb->loop_father)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "reduc-stmt defining reduc-phi in the same nest.\n");
      vect_mark_relevant (worklist, dstmt_vinfo, relevant, true);
      return opt_result::success ();
    }

  /* case 3a: outer-loop stmt defining an inner-loop stmt:
	outer-loop-header-bb:
		d = dstmt_vinfo
	inner-loop:
		stmt # use (d)
	outer-loop-tail-bb:
		...		  */
  if (flow_loop_nested_p (def_bb->loop_father, bb->loop_father))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "outer-loop def-stmt defining inner-loop stmt.\n");

      switch (relevant)
	{
	case vect_unused_in_scope:
	  relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_nested_cycle) ?
		      vect_used_in_scope : vect_unused_in_scope;
	  break;

	case vect_used_in_outer_by_reduction:
	  gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
	  relevant = vect_used_by_reduction;
	  break;

	case vect_used_in_outer:
	  gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
	  relevant = vect_used_in_scope;
	  break;

	case vect_used_in_scope:
	  break;

	default:
	  gcc_unreachable ();
	}
    }

  /* case 3b: inner-loop stmt defining an outer-loop stmt:
	outer-loop-header-bb:
		...
	inner-loop:
		d = dstmt_vinfo
	outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
		stmt # use (d)		*/
  else if (flow_loop_nested_p (bb->loop_father, def_bb->loop_father))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "inner-loop def-stmt defining outer-loop stmt.\n");

      switch (relevant)
	{
	case vect_unused_in_scope:
	  relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
		      || STMT_VINFO_DEF_TYPE (stmt_vinfo)
			 == vect_double_reduction_def)
		     ? vect_used_in_outer_by_reduction : vect_unused_in_scope;
	  break;

	case vect_used_by_reduction:
	case vect_used_only_live:
	  relevant = vect_used_in_outer_by_reduction;
	  break;

	case vect_used_in_scope:
	  relevant = vect_used_in_outer;
	  break;

	default:
	  gcc_unreachable ();
	}
    }
  /* We are also not interested in uses on loop PHI backedges that are
     inductions.  Otherwise we'll needlessly vectorize the IV increment
     and cause hybrid SLP for SLP inductions.  Unless the PHI is live
     of course.  */
  else if (gimple_code (stmt_vinfo->stmt) == GIMPLE_PHI
	   && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_induction_def
	   && ! STMT_VINFO_LIVE_P (stmt_vinfo)
	   && (PHI_ARG_DEF_FROM_EDGE (stmt_vinfo->stmt,
				      loop_latch_edge (bb->loop_father))
	       == use))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "induction value on backedge.\n");
      return opt_result::success ();
    }


  vect_mark_relevant (worklist, dstmt_vinfo, relevant, false);
  return opt_result::success ();
}


/* Function vect_mark_stmts_to_be_vectorized.

   Not all stmts in the loop need to be vectorized.  For example:

     for i...
       for j...
   1.	 T0 = i + j
   2.	 T1 = a[T0]

   3.	 j = j + 1

   Stmts 1 and 3 do not need to be vectorized, because loop control and
   addressing of vectorized data-refs are handled differently.

   This pass detects such stmts.  */

opt_result
vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo, bool *fatal)
{
  class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
  unsigned int nbbs = loop->num_nodes;
  gimple_stmt_iterator si;
  unsigned int i;
  basic_block bb;
  bool live_p;
  enum vect_relevant relevant;

  DUMP_VECT_SCOPE ("vect_mark_stmts_to_be_vectorized");

  auto_vec<stmt_vec_info, 64> worklist;

  /* 1. Init worklist.  */
  for (i = 0; i < nbbs; i++)
    {
      bb = bbs[i];
      for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
	{
	  stmt_vec_info phi_info = loop_vinfo->lookup_stmt (gsi_stmt (si));
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_NOTE, vect_location, "init: phi relevant? %G",
			     phi_info->stmt);

	  if (vect_stmt_relevant_p (phi_info, loop_vinfo, &relevant, &live_p))
	    vect_mark_relevant (&worklist, phi_info, relevant, live_p);
	}
      for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
	{
	  if (is_gimple_debug (gsi_stmt (si)))
	    continue;
	  stmt_vec_info stmt_info = loop_vinfo->lookup_stmt (gsi_stmt (si));
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_NOTE, vect_location,
			     "init: stmt relevant? %G", stmt_info->stmt);

	  if (vect_stmt_relevant_p (stmt_info, loop_vinfo, &relevant, &live_p))
	    vect_mark_relevant (&worklist, stmt_info, relevant, live_p);
	}
    }

  /* 2. Process_worklist */
  while (worklist.length () > 0)
    {
      use_operand_p use_p;
      ssa_op_iter iter;

      stmt_vec_info stmt_vinfo = worklist.pop ();
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "worklist: examine stmt: %G", stmt_vinfo->stmt);

      /* Examine the USEs of STMT.  For each USE, mark the stmt that defines
	 it (DEF_STMT) as relevant/irrelevant according to the relevance
	 property of STMT.  */
      relevant = STMT_VINFO_RELEVANT (stmt_vinfo);

      /* Generally, the relevance property of STMT (in STMT_VINFO_RELEVANT) is
	 propagated as is to the DEF_STMTs of its USEs.

	 One exception is when STMT has been identified as defining a reduction
	 variable; in this case we set the relevance to vect_used_by_reduction.
	 This is because we distinguish between two kinds of relevant stmts -
	 those that are used by a reduction computation, and those that are
	 (also) used by a regular computation.  This allows us later on to
	 identify stmts that are used solely by a reduction, and therefore the
	 order of the results that they produce does not have to be kept.  */

      switch (STMT_VINFO_DEF_TYPE (stmt_vinfo))
	{
	case vect_reduction_def:
	  gcc_assert (relevant != vect_unused_in_scope);
	  if (relevant != vect_unused_in_scope
	      && relevant != vect_used_in_scope
	      && relevant != vect_used_by_reduction
	      && relevant != vect_used_only_live)
	    return opt_result::failure_at
	      (stmt_vinfo->stmt, "unsupported use of reduction.\n");
	  break;

	case vect_nested_cycle:
	  if (relevant != vect_unused_in_scope
	      && relevant != vect_used_in_outer_by_reduction
	      && relevant != vect_used_in_outer)
	    return opt_result::failure_at
	      (stmt_vinfo->stmt, "unsupported use of nested cycle.\n");
	  break;

	case vect_double_reduction_def:
	  if (relevant != vect_unused_in_scope
	      && relevant != vect_used_by_reduction
	      && relevant != vect_used_only_live)
	    return opt_result::failure_at
	      (stmt_vinfo->stmt, "unsupported use of double reduction.\n");
	  break;

	default:
	  break;
	}

      if (is_pattern_stmt_p (stmt_vinfo))
	{
	  /* Pattern statements are not inserted into the code, so
	     FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
	     have to scan the RHS or function arguments instead.  */
	  if (gassign *assign = dyn_cast <gassign *> (stmt_vinfo->stmt))
	    {
	      enum tree_code rhs_code = gimple_assign_rhs_code (assign);
	      tree op = gimple_assign_rhs1 (assign);

	      i = 1;
	      if (rhs_code == COND_EXPR && COMPARISON_CLASS_P (op))
		{
		  opt_result res
		    = process_use (stmt_vinfo, TREE_OPERAND (op, 0),
				   loop_vinfo, relevant, &worklist, false);
		  if (!res)
		    return res;
		  res = process_use (stmt_vinfo, TREE_OPERAND (op, 1),
				     loop_vinfo, relevant, &worklist, false);
		  if (!res)
		    return res;
		  i = 2;
		}
	      for (; i < gimple_num_ops (assign); i++)
		{
		  op = gimple_op (assign, i);
		  if (TREE_CODE (op) == SSA_NAME)
		    {
		      opt_result res
			= process_use (stmt_vinfo, op, loop_vinfo, relevant,
				       &worklist, false);
		      if (!res)
			return res;
		    }
		}
	    }
	  else if (gcall *call = dyn_cast <gcall *> (stmt_vinfo->stmt))
	    {
	      for (i = 0; i < gimple_call_num_args (call); i++)
		{
		  tree arg = gimple_call_arg (call, i);
		  opt_result res
		    = process_use (stmt_vinfo, arg, loop_vinfo, relevant,
				   &worklist, false);
		  if (!res)
		    return res;
		}
	    }
	}
      else
	FOR_EACH_PHI_OR_STMT_USE (use_p, stmt_vinfo->stmt, iter, SSA_OP_USE)
	  {
	    tree op = USE_FROM_PTR (use_p);
	    opt_result res
	      = process_use (stmt_vinfo, op, loop_vinfo, relevant,
			     &worklist, false);
	    if (!res)
	      return res;
	  }

      if (STMT_VINFO_GATHER_SCATTER_P (stmt_vinfo))
	{
	  gather_scatter_info gs_info;
	  if (!vect_check_gather_scatter (stmt_vinfo, loop_vinfo, &gs_info))
	    gcc_unreachable ();
	  opt_result res
	    = process_use (stmt_vinfo, gs_info.offset, loop_vinfo, relevant,
			   &worklist, true);
	  if (!res)
	    {
	      if (fatal)
		*fatal = false;
	      return res;
	    }
	}
    } /* while worklist */

  return opt_result::success ();
}

/* Function vect_model_simple_cost.

   Models cost for simple operations, i.e. those that only emit ncopies of a
   single op.  Right now, this does not account for multiple insns that could
   be generated for the single vector op.  We will handle that shortly.  */

static void
vect_model_simple_cost (vec_info *,
			stmt_vec_info stmt_info, int ncopies,
			enum vect_def_type *dt,
			int ndts,
			slp_tree node,
			stmt_vector_for_cost *cost_vec,
			vect_cost_for_stmt kind = vector_stmt)
{
  int inside_cost = 0, prologue_cost = 0;

  gcc_assert (cost_vec != NULL);

  /* ??? Somehow we need to fix this at the callers.  */
  if (node)
    ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (node);

  if (!node)
    /* Cost the "broadcast" of a scalar operand into a vector operand.
       Use scalar_to_vec to cost the broadcast, as elsewhere in the vector
       cost model.  */
    for (int i = 0; i < ndts; i++)
      if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
	prologue_cost += record_stmt_cost (cost_vec, 1, scalar_to_vec,
					   stmt_info, 0, vect_prologue);

  /* Pass the inside-of-loop statements to the target-specific cost model.  */
  inside_cost += record_stmt_cost (cost_vec, ncopies, kind,
				   stmt_info, 0, vect_body);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "vect_model_simple_cost: inside_cost = %d, "
		     "prologue_cost = %d .\n", inside_cost, prologue_cost);
}
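
/* Reading the code above on an example: a non-SLP vector add with
   NCOPIES == 2 whose second operand is loop-invariant records one
   scalar_to_vec prologue cost for the broadcast plus two vector_stmt
   body costs.  (Illustrative only; the actual numbers come from the
   target's cost hooks.)  */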


/* Model cost for type demotion and promotion operations.  PWR is
   normally zero for single-step promotions and demotions.  It will be
   one if two-step promotion/demotion is required, and so on.  NCOPIES
   is the number of vector results (and thus number of instructions)
   for the narrowest end of the operation chain.  Each additional
   step doubles the number of instructions required.  If WIDEN_ARITH
   is true the stmt is doing widening arithmetic.  */

static void
vect_model_promotion_demotion_cost (stmt_vec_info stmt_info,
				    enum vect_def_type *dt,
				    unsigned int ncopies, int pwr,
				    stmt_vector_for_cost *cost_vec,
				    bool widen_arith)
{
  int i;
  int inside_cost = 0, prologue_cost = 0;

  for (i = 0; i < pwr + 1; i++)
    {
      inside_cost += record_stmt_cost (cost_vec, ncopies,
				       widen_arith
				       ? vector_stmt : vec_promote_demote,
				       stmt_info, 0, vect_body);
      ncopies *= 2;
    }

  /* FORNOW: Assuming maximum 2 args per stmt.  */
  for (i = 0; i < 2; i++)
    if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
      prologue_cost += record_stmt_cost (cost_vec, 1, vector_stmt,
					 stmt_info, 0, vect_prologue);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "vect_model_promotion_demotion_cost: inside_cost = %d, "
		     "prologue_cost = %d .\n", inside_cost, prologue_cost);
}
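
/* A quick worked example of the doubling loop above: with NCOPIES == 2
   and PWR == 1 (a two-step promotion), the first iteration records 2
   stmts and the second records 4, i.e. 6 body costs in total.  */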

/* Returns true if the current function returns DECL.  */

static bool
cfun_returns (tree decl)
{
  edge_iterator ei;
  edge e;
  FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
    {
      greturn *ret = safe_dyn_cast <greturn *> (last_stmt (e->src));
      if (!ret)
	continue;
      if (gimple_return_retval (ret) == decl)
	return true;
      /* We often end up with an aggregate copy to the result decl,
	 handle that case as well.  First skip intermediate clobbers
	 though.  */
      gimple *def = ret;
      do
	{
	  def = SSA_NAME_DEF_STMT (gimple_vuse (def));
	}
      while (gimple_clobber_p (def));
      if (is_a <gassign *> (def)
	  && gimple_assign_lhs (def) == gimple_return_retval (ret)
	  && gimple_assign_rhs1 (def) == decl)
	return true;
    }
  return false;
}

/* Function vect_model_store_cost.

   Models cost for stores.  In the case of grouped accesses, one access
   has the overhead of the grouped access attributed to it.  */

static void
vect_model_store_cost (vec_info *vinfo, stmt_vec_info stmt_info, int ncopies,
		       vect_memory_access_type memory_access_type,
		       dr_alignment_support alignment_support_scheme,
		       int misalignment,
		       vec_load_store_type vls_type, slp_tree slp_node,
		       stmt_vector_for_cost *cost_vec)
{
  unsigned int inside_cost = 0, prologue_cost = 0;
  stmt_vec_info first_stmt_info = stmt_info;
  bool grouped_access_p = STMT_VINFO_GROUPED_ACCESS (stmt_info);

  /* ??? Somehow we need to fix this at the callers.  */
  if (slp_node)
    ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);

  if (vls_type == VLS_STORE_INVARIANT)
    {
      if (!slp_node)
	prologue_cost += record_stmt_cost (cost_vec, 1, scalar_to_vec,
					   stmt_info, 0, vect_prologue);
    }

  /* Grouped stores update all elements in the group at once,
     so we want the DR for the first statement.  */
  if (!slp_node && grouped_access_p)
    first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);

  /* True if we should include any once-per-group costs as well as
     the cost of the statement itself.  For SLP we only get called
     once per group anyhow.  */
  bool first_stmt_p = (first_stmt_info == stmt_info);

  /* We assume that the cost of a single store-lanes instruction is
     equivalent to the cost of DR_GROUP_SIZE separate stores.  If a grouped
     access is instead being provided by a permute-and-store operation,
     include the cost of the permutes.  */
  if (first_stmt_p
      && memory_access_type == VMAT_CONTIGUOUS_PERMUTE)
    {
      /* Uses high and low interleaves, or shuffle operations, for each
	 needed permute.  */
      int group_size = DR_GROUP_SIZE (first_stmt_info);
      int nstmts = ncopies * ceil_log2 (group_size) * group_size;
      inside_cost = record_stmt_cost (cost_vec, nstmts, vec_perm,
				      stmt_info, 0, vect_body);

      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "vect_model_store_cost: strided group_size = %d .\n",
			 group_size);
    }

  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  /* Costs of the stores.  */
  if (memory_access_type == VMAT_ELEMENTWISE
      || memory_access_type == VMAT_GATHER_SCATTER)
    {
      /* N scalar stores plus extracting the elements.  */
      unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
      inside_cost += record_stmt_cost (cost_vec,
				       ncopies * assumed_nunits,
				       scalar_store, stmt_info, 0, vect_body);
    }
  else
    vect_get_store_cost (vinfo, stmt_info, ncopies, alignment_support_scheme,
			 misalignment, &inside_cost, cost_vec);

  if (memory_access_type == VMAT_ELEMENTWISE
      || memory_access_type == VMAT_STRIDED_SLP)
    {
      /* N scalar stores plus extracting the elements.  */
      unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
      inside_cost += record_stmt_cost (cost_vec,
				       ncopies * assumed_nunits,
				       vec_to_scalar, stmt_info, 0, vect_body);
    }

  /* When vectorizing a store into the function result assign
     a penalty if the function returns in a multi-register location.
     In this case we assume we'll end up with having to spill the
     vector result and do piecewise loads as a conservative estimate.  */
  tree base = get_base_address (STMT_VINFO_DATA_REF (stmt_info)->ref);
  if (base
      && (TREE_CODE (base) == RESULT_DECL
	  || (DECL_P (base) && cfun_returns (base)))
      && !aggregate_value_p (base, cfun->decl))
    {
      rtx reg = hard_function_value (TREE_TYPE (base), cfun->decl, 0, 1);
      /* ??? Handle PARALLEL in some way.  */
      if (REG_P (reg))
	{
	  int nregs = hard_regno_nregs (REGNO (reg), GET_MODE (reg));
	  /* Assume that a single reg-reg move is possible and cheap,
	     do not account for vector to gp register move cost.  */
	  if (nregs > 1)
	    {
	      /* Spill.  */
	      prologue_cost += record_stmt_cost (cost_vec, ncopies,
						 vector_store,
						 stmt_info, 0, vect_epilogue);
	      /* Loads.  */
	      prologue_cost += record_stmt_cost (cost_vec, ncopies * nregs,
						 scalar_load,
						 stmt_info, 0, vect_epilogue);
	    }
	}
    }

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "vect_model_store_cost: inside_cost = %d, "
		     "prologue_cost = %d .\n", inside_cost, prologue_cost);
}
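
/* The permute count above follows the classic log2 interleaving
   scheme: e.g. for NCOPIES == 1 and GROUP_SIZE == 4,
   ceil_log2 (4) == 2 stages of 4 high/low interleaves each yield
   nstmts == 8 vec_perm costs.  */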


/* Calculate cost of DR's memory access.  */
void
vect_get_store_cost (vec_info *, stmt_vec_info stmt_info, int ncopies,
		     dr_alignment_support alignment_support_scheme,
		     int misalignment,
		     unsigned int *inside_cost,
		     stmt_vector_for_cost *body_cost_vec)
{
  switch (alignment_support_scheme)
    {
    case dr_aligned:
      {
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies,
					  vector_store, stmt_info, 0,
					  vect_body);

	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vect_model_store_cost: aligned.\n");
	break;
      }

    case dr_unaligned_supported:
      {
	/* Here, we assign an additional cost for the unaligned store.  */
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies,
					  unaligned_store, stmt_info,
					  misalignment, vect_body);
	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vect_model_store_cost: unaligned supported by "
			   "hardware.\n");
	break;
      }

    case dr_unaligned_unsupported:
      {
	*inside_cost = VECT_MAX_COST;

	if (dump_enabled_p ())
	  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			   "vect_model_store_cost: unsupported access.\n");
	break;
      }

    default:
      gcc_unreachable ();
    }
}


/* Function vect_model_load_cost.

   Models cost for loads.  In the case of grouped accesses, one access has
   the overhead of the grouped access attributed to it.  Since unaligned
   accesses are supported for loads, we also account for the costs of the
   access scheme chosen.  */

static void
vect_model_load_cost (vec_info *vinfo,
		      stmt_vec_info stmt_info, unsigned ncopies, poly_uint64 vf,
		      vect_memory_access_type memory_access_type,
		      dr_alignment_support alignment_support_scheme,
		      int misalignment,
		      gather_scatter_info *gs_info,
		      slp_tree slp_node,
		      stmt_vector_for_cost *cost_vec)
{
  unsigned int inside_cost = 0, prologue_cost = 0;
  bool grouped_access_p = STMT_VINFO_GROUPED_ACCESS (stmt_info);

  gcc_assert (cost_vec);

  /* ??? Somehow we need to fix this at the callers.  */
  if (slp_node)
    ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);

  if (slp_node && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
    {
      /* If the load is permuted then the alignment is determined by
	 the first group element not by the first scalar stmt DR.  */
      stmt_vec_info first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
      /* Record the cost for the permutation.  */
      unsigned n_perms, n_loads;
      vect_transform_slp_perm_load (vinfo, slp_node, vNULL, NULL,
				    vf, true, &n_perms, &n_loads);
      inside_cost += record_stmt_cost (cost_vec, n_perms, vec_perm,
				       first_stmt_info, 0, vect_body);

      /* And adjust the number of loads performed.  This handles
	 redundancies as well as loads that are later dead.  */
      ncopies = n_loads;
    }

  /* Grouped loads read all elements in the group at once,
     so we want the DR for the first statement.  */
  stmt_vec_info first_stmt_info = stmt_info;
  if (!slp_node && grouped_access_p)
    first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);

  /* True if we should include any once-per-group costs as well as
     the cost of the statement itself.  For SLP we only get called
     once per group anyhow.  */
  bool first_stmt_p = (first_stmt_info == stmt_info);

  /* An IFN_LOAD_LANES will load all its vector results, regardless of which
     ones we actually need.  Account for the cost of unused results.  */
  if (first_stmt_p && !slp_node && memory_access_type == VMAT_LOAD_STORE_LANES)
    {
      unsigned int gaps = DR_GROUP_SIZE (first_stmt_info);
      stmt_vec_info next_stmt_info = first_stmt_info;
      do
	{
	  gaps -= 1;
	  next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
	}
      while (next_stmt_info);
      if (gaps)
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_NOTE, vect_location,
			     "vect_model_load_cost: %d unused vectors.\n",
			     gaps);
	  vect_get_load_cost (vinfo, stmt_info, ncopies * gaps,
			      alignment_support_scheme, misalignment, false,
			      &inside_cost, &prologue_cost,
			      cost_vec, cost_vec, true);
	}
    }

  /* We assume that the cost of a single load-lanes instruction is
     equivalent to the cost of DR_GROUP_SIZE separate loads.  If a grouped
     access is instead being provided by a load-and-permute operation,
     include the cost of the permutes.  */
  if (first_stmt_p
      && memory_access_type == VMAT_CONTIGUOUS_PERMUTE)
    {
      /* Uses even and odd extract operations, or shuffle operations,
	 for each needed permute.  */
      int group_size = DR_GROUP_SIZE (first_stmt_info);
      int nstmts = ncopies * ceil_log2 (group_size) * group_size;
      inside_cost += record_stmt_cost (cost_vec, nstmts, vec_perm,
				       stmt_info, 0, vect_body);

      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "vect_model_load_cost: strided group_size = %d .\n",
			 group_size);
    }

  /* The loads themselves.  */
  if (memory_access_type == VMAT_ELEMENTWISE
      || memory_access_type == VMAT_GATHER_SCATTER)
    {
      tree vectype = STMT_VINFO_VECTYPE (stmt_info);
      unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
      if (memory_access_type == VMAT_GATHER_SCATTER
	  && gs_info->ifn == IFN_LAST && !gs_info->decl)
	/* For emulated gathers N offset vector element extracts
	   (we assume the scalar scaling and ptr + offset add is consumed by
	   the load).  */
	inside_cost += record_stmt_cost (cost_vec, ncopies * assumed_nunits,
					 vec_to_scalar, stmt_info, 0,
					 vect_body);
      /* N scalar loads plus gathering them into a vector.  */
      inside_cost += record_stmt_cost (cost_vec,
				       ncopies * assumed_nunits,
				       scalar_load, stmt_info, 0, vect_body);
    }
  else if (memory_access_type == VMAT_INVARIANT)
    {
      /* Invariant loads will ideally be hoisted and splat to a vector.  */
      prologue_cost += record_stmt_cost (cost_vec, 1,
					 scalar_load, stmt_info, 0,
					 vect_prologue);
      prologue_cost += record_stmt_cost (cost_vec, 1,
					 scalar_to_vec, stmt_info, 0,
					 vect_prologue);
    }
  else
    vect_get_load_cost (vinfo, stmt_info, ncopies,
			alignment_support_scheme, misalignment, first_stmt_p,
			&inside_cost, &prologue_cost,
			cost_vec, cost_vec, true);
  if (memory_access_type == VMAT_ELEMENTWISE
      || memory_access_type == VMAT_STRIDED_SLP
      || (memory_access_type == VMAT_GATHER_SCATTER
	  && gs_info->ifn == IFN_LAST && !gs_info->decl))
    inside_cost += record_stmt_cost (cost_vec, ncopies, vec_construct,
				     stmt_info, 0, vect_body);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "vect_model_load_cost: inside_cost = %d, "
		     "prologue_cost = %d .\n", inside_cost, prologue_cost);
}
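
/* For example, under VMAT_LOAD_STORE_LANES with DR_GROUP_SIZE == 4 but
   only three statements remaining in the group, the do/while loop
   above ends with gaps == 1, so one vector's worth of unused
   load-lanes results is costed on top of the loads themselves.  */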


/* Calculate cost of DR's memory access.  */
void
vect_get_load_cost (vec_info *, stmt_vec_info stmt_info, int ncopies,
		    dr_alignment_support alignment_support_scheme,
		    int misalignment,
		    bool add_realign_cost, unsigned int *inside_cost,
		    unsigned int *prologue_cost,
		    stmt_vector_for_cost *prologue_cost_vec,
		    stmt_vector_for_cost *body_cost_vec,
		    bool record_prologue_costs)
{
  switch (alignment_support_scheme)
    {
    case dr_aligned:
      {
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
					  stmt_info, 0, vect_body);

	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vect_model_load_cost: aligned.\n");

	break;
      }
    case dr_unaligned_supported:
      {
	/* Here, we assign an additional cost for the unaligned load.  */
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies,
					  unaligned_load, stmt_info,
					  misalignment, vect_body);

	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vect_model_load_cost: unaligned supported by "
			   "hardware.\n");

	break;
      }
    case dr_explicit_realign:
      {
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies * 2,
					  vector_load, stmt_info, 0, vect_body);
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies,
					  vec_perm, stmt_info, 0, vect_body);

	/* FIXME: If the misalignment remains fixed across the iterations of
	   the containing loop, the following cost should be added to the
	   prologue costs.  */
	if (targetm.vectorize.builtin_mask_for_load)
	  *inside_cost += record_stmt_cost (body_cost_vec, 1, vector_stmt,
					    stmt_info, 0, vect_body);

	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vect_model_load_cost: explicit realign\n");

	break;
      }
    case dr_explicit_realign_optimized:
      {
	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vect_model_load_cost: unaligned software "
			   "pipelined.\n");

	/* Unaligned software pipeline has a load of an address, an initial
	   load, and possibly a mask operation to "prime" the loop.  However,
	   if this is an access in a group of loads, which provide grouped
	   access, then the above cost should only be considered for one
	   access in the group.  Inside the loop, there is a load op
	   and a realignment op.  */

	if (add_realign_cost && record_prologue_costs)
	  {
	    *prologue_cost += record_stmt_cost (prologue_cost_vec, 2,
						vector_stmt, stmt_info,
						0, vect_prologue);
	    if (targetm.vectorize.builtin_mask_for_load)
	      *prologue_cost += record_stmt_cost (prologue_cost_vec, 1,
						  vector_stmt, stmt_info,
						  0, vect_prologue);
	  }

	*inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
					  stmt_info, 0, vect_body);
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_perm,
					  stmt_info, 0, vect_body);

	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vect_model_load_cost: explicit realign optimized"
			   "\n");

	break;
      }

    case dr_unaligned_unsupported:
      {
	*inside_cost = VECT_MAX_COST;

	if (dump_enabled_p ())
	  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			   "vect_model_load_cost: unsupported access.\n");
	break;
      }

    default:
      gcc_unreachable ();
    }
}

/* Insert the new stmt NEW_STMT at *GSI or at the appropriate place in
   the loop preheader for the vectorized stmt STMT_VINFO.  */

static void
vect_init_vector_1 (vec_info *vinfo, stmt_vec_info stmt_vinfo, gimple *new_stmt,
		    gimple_stmt_iterator *gsi)
{
  if (gsi)
    vect_finish_stmt_generation (vinfo, stmt_vinfo, new_stmt, gsi);
  else
    vinfo->insert_on_entry (stmt_vinfo, new_stmt);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "created new init_stmt: %G", new_stmt);
}

/* Function vect_init_vector.

   Insert a new stmt (INIT_STMT) that initializes a new variable of type
   TYPE with the value VAL.  If TYPE is a vector type and VAL does not have
   a vector type, a vector with all elements equal to VAL is created first.
   Place the initialization at GSI if it is not NULL.  Otherwise, place the
   initialization at the loop preheader.
   Return the DEF of INIT_STMT.
   It will be used in the vectorization of STMT_INFO.  */

tree
vect_init_vector (vec_info *vinfo, stmt_vec_info stmt_info, tree val, tree type,
		  gimple_stmt_iterator *gsi)
{
  gimple *init_stmt;
  tree new_temp;

  /* We abuse this function to push something to an SSA name with initial
     value VAL.  */
  if (! useless_type_conversion_p (type, TREE_TYPE (val)))
    {
      gcc_assert (TREE_CODE (type) == VECTOR_TYPE);
      if (! types_compatible_p (TREE_TYPE (type), TREE_TYPE (val)))
	{
	  /* A scalar boolean value should be transformed into an all-zeros
	     or all-ones value before building a vector.  */
	  if (VECTOR_BOOLEAN_TYPE_P (type))
	    {
	      tree true_val = build_all_ones_cst (TREE_TYPE (type));
	      tree false_val = build_zero_cst (TREE_TYPE (type));

	      if (CONSTANT_CLASS_P (val))
		val = integer_zerop (val) ? false_val : true_val;
	      else
		{
		  new_temp = make_ssa_name (TREE_TYPE (type));
		  init_stmt = gimple_build_assign (new_temp, COND_EXPR,
						   val, true_val, false_val);
		  vect_init_vector_1 (vinfo, stmt_info, init_stmt, gsi);
		  val = new_temp;
		}
	    }
	  else
	    {
	      gimple_seq stmts = NULL;
	      if (! INTEGRAL_TYPE_P (TREE_TYPE (val)))
		val = gimple_build (&stmts, VIEW_CONVERT_EXPR,
				    TREE_TYPE (type), val);
	      else
		/* ??? Condition vectorization expects us to do
		   promotion of invariant/external defs.  */
		val = gimple_convert (&stmts, TREE_TYPE (type), val);
	      for (gimple_stmt_iterator gsi2 = gsi_start (stmts);
		   !gsi_end_p (gsi2); )
		{
		  init_stmt = gsi_stmt (gsi2);
		  gsi_remove (&gsi2, false);
		  vect_init_vector_1 (vinfo, stmt_info, init_stmt, gsi);
		}
	    }
	}
      val = build_vector_from_val (type, val);
    }

  new_temp = vect_get_new_ssa_name (type, vect_simple_var, "cst_");
  init_stmt = gimple_build_assign (new_temp, val);
  vect_init_vector_1 (vinfo, stmt_info, init_stmt, gsi);
  return new_temp;
}
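
/* A hedged usage sketch (OP, VECTYPE, VINFO and STMT_INFO are the
   caller's): splatting a loop-invariant scalar OP into a vector in
   the loop preheader, by passing a NULL GSI:

     tree vop = vect_init_vector (vinfo, stmt_info, op, vectype, NULL);

   VOP is then an SSA name of type VECTYPE that the vector stmts in the
   loop body can use directly.  */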


/* Function vect_get_vec_defs_for_operand.

   OP is an operand in STMT_VINFO.  This function returns a vector of
   NCOPIES defs that will be used in the vectorized stmts for STMT_VINFO.

   In the case that OP is an SSA_NAME which is defined in the loop, then
   STMT_VINFO_VEC_STMTS of the defining stmt holds the relevant defs.

   In case OP is an invariant or constant, a new stmt that creates a vector
   def needs to be introduced.  VECTYPE may be used to specify a required
   type for the vector invariant.  */

void
vect_get_vec_defs_for_operand (vec_info *vinfo, stmt_vec_info stmt_vinfo,
			       unsigned ncopies,
			       tree op, vec<tree> *vec_oprnds, tree vectype)
{
  gimple *def_stmt;
  enum vect_def_type dt;
  bool is_simple_use;
  loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "vect_get_vec_defs_for_operand: %T\n", op);

  stmt_vec_info def_stmt_info;
  is_simple_use = vect_is_simple_use (op, loop_vinfo, &dt,
				      &def_stmt_info, &def_stmt);
  gcc_assert (is_simple_use);
  if (def_stmt && dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location, "  def_stmt =  %G", def_stmt);

  vec_oprnds->create (ncopies);
  if (dt == vect_constant_def || dt == vect_external_def)
    {
      tree stmt_vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
      tree vector_type;

      if (vectype)
	vector_type = vectype;
      else if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op))
	       && VECTOR_BOOLEAN_TYPE_P (stmt_vectype))
	vector_type = truth_type_for (stmt_vectype);
      else
	vector_type = get_vectype_for_scalar_type (loop_vinfo, TREE_TYPE (op));

      gcc_assert (vector_type);
      tree vop = vect_init_vector (vinfo, stmt_vinfo, op, vector_type, NULL);
      while (ncopies--)
	vec_oprnds->quick_push (vop);
    }
  else
    {
      def_stmt_info = vect_stmt_to_vectorize (def_stmt_info);
      gcc_assert (STMT_VINFO_VEC_STMTS (def_stmt_info).length () == ncopies);
      for (unsigned i = 0; i < ncopies; ++i)
	vec_oprnds->quick_push (gimple_get_lhs
				  (STMT_VINFO_VEC_STMTS (def_stmt_info)[i]));
    }
}


/* Get vectorized definitions for OP0 and OP1.  */

void
vect_get_vec_defs (vec_info *vinfo, stmt_vec_info stmt_info, slp_tree slp_node,
		   unsigned ncopies,
		   tree op0, vec<tree> *vec_oprnds0, tree vectype0,
		   tree op1, vec<tree> *vec_oprnds1, tree vectype1,
		   tree op2, vec<tree> *vec_oprnds2, tree vectype2,
		   tree op3, vec<tree> *vec_oprnds3, tree vectype3)
{
  if (slp_node)
    {
      if (op0)
	vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[0], vec_oprnds0);
      if (op1)
	vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[1], vec_oprnds1);
      if (op2)
	vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[2], vec_oprnds2);
      if (op3)
	vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[3], vec_oprnds3);
    }
  else
    {
      if (op0)
	vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
				       op0, vec_oprnds0, vectype0);
      if (op1)
	vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
				       op1, vec_oprnds1, vectype1);
      if (op2)
	vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
				       op2, vec_oprnds2, vectype2);
      if (op3)
	vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
				       op3, vec_oprnds3, vectype3);
    }
}

void
vect_get_vec_defs (vec_info *vinfo, stmt_vec_info stmt_info, slp_tree slp_node,
		   unsigned ncopies,
		   tree op0, vec<tree> *vec_oprnds0,
		   tree op1, vec<tree> *vec_oprnds1,
		   tree op2, vec<tree> *vec_oprnds2,
		   tree op3, vec<tree> *vec_oprnds3)
{
  vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
		     op0, vec_oprnds0, NULL_TREE,
		     op1, vec_oprnds1, NULL_TREE,
		     op2, vec_oprnds2, NULL_TREE,
		     op3, vec_oprnds3, NULL_TREE);
}
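
/* A typical binary-operation caller might fetch both operands in one
   go (a sketch; OP0, OP1 and the other values are the caller's):

     auto_vec<tree> vec_oprnds0, vec_oprnds1;
     vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
			op0, &vec_oprnds0, op1, &vec_oprnds1,
			NULL_TREE, NULL, NULL_TREE, NULL);

   For SLP the defs come from the SLP children; otherwise they come
   from vect_get_vec_defs_for_operand.  */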

/* Helper function called by vect_finish_replace_stmt and
   vect_finish_stmt_generation.  Set the location of the new
   statement and, if it could throw, add it to the EH region of the
   original statement.  */

static void
vect_finish_stmt_generation_1 (vec_info *,
			       stmt_vec_info stmt_info, gimple *vec_stmt)
{
  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location, "add new stmt: %G", vec_stmt);

  if (stmt_info)
    {
      gimple_set_location (vec_stmt, gimple_location (stmt_info->stmt));

      /* While EH edges will generally prevent vectorization, stmt might
	 e.g. be in a must-not-throw region.  Ensure newly created stmts
	 that could throw are part of the same region.  */
      int lp_nr = lookup_stmt_eh_lp (stmt_info->stmt);
      if (lp_nr != 0 && stmt_could_throw_p (cfun, vec_stmt))
	add_stmt_to_eh_lp (vec_stmt, lp_nr);
    }
  else
    gcc_assert (!stmt_could_throw_p (cfun, vec_stmt));
}

/* Replace the scalar statement STMT_INFO with a new vector statement VEC_STMT,
   which sets the same scalar result as STMT_INFO did.  */

void
vect_finish_replace_stmt (vec_info *vinfo,
			  stmt_vec_info stmt_info, gimple *vec_stmt)
{
  gimple *scalar_stmt = vect_orig_stmt (stmt_info)->stmt;
  gcc_assert (gimple_get_lhs (scalar_stmt) == gimple_get_lhs (vec_stmt));

  gimple_stmt_iterator gsi = gsi_for_stmt (scalar_stmt);
  gsi_replace (&gsi, vec_stmt, true);

  vect_finish_stmt_generation_1 (vinfo, stmt_info, vec_stmt);
}
1579
1580 /* Add VEC_STMT to the vectorized implementation of STMT_INFO and insert it
1581 before *GSI. Create and return a stmt_vec_info for VEC_STMT. */
1582
1583 void
vect_finish_stmt_generation(vec_info * vinfo,stmt_vec_info stmt_info,gimple * vec_stmt,gimple_stmt_iterator * gsi)1584 vect_finish_stmt_generation (vec_info *vinfo,
1585 stmt_vec_info stmt_info, gimple *vec_stmt,
1586 gimple_stmt_iterator *gsi)
1587 {
1588 gcc_assert (!stmt_info || gimple_code (stmt_info->stmt) != GIMPLE_LABEL);
1589
1590 if (!gsi_end_p (*gsi)
1591 && gimple_has_mem_ops (vec_stmt))
1592 {
1593 gimple *at_stmt = gsi_stmt (*gsi);
1594 tree vuse = gimple_vuse (at_stmt);
1595 if (vuse && TREE_CODE (vuse) == SSA_NAME)
1596 {
1597 tree vdef = gimple_vdef (at_stmt);
1598 gimple_set_vuse (vec_stmt, gimple_vuse (at_stmt));
1599 gimple_set_modified (vec_stmt, true);
1600 /* If we have an SSA vuse and insert a store, update virtual
1601 SSA form to avoid triggering the renamer. Do so only
1602 if we can easily see all uses - which is what almost always
1603 happens with the way vectorized stmts are inserted. */
1604 if ((vdef && TREE_CODE (vdef) == SSA_NAME)
1605 && ((is_gimple_assign (vec_stmt)
1606 && !is_gimple_reg (gimple_assign_lhs (vec_stmt)))
1607 || (is_gimple_call (vec_stmt)
1608 && !(gimple_call_flags (vec_stmt)
1609 & (ECF_CONST|ECF_PURE|ECF_NOVOPS)))))
1610 {
1611 tree new_vdef = copy_ssa_name (vuse, vec_stmt);
1612 gimple_set_vdef (vec_stmt, new_vdef);
1613 SET_USE (gimple_vuse_op (at_stmt), new_vdef);
1614 }
1615 }
1616 }
1617 gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);
1618 vect_finish_stmt_generation_1 (vinfo, stmt_info, vec_stmt);
1619 }
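/* A sketch of the virtual-operand update above, with made-up SSA names:
   if AT_STMT had VUSE <.MEM_4>, then after inserting a vectorized
   store S we get

     S:        # .MEM_7 = VDEF <.MEM_4>
     AT_STMT:  VUSE <.MEM_7>

   i.e. S is spliced into the virtual use-def chain without having to
   run the SSA renamer.  */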
1620
1621 /* We want to vectorize a call to combined function CFN with function
1622 decl FNDECL, using VECTYPE_OUT as the type of the output and VECTYPE_IN
1623 as the types of all inputs. Check whether this is possible using
1624 an internal function, returning its code if so or IFN_LAST if not. */
1625
1626 static internal_fn
1627 vectorizable_internal_function (combined_fn cfn, tree fndecl,
1628 tree vectype_out, tree vectype_in)
1629 {
1630 internal_fn ifn;
1631 if (internal_fn_p (cfn))
1632 ifn = as_internal_fn (cfn);
1633 else
1634 ifn = associated_internal_fn (fndecl);
1635 if (ifn != IFN_LAST && direct_internal_fn_p (ifn))
1636 {
1637 const direct_internal_fn_info &info = direct_internal_fn (ifn);
1638 if (info.vectorizable)
1639 {
1640 tree type0 = (info.type0 < 0 ? vectype_out : vectype_in);
1641 tree type1 = (info.type1 < 0 ? vectype_out : vectype_in);
1642 if (direct_internal_fn_supported_p (ifn, tree_pair (type0, type1),
1643 OPTIMIZE_FOR_SPEED))
1644 return ifn;
1645 }
1646 }
1647 return IFN_LAST;
1648 }
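/* Illustrative example: a scalar call to sqrtf maps to
   CFN_BUILT_IN_SQRTF, whose associated internal function is IFN_SQRT.
   IFN_SQRT is a vectorizable direct internal function, so with
   VECTYPE_OUT = VECTYPE_IN = V4SF the function returns IFN_SQRT
   exactly when the target provides the corresponding V4SF optab.  */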
1649
1650
1651 static tree permute_vec_elements (vec_info *, tree, tree, tree, stmt_vec_info,
1652 gimple_stmt_iterator *);
1653
1654 /* Check whether a load or store statement in the loop described by
1655 LOOP_VINFO is possible in a loop using partial vectors. This is
1656 testing whether the vectorizer pass has the appropriate support,
1657 as well as whether the target does.
1658
1659 VLS_TYPE says whether the statement is a load or store and VECTYPE
1660 is the type of the vector being loaded or stored. MEMORY_ACCESS_TYPE
1661 says how the load or store is going to be implemented and GROUP_SIZE
1662 is the number of load or store statements in the containing group.
1663 If the access is a gather load or scatter store, GS_INFO describes
1664 its arguments. If the load or store is conditional, SCALAR_MASK is the
1665 condition under which it occurs.
1666
1667 Clear LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P if a loop using partial
1668 vectors is not supported, otherwise record the required rgroup control
1669 types. */
1670
1671 static void
1672 check_load_store_for_partial_vectors (loop_vec_info loop_vinfo, tree vectype,
1673 vec_load_store_type vls_type,
1674 int group_size,
1675 vect_memory_access_type
1676 memory_access_type,
1677 unsigned int ncopies,
1678 gather_scatter_info *gs_info,
1679 tree scalar_mask)
1680 {
1681 /* Invariant loads need no special support. */
1682 if (memory_access_type == VMAT_INVARIANT)
1683 return;
1684
1685 vec_loop_masks *masks = &LOOP_VINFO_MASKS (loop_vinfo);
1686 machine_mode vecmode = TYPE_MODE (vectype);
1687 bool is_load = (vls_type == VLS_LOAD);
1688 if (memory_access_type == VMAT_LOAD_STORE_LANES)
1689 {
1690 if (is_load
1691 ? !vect_load_lanes_supported (vectype, group_size, true)
1692 : !vect_store_lanes_supported (vectype, group_size, true))
1693 {
1694 if (dump_enabled_p ())
1695 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1696 "can't operate on partial vectors because"
1697 " the target doesn't have an appropriate"
1698 " load/store-lanes instruction.\n");
1699 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
1700 return;
1701 }
1702 vect_record_loop_mask (loop_vinfo, masks, ncopies, vectype, scalar_mask);
1703 return;
1704 }
1705
1706 if (memory_access_type == VMAT_GATHER_SCATTER)
1707 {
1708 internal_fn ifn = (is_load
1709 ? IFN_MASK_GATHER_LOAD
1710 : IFN_MASK_SCATTER_STORE);
1711 if (!internal_gather_scatter_fn_supported_p (ifn, vectype,
1712 gs_info->memory_type,
1713 gs_info->offset_vectype,
1714 gs_info->scale))
1715 {
1716 if (dump_enabled_p ())
1717 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1718 "can't operate on partial vectors because"
1719 " the target doesn't have an appropriate"
1720 " gather load or scatter store instruction.\n");
1721 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
1722 return;
1723 }
1724 vect_record_loop_mask (loop_vinfo, masks, ncopies, vectype, scalar_mask);
1725 return;
1726 }
1727
1728 if (memory_access_type != VMAT_CONTIGUOUS
1729 && memory_access_type != VMAT_CONTIGUOUS_PERMUTE)
1730 {
1731 /* Element X of the data must come from iteration i * VF + X of the
1732 scalar loop. We need more work to support other mappings. */
1733 if (dump_enabled_p ())
1734 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1735 "can't operate on partial vectors because an"
1736 " access isn't contiguous.\n");
1737 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
1738 return;
1739 }
1740
1741 if (!VECTOR_MODE_P (vecmode))
1742 {
1743 if (dump_enabled_p ())
1744 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1745 "can't operate on partial vectors when emulating"
1746 " vector operations.\n");
1747 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
1748 return;
1749 }
1750
1751 /* We might load more scalars than we need for permuting SLP loads.
1752 We checked in get_group_load_store_type that the extra elements
1753 don't leak into a new vector. */
1754 auto get_valid_nvectors = [] (poly_uint64 size, poly_uint64 nunits)
1755 {
1756 unsigned int nvectors;
1757 if (can_div_away_from_zero_p (size, nunits, &nvectors))
1758 return nvectors;
1759 gcc_unreachable ();
1760 };
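  /* E.g. (illustrative) group_size = 3, vf = 4 and nunits = 8 give
     size = 12 scalars and hence 2 vectors: the division rounds away
     from zero because the final vector may contain trailing gap
     elements.  */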
1761
1762 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
1763 poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1764 machine_mode mask_mode;
1765 bool using_partial_vectors_p = false;
1766 if (targetm.vectorize.get_mask_mode (vecmode).exists (&mask_mode)
1767 && can_vec_mask_load_store_p (vecmode, mask_mode, is_load))
1768 {
1769 unsigned int nvectors = get_valid_nvectors (group_size * vf, nunits);
1770 vect_record_loop_mask (loop_vinfo, masks, nvectors, vectype, scalar_mask);
1771 using_partial_vectors_p = true;
1772 }
1773
1774 machine_mode vmode;
1775 if (get_len_load_store_mode (vecmode, is_load).exists (&vmode))
1776 {
1777 unsigned int nvectors = get_valid_nvectors (group_size * vf, nunits);
1778 vec_loop_lens *lens = &LOOP_VINFO_LENS (loop_vinfo);
1779 unsigned factor = (vecmode == vmode) ? 1 : GET_MODE_UNIT_SIZE (vecmode);
1780 vect_record_loop_len (loop_vinfo, lens, nvectors, vectype, factor);
1781 using_partial_vectors_p = true;
1782 }
1783
1784 if (!using_partial_vectors_p)
1785 {
1786 if (dump_enabled_p ())
1787 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1788 "can't operate on partial vectors because the"
1789 " target doesn't have the appropriate partial"
1790 " vectorization load or store.\n");
1791 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
1792 }
1793 }
1794
1795 /* Return the mask input to a masked load or store. VEC_MASK is the vectorized
1796 form of the scalar mask condition and LOOP_MASK, if nonnull, is the mask
1797 that needs to be applied to all loads and stores in a vectorized loop.
1798 Return VEC_MASK if LOOP_MASK is null or if VEC_MASK is already masked,
1799 otherwise return VEC_MASK & LOOP_MASK.
1800
1801 MASK_TYPE is the type of both masks. If new statements are needed,
1802 insert them before GSI. */
1803
1804 static tree
1805 prepare_vec_mask (loop_vec_info loop_vinfo, tree mask_type, tree loop_mask,
1806 tree vec_mask, gimple_stmt_iterator *gsi)
1807 {
1808 gcc_assert (useless_type_conversion_p (mask_type, TREE_TYPE (vec_mask)));
1809 if (!loop_mask)
1810 return vec_mask;
1811
1812 gcc_assert (TREE_TYPE (loop_mask) == mask_type);
1813
1814 if (loop_vinfo->vec_cond_masked_set.contains ({ vec_mask, loop_mask }))
1815 return vec_mask;
1816
1817 tree and_res = make_temp_ssa_name (mask_type, NULL, "vec_mask_and");
1818 gimple *and_stmt = gimple_build_assign (and_res, BIT_AND_EXPR,
1819 vec_mask, loop_mask);
1820
1821 gsi_insert_before (gsi, and_stmt, GSI_SAME_STMT);
1822 return and_res;
1823 }
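/* With both masks live, the statement emitted above looks like
   (SSA names purely illustrative):

     vec_mask_and_42 = vec_mask_30 & loop_mask_17;

   If the pair is already recorded in vec_cond_masked_set, VEC_MASK is
   known to incorporate LOOP_MASK and is returned unchanged.  */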
1824
1825 /* Determine whether we can use a gather load or scatter store to vectorize
1826 strided load or store STMT_INFO by truncating the current offset to a
1827 smaller width. We need to be able to construct an offset vector:
1828
1829 { 0, X, X*2, X*3, ... }
1830
1831 without loss of precision, where X is STMT_INFO's DR_STEP.
1832
1833 Return true if this is possible, describing the gather load or scatter
1834 store in GS_INFO. MASKED_P is true if the load or store is conditional. */
1835
1836 static bool
1837 vect_truncate_gather_scatter_offset (stmt_vec_info stmt_info,
1838 loop_vec_info loop_vinfo, bool masked_p,
1839 gather_scatter_info *gs_info)
1840 {
1841 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
1842 data_reference *dr = dr_info->dr;
1843 tree step = DR_STEP (dr);
1844 if (TREE_CODE (step) != INTEGER_CST)
1845 {
1846 /* ??? Perhaps we could use range information here? */
1847 if (dump_enabled_p ())
1848 dump_printf_loc (MSG_NOTE, vect_location,
1849 "cannot truncate variable step.\n");
1850 return false;
1851 }
1852
1853 /* Get the number of bits in an element. */
1854 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1855 scalar_mode element_mode = SCALAR_TYPE_MODE (TREE_TYPE (vectype));
1856 unsigned int element_bits = GET_MODE_BITSIZE (element_mode);
1857
1858 /* Set COUNT to the upper limit on the number of elements - 1.
1859 Start with the maximum vectorization factor. */
1860 unsigned HOST_WIDE_INT count = vect_max_vf (loop_vinfo) - 1;
1861
1862 /* Try lowering COUNT to the number of scalar latch iterations. */
1863 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1864 widest_int max_iters;
1865 if (max_loop_iterations (loop, &max_iters)
1866 && max_iters < count)
1867 count = max_iters.to_shwi ();
1868
1869 /* Try scales of 1 and the element size. */
1870 int scales[] = { 1, vect_get_scalar_dr_size (dr_info) };
1871 wi::overflow_type overflow = wi::OVF_NONE;
1872 for (int i = 0; i < 2; ++i)
1873 {
1874 int scale = scales[i];
1875 widest_int factor;
1876 if (!wi::multiple_of_p (wi::to_widest (step), scale, SIGNED, &factor))
1877 continue;
1878
1879 /* Determine the minimum precision of (COUNT - 1) * STEP / SCALE. */
1880 widest_int range = wi::mul (count, factor, SIGNED, &overflow);
1881 if (overflow)
1882 continue;
1883 signop sign = range >= 0 ? UNSIGNED : SIGNED;
1884 unsigned int min_offset_bits = wi::min_precision (range, sign);
1885
1886 /* Find the narrowest viable offset type. */
1887 unsigned int offset_bits = 1U << ceil_log2 (min_offset_bits);
1888 tree offset_type = build_nonstandard_integer_type (offset_bits,
1889 sign == UNSIGNED);
1890
1891 /* See whether the target supports the operation with an offset
1892 no narrower than OFFSET_TYPE. */
1893 tree memory_type = TREE_TYPE (DR_REF (dr));
1894 if (!vect_gather_scatter_fn_p (loop_vinfo, DR_IS_READ (dr), masked_p,
1895 vectype, memory_type, offset_type, scale,
1896 &gs_info->ifn, &gs_info->offset_vectype)
1897 || gs_info->ifn == IFN_LAST)
1898 continue;
1899
1900 gs_info->decl = NULL_TREE;
1901 /* Logically the sum of DR_BASE_ADDRESS, DR_INIT and DR_OFFSET,
1902 but we don't need to store that here. */
1903 gs_info->base = NULL_TREE;
1904 gs_info->element_type = TREE_TYPE (vectype);
1905 gs_info->offset = fold_convert (offset_type, step);
1906 gs_info->offset_dt = vect_constant_def;
1907 gs_info->scale = scale;
1908 gs_info->memory_type = memory_type;
1909 return true;
1910 }
1911
1912 if (overflow && dump_enabled_p ())
1913 dump_printf_loc (MSG_NOTE, vect_location,
1914 "truncating gather/scatter offset to %d bits"
1915 " might change its value.\n", element_bits);
1916
1917 return false;
1918 }
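/* Worked example (illustrative): suppose DR_STEP is 4 bytes, the
   scalar element size is also 4 and the loop latch executes at most
   255 times, so COUNT = 255.  With SCALE = 4 the factor is 1, the
   offset range is 255 and the minimum precision is 8 bits, so the
   access can use a gather/scatter with an unsigned 8-bit offset type
   and scale 4, provided the target supports that combination.  */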
1919
1920 /* Return true if we can use gather/scatter internal functions to
1921 vectorize STMT_INFO, which is a grouped or strided load or store.
1922    MASKED_P is true if the load or store is conditional.  When returning
1923 true, fill in GS_INFO with the information required to perform the
1924 operation. */
1925
1926 static bool
1927 vect_use_strided_gather_scatters_p (stmt_vec_info stmt_info,
1928 loop_vec_info loop_vinfo, bool masked_p,
1929 gather_scatter_info *gs_info)
1930 {
1931 if (!vect_check_gather_scatter (stmt_info, loop_vinfo, gs_info)
1932 || gs_info->ifn == IFN_LAST)
1933 return vect_truncate_gather_scatter_offset (stmt_info, loop_vinfo,
1934 masked_p, gs_info);
1935
1936 tree old_offset_type = TREE_TYPE (gs_info->offset);
1937 tree new_offset_type = TREE_TYPE (gs_info->offset_vectype);
1938
1939 gcc_assert (TYPE_PRECISION (new_offset_type)
1940 >= TYPE_PRECISION (old_offset_type));
1941 gs_info->offset = fold_convert (new_offset_type, gs_info->offset);
1942
1943 if (dump_enabled_p ())
1944 dump_printf_loc (MSG_NOTE, vect_location,
1945 "using gather/scatter for strided/grouped access,"
1946 " scale = %d\n", gs_info->scale);
1947
1948 return true;
1949 }
1950
1951 /* STMT_INFO is a non-strided load or store, meaning that it accesses
1952 elements with a known constant step. Return -1 if that step
1953 is negative, 0 if it is zero, and 1 if it is greater than zero. */
1954
1955 static int
1956 compare_step_with_zero (vec_info *vinfo, stmt_vec_info stmt_info)
1957 {
1958 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
1959 return tree_int_cst_compare (vect_dr_behavior (vinfo, dr_info)->step,
1960 size_zero_node);
1961 }
1962
1963 /* If the target supports a permute mask that reverses the elements in
1964 a vector of type VECTYPE, return that mask, otherwise return null. */
1965
1966 static tree
1967 perm_mask_for_reverse (tree vectype)
1968 {
1969 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
1970
1971 /* The encoding has a single stepped pattern. */
1972 vec_perm_builder sel (nunits, 1, 3);
1973 for (int i = 0; i < 3; ++i)
1974 sel.quick_push (nunits - 1 - i);
1975
1976 vec_perm_indices indices (sel, 1, nunits);
1977 if (!can_vec_perm_const_p (TYPE_MODE (vectype), indices))
1978 return NULL_TREE;
1979 return vect_gen_perm_mask_checked (vectype, indices);
1980 }
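/* E.g. for V8SI this asks for the permutation { 7, 6, 5, 4, 3, 2, 1, 0 }:
   only { 7, 6, 5 } are pushed explicitly and the single stepped pattern
   extends the downward series to all NUNITS elements, which also works
   for variable-length vectors.  */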
1981
1982 /* A subroutine of get_load_store_type, with a subset of the same
1983 arguments. Handle the case where STMT_INFO is a load or store that
1984 accesses consecutive elements with a negative step. Sets *POFFSET
1985 to the offset to be applied to the DR for the first access. */
1986
1987 static vect_memory_access_type
1988 get_negative_load_store_type (vec_info *vinfo,
1989 stmt_vec_info stmt_info, tree vectype,
1990 vec_load_store_type vls_type,
1991 unsigned int ncopies, poly_int64 *poffset)
1992 {
1993 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
1994 dr_alignment_support alignment_support_scheme;
1995
1996 if (ncopies > 1)
1997 {
1998 if (dump_enabled_p ())
1999 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2000 "multiple types with negative step.\n");
2001 return VMAT_ELEMENTWISE;
2002 }
2003
2004 /* For backward running DRs the first access in vectype actually is
2005 N-1 elements before the address of the DR. */
2006 *poffset = ((-TYPE_VECTOR_SUBPARTS (vectype) + 1)
2007 * TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (vectype))));
2008
2009 int misalignment = dr_misalignment (dr_info, vectype, *poffset);
2010 alignment_support_scheme
2011 = vect_supportable_dr_alignment (vinfo, dr_info, vectype, misalignment);
2012 if (alignment_support_scheme != dr_aligned
2013 && alignment_support_scheme != dr_unaligned_supported)
2014 {
2015 if (dump_enabled_p ())
2016 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2017 "negative step but alignment required.\n");
2018 *poffset = 0;
2019 return VMAT_ELEMENTWISE;
2020 }
2021
2022 if (vls_type == VLS_STORE_INVARIANT)
2023 {
2024 if (dump_enabled_p ())
2025 dump_printf_loc (MSG_NOTE, vect_location,
2026 "negative step with invariant source;"
2027 " no permute needed.\n");
2028 return VMAT_CONTIGUOUS_DOWN;
2029 }
2030
2031 if (!perm_mask_for_reverse (vectype))
2032 {
2033 if (dump_enabled_p ())
2034 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2035 "negative step and reversing not supported.\n");
2036 *poffset = 0;
2037 return VMAT_ELEMENTWISE;
2038 }
2039
2040 return VMAT_CONTIGUOUS_REVERSE;
2041 }
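/* Example of the offset computation above (illustrative): for V4SI,
   TYPE_VECTOR_SUBPARTS is 4 and each element is 4 bytes, so *POFFSET
   is (-4 + 1) * 4 = -12: the first vector access starts 12 bytes
   before the DR address and the element order is then corrected with
   the mask from perm_mask_for_reverse.  */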
2042
2043 /* STMT_INFO is either a masked or unconditional store. Return the value
2044 being stored. */
2045
2046 tree
2047 vect_get_store_rhs (stmt_vec_info stmt_info)
2048 {
2049 if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
2050 {
2051 gcc_assert (gimple_assign_single_p (assign));
2052 return gimple_assign_rhs1 (assign);
2053 }
2054 if (gcall *call = dyn_cast <gcall *> (stmt_info->stmt))
2055 {
2056 internal_fn ifn = gimple_call_internal_fn (call);
2057 int index = internal_fn_stored_value_index (ifn);
2058 gcc_assert (index >= 0);
2059 return gimple_call_arg (call, index);
2060 }
2061 gcc_unreachable ();
2062 }
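/* For a plain assignment the stored value is simply the RHS.  For an
   internal-function store the value operand is found via
   internal_fn_stored_value_index; e.g. for IFN_MASK_STORE it is
   argument 3, after the base, alignment and mask arguments.  */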
2063
2064 /* Function VECTOR_VECTOR_COMPOSITION_TYPE
2065
2066    This function returns a vector type which can be composed from NELTS pieces,
2067    whose type is recorded in PTYPE.  VTYPE should be a vector type and has the
2068    same vector size as the returned vector.  It first checks whether the target
2069    supports a vector mode of the piece size for the construction; if not, it
2070    further checks whether a scalar mode of the piece size can be used.  It
2071    returns NULL_TREE if no usable composition can be found.
2072
2073 For example, for (vtype=V16QI, nelts=4), we can probably get:
2074 - V16QI with PTYPE V4QI.
2075 - V4SI with PTYPE SI.
2076 - NULL_TREE. */
2077
2078 static tree
2079 vector_vector_composition_type (tree vtype, poly_uint64 nelts, tree *ptype)
2080 {
2081 gcc_assert (VECTOR_TYPE_P (vtype));
2082 gcc_assert (known_gt (nelts, 0U));
2083
2084 machine_mode vmode = TYPE_MODE (vtype);
2085 if (!VECTOR_MODE_P (vmode))
2086 return NULL_TREE;
2087
2088 poly_uint64 vbsize = GET_MODE_BITSIZE (vmode);
2089 unsigned int pbsize;
2090 if (constant_multiple_p (vbsize, nelts, &pbsize))
2091 {
2092 /* First check if vec_init optab supports construction from
2093 vector pieces directly. */
2094 scalar_mode elmode = SCALAR_TYPE_MODE (TREE_TYPE (vtype));
2095 poly_uint64 inelts = pbsize / GET_MODE_BITSIZE (elmode);
2096 machine_mode rmode;
2097 if (related_vector_mode (vmode, elmode, inelts).exists (&rmode)
2098 && (convert_optab_handler (vec_init_optab, vmode, rmode)
2099 != CODE_FOR_nothing))
2100 {
2101 *ptype = build_vector_type (TREE_TYPE (vtype), inelts);
2102 return vtype;
2103 }
2104
2105       /* Otherwise check whether an integer mode of the same piece size exists
2106 	 and whether the vec_init optab supports construction from it directly.  */
2107 if (int_mode_for_size (pbsize, 0).exists (&elmode)
2108 && related_vector_mode (vmode, elmode, nelts).exists (&rmode)
2109 && (convert_optab_handler (vec_init_optab, rmode, elmode)
2110 != CODE_FOR_nothing))
2111 {
2112 *ptype = build_nonstandard_integer_type (pbsize, 1);
2113 return build_vector_type (*ptype, nelts);
2114 }
2115 }
2116
2117 return NULL_TREE;
2118 }
2119
2120 /* A subroutine of get_load_store_type, with a subset of the same
2121 arguments. Handle the case where STMT_INFO is part of a grouped load
2122 or store.
2123
2124 For stores, the statements in the group are all consecutive
2125 and there is no gap at the end. For loads, the statements in the
2126 group might not be consecutive; there can be gaps between statements
2127 as well as at the end. */
2128
2129 static bool
2130 get_group_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info,
2131 tree vectype, slp_tree slp_node,
2132 bool masked_p, vec_load_store_type vls_type,
2133 vect_memory_access_type *memory_access_type,
2134 poly_int64 *poffset,
2135 dr_alignment_support *alignment_support_scheme,
2136 int *misalignment,
2137 gather_scatter_info *gs_info)
2138 {
2139 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
2140 class loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
2141 stmt_vec_info first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
2142 dr_vec_info *first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
2143 unsigned int group_size = DR_GROUP_SIZE (first_stmt_info);
2144 bool single_element_p = (stmt_info == first_stmt_info
2145 && !DR_GROUP_NEXT_ELEMENT (stmt_info));
2146 unsigned HOST_WIDE_INT gap = DR_GROUP_GAP (first_stmt_info);
2147 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2148
2149 /* True if the vectorized statements would access beyond the last
2150 statement in the group. */
2151 bool overrun_p = false;
2152
2153 /* True if we can cope with such overrun by peeling for gaps, so that
2154 there is at least one final scalar iteration after the vector loop. */
2155 bool can_overrun_p = (!masked_p
2156 && vls_type == VLS_LOAD
2157 && loop_vinfo
2158 && !loop->inner);
2159
2160 /* There can only be a gap at the end of the group if the stride is
2161 known at compile time. */
2162 gcc_assert (!STMT_VINFO_STRIDED_P (first_stmt_info) || gap == 0);
2163
2164 /* Stores can't yet have gaps. */
2165 gcc_assert (slp_node || vls_type == VLS_LOAD || gap == 0);
2166
2167 if (slp_node)
2168 {
2169 /* For SLP vectorization we directly vectorize a subchain
2170 without permutation. */
2171 if (! SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
2172 first_dr_info
2173 = STMT_VINFO_DR_INFO (SLP_TREE_SCALAR_STMTS (slp_node)[0]);
2174 if (STMT_VINFO_STRIDED_P (first_stmt_info))
2175 {
2176 /* Try to use consecutive accesses of DR_GROUP_SIZE elements,
2177 separated by the stride, until we have a complete vector.
2178 Fall back to scalar accesses if that isn't possible. */
2179 if (multiple_p (nunits, group_size))
2180 *memory_access_type = VMAT_STRIDED_SLP;
2181 else
2182 *memory_access_type = VMAT_ELEMENTWISE;
2183 }
2184 else
2185 {
2186 overrun_p = loop_vinfo && gap != 0;
2187 if (overrun_p && vls_type != VLS_LOAD)
2188 {
2189 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2190 "Grouped store with gaps requires"
2191 " non-consecutive accesses\n");
2192 return false;
2193 }
2194 /* An overrun is fine if the trailing elements are smaller
2195 than the alignment boundary B. Every vector access will
2196 be a multiple of B and so we are guaranteed to access a
2197 non-gap element in the same B-sized block. */
2198 if (overrun_p
2199 && gap < (vect_known_alignment_in_bytes (first_dr_info,
2200 vectype)
2201 / vect_get_scalar_dr_size (first_dr_info)))
2202 overrun_p = false;
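	  /* The check above succeeds e.g. (illustrative) when the first
	     access is known to be 16-byte aligned and the scalars are
	     4 bytes: every vector access is then a multiple of 16 bytes,
	     so a trailing gap of at most 3 elements stays in a 16-byte
	     block that also contains accessed data.  */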
2203
2204 /* If the gap splits the vector in half and the target
2205 can do half-vector operations avoid the epilogue peeling
2206 by simply loading half of the vector only. Usually
2207 the construction with an upper zero half will be elided. */
2208 dr_alignment_support alss;
2209 int misalign = dr_misalignment (first_dr_info, vectype);
2210 tree half_vtype;
2211 if (overrun_p
2212 && !masked_p
2213 && (((alss = vect_supportable_dr_alignment (vinfo, first_dr_info,
2214 vectype, misalign)))
2215 == dr_aligned
2216 || alss == dr_unaligned_supported)
2217 && known_eq (nunits, (group_size - gap) * 2)
2218 && known_eq (nunits, group_size)
2219 && (vector_vector_composition_type (vectype, 2, &half_vtype)
2220 != NULL_TREE))
2221 overrun_p = false;
2222
2223 if (overrun_p && !can_overrun_p)
2224 {
2225 if (dump_enabled_p ())
2226 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2227 "Peeling for outer loop is not supported\n");
2228 return false;
2229 }
2230 int cmp = compare_step_with_zero (vinfo, stmt_info);
2231 if (cmp < 0)
2232 {
2233 if (single_element_p)
2234 /* ??? The VMAT_CONTIGUOUS_REVERSE code generation is
2235 only correct for single element "interleaving" SLP. */
2236 *memory_access_type = get_negative_load_store_type
2237 (vinfo, stmt_info, vectype, vls_type, 1, poffset);
2238 else
2239 {
2240 /* Try to use consecutive accesses of DR_GROUP_SIZE elements,
2241 separated by the stride, until we have a complete vector.
2242 Fall back to scalar accesses if that isn't possible. */
2243 if (multiple_p (nunits, group_size))
2244 *memory_access_type = VMAT_STRIDED_SLP;
2245 else
2246 *memory_access_type = VMAT_ELEMENTWISE;
2247 }
2248 }
2249 else
2250 {
2251 gcc_assert (!loop_vinfo || cmp > 0);
2252 *memory_access_type = VMAT_CONTIGUOUS;
2253 }
2254 }
2255 }
2256 else
2257 {
2258 /* We can always handle this case using elementwise accesses,
2259 but see if something more efficient is available. */
2260 *memory_access_type = VMAT_ELEMENTWISE;
2261
2262 /* If there is a gap at the end of the group then these optimizations
2263 would access excess elements in the last iteration. */
2264 bool would_overrun_p = (gap != 0);
2265 /* An overrun is fine if the trailing elements are smaller than the
2266 alignment boundary B. Every vector access will be a multiple of B
2267 and so we are guaranteed to access a non-gap element in the
2268 same B-sized block. */
2269 if (would_overrun_p
2270 && !masked_p
2271 && gap < (vect_known_alignment_in_bytes (first_dr_info, vectype)
2272 / vect_get_scalar_dr_size (first_dr_info)))
2273 would_overrun_p = false;
2274
2275 if (!STMT_VINFO_STRIDED_P (first_stmt_info)
2276 && (can_overrun_p || !would_overrun_p)
2277 && compare_step_with_zero (vinfo, stmt_info) > 0)
2278 {
2279 /* First cope with the degenerate case of a single-element
2280 vector. */
2281 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype), 1U))
2282 ;
2283
2284 /* Otherwise try using LOAD/STORE_LANES. */
2285 else if (vls_type == VLS_LOAD
2286 ? vect_load_lanes_supported (vectype, group_size, masked_p)
2287 : vect_store_lanes_supported (vectype, group_size,
2288 masked_p))
2289 {
2290 *memory_access_type = VMAT_LOAD_STORE_LANES;
2291 overrun_p = would_overrun_p;
2292 }
2293
2294 /* If that fails, try using permuting loads. */
2295 else if (vls_type == VLS_LOAD
2296 ? vect_grouped_load_supported (vectype, single_element_p,
2297 group_size)
2298 : vect_grouped_store_supported (vectype, group_size))
2299 {
2300 *memory_access_type = VMAT_CONTIGUOUS_PERMUTE;
2301 overrun_p = would_overrun_p;
2302 }
2303 }
2304
2305       /* As a last resort, try using a gather load or scatter store.
2306
2307 ??? Although the code can handle all group sizes correctly,
2308 it probably isn't a win to use separate strided accesses based
2309 on nearby locations. Or, even if it's a win over scalar code,
2310 it might not be a win over vectorizing at a lower VF, if that
2311 allows us to use contiguous accesses. */
2312 if (*memory_access_type == VMAT_ELEMENTWISE
2313 && single_element_p
2314 && loop_vinfo
2315 && vect_use_strided_gather_scatters_p (stmt_info, loop_vinfo,
2316 masked_p, gs_info))
2317 *memory_access_type = VMAT_GATHER_SCATTER;
2318 }
2319
2320 if (*memory_access_type == VMAT_GATHER_SCATTER
2321 || *memory_access_type == VMAT_ELEMENTWISE)
2322 {
2323 *alignment_support_scheme = dr_unaligned_supported;
2324 *misalignment = DR_MISALIGNMENT_UNKNOWN;
2325 }
2326 else
2327 {
2328 *misalignment = dr_misalignment (first_dr_info, vectype, *poffset);
2329 *alignment_support_scheme
2330 = vect_supportable_dr_alignment (vinfo, first_dr_info, vectype,
2331 *misalignment);
2332 }
2333
2334 if (vls_type != VLS_LOAD && first_stmt_info == stmt_info)
2335 {
2336 /* STMT is the leader of the group. Check the operands of all the
2337 stmts of the group. */
2338 stmt_vec_info next_stmt_info = DR_GROUP_NEXT_ELEMENT (stmt_info);
2339 while (next_stmt_info)
2340 {
2341 tree op = vect_get_store_rhs (next_stmt_info);
2342 enum vect_def_type dt;
2343 if (!vect_is_simple_use (op, vinfo, &dt))
2344 {
2345 if (dump_enabled_p ())
2346 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2347 "use not simple.\n");
2348 return false;
2349 }
2350 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
2351 }
2352 }
2353
2354 if (overrun_p)
2355 {
2356 gcc_assert (can_overrun_p);
2357 if (dump_enabled_p ())
2358 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2359 "Data access with gaps requires scalar "
2360 "epilogue loop\n");
2361 LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) = true;
2362 }
2363
2364 return true;
2365 }
2366
2367 /* Analyze load or store statement STMT_INFO of type VLS_TYPE. Return true
2368 if there is a memory access type that the vectorized form can use,
2369 storing it in *MEMORY_ACCESS_TYPE if so. If we decide to use gathers
2370 or scatters, fill in GS_INFO accordingly. In addition
2371 *ALIGNMENT_SUPPORT_SCHEME is filled out and false is returned if
2372 the target does not support the alignment scheme. *MISALIGNMENT
2373 is set according to the alignment of the access (including
2374 DR_MISALIGNMENT_UNKNOWN when it is unknown).
2375
2376    A nonnull SLP_NODE says that we're performing SLP rather than loop vectorization.
2377 MASKED_P is true if the statement is conditional on a vectorized mask.
2378 VECTYPE is the vector type that the vectorized statements will use.
2379 NCOPIES is the number of vector statements that will be needed. */
2380
2381 static bool
2382 get_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info,
2383 tree vectype, slp_tree slp_node,
2384 bool masked_p, vec_load_store_type vls_type,
2385 unsigned int ncopies,
2386 vect_memory_access_type *memory_access_type,
2387 poly_int64 *poffset,
2388 dr_alignment_support *alignment_support_scheme,
2389 int *misalignment,
2390 gather_scatter_info *gs_info)
2391 {
2392 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
2393 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2394 *misalignment = DR_MISALIGNMENT_UNKNOWN;
2395 *poffset = 0;
2396 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
2397 {
2398 *memory_access_type = VMAT_GATHER_SCATTER;
2399 if (!vect_check_gather_scatter (stmt_info, loop_vinfo, gs_info))
2400 gcc_unreachable ();
2401 else if (!vect_is_simple_use (gs_info->offset, vinfo,
2402 &gs_info->offset_dt,
2403 &gs_info->offset_vectype))
2404 {
2405 if (dump_enabled_p ())
2406 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2407 "%s index use not simple.\n",
2408 vls_type == VLS_LOAD ? "gather" : "scatter");
2409 return false;
2410 }
2411 else if (gs_info->ifn == IFN_LAST && !gs_info->decl)
2412 {
2413 if (vls_type != VLS_LOAD)
2414 {
2415 if (dump_enabled_p ())
2416 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2417 "unsupported emulated scatter.\n");
2418 return false;
2419 }
2420 else if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant ()
2421 || !TYPE_VECTOR_SUBPARTS
2422 (gs_info->offset_vectype).is_constant ()
2423 || !constant_multiple_p (TYPE_VECTOR_SUBPARTS
2424 (gs_info->offset_vectype),
2425 TYPE_VECTOR_SUBPARTS (vectype)))
2426 {
2427 if (dump_enabled_p ())
2428 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2429 "unsupported vector types for emulated "
2430 "gather.\n");
2431 return false;
2432 }
2433 }
2434   /* Gather-scatter accesses perform only component accesses; alignment
2435      is irrelevant for them.  */
2436 *alignment_support_scheme = dr_unaligned_supported;
2437 }
2438 else if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
2439 {
2440 if (!get_group_load_store_type (vinfo, stmt_info, vectype, slp_node,
2441 masked_p,
2442 vls_type, memory_access_type, poffset,
2443 alignment_support_scheme,
2444 misalignment, gs_info))
2445 return false;
2446 }
2447 else if (STMT_VINFO_STRIDED_P (stmt_info))
2448 {
2449 gcc_assert (!slp_node);
2450 if (loop_vinfo
2451 && vect_use_strided_gather_scatters_p (stmt_info, loop_vinfo,
2452 masked_p, gs_info))
2453 *memory_access_type = VMAT_GATHER_SCATTER;
2454 else
2455 *memory_access_type = VMAT_ELEMENTWISE;
2456 /* Alignment is irrelevant here. */
2457 *alignment_support_scheme = dr_unaligned_supported;
2458 }
2459 else
2460 {
2461 int cmp = compare_step_with_zero (vinfo, stmt_info);
2462 if (cmp == 0)
2463 {
2464 gcc_assert (vls_type == VLS_LOAD);
2465 *memory_access_type = VMAT_INVARIANT;
2466 	  /* Invariant accesses perform only component accesses; alignment
2467 	     is irrelevant for them.  */
2468 *alignment_support_scheme = dr_unaligned_supported;
2469 }
2470 else
2471 {
2472 if (cmp < 0)
2473 *memory_access_type = get_negative_load_store_type
2474 (vinfo, stmt_info, vectype, vls_type, ncopies, poffset);
2475 else
2476 *memory_access_type = VMAT_CONTIGUOUS;
2477 *misalignment = dr_misalignment (STMT_VINFO_DR_INFO (stmt_info),
2478 vectype, *poffset);
2479 *alignment_support_scheme
2480 = vect_supportable_dr_alignment (vinfo,
2481 STMT_VINFO_DR_INFO (stmt_info),
2482 vectype, *misalignment);
2483 }
2484 }
2485
2486 if ((*memory_access_type == VMAT_ELEMENTWISE
2487 || *memory_access_type == VMAT_STRIDED_SLP)
2488 && !nunits.is_constant ())
2489 {
2490 if (dump_enabled_p ())
2491 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2492 "Not using elementwise accesses due to variable "
2493 "vectorization factor.\n");
2494 return false;
2495 }
2496
2497 if (*alignment_support_scheme == dr_unaligned_unsupported)
2498 {
2499 if (dump_enabled_p ())
2500 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2501 "unsupported unaligned access\n");
2502 return false;
2503 }
2504
2505 /* FIXME: At the moment the cost model seems to underestimate the
2506 cost of using elementwise accesses. This check preserves the
2507 traditional behavior until that can be fixed. */
2508 stmt_vec_info first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
2509 if (!first_stmt_info)
2510 first_stmt_info = stmt_info;
2511 if (*memory_access_type == VMAT_ELEMENTWISE
2512 && !STMT_VINFO_STRIDED_P (first_stmt_info)
2513 && !(stmt_info == DR_GROUP_FIRST_ELEMENT (stmt_info)
2514 && !DR_GROUP_NEXT_ELEMENT (stmt_info)
2515 && !pow2p_hwi (DR_GROUP_SIZE (stmt_info))))
2516 {
2517 if (dump_enabled_p ())
2518 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2519 "not falling back to elementwise accesses\n");
2520 return false;
2521 }
2522 return true;
2523 }
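/* Summary of the dispatch above (for reference): gather/scatter
   statements are classified first, then grouped accesses via
   get_group_load_store_type, then strided ones, and finally plain
   accesses, where the sign of the step selects VMAT_INVARIANT (zero),
   get_negative_load_store_type (negative) or VMAT_CONTIGUOUS
   (positive).  */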
2524
2525 /* Return true if boolean argument at MASK_INDEX is suitable for vectorizing
2526 conditional operation STMT_INFO. When returning true, store the mask
2527 in *MASK, the type of its definition in *MASK_DT_OUT, the type of the
2528 vectorized mask in *MASK_VECTYPE_OUT and the SLP node corresponding
2529 to the mask in *MASK_NODE if MASK_NODE is not NULL. */
2530
2531 static bool
2532 vect_check_scalar_mask (vec_info *vinfo, stmt_vec_info stmt_info,
2533 slp_tree slp_node, unsigned mask_index,
2534 tree *mask, slp_tree *mask_node,
2535 vect_def_type *mask_dt_out, tree *mask_vectype_out)
2536 {
2537 enum vect_def_type mask_dt;
2538 tree mask_vectype;
2539 slp_tree mask_node_1;
2540 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, mask_index,
2541 mask, &mask_node_1, &mask_dt, &mask_vectype))
2542 {
2543 if (dump_enabled_p ())
2544 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2545 "mask use not simple.\n");
2546 return false;
2547 }
2548
2549 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (*mask)))
2550 {
2551 if (dump_enabled_p ())
2552 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2553 "mask argument is not a boolean.\n");
2554 return false;
2555 }
2556
2557   /* If the caller is not prepared to adjust an external/constant
2558      SLP mask vector type, fail.  */
2559 if (slp_node
2560 && !mask_node
2561 && SLP_TREE_DEF_TYPE (mask_node_1) != vect_internal_def)
2562 {
2563 if (dump_enabled_p ())
2564 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2565 "SLP mask argument is not vectorized.\n");
2566 return false;
2567 }
2568
2569 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2570 if (!mask_vectype)
2571 mask_vectype = get_mask_type_for_scalar_type (vinfo, TREE_TYPE (vectype));
2572
2573 if (!mask_vectype || !VECTOR_BOOLEAN_TYPE_P (mask_vectype))
2574 {
2575 if (dump_enabled_p ())
2576 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2577 "could not find an appropriate vector mask type.\n");
2578 return false;
2579 }
2580
2581 if (maybe_ne (TYPE_VECTOR_SUBPARTS (mask_vectype),
2582 TYPE_VECTOR_SUBPARTS (vectype)))
2583 {
2584 if (dump_enabled_p ())
2585 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2586 "vector mask type %T"
2587 " does not match vector data type %T.\n",
2588 mask_vectype, vectype);
2589
2590 return false;
2591 }
2592
2593 *mask_dt_out = mask_dt;
2594 *mask_vectype_out = mask_vectype;
2595 if (mask_node)
2596 *mask_node = mask_node_1;
2597 return true;
2598 }
2599
2600 /* Return true if stored value RHS is suitable for vectorizing store
2601 statement STMT_INFO. When returning true, store the type of the
2602 definition in *RHS_DT_OUT, the type of the vectorized store value in
2603 *RHS_VECTYPE_OUT and the type of the store in *VLS_TYPE_OUT. */
2604
2605 static bool
2606 vect_check_store_rhs (vec_info *vinfo, stmt_vec_info stmt_info,
2607 slp_tree slp_node, tree rhs,
2608 vect_def_type *rhs_dt_out, tree *rhs_vectype_out,
2609 vec_load_store_type *vls_type_out)
2610 {
2611   /* If this is a store from a constant, make sure
2612      native_encode_expr can handle it.  */
2613 if (CONSTANT_CLASS_P (rhs) && native_encode_expr (rhs, NULL, 64) == 0)
2614 {
2615 if (dump_enabled_p ())
2616 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2617 "cannot encode constant as a byte sequence.\n");
2618 return false;
2619 }
2620
2621 unsigned op_no = 0;
2622 if (gcall *call = dyn_cast <gcall *> (stmt_info->stmt))
2623 {
2624 if (gimple_call_internal_p (call)
2625 && internal_store_fn_p (gimple_call_internal_fn (call)))
2626 op_no = internal_fn_stored_value_index (gimple_call_internal_fn (call));
2627 }
2628
2629 enum vect_def_type rhs_dt;
2630 tree rhs_vectype;
2631 slp_tree slp_op;
2632 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, op_no,
2633 &rhs, &slp_op, &rhs_dt, &rhs_vectype))
2634 {
2635 if (dump_enabled_p ())
2636 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2637 "use not simple.\n");
2638 return false;
2639 }
2640
2641 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2642 if (rhs_vectype && !useless_type_conversion_p (vectype, rhs_vectype))
2643 {
2644 if (dump_enabled_p ())
2645 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2646 "incompatible vector types.\n");
2647 return false;
2648 }
2649
2650 *rhs_dt_out = rhs_dt;
2651 *rhs_vectype_out = rhs_vectype;
2652 if (rhs_dt == vect_constant_def || rhs_dt == vect_external_def)
2653 *vls_type_out = VLS_STORE_INVARIANT;
2654 else
2655 *vls_type_out = VLS_STORE;
2656 return true;
2657 }
2658
2659 /* Build an all-ones vector mask of type MASKTYPE while vectorizing STMT_INFO.
2660 Note that we support masks with floating-point type, in which case the
2661 floats are interpreted as a bitmask. */
2662
2663 static tree
2664 vect_build_all_ones_mask (vec_info *vinfo,
2665 stmt_vec_info stmt_info, tree masktype)
2666 {
2667 if (TREE_CODE (masktype) == INTEGER_TYPE)
2668 return build_int_cst (masktype, -1);
2669 else if (TREE_CODE (TREE_TYPE (masktype)) == INTEGER_TYPE)
2670 {
2671 tree mask = build_int_cst (TREE_TYPE (masktype), -1);
2672 mask = build_vector_from_val (masktype, mask);
2673 return vect_init_vector (vinfo, stmt_info, mask, masktype, NULL);
2674 }
2675 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (masktype)))
2676 {
2677 REAL_VALUE_TYPE r;
2678 long tmp[6];
2679 for (int j = 0; j < 6; ++j)
2680 tmp[j] = -1;
2681 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (masktype)));
2682 tree mask = build_real (TREE_TYPE (masktype), r);
2683 mask = build_vector_from_val (masktype, mask);
2684 return vect_init_vector (vinfo, stmt_info, mask, masktype, NULL);
2685 }
2686 gcc_unreachable ();
2687 }
2688
2689 /* Build an all-zero merge value of type VECTYPE while vectorizing
2690 STMT_INFO as a gather load. */
2691
2692 static tree
2693 vect_build_zero_merge_argument (vec_info *vinfo,
2694 stmt_vec_info stmt_info, tree vectype)
2695 {
2696 tree merge;
2697 if (TREE_CODE (TREE_TYPE (vectype)) == INTEGER_TYPE)
2698 merge = build_int_cst (TREE_TYPE (vectype), 0);
2699 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (vectype)))
2700 {
2701 REAL_VALUE_TYPE r;
2702 long tmp[6];
2703 for (int j = 0; j < 6; ++j)
2704 tmp[j] = 0;
2705 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (vectype)));
2706 merge = build_real (TREE_TYPE (vectype), r);
2707 }
2708 else
2709 gcc_unreachable ();
2710 merge = build_vector_from_val (vectype, merge);
2711 return vect_init_vector (vinfo, stmt_info, merge, vectype, NULL);
2712 }
2713
2714 /* Build a gather load call while vectorizing STMT_INFO. Insert new
2715 instructions before GSI and add them to VEC_STMT. GS_INFO describes
2716 the gather load operation. If the load is conditional, MASK is the
2717 unvectorized condition and MASK_DT is its definition type, otherwise
2718 MASK is null. */
2719
2720 static void
2721 vect_build_gather_load_calls (vec_info *vinfo, stmt_vec_info stmt_info,
2722 gimple_stmt_iterator *gsi,
2723 gimple **vec_stmt,
2724 gather_scatter_info *gs_info,
2725 tree mask)
2726 {
2727 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
2728 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2729 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2730 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2731 int ncopies = vect_get_num_copies (loop_vinfo, vectype);
2732 edge pe = loop_preheader_edge (loop);
2733 enum { NARROW, NONE, WIDEN } modifier;
2734 poly_uint64 gather_off_nunits
2735 = TYPE_VECTOR_SUBPARTS (gs_info->offset_vectype);
2736
2737 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info->decl));
2738 tree rettype = TREE_TYPE (TREE_TYPE (gs_info->decl));
2739 tree srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2740 tree ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2741 tree idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2742 tree masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2743 tree scaletype = TREE_VALUE (arglist);
2744 tree real_masktype = masktype;
2745 gcc_checking_assert (types_compatible_p (srctype, rettype)
2746 && (!mask
2747 || TREE_CODE (masktype) == INTEGER_TYPE
2748 || types_compatible_p (srctype, masktype)));
2749 if (mask)
2750 masktype = truth_type_for (srctype);
2751
2752 tree mask_halftype = masktype;
2753 tree perm_mask = NULL_TREE;
2754 tree mask_perm_mask = NULL_TREE;
2755 if (known_eq (nunits, gather_off_nunits))
2756 modifier = NONE;
2757 else if (known_eq (nunits * 2, gather_off_nunits))
2758 {
2759 modifier = WIDEN;
2760
2761 /* Currently widening gathers and scatters are only supported for
2762 fixed-length vectors. */
2763 int count = gather_off_nunits.to_constant ();
2764 vec_perm_builder sel (count, count, 1);
2765 for (int i = 0; i < count; ++i)
2766 sel.quick_push (i | (count / 2));
2767
2768 vec_perm_indices indices (sel, 1, count);
2769 perm_mask = vect_gen_perm_mask_checked (gs_info->offset_vectype,
2770 indices);
2771 }
2772 else if (known_eq (nunits, gather_off_nunits * 2))
2773 {
2774 modifier = NARROW;
2775
2776 /* Currently narrowing gathers and scatters are only supported for
2777 fixed-length vectors. */
2778 int count = nunits.to_constant ();
2779 vec_perm_builder sel (count, count, 1);
2780 sel.quick_grow (count);
2781 for (int i = 0; i < count; ++i)
2782 sel[i] = i < count / 2 ? i : i + count / 2;
2783 vec_perm_indices indices (sel, 2, count);
2784 perm_mask = vect_gen_perm_mask_checked (vectype, indices);
2785
2786 ncopies *= 2;
2787
2788 if (mask && masktype == real_masktype)
2789 {
2790 for (int i = 0; i < count; ++i)
2791 sel[i] = i | (count / 2);
2792 indices.new_vector (sel, 2, count);
2793 mask_perm_mask = vect_gen_perm_mask_checked (masktype, indices);
2794 }
2795 else if (mask)
2796 mask_halftype = truth_type_for (gs_info->offset_vectype);
2797 }
2798 else
2799 gcc_unreachable ();
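  /* Illustrative examples of the cases above: with V4DF data and V8SI
     offsets the offset vector has twice as many elements, so modifier
     == WIDEN and odd copies take the upper half of the offsets via
     PERM_MASK.  With V8SF data and V4DI offsets modifier == NARROW:
     each data vector needs two gather calls, so NCOPIES is doubled and
     pairs of half-width results are combined by a final permute.  */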
2800
2801 tree scalar_dest = gimple_get_lhs (stmt_info->stmt);
2802 tree vec_dest = vect_create_destination_var (scalar_dest, vectype);
2803
2804 tree ptr = fold_convert (ptrtype, gs_info->base);
2805 if (!is_gimple_min_invariant (ptr))
2806 {
2807 gimple_seq seq;
2808 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
2809 basic_block new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
2810 gcc_assert (!new_bb);
2811 }
2812
2813 tree scale = build_int_cst (scaletype, gs_info->scale);
2814
2815 tree vec_oprnd0 = NULL_TREE;
2816 tree vec_mask = NULL_TREE;
2817 tree src_op = NULL_TREE;
2818 tree mask_op = NULL_TREE;
2819 tree prev_res = NULL_TREE;
2820
2821 if (!mask)
2822 {
2823 src_op = vect_build_zero_merge_argument (vinfo, stmt_info, rettype);
2824 mask_op = vect_build_all_ones_mask (vinfo, stmt_info, masktype);
2825 }
2826
2827 auto_vec<tree> vec_oprnds0;
2828 auto_vec<tree> vec_masks;
2829 vect_get_vec_defs_for_operand (vinfo, stmt_info,
2830 modifier == WIDEN ? ncopies / 2 : ncopies,
2831 gs_info->offset, &vec_oprnds0);
2832 if (mask)
2833 vect_get_vec_defs_for_operand (vinfo, stmt_info,
2834 modifier == NARROW ? ncopies / 2 : ncopies,
2835 mask, &vec_masks, masktype);
2836 for (int j = 0; j < ncopies; ++j)
2837 {
2838 tree op, var;
2839 if (modifier == WIDEN && (j & 1))
2840 op = permute_vec_elements (vinfo, vec_oprnd0, vec_oprnd0,
2841 perm_mask, stmt_info, gsi);
2842 else
2843 op = vec_oprnd0 = vec_oprnds0[modifier == WIDEN ? j / 2 : j];
2844
2845 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
2846 {
2847 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op)),
2848 TYPE_VECTOR_SUBPARTS (idxtype)));
2849 var = vect_get_new_ssa_name (idxtype, vect_simple_var);
2850 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
2851 gassign *new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
2852 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2853 op = var;
2854 }
2855
2856 if (mask)
2857 {
2858 if (mask_perm_mask && (j & 1))
2859 mask_op = permute_vec_elements (vinfo, mask_op, mask_op,
2860 mask_perm_mask, stmt_info, gsi);
2861 else
2862 {
2863 if (modifier == NARROW)
2864 {
2865 if ((j & 1) == 0)
2866 vec_mask = vec_masks[j / 2];
2867 }
2868 else
2869 vec_mask = vec_masks[j];
2870
2871 mask_op = vec_mask;
2872 if (!useless_type_conversion_p (masktype, TREE_TYPE (vec_mask)))
2873 {
2874 poly_uint64 sub1 = TYPE_VECTOR_SUBPARTS (TREE_TYPE (mask_op));
2875 poly_uint64 sub2 = TYPE_VECTOR_SUBPARTS (masktype);
2876 gcc_assert (known_eq (sub1, sub2));
2877 var = vect_get_new_ssa_name (masktype, vect_simple_var);
2878 mask_op = build1 (VIEW_CONVERT_EXPR, masktype, mask_op);
2879 gassign *new_stmt
2880 = gimple_build_assign (var, VIEW_CONVERT_EXPR, mask_op);
2881 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2882 mask_op = var;
2883 }
2884 }
2885 if (modifier == NARROW && masktype != real_masktype)
2886 {
2887 var = vect_get_new_ssa_name (mask_halftype, vect_simple_var);
2888 gassign *new_stmt
2889 = gimple_build_assign (var, (j & 1) ? VEC_UNPACK_HI_EXPR
2890 : VEC_UNPACK_LO_EXPR,
2891 mask_op);
2892 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2893 mask_op = var;
2894 }
2895 src_op = mask_op;
2896 }
2897
2898 tree mask_arg = mask_op;
2899 if (masktype != real_masktype)
2900 {
2901 tree utype, optype = TREE_TYPE (mask_op);
2902 if (VECTOR_TYPE_P (real_masktype)
2903 || TYPE_MODE (real_masktype) == TYPE_MODE (optype))
2904 utype = real_masktype;
2905 else
2906 utype = lang_hooks.types.type_for_mode (TYPE_MODE (optype), 1);
2907 var = vect_get_new_ssa_name (utype, vect_scalar_var);
2908 mask_arg = build1 (VIEW_CONVERT_EXPR, utype, mask_op);
2909 gassign *new_stmt
2910 = gimple_build_assign (var, VIEW_CONVERT_EXPR, mask_arg);
2911 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2912 mask_arg = var;
2913 if (!useless_type_conversion_p (real_masktype, utype))
2914 {
2915 gcc_assert (TYPE_PRECISION (utype)
2916 <= TYPE_PRECISION (real_masktype));
2917 var = vect_get_new_ssa_name (real_masktype, vect_scalar_var);
2918 new_stmt = gimple_build_assign (var, NOP_EXPR, mask_arg);
2919 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2920 mask_arg = var;
2921 }
2922 src_op = build_zero_cst (srctype);
2923 }
2924 gimple *new_stmt = gimple_build_call (gs_info->decl, 5, src_op, ptr, op,
2925 mask_arg, scale);
2926
2927 if (!useless_type_conversion_p (vectype, rettype))
2928 {
2929 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (vectype),
2930 TYPE_VECTOR_SUBPARTS (rettype)));
2931 op = vect_get_new_ssa_name (rettype, vect_simple_var);
2932 gimple_call_set_lhs (new_stmt, op);
2933 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2934 var = make_ssa_name (vec_dest);
2935 op = build1 (VIEW_CONVERT_EXPR, vectype, op);
2936 new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
2937 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2938 }
2939 else
2940 {
2941 var = make_ssa_name (vec_dest, new_stmt);
2942 gimple_call_set_lhs (new_stmt, var);
2943 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2944 }
2945
2946 if (modifier == NARROW)
2947 {
2948 if ((j & 1) == 0)
2949 {
2950 prev_res = var;
2951 continue;
2952 }
2953 var = permute_vec_elements (vinfo, prev_res, var, perm_mask,
2954 stmt_info, gsi);
2955 new_stmt = SSA_NAME_DEF_STMT (var);
2956 }
2957
2958 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
2959 }
2960 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
2961 }
2962
2963 /* Prepare the base and offset in GS_INFO for vectorization.
2964    Set *DATAREF_PTR to the loop-invariant base address and fill *VEC_OFFSET
2965    with the vectorized offset defs for each copy of STMT_INFO.
2966 STMT_INFO is the statement described by GS_INFO and LOOP is the
2967 containing loop. */
2968
2969 static void
2970 vect_get_gather_scatter_ops (loop_vec_info loop_vinfo,
2971 class loop *loop, stmt_vec_info stmt_info,
2972 slp_tree slp_node, gather_scatter_info *gs_info,
2973 tree *dataref_ptr, vec<tree> *vec_offset)
2974 {
2975 gimple_seq stmts = NULL;
2976 *dataref_ptr = force_gimple_operand (gs_info->base, &stmts, true, NULL_TREE);
2977 if (stmts != NULL)
2978 {
2979 basic_block new_bb;
2980 edge pe = loop_preheader_edge (loop);
2981 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
2982 gcc_assert (!new_bb);
2983 }
2984 if (slp_node)
2985 vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[0], vec_offset);
2986 else
2987 {
2988 unsigned ncopies
2989 = vect_get_num_copies (loop_vinfo, gs_info->offset_vectype);
2990 vect_get_vec_defs_for_operand (loop_vinfo, stmt_info, ncopies,
2991 gs_info->offset, vec_offset,
2992 gs_info->offset_vectype);
2993 }
2994 }
2995
2996 /* Prepare to implement a grouped or strided load or store using
2997 the gather load or scatter store operation described by GS_INFO.
2998 STMT_INFO is the load or store statement.
2999
3000 Set *DATAREF_BUMP to the amount that should be added to the base
3001 address after each copy of the vectorized statement. Set *VEC_OFFSET
3002 to an invariant offset vector in which element I has the value
3003 I * DR_STEP / SCALE. */
3004
3005 static void
3006 vect_get_strided_load_store_ops (stmt_vec_info stmt_info,
3007 loop_vec_info loop_vinfo,
3008 gather_scatter_info *gs_info,
3009 tree *dataref_bump, tree *vec_offset)
3010 {
3011 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
3012 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3013
3014 tree bump = size_binop (MULT_EXPR,
3015 fold_convert (sizetype, unshare_expr (DR_STEP (dr))),
3016 size_int (TYPE_VECTOR_SUBPARTS (vectype)));
3017 *dataref_bump = cse_and_gimplify_to_preheader (loop_vinfo, bump);
3018
3019 /* The offset given in GS_INFO can have pointer type, so use the element
3020 type of the vector instead. */
3021 tree offset_type = TREE_TYPE (gs_info->offset_vectype);
3022
3023 /* Calculate X = DR_STEP / SCALE and convert it to the appropriate type. */
3024 tree step = size_binop (EXACT_DIV_EXPR, unshare_expr (DR_STEP (dr)),
3025 ssize_int (gs_info->scale));
3026 step = fold_convert (offset_type, step);
3027
3028 /* Create {0, X, X*2, X*3, ...}. */
3029 tree offset = fold_build2 (VEC_SERIES_EXPR, gs_info->offset_vectype,
3030 build_zero_cst (offset_type), step);
3031 *vec_offset = cse_and_gimplify_to_preheader (loop_vinfo, offset);
3032 }
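/* Worked example (illustrative): with DR_STEP = 32 bytes, SCALE = 4
   and V4SI accesses, X = 32 / 4 = 8, so *VEC_OFFSET becomes
   { 0, 8, 16, 24 } and lane I addresses BASE + 4 * 8 * I, i.e.
   BASE, BASE + 32, BASE + 64, BASE + 96; *DATAREF_BUMP is
   32 * 4 = 128 bytes per copy.  */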
3033
3034 /* Return the amount that should be added to a vector pointer to move
3035 to the next or previous copy of AGGR_TYPE. DR_INFO is the data reference
3036 being vectorized and MEMORY_ACCESS_TYPE describes the type of
3037 vectorization. */
3038
3039 static tree
3040 vect_get_data_ptr_increment (vec_info *vinfo,
3041 dr_vec_info *dr_info, tree aggr_type,
3042 vect_memory_access_type memory_access_type)
3043 {
3044 if (memory_access_type == VMAT_INVARIANT)
3045 return size_zero_node;
3046
3047 tree iv_step = TYPE_SIZE_UNIT (aggr_type);
3048 tree step = vect_dr_behavior (vinfo, dr_info)->step;
3049 if (tree_int_cst_sgn (step) == -1)
3050 iv_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (iv_step), iv_step);
3051 return iv_step;
3052 }
3053
3054 /* Check and perform vectorization of BUILT_IN_BSWAP{16,32,64,128}. */
3055
3056 static bool
3057 vectorizable_bswap (vec_info *vinfo,
3058 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
3059 gimple **vec_stmt, slp_tree slp_node,
3060 slp_tree *slp_op,
3061 tree vectype_in, stmt_vector_for_cost *cost_vec)
3062 {
3063 tree op, vectype;
3064 gcall *stmt = as_a <gcall *> (stmt_info->stmt);
3065 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
3066 unsigned ncopies;
3067
3068 op = gimple_call_arg (stmt, 0);
3069 vectype = STMT_VINFO_VECTYPE (stmt_info);
3070 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
3071
3072 /* Multiple types in SLP are handled by creating the appropriate number of
3073 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
3074 case of SLP. */
3075 if (slp_node)
3076 ncopies = 1;
3077 else
3078 ncopies = vect_get_num_copies (loop_vinfo, vectype);
3079
3080 gcc_assert (ncopies >= 1);
3081
3082 tree char_vectype = get_same_sized_vectype (char_type_node, vectype_in);
3083 if (! char_vectype)
3084 return false;
3085
3086 poly_uint64 num_bytes = TYPE_VECTOR_SUBPARTS (char_vectype);
3087 unsigned word_bytes;
3088 if (!constant_multiple_p (num_bytes, nunits, &word_bytes))
3089 return false;
3090
3091 /* The encoding uses one stepped pattern for each byte in the word. */
3092 vec_perm_builder elts (num_bytes, word_bytes, 3);
3093 for (unsigned i = 0; i < 3; ++i)
3094 for (unsigned j = 0; j < word_bytes; ++j)
3095 elts.quick_push ((i + 1) * word_bytes - j - 1);
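  /* E.g. for 4-byte words the selector built above begins
     { 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, ... }, i.e. the bytes of
     each word in reverse order.  */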
3096
3097 vec_perm_indices indices (elts, 1, num_bytes);
3098 if (!can_vec_perm_const_p (TYPE_MODE (char_vectype), indices))
3099 return false;
3100
3101 if (! vec_stmt)
3102 {
3103 if (slp_node
3104 && !vect_maybe_update_slp_op_vectype (slp_op[0], vectype_in))
3105 {
3106 if (dump_enabled_p ())
3107 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3108 "incompatible vector types for invariants\n");
3109 return false;
3110 }
3111
3112 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
3113 DUMP_VECT_SCOPE ("vectorizable_bswap");
3114 record_stmt_cost (cost_vec,
3115 1, vector_stmt, stmt_info, 0, vect_prologue);
3116 record_stmt_cost (cost_vec,
3117 slp_node
3118 ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node) : ncopies,
3119 vec_perm, stmt_info, 0, vect_body);
3120 return true;
3121 }
3122
3123 tree bswap_vconst = vec_perm_indices_to_tree (char_vectype, indices);
3124
3125 /* Transform. */
3126 vec<tree> vec_oprnds = vNULL;
3127 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
3128 op, &vec_oprnds);
3129 /* Arguments are ready. Create the new vector stmt. */
3130 unsigned i;
3131 tree vop;
3132 FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
3133 {
3134 gimple *new_stmt;
3135 tree tem = make_ssa_name (char_vectype);
3136 new_stmt = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
3137 char_vectype, vop));
3138 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3139 tree tem2 = make_ssa_name (char_vectype);
3140 new_stmt = gimple_build_assign (tem2, VEC_PERM_EXPR,
3141 tem, tem, bswap_vconst);
3142 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3143 tem = make_ssa_name (vectype);
3144 new_stmt = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
3145 vectype, tem2));
3146 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3147 if (slp_node)
3148 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
3149 else
3150 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
3151 }
3152
3153 if (!slp_node)
3154 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
3155
3156 vec_oprnds.release ();
3157 return true;
3158 }
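/* As an illustration of the transform above (hypothetical SSA names),
   vectorizing __builtin_bswap32 on a vector(4) int operand emits:

     tem_1 = VIEW_CONVERT_EXPR<vector(16) char>(vop_0);
     tem_2 = VEC_PERM_EXPR <tem_1, tem_1, { 3, 2, 1, 0, 7, 6, 5, 4, ... }>;
     res_3 = VIEW_CONVERT_EXPR<vector(4) int>(tem_2);  */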
3159
3160 /* Return true if vector types VECTYPE_IN and VECTYPE_OUT have
3161 integer elements and if we can narrow VECTYPE_IN to VECTYPE_OUT
3162 in a single step. On success, store the binary pack code in
3163 *CONVERT_CODE. */
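/* For example (a sketch, assuming the target supports the pattern):
   narrowing V4DI to V8SI is a single step in which two V4DI inputs are
   packed into one V8SI output, and supportable_narrowing_operation
   typically reports VEC_PACK_TRUNC_EXPR as the pack code.  */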
3164
3165 static bool
3166 simple_integer_narrowing (tree vectype_out, tree vectype_in,
3167 tree_code *convert_code)
3168 {
3169 if (!INTEGRAL_TYPE_P (TREE_TYPE (vectype_out))
3170 || !INTEGRAL_TYPE_P (TREE_TYPE (vectype_in)))
3171 return false;
3172
3173 tree_code code;
3174 int multi_step_cvt = 0;
3175 auto_vec <tree, 8> interm_types;
3176 if (!supportable_narrowing_operation (NOP_EXPR, vectype_out, vectype_in,
3177 &code, &multi_step_cvt, &interm_types)
3178 || multi_step_cvt)
3179 return false;
3180
3181 *convert_code = code;
3182 return true;
3183 }
3184
3185 /* Function vectorizable_call.
3186
3187 Check if STMT_INFO performs a function call that can be vectorized.
3188 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
3189 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
3190 Return true if STMT_INFO is vectorizable in this way. */
3191
3192 static bool
3193 vectorizable_call (vec_info *vinfo,
3194 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
3195 gimple **vec_stmt, slp_tree slp_node,
3196 stmt_vector_for_cost *cost_vec)
3197 {
3198 gcall *stmt;
3199 tree vec_dest;
3200 tree scalar_dest;
3201 tree op;
3202 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
3203 tree vectype_out, vectype_in;
3204 poly_uint64 nunits_in;
3205 poly_uint64 nunits_out;
3206 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
3207 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
3208 tree fndecl, new_temp, rhs_type;
3209 enum vect_def_type dt[4]
3210 = { vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type,
3211 vect_unknown_def_type };
3212 tree vectypes[ARRAY_SIZE (dt)] = {};
3213 slp_tree slp_op[ARRAY_SIZE (dt)] = {};
3214 int ndts = ARRAY_SIZE (dt);
3215 int ncopies, j;
3216 auto_vec<tree, 8> vargs;
3217 enum { NARROW, NONE, WIDEN } modifier;
3218 size_t i, nargs;
3219 tree lhs;
3220
3221 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3222 return false;
3223
3224 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
3225 && ! vec_stmt)
3226 return false;
3227
3228 /* Is STMT_INFO a vectorizable call? */
3229 stmt = dyn_cast <gcall *> (stmt_info->stmt);
3230 if (!stmt)
3231 return false;
3232
3233 if (gimple_call_internal_p (stmt)
3234 && (internal_load_fn_p (gimple_call_internal_fn (stmt))
3235 || internal_store_fn_p (gimple_call_internal_fn (stmt))))
3236 /* Handled by vectorizable_load and vectorizable_store. */
3237 return false;
3238
3239 if (gimple_call_lhs (stmt) == NULL_TREE
3240 || TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
3241 return false;
3242
3243 gcc_checking_assert (!stmt_can_throw_internal (cfun, stmt));
3244
3245 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
3246
3247 /* Process function arguments. */
3248 rhs_type = NULL_TREE;
3249 vectype_in = NULL_TREE;
3250 nargs = gimple_call_num_args (stmt);
3251
3252 /* Bail out if the function has more than four arguments; we do not have
3253 interesting builtin functions to vectorize with more than two arguments
3254 except for fma. Calls with no arguments are not interesting either. */
3255 if (nargs == 0 || nargs > 4)
3256 return false;
3257
3258 /* Ignore the arguments of IFN_GOMP_SIMD_LANE, they are magic. */
3259 combined_fn cfn = gimple_call_combined_fn (stmt);
3260 if (cfn == CFN_GOMP_SIMD_LANE)
3261 {
3262 nargs = 0;
3263 rhs_type = unsigned_type_node;
3264 }
3265
3266 int mask_opno = -1;
3267 if (internal_fn_p (cfn))
3268 mask_opno = internal_fn_mask_index (as_internal_fn (cfn));
3269
3270 for (i = 0; i < nargs; i++)
3271 {
3272 if ((int) i == mask_opno)
3273 {
3274 if (!vect_check_scalar_mask (vinfo, stmt_info, slp_node, mask_opno,
3275 &op, &slp_op[i], &dt[i], &vectypes[i]))
3276 return false;
3277 continue;
3278 }
3279
3280 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
3281 i, &op, &slp_op[i], &dt[i], &vectypes[i]))
3282 {
3283 if (dump_enabled_p ())
3284 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3285 "use not simple.\n");
3286 return false;
3287 }
3288
3289 /* We can only handle calls with arguments of the same type. */
3290 if (rhs_type
3291 && !types_compatible_p (rhs_type, TREE_TYPE (op)))
3292 {
3293 if (dump_enabled_p ())
3294 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3295 "argument types differ.\n");
3296 return false;
3297 }
3298 if (!rhs_type)
3299 rhs_type = TREE_TYPE (op);
3300
3301 if (!vectype_in)
3302 vectype_in = vectypes[i];
3303 else if (vectypes[i]
3304 && !types_compatible_p (vectypes[i], vectype_in))
3305 {
3306 if (dump_enabled_p ())
3307 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3308 "argument vector types differ.\n");
3309 return false;
3310 }
3311 }
3312 /* If all arguments are external or constant defs, infer the vector type
3313 from the scalar type. */
3314 if (!vectype_in)
3315 vectype_in = get_vectype_for_scalar_type (vinfo, rhs_type, slp_node);
3316 if (vec_stmt)
3317 gcc_assert (vectype_in);
3318 if (!vectype_in)
3319 {
3320 if (dump_enabled_p ())
3321 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3322 "no vectype for scalar type %T\n", rhs_type);
3323
3324 return false;
3325 }
3326 /* FORNOW: we don't yet support mixtures of vector sizes for calls,
3327 just mixtures of nunits. E.g. DI->SI versions of __builtin_ctz*
3328 are traditionally vectorized as two VnDI->VnDI IFN_CTZs followed
3329 by a pack of the two vectors into an SI vector. We would need
3330 separate code to handle direct VnDI->VnSI IFN_CTZs. */
3331 if (TYPE_SIZE (vectype_in) != TYPE_SIZE (vectype_out))
3332 {
3333 if (dump_enabled_p ())
3334 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3335 "mismatched vector sizes %T and %T\n",
3336 vectype_in, vectype_out);
3337 return false;
3338 }
3339
3340 if (VECTOR_BOOLEAN_TYPE_P (vectype_out)
3341 != VECTOR_BOOLEAN_TYPE_P (vectype_in))
3342 {
3343 if (dump_enabled_p ())
3344 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3345 "mixed mask and nonmask vector types\n");
3346 return false;
3347 }
3348
3349 /* FORNOW */
3350 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
3351 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
3352 if (known_eq (nunits_in * 2, nunits_out))
3353 modifier = NARROW;
3354 else if (known_eq (nunits_out, nunits_in))
3355 modifier = NONE;
3356 else if (known_eq (nunits_out * 2, nunits_in))
3357 modifier = WIDEN;
3358 else
3359 return false;
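  /* E.g. (illustrative modes) V4DI arguments with a V8SI result give
     modifier == NARROW: each vectorized call produces a half-width
     result and pairs of them are packed later.  V8SI arguments with a
     V4DI result would give WIDEN.  */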
3360
3361 /* We only handle functions that do not read or clobber memory. */
3362 if (gimple_vuse (stmt))
3363 {
3364 if (dump_enabled_p ())
3365 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3366 "function reads from or writes to memory.\n");
3367 return false;
3368 }
3369
3370 /* For now, we only vectorize functions if a target specific builtin
3371 is available. TODO -- in some cases, it might be profitable to
3372 insert the calls for pieces of the vector, in order to be able
3373 to vectorize other operations in the loop. */
3374 fndecl = NULL_TREE;
3375 internal_fn ifn = IFN_LAST;
3376 tree callee = gimple_call_fndecl (stmt);
3377
3378 /* First try using an internal function. */
3379 tree_code convert_code = ERROR_MARK;
3380 if (cfn != CFN_LAST
3381 && (modifier == NONE
3382 || (modifier == NARROW
3383 && simple_integer_narrowing (vectype_out, vectype_in,
3384 &convert_code))))
3385 ifn = vectorizable_internal_function (cfn, callee, vectype_out,
3386 vectype_in);
3387
3388 /* If that fails, try asking for a target-specific built-in function. */
3389 if (ifn == IFN_LAST)
3390 {
3391 if (cfn != CFN_LAST)
3392 fndecl = targetm.vectorize.builtin_vectorized_function
3393 (cfn, vectype_out, vectype_in);
3394 else if (callee && fndecl_built_in_p (callee, BUILT_IN_MD))
3395 fndecl = targetm.vectorize.builtin_md_vectorized_function
3396 (callee, vectype_out, vectype_in);
3397 }
3398
3399 if (ifn == IFN_LAST && !fndecl)
3400 {
3401 if (cfn == CFN_GOMP_SIMD_LANE
3402 && !slp_node
3403 && loop_vinfo
3404 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
3405 && TREE_CODE (gimple_call_arg (stmt, 0)) == SSA_NAME
3406 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
3407 == SSA_NAME_VAR (gimple_call_arg (stmt, 0)))
3408 {
3409 /* We can handle IFN_GOMP_SIMD_LANE by returning a
3410 { 0, 1, 2, ... vf - 1 } vector. */
3411 gcc_assert (nargs == 0);
3412 }
3413 else if (modifier == NONE
3414 && (gimple_call_builtin_p (stmt, BUILT_IN_BSWAP16)
3415 || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP32)
3416 || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP64)
3417 || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP128)))
3418 return vectorizable_bswap (vinfo, stmt_info, gsi, vec_stmt, slp_node,
3419 slp_op, vectype_in, cost_vec);
3420 else
3421 {
3422 if (dump_enabled_p ())
3423 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3424 "function is not vectorizable.\n");
3425 return false;
3426 }
3427 }
3428
3429 if (slp_node)
3430 ncopies = 1;
3431 else if (modifier == NARROW && ifn == IFN_LAST)
3432 ncopies = vect_get_num_copies (loop_vinfo, vectype_out);
3433 else
3434 ncopies = vect_get_num_copies (loop_vinfo, vectype_in);
3435
3436 /* Sanity check: make sure that at least one copy of the vectorized stmt
3437 needs to be generated. */
3438 gcc_assert (ncopies >= 1);
3439
3440 int reduc_idx = STMT_VINFO_REDUC_IDX (stmt_info);
3441 internal_fn cond_fn = get_conditional_internal_fn (ifn);
3442 vec_loop_masks *masks = (loop_vinfo ? &LOOP_VINFO_MASKS (loop_vinfo) : NULL);
3443 if (!vec_stmt) /* transformation not required. */
3444 {
3445 if (slp_node)
3446 for (i = 0; i < nargs; ++i)
3447 if (!vect_maybe_update_slp_op_vectype (slp_op[i], vectype_in))
3448 {
3449 if (dump_enabled_p ())
3450 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3451 "incompatible vector types for invariants\n");
3452 return false;
3453 }
3454 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
3455 DUMP_VECT_SCOPE ("vectorizable_call");
3456 vect_model_simple_cost (vinfo, stmt_info,
3457 ncopies, dt, ndts, slp_node, cost_vec);
3458 if (ifn != IFN_LAST && modifier == NARROW && !slp_node)
3459 record_stmt_cost (cost_vec, ncopies / 2,
3460 vec_promote_demote, stmt_info, 0, vect_body);
3461
3462 if (loop_vinfo
3463 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo)
3464 && (reduc_idx >= 0 || mask_opno >= 0))
3465 {
3466 if (reduc_idx >= 0
3467 && (cond_fn == IFN_LAST
3468 || !direct_internal_fn_supported_p (cond_fn, vectype_out,
3469 OPTIMIZE_FOR_SPEED)))
3470 {
3471 if (dump_enabled_p ())
3472 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3473 "can't use a fully-masked loop because no"
3474 " conditional operation is available.\n");
3475 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
3476 }
3477 else
3478 {
3479 unsigned int nvectors
3480 = (slp_node
3481 ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node)
3482 : ncopies);
3483 tree scalar_mask = NULL_TREE;
3484 if (mask_opno >= 0)
3485 scalar_mask = gimple_call_arg (stmt_info->stmt, mask_opno);
3486 vect_record_loop_mask (loop_vinfo, masks, nvectors,
3487 vectype_out, scalar_mask);
3488 }
3489 }
3490 return true;
3491 }
3492
3493 /* Transform. */
3494
3495 if (dump_enabled_p ())
3496 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
3497
3498 /* Handle def. */
3499 scalar_dest = gimple_call_lhs (stmt);
3500 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
3501
3502 bool masked_loop_p = loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo);
3503 unsigned int vect_nargs = nargs;
3504 if (masked_loop_p && reduc_idx >= 0)
3505 {
3506 ifn = cond_fn;
3507 vect_nargs += 2;
3508 }
3509
3510 if (modifier == NONE || ifn != IFN_LAST)
3511 {
3512 tree prev_res = NULL_TREE;
3513 vargs.safe_grow (vect_nargs, true);
3514 auto_vec<vec<tree> > vec_defs (nargs);
3515 for (j = 0; j < ncopies; ++j)
3516 {
3517 /* Build argument list for the vectorized call. */
3518 if (slp_node)
3519 {
3520 vec<tree> vec_oprnds0;
3521
3522 vect_get_slp_defs (vinfo, slp_node, &vec_defs);
3523 vec_oprnds0 = vec_defs[0];
3524
3525 /* Arguments are ready. Create the new vector stmt. */
3526 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_oprnd0)
3527 {
3528 int varg = 0;
3529 if (masked_loop_p && reduc_idx >= 0)
3530 {
3531 unsigned int vec_num = vec_oprnds0.length ();
3532 /* Always true for SLP. */
3533 gcc_assert (ncopies == 1);
3534 vargs[varg++] = vect_get_loop_mask (gsi, masks, vec_num,
3535 vectype_out, i);
3536 }
3537 size_t k;
3538 for (k = 0; k < nargs; k++)
3539 {
3540 vec<tree> vec_oprndsk = vec_defs[k];
3541 vargs[varg++] = vec_oprndsk[i];
3542 }
3543 if (masked_loop_p && reduc_idx >= 0)
3544 vargs[varg++] = vargs[reduc_idx + 1];
3545 gimple *new_stmt;
3546 if (modifier == NARROW)
3547 {
3548 /* We don't define any narrowing conditional functions
3549 at present. */
3550 gcc_assert (mask_opno < 0);
3551 tree half_res = make_ssa_name (vectype_in);
3552 gcall *call
3553 = gimple_build_call_internal_vec (ifn, vargs);
3554 gimple_call_set_lhs (call, half_res);
3555 gimple_call_set_nothrow (call, true);
3556 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
3557 if ((i & 1) == 0)
3558 {
3559 prev_res = half_res;
3560 continue;
3561 }
3562 new_temp = make_ssa_name (vec_dest);
3563 new_stmt = gimple_build_assign (new_temp, convert_code,
3564 prev_res, half_res);
3565 vect_finish_stmt_generation (vinfo, stmt_info,
3566 new_stmt, gsi);
3567 }
3568 else
3569 {
3570 if (mask_opno >= 0 && masked_loop_p)
3571 {
3572 unsigned int vec_num = vec_oprnds0.length ();
3573 /* Always true for SLP. */
3574 gcc_assert (ncopies == 1);
3575 tree mask = vect_get_loop_mask (gsi, masks, vec_num,
3576 vectype_out, i);
3577 vargs[mask_opno] = prepare_vec_mask
3578 (loop_vinfo, TREE_TYPE (mask), mask,
3579 vargs[mask_opno], gsi);
3580 }
3581
3582 gcall *call;
3583 if (ifn != IFN_LAST)
3584 call = gimple_build_call_internal_vec (ifn, vargs);
3585 else
3586 call = gimple_build_call_vec (fndecl, vargs);
3587 new_temp = make_ssa_name (vec_dest, call);
3588 gimple_call_set_lhs (call, new_temp);
3589 gimple_call_set_nothrow (call, true);
3590 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
3591 new_stmt = call;
3592 }
3593 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
3594 }
3595 continue;
3596 }
3597
3598 int varg = 0;
3599 if (masked_loop_p && reduc_idx >= 0)
3600 vargs[varg++] = vect_get_loop_mask (gsi, masks, ncopies,
3601 vectype_out, j);
3602 for (i = 0; i < nargs; i++)
3603 {
3604 op = gimple_call_arg (stmt, i);
3605 if (j == 0)
3606 {
3607 vec_defs.quick_push (vNULL);
3608 vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
3609 op, &vec_defs[i],
3610 vectypes[i]);
3611 }
3612 vargs[varg++] = vec_defs[i][j];
3613 }
3614 if (masked_loop_p && reduc_idx >= 0)
3615 vargs[varg++] = vargs[reduc_idx + 1];
3616
3617 if (mask_opno >= 0 && masked_loop_p)
3618 {
3619 tree mask = vect_get_loop_mask (gsi, masks, ncopies,
3620 vectype_out, j);
3621 vargs[mask_opno]
3622 = prepare_vec_mask (loop_vinfo, TREE_TYPE (mask), mask,
3623 vargs[mask_opno], gsi);
3624 }
3625
3626 gimple *new_stmt;
3627 if (cfn == CFN_GOMP_SIMD_LANE)
3628 {
3629 tree cst = build_index_vector (vectype_out, j * nunits_out, 1);
3630 tree new_var
3631 = vect_get_new_ssa_name (vectype_out, vect_simple_var, "cst_");
3632 gimple *init_stmt = gimple_build_assign (new_var, cst);
3633 vect_init_vector_1 (vinfo, stmt_info, init_stmt, NULL);
3634 new_temp = make_ssa_name (vec_dest);
3635 new_stmt = gimple_build_assign (new_temp, new_var);
3636 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3637 }
3638 else if (modifier == NARROW)
3639 {
3640 /* We don't define any narrowing conditional functions at
3641 present. */
3642 gcc_assert (mask_opno < 0);
3643 tree half_res = make_ssa_name (vectype_in);
3644 gcall *call = gimple_build_call_internal_vec (ifn, vargs);
3645 gimple_call_set_lhs (call, half_res);
3646 gimple_call_set_nothrow (call, true);
3647 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
3648 if ((j & 1) == 0)
3649 {
3650 prev_res = half_res;
3651 continue;
3652 }
3653 new_temp = make_ssa_name (vec_dest);
3654 new_stmt = gimple_build_assign (new_temp, convert_code,
3655 prev_res, half_res);
3656 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3657 }
3658 else
3659 {
3660 gcall *call;
3661 if (ifn != IFN_LAST)
3662 call = gimple_build_call_internal_vec (ifn, vargs);
3663 else
3664 call = gimple_build_call_vec (fndecl, vargs);
3665 new_temp = make_ssa_name (vec_dest, call);
3666 gimple_call_set_lhs (call, new_temp);
3667 gimple_call_set_nothrow (call, true);
3668 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
3669 new_stmt = call;
3670 }
3671
3672 if (j == (modifier == NARROW ? 1 : 0))
3673 *vec_stmt = new_stmt;
3674 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
3675 }
3676 for (i = 0; i < nargs; i++)
3677 {
3678 vec<tree> vec_oprndsi = vec_defs[i];
3679 vec_oprndsi.release ();
3680 }
3681 }
3682 else if (modifier == NARROW)
3683 {
3684 auto_vec<vec<tree> > vec_defs (nargs);
3685 /* We don't define any narrowing conditional functions at present. */
3686 gcc_assert (mask_opno < 0);
3687 for (j = 0; j < ncopies; ++j)
3688 {
3689 /* Build argument list for the vectorized call. */
3690 if (j == 0)
3691 vargs.create (nargs * 2);
3692 else
3693 vargs.truncate (0);
3694
3695 if (slp_node)
3696 {
3697 vec<tree> vec_oprnds0;
3698
3699 vect_get_slp_defs (vinfo, slp_node, &vec_defs);
3700 vec_oprnds0 = vec_defs[0];
3701
3702 /* Arguments are ready. Create the new vector stmt. */
3703 for (i = 0; vec_oprnds0.iterate (i, &vec_oprnd0); i += 2)
3704 {
3705 size_t k;
3706 vargs.truncate (0);
3707 for (k = 0; k < nargs; k++)
3708 {
3709 vec<tree> vec_oprndsk = vec_defs[k];
3710 vargs.quick_push (vec_oprndsk[i]);
3711 vargs.quick_push (vec_oprndsk[i + 1]);
3712 }
3713 gcall *call;
3714 if (ifn != IFN_LAST)
3715 call = gimple_build_call_internal_vec (ifn, vargs);
3716 else
3717 call = gimple_build_call_vec (fndecl, vargs);
3718 new_temp = make_ssa_name (vec_dest, call);
3719 gimple_call_set_lhs (call, new_temp);
3720 gimple_call_set_nothrow (call, true);
3721 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
3722 SLP_TREE_VEC_STMTS (slp_node).quick_push (call);
3723 }
3724 continue;
3725 }
3726
3727 for (i = 0; i < nargs; i++)
3728 {
3729 op = gimple_call_arg (stmt, i);
3730 if (j == 0)
3731 {
3732 vec_defs.quick_push (vNULL);
3733 vect_get_vec_defs_for_operand (vinfo, stmt_info, 2 * ncopies,
3734 op, &vec_defs[i], vectypes[i]);
3735 }
3736 vec_oprnd0 = vec_defs[i][2*j];
3737 vec_oprnd1 = vec_defs[i][2*j+1];
3738
3739 vargs.quick_push (vec_oprnd0);
3740 vargs.quick_push (vec_oprnd1);
3741 }
3742
3743 gcall *new_stmt = gimple_build_call_vec (fndecl, vargs);
3744 new_temp = make_ssa_name (vec_dest, new_stmt);
3745 gimple_call_set_lhs (new_stmt, new_temp);
3746 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3747
3748 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
3749 }
3750
3751 if (!slp_node)
3752 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
3753
3754 for (i = 0; i < nargs; i++)
3755 {
3756 vec<tree> vec_oprndsi = vec_defs[i];
3757 vec_oprndsi.release ();
3758 }
3759 }
3760 else
3761 /* No current target implements this case. */
3762 return false;
3763
3764 vargs.release ();
3765
3766 /* The call in STMT might prevent it from being removed in dce.
3767 We cannot remove it here, however, due to the way the ssa name
3768 it defines is mapped to the new definition. So just replace the
3769 rhs of the statement with something harmless. */
3770
3771 if (slp_node)
3772 return true;
3773
3774 stmt_info = vect_orig_stmt (stmt_info);
3775 lhs = gimple_get_lhs (stmt_info->stmt);
3776
3777 gassign *new_stmt
3778 = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
3779 vinfo->replace_stmt (gsi, stmt_info, new_stmt);
3780
3781 return true;
3782 }
3783
3784
3785 struct simd_call_arg_info
3786 {
3787 tree vectype;
3788 tree op;
3789 HOST_WIDE_INT linear_step;
3790 enum vect_def_type dt;
3791 unsigned int align;
3792 bool simd_lane_linear;
3793 };
3794
3795 /* Helper function of vectorizable_simd_clone_call. If OP, an SSA_NAME,
3796 is linear within a simd lane (but not within the whole loop), note it in
3797 *ARGINFO. */
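/* A sketch of the pattern this recognizes (hypothetical SSA names):

     _1 = .GOMP_SIMD_LANE (simduid.0);
     _2 = _1 * 4;
     op_3 = &base + _2;

   Here OP is linear within each simd lane with linear_step 4 and
   base &base.  */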
3798
3799 static void
3800 vect_simd_lane_linear (tree op, class loop *loop,
3801 struct simd_call_arg_info *arginfo)
3802 {
3803 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
3804
3805 if (!is_gimple_assign (def_stmt)
3806 || gimple_assign_rhs_code (def_stmt) != POINTER_PLUS_EXPR
3807 || !is_gimple_min_invariant (gimple_assign_rhs1 (def_stmt)))
3808 return;
3809
3810 tree base = gimple_assign_rhs1 (def_stmt);
3811 HOST_WIDE_INT linear_step = 0;
3812 tree v = gimple_assign_rhs2 (def_stmt);
3813 while (TREE_CODE (v) == SSA_NAME)
3814 {
3815 tree t;
3816 def_stmt = SSA_NAME_DEF_STMT (v);
3817 if (is_gimple_assign (def_stmt))
3818 switch (gimple_assign_rhs_code (def_stmt))
3819 {
3820 case PLUS_EXPR:
3821 t = gimple_assign_rhs2 (def_stmt);
3822 if (linear_step || TREE_CODE (t) != INTEGER_CST)
3823 return;
3824 base = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (base), base, t);
3825 v = gimple_assign_rhs1 (def_stmt);
3826 continue;
3827 case MULT_EXPR:
3828 t = gimple_assign_rhs2 (def_stmt);
3829 if (linear_step || !tree_fits_shwi_p (t) || integer_zerop (t))
3830 return;
3831 linear_step = tree_to_shwi (t);
3832 v = gimple_assign_rhs1 (def_stmt);
3833 continue;
3834 CASE_CONVERT:
3835 t = gimple_assign_rhs1 (def_stmt);
3836 if (TREE_CODE (TREE_TYPE (t)) != INTEGER_TYPE
3837 || (TYPE_PRECISION (TREE_TYPE (v))
3838 < TYPE_PRECISION (TREE_TYPE (t))))
3839 return;
3840 if (!linear_step)
3841 linear_step = 1;
3842 v = t;
3843 continue;
3844 default:
3845 return;
3846 }
3847 else if (gimple_call_internal_p (def_stmt, IFN_GOMP_SIMD_LANE)
3848 && loop->simduid
3849 && TREE_CODE (gimple_call_arg (def_stmt, 0)) == SSA_NAME
3850 && (SSA_NAME_VAR (gimple_call_arg (def_stmt, 0))
3851 == loop->simduid))
3852 {
3853 if (!linear_step)
3854 linear_step = 1;
3855 arginfo->linear_step = linear_step;
3856 arginfo->op = base;
3857 arginfo->simd_lane_linear = true;
3858 return;
3859 }
3860 }
3861 }
3862
3863 /* Return the number of elements in vector type VECTYPE, which is associated
3864 with a SIMD clone. At present these vectors always have a constant
3865 length. */
3866
3867 static unsigned HOST_WIDE_INT
3868 simd_clone_subparts (tree vectype)
3869 {
3870 return TYPE_VECTOR_SUBPARTS (vectype).to_constant ();
3871 }
3872
3873 /* Function vectorizable_simd_clone_call.
3874
3875 Check if STMT_INFO performs a function call that can be vectorized
3876 by calling a simd clone of the function.
3877 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
3878 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
3879 Return true if STMT_INFO is vectorizable in this way. */
3880
3881 static bool
3882 vectorizable_simd_clone_call (vec_info *vinfo, stmt_vec_info stmt_info,
3883 gimple_stmt_iterator *gsi,
3884 gimple **vec_stmt, slp_tree slp_node,
3885 stmt_vector_for_cost *)
3886 {
3887 tree vec_dest;
3888 tree scalar_dest;
3889 tree op, type;
3890 tree vec_oprnd0 = NULL_TREE;
3891 tree vectype;
3892 poly_uint64 nunits;
3893 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
3894 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
3895 class loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
3896 tree fndecl, new_temp;
3897 int ncopies, j;
3898 auto_vec<simd_call_arg_info> arginfo;
3899 vec<tree> vargs = vNULL;
3900 size_t i, nargs;
3901 tree lhs, rtype, ratype;
3902 vec<constructor_elt, va_gc> *ret_ctor_elts = NULL;
3903
3904 /* Is STMT a vectorizable call? */
3905 gcall *stmt = dyn_cast <gcall *> (stmt_info->stmt);
3906 if (!stmt)
3907 return false;
3908
3909 fndecl = gimple_call_fndecl (stmt);
3910 if (fndecl == NULL_TREE)
3911 return false;
3912
3913 struct cgraph_node *node = cgraph_node::get (fndecl);
3914 if (node == NULL || node->simd_clones == NULL)
3915 return false;
3916
3917 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3918 return false;
3919
3920 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
3921 && ! vec_stmt)
3922 return false;
3923
3924 if (gimple_call_lhs (stmt)
3925 && TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
3926 return false;
3927
3928 gcc_checking_assert (!stmt_can_throw_internal (cfun, stmt));
3929
3930 vectype = STMT_VINFO_VECTYPE (stmt_info);
3931
3932 if (loop_vinfo && nested_in_vect_loop_p (loop, stmt_info))
3933 return false;
3934
3935 /* FORNOW */
3936 if (slp_node)
3937 return false;
3938
3939 /* Process function arguments. */
3940 nargs = gimple_call_num_args (stmt);
3941
3942 /* Bail out if the function has zero arguments. */
3943 if (nargs == 0)
3944 return false;
3945
3946 arginfo.reserve (nargs, true);
3947
3948 for (i = 0; i < nargs; i++)
3949 {
3950 simd_call_arg_info thisarginfo;
3951 affine_iv iv;
3952
3953 thisarginfo.linear_step = 0;
3954 thisarginfo.align = 0;
3955 thisarginfo.op = NULL_TREE;
3956 thisarginfo.simd_lane_linear = false;
3957
3958 op = gimple_call_arg (stmt, i);
3959 if (!vect_is_simple_use (op, vinfo, &thisarginfo.dt,
3960 &thisarginfo.vectype)
3961 || thisarginfo.dt == vect_uninitialized_def)
3962 {
3963 if (dump_enabled_p ())
3964 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3965 "use not simple.\n");
3966 return false;
3967 }
3968
3969 if (thisarginfo.dt == vect_constant_def
3970 || thisarginfo.dt == vect_external_def)
3971 gcc_assert (thisarginfo.vectype == NULL_TREE);
3972 else
3973 {
3974 gcc_assert (thisarginfo.vectype != NULL_TREE);
3975 if (VECTOR_BOOLEAN_TYPE_P (thisarginfo.vectype))
3976 {
3977 if (dump_enabled_p ())
3978 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3979 "vector mask arguments are not supported\n");
3980 return false;
3981 }
3982 }
3983
3984 /* For linear arguments, the analyze phase should have saved
3985 the base and step in STMT_VINFO_SIMD_CLONE_INFO. */
3986 if (i * 3 + 4 <= STMT_VINFO_SIMD_CLONE_INFO (stmt_info).length ()
3987 && STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2])
3988 {
3989 gcc_assert (vec_stmt);
3990 thisarginfo.linear_step
3991 = tree_to_shwi (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2]);
3992 thisarginfo.op
3993 = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 1];
3994 thisarginfo.simd_lane_linear
3995 = (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 3]
3996 == boolean_true_node);
3997 /* If the loop has been peeled for alignment, adjust the base accordingly. */
3998 tree n1 = LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo);
3999 tree n2 = LOOP_VINFO_NITERS (loop_vinfo);
4000 if (n1 != n2 && !thisarginfo.simd_lane_linear)
4001 {
4002 tree bias = fold_build2 (MINUS_EXPR, TREE_TYPE (n1), n1, n2);
4003 tree step = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2];
4004 tree opt = TREE_TYPE (thisarginfo.op);
4005 bias = fold_convert (TREE_TYPE (step), bias);
4006 bias = fold_build2 (MULT_EXPR, TREE_TYPE (step), bias, step);
4007 thisarginfo.op
4008 = fold_build2 (POINTER_TYPE_P (opt)
4009 ? POINTER_PLUS_EXPR : PLUS_EXPR, opt,
4010 thisarginfo.op, bias);
4011 }
4012 }
4013 else if (!vec_stmt
4014 && thisarginfo.dt != vect_constant_def
4015 && thisarginfo.dt != vect_external_def
4016 && loop_vinfo
4017 && TREE_CODE (op) == SSA_NAME
4018 && simple_iv (loop, loop_containing_stmt (stmt), op,
4019 &iv, false)
4020 && tree_fits_shwi_p (iv.step))
4021 {
4022 thisarginfo.linear_step = tree_to_shwi (iv.step);
4023 thisarginfo.op = iv.base;
4024 }
4025 else if ((thisarginfo.dt == vect_constant_def
4026 || thisarginfo.dt == vect_external_def)
4027 && POINTER_TYPE_P (TREE_TYPE (op)))
4028 thisarginfo.align = get_pointer_alignment (op) / BITS_PER_UNIT;
4029 /* Addresses of array elements indexed by GOMP_SIMD_LANE are
4030 linear too. */
4031 if (POINTER_TYPE_P (TREE_TYPE (op))
4032 && !thisarginfo.linear_step
4033 && !vec_stmt
4034 && thisarginfo.dt != vect_constant_def
4035 && thisarginfo.dt != vect_external_def
4036 && loop_vinfo
4037 && !slp_node
4038 && TREE_CODE (op) == SSA_NAME)
4039 vect_simd_lane_linear (op, loop, &thisarginfo);
4040
4041 arginfo.quick_push (thisarginfo);
4042 }
4043
4044 poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
4045 if (!vf.is_constant ())
4046 {
4047 if (dump_enabled_p ())
4048 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4049 "not considering SIMD clones; not yet supported"
4050 " for variable-width vectors.\n");
4051 return false;
4052 }
4053
4054 unsigned int badness = 0;
4055 struct cgraph_node *bestn = NULL;
4056 if (STMT_VINFO_SIMD_CLONE_INFO (stmt_info).exists ())
4057 bestn = cgraph_node::get (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[0]);
4058 else
4059 for (struct cgraph_node *n = node->simd_clones; n != NULL;
4060 n = n->simdclone->next_clone)
4061 {
4062 unsigned int this_badness = 0;
4063 unsigned int num_calls;
4064 if (!constant_multiple_p (vf, n->simdclone->simdlen, &num_calls)
4065 || n->simdclone->nargs != nargs)
4066 continue;
4067 if (num_calls != 1)
4068 this_badness += exact_log2 (num_calls) * 4096;
4069 if (n->simdclone->inbranch)
4070 this_badness += 8192;
4071 int target_badness = targetm.simd_clone.usable (n);
4072 if (target_badness < 0)
4073 continue;
4074 this_badness += target_badness * 512;
4075 /* FORNOW: Have to add code to add the mask argument. */
4076 if (n->simdclone->inbranch)
4077 continue;
4078 for (i = 0; i < nargs; i++)
4079 {
4080 switch (n->simdclone->args[i].arg_type)
4081 {
4082 case SIMD_CLONE_ARG_TYPE_VECTOR:
4083 if (!useless_type_conversion_p
4084 (n->simdclone->args[i].orig_type,
4085 TREE_TYPE (gimple_call_arg (stmt, i))))
4086 i = -1;
4087 else if (arginfo[i].dt == vect_constant_def
4088 || arginfo[i].dt == vect_external_def
4089 || arginfo[i].linear_step)
4090 this_badness += 64;
4091 break;
4092 case SIMD_CLONE_ARG_TYPE_UNIFORM:
4093 if (arginfo[i].dt != vect_constant_def
4094 && arginfo[i].dt != vect_external_def)
4095 i = -1;
4096 break;
4097 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
4098 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
4099 if (arginfo[i].dt == vect_constant_def
4100 || arginfo[i].dt == vect_external_def
4101 || (arginfo[i].linear_step
4102 != n->simdclone->args[i].linear_step))
4103 i = -1;
4104 break;
4105 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
4106 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
4107 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP:
4108 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP:
4109 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
4110 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
4111 /* FORNOW */
4112 i = -1;
4113 break;
4114 case SIMD_CLONE_ARG_TYPE_MASK:
4115 gcc_unreachable ();
4116 }
4117 if (i == (size_t) -1)
4118 break;
4119 if (n->simdclone->args[i].alignment > arginfo[i].align)
4120 {
4121 i = -1;
4122 break;
4123 }
4124 if (arginfo[i].align)
4125 this_badness += (exact_log2 (arginfo[i].align)
4126 - exact_log2 (n->simdclone->args[i].alignment));
4127 }
4128 if (i == (size_t) -1)
4129 continue;
4130 if (bestn == NULL || this_badness < badness)
4131 {
4132 bestn = n;
4133 badness = this_badness;
4134 }
4135 }
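  /* Illustrative scoring: with VF == 8, a usable simdlen-8 clone starts
     at badness 0 whereas a simdlen-4 clone needs num_calls == 2 and
     starts at exact_log2 (2) * 4096 == 4096, so the wider clone wins
     unless targetm.simd_clone.usable penalizes it.  */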
4136
4137 if (bestn == NULL)
4138 return false;
4139
4140 for (i = 0; i < nargs; i++)
4141 if ((arginfo[i].dt == vect_constant_def
4142 || arginfo[i].dt == vect_external_def)
4143 && bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_VECTOR)
4144 {
4145 tree arg_type = TREE_TYPE (gimple_call_arg (stmt, i));
4146 arginfo[i].vectype = get_vectype_for_scalar_type (vinfo, arg_type,
4147 slp_node);
4148 if (arginfo[i].vectype == NULL
4149 || !constant_multiple_p (bestn->simdclone->simdlen,
4150 simd_clone_subparts (arginfo[i].vectype)))
4151 return false;
4152 }
4153
4154 fndecl = bestn->decl;
4155 nunits = bestn->simdclone->simdlen;
4156 ncopies = vector_unroll_factor (vf, nunits);
4157
4158 /* If the function isn't const, only allow it in simd loops where the
4159 user has asserted that at least nunits consecutive iterations can be
4160 performed using SIMD instructions. */
4161 if ((loop == NULL || maybe_lt ((unsigned) loop->safelen, nunits))
4162 && gimple_vuse (stmt))
4163 return false;
4164
4165 /* Sanity check: make sure that at least one copy of the vectorized stmt
4166 needs to be generated. */
4167 gcc_assert (ncopies >= 1);
4168
4169 if (!vec_stmt) /* transformation not required. */
4170 {
4171 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (bestn->decl);
4172 for (i = 0; i < nargs; i++)
4173 if ((bestn->simdclone->args[i].arg_type
4174 == SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP)
4175 || (bestn->simdclone->args[i].arg_type
4176 == SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP))
4177 {
4178 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_grow_cleared (i * 3
4179 + 1,
4180 true);
4181 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (arginfo[i].op);
4182 tree lst = POINTER_TYPE_P (TREE_TYPE (arginfo[i].op))
4183 ? size_type_node : TREE_TYPE (arginfo[i].op);
4184 tree ls = build_int_cst (lst, arginfo[i].linear_step);
4185 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (ls);
4186 tree sll = arginfo[i].simd_lane_linear
4187 ? boolean_true_node : boolean_false_node;
4188 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (sll);
4189 }
4190 STMT_VINFO_TYPE (stmt_info) = call_simd_clone_vec_info_type;
4191 DUMP_VECT_SCOPE ("vectorizable_simd_clone_call");
4192 /* vect_model_simple_cost (vinfo, stmt_info, ncopies,
4193 dt, slp_node, cost_vec); */
4194 return true;
4195 }
4196
4197 /* Transform. */
4198
4199 if (dump_enabled_p ())
4200 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
4201
4202 /* Handle def. */
4203 scalar_dest = gimple_call_lhs (stmt);
4204 vec_dest = NULL_TREE;
4205 rtype = NULL_TREE;
4206 ratype = NULL_TREE;
4207 if (scalar_dest)
4208 {
4209 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4210 rtype = TREE_TYPE (TREE_TYPE (fndecl));
4211 if (TREE_CODE (rtype) == ARRAY_TYPE)
4212 {
4213 ratype = rtype;
4214 rtype = TREE_TYPE (ratype);
4215 }
4216 }
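  /* The ARRAY_TYPE case covers clones that return their SIMDLEN results
     as an array of vectors rather than a single vector (an assumption
     about the clone ABI when one vector cannot hold all results);
     RATYPE then records the array type and RTYPE its vector element
     type, which the code further down reads back element by element.  */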
4217
4218 auto_vec<vec<tree> > vec_oprnds;
4219 auto_vec<unsigned> vec_oprnds_i;
4220 vec_oprnds.safe_grow_cleared (nargs, true);
4221 vec_oprnds_i.safe_grow_cleared (nargs, true);
4222 for (j = 0; j < ncopies; ++j)
4223 {
4224 /* Build argument list for the vectorized call. */
4225 if (j == 0)
4226 vargs.create (nargs);
4227 else
4228 vargs.truncate (0);
4229
4230 for (i = 0; i < nargs; i++)
4231 {
4232 unsigned int k, l, m, o;
4233 tree atype;
4234 op = gimple_call_arg (stmt, i);
4235 switch (bestn->simdclone->args[i].arg_type)
4236 {
4237 case SIMD_CLONE_ARG_TYPE_VECTOR:
4238 atype = bestn->simdclone->args[i].vector_type;
4239 o = vector_unroll_factor (nunits,
4240 simd_clone_subparts (atype));
4241 for (m = j * o; m < (j + 1) * o; m++)
4242 {
4243 if (simd_clone_subparts (atype)
4244 < simd_clone_subparts (arginfo[i].vectype))
4245 {
4246 poly_uint64 prec = GET_MODE_BITSIZE (TYPE_MODE (atype));
4247 k = (simd_clone_subparts (arginfo[i].vectype)
4248 / simd_clone_subparts (atype));
4249 gcc_assert ((k & (k - 1)) == 0);
4250 if (m == 0)
4251 {
4252 vect_get_vec_defs_for_operand (vinfo, stmt_info,
4253 ncopies * o / k, op,
4254 &vec_oprnds[i]);
4255 vec_oprnds_i[i] = 0;
4256 vec_oprnd0 = vec_oprnds[i][vec_oprnds_i[i]++];
4257 }
4258 else
4259 {
4260 vec_oprnd0 = arginfo[i].op;
4261 if ((m & (k - 1)) == 0)
4262 vec_oprnd0 = vec_oprnds[i][vec_oprnds_i[i]++];
4263 }
4264 arginfo[i].op = vec_oprnd0;
4265 vec_oprnd0
4266 = build3 (BIT_FIELD_REF, atype, vec_oprnd0,
4267 bitsize_int (prec),
4268 bitsize_int ((m & (k - 1)) * prec));
4269 gassign *new_stmt
4270 = gimple_build_assign (make_ssa_name (atype),
4271 vec_oprnd0);
4272 vect_finish_stmt_generation (vinfo, stmt_info,
4273 new_stmt, gsi);
4274 vargs.safe_push (gimple_assign_lhs (new_stmt));
4275 }
4276 else
4277 {
4278 k = (simd_clone_subparts (atype)
4279 / simd_clone_subparts (arginfo[i].vectype));
4280 gcc_assert ((k & (k - 1)) == 0);
4281 vec<constructor_elt, va_gc> *ctor_elts;
4282 if (k != 1)
4283 vec_alloc (ctor_elts, k);
4284 else
4285 ctor_elts = NULL;
4286 for (l = 0; l < k; l++)
4287 {
4288 if (m == 0 && l == 0)
4289 {
4290 vect_get_vec_defs_for_operand (vinfo, stmt_info,
4291 k * o * ncopies,
4292 op,
4293 &vec_oprnds[i]);
4294 vec_oprnds_i[i] = 0;
4295 vec_oprnd0 = vec_oprnds[i][vec_oprnds_i[i]++];
4296 }
4297 else
4298 vec_oprnd0 = vec_oprnds[i][vec_oprnds_i[i]++];
4299 arginfo[i].op = vec_oprnd0;
4300 if (k == 1)
4301 break;
4302 CONSTRUCTOR_APPEND_ELT (ctor_elts, NULL_TREE,
4303 vec_oprnd0);
4304 }
4305 if (k == 1)
4306 if (!useless_type_conversion_p (TREE_TYPE (vec_oprnd0),
4307 atype))
4308 {
4309 vec_oprnd0
4310 = build1 (VIEW_CONVERT_EXPR, atype, vec_oprnd0);
4311 gassign *new_stmt
4312 = gimple_build_assign (make_ssa_name (atype),
4313 vec_oprnd0);
4314 vect_finish_stmt_generation (vinfo, stmt_info,
4315 new_stmt, gsi);
4316 vargs.safe_push (gimple_assign_lhs (new_stmt));
4317 }
4318 else
4319 vargs.safe_push (vec_oprnd0);
4320 else
4321 {
4322 vec_oprnd0 = build_constructor (atype, ctor_elts);
4323 gassign *new_stmt
4324 = gimple_build_assign (make_ssa_name (atype),
4325 vec_oprnd0);
4326 vect_finish_stmt_generation (vinfo, stmt_info,
4327 new_stmt, gsi);
4328 vargs.safe_push (gimple_assign_lhs (new_stmt));
4329 }
4330 }
4331 }
4332 break;
4333 case SIMD_CLONE_ARG_TYPE_UNIFORM:
4334 vargs.safe_push (op);
4335 break;
4336 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
4337 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
4338 if (j == 0)
4339 {
4340 gimple_seq stmts;
4341 arginfo[i].op
4342 = force_gimple_operand (unshare_expr (arginfo[i].op),
4343 &stmts, true, NULL_TREE);
4344 if (stmts != NULL)
4345 {
4346 basic_block new_bb;
4347 edge pe = loop_preheader_edge (loop);
4348 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
4349 gcc_assert (!new_bb);
4350 }
4351 if (arginfo[i].simd_lane_linear)
4352 {
4353 vargs.safe_push (arginfo[i].op);
4354 break;
4355 }
4356 tree phi_res = copy_ssa_name (op);
4357 gphi *new_phi = create_phi_node (phi_res, loop->header);
4358 add_phi_arg (new_phi, arginfo[i].op,
4359 loop_preheader_edge (loop), UNKNOWN_LOCATION);
4360 enum tree_code code
4361 = POINTER_TYPE_P (TREE_TYPE (op))
4362 ? POINTER_PLUS_EXPR : PLUS_EXPR;
4363 tree type = POINTER_TYPE_P (TREE_TYPE (op))
4364 ? sizetype : TREE_TYPE (op);
4365 poly_widest_int cst
4366 = wi::mul (bestn->simdclone->args[i].linear_step,
4367 ncopies * nunits);
4368 tree tcst = wide_int_to_tree (type, cst);
4369 tree phi_arg = copy_ssa_name (op);
4370 gassign *new_stmt
4371 = gimple_build_assign (phi_arg, code, phi_res, tcst);
4372 gimple_stmt_iterator si = gsi_after_labels (loop->header);
4373 gsi_insert_after (&si, new_stmt, GSI_NEW_STMT);
4374 add_phi_arg (new_phi, phi_arg, loop_latch_edge (loop),
4375 UNKNOWN_LOCATION);
4376 arginfo[i].op = phi_res;
4377 vargs.safe_push (phi_res);
4378 }
4379 else
4380 {
4381 enum tree_code code
4382 = POINTER_TYPE_P (TREE_TYPE (op))
4383 ? POINTER_PLUS_EXPR : PLUS_EXPR;
4384 tree type = POINTER_TYPE_P (TREE_TYPE (op))
4385 ? sizetype : TREE_TYPE (op);
4386 poly_widest_int cst
4387 = wi::mul (bestn->simdclone->args[i].linear_step,
4388 j * nunits);
4389 tree tcst = wide_int_to_tree (type, cst);
4390 new_temp = make_ssa_name (TREE_TYPE (op));
4391 gassign *new_stmt
4392 = gimple_build_assign (new_temp, code,
4393 arginfo[i].op, tcst);
4394 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4395 vargs.safe_push (new_temp);
4396 }
4397 break;
4398 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
4399 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP:
4400 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
4401 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP:
4402 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
4403 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
4404 default:
4405 gcc_unreachable ();
4406 }
4407 }
4408
4409 gcall *new_call = gimple_build_call_vec (fndecl, vargs);
4410 if (vec_dest)
4411 {
4412 gcc_assert (ratype
4413 || known_eq (simd_clone_subparts (rtype), nunits));
4414 if (ratype)
4415 new_temp = create_tmp_var (ratype);
4416 else if (useless_type_conversion_p (vectype, rtype))
4417 new_temp = make_ssa_name (vec_dest, new_call);
4418 else
4419 new_temp = make_ssa_name (rtype, new_call);
4420 gimple_call_set_lhs (new_call, new_temp);
4421 }
4422 vect_finish_stmt_generation (vinfo, stmt_info, new_call, gsi);
4423 gimple *new_stmt = new_call;
4424
4425 if (vec_dest)
4426 {
4427 if (!multiple_p (simd_clone_subparts (vectype), nunits))
4428 {
4429 unsigned int k, l;
4430 poly_uint64 prec = GET_MODE_BITSIZE (TYPE_MODE (vectype));
4431 poly_uint64 bytes = GET_MODE_SIZE (TYPE_MODE (vectype));
4432 k = vector_unroll_factor (nunits,
4433 simd_clone_subparts (vectype));
4434 gcc_assert ((k & (k - 1)) == 0);
4435 for (l = 0; l < k; l++)
4436 {
4437 tree t;
4438 if (ratype)
4439 {
4440 t = build_fold_addr_expr (new_temp);
4441 t = build2 (MEM_REF, vectype, t,
4442 build_int_cst (TREE_TYPE (t), l * bytes));
4443 }
4444 else
4445 t = build3 (BIT_FIELD_REF, vectype, new_temp,
4446 bitsize_int (prec), bitsize_int (l * prec));
4447 new_stmt = gimple_build_assign (make_ssa_name (vectype), t);
4448 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4449
4450 if (j == 0 && l == 0)
4451 *vec_stmt = new_stmt;
4452 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
4453 }
4454
4455 if (ratype)
4456 vect_clobber_variable (vinfo, stmt_info, gsi, new_temp);
4457 continue;
4458 }
4459 else if (!multiple_p (nunits, simd_clone_subparts (vectype)))
4460 {
4461 unsigned int k = (simd_clone_subparts (vectype)
4462 / simd_clone_subparts (rtype));
4463 gcc_assert ((k & (k - 1)) == 0);
4464 if ((j & (k - 1)) == 0)
4465 vec_alloc (ret_ctor_elts, k);
4466 if (ratype)
4467 {
4468 unsigned int m, o;
4469 o = vector_unroll_factor (nunits,
4470 simd_clone_subparts (rtype));
4471 for (m = 0; m < o; m++)
4472 {
4473 tree tem = build4 (ARRAY_REF, rtype, new_temp,
4474 size_int (m), NULL_TREE, NULL_TREE);
4475 new_stmt = gimple_build_assign (make_ssa_name (rtype),
4476 tem);
4477 vect_finish_stmt_generation (vinfo, stmt_info,
4478 new_stmt, gsi);
4479 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE,
4480 gimple_assign_lhs (new_stmt));
4481 }
4482 vect_clobber_variable (vinfo, stmt_info, gsi, new_temp);
4483 }
4484 else
4485 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE, new_temp);
4486 if ((j & (k - 1)) != k - 1)
4487 continue;
4488 vec_oprnd0 = build_constructor (vectype, ret_ctor_elts);
4489 new_stmt
4490 = gimple_build_assign (make_ssa_name (vec_dest), vec_oprnd0);
4491 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4492
4493 if ((unsigned) j == k - 1)
4494 *vec_stmt = new_stmt;
4495 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
4496 continue;
4497 }
4498 else if (ratype)
4499 {
4500 tree t = build_fold_addr_expr (new_temp);
4501 t = build2 (MEM_REF, vectype, t,
4502 build_int_cst (TREE_TYPE (t), 0));
4503 new_stmt = gimple_build_assign (make_ssa_name (vec_dest), t);
4504 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4505 vect_clobber_variable (vinfo, stmt_info, gsi, new_temp);
4506 }
4507 else if (!useless_type_conversion_p (vectype, rtype))
4508 {
4509 vec_oprnd0 = build1 (VIEW_CONVERT_EXPR, vectype, new_temp);
4510 new_stmt
4511 = gimple_build_assign (make_ssa_name (vec_dest), vec_oprnd0);
4512 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4513 }
4514 }
4515
4516 if (j == 0)
4517 *vec_stmt = new_stmt;
4518 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
4519 }
4520
4521 for (i = 0; i < nargs; ++i)
4522 {
4523 vec<tree> oprndsi = vec_oprnds[i];
4524 oprndsi.release ();
4525 }
4526 vargs.release ();
4527
4528 /* The call in STMT might prevent it from being removed in dce.
4529 We cannot remove it here, however, due to the way the ssa name
4530 it defines is mapped to the new definition. So just replace the
4531 rhs of the statement with something harmless. */
4532
4533 if (slp_node)
4534 return true;
4535
4536 gimple *new_stmt;
4537 if (scalar_dest)
4538 {
4539 type = TREE_TYPE (scalar_dest);
4540 lhs = gimple_call_lhs (vect_orig_stmt (stmt_info)->stmt);
4541 new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
4542 }
4543 else
4544 new_stmt = gimple_build_nop ();
4545 vinfo->replace_stmt (gsi, vect_orig_stmt (stmt_info), new_stmt);
4546 unlink_stmt_vdef (stmt);
4547
4548 return true;
4549 }
4550
4551
4552 /* Function vect_gen_widened_results_half
4553
4554 Create a vector stmt whose code, number of arguments, and result
4555 variable are CODE, OP_TYPE, and VEC_DEST, and whose arguments are
4556 VEC_OPRND0 and VEC_OPRND1. The new vector stmt is to be inserted at GSI.
4559 STMT_INFO is the original scalar stmt that we are vectorizing. */
4560
4561 static gimple *
4562 vect_gen_widened_results_half (vec_info *vinfo, enum tree_code code,
4563 tree vec_oprnd0, tree vec_oprnd1, int op_type,
4564 tree vec_dest, gimple_stmt_iterator *gsi,
4565 stmt_vec_info stmt_info)
4566 {
4567 gimple *new_stmt;
4568 tree new_temp;
4569
4570 /* Generate half of the widened result: */
4571 gcc_assert (op_type == TREE_CODE_LENGTH (code));
4572 if (op_type != binary_op)
4573 vec_oprnd1 = NULL;
4574 new_stmt = gimple_build_assign (vec_dest, code, vec_oprnd0, vec_oprnd1);
4575 new_temp = make_ssa_name (vec_dest, new_stmt);
4576 gimple_assign_set_lhs (new_stmt, new_temp);
4577 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4578
4579 return new_stmt;
4580 }
4581
4582
4583 /* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
4584 For multi-step conversions store the resulting vectors and call the function
4585 recursively. */
4586
4587 static void
4588 vect_create_vectorized_demotion_stmts (vec_info *vinfo, vec<tree> *vec_oprnds,
4589 int multi_step_cvt,
4590 stmt_vec_info stmt_info,
4591 vec<tree> &vec_dsts,
4592 gimple_stmt_iterator *gsi,
4593 slp_tree slp_node, enum tree_code code)
4594 {
4595 unsigned int i;
4596 tree vop0, vop1, new_tmp, vec_dest;
4597
4598 vec_dest = vec_dsts.pop ();
4599
4600 for (i = 0; i < vec_oprnds->length (); i += 2)
4601 {
4602 /* Create demotion operation. */
4603 vop0 = (*vec_oprnds)[i];
4604 vop1 = (*vec_oprnds)[i + 1];
4605 gassign *new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
4606 new_tmp = make_ssa_name (vec_dest, new_stmt);
4607 gimple_assign_set_lhs (new_stmt, new_tmp);
4608 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4609
4610 if (multi_step_cvt)
4611 /* Store the resulting vector for the next recursive call. */
4612 (*vec_oprnds)[i/2] = new_tmp;
4613 else
4614 {
4615 /* This is the last step of the conversion sequence. Store the
4616 vectors in SLP_NODE or in the vector info of the scalar statement
4617 (or in the STMT_VINFO_RELATED_STMT chain). */
4618 if (slp_node)
4619 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4620 else
4621 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
4622 }
4623 }
4624
4625 /* For multi-step demotion operations we first generate demotion operations
4626 from the source type to the intermediate types, and then combine the
4627 results (stored in VEC_OPRNDS) with a demotion operation to the
4628 destination type. */
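  /* Worked example (illustrative types): demoting four V4SI operands to
     a single V16QI with MULTI_STEP_CVT == 1 first packs them pairwise
     into two V8HI vectors in the loop above, then recurses once with
     VEC_PACK_TRUNC_EXPR to produce the final V16QI result.  */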
4629 if (multi_step_cvt)
4630 {
4631 /* At each level of recursion we have half of the operands we had at the
4632 previous level. */
4633 vec_oprnds->truncate ((i+1)/2);
4634 vect_create_vectorized_demotion_stmts (vinfo, vec_oprnds,
4635 multi_step_cvt - 1,
4636 stmt_info, vec_dsts, gsi,
4637 slp_node, VEC_PACK_TRUNC_EXPR);
4638 }
4639
4640 vec_dsts.quick_push (vec_dest);
4641 }
4642
4643
4644 /* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
4645 and VEC_OPRNDS1, for a binary operation associated with scalar statement
4646 STMT_INFO. For multi-step conversions store the resulting vectors and
4647 call the function recursively. */
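/* For example (a sketch; the exact codes depend on the operation and
   target), widening V8HI operands to V4SI results might use
   VEC_WIDEN_MULT_LO_EXPR as CODE1 and VEC_WIDEN_MULT_HI_EXPR as CODE2:
   each input (pair) yields a low half and a high half, doubling the
   number of result vectors collected in VEC_OPRNDS0.  */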
4648
4649 static void
4650 vect_create_vectorized_promotion_stmts (vec_info *vinfo,
4651 vec<tree> *vec_oprnds0,
4652 vec<tree> *vec_oprnds1,
4653 stmt_vec_info stmt_info, tree vec_dest,
4654 gimple_stmt_iterator *gsi,
4655 enum tree_code code1,
4656 enum tree_code code2, int op_type)
4657 {
4658 int i;
4659 tree vop0, vop1, new_tmp1, new_tmp2;
4660 gimple *new_stmt1, *new_stmt2;
4661 vec<tree> vec_tmp = vNULL;
4662
4663 vec_tmp.create (vec_oprnds0->length () * 2);
4664 FOR_EACH_VEC_ELT (*vec_oprnds0, i, vop0)
4665 {
4666 if (op_type == binary_op)
4667 vop1 = (*vec_oprnds1)[i];
4668 else
4669 vop1 = NULL_TREE;
4670
4671 /* Generate the two halves of the promotion operation. */
4672 new_stmt1 = vect_gen_widened_results_half (vinfo, code1, vop0, vop1,
4673 op_type, vec_dest, gsi,
4674 stmt_info);
4675 new_stmt2 = vect_gen_widened_results_half (vinfo, code2, vop0, vop1,
4676 op_type, vec_dest, gsi,
4677 stmt_info);
4678 if (is_gimple_call (new_stmt1))
4679 {
4680 new_tmp1 = gimple_call_lhs (new_stmt1);
4681 new_tmp2 = gimple_call_lhs (new_stmt2);
4682 }
4683 else
4684 {
4685 new_tmp1 = gimple_assign_lhs (new_stmt1);
4686 new_tmp2 = gimple_assign_lhs (new_stmt2);
4687 }
4688
4689 /* Store the results for the next step. */
4690 vec_tmp.quick_push (new_tmp1);
4691 vec_tmp.quick_push (new_tmp2);
4692 }
4693
4694 vec_oprnds0->release ();
4695 *vec_oprnds0 = vec_tmp;
4696 }
4697
4698 /* Create vectorized promotion stmts for widening stmts using only half the
4699 potential vector size for input. */
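/* E.g. (illustrative) a WIDEN_PLUS_EXPR whose result vector has the
   same number of lanes as its inputs: the inputs are first extended
   with NOP_EXPR to the output vector type and the addition is then
   done at full width, instead of using a lo/hi widening pair.  */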
4700 static void
4701 vect_create_half_widening_stmts (vec_info *vinfo,
4702 vec<tree> *vec_oprnds0,
4703 vec<tree> *vec_oprnds1,
4704 stmt_vec_info stmt_info, tree vec_dest,
4705 gimple_stmt_iterator *gsi,
4706 enum tree_code code1,
4707 int op_type)
4708 {
4709 int i;
4710 tree vop0, vop1;
4711 gimple *new_stmt1;
4712 gimple *new_stmt2;
4713 gimple *new_stmt3;
4714 vec<tree> vec_tmp = vNULL;
4715
4716 vec_tmp.create (vec_oprnds0->length ());
4717 FOR_EACH_VEC_ELT (*vec_oprnds0, i, vop0)
4718 {
4719 tree new_tmp1, new_tmp2, new_tmp3, out_type;
4720
4721 gcc_assert (op_type == binary_op);
4722 vop1 = (*vec_oprnds1)[i];
4723
4724 /* Widen the first vector input. */
4725 out_type = TREE_TYPE (vec_dest);
4726 new_tmp1 = make_ssa_name (out_type);
4727 new_stmt1 = gimple_build_assign (new_tmp1, NOP_EXPR, vop0);
4728 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt1, gsi);
4729 if (VECTOR_TYPE_P (TREE_TYPE (vop1)))
4730 {
4731 /* Widen the second vector input. */
4732 new_tmp2 = make_ssa_name (out_type);
4733 new_stmt2 = gimple_build_assign (new_tmp2, NOP_EXPR, vop1);
4734 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt2, gsi);
4735 /* Perform the operation with both vector inputs widened. */
4736 new_stmt3 = gimple_build_assign (vec_dest, code1, new_tmp1, new_tmp2);
4737 }
4738 else
4739 {
4740 /* Perform the operation with the single vector input widened. */
4741 new_stmt3 = gimple_build_assign (vec_dest, code1, new_tmp1, vop1);
4742 }
4743
4744 new_tmp3 = make_ssa_name (vec_dest, new_stmt3);
4745 gimple_assign_set_lhs (new_stmt3, new_tmp3);
4746 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt3, gsi);
4747
4748 /* Store the results for the next step. */
4749 vec_tmp.quick_push (new_tmp3);
4750 }
4751
4752 vec_oprnds0->release ();
4753 *vec_oprnds0 = vec_tmp;
4754 }
4755
4756
4757 /* Check if STMT_INFO performs a conversion operation that can be vectorized.
4758 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
4759 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
4760 Return true if STMT_INFO is vectorizable in this way. */
4761
4762 static bool
4763 vectorizable_conversion (vec_info *vinfo,
4764 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
4765 gimple **vec_stmt, slp_tree slp_node,
4766 stmt_vector_for_cost *cost_vec)
4767 {
4768 tree vec_dest;
4769 tree scalar_dest;
4770 tree op0, op1 = NULL_TREE;
4771 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
4772 enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
4773 enum tree_code codecvt1 = ERROR_MARK, codecvt2 = ERROR_MARK;
4774 tree new_temp;
4775 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
4776 int ndts = 2;
4777 poly_uint64 nunits_in;
4778 poly_uint64 nunits_out;
4779 tree vectype_out, vectype_in;
4780 int ncopies, i;
4781 tree lhs_type, rhs_type;
4782 enum { NARROW, NONE, WIDEN } modifier;
4783 vec<tree> vec_oprnds0 = vNULL;
4784 vec<tree> vec_oprnds1 = vNULL;
4785 tree vop0;
4786 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
4787 int multi_step_cvt = 0;
4788 vec<tree> interm_types = vNULL;
4789 tree intermediate_type, cvt_type = NULL_TREE;
4790 int op_type;
4791 unsigned short fltsz;
4792
4793 /* Is STMT a vectorizable conversion? */
4794
4795 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4796 return false;
4797
4798 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
4799 && ! vec_stmt)
4800 return false;
4801
4802 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
4803 if (!stmt)
4804 return false;
4805
4806 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
4807 return false;
4808
4809 code = gimple_assign_rhs_code (stmt);
4810 if (!CONVERT_EXPR_CODE_P (code)
4811 && code != FIX_TRUNC_EXPR
4812 && code != FLOAT_EXPR
4813 && code != WIDEN_PLUS_EXPR
4814 && code != WIDEN_MINUS_EXPR
4815 && code != WIDEN_MULT_EXPR
4816 && code != WIDEN_LSHIFT_EXPR)
4817 return false;
4818
4819 bool widen_arith = (code == WIDEN_PLUS_EXPR
4820 || code == WIDEN_MINUS_EXPR
4821 || code == WIDEN_MULT_EXPR
4822 || code == WIDEN_LSHIFT_EXPR);
4823 op_type = TREE_CODE_LENGTH (code);
4824
4825 /* Check types of lhs and rhs. */
4826 scalar_dest = gimple_assign_lhs (stmt);
4827 lhs_type = TREE_TYPE (scalar_dest);
4828 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
4829
4830 /* Check the operands of the operation. */
4831 slp_tree slp_op0, slp_op1 = NULL;
4832 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
4833 0, &op0, &slp_op0, &dt[0], &vectype_in))
4834 {
4835 if (dump_enabled_p ())
4836 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4837 "use not simple.\n");
4838 return false;
4839 }
4840
4841 rhs_type = TREE_TYPE (op0);
4842 if ((code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
4843 && !((INTEGRAL_TYPE_P (lhs_type)
4844 && INTEGRAL_TYPE_P (rhs_type))
4845 || (SCALAR_FLOAT_TYPE_P (lhs_type)
4846 && SCALAR_FLOAT_TYPE_P (rhs_type))))
4847 return false;
4848
4849 if (!VECTOR_BOOLEAN_TYPE_P (vectype_out)
4850 && ((INTEGRAL_TYPE_P (lhs_type)
4851 && !type_has_mode_precision_p (lhs_type))
4852 || (INTEGRAL_TYPE_P (rhs_type)
4853 && !type_has_mode_precision_p (rhs_type))))
4854 {
4855 if (dump_enabled_p ())
4856 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4857 "type conversion to/from bit-precision unsupported."
4858 "\n");
4859 return false;
4860 }
4861
4862 if (op_type == binary_op)
4863 {
4864 gcc_assert (code == WIDEN_MULT_EXPR || code == WIDEN_LSHIFT_EXPR
4865 || code == WIDEN_PLUS_EXPR || code == WIDEN_MINUS_EXPR);
4866
4867 op1 = gimple_assign_rhs2 (stmt);
4868 tree vectype1_in;
4869 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 1,
4870 &op1, &slp_op1, &dt[1], &vectype1_in))
4871 {
4872 if (dump_enabled_p ())
4873 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4874 "use not simple.\n");
4875 return false;
4876 }
4877 /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
4878 OP1. */
4879 if (!vectype_in)
4880 vectype_in = vectype1_in;
4881 }
4882
4883 /* If op0 is an external or constant def, infer the vector type
4884 from the scalar type. */
4885 if (!vectype_in)
4886 vectype_in = get_vectype_for_scalar_type (vinfo, rhs_type, slp_node);
4887 if (vec_stmt)
4888 gcc_assert (vectype_in);
4889 if (!vectype_in)
4890 {
4891 if (dump_enabled_p ())
4892 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4893 "no vectype for scalar type %T\n", rhs_type);
4894
4895 return false;
4896 }
4897
4898 if (VECTOR_BOOLEAN_TYPE_P (vectype_out)
4899 && !VECTOR_BOOLEAN_TYPE_P (vectype_in))
4900 {
4901 if (dump_enabled_p ())
4902 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4903 "can't convert between boolean and non "
4904 "boolean vectors %T\n", rhs_type);
4905
4906 return false;
4907 }
4908
4909 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
4910 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
4911 if (known_eq (nunits_out, nunits_in))
4912 modifier = widen_arith ? WIDEN : NONE;
4916 else if (multiple_p (nunits_out, nunits_in))
4917 modifier = NARROW;
4918 else
4919 {
4920 gcc_checking_assert (multiple_p (nunits_in, nunits_out));
4921 modifier = WIDEN;
4922 }
4923
4924 /* Multiple types in SLP are handled by creating the appropriate number of
4925 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4926 case of SLP. */
4927 if (slp_node)
4928 ncopies = 1;
4929 else if (modifier == NARROW)
4930 ncopies = vect_get_num_copies (loop_vinfo, vectype_out);
4931 else
4932 ncopies = vect_get_num_copies (loop_vinfo, vectype_in);
4933
4934 /* Sanity check: make sure that at least one copy of the vectorized stmt
4935 needs to be generated. */
4936 gcc_assert (ncopies >= 1);
4937
4938 bool found_mode = false;
4939 scalar_mode lhs_mode = SCALAR_TYPE_MODE (lhs_type);
4940 scalar_mode rhs_mode = SCALAR_TYPE_MODE (rhs_type);
4941 opt_scalar_mode rhs_mode_iter;
4942
4943 /* Supportable by target? */
4944 switch (modifier)
4945 {
4946 case NONE:
4947 if (code != FIX_TRUNC_EXPR
4948 && code != FLOAT_EXPR
4949 && !CONVERT_EXPR_CODE_P (code))
4950 return false;
4951 if (supportable_convert_operation (code, vectype_out, vectype_in, &code1))
4952 break;
4953 /* FALLTHRU */
4954 unsupported:
4955 if (dump_enabled_p ())
4956 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4957 "conversion not supported by target.\n");
4958 return false;
4959
4960 case WIDEN:
4961 if (known_eq (nunits_in, nunits_out))
4962 {
4963 if (!supportable_half_widening_operation (code, vectype_out,
4964 vectype_in, &code1))
4965 goto unsupported;
4966 gcc_assert (!(multi_step_cvt && op_type == binary_op));
4967 break;
4968 }
4969 if (supportable_widening_operation (vinfo, code, stmt_info,
4970 vectype_out, vectype_in, &code1,
4971 &code2, &multi_step_cvt,
4972 &interm_types))
4973 {
4974 /* Binary widening operation can only be supported directly by the
4975 architecture. */
4976 gcc_assert (!(multi_step_cvt && op_type == binary_op));
4977 break;
4978 }
4979
4980 if (code != FLOAT_EXPR
4981 || GET_MODE_SIZE (lhs_mode) <= GET_MODE_SIZE (rhs_mode))
4982 goto unsupported;
4983
4984 fltsz = GET_MODE_SIZE (lhs_mode);
4985 FOR_EACH_2XWIDER_MODE (rhs_mode_iter, rhs_mode)
4986 {
4987 rhs_mode = rhs_mode_iter.require ();
4988 if (GET_MODE_SIZE (rhs_mode) > fltsz)
4989 break;
4990
4991 cvt_type
4992 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
4993 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
4994 if (cvt_type == NULL_TREE)
4995 goto unsupported;
4996
4997 if (GET_MODE_SIZE (rhs_mode) == fltsz)
4998 {
4999 if (!supportable_convert_operation (code, vectype_out,
5000 cvt_type, &codecvt1))
5001 goto unsupported;
5002 }
5003 else if (!supportable_widening_operation (vinfo, code, stmt_info,
5004 vectype_out, cvt_type,
5005 &codecvt1, &codecvt2,
5006 &multi_step_cvt,
5007 &interm_types))
5008 continue;
5009 else
5010 gcc_assert (multi_step_cvt == 0);
5011
5012 if (supportable_widening_operation (vinfo, NOP_EXPR, stmt_info,
5013 cvt_type,
5014 vectype_in, &code1, &code2,
5015 &multi_step_cvt, &interm_types))
5016 {
5017 found_mode = true;
5018 break;
5019 }
5020 }
5021
5022 if (!found_mode)
5023 goto unsupported;
5024
5025 if (GET_MODE_SIZE (rhs_mode) == fltsz)
5026 codecvt2 = ERROR_MARK;
5027 else
5028 {
5029 multi_step_cvt++;
5030 interm_types.safe_push (cvt_type);
5031 cvt_type = NULL_TREE;
5032 }
5033 break;
5034
5035 case NARROW:
5036 gcc_assert (op_type == unary_op);
5037 if (supportable_narrowing_operation (code, vectype_out, vectype_in,
5038 &code1, &multi_step_cvt,
5039 &interm_types))
5040 break;
5041
5042 if (code != FIX_TRUNC_EXPR
5043 || GET_MODE_SIZE (lhs_mode) >= GET_MODE_SIZE (rhs_mode))
5044 goto unsupported;
5045
5046 cvt_type
5047 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
5048 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
5049 if (cvt_type == NULL_TREE)
5050 goto unsupported;
5051 if (!supportable_convert_operation (code, cvt_type, vectype_in,
5052 &codecvt1))
5053 goto unsupported;
5054 if (supportable_narrowing_operation (NOP_EXPR, vectype_out, cvt_type,
5055 &code1, &multi_step_cvt,
5056 &interm_types))
5057 break;
5058 goto unsupported;
5059
5060 default:
5061 gcc_unreachable ();
5062 }
5063
5064 if (!vec_stmt) /* transformation not required. */
5065 {
5066 if (slp_node
5067 && (!vect_maybe_update_slp_op_vectype (slp_op0, vectype_in)
5068 || !vect_maybe_update_slp_op_vectype (slp_op1, vectype_in)))
5069 {
5070 if (dump_enabled_p ())
5071 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5072 "incompatible vector types for invariants\n");
5073 return false;
5074 }
5075 DUMP_VECT_SCOPE ("vectorizable_conversion");
5076 if (modifier == NONE)
5077 {
5078 STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type;
5079 vect_model_simple_cost (vinfo, stmt_info, ncopies, dt, ndts, slp_node,
5080 cost_vec);
5081 }
5082 else if (modifier == NARROW)
5083 {
5084 STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type;
5085 /* The final packing step produces one vector result per copy. */
5086 unsigned int nvectors
5087 = (slp_node ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node) : ncopies);
5088 vect_model_promotion_demotion_cost (stmt_info, dt, nvectors,
5089 multi_step_cvt, cost_vec,
5090 widen_arith);
5091 }
5092 else
5093 {
5094 STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type;
5095 /* The initial unpacking step produces two vector results
5096 per copy. MULTI_STEP_CVT is 0 for a single conversion,
5097 so >> MULTI_STEP_CVT divides by 2^(number of steps - 1). */
5098 unsigned int nvectors
5099 = (slp_node
5100 ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node) >> multi_step_cvt
5101 : ncopies * 2);
5102 vect_model_promotion_demotion_cost (stmt_info, dt, nvectors,
5103 multi_step_cvt, cost_vec,
5104 widen_arith);
5105 }
5106 interm_types.release ();
5107 return true;
5108 }
5109
5110 /* Transform. */
5111 if (dump_enabled_p ())
5112 dump_printf_loc (MSG_NOTE, vect_location,
5113 "transform conversion. ncopies = %d.\n", ncopies);
5114
5115 if (op_type == binary_op)
5116 {
5117 if (CONSTANT_CLASS_P (op0))
5118 op0 = fold_convert (TREE_TYPE (op1), op0);
5119 else if (CONSTANT_CLASS_P (op1))
5120 op1 = fold_convert (TREE_TYPE (op0), op1);
5121 }
5122
5123 /* In case of multi-step conversion, we first generate conversion operations
5124 to the intermediate types, and then from those types to the final one.
5125 We create vector destinations for the intermediate type (TYPES) received
5126 from supportable_*_operation, and store them in the correct order
5127 for future use in vect_create_vectorized_*_stmts (). */
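/* E.g. (a sketch): a QImode -> SImode conversion with 128-bit vectors
goes V16QI -> V8HI -> V4SI, so INTERM_TYPES holds the V8HI step and
VEC_DSTS ends up as { V4SI dest, V8HI dest }: the destination of the
innermost (first) conversion step is pushed last and used first
during generation. */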
5128 auto_vec<tree> vec_dsts (multi_step_cvt + 1);
5129 vec_dest = vect_create_destination_var (scalar_dest,
5130 (cvt_type && modifier == WIDEN)
5131 ? cvt_type : vectype_out);
5132 vec_dsts.quick_push (vec_dest);
5133
5134 if (multi_step_cvt)
5135 {
5136 for (i = interm_types.length () - 1;
5137 interm_types.iterate (i, &intermediate_type); i--)
5138 {
5139 vec_dest = vect_create_destination_var (scalar_dest,
5140 intermediate_type);
5141 vec_dsts.quick_push (vec_dest);
5142 }
5143 }
5144
5145 if (cvt_type)
5146 vec_dest = vect_create_destination_var (scalar_dest,
5147 modifier == WIDEN
5148 ? vectype_out : cvt_type);
5149
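/* For a narrowing conversion each result vector consumes
2 * 2^MULTI_STEP_CVT input vector defs (e.g. two for a single-step
narrowing), hence the NINPUTS factor below; widening and plain
conversions need only one input def per copy. */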
5150 int ninputs = 1;
5151 if (!slp_node)
5152 {
5153 if (modifier == WIDEN)
5154 ;
5155 else if (modifier == NARROW)
5156 {
5157 if (multi_step_cvt)
5158 ninputs = vect_pow2 (multi_step_cvt);
5159 ninputs *= 2;
5160 }
5161 }
5162
5163 switch (modifier)
5164 {
5165 case NONE:
5166 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
5167 op0, &vec_oprnds0);
5168 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5169 {
5170 /* Arguments are ready, create the new vector stmt. */
5171 gcc_assert (TREE_CODE_LENGTH (code1) == unary_op);
5172 gassign *new_stmt = gimple_build_assign (vec_dest, code1, vop0);
5173 new_temp = make_ssa_name (vec_dest, new_stmt);
5174 gimple_assign_set_lhs (new_stmt, new_temp);
5175 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
5176
5177 if (slp_node)
5178 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
5179 else
5180 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
5181 }
5182 break;
5183
5184 case WIDEN:
5185 /* In case the vectorization factor (VF) is bigger than the number
5186 of elements that we can fit in a vectype (nunits), we have to
5187 generate more than one vector stmt - i.e., we need to "unroll"
5188 the vector stmt by a factor VF/nunits. */
5189 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies * ninputs,
5190 op0, &vec_oprnds0,
5191 code == WIDEN_LSHIFT_EXPR ? NULL_TREE : op1,
5192 &vec_oprnds1);
5193 if (code == WIDEN_LSHIFT_EXPR)
5194 {
5195 int oprnds_size = vec_oprnds0.length ();
5196 vec_oprnds1.create (oprnds_size);
5197 for (i = 0; i < oprnds_size; ++i)
5198 vec_oprnds1.quick_push (op1);
5199 }
5200 /* Arguments are ready. Create the new vector stmts. */
5201 for (i = multi_step_cvt; i >= 0; i--)
5202 {
5203 tree this_dest = vec_dsts[i];
5204 enum tree_code c1 = code1, c2 = code2;
5205 if (i == 0 && codecvt2 != ERROR_MARK)
5206 {
5207 c1 = codecvt1;
5208 c2 = codecvt2;
5209 }
5210 if (known_eq (nunits_out, nunits_in))
5211 vect_create_half_widening_stmts (vinfo, &vec_oprnds0,
5212 &vec_oprnds1, stmt_info,
5213 this_dest, gsi,
5214 c1, op_type);
5215 else
5216 vect_create_vectorized_promotion_stmts (vinfo, &vec_oprnds0,
5217 &vec_oprnds1, stmt_info,
5218 this_dest, gsi,
5219 c1, c2, op_type);
5220 }
5221
5222 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5223 {
5224 gimple *new_stmt;
5225 if (cvt_type)
5226 {
5227 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
5228 new_temp = make_ssa_name (vec_dest);
5229 new_stmt = gimple_build_assign (new_temp, codecvt1, vop0);
5230 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
5231 }
5232 else
5233 new_stmt = SSA_NAME_DEF_STMT (vop0);
5234
5235 if (slp_node)
5236 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
5237 else
5238 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
5239 }
5240 break;
5241
5242 case NARROW:
5243 /* In case the vectorization factor (VF) is bigger than the number
5244 of elements that we can fit in a vectype (nunits), we have to
5245 generate more than one vector stmt - i.e., we need to "unroll"
5246 the vector stmt by a factor VF/nunits. */
5247 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies * ninputs,
5248 op0, &vec_oprnds0);
5249 /* Arguments are ready. Create the new vector stmts. */
5250 if (cvt_type)
5251 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5252 {
5253 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
5254 new_temp = make_ssa_name (vec_dest);
5255 gassign *new_stmt
5256 = gimple_build_assign (new_temp, codecvt1, vop0);
5257 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
5258 vec_oprnds0[i] = new_temp;
5259 }
5260
5261 vect_create_vectorized_demotion_stmts (vinfo, &vec_oprnds0,
5262 multi_step_cvt,
5263 stmt_info, vec_dsts, gsi,
5264 slp_node, code1);
5265 break;
5266 }
5267 if (!slp_node)
5268 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
5269
5270 vec_oprnds0.release ();
5271 vec_oprnds1.release ();
5272 interm_types.release ();
5273
5274 return true;
5275 }
5276
5277 /* Return true if we can assume from the scalar form of STMT_INFO that
5278 neither the scalar nor the vector forms will generate code. STMT_INFO
5279 is known not to involve a data reference. */
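/* E.g. a VIEW_CONVERT_EXPR between equal-sized types, or a conversion
between int and unsigned int, changes neither the bits nor the mode
and therefore generates no code in either scalar or vector form. */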
5280
5281 bool
5282 vect_nop_conversion_p (stmt_vec_info stmt_info)
5283 {
5284 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
5285 if (!stmt)
5286 return false;
5287
5288 tree lhs = gimple_assign_lhs (stmt);
5289 tree_code code = gimple_assign_rhs_code (stmt);
5290 tree rhs = gimple_assign_rhs1 (stmt);
5291
5292 if (code == SSA_NAME || code == VIEW_CONVERT_EXPR)
5293 return true;
5294
5295 if (CONVERT_EXPR_CODE_P (code))
5296 return tree_nop_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs));
5297
5298 return false;
5299 }
5300
5301 /* Function vectorizable_assignment.
5302
5303 Check if STMT_INFO performs an assignment (copy) that can be vectorized.
5304 If VEC_STMT is also passed, vectorize the STMT_INFO: create a vectorized
5305 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
5306 Return true if STMT_INFO is vectorizable in this way. */
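/* This covers plain copies, PAREN_EXPR and no-op conversions; e.g.
(a sketch)

unsigned int u = (unsigned int) i;

is vectorized as a plain vector assignment, wrapped in a
VIEW_CONVERT_EXPR when the source and destination vector types
differ only in signedness. */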
5307
5308 static bool
5309 vectorizable_assignment (vec_info *vinfo,
5310 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
5311 gimple **vec_stmt, slp_tree slp_node,
5312 stmt_vector_for_cost *cost_vec)
5313 {
5314 tree vec_dest;
5315 tree scalar_dest;
5316 tree op;
5317 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
5318 tree new_temp;
5319 enum vect_def_type dt[1] = {vect_unknown_def_type};
5320 int ndts = 1;
5321 int ncopies;
5322 int i;
5323 vec<tree> vec_oprnds = vNULL;
5324 tree vop;
5325 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
5326 enum tree_code code;
5327 tree vectype_in;
5328
5329 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5330 return false;
5331
5332 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5333 && ! vec_stmt)
5334 return false;
5335
5336 /* Is STMT_INFO a vectorizable assignment? */
5337 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
5338 if (!stmt)
5339 return false;
5340
5341 scalar_dest = gimple_assign_lhs (stmt);
5342 if (TREE_CODE (scalar_dest) != SSA_NAME)
5343 return false;
5344
5345 if (STMT_VINFO_DATA_REF (stmt_info))
5346 return false;
5347
5348 code = gimple_assign_rhs_code (stmt);
5349 if (!(gimple_assign_single_p (stmt)
5350 || code == PAREN_EXPR
5351 || CONVERT_EXPR_CODE_P (code)))
5352 return false;
5353
5354 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
5355 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
5356
5357 /* Multiple types in SLP are handled by creating the appropriate number of
5358 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5359 case of SLP. */
5360 if (slp_node)
5361 ncopies = 1;
5362 else
5363 ncopies = vect_get_num_copies (loop_vinfo, vectype);
5364
5365 gcc_assert (ncopies >= 1);
5366
5367 slp_tree slp_op;
5368 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 0, &op, &slp_op,
5369 &dt[0], &vectype_in))
5370 {
5371 if (dump_enabled_p ())
5372 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5373 "use not simple.\n");
5374 return false;
5375 }
5376 if (!vectype_in)
5377 vectype_in = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op), slp_node);
5378
5379 /* We can handle NOP_EXPR and VIEW_CONVERT_EXPR conversions that do not
5380 change the number of elements or the vector size. */
5381 if ((CONVERT_EXPR_CODE_P (code)
5382 || code == VIEW_CONVERT_EXPR)
5383 && (!vectype_in
5384 || maybe_ne (TYPE_VECTOR_SUBPARTS (vectype_in), nunits)
5385 || maybe_ne (GET_MODE_SIZE (TYPE_MODE (vectype)),
5386 GET_MODE_SIZE (TYPE_MODE (vectype_in)))))
5387 return false;
5388
5389 if (VECTOR_BOOLEAN_TYPE_P (vectype)
5390 && !VECTOR_BOOLEAN_TYPE_P (vectype_in))
5391 {
5392 if (dump_enabled_p ())
5393 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5394 "can't convert between boolean and non "
5395 "boolean vectors %T\n", TREE_TYPE (op));
5396
5397 return false;
5398 }
5399
5400 /* We do not handle bit-precision changes. */
5401 if ((CONVERT_EXPR_CODE_P (code)
5402 || code == VIEW_CONVERT_EXPR)
5403 && INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
5404 && (!type_has_mode_precision_p (TREE_TYPE (scalar_dest))
5405 || !type_has_mode_precision_p (TREE_TYPE (op)))
5406 /* But a conversion that does not change the bit-pattern is ok. */
5407 && !((TYPE_PRECISION (TREE_TYPE (scalar_dest))
5408 > TYPE_PRECISION (TREE_TYPE (op)))
5409 && TYPE_UNSIGNED (TREE_TYPE (op))))
5410 {
5411 if (dump_enabled_p ())
5412 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5413 "type conversion to/from bit-precision "
5414 "unsupported.\n");
5415 return false;
5416 }
5417
5418 if (!vec_stmt) /* transformation not required. */
5419 {
5420 if (slp_node
5421 && !vect_maybe_update_slp_op_vectype (slp_op, vectype_in))
5422 {
5423 if (dump_enabled_p ())
5424 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5425 "incompatible vector types for invariants\n");
5426 return false;
5427 }
5428 STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
5429 DUMP_VECT_SCOPE ("vectorizable_assignment");
5430 if (!vect_nop_conversion_p (stmt_info))
5431 vect_model_simple_cost (vinfo, stmt_info, ncopies, dt, ndts, slp_node,
5432 cost_vec);
5433 return true;
5434 }
5435
5436 /* Transform. */
5437 if (dump_enabled_p ())
5438 dump_printf_loc (MSG_NOTE, vect_location, "transform assignment.\n");
5439
5440 /* Handle def. */
5441 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5442
5443 /* Handle use. */
5444 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies, op, &vec_oprnds);
5445
5446 /* Arguments are ready.  Create the new vector stmt. */
5447 FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
5448 {
5449 if (CONVERT_EXPR_CODE_P (code)
5450 || code == VIEW_CONVERT_EXPR)
5451 vop = build1 (VIEW_CONVERT_EXPR, vectype, vop);
5452 gassign *new_stmt = gimple_build_assign (vec_dest, vop);
5453 new_temp = make_ssa_name (vec_dest, new_stmt);
5454 gimple_assign_set_lhs (new_stmt, new_temp);
5455 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
5456 if (slp_node)
5457 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
5458 else
5459 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
5460 }
5461 if (!slp_node)
5462 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
5463
5464 vec_oprnds.release ();
5465 return true;
5466 }
5467
5468
5469 /* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE
5470 either as shift by a scalar or by a vector. */
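/* E.g. for LSHIFT_EXPR this first queries the vector-shifted-by-scalar
optab (optab_scalar) and falls back to the vector-shifted-by-vector
optab (optab_vector); it succeeds if either handler is implemented
for the vector mode of SCALAR_TYPE's vectype. */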
5471
5472 bool
5473 vect_supportable_shift (vec_info *vinfo, enum tree_code code, tree scalar_type)
5474 {
5475
5476 machine_mode vec_mode;
5477 optab optab;
5478 int icode;
5479 tree vectype;
5480
5481 vectype = get_vectype_for_scalar_type (vinfo, scalar_type);
5482 if (!vectype)
5483 return false;
5484
5485 optab = optab_for_tree_code (code, vectype, optab_scalar);
5486 if (!optab
5487 || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
5488 {
5489 optab = optab_for_tree_code (code, vectype, optab_vector);
5490 if (!optab
5491 || (optab_handler (optab, TYPE_MODE (vectype))
5492 == CODE_FOR_nothing))
5493 return false;
5494 }
5495
5496 vec_mode = TYPE_MODE (vectype);
5497 icode = (int) optab_handler (optab, vec_mode);
5498 if (icode == CODE_FOR_nothing)
5499 return false;
5500
5501 return true;
5502 }
5503
5504
5505 /* Function vectorizable_shift.
5506
5507 Check if STMT_INFO performs a shift operation that can be vectorized.
5508 If VEC_STMT is also passed, vectorize the STMT_INFO: create a vectorized
5509 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
5510 Return true if STMT_INFO is vectorizable in this way. */
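/* For example (a sketch): a loop-invariant shift amount, as in

for (i = 0; i < n; i++)
x[i] = y[i] << k;

can feed a vector/scalar shift pattern directly, whereas a varying
amount such as x[i] = y[i] << z[i] needs the vector/vector forms
with a vectorized shift-amount operand. */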
5511
5512 static bool
5513 vectorizable_shift (vec_info *vinfo,
5514 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
5515 gimple **vec_stmt, slp_tree slp_node,
5516 stmt_vector_for_cost *cost_vec)
5517 {
5518 tree vec_dest;
5519 tree scalar_dest;
5520 tree op0, op1 = NULL;
5521 tree vec_oprnd1 = NULL_TREE;
5522 tree vectype;
5523 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
5524 enum tree_code code;
5525 machine_mode vec_mode;
5526 tree new_temp;
5527 optab optab;
5528 int icode;
5529 machine_mode optab_op2_mode;
5530 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
5531 int ndts = 2;
5532 poly_uint64 nunits_in;
5533 poly_uint64 nunits_out;
5534 tree vectype_out;
5535 tree op1_vectype;
5536 int ncopies;
5537 int i;
5538 vec<tree> vec_oprnds0 = vNULL;
5539 vec<tree> vec_oprnds1 = vNULL;
5540 tree vop0, vop1;
5541 unsigned int k;
5542 bool scalar_shift_arg = true;
5543 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
5544 bool incompatible_op1_vectype_p = false;
5545
5546 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5547 return false;
5548
5549 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5550 && STMT_VINFO_DEF_TYPE (stmt_info) != vect_nested_cycle
5551 && ! vec_stmt)
5552 return false;
5553
5554 /* Is STMT a vectorizable shift operation? */
5555 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
5556 if (!stmt)
5557 return false;
5558
5559 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
5560 return false;
5561
5562 code = gimple_assign_rhs_code (stmt);
5563
5564 if (!(code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
5565 || code == RROTATE_EXPR))
5566 return false;
5567
5568 scalar_dest = gimple_assign_lhs (stmt);
5569 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
5570 if (!type_has_mode_precision_p (TREE_TYPE (scalar_dest)))
5571 {
5572 if (dump_enabled_p ())
5573 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5574 "bit-precision shifts not supported.\n");
5575 return false;
5576 }
5577
5578 slp_tree slp_op0;
5579 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
5580 0, &op0, &slp_op0, &dt[0], &vectype))
5581 {
5582 if (dump_enabled_p ())
5583 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5584 "use not simple.\n");
5585 return false;
5586 }
5587 /* If op0 is an external or constant def, infer the vector type
5588 from the scalar type. */
5589 if (!vectype)
5590 vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op0), slp_node);
5591 if (vec_stmt)
5592 gcc_assert (vectype);
5593 if (!vectype)
5594 {
5595 if (dump_enabled_p ())
5596 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5597 "no vectype for scalar type\n");
5598 return false;
5599 }
5600
5601 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
5602 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
5603 if (maybe_ne (nunits_out, nunits_in))
5604 return false;
5605
5606 stmt_vec_info op1_def_stmt_info;
5607 slp_tree slp_op1;
5608 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 1, &op1, &slp_op1,
5609 &dt[1], &op1_vectype, &op1_def_stmt_info))
5610 {
5611 if (dump_enabled_p ())
5612 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5613 "use not simple.\n");
5614 return false;
5615 }
5616
5617 /* Multiple types in SLP are handled by creating the appropriate number of
5618 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5619 case of SLP. */
5620 if (slp_node)
5621 ncopies = 1;
5622 else
5623 ncopies = vect_get_num_copies (loop_vinfo, vectype);
5624
5625 gcc_assert (ncopies >= 1);
5626
5627 /* Determine whether the shift amount is a vector or a scalar.  If the
5628 shift/rotate amount is a vector, use the vector/vector shift optabs. */
5629
5630 if ((dt[1] == vect_internal_def
5631 || dt[1] == vect_induction_def
5632 || dt[1] == vect_nested_cycle)
5633 && !slp_node)
5634 scalar_shift_arg = false;
5635 else if (dt[1] == vect_constant_def
5636 || dt[1] == vect_external_def
5637 || dt[1] == vect_internal_def)
5638 {
5639 /* In SLP we need to check whether the shift count is the same
5640 in all of the SLP stmts; in loops, a constant or invariant
5641 shift count is always a scalar shift. */
5642 if (slp_node)
5643 {
5644 vec<stmt_vec_info> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
5645 stmt_vec_info slpstmt_info;
5646
5647 FOR_EACH_VEC_ELT (stmts, k, slpstmt_info)
5648 {
5649 gassign *slpstmt = as_a <gassign *> (slpstmt_info->stmt);
5650 if (!operand_equal_p (gimple_assign_rhs2 (slpstmt), op1, 0))
5651 scalar_shift_arg = false;
5652 }
5653
5654 /* For internal SLP defs we have to make sure we see scalar stmts
5655 for all vector elements.
5656 ??? For different vectors we could resort to a different
5657 scalar shift operand but code-generation below simply always
5658 takes the first. */
5659 if (dt[1] == vect_internal_def
5660 && maybe_ne (nunits_out * SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node),
5661 stmts.length ()))
5662 scalar_shift_arg = false;
5663 }
5664
5665 /* If the shift amount is computed by a pattern stmt we cannot
5666 use the scalar amount directly thus give up and use a vector
5667 shift. */
5668 if (op1_def_stmt_info && is_pattern_stmt_p (op1_def_stmt_info))
5669 scalar_shift_arg = false;
5670 }
5671 else
5672 {
5673 if (dump_enabled_p ())
5674 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5675 "operand mode requires invariant argument.\n");
5676 return false;
5677 }
5678
5679 /* Vector shifted by vector. */
5680 bool was_scalar_shift_arg = scalar_shift_arg;
5681 if (!scalar_shift_arg)
5682 {
5683 optab = optab_for_tree_code (code, vectype, optab_vector);
5684 if (dump_enabled_p ())
5685 dump_printf_loc (MSG_NOTE, vect_location,
5686 "vector/vector shift/rotate found.\n");
5687
5688 if (!op1_vectype)
5689 op1_vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op1),
5690 slp_op1);
5691 incompatible_op1_vectype_p
5692 = (op1_vectype == NULL_TREE
5693 || maybe_ne (TYPE_VECTOR_SUBPARTS (op1_vectype),
5694 TYPE_VECTOR_SUBPARTS (vectype))
5695 || TYPE_MODE (op1_vectype) != TYPE_MODE (vectype));
5696 if (incompatible_op1_vectype_p
5697 && (!slp_node
5698 || SLP_TREE_DEF_TYPE (slp_op1) != vect_constant_def
5699 || slp_op1->refcnt != 1))
5700 {
5701 if (dump_enabled_p ())
5702 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5703 "unusable type for last operand in"
5704 " vector/vector shift/rotate.\n");
5705 return false;
5706 }
5707 }
5708 /* See if the machine has a vector-shifted-by-scalar insn and, if not,
5709 whether it has a vector-shifted-by-vector insn. */
5710 else
5711 {
5712 optab = optab_for_tree_code (code, vectype, optab_scalar);
5713 if (optab
5714 && optab_handler (optab, TYPE_MODE (vectype)) != CODE_FOR_nothing)
5715 {
5716 if (dump_enabled_p ())
5717 dump_printf_loc (MSG_NOTE, vect_location,
5718 "vector/scalar shift/rotate found.\n");
5719 }
5720 else
5721 {
5722 optab = optab_for_tree_code (code, vectype, optab_vector);
5723 if (optab
5724 && (optab_handler (optab, TYPE_MODE (vectype))
5725 != CODE_FOR_nothing))
5726 {
5727 scalar_shift_arg = false;
5728
5729 if (dump_enabled_p ())
5730 dump_printf_loc (MSG_NOTE, vect_location,
5731 "vector/vector shift/rotate found.\n");
5732
5733 if (!op1_vectype)
5734 op1_vectype = get_vectype_for_scalar_type (vinfo,
5735 TREE_TYPE (op1),
5736 slp_op1);
5737
5738 /* Unlike the other binary operators, shifts/rotates have
5739 the rhs being int, instead of the same type as the lhs,
5740 so make sure the scalar is the right type if we are
5741 dealing with vectors of long long/long/short/char. */
5742 incompatible_op1_vectype_p
5743 = (!op1_vectype
5744 || !tree_nop_conversion_p (TREE_TYPE (vectype),
5745 TREE_TYPE (op1)));
5746 if (incompatible_op1_vectype_p
5747 && dt[1] == vect_internal_def)
5748 {
5749 if (dump_enabled_p ())
5750 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5751 "unusable type for last operand in"
5752 " vector/vector shift/rotate.\n");
5753 return false;
5754 }
5755 }
5756 }
5757 }
5758
5759 /* Supportable by target? */
5760 if (!optab)
5761 {
5762 if (dump_enabled_p ())
5763 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5764 "no optab.\n");
5765 return false;
5766 }
5767 vec_mode = TYPE_MODE (vectype);
5768 icode = (int) optab_handler (optab, vec_mode);
5769 if (icode == CODE_FOR_nothing)
5770 {
5771 if (dump_enabled_p ())
5772 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5773 "op not supported by target.\n");
5774 return false;
5775 }
5776 /* Vector lowering cannot optimize vector shifts using word arithmetic. */
5777 if (vect_emulated_vector_p (vectype))
5778 return false;
5779
5780 if (!vec_stmt) /* transformation not required. */
5781 {
5782 if (slp_node
5783 && (!vect_maybe_update_slp_op_vectype (slp_op0, vectype)
5784 || ((!scalar_shift_arg || dt[1] == vect_internal_def)
5785 && (!incompatible_op1_vectype_p
5786 || dt[1] == vect_constant_def)
5787 && !vect_maybe_update_slp_op_vectype
5788 (slp_op1,
5789 incompatible_op1_vectype_p ? vectype : op1_vectype))))
5790 {
5791 if (dump_enabled_p ())
5792 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5793 "incompatible vector types for invariants\n");
5794 return false;
5795 }
5796 /* Now adjust the constant shift amount in place. */
5797 if (slp_node
5798 && incompatible_op1_vectype_p
5799 && dt[1] == vect_constant_def)
5800 {
5801 for (unsigned i = 0;
5802 i < SLP_TREE_SCALAR_OPS (slp_op1).length (); ++i)
5803 {
5804 SLP_TREE_SCALAR_OPS (slp_op1)[i]
5805 = fold_convert (TREE_TYPE (vectype),
5806 SLP_TREE_SCALAR_OPS (slp_op1)[i]);
5807 gcc_assert ((TREE_CODE (SLP_TREE_SCALAR_OPS (slp_op1)[i])
5808 == INTEGER_CST));
5809 }
5810 }
5811 STMT_VINFO_TYPE (stmt_info) = shift_vec_info_type;
5812 DUMP_VECT_SCOPE ("vectorizable_shift");
5813 vect_model_simple_cost (vinfo, stmt_info, ncopies, dt,
5814 scalar_shift_arg ? 1 : ndts, slp_node, cost_vec);
5815 return true;
5816 }
5817
5818 /* Transform. */
5819
5820 if (dump_enabled_p ())
5821 dump_printf_loc (MSG_NOTE, vect_location,
5822 "transform binary/unary operation.\n");
5823
5824 if (incompatible_op1_vectype_p && !slp_node)
5825 {
5826 gcc_assert (!scalar_shift_arg && was_scalar_shift_arg);
5827 op1 = fold_convert (TREE_TYPE (vectype), op1);
5828 if (dt[1] != vect_constant_def)
5829 op1 = vect_init_vector (vinfo, stmt_info, op1,
5830 TREE_TYPE (vectype), NULL);
5831 }
5832
5833 /* Handle def. */
5834 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5835
5836 if (scalar_shift_arg && dt[1] != vect_internal_def)
5837 {
5838 /* Vector shl and shr insn patterns can be defined with scalar
5839 operand 2 (shift operand). In this case, use constant or loop
5840 invariant op1 directly, without extending it to vector mode
5841 first. */
5842 optab_op2_mode = insn_data[icode].operand[2].mode;
5843 if (!VECTOR_MODE_P (optab_op2_mode))
5844 {
5845 if (dump_enabled_p ())
5846 dump_printf_loc (MSG_NOTE, vect_location,
5847 "operand 1 using scalar mode.\n");
5848 vec_oprnd1 = op1;
5849 vec_oprnds1.create (slp_node ? slp_node->vec_stmts_size : ncopies);
5850 vec_oprnds1.quick_push (vec_oprnd1);
5851 /* Store vec_oprnd1 for every vector stmt to be created.
5852 We check during the analysis that all the shift arguments
5853 are the same.
5854 TODO: Allow different constants for different vector
5855 stmts generated for an SLP instance. */
5856 for (k = 0;
5857 k < (slp_node ? slp_node->vec_stmts_size - 1 : ncopies - 1); k++)
5858 vec_oprnds1.quick_push (vec_oprnd1);
5859 }
5860 }
5861 else if (!scalar_shift_arg && slp_node && incompatible_op1_vectype_p)
5862 {
5863 if (was_scalar_shift_arg)
5864 {
5865 /* If the argument was the same in all lanes, create
5866 the correctly typed vector shift amount directly. */
5867 op1 = fold_convert (TREE_TYPE (vectype), op1);
5868 op1 = vect_init_vector (vinfo, stmt_info, op1, TREE_TYPE (vectype),
5869 !loop_vinfo ? gsi : NULL);
5870 vec_oprnd1 = vect_init_vector (vinfo, stmt_info, op1, vectype,
5871 !loop_vinfo ? gsi : NULL);
5872 vec_oprnds1.create (slp_node->vec_stmts_size);
5873 for (k = 0; k < slp_node->vec_stmts_size; k++)
5874 vec_oprnds1.quick_push (vec_oprnd1);
5875 }
5876 else if (dt[1] == vect_constant_def)
5877 /* The constant shift amount has been adjusted in place. */
5878 ;
5879 else
5880 gcc_assert (TYPE_MODE (op1_vectype) == TYPE_MODE (vectype));
5881 }
5882
5883 /* vec_oprnd1 is available if operand 1 should be of a scalar type
5884 (a special case for certain kinds of vector shifts); otherwise,
5885 operand 1 should be of a vector type (the usual case). */
5886 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
5887 op0, &vec_oprnds0,
5888 vec_oprnd1 ? NULL_TREE : op1, &vec_oprnds1);
5889
5890 /* Arguments are ready. Create the new vector stmt. */
5891 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5892 {
5893 /* For internal defs where we need to use a scalar shift arg,
5894 extract the first lane. */
5895 if (scalar_shift_arg && dt[1] == vect_internal_def)
5896 {
5897 vop1 = vec_oprnds1[0];
5898 new_temp = make_ssa_name (TREE_TYPE (TREE_TYPE (vop1)));
5899 gassign *new_stmt
5900 = gimple_build_assign (new_temp,
5901 build3 (BIT_FIELD_REF, TREE_TYPE (new_temp),
5902 vop1,
5903 TYPE_SIZE (TREE_TYPE (new_temp)),
5904 bitsize_zero_node));
5905 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
5906 vop1 = new_temp;
5907 }
5908 else
5909 vop1 = vec_oprnds1[i];
5910 gassign *new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
5911 new_temp = make_ssa_name (vec_dest, new_stmt);
5912 gimple_assign_set_lhs (new_stmt, new_temp);
5913 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
5914 if (slp_node)
5915 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
5916 else
5917 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
5918 }
5919
5920 if (!slp_node)
5921 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
5922
5923 vec_oprnds0.release ();
5924 vec_oprnds1.release ();
5925
5926 return true;
5927 }
5928
5929
5930 /* Function vectorizable_operation.
5931
5932 Check if STMT_INFO performs a binary, unary or ternary operation that can
5933 be vectorized.
5934 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
5935 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
5936 Return true if STMT_INFO is vectorizable in this way. */
5937
5938 static bool
5939 vectorizable_operation (vec_info *vinfo,
5940 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
5941 gimple **vec_stmt, slp_tree slp_node,
5942 stmt_vector_for_cost *cost_vec)
5943 {
5944 tree vec_dest;
5945 tree scalar_dest;
5946 tree op0, op1 = NULL_TREE, op2 = NULL_TREE;
5947 tree vectype;
5948 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
5949 enum tree_code code, orig_code;
5950 machine_mode vec_mode;
5951 tree new_temp;
5952 int op_type;
5953 optab optab;
5954 bool target_support_p;
5955 enum vect_def_type dt[3]
5956 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
5957 int ndts = 3;
5958 poly_uint64 nunits_in;
5959 poly_uint64 nunits_out;
5960 tree vectype_out;
5961 int ncopies, vec_num;
5962 int i;
5963 vec<tree> vec_oprnds0 = vNULL;
5964 vec<tree> vec_oprnds1 = vNULL;
5965 vec<tree> vec_oprnds2 = vNULL;
5966 tree vop0, vop1, vop2;
5967 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
5968
5969 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5970 return false;
5971
5972 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5973 && ! vec_stmt)
5974 return false;
5975
5976 /* Is STMT a vectorizable binary/unary operation? */
5977 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
5978 if (!stmt)
5979 return false;
5980
5981 /* Loads and stores are handled in vectorizable_{load,store}. */
5982 if (STMT_VINFO_DATA_REF (stmt_info))
5983 return false;
5984
5985 orig_code = code = gimple_assign_rhs_code (stmt);
5986
5987 /* Shifts are handled in vectorizable_shift. */
5988 if (code == LSHIFT_EXPR
5989 || code == RSHIFT_EXPR
5990 || code == LROTATE_EXPR
5991 || code == RROTATE_EXPR)
5992 return false;
5993
5994 /* Comparisons are handled in vectorizable_comparison. */
5995 if (TREE_CODE_CLASS (code) == tcc_comparison)
5996 return false;
5997
5998 /* Conditions are handled in vectorizable_condition. */
5999 if (code == COND_EXPR)
6000 return false;
6001
6002 /* For pointer addition and subtraction, we should use the normal
6003 plus and minus for the vector operation. */
6004 if (code == POINTER_PLUS_EXPR)
6005 code = PLUS_EXPR;
6006 if (code == POINTER_DIFF_EXPR)
6007 code = MINUS_EXPR;
6008
6009 /* Support only unary, binary and ternary operations. */
6010 op_type = TREE_CODE_LENGTH (code);
6011 if (op_type != unary_op && op_type != binary_op && op_type != ternary_op)
6012 {
6013 if (dump_enabled_p ())
6014 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6015 "num. args = %d (not unary/binary/ternary op).\n",
6016 op_type);
6017 return false;
6018 }
6019
6020 scalar_dest = gimple_assign_lhs (stmt);
6021 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
6022
6023 /* Most operations cannot handle bit-precision types without extra
6024 truncations. */
6025 bool mask_op_p = VECTOR_BOOLEAN_TYPE_P (vectype_out);
6026 if (!mask_op_p
6027 && !type_has_mode_precision_p (TREE_TYPE (scalar_dest))
6028 /* The exceptions are bitwise binary operations. */
6029 && code != BIT_IOR_EXPR
6030 && code != BIT_XOR_EXPR
6031 && code != BIT_AND_EXPR)
6032 {
6033 if (dump_enabled_p ())
6034 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6035 "bit-precision arithmetic not supported.\n");
6036 return false;
6037 }
6038
6039 slp_tree slp_op0;
6040 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
6041 0, &op0, &slp_op0, &dt[0], &vectype))
6042 {
6043 if (dump_enabled_p ())
6044 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6045 "use not simple.\n");
6046 return false;
6047 }
6048 /* If op0 is an external or constant def, infer the vector type
6049 from the scalar type. */
6050 if (!vectype)
6051 {
6052 /* For a boolean type we cannot determine the vectype from an
6053 invariant value (we don't know whether it is a vector
6054 of booleans or a vector of integers).  We use the output
6055 vectype because operations on booleans don't change
6056 the type. */
6057 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op0)))
6058 {
6059 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (scalar_dest)))
6060 {
6061 if (dump_enabled_p ())
6062 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6063 "not supported operation on bool value.\n");
6064 return false;
6065 }
6066 vectype = vectype_out;
6067 }
6068 else
6069 vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op0),
6070 slp_node);
6071 }
6072 if (vec_stmt)
6073 gcc_assert (vectype);
6074 if (!vectype)
6075 {
6076 if (dump_enabled_p ())
6077 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6078 "no vectype for scalar type %T\n",
6079 TREE_TYPE (op0));
6080
6081 return false;
6082 }
6083
6084 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
6085 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
6086 if (maybe_ne (nunits_out, nunits_in))
6087 return false;
6088
6089 tree vectype2 = NULL_TREE, vectype3 = NULL_TREE;
6090 slp_tree slp_op1 = NULL, slp_op2 = NULL;
6091 if (op_type == binary_op || op_type == ternary_op)
6092 {
6093 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
6094 1, &op1, &slp_op1, &dt[1], &vectype2))
6095 {
6096 if (dump_enabled_p ())
6097 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6098 "use not simple.\n");
6099 return false;
6100 }
6101 }
6102 if (op_type == ternary_op)
6103 {
6104 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
6105 2, &op2, &slp_op2, &dt[2], &vectype3))
6106 {
6107 if (dump_enabled_p ())
6108 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6109 "use not simple.\n");
6110 return false;
6111 }
6112 }
6113
6114 /* Multiple types in SLP are handled by creating the appropriate number of
6115 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
6116 case of SLP. */
6117 if (slp_node)
6118 {
6119 ncopies = 1;
6120 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
6121 }
6122 else
6123 {
6124 ncopies = vect_get_num_copies (loop_vinfo, vectype);
6125 vec_num = 1;
6126 }
6127
6128 gcc_assert (ncopies >= 1);
6129
6130 /* Reject attempts to combine mask types with nonmask types, e.g. if
6131 we have an AND between a (nonmask) boolean loaded from memory and
6132 a (mask) boolean result of a comparison.
6133
6134 TODO: We could easily fix these cases up using pattern statements. */
6135 if (VECTOR_BOOLEAN_TYPE_P (vectype) != mask_op_p
6136 || (vectype2 && VECTOR_BOOLEAN_TYPE_P (vectype2) != mask_op_p)
6137 || (vectype3 && VECTOR_BOOLEAN_TYPE_P (vectype3) != mask_op_p))
6138 {
6139 if (dump_enabled_p ())
6140 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6141 "mixed mask and nonmask vector types\n");
6142 return false;
6143 }
6144
6145 /* Supportable by target? */
6146
6147 vec_mode = TYPE_MODE (vectype);
6148 if (code == MULT_HIGHPART_EXPR)
6149 target_support_p = can_mult_highpart_p (vec_mode, TYPE_UNSIGNED (vectype));
6150 else
6151 {
6152 optab = optab_for_tree_code (code, vectype, optab_default);
6153 if (!optab)
6154 {
6155 if (dump_enabled_p ())
6156 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6157 "no optab.\n");
6158 return false;
6159 }
6160 target_support_p = (optab_handler (optab, vec_mode)
6161 != CODE_FOR_nothing);
6162 }
6163
6164 bool using_emulated_vectors_p = vect_emulated_vector_p (vectype);
6165 if (!target_support_p)
6166 {
6167 if (dump_enabled_p ())
6168 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6169 "op not supported by target.\n");
6170 /* Check only during analysis. */
6171 if (maybe_ne (GET_MODE_SIZE (vec_mode), UNITS_PER_WORD)
6172 || (!vec_stmt && !vect_can_vectorize_without_simd_p (code)))
6173 return false;
6174 if (dump_enabled_p ())
6175 dump_printf_loc (MSG_NOTE, vect_location,
6176 "proceeding using word mode.\n");
6177 using_emulated_vectors_p = true;
6178 }
6179
6180 if (using_emulated_vectors_p
6181 && !vect_can_vectorize_without_simd_p (code))
6182 {
6183 if (dump_enabled_p ())
6184 dump_printf (MSG_NOTE, "using word mode not possible.\n");
6185 return false;
6186 }
6187
6188 int reduc_idx = STMT_VINFO_REDUC_IDX (stmt_info);
6189 vec_loop_masks *masks = (loop_vinfo ? &LOOP_VINFO_MASKS (loop_vinfo) : NULL);
6190 internal_fn cond_fn = get_conditional_internal_fn (code);
6191
6192 if (!vec_stmt) /* transformation not required. */
6193 {
6194 /* If this operation is part of a reduction, a fully-masked loop
6195 should only change the active lanes of the reduction chain,
6196 keeping the inactive lanes as-is. */
6197 if (loop_vinfo
6198 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo)
6199 && reduc_idx >= 0)
6200 {
6201 if (cond_fn == IFN_LAST
6202 || !direct_internal_fn_supported_p (cond_fn, vectype,
6203 OPTIMIZE_FOR_SPEED))
6204 {
6205 if (dump_enabled_p ())
6206 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6207 "can't use a fully-masked loop because no"
6208 " conditional operation is available.\n");
6209 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
6210 }
6211 else
6212 vect_record_loop_mask (loop_vinfo, masks, ncopies * vec_num,
6213 vectype, NULL);
6214 }
6215
6216 /* Put types on constant and invariant SLP children. */
6217 if (slp_node
6218 && (!vect_maybe_update_slp_op_vectype (slp_op0, vectype)
6219 || !vect_maybe_update_slp_op_vectype (slp_op1, vectype)
6220 || !vect_maybe_update_slp_op_vectype (slp_op2, vectype)))
6221 {
6222 if (dump_enabled_p ())
6223 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6224 "incompatible vector types for invariants\n");
6225 return false;
6226 }
6227
6228 STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
6229 DUMP_VECT_SCOPE ("vectorizable_operation");
6230 vect_model_simple_cost (vinfo, stmt_info,
6231 ncopies, dt, ndts, slp_node, cost_vec);
6232 if (using_emulated_vectors_p)
6233 {
6234 /* The above vect_model_simple_cost call handles constants
6235 in the prologue and (mis-)costs one of the stmts as
6236 vector stmt. See tree-vect-generic.c:do_plus_minus/do_negate
6237 for the actual lowering that will be applied. */
6238 unsigned n
6239 = slp_node ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node) : ncopies;
6240 switch (code)
6241 {
6242 case PLUS_EXPR:
6243 n *= 5;
6244 break;
6245 case MINUS_EXPR:
6246 n *= 6;
6247 break;
6248 case NEGATE_EXPR:
6249 n *= 4;
6250 break;
6251 default:;
6252 }
6253 record_stmt_cost (cost_vec, n, scalar_stmt, stmt_info, 0, vect_body);
6254 }
6255 return true;
6256 }
6257
6258 /* Transform. */
6259
6260 if (dump_enabled_p ())
6261 dump_printf_loc (MSG_NOTE, vect_location,
6262 "transform binary/unary operation.\n");
6263
6264 bool masked_loop_p = loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo);
6265
6266 /* POINTER_DIFF_EXPR has pointer arguments which are vectorized as
6267 vectors with unsigned elements, but the result is signed. So, we
6268 need to compute the MINUS_EXPR into vectype temporary and
6269 VIEW_CONVERT_EXPR it into the final vectype_out result. */
6270 tree vec_cvt_dest = NULL_TREE;
6271 if (orig_code == POINTER_DIFF_EXPR)
6272 {
6273 vec_dest = vect_create_destination_var (scalar_dest, vectype);
6274 vec_cvt_dest = vect_create_destination_var (scalar_dest, vectype_out);
6275 }
6276 /* Handle def. */
6277 else
6278 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
6279
6280 /* In case the vectorization factor (VF) is bigger than the number
6281 of elements that we can fit in a vectype (nunits), we have to generate
6282 more than one vector stmt - i.e., we need to "unroll" the
6283 vector stmt by a factor VF/nunits. In doing so, we record a pointer
6284 from one copy of the vector stmt to the next, in the field
6285 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
6286 stages to find the correct vector defs to be used when vectorizing
6287 stmts that use the defs of the current stmt. The example below
6288 illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
6289 we need to create 4 vectorized stmts):
6290
6291 before vectorization:
6292 RELATED_STMT VEC_STMT
6293 S1: x = memref - -
6294 S2: z = x + 1 - -
6295
6296 step 1: vectorize stmt S1 (done in vectorizable_load. See more details
6297 there):
6298 RELATED_STMT VEC_STMT
6299 VS1_0: vx0 = memref0 VS1_1 -
6300 VS1_1: vx1 = memref1 VS1_2 -
6301 VS1_2: vx2 = memref2 VS1_3 -
6302 VS1_3: vx3 = memref3 - -
6303 S1: x = load - VS1_0
6304 S2: z = x + 1 - -
6305
6306 step2: vectorize stmt S2 (done here):
6307 To vectorize stmt S2 we first need to find the relevant vector
6308 def for the first operand 'x'. This is, as usual, obtained from
6309 the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
6310 that defines 'x' (S1). This way we find the stmt VS1_0, and the
6311 relevant vector def 'vx0'. Having found 'vx0' we can generate
6312 the vector stmt VS2_0, and as usual, record it in the
6313 STMT_VINFO_VEC_STMT of stmt S2.
6314 When creating the second copy (VS2_1), we obtain the relevant vector
6315 def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
6316 stmt VS1_0. This way we find the stmt VS1_1 and the relevant
6317 vector def 'vx1'. Using 'vx1' we create stmt VS2_1 and record a
6318 pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
6319 Similarly when creating stmts VS2_2 and VS2_3. This is the resulting
6320 chain of stmts and pointers:
6321 RELATED_STMT VEC_STMT
6322 VS1_0: vx0 = memref0 VS1_1 -
6323 VS1_1: vx1 = memref1 VS1_2 -
6324 VS1_2: vx2 = memref2 VS1_3 -
6325 VS1_3: vx3 = memref3 - -
6326 S1: x = load - VS1_0
6327 VS2_0: vz0 = vx0 + v1 VS2_1 -
6328 VS2_1: vz1 = vx1 + v1 VS2_2 -
6329 VS2_2: vz2 = vx2 + v1 VS2_3 -
6330 VS2_3: vz3 = vx3 + v1 - -
6331 S2: z = x + 1 - VS2_0 */
6332
6333 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
6334 op0, &vec_oprnds0, op1, &vec_oprnds1, op2, &vec_oprnds2);
6335 /* Arguments are ready. Create the new vector stmt. */
6336 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
6337 {
6338 gimple *new_stmt = NULL;
6339 vop1 = ((op_type == binary_op || op_type == ternary_op)
6340 ? vec_oprnds1[i] : NULL_TREE);
6341 vop2 = ((op_type == ternary_op) ? vec_oprnds2[i] : NULL_TREE);
6342 if (masked_loop_p && reduc_idx >= 0)
6343 {
6344 /* Perform the operation on active elements only and take
6345 inactive elements from the reduction chain input. */
6346 gcc_assert (!vop2);
6347 vop2 = reduc_idx == 1 ? vop1 : vop0;
6348 tree mask = vect_get_loop_mask (gsi, masks, vec_num * ncopies,
6349 vectype, i);
6350 gcall *call = gimple_build_call_internal (cond_fn, 4, mask,
6351 vop0, vop1, vop2);
6352 new_temp = make_ssa_name (vec_dest, call);
6353 gimple_call_set_lhs (call, new_temp);
6354 gimple_call_set_nothrow (call, true);
6355 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
6356 new_stmt = call;
6357 }
6358 else
6359 {
6360 tree mask = NULL_TREE;
6361 /* When combining two masks, check whether either of them has already
6362 been combined with a loop mask elsewhere; if so, the new combined
6363 mask does not need to be combined with a loop mask again. */
6364 if (masked_loop_p && code == BIT_AND_EXPR)
6365 {
6366 if (loop_vinfo->scalar_cond_masked_set.contains ({ op0,
6367 ncopies}))
6368 {
6369 mask = vect_get_loop_mask (gsi, masks, vec_num * ncopies,
6370 vectype, i);
6371
6372 vop0 = prepare_vec_mask (loop_vinfo, TREE_TYPE (mask), mask,
6373 vop0, gsi);
6374 }
6375
6376 if (loop_vinfo->scalar_cond_masked_set.contains ({ op1,
6377 ncopies }))
6378 {
6379 mask = vect_get_loop_mask (gsi, masks, vec_num * ncopies,
6380 vectype, i);
6381
6382 vop1 = prepare_vec_mask (loop_vinfo, TREE_TYPE (mask), mask,
6383 vop1, gsi);
6384 }
6385 }
6386
6387 new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1, vop2);
6388 new_temp = make_ssa_name (vec_dest, new_stmt);
6389 gimple_assign_set_lhs (new_stmt, new_temp);
6390 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
6391
6392 /* Enter the combined value into the vector cond hash so we don't
6393 AND it with a loop mask again. */
6394 if (mask)
6395 loop_vinfo->vec_cond_masked_set.add ({ new_temp, mask });
6396
6397 if (vec_cvt_dest)
6398 {
6399 new_temp = build1 (VIEW_CONVERT_EXPR, vectype_out, new_temp);
6400 new_stmt = gimple_build_assign (vec_cvt_dest, VIEW_CONVERT_EXPR,
6401 new_temp);
6402 new_temp = make_ssa_name (vec_cvt_dest, new_stmt);
6403 gimple_assign_set_lhs (new_stmt, new_temp);
6404 vect_finish_stmt_generation (vinfo, stmt_info,
6405 new_stmt, gsi);
6406 }
6407 }
6408 if (slp_node)
6409 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
6410 else
6411 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
6412 }
6413
6414 if (!slp_node)
6415 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
6416
6417 vec_oprnds0.release ();
6418 vec_oprnds1.release ();
6419 vec_oprnds2.release ();
6420
6421 return true;
6422 }
6423
6424 /* A helper function to ensure data reference DR_INFO's base alignment. */
6425
6426 static void
6427 ensure_base_align (dr_vec_info *dr_info)
6428 {
6429 /* Alignment is only analyzed for the first element of a DR group;
6430 use that element to determine the base alignment we need to enforce. */
6431 if (STMT_VINFO_GROUPED_ACCESS (dr_info->stmt))
6432 dr_info = STMT_VINFO_DR_INFO (DR_GROUP_FIRST_ELEMENT (dr_info->stmt));
6433
6434 gcc_assert (dr_info->misalignment != DR_MISALIGNMENT_UNINITIALIZED);
6435
6436 if (dr_info->base_misaligned)
6437 {
6438 tree base_decl = dr_info->base_decl;
6439
6440 /* We should only be able to increase the alignment of a base object
6441 if we know what its new alignment should be at compile time. */
6442 unsigned HOST_WIDE_INT align_base_to =
6443 DR_TARGET_ALIGNMENT (dr_info).to_constant () * BITS_PER_UNIT;
6444
6445 if (decl_in_symtab_p (base_decl))
6446 symtab_node::get (base_decl)->increase_alignment (align_base_to);
6447 else if (DECL_ALIGN (base_decl) < align_base_to)
6448 {
6449 SET_DECL_ALIGN (base_decl, align_base_to);
6450 DECL_USER_ALIGN (base_decl) = 1;
6451 }
6452 dr_info->base_misaligned = false;
6453 }
6454 }
6455
6456
6457 /* Function get_group_alias_ptr_type.
6458
6459 Return the alias type for the group starting at FIRST_STMT_INFO. */
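/* If the members of the group disagree on their alias sets, the
function conservatively returns ptr_type_node, i.e. alias set
zero. */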
6460
6461 static tree
6462 get_group_alias_ptr_type (stmt_vec_info first_stmt_info)
6463 {
6464 struct data_reference *first_dr, *next_dr;
6465
6466 first_dr = STMT_VINFO_DATA_REF (first_stmt_info);
6467 stmt_vec_info next_stmt_info = DR_GROUP_NEXT_ELEMENT (first_stmt_info);
6468 while (next_stmt_info)
6469 {
6470 next_dr = STMT_VINFO_DATA_REF (next_stmt_info);
6471 if (get_alias_set (DR_REF (first_dr))
6472 != get_alias_set (DR_REF (next_dr)))
6473 {
6474 if (dump_enabled_p ())
6475 dump_printf_loc (MSG_NOTE, vect_location,
6476 "conflicting alias set types.\n");
6477 return ptr_type_node;
6478 }
6479 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
6480 }
6481 return reference_alias_ptr_type (DR_REF (first_dr));
6482 }
6483
6484
6485 /* Function scan_operand_equal_p.
6486
6487 Helper function for check_scan_store. Compare two references
6488 with .GOMP_SIMD_LANE bases. */
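/* The two references compare equal if they have the same base and bit
size and, after stripping conversions and a constant scale factor
from the offsets, the same offset and step. */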
6489
6490 static bool
6491 scan_operand_equal_p (tree ref1, tree ref2)
6492 {
6493 tree ref[2] = { ref1, ref2 };
6494 poly_int64 bitsize[2], bitpos[2];
6495 tree offset[2], base[2];
6496 for (int i = 0; i < 2; ++i)
6497 {
6498 machine_mode mode;
6499 int unsignedp, reversep, volatilep = 0;
6500 base[i] = get_inner_reference (ref[i], &bitsize[i], &bitpos[i],
6501 &offset[i], &mode, &unsignedp,
6502 &reversep, &volatilep);
6503 if (reversep || volatilep || maybe_ne (bitpos[i], 0))
6504 return false;
6505 if (TREE_CODE (base[i]) == MEM_REF
6506 && offset[i] == NULL_TREE
6507 && TREE_CODE (TREE_OPERAND (base[i], 0)) == SSA_NAME)
6508 {
6509 gimple *def_stmt = SSA_NAME_DEF_STMT (TREE_OPERAND (base[i], 0));
6510 if (is_gimple_assign (def_stmt)
6511 && gimple_assign_rhs_code (def_stmt) == POINTER_PLUS_EXPR
6512 && TREE_CODE (gimple_assign_rhs1 (def_stmt)) == ADDR_EXPR
6513 && TREE_CODE (gimple_assign_rhs2 (def_stmt)) == SSA_NAME)
6514 {
6515 if (maybe_ne (mem_ref_offset (base[i]), 0))
6516 return false;
6517 base[i] = TREE_OPERAND (gimple_assign_rhs1 (def_stmt), 0);
6518 offset[i] = gimple_assign_rhs2 (def_stmt);
6519 }
6520 }
6521 }
6522
6523 if (!operand_equal_p (base[0], base[1], 0))
6524 return false;
6525 if (maybe_ne (bitsize[0], bitsize[1]))
6526 return false;
6527 if (offset[0] != offset[1])
6528 {
6529 if (!offset[0] || !offset[1])
6530 return false;
6531 if (!operand_equal_p (offset[0], offset[1], 0))
6532 {
6533 tree step[2];
6534 for (int i = 0; i < 2; ++i)
6535 {
6536 step[i] = integer_one_node;
6537 if (TREE_CODE (offset[i]) == SSA_NAME)
6538 {
6539 gimple *def_stmt = SSA_NAME_DEF_STMT (offset[i]);
6540 if (is_gimple_assign (def_stmt)
6541 && gimple_assign_rhs_code (def_stmt) == MULT_EXPR
6542 && (TREE_CODE (gimple_assign_rhs2 (def_stmt))
6543 == INTEGER_CST))
6544 {
6545 step[i] = gimple_assign_rhs2 (def_stmt);
6546 offset[i] = gimple_assign_rhs1 (def_stmt);
6547 }
6548 }
6549 else if (TREE_CODE (offset[i]) == MULT_EXPR)
6550 {
6551 step[i] = TREE_OPERAND (offset[i], 1);
6552 offset[i] = TREE_OPERAND (offset[i], 0);
6553 }
6554 tree rhs1 = NULL_TREE;
6555 if (TREE_CODE (offset[i]) == SSA_NAME)
6556 {
6557 gimple *def_stmt = SSA_NAME_DEF_STMT (offset[i]);
6558 if (gimple_assign_cast_p (def_stmt))
6559 rhs1 = gimple_assign_rhs1 (def_stmt);
6560 }
6561 else if (CONVERT_EXPR_P (offset[i]))
6562 rhs1 = TREE_OPERAND (offset[i], 0);
6563 if (rhs1
6564 && INTEGRAL_TYPE_P (TREE_TYPE (rhs1))
6565 && INTEGRAL_TYPE_P (TREE_TYPE (offset[i]))
6566 && (TYPE_PRECISION (TREE_TYPE (offset[i]))
6567 >= TYPE_PRECISION (TREE_TYPE (rhs1))))
6568 offset[i] = rhs1;
6569 }
6570 if (!operand_equal_p (offset[0], offset[1], 0)
6571 || !operand_equal_p (step[0], step[1], 0))
6572 return false;
6573 }
6574 }
6575 return true;
6576 }
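/* For illustration, the two shapes of reference this predicate is meant
   to unify are roughly (element type int, SSA names made up):

     ref1:  D.2042[_25]
     ref2:  MEM[(int *)&D.2042 + _25 * 4]

   get_inner_reference reduces both to base &D.2042 once the
   POINTER_PLUS_EXPR feeding the MEM_REF is peeled, after which the
   MULT_EXPR step (4) and the offset SSA name (_25) are compared
   separately, looking through widening conversions of the offset.  */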
6577
6578
6579 enum scan_store_kind {
6580 /* Normal permutation. */
6581 scan_store_kind_perm,
6582
6583 /* Whole vector left shift permutation with zero init. */
6584 scan_store_kind_lshift_zero,
6585
6586 /* Whole vector left shift permutation and VEC_COND_EXPR. */
6587 scan_store_kind_lshift_cond
6588 };
6589
6590 /* Function scan_store_can_perm_p.
6591 
6592    Verify whether we can perform the needed permutations or whole vector
6593    shifts.  Return -1 on failure, otherwise the exact log2 of vectype's
6594    nunits.  If nonnull, USE_WHOLE_VECTOR is filled with the enum
6595    scan_store_kind operation to perform at each step. */
6596
6597 static int
6598 scan_store_can_perm_p (tree vectype, tree init,
6599 vec<enum scan_store_kind> *use_whole_vector = NULL)
6600 {
6601 enum machine_mode vec_mode = TYPE_MODE (vectype);
6602 unsigned HOST_WIDE_INT nunits;
6603 if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits))
6604 return -1;
6605 int units_log2 = exact_log2 (nunits);
6606 if (units_log2 <= 0)
6607 return -1;
6608
6609 int i;
6610 enum scan_store_kind whole_vector_shift_kind = scan_store_kind_perm;
6611 for (i = 0; i <= units_log2; ++i)
6612 {
6613 unsigned HOST_WIDE_INT j, k;
6614 enum scan_store_kind kind = scan_store_kind_perm;
6615 vec_perm_builder sel (nunits, nunits, 1);
6616 sel.quick_grow (nunits);
6617 if (i == units_log2)
6618 {
6619 for (j = 0; j < nunits; ++j)
6620 sel[j] = nunits - 1;
6621 }
6622 else
6623 {
6624 for (j = 0; j < (HOST_WIDE_INT_1U << i); ++j)
6625 sel[j] = j;
6626 for (k = 0; j < nunits; ++j, ++k)
6627 sel[j] = nunits + k;
6628 }
6629 vec_perm_indices indices (sel, i == units_log2 ? 1 : 2, nunits);
6630 if (!can_vec_perm_const_p (vec_mode, indices))
6631 {
6632 if (i == units_log2)
6633 return -1;
6634
6635 if (whole_vector_shift_kind == scan_store_kind_perm)
6636 {
6637 if (optab_handler (vec_shl_optab, vec_mode) == CODE_FOR_nothing)
6638 return -1;
6639 whole_vector_shift_kind = scan_store_kind_lshift_zero;
6640 	      /* Whole vector shifts shift in zeros, so if INIT is an all-zero
6641 		 constant, there is no need to do anything further. */
6642 if ((TREE_CODE (init) != INTEGER_CST
6643 && TREE_CODE (init) != REAL_CST)
6644 || !initializer_zerop (init))
6645 {
6646 tree masktype = truth_type_for (vectype);
6647 if (!expand_vec_cond_expr_p (vectype, masktype, VECTOR_CST))
6648 return -1;
6649 whole_vector_shift_kind = scan_store_kind_lshift_cond;
6650 }
6651 }
6652 kind = whole_vector_shift_kind;
6653 }
6654 if (use_whole_vector)
6655 {
6656 if (kind != scan_store_kind_perm && use_whole_vector->is_empty ())
6657 use_whole_vector->safe_grow_cleared (i, true);
6658 if (kind != scan_store_kind_perm || !use_whole_vector->is_empty ())
6659 use_whole_vector->safe_push (kind);
6660 }
6661 }
6662
6663 return units_log2;
6664 }
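/* A minimal, self-contained sketch (not GCC API code) of the selector
   construction above, assuming nunits == 8 and hence units_log2 == 3;
   it mirrors the loop in scan_store_can_perm_p with plain arrays in
   place of vec_perm_builder:

     #include <cstdio>

     int main ()
     {
       const unsigned nunits = 8, units_log2 = 3;
       for (unsigned i = 0; i <= units_log2; ++i)
	 {
	   unsigned sel[nunits];
	   if (i == units_log2)
	     for (unsigned j = 0; j < nunits; ++j)
	       sel[j] = nunits - 1;	// broadcast of the last element
	   else
	     {
	       unsigned j = 0, k = 0;
	       for (; j < (1u << i); ++j)
		 sel[j] = j;		// low lanes from the initializer
	       for (; j < nunits; ++j, ++k)
		 sel[j] = nunits + k;	// remaining lanes, shifted input
	     }
	   printf ("step %u:", i);
	   for (unsigned j = 0; j < nunits; ++j)
	     printf (" %u", sel[j]);
	   printf ("\n");
	 }
     }

   which prints the VEC_PERM_EXPR masks used by the scan expansion below:

     step 0: 0 8 9 10 11 12 13 14
     step 1: 0 1 8 9 10 11 12 13
     step 2: 0 1 2 3 8 9 10 11
     step 3: 7 7 7 7 7 7 7 7  */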
6665
6666
6667 /* Function check_scan_store.
6668
6669 Check magic stores for #pragma omp scan {in,ex}clusive reductions. */
6670
6671 static bool
6672 check_scan_store (vec_info *vinfo, stmt_vec_info stmt_info, tree vectype,
6673 enum vect_def_type rhs_dt, bool slp, tree mask,
6674 vect_memory_access_type memory_access_type)
6675 {
6676 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
6677 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
6678 tree ref_type;
6679
6680 gcc_assert (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) > 1);
6681 if (slp
6682 || mask
6683 || memory_access_type != VMAT_CONTIGUOUS
6684 || TREE_CODE (DR_BASE_ADDRESS (dr_info->dr)) != ADDR_EXPR
6685 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0))
6686 || loop_vinfo == NULL
6687 || LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
6688 || STMT_VINFO_GROUPED_ACCESS (stmt_info)
6689 || !integer_zerop (get_dr_vinfo_offset (vinfo, dr_info))
6690 || !integer_zerop (DR_INIT (dr_info->dr))
6691 || !(ref_type = reference_alias_ptr_type (DR_REF (dr_info->dr)))
6692 || !alias_sets_conflict_p (get_alias_set (vectype),
6693 get_alias_set (TREE_TYPE (ref_type))))
6694 {
6695 if (dump_enabled_p ())
6696 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6697 "unsupported OpenMP scan store.\n");
6698 return false;
6699 }
6700
6701   /* We need to pattern match code built by OpenMP lowering and simplified
6702      by subsequent optimizations into something we can handle.
6703 #pragma omp simd reduction(inscan,+:r)
6704 for (...)
6705 {
6706 r += something ();
6707 #pragma omp scan inclusive (r)
6708 use (r);
6709 }
6710 shall have body with:
6711 // Initialization for input phase, store the reduction initializer:
6712 _20 = .GOMP_SIMD_LANE (simduid.3_14(D), 0);
6713 _21 = .GOMP_SIMD_LANE (simduid.3_14(D), 1);
6714 D.2042[_21] = 0;
6715 // Actual input phase:
6716 ...
6717 r.0_5 = D.2042[_20];
6718 _6 = _4 + r.0_5;
6719 D.2042[_20] = _6;
6720 // Initialization for scan phase:
6721 _25 = .GOMP_SIMD_LANE (simduid.3_14(D), 2);
6722 _26 = D.2043[_25];
6723 _27 = D.2042[_25];
6724 _28 = _26 + _27;
6725 D.2043[_25] = _28;
6726 D.2042[_25] = _28;
6727 // Actual scan phase:
6728 ...
6729 r.1_8 = D.2042[_20];
6730 ...
6731 The "omp simd array" variable D.2042 holds the privatized copy used
6732 inside of the loop and D.2043 is another one that holds copies of
6733 the current original list item. The separate GOMP_SIMD_LANE ifn
6734 kinds are there in order to allow optimizing the initializer store
6735      and combiner sequence, e.g. if it is originally some C++ish user
6736      defined reduction, while still allowing the vectorizer to pattern
6737      recognize it and turn it into the appropriate vectorized scan.
6738
6739 For exclusive scan, this is slightly different:
6740 #pragma omp simd reduction(inscan,+:r)
6741 for (...)
6742 {
6743 use (r);
6744 #pragma omp scan exclusive (r)
6745 r += something ();
6746 }
6747 shall have body with:
6748 // Initialization for input phase, store the reduction initializer:
6749 _20 = .GOMP_SIMD_LANE (simduid.3_14(D), 0);
6750 _21 = .GOMP_SIMD_LANE (simduid.3_14(D), 1);
6751 D.2042[_21] = 0;
6752 // Actual input phase:
6753 ...
6754 r.0_5 = D.2042[_20];
6755 _6 = _4 + r.0_5;
6756 D.2042[_20] = _6;
6757 // Initialization for scan phase:
6758 _25 = .GOMP_SIMD_LANE (simduid.3_14(D), 3);
6759 _26 = D.2043[_25];
6760 D.2044[_25] = _26;
6761 _27 = D.2042[_25];
6762 _28 = _26 + _27;
6763 D.2043[_25] = _28;
6764 // Actual scan phase:
6765 ...
6766 r.1_8 = D.2044[_20];
6767 ... */
6768
6769 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 2)
6770 {
6771 /* Match the D.2042[_21] = 0; store above. Just require that
6772 it is a constant or external definition store. */
6773 if (rhs_dt != vect_constant_def && rhs_dt != vect_external_def)
6774 {
6775 fail_init:
6776 if (dump_enabled_p ())
6777 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6778 "unsupported OpenMP scan initializer store.\n");
6779 return false;
6780 }
6781
6782 if (! loop_vinfo->scan_map)
6783 loop_vinfo->scan_map = new hash_map<tree, tree>;
6784 tree var = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
6785 tree &cached = loop_vinfo->scan_map->get_or_insert (var);
6786 if (cached)
6787 goto fail_init;
6788 cached = gimple_assign_rhs1 (STMT_VINFO_STMT (stmt_info));
6789
6790 /* These stores can be vectorized normally. */
6791 return true;
6792 }
6793
6794 if (rhs_dt != vect_internal_def)
6795 {
6796 fail:
6797 if (dump_enabled_p ())
6798 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6799 "unsupported OpenMP scan combiner pattern.\n");
6800 return false;
6801 }
6802
6803 gimple *stmt = STMT_VINFO_STMT (stmt_info);
6804 tree rhs = gimple_assign_rhs1 (stmt);
6805 if (TREE_CODE (rhs) != SSA_NAME)
6806 goto fail;
6807
6808 gimple *other_store_stmt = NULL;
6809 tree var = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
6810 bool inscan_var_store
6811 = lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var)) != NULL;
6812
6813 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4)
6814 {
6815 if (!inscan_var_store)
6816 {
6817 use_operand_p use_p;
6818 imm_use_iterator iter;
6819 FOR_EACH_IMM_USE_FAST (use_p, iter, rhs)
6820 {
6821 gimple *use_stmt = USE_STMT (use_p);
6822 if (use_stmt == stmt || is_gimple_debug (use_stmt))
6823 continue;
6824 if (gimple_bb (use_stmt) != gimple_bb (stmt)
6825 || !is_gimple_assign (use_stmt)
6826 || gimple_assign_rhs_class (use_stmt) != GIMPLE_BINARY_RHS
6827 || other_store_stmt
6828 || TREE_CODE (gimple_assign_lhs (use_stmt)) != SSA_NAME)
6829 goto fail;
6830 other_store_stmt = use_stmt;
6831 }
6832 if (other_store_stmt == NULL)
6833 goto fail;
6834 rhs = gimple_assign_lhs (other_store_stmt);
6835 if (!single_imm_use (rhs, &use_p, &other_store_stmt))
6836 goto fail;
6837 }
6838 }
6839 else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 3)
6840 {
6841 use_operand_p use_p;
6842 imm_use_iterator iter;
6843 FOR_EACH_IMM_USE_FAST (use_p, iter, rhs)
6844 {
6845 gimple *use_stmt = USE_STMT (use_p);
6846 if (use_stmt == stmt || is_gimple_debug (use_stmt))
6847 continue;
6848 if (other_store_stmt)
6849 goto fail;
6850 other_store_stmt = use_stmt;
6851 }
6852 }
6853 else
6854 goto fail;
6855
6856 gimple *def_stmt = SSA_NAME_DEF_STMT (rhs);
6857 if (gimple_bb (def_stmt) != gimple_bb (stmt)
6858 || !is_gimple_assign (def_stmt)
6859 || gimple_assign_rhs_class (def_stmt) != GIMPLE_BINARY_RHS)
6860 goto fail;
6861
6862 enum tree_code code = gimple_assign_rhs_code (def_stmt);
6863 /* For pointer addition, we should use the normal plus for the vector
6864 operation. */
6865 switch (code)
6866 {
6867 case POINTER_PLUS_EXPR:
6868 code = PLUS_EXPR;
6869 break;
6870 case MULT_HIGHPART_EXPR:
6871 goto fail;
6872 default:
6873 break;
6874 }
6875 if (TREE_CODE_LENGTH (code) != binary_op || !commutative_tree_code (code))
6876 goto fail;
6877
6878 tree rhs1 = gimple_assign_rhs1 (def_stmt);
6879 tree rhs2 = gimple_assign_rhs2 (def_stmt);
6880 if (TREE_CODE (rhs1) != SSA_NAME || TREE_CODE (rhs2) != SSA_NAME)
6881 goto fail;
6882
6883 gimple *load1_stmt = SSA_NAME_DEF_STMT (rhs1);
6884 gimple *load2_stmt = SSA_NAME_DEF_STMT (rhs2);
6885 if (gimple_bb (load1_stmt) != gimple_bb (stmt)
6886 || !gimple_assign_load_p (load1_stmt)
6887 || gimple_bb (load2_stmt) != gimple_bb (stmt)
6888 || !gimple_assign_load_p (load2_stmt))
6889 goto fail;
6890
6891 stmt_vec_info load1_stmt_info = loop_vinfo->lookup_stmt (load1_stmt);
6892 stmt_vec_info load2_stmt_info = loop_vinfo->lookup_stmt (load2_stmt);
6893 if (load1_stmt_info == NULL
6894 || load2_stmt_info == NULL
6895 || (STMT_VINFO_SIMD_LANE_ACCESS_P (load1_stmt_info)
6896 != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info))
6897 || (STMT_VINFO_SIMD_LANE_ACCESS_P (load2_stmt_info)
6898 != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info)))
6899 goto fail;
6900
6901 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && inscan_var_store)
6902 {
6903 dr_vec_info *load1_dr_info = STMT_VINFO_DR_INFO (load1_stmt_info);
6904 if (TREE_CODE (DR_BASE_ADDRESS (load1_dr_info->dr)) != ADDR_EXPR
6905 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info->dr), 0)))
6906 goto fail;
6907 tree var1 = TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info->dr), 0);
6908 tree lrhs;
6909 if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1)))
6910 lrhs = rhs1;
6911 else
6912 lrhs = rhs2;
6913 use_operand_p use_p;
6914 imm_use_iterator iter;
6915 FOR_EACH_IMM_USE_FAST (use_p, iter, lrhs)
6916 {
6917 gimple *use_stmt = USE_STMT (use_p);
6918 if (use_stmt == def_stmt || is_gimple_debug (use_stmt))
6919 continue;
6920 if (other_store_stmt)
6921 goto fail;
6922 other_store_stmt = use_stmt;
6923 }
6924 }
6925
6926 if (other_store_stmt == NULL)
6927 goto fail;
6928 if (gimple_bb (other_store_stmt) != gimple_bb (stmt)
6929 || !gimple_store_p (other_store_stmt))
6930 goto fail;
6931
6932 stmt_vec_info other_store_stmt_info
6933 = loop_vinfo->lookup_stmt (other_store_stmt);
6934 if (other_store_stmt_info == NULL
6935 || (STMT_VINFO_SIMD_LANE_ACCESS_P (other_store_stmt_info)
6936 != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info)))
6937 goto fail;
6938
6939 gimple *stmt1 = stmt;
6940 gimple *stmt2 = other_store_stmt;
6941 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && !inscan_var_store)
6942 std::swap (stmt1, stmt2);
6943 if (scan_operand_equal_p (gimple_assign_lhs (stmt1),
6944 gimple_assign_rhs1 (load2_stmt)))
6945 {
6946 std::swap (rhs1, rhs2);
6947 std::swap (load1_stmt, load2_stmt);
6948 std::swap (load1_stmt_info, load2_stmt_info);
6949 }
6950 if (!scan_operand_equal_p (gimple_assign_lhs (stmt1),
6951 gimple_assign_rhs1 (load1_stmt)))
6952 goto fail;
6953
6954 tree var3 = NULL_TREE;
6955 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 3
6956 && !scan_operand_equal_p (gimple_assign_lhs (stmt2),
6957 gimple_assign_rhs1 (load2_stmt)))
6958 goto fail;
6959 else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4)
6960 {
6961 dr_vec_info *load2_dr_info = STMT_VINFO_DR_INFO (load2_stmt_info);
6962 if (TREE_CODE (DR_BASE_ADDRESS (load2_dr_info->dr)) != ADDR_EXPR
6963 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info->dr), 0)))
6964 goto fail;
6965 var3 = TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info->dr), 0);
6966 if (!lookup_attribute ("omp simd array", DECL_ATTRIBUTES (var3))
6967 || lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var3))
6968 || lookup_attribute ("omp simd inscan exclusive",
6969 DECL_ATTRIBUTES (var3)))
6970 goto fail;
6971 }
6972
6973 dr_vec_info *other_dr_info = STMT_VINFO_DR_INFO (other_store_stmt_info);
6974 if (TREE_CODE (DR_BASE_ADDRESS (other_dr_info->dr)) != ADDR_EXPR
6975 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (other_dr_info->dr), 0)))
6976 goto fail;
6977
6978 tree var1 = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
6979 tree var2 = TREE_OPERAND (DR_BASE_ADDRESS (other_dr_info->dr), 0);
6980 if (!lookup_attribute ("omp simd array", DECL_ATTRIBUTES (var1))
6981 || !lookup_attribute ("omp simd array", DECL_ATTRIBUTES (var2))
6982 || (!lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1)))
6983 == (!lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var2))))
6984 goto fail;
6985
6986 if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1)))
6987 std::swap (var1, var2);
6988
6989 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4)
6990 {
6991 if (!lookup_attribute ("omp simd inscan exclusive",
6992 DECL_ATTRIBUTES (var1)))
6993 goto fail;
6994 var1 = var3;
6995 }
6996
6997 if (loop_vinfo->scan_map == NULL)
6998 goto fail;
6999 tree *init = loop_vinfo->scan_map->get (var1);
7000 if (init == NULL)
7001 goto fail;
7002
7003 /* The IL is as expected, now check if we can actually vectorize it.
7004 Inclusive scan:
7005 _26 = D.2043[_25];
7006 _27 = D.2042[_25];
7007 _28 = _26 + _27;
7008 D.2043[_25] = _28;
7009 D.2042[_25] = _28;
7010 should be vectorized as (where _40 is the vectorized rhs
7011 from the D.2042[_21] = 0; store):
7012 _30 = MEM <vector(8) int> [(int *)&D.2043];
7013 _31 = MEM <vector(8) int> [(int *)&D.2042];
7014 _32 = VEC_PERM_EXPR <_40, _31, { 0, 8, 9, 10, 11, 12, 13, 14 }>;
7015 _33 = _31 + _32;
7016 // _33 = { _31[0], _31[0]+_31[1], _31[1]+_31[2], ..., _31[6]+_31[7] };
7017 _34 = VEC_PERM_EXPR <_40, _33, { 0, 1, 8, 9, 10, 11, 12, 13 }>;
7018 _35 = _33 + _34;
7019 // _35 = { _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
7020 // _31[1]+.._31[4], ... _31[4]+.._31[7] };
7021 _36 = VEC_PERM_EXPR <_40, _35, { 0, 1, 2, 3, 8, 9, 10, 11 }>;
7022 _37 = _35 + _36;
7023 // _37 = { _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
7024 // _31[0]+.._31[4], ... _31[0]+.._31[7] };
7025 _38 = _30 + _37;
7026 _39 = VEC_PERM_EXPR <_38, _38, { 7, 7, 7, 7, 7, 7, 7, 7 }>;
7027 MEM <vector(8) int> [(int *)&D.2043] = _39;
7028 MEM <vector(8) int> [(int *)&D.2042] = _38;
7029 Exclusive scan:
7030 _26 = D.2043[_25];
7031 D.2044[_25] = _26;
7032 _27 = D.2042[_25];
7033 _28 = _26 + _27;
7034 D.2043[_25] = _28;
7035 should be vectorized as (where _40 is the vectorized rhs
7036 from the D.2042[_21] = 0; store):
7037 _30 = MEM <vector(8) int> [(int *)&D.2043];
7038 _31 = MEM <vector(8) int> [(int *)&D.2042];
7039 _32 = VEC_PERM_EXPR <_40, _31, { 0, 8, 9, 10, 11, 12, 13, 14 }>;
7040 _33 = VEC_PERM_EXPR <_40, _32, { 0, 8, 9, 10, 11, 12, 13, 14 }>;
7041 _34 = _32 + _33;
7042 // _34 = { 0, _31[0], _31[0]+_31[1], _31[1]+_31[2], _31[2]+_31[3],
7043 // _31[3]+_31[4], ... _31[5]+.._31[6] };
7044 _35 = VEC_PERM_EXPR <_40, _34, { 0, 1, 8, 9, 10, 11, 12, 13 }>;
7045 _36 = _34 + _35;
7046 // _36 = { 0, _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
7047 // _31[1]+.._31[4], ... _31[3]+.._31[6] };
7048 _37 = VEC_PERM_EXPR <_40, _36, { 0, 1, 2, 3, 8, 9, 10, 11 }>;
7049 _38 = _36 + _37;
7050 // _38 = { 0, _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
7051 // _31[0]+.._31[4], ... _31[0]+.._31[6] };
7052 _39 = _30 + _38;
7053 _50 = _31 + _39;
7054 _51 = VEC_PERM_EXPR <_50, _50, { 7, 7, 7, 7, 7, 7, 7, 7 }>;
7055 MEM <vector(8) int> [(int *)&D.2044] = _39;
7056 MEM <vector(8) int> [(int *)&D.2042] = _51; */
7057 enum machine_mode vec_mode = TYPE_MODE (vectype);
7058 optab optab = optab_for_tree_code (code, vectype, optab_default);
7059 if (!optab || optab_handler (optab, vec_mode) == CODE_FOR_nothing)
7060 goto fail;
7061
7062 int units_log2 = scan_store_can_perm_p (vectype, *init);
7063 if (units_log2 == -1)
7064 goto fail;
7065
7066 return true;
7067 }
7068
7069
7070 /* Function vectorizable_scan_store.
7071
7072    Helper of vectorizable_store, with arguments as for vectorizable_store.
7073    Handle only the transformation; checking is done in check_scan_store. */
7074
7075 static bool
7076 vectorizable_scan_store (vec_info *vinfo,
7077 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
7078 gimple **vec_stmt, int ncopies)
7079 {
7080 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
7081 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
7082 tree ref_type = reference_alias_ptr_type (DR_REF (dr_info->dr));
7083 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
7084
7085 if (dump_enabled_p ())
7086 dump_printf_loc (MSG_NOTE, vect_location,
7087 "transform scan store. ncopies = %d\n", ncopies);
7088
7089 gimple *stmt = STMT_VINFO_STMT (stmt_info);
7090 tree rhs = gimple_assign_rhs1 (stmt);
7091 gcc_assert (TREE_CODE (rhs) == SSA_NAME);
7092
7093 tree var = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
7094 bool inscan_var_store
7095 = lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var)) != NULL;
7096
7097 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && !inscan_var_store)
7098 {
7099 use_operand_p use_p;
7100 imm_use_iterator iter;
7101 FOR_EACH_IMM_USE_FAST (use_p, iter, rhs)
7102 {
7103 gimple *use_stmt = USE_STMT (use_p);
7104 if (use_stmt == stmt || is_gimple_debug (use_stmt))
7105 continue;
7106 rhs = gimple_assign_lhs (use_stmt);
7107 break;
7108 }
7109 }
7110
7111 gimple *def_stmt = SSA_NAME_DEF_STMT (rhs);
7112 enum tree_code code = gimple_assign_rhs_code (def_stmt);
7113 if (code == POINTER_PLUS_EXPR)
7114 code = PLUS_EXPR;
7115 gcc_assert (TREE_CODE_LENGTH (code) == binary_op
7116 && commutative_tree_code (code));
7117 tree rhs1 = gimple_assign_rhs1 (def_stmt);
7118 tree rhs2 = gimple_assign_rhs2 (def_stmt);
7119 gcc_assert (TREE_CODE (rhs1) == SSA_NAME && TREE_CODE (rhs2) == SSA_NAME);
7120 gimple *load1_stmt = SSA_NAME_DEF_STMT (rhs1);
7121 gimple *load2_stmt = SSA_NAME_DEF_STMT (rhs2);
7122 stmt_vec_info load1_stmt_info = loop_vinfo->lookup_stmt (load1_stmt);
7123 stmt_vec_info load2_stmt_info = loop_vinfo->lookup_stmt (load2_stmt);
7124 dr_vec_info *load1_dr_info = STMT_VINFO_DR_INFO (load1_stmt_info);
7125 dr_vec_info *load2_dr_info = STMT_VINFO_DR_INFO (load2_stmt_info);
7126 tree var1 = TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info->dr), 0);
7127 tree var2 = TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info->dr), 0);
7128
7129 if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1)))
7130 {
7131 std::swap (rhs1, rhs2);
7132 std::swap (var1, var2);
7133 std::swap (load1_dr_info, load2_dr_info);
7134 }
7135
7136 tree *init = loop_vinfo->scan_map->get (var1);
7137 gcc_assert (init);
7138
7139 unsigned HOST_WIDE_INT nunits;
7140 if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits))
7141 gcc_unreachable ();
7142 auto_vec<enum scan_store_kind, 16> use_whole_vector;
7143 int units_log2 = scan_store_can_perm_p (vectype, *init, &use_whole_vector);
7144 gcc_assert (units_log2 > 0);
7145 auto_vec<tree, 16> perms;
7146 perms.quick_grow (units_log2 + 1);
7147 tree zero_vec = NULL_TREE, masktype = NULL_TREE;
7148 for (int i = 0; i <= units_log2; ++i)
7149 {
7150 unsigned HOST_WIDE_INT j, k;
7151 vec_perm_builder sel (nunits, nunits, 1);
7152 sel.quick_grow (nunits);
7153 if (i == units_log2)
7154 for (j = 0; j < nunits; ++j)
7155 sel[j] = nunits - 1;
7156 else
7157 {
7158 for (j = 0; j < (HOST_WIDE_INT_1U << i); ++j)
7159 sel[j] = j;
7160 for (k = 0; j < nunits; ++j, ++k)
7161 sel[j] = nunits + k;
7162 }
7163 vec_perm_indices indices (sel, i == units_log2 ? 1 : 2, nunits);
7164 if (!use_whole_vector.is_empty ()
7165 && use_whole_vector[i] != scan_store_kind_perm)
7166 {
7167 if (zero_vec == NULL_TREE)
7168 zero_vec = build_zero_cst (vectype);
7169 if (masktype == NULL_TREE
7170 && use_whole_vector[i] == scan_store_kind_lshift_cond)
7171 masktype = truth_type_for (vectype);
7172 perms[i] = vect_gen_perm_mask_any (vectype, indices);
7173 }
7174 else
7175 perms[i] = vect_gen_perm_mask_checked (vectype, indices);
7176 }
7177
7178 tree vec_oprnd1 = NULL_TREE;
7179 tree vec_oprnd2 = NULL_TREE;
7180 tree vec_oprnd3 = NULL_TREE;
7181 tree dataref_ptr = DR_BASE_ADDRESS (dr_info->dr);
7182 tree dataref_offset = build_int_cst (ref_type, 0);
7183 tree bump = vect_get_data_ptr_increment (vinfo, dr_info,
7184 vectype, VMAT_CONTIGUOUS);
7185 tree ldataref_ptr = NULL_TREE;
7186 tree orig = NULL_TREE;
7187 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && !inscan_var_store)
7188 ldataref_ptr = DR_BASE_ADDRESS (load1_dr_info->dr);
7189 auto_vec<tree> vec_oprnds1;
7190 auto_vec<tree> vec_oprnds2;
7191 auto_vec<tree> vec_oprnds3;
7192 vect_get_vec_defs (vinfo, stmt_info, NULL, ncopies,
7193 *init, &vec_oprnds1,
7194 ldataref_ptr == NULL ? rhs1 : NULL, &vec_oprnds2,
7195 rhs2, &vec_oprnds3);
7196 for (int j = 0; j < ncopies; j++)
7197 {
7198 vec_oprnd1 = vec_oprnds1[j];
7199 if (ldataref_ptr == NULL)
7200 vec_oprnd2 = vec_oprnds2[j];
7201 vec_oprnd3 = vec_oprnds3[j];
7202 if (j == 0)
7203 orig = vec_oprnd3;
7204 else if (!inscan_var_store)
7205 dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset, bump);
7206
7207 if (ldataref_ptr)
7208 {
7209 vec_oprnd2 = make_ssa_name (vectype);
7210 tree data_ref = fold_build2 (MEM_REF, vectype,
7211 unshare_expr (ldataref_ptr),
7212 dataref_offset);
7213 vect_copy_ref_info (data_ref, DR_REF (load1_dr_info->dr));
7214 gimple *g = gimple_build_assign (vec_oprnd2, data_ref);
7215 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
7216 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
7217 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
7218 }
7219
7220 tree v = vec_oprnd2;
7221 for (int i = 0; i < units_log2; ++i)
7222 {
7223 tree new_temp = make_ssa_name (vectype);
7224 gimple *g = gimple_build_assign (new_temp, VEC_PERM_EXPR,
7225 (zero_vec
7226 && (use_whole_vector[i]
7227 != scan_store_kind_perm))
7228 ? zero_vec : vec_oprnd1, v,
7229 perms[i]);
7230 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
7231 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
7232 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
7233
7234 if (zero_vec && use_whole_vector[i] == scan_store_kind_lshift_cond)
7235 {
7236 	      /* The whole-vector shift shifted in zero bits, but if *INIT
7237 		 is not initializer_zerop, we need to replace those elements
7238 		 with elements from VEC_OPRND1. */
7239 tree_vector_builder vb (masktype, nunits, 1);
7240 for (unsigned HOST_WIDE_INT k = 0; k < nunits; ++k)
7241 vb.quick_push (k < (HOST_WIDE_INT_1U << i)
7242 ? boolean_false_node : boolean_true_node);
7243
7244 tree new_temp2 = make_ssa_name (vectype);
7245 g = gimple_build_assign (new_temp2, VEC_COND_EXPR, vb.build (),
7246 new_temp, vec_oprnd1);
7247 vect_finish_stmt_generation (vinfo, stmt_info,
7248 g, gsi);
7249 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
7250 new_temp = new_temp2;
7251 }
7252
7253 /* For exclusive scan, perform the perms[i] permutation once
7254 more. */
7255 if (i == 0
7256 && STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4
7257 && v == vec_oprnd2)
7258 {
7259 v = new_temp;
7260 --i;
7261 continue;
7262 }
7263
7264 tree new_temp2 = make_ssa_name (vectype);
7265 g = gimple_build_assign (new_temp2, code, v, new_temp);
7266 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
7267 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
7268
7269 v = new_temp2;
7270 }
7271
7272 tree new_temp = make_ssa_name (vectype);
7273 gimple *g = gimple_build_assign (new_temp, code, orig, v);
7274 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
7275 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
7276
7277 tree last_perm_arg = new_temp;
7278 /* For exclusive scan, new_temp computed above is the exclusive scan
7279 prefix sum. Turn it into inclusive prefix sum for the broadcast
7280 of the last element into orig. */
7281 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4)
7282 {
7283 last_perm_arg = make_ssa_name (vectype);
7284 g = gimple_build_assign (last_perm_arg, code, new_temp, vec_oprnd2);
7285 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
7286 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
7287 }
7288
7289 orig = make_ssa_name (vectype);
7290 g = gimple_build_assign (orig, VEC_PERM_EXPR, last_perm_arg,
7291 last_perm_arg, perms[units_log2]);
7292 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
7293 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
7294
7295 if (!inscan_var_store)
7296 {
7297 tree data_ref = fold_build2 (MEM_REF, vectype,
7298 unshare_expr (dataref_ptr),
7299 dataref_offset);
7300 vect_copy_ref_info (data_ref, DR_REF (dr_info->dr));
7301 g = gimple_build_assign (data_ref, new_temp);
7302 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
7303 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
7304 }
7305 }
7306
7307 if (inscan_var_store)
7308 for (int j = 0; j < ncopies; j++)
7309 {
7310 if (j != 0)
7311 dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset, bump);
7312
7313 tree data_ref = fold_build2 (MEM_REF, vectype,
7314 unshare_expr (dataref_ptr),
7315 dataref_offset);
7316 vect_copy_ref_info (data_ref, DR_REF (dr_info->dr));
7317 gimple *g = gimple_build_assign (data_ref, orig);
7318 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
7319 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
7320 }
7321 return true;
7322 }
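/* A scalar model (illustrative only, not GCC code) of the log2-step
   inclusive scan emitted above -- a Hillis-Steele scan -- assuming
   eight int elements, a zero initializer and PLUS_EXPR as the combiner:

     #include <cstdio>

     int main ()
     {
       int v[8] = { 1, 2, 3, 4, 5, 6, 7, 8 };
       for (unsigned shift = 1; shift < 8; shift <<= 1)
	 {
	   int t[8];
	   // VEC_PERM_EXPR step: shift the vector up by SHIFT lanes,
	   // filling the vacated low lanes from the (zero) initializer.
	   for (unsigned j = 0; j < 8; ++j)
	     t[j] = j < shift ? 0 : v[j - shift];
	   // Combiner step, one vector add per round.
	   for (unsigned j = 0; j < 8; ++j)
	     v[j] += t[j];
	 }
       for (unsigned j = 0; j < 8; ++j)
	 printf ("%d ", v[j]);	// prints 1 3 6 10 15 21 28 36
       printf ("\n");
     }

   Three rounds of permute-and-add thus yield all prefix sums at once,
   which is what the perms[]/code sequence in the loop above computes on
   whole vectors.  */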
7323
7324
7325 /* Function vectorizable_store.
7326
7327    Check if STMT_INFO defines a non-scalar data-ref (array/pointer/structure)
7328 that can be vectorized.
7329 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
7330 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
7331 Return true if STMT_INFO is vectorizable in this way. */
7332
7333 static bool
7334 vectorizable_store (vec_info *vinfo,
7335 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
7336 gimple **vec_stmt, slp_tree slp_node,
7337 stmt_vector_for_cost *cost_vec)
7338 {
7339 tree data_ref;
7340 tree op;
7341 tree vec_oprnd = NULL_TREE;
7342 tree elem_type;
7343 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
7344 class loop *loop = NULL;
7345 machine_mode vec_mode;
7346 tree dummy;
7347 enum vect_def_type rhs_dt = vect_unknown_def_type;
7348 enum vect_def_type mask_dt = vect_unknown_def_type;
7349 tree dataref_ptr = NULL_TREE;
7350 tree dataref_offset = NULL_TREE;
7351 gimple *ptr_incr = NULL;
7352 int ncopies;
7353 int j;
7354 stmt_vec_info first_stmt_info;
7355 bool grouped_store;
7356 unsigned int group_size, i;
7357 vec<tree> oprnds = vNULL;
7358 vec<tree> result_chain = vNULL;
7359 vec<tree> vec_oprnds = vNULL;
7360 bool slp = (slp_node != NULL);
7361 unsigned int vec_num;
7362 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
7363 tree aggr_type;
7364 gather_scatter_info gs_info;
7365 poly_uint64 vf;
7366 vec_load_store_type vls_type;
7367 tree ref_type;
7368
7369 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
7370 return false;
7371
7372 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
7373 && ! vec_stmt)
7374 return false;
7375
7376 /* Is vectorizable store? */
7377
7378 tree mask = NULL_TREE, mask_vectype = NULL_TREE;
7379 if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
7380 {
7381 tree scalar_dest = gimple_assign_lhs (assign);
7382 if (TREE_CODE (scalar_dest) == VIEW_CONVERT_EXPR
7383 && is_pattern_stmt_p (stmt_info))
7384 scalar_dest = TREE_OPERAND (scalar_dest, 0);
7385 if (TREE_CODE (scalar_dest) != ARRAY_REF
7386 && TREE_CODE (scalar_dest) != BIT_FIELD_REF
7387 && TREE_CODE (scalar_dest) != INDIRECT_REF
7388 && TREE_CODE (scalar_dest) != COMPONENT_REF
7389 && TREE_CODE (scalar_dest) != IMAGPART_EXPR
7390 && TREE_CODE (scalar_dest) != REALPART_EXPR
7391 && TREE_CODE (scalar_dest) != MEM_REF)
7392 return false;
7393 }
7394 else
7395 {
7396 gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
7397 if (!call || !gimple_call_internal_p (call))
7398 return false;
7399
7400 internal_fn ifn = gimple_call_internal_fn (call);
7401 if (!internal_store_fn_p (ifn))
7402 return false;
7403
7404 if (slp_node != NULL)
7405 {
7406 if (dump_enabled_p ())
7407 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7408 "SLP of masked stores not supported.\n");
7409 return false;
7410 }
7411
7412 int mask_index = internal_fn_mask_index (ifn);
7413 if (mask_index >= 0
7414 && !vect_check_scalar_mask (vinfo, stmt_info, slp_node, mask_index,
7415 &mask, NULL, &mask_dt, &mask_vectype))
7416 return false;
7417 }
7418
7419 op = vect_get_store_rhs (stmt_info);
7420
7421 /* Cannot have hybrid store SLP -- that would mean storing to the
7422 same location twice. */
7423 gcc_assert (slp == PURE_SLP_STMT (stmt_info));
7424
7425 tree vectype = STMT_VINFO_VECTYPE (stmt_info), rhs_vectype = NULL_TREE;
7426 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
7427
7428 if (loop_vinfo)
7429 {
7430 loop = LOOP_VINFO_LOOP (loop_vinfo);
7431 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
7432 }
7433 else
7434 vf = 1;
7435
7436 /* Multiple types in SLP are handled by creating the appropriate number of
7437 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
7438 case of SLP. */
7439 if (slp)
7440 ncopies = 1;
7441 else
7442 ncopies = vect_get_num_copies (loop_vinfo, vectype);
7443
7444 gcc_assert (ncopies >= 1);
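/* For example (numbers assumed for illustration): with a vectorization
   factor of 16 and a V4SI vectype, vect_get_num_copies returns
   16 / 4 == 4, so four vector stores are generated per scalar store.  */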
7445
7446 /* FORNOW. This restriction should be relaxed. */
7447 if (loop && nested_in_vect_loop_p (loop, stmt_info) && ncopies > 1)
7448 {
7449 if (dump_enabled_p ())
7450 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7451 "multiple types in nested loop.\n");
7452 return false;
7453 }
7454
7455 if (!vect_check_store_rhs (vinfo, stmt_info, slp_node,
7456 op, &rhs_dt, &rhs_vectype, &vls_type))
7457 return false;
7458
7459 elem_type = TREE_TYPE (vectype);
7460 vec_mode = TYPE_MODE (vectype);
7461
7462 if (!STMT_VINFO_DATA_REF (stmt_info))
7463 return false;
7464
7465 vect_memory_access_type memory_access_type;
7466 enum dr_alignment_support alignment_support_scheme;
7467 int misalignment;
7468 poly_int64 poffset;
7469 if (!get_load_store_type (vinfo, stmt_info, vectype, slp_node, mask, vls_type,
7470 ncopies, &memory_access_type, &poffset,
7471 &alignment_support_scheme, &misalignment, &gs_info))
7472 return false;
7473
7474 if (mask)
7475 {
7476 if (memory_access_type == VMAT_CONTIGUOUS)
7477 {
7478 if (!VECTOR_MODE_P (vec_mode)
7479 || !can_vec_mask_load_store_p (vec_mode,
7480 TYPE_MODE (mask_vectype), false))
7481 return false;
7482 }
7483 else if (memory_access_type != VMAT_LOAD_STORE_LANES
7484 && (memory_access_type != VMAT_GATHER_SCATTER
7485 || (gs_info.decl && !VECTOR_BOOLEAN_TYPE_P (mask_vectype))))
7486 {
7487 if (dump_enabled_p ())
7488 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7489 "unsupported access type for masked store.\n");
7490 return false;
7491 }
7492 }
7493 else
7494 {
7495       /* FORNOW.  In some cases we can vectorize even if the data type is
7496 	 not supported (e.g. array initialization with 0). */
7497 if (optab_handler (mov_optab, vec_mode) == CODE_FOR_nothing)
7498 return false;
7499 }
7500
7501 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info), *first_dr_info = NULL;
7502 grouped_store = (STMT_VINFO_GROUPED_ACCESS (stmt_info)
7503 && memory_access_type != VMAT_GATHER_SCATTER
7504 && (slp || memory_access_type != VMAT_CONTIGUOUS));
7505 if (grouped_store)
7506 {
7507 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
7508 first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
7509 group_size = DR_GROUP_SIZE (first_stmt_info);
7510 }
7511 else
7512 {
7513 first_stmt_info = stmt_info;
7514 first_dr_info = dr_info;
7515 group_size = vec_num = 1;
7516 }
7517
7518 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) > 1 && !vec_stmt)
7519 {
7520 if (!check_scan_store (vinfo, stmt_info, vectype, rhs_dt, slp, mask,
7521 memory_access_type))
7522 return false;
7523 }
7524
7525 if (!vec_stmt) /* transformation not required. */
7526 {
7527 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;
7528
7529 if (loop_vinfo
7530 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo))
7531 check_load_store_for_partial_vectors (loop_vinfo, vectype, vls_type,
7532 group_size, memory_access_type,
7533 ncopies, &gs_info, mask);
7534
7535 if (slp_node
7536 && !vect_maybe_update_slp_op_vectype (SLP_TREE_CHILDREN (slp_node)[0],
7537 vectype))
7538 {
7539 if (dump_enabled_p ())
7540 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7541 "incompatible vector types for invariants\n");
7542 return false;
7543 }
7544
7545 if (dump_enabled_p ()
7546 && memory_access_type != VMAT_ELEMENTWISE
7547 && memory_access_type != VMAT_GATHER_SCATTER
7548 && alignment_support_scheme != dr_aligned)
7549 dump_printf_loc (MSG_NOTE, vect_location,
7550 "Vectorizing an unaligned access.\n");
7551
7552 STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
7553 vect_model_store_cost (vinfo, stmt_info, ncopies,
7554 memory_access_type, alignment_support_scheme,
7555 misalignment, vls_type, slp_node, cost_vec);
7556 return true;
7557 }
7558 gcc_assert (memory_access_type == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info));
7559
7560 /* Transform. */
7561
7562 ensure_base_align (dr_info);
7563
7564 if (memory_access_type == VMAT_GATHER_SCATTER && gs_info.decl)
7565 {
7566 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE, src;
7567 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info.decl));
7568 tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
7569 tree ptr, var, scale, vec_mask;
7570 tree mask_arg = NULL_TREE, mask_op = NULL_TREE, perm_mask = NULL_TREE;
7571 tree mask_halfvectype = mask_vectype;
7572 edge pe = loop_preheader_edge (loop);
7573 gimple_seq seq;
7574 basic_block new_bb;
7575 enum { NARROW, NONE, WIDEN } modifier;
7576 poly_uint64 scatter_off_nunits
7577 = TYPE_VECTOR_SUBPARTS (gs_info.offset_vectype);
7578
7579 if (known_eq (nunits, scatter_off_nunits))
7580 modifier = NONE;
7581 else if (known_eq (nunits * 2, scatter_off_nunits))
7582 {
7583 modifier = WIDEN;
7584
7585 /* Currently gathers and scatters are only supported for
7586 fixed-length vectors. */
7587 unsigned int count = scatter_off_nunits.to_constant ();
7588 vec_perm_builder sel (count, count, 1);
7589 for (i = 0; i < (unsigned int) count; ++i)
7590 sel.quick_push (i | (count / 2));
7591
7592 vec_perm_indices indices (sel, 1, count);
7593 perm_mask = vect_gen_perm_mask_checked (gs_info.offset_vectype,
7594 indices);
7595 gcc_assert (perm_mask != NULL_TREE);
7596 }
7597 else if (known_eq (nunits, scatter_off_nunits * 2))
7598 {
7599 modifier = NARROW;
7600
7601 /* Currently gathers and scatters are only supported for
7602 fixed-length vectors. */
7603 unsigned int count = nunits.to_constant ();
7604 vec_perm_builder sel (count, count, 1);
7605 for (i = 0; i < (unsigned int) count; ++i)
7606 sel.quick_push (i | (count / 2));
7607
7608 vec_perm_indices indices (sel, 2, count);
7609 perm_mask = vect_gen_perm_mask_checked (vectype, indices);
7610 gcc_assert (perm_mask != NULL_TREE);
7611 ncopies *= 2;
7612
7613 if (mask)
7614 mask_halfvectype = truth_type_for (gs_info.offset_vectype);
7615 }
7616 else
7617 gcc_unreachable ();
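/* For illustration, with count == 8 the selector built above is
   { 4, 5, 6, 7, 4, 5, 6, 7 } (element i | count/2, i.e. the high half
   duplicated): in the WIDEN case the odd-numbered copies permute the
   high half of the offset vector into place, and in the NARROW case
   they do the same for the rhs vector.  */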
7618
7619 rettype = TREE_TYPE (TREE_TYPE (gs_info.decl));
7620 ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
7621 masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
7622 idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
7623 srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
7624 scaletype = TREE_VALUE (arglist);
7625
7626 gcc_checking_assert (TREE_CODE (masktype) == INTEGER_TYPE
7627 && TREE_CODE (rettype) == VOID_TYPE);
7628
7629 ptr = fold_convert (ptrtype, gs_info.base);
7630 if (!is_gimple_min_invariant (ptr))
7631 {
7632 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
7633 new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
7634 gcc_assert (!new_bb);
7635 }
7636
7637 if (mask == NULL_TREE)
7638 {
7639 mask_arg = build_int_cst (masktype, -1);
7640 mask_arg = vect_init_vector (vinfo, stmt_info,
7641 mask_arg, masktype, NULL);
7642 }
7643
7644 scale = build_int_cst (scaletype, gs_info.scale);
7645
7646 auto_vec<tree> vec_oprnds0;
7647 auto_vec<tree> vec_oprnds1;
7648 auto_vec<tree> vec_masks;
7649 if (mask)
7650 {
7651 tree mask_vectype = truth_type_for (vectype);
7652 vect_get_vec_defs_for_operand (vinfo, stmt_info,
7653 modifier == NARROW
7654 ? ncopies / 2 : ncopies,
7655 mask, &vec_masks, mask_vectype);
7656 }
7657 vect_get_vec_defs_for_operand (vinfo, stmt_info,
7658 modifier == WIDEN
7659 ? ncopies / 2 : ncopies,
7660 gs_info.offset, &vec_oprnds0);
7661 vect_get_vec_defs_for_operand (vinfo, stmt_info,
7662 modifier == NARROW
7663 ? ncopies / 2 : ncopies,
7664 op, &vec_oprnds1);
7665 for (j = 0; j < ncopies; ++j)
7666 {
7667 if (modifier == WIDEN)
7668 {
7669 if (j & 1)
7670 op = permute_vec_elements (vinfo, vec_oprnd0, vec_oprnd0,
7671 perm_mask, stmt_info, gsi);
7672 else
7673 op = vec_oprnd0 = vec_oprnds0[j / 2];
7674 src = vec_oprnd1 = vec_oprnds1[j];
7675 if (mask)
7676 mask_op = vec_mask = vec_masks[j];
7677 }
7678 else if (modifier == NARROW)
7679 {
7680 if (j & 1)
7681 src = permute_vec_elements (vinfo, vec_oprnd1, vec_oprnd1,
7682 perm_mask, stmt_info, gsi);
7683 else
7684 src = vec_oprnd1 = vec_oprnds1[j / 2];
7685 op = vec_oprnd0 = vec_oprnds0[j];
7686 if (mask)
7687 mask_op = vec_mask = vec_masks[j / 2];
7688 }
7689 else
7690 {
7691 op = vec_oprnd0 = vec_oprnds0[j];
7692 src = vec_oprnd1 = vec_oprnds1[j];
7693 if (mask)
7694 mask_op = vec_mask = vec_masks[j];
7695 }
7696
7697 if (!useless_type_conversion_p (srctype, TREE_TYPE (src)))
7698 {
7699 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (src)),
7700 TYPE_VECTOR_SUBPARTS (srctype)));
7701 var = vect_get_new_ssa_name (srctype, vect_simple_var);
7702 src = build1 (VIEW_CONVERT_EXPR, srctype, src);
7703 gassign *new_stmt
7704 = gimple_build_assign (var, VIEW_CONVERT_EXPR, src);
7705 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7706 src = var;
7707 }
7708
7709 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
7710 {
7711 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op)),
7712 TYPE_VECTOR_SUBPARTS (idxtype)));
7713 var = vect_get_new_ssa_name (idxtype, vect_simple_var);
7714 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
7715 gassign *new_stmt
7716 = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
7717 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7718 op = var;
7719 }
7720
7721 if (mask)
7722 {
7723 tree utype;
7724 mask_arg = mask_op;
7725 if (modifier == NARROW)
7726 {
7727 var = vect_get_new_ssa_name (mask_halfvectype,
7728 vect_simple_var);
7729 gassign *new_stmt
7730 = gimple_build_assign (var, (j & 1) ? VEC_UNPACK_HI_EXPR
7731 : VEC_UNPACK_LO_EXPR,
7732 mask_op);
7733 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7734 mask_arg = var;
7735 }
7736 tree optype = TREE_TYPE (mask_arg);
7737 if (TYPE_MODE (masktype) == TYPE_MODE (optype))
7738 utype = masktype;
7739 else
7740 utype = lang_hooks.types.type_for_mode (TYPE_MODE (optype), 1);
7741 var = vect_get_new_ssa_name (utype, vect_scalar_var);
7742 mask_arg = build1 (VIEW_CONVERT_EXPR, utype, mask_arg);
7743 gassign *new_stmt
7744 = gimple_build_assign (var, VIEW_CONVERT_EXPR, mask_arg);
7745 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7746 mask_arg = var;
7747 if (!useless_type_conversion_p (masktype, utype))
7748 {
7749 gcc_assert (TYPE_PRECISION (utype)
7750 <= TYPE_PRECISION (masktype));
7751 var = vect_get_new_ssa_name (masktype, vect_scalar_var);
7752 new_stmt = gimple_build_assign (var, NOP_EXPR, mask_arg);
7753 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7754 mask_arg = var;
7755 }
7756 }
7757
7758 gcall *new_stmt
7759 = gimple_build_call (gs_info.decl, 5, ptr, mask_arg, op, src, scale);
7760 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7761
7762 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
7763 }
7764 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
7765 return true;
7766 }
7767 else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) >= 3)
7768 return vectorizable_scan_store (vinfo, stmt_info, gsi, vec_stmt, ncopies);
7769
7770 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
7771 DR_GROUP_STORE_COUNT (DR_GROUP_FIRST_ELEMENT (stmt_info))++;
7772
7773 if (grouped_store)
7774 {
7775 /* FORNOW */
7776 gcc_assert (!loop || !nested_in_vect_loop_p (loop, stmt_info));
7777
7778 /* We vectorize all the stmts of the interleaving group when we
7779 reach the last stmt in the group. */
7780 if (DR_GROUP_STORE_COUNT (first_stmt_info)
7781 < DR_GROUP_SIZE (first_stmt_info)
7782 && !slp)
7783 {
7784 *vec_stmt = NULL;
7785 return true;
7786 }
7787
7788 if (slp)
7789 {
7790 grouped_store = false;
7791 /* VEC_NUM is the number of vect stmts to be created for this
7792 group. */
7793 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
7794 first_stmt_info = SLP_TREE_SCALAR_STMTS (slp_node)[0];
7795 gcc_assert (DR_GROUP_FIRST_ELEMENT (first_stmt_info)
7796 == first_stmt_info);
7797 first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
7798 op = vect_get_store_rhs (first_stmt_info);
7799 }
7800 else
7801 /* VEC_NUM is the number of vect stmts to be created for this
7802 group. */
7803 vec_num = group_size;
7804
7805 ref_type = get_group_alias_ptr_type (first_stmt_info);
7806 }
7807 else
7808 ref_type = reference_alias_ptr_type (DR_REF (first_dr_info->dr));
7809
7810 if (dump_enabled_p ())
7811 dump_printf_loc (MSG_NOTE, vect_location,
7812 "transform store. ncopies = %d\n", ncopies);
7813
7814 if (memory_access_type == VMAT_ELEMENTWISE
7815 || memory_access_type == VMAT_STRIDED_SLP)
7816 {
7817 gimple_stmt_iterator incr_gsi;
7818 bool insert_after;
7819 gimple *incr;
7820 tree offvar;
7821 tree ivstep;
7822 tree running_off;
7823 tree stride_base, stride_step, alias_off;
7824 tree vec_oprnd;
7825 tree dr_offset;
7826 unsigned int g;
7827 /* Checked by get_load_store_type. */
7828 unsigned int const_nunits = nunits.to_constant ();
7829
7830 gcc_assert (!LOOP_VINFO_FULLY_MASKED_P (loop_vinfo));
7831 gcc_assert (!nested_in_vect_loop_p (loop, stmt_info));
7832
7833 dr_offset = get_dr_vinfo_offset (vinfo, first_dr_info);
7834 stride_base
7835 = fold_build_pointer_plus
7836 (DR_BASE_ADDRESS (first_dr_info->dr),
7837 size_binop (PLUS_EXPR,
7838 convert_to_ptrofftype (dr_offset),
7839 convert_to_ptrofftype (DR_INIT (first_dr_info->dr))));
7840 stride_step = fold_convert (sizetype, DR_STEP (first_dr_info->dr));
7841
7842 /* For a store with loop-invariant (but other than power-of-2)
7843 stride (i.e. not a grouped access) like so:
7844
7845 for (i = 0; i < n; i += stride)
7846 array[i] = ...;
7847
7848 we generate a new induction variable and new stores from
7849 the components of the (vectorized) rhs:
7850
7851 for (j = 0; ; j += VF*stride)
7852 vectemp = ...;
7853 tmp1 = vectemp[0];
7854 array[j] = tmp1;
7855 tmp2 = vectemp[1];
7856 array[j + stride] = tmp2;
7857 ...
7858 */
7859
7860 unsigned nstores = const_nunits;
7861 unsigned lnel = 1;
7862 tree ltype = elem_type;
7863 tree lvectype = vectype;
7864 if (slp)
7865 {
7866 if (group_size < const_nunits
7867 && const_nunits % group_size == 0)
7868 {
7869 nstores = const_nunits / group_size;
7870 lnel = group_size;
7871 ltype = build_vector_type (elem_type, group_size);
7872 lvectype = vectype;
7873
7874 	      /* First check whether the vec_extract optab lacks direct support
7875 		 for extracting these vector elts. */
7876 scalar_mode elmode = SCALAR_TYPE_MODE (elem_type);
7877 machine_mode vmode;
7878 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
7879 || !related_vector_mode (TYPE_MODE (vectype), elmode,
7880 group_size).exists (&vmode)
7881 || (convert_optab_handler (vec_extract_optab,
7882 TYPE_MODE (vectype), vmode)
7883 == CODE_FOR_nothing))
7884 {
7885 /* Try to avoid emitting an extract of vector elements
7886 by performing the extracts using an integer type of the
7887 same size, extracting from a vector of those and then
7888 re-interpreting it as the original vector type if
7889 supported. */
7890 unsigned lsize
7891 = group_size * GET_MODE_BITSIZE (elmode);
7892 unsigned int lnunits = const_nunits / group_size;
7893 /* If we can't construct such a vector fall back to
7894 element extracts from the original vector type and
7895 element size stores. */
7896 if (int_mode_for_size (lsize, 0).exists (&elmode)
7897 && VECTOR_MODE_P (TYPE_MODE (vectype))
7898 && related_vector_mode (TYPE_MODE (vectype), elmode,
7899 lnunits).exists (&vmode)
7900 && (convert_optab_handler (vec_extract_optab,
7901 vmode, elmode)
7902 != CODE_FOR_nothing))
7903 {
7904 nstores = lnunits;
7905 lnel = group_size;
7906 ltype = build_nonstandard_integer_type (lsize, 1);
7907 lvectype = build_vector_type (ltype, nstores);
7908 }
7909 /* Else fall back to vector extraction anyway.
7910 Fewer stores are more important than avoiding spilling
7911 of the vector we extract from. Compared to the
7912 construction case in vectorizable_load no store-forwarding
7913 issue exists here for reasonable archs. */
7914 }
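	  /* For example (modes assumed for illustration): for group_size == 2
	     int elements stored out of a V8SI vector, the code above first
	     tries a direct V8SI -> V2SI vec_extract; if the target lacks one,
	     it views the V8SI value as V4DI and extracts DImode chunks
	     instead, so each store still writes both group elements at once
	     (lsize == 64, lnunits == 4).  */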
7915 }
7916 else if (group_size >= const_nunits
7917 && group_size % const_nunits == 0)
7918 {
7919 nstores = 1;
7920 lnel = const_nunits;
7921 ltype = vectype;
7922 lvectype = vectype;
7923 }
7924 ltype = build_aligned_type (ltype, TYPE_ALIGN (elem_type));
7925 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
7926 }
7927
7928 ivstep = stride_step;
7929 ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (ivstep), ivstep,
7930 build_int_cst (TREE_TYPE (ivstep), vf));
7931
7932 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
7933
7934 stride_base = cse_and_gimplify_to_preheader (loop_vinfo, stride_base);
7935 ivstep = cse_and_gimplify_to_preheader (loop_vinfo, ivstep);
7936 create_iv (stride_base, ivstep, NULL,
7937 loop, &incr_gsi, insert_after,
7938 &offvar, NULL);
7939 incr = gsi_stmt (incr_gsi);
7940
7941 stride_step = cse_and_gimplify_to_preheader (loop_vinfo, stride_step);
7942
7943 alias_off = build_int_cst (ref_type, 0);
7944 stmt_vec_info next_stmt_info = first_stmt_info;
7945 for (g = 0; g < group_size; g++)
7946 {
7947 running_off = offvar;
7948 if (g)
7949 {
7950 tree size = TYPE_SIZE_UNIT (ltype);
7951 tree pos = fold_build2 (MULT_EXPR, sizetype, size_int (g),
7952 size);
7953 tree newoff = copy_ssa_name (running_off, NULL);
7954 incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
7955 running_off, pos);
7956 vect_finish_stmt_generation (vinfo, stmt_info, incr, gsi);
7957 running_off = newoff;
7958 }
7959 if (!slp)
7960 op = vect_get_store_rhs (next_stmt_info);
7961 vect_get_vec_defs (vinfo, next_stmt_info, slp_node, ncopies,
7962 op, &vec_oprnds);
7963 unsigned int group_el = 0;
7964 unsigned HOST_WIDE_INT
7965 elsz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
7966 for (j = 0; j < ncopies; j++)
7967 {
7968 vec_oprnd = vec_oprnds[j];
7969 /* Pun the vector to extract from if necessary. */
7970 if (lvectype != vectype)
7971 {
7972 tree tem = make_ssa_name (lvectype);
7973 gimple *pun
7974 = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
7975 lvectype, vec_oprnd));
7976 vect_finish_stmt_generation (vinfo, stmt_info, pun, gsi);
7977 vec_oprnd = tem;
7978 }
7979 for (i = 0; i < nstores; i++)
7980 {
7981 tree newref, newoff;
7982 gimple *incr, *assign;
7983 tree size = TYPE_SIZE (ltype);
7984 /* Extract the i'th component. */
7985 tree pos = fold_build2 (MULT_EXPR, bitsizetype,
7986 bitsize_int (i), size);
7987 tree elem = fold_build3 (BIT_FIELD_REF, ltype, vec_oprnd,
7988 size, pos);
7989
7990 elem = force_gimple_operand_gsi (gsi, elem, true,
7991 NULL_TREE, true,
7992 GSI_SAME_STMT);
7993
7994 tree this_off = build_int_cst (TREE_TYPE (alias_off),
7995 group_el * elsz);
7996 newref = build2 (MEM_REF, ltype,
7997 running_off, this_off);
7998 vect_copy_ref_info (newref, DR_REF (first_dr_info->dr));
7999
8000 /* And store it to *running_off. */
8001 assign = gimple_build_assign (newref, elem);
8002 vect_finish_stmt_generation (vinfo, stmt_info, assign, gsi);
8003
8004 group_el += lnel;
8005 if (! slp
8006 || group_el == group_size)
8007 {
8008 newoff = copy_ssa_name (running_off, NULL);
8009 incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
8010 running_off, stride_step);
8011 vect_finish_stmt_generation (vinfo, stmt_info, incr, gsi);
8012
8013 running_off = newoff;
8014 group_el = 0;
8015 }
8016 if (g == group_size - 1
8017 && !slp)
8018 {
8019 if (j == 0 && i == 0)
8020 *vec_stmt = assign;
8021 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (assign);
8022 }
8023 }
8024 }
8025 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
8026 vec_oprnds.release ();
8027 if (slp)
8028 break;
8029 }
8030
8031 return true;
8032 }
8033
8034 auto_vec<tree> dr_chain (group_size);
8035 oprnds.create (group_size);
8036
8037 gcc_assert (alignment_support_scheme);
8038 vec_loop_masks *loop_masks
8039 = (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
8040 ? &LOOP_VINFO_MASKS (loop_vinfo)
8041 : NULL);
8042 vec_loop_lens *loop_lens
8043 = (loop_vinfo && LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo)
8044 ? &LOOP_VINFO_LENS (loop_vinfo)
8045 : NULL);
8046
8047   /* We shouldn't be using a length-based approach if the loop is fully
8048      masked. */
8048 gcc_assert (!loop_lens || !loop_masks);
8049
8050 /* Targets with store-lane instructions must not require explicit
8051 realignment. vect_supportable_dr_alignment always returns either
8052 dr_aligned or dr_unaligned_supported for masked operations. */
8053 gcc_assert ((memory_access_type != VMAT_LOAD_STORE_LANES
8054 && !mask
8055 && !loop_masks)
8056 || alignment_support_scheme == dr_aligned
8057 || alignment_support_scheme == dr_unaligned_supported);
8058
8059 tree offset = NULL_TREE;
8060 if (!known_eq (poffset, 0))
8061 offset = size_int (poffset);
8062
8063 tree bump;
8064 tree vec_offset = NULL_TREE;
8065 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
8066 {
8067 aggr_type = NULL_TREE;
8068 bump = NULL_TREE;
8069 }
8070 else if (memory_access_type == VMAT_GATHER_SCATTER)
8071 {
8072 aggr_type = elem_type;
8073 vect_get_strided_load_store_ops (stmt_info, loop_vinfo, &gs_info,
8074 &bump, &vec_offset);
8075 }
8076 else
8077 {
8078 if (memory_access_type == VMAT_LOAD_STORE_LANES)
8079 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
8080 else
8081 aggr_type = vectype;
8082 bump = vect_get_data_ptr_increment (vinfo, dr_info, aggr_type,
8083 memory_access_type);
8084 }
8085
8086 if (mask)
8087 LOOP_VINFO_HAS_MASK_STORE (loop_vinfo) = true;
8088
8089   /* In case the vectorization factor (VF) is bigger than the number
8090      of elements that we can fit in a vectype (nunits), we have to generate
8091      more than one vector stmt - i.e. we need to "unroll" the
8092      vector stmt by a factor of VF/nunits. */
8093
8094 /* In case of interleaving (non-unit grouped access):
8095
8096 S1: &base + 2 = x2
8097 S2: &base = x0
8098 S3: &base + 1 = x1
8099 S4: &base + 3 = x3
8100
8101 We create vectorized stores starting from base address (the access of the
8102 first stmt in the chain (S2 in the above example), when the last store stmt
8103 of the chain (S4) is reached:
8104
8105 VS1: &base = vx2
8106 VS2: &base + vec_size*1 = vx0
8107 VS3: &base + vec_size*2 = vx1
8108 VS4: &base + vec_size*3 = vx3
8109
8110 Then permutation statements are generated:
8111
8112 VS5: vx5 = VEC_PERM_EXPR < vx0, vx3, {0, 8, 1, 9, 2, 10, 3, 11} >
8113 VS6: vx6 = VEC_PERM_EXPR < vx0, vx3, {4, 12, 5, 13, 6, 14, 7, 15} >
8114 ...
8115
8116 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
8117 (the order of the data-refs in the output of vect_permute_store_chain
8118 corresponds to the order of scalar stmts in the interleaving chain - see
8119 the documentation of vect_permute_store_chain()).
8120
8121 In case of both multiple types and interleaving, above vector stores and
8122 permutation stmts are created for every copy. The result vector stmts are
8123 put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
8124 STMT_VINFO_RELATED_STMT for the next copies.
8125 */
8126
8127 auto_vec<tree> vec_masks;
8128 tree vec_mask = NULL;
8129 auto_vec<tree> vec_offsets;
8130 auto_vec<vec<tree> > gvec_oprnds;
8131 gvec_oprnds.safe_grow_cleared (group_size, true);
8132 for (j = 0; j < ncopies; j++)
8133 {
8134 gimple *new_stmt;
8135 if (j == 0)
8136 {
8137 if (slp)
8138 {
8139 /* Get vectorized arguments for SLP_NODE. */
8140 vect_get_vec_defs (vinfo, stmt_info, slp_node, 1,
8141 op, &vec_oprnds);
8142 vec_oprnd = vec_oprnds[0];
8143 }
8144 else
8145 {
8146 /* For interleaved stores we collect vectorized defs for all the
8147 stores in the group in DR_CHAIN and OPRNDS. DR_CHAIN is then
8148 used as an input to vect_permute_store_chain().
8149
8150 If the store is not grouped, DR_GROUP_SIZE is 1, and DR_CHAIN
8151 and OPRNDS are of size 1. */
8152 stmt_vec_info next_stmt_info = first_stmt_info;
8153 for (i = 0; i < group_size; i++)
8154 {
8155 /* Since gaps are not supported for interleaved stores,
8156 DR_GROUP_SIZE is the exact number of stmts in the chain.
8157 		 Therefore, NEXT_STMT_INFO can't be NULL. In case
8158 that there is no interleaving, DR_GROUP_SIZE is 1,
8159 and only one iteration of the loop will be executed. */
8160 op = vect_get_store_rhs (next_stmt_info);
8161 vect_get_vec_defs_for_operand (vinfo, next_stmt_info,
8162 ncopies, op, &gvec_oprnds[i]);
8163 vec_oprnd = gvec_oprnds[i][0];
8164 dr_chain.quick_push (gvec_oprnds[i][0]);
8165 oprnds.quick_push (gvec_oprnds[i][0]);
8166 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
8167 }
8168 if (mask)
8169 {
8170 vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
8171 mask, &vec_masks, mask_vectype);
8172 vec_mask = vec_masks[0];
8173 }
8174 }
8175
8176 /* We should have caught mismatched types earlier. */
8177 gcc_assert (useless_type_conversion_p (vectype,
8178 TREE_TYPE (vec_oprnd)));
8179 bool simd_lane_access_p
8180 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) != 0;
8181 if (simd_lane_access_p
8182 && !loop_masks
8183 && TREE_CODE (DR_BASE_ADDRESS (first_dr_info->dr)) == ADDR_EXPR
8184 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr_info->dr), 0))
8185 && integer_zerop (get_dr_vinfo_offset (vinfo, first_dr_info))
8186 && integer_zerop (DR_INIT (first_dr_info->dr))
8187 && alias_sets_conflict_p (get_alias_set (aggr_type),
8188 get_alias_set (TREE_TYPE (ref_type))))
8189 {
8190 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr_info->dr));
8191 dataref_offset = build_int_cst (ref_type, 0);
8192 }
8193 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
8194 {
8195 vect_get_gather_scatter_ops (loop_vinfo, loop, stmt_info,
8196 slp_node, &gs_info, &dataref_ptr,
8197 &vec_offsets);
8198 vec_offset = vec_offsets[0];
8199 }
8200 else
8201 dataref_ptr
8202 = vect_create_data_ref_ptr (vinfo, first_stmt_info, aggr_type,
8203 simd_lane_access_p ? loop : NULL,
8204 offset, &dummy, gsi, &ptr_incr,
8205 simd_lane_access_p, bump);
8206 }
8207 else
8208 {
8209 /* For interleaved stores we created vectorized defs for all the
8210 defs stored in OPRNDS in the previous iteration (previous copy).
8211 DR_CHAIN is then used as an input to vect_permute_store_chain().
8212 If the store is not grouped, DR_GROUP_SIZE is 1, and DR_CHAIN and
8213 OPRNDS are of size 1. */
8214 for (i = 0; i < group_size; i++)
8215 {
8216 vec_oprnd = gvec_oprnds[i][j];
8217 dr_chain[i] = gvec_oprnds[i][j];
8218 oprnds[i] = gvec_oprnds[i][j];
8219 }
8220 if (mask)
8221 vec_mask = vec_masks[j];
8222 if (dataref_offset)
8223 dataref_offset
8224 = int_const_binop (PLUS_EXPR, dataref_offset, bump);
8225 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
8226 vec_offset = vec_offsets[j];
8227 else
8228 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
8229 stmt_info, bump);
8230 }
8231
8232 if (memory_access_type == VMAT_LOAD_STORE_LANES)
8233 {
8234 tree vec_array;
8235
8236 /* Get an array into which we can store the individual vectors. */
8237 vec_array = create_vector_array (vectype, vec_num);
8238
8239 /* Invalidate the current contents of VEC_ARRAY. This should
8240 become an RTL clobber too, which prevents the vector registers
8241 from being upward-exposed. */
8242 vect_clobber_variable (vinfo, stmt_info, gsi, vec_array);
8243
8244 /* Store the individual vectors into the array. */
8245 for (i = 0; i < vec_num; i++)
8246 {
8247 vec_oprnd = dr_chain[i];
8248 write_vector_array (vinfo, stmt_info,
8249 gsi, vec_oprnd, vec_array, i);
8250 }
8251
8252 tree final_mask = NULL;
8253 if (loop_masks)
8254 final_mask = vect_get_loop_mask (gsi, loop_masks, ncopies,
8255 vectype, j);
8256 if (vec_mask)
8257 final_mask = prepare_vec_mask (loop_vinfo, mask_vectype,
8258 final_mask, vec_mask, gsi);
8259
8260 gcall *call;
8261 if (final_mask)
8262 {
8263 /* Emit:
8264 MASK_STORE_LANES (DATAREF_PTR, ALIAS_PTR, VEC_MASK,
8265 VEC_ARRAY). */
8266 unsigned int align = TYPE_ALIGN (TREE_TYPE (vectype));
8267 tree alias_ptr = build_int_cst (ref_type, align);
8268 call = gimple_build_call_internal (IFN_MASK_STORE_LANES, 4,
8269 dataref_ptr, alias_ptr,
8270 final_mask, vec_array);
8271 }
8272 else
8273 {
8274 /* Emit:
8275 MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY). */
8276 data_ref = create_array_ref (aggr_type, dataref_ptr, ref_type);
8277 call = gimple_build_call_internal (IFN_STORE_LANES, 1,
8278 vec_array);
8279 gimple_call_set_lhs (call, data_ref);
8280 }
8281 gimple_call_set_nothrow (call, true);
8282 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
8283 new_stmt = call;
8284
8285 /* Record that VEC_ARRAY is now dead. */
8286 vect_clobber_variable (vinfo, stmt_info, gsi, vec_array);
8287 }
8288 else
8289 {
8290 new_stmt = NULL;
8291 if (grouped_store)
8292 {
8293 if (j == 0)
8294 result_chain.create (group_size);
8295 /* Permute. */
8296 vect_permute_store_chain (vinfo, dr_chain, group_size, stmt_info,
8297 gsi, &result_chain);
8298 }
8299
8300 stmt_vec_info next_stmt_info = first_stmt_info;
8301 for (i = 0; i < vec_num; i++)
8302 {
8303 unsigned misalign;
8304 unsigned HOST_WIDE_INT align;
8305
8306 tree final_mask = NULL_TREE;
8307 if (loop_masks)
8308 final_mask = vect_get_loop_mask (gsi, loop_masks,
8309 vec_num * ncopies,
8310 vectype, vec_num * j + i);
8311 if (vec_mask)
8312 final_mask = prepare_vec_mask (loop_vinfo, mask_vectype,
8313 final_mask, vec_mask, gsi);
8314
8315 if (memory_access_type == VMAT_GATHER_SCATTER)
8316 {
8317 tree scale = size_int (gs_info.scale);
8318 gcall *call;
8319 if (final_mask)
8320 call = gimple_build_call_internal
8321 (IFN_MASK_SCATTER_STORE, 5, dataref_ptr, vec_offset,
8322 scale, vec_oprnd, final_mask);
8323 else
8324 call = gimple_build_call_internal
8325 (IFN_SCATTER_STORE, 4, dataref_ptr, vec_offset,
8326 scale, vec_oprnd);
8327 gimple_call_set_nothrow (call, true);
8328 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
8329 new_stmt = call;
8330 break;
8331 }
8332
8333 if (i > 0)
8334 /* Bump the vector pointer. */
8335 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr,
8336 gsi, stmt_info, bump);
8337
8338 if (slp)
8339 vec_oprnd = vec_oprnds[i];
8340 else if (grouped_store)
8341 /* For grouped stores vectorized defs are interleaved in
8342 vect_permute_store_chain(). */
8343 vec_oprnd = result_chain[i];
8344
8345 align = known_alignment (DR_TARGET_ALIGNMENT (first_dr_info));
8346 if (alignment_support_scheme == dr_aligned)
8347 misalign = 0;
8348 else if (misalignment == DR_MISALIGNMENT_UNKNOWN)
8349 {
8350 align = dr_alignment (vect_dr_behavior (vinfo, first_dr_info));
8351 misalign = 0;
8352 }
8353 else
8354 misalign = misalignment;
8355 if (dataref_offset == NULL_TREE
8356 && TREE_CODE (dataref_ptr) == SSA_NAME)
8357 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
8358 misalign);
8359 align = least_bit_hwi (misalign | align);
8360
8361 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
8362 {
8363 tree perm_mask = perm_mask_for_reverse (vectype);
8364 tree perm_dest = vect_create_destination_var
8365 (vect_get_store_rhs (stmt_info), vectype);
8366 tree new_temp = make_ssa_name (perm_dest);
8367
8368 /* Generate the permute statement. */
8369 gimple *perm_stmt
8370 = gimple_build_assign (new_temp, VEC_PERM_EXPR, vec_oprnd,
8371 vec_oprnd, perm_mask);
8372 vect_finish_stmt_generation (vinfo, stmt_info, perm_stmt, gsi);
8373
8374 perm_stmt = SSA_NAME_DEF_STMT (new_temp);
8375 vec_oprnd = new_temp;
8376 }
8377
8378 /* Arguments are ready. Create the new vector stmt. */
8379 if (final_mask)
8380 {
8381 tree ptr = build_int_cst (ref_type, align * BITS_PER_UNIT);
8382 gcall *call
8383 = gimple_build_call_internal (IFN_MASK_STORE, 4,
8384 dataref_ptr, ptr,
8385 final_mask, vec_oprnd);
8386 gimple_call_set_nothrow (call, true);
8387 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
8388 new_stmt = call;
8389 }
8390 else if (loop_lens)
8391 {
8392 tree final_len
8393 = vect_get_loop_len (loop_vinfo, loop_lens,
8394 vec_num * ncopies, vec_num * j + i);
8395 tree ptr = build_int_cst (ref_type, align * BITS_PER_UNIT);
8396 machine_mode vmode = TYPE_MODE (vectype);
8397 opt_machine_mode new_ovmode
8398 = get_len_load_store_mode (vmode, false);
8399 machine_mode new_vmode = new_ovmode.require ();
8400 /* Need conversion if it's wrapped with VnQI. */
8401 if (vmode != new_vmode)
8402 {
8403 tree new_vtype
8404 = build_vector_type_for_mode (unsigned_intQI_type_node,
8405 new_vmode);
8406 tree var
8407 = vect_get_new_ssa_name (new_vtype, vect_simple_var);
8408 vec_oprnd
8409 = build1 (VIEW_CONVERT_EXPR, new_vtype, vec_oprnd);
8410 gassign *new_stmt
8411 = gimple_build_assign (var, VIEW_CONVERT_EXPR,
8412 vec_oprnd);
8413 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt,
8414 gsi);
8415 vec_oprnd = var;
8416 }
8417 gcall *call
8418 = gimple_build_call_internal (IFN_LEN_STORE, 4, dataref_ptr,
8419 ptr, final_len, vec_oprnd);
8420 gimple_call_set_nothrow (call, true);
8421 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
8422 new_stmt = call;
8423 }
8424 else
8425 {
8426 data_ref = fold_build2 (MEM_REF, vectype,
8427 dataref_ptr,
8428 dataref_offset
8429 ? dataref_offset
8430 : build_int_cst (ref_type, 0));
8431 if (alignment_support_scheme == dr_aligned)
8432 ;
8433 else
8434 TREE_TYPE (data_ref)
8435 = build_aligned_type (TREE_TYPE (data_ref),
8436 align * BITS_PER_UNIT);
8437 vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
8438 new_stmt = gimple_build_assign (data_ref, vec_oprnd);
8439 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
8440 }
8441
8442 if (slp)
8443 continue;
8444
8445 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
8446 if (!next_stmt_info)
8447 break;
8448 }
8449 }
8450 if (!slp)
8451 {
8452 if (j == 0)
8453 *vec_stmt = new_stmt;
8454 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
8455 }
8456 }
8457
8458 for (i = 0; i < group_size; ++i)
8459 {
8460 vec<tree> oprndsi = gvec_oprnds[i];
8461 oprndsi.release ();
8462 }
8463 oprnds.release ();
8464 result_chain.release ();
8465 vec_oprnds.release ();
8466
8467 return true;
8468 }
8469
8470 /* Given a vector type VECTYPE, turns permutation SEL into the equivalent
8471 VECTOR_CST mask. No checks are made that the target platform supports the
8472 mask, so callers may wish to test can_vec_perm_const_p separately, or use
8473 vect_gen_perm_mask_checked. */
8474
8475 tree
8476 vect_gen_perm_mask_any (tree vectype, const vec_perm_indices &sel)
8477 {
8478 tree mask_type;
8479
8480 poly_uint64 nunits = sel.length ();
8481 gcc_assert (known_eq (nunits, TYPE_VECTOR_SUBPARTS (vectype)));
8482
8483 mask_type = build_vector_type (ssizetype, nunits);
8484 return vec_perm_indices_to_tree (mask_type, sel);
8485 }
8486
8487 /* Checked version of vect_gen_perm_mask_any. Asserts can_vec_perm_const_p,
8488 i.e. that the target supports the pattern _for arbitrary input vectors_. */
8489
8490 tree
8491 vect_gen_perm_mask_checked (tree vectype, const vec_perm_indices &sel)
8492 {
8493 gcc_assert (can_vec_perm_const_p (TYPE_MODE (vectype), sel));
8494 return vect_gen_perm_mask_any (vectype, sel);
8495 }
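/* Usage sketch (illustrative, modelled on perm_mask_for_reverse in this
   file): build a mask that reverses the elements of VECTYPE, checking
   target support before requesting the checked mask:

     poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
     vec_perm_builder sel (nunits, 1, 3);
     for (int i = 0; i < 3; ++i)
       sel.quick_push (nunits - 1 - i);
     vec_perm_indices indices (sel, 1, nunits);
     if (can_vec_perm_const_p (TYPE_MODE (vectype), indices))
       mask = vect_gen_perm_mask_checked (vectype, indices);  */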
8496
8497 /* Given vector variables X and Y, which were generated for the scalar
8498 STMT_INFO, generate instructions to permute the vector elements of X and Y
8499 using permutation mask MASK_VEC, insert them at *GSI and return the
8500 permuted vector variable. */
8501
8502 static tree
8503 permute_vec_elements (vec_info *vinfo,
8504 tree x, tree y, tree mask_vec, stmt_vec_info stmt_info,
8505 gimple_stmt_iterator *gsi)
8506 {
8507 tree vectype = TREE_TYPE (x);
8508 tree perm_dest, data_ref;
8509 gimple *perm_stmt;
8510
8511 tree scalar_dest = gimple_get_lhs (stmt_info->stmt);
8512 if (scalar_dest && TREE_CODE (scalar_dest) == SSA_NAME)
8513 perm_dest = vect_create_destination_var (scalar_dest, vectype);
8514 else
8515 perm_dest = vect_get_new_vect_var (vectype, vect_simple_var, NULL);
8516 data_ref = make_ssa_name (perm_dest);
8517
8518 /* Generate the permute statement. */
8519 perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, x, y, mask_vec);
8520 vect_finish_stmt_generation (vinfo, stmt_info, perm_stmt, gsi);
8521
8522 return data_ref;
8523 }
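/* Schematically, for a two-input interleave mask this emits, e.g.:

     perm_dest_1 = VEC_PERM_EXPR <x, y, { 0, n, 1, n+1, ... }>;

   and returns the SSA name holding the permuted vector.  */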
8524
8525 /* Hoist the definitions of all SSA uses on STMT_INFO out of the loop LOOP,
8526 inserting them on the loop's preheader edge. Returns true if we
8527 were successful in doing so (and thus STMT_INFO can then be moved),
8528 otherwise returns false. */
8529
8530 static bool
8531 hoist_defs_of_uses (stmt_vec_info stmt_info, class loop *loop)
8532 {
8533 ssa_op_iter i;
8534 tree op;
8535 bool any = false;
8536
8537 FOR_EACH_SSA_TREE_OPERAND (op, stmt_info->stmt, i, SSA_OP_USE)
8538 {
8539 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
8540 if (!gimple_nop_p (def_stmt)
8541 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
8542 {
8543 /* Make sure we don't need to recurse. While we could do
8544 so in simple cases, for more complex use webs we don't
8545 have an easy way to preserve stmt order to fulfil
8546 dependencies within them. */
8547 tree op2;
8548 ssa_op_iter i2;
8549 if (gimple_code (def_stmt) == GIMPLE_PHI)
8550 return false;
8551 FOR_EACH_SSA_TREE_OPERAND (op2, def_stmt, i2, SSA_OP_USE)
8552 {
8553 gimple *def_stmt2 = SSA_NAME_DEF_STMT (op2);
8554 if (!gimple_nop_p (def_stmt2)
8555 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt2)))
8556 return false;
8557 }
8558 any = true;
8559 }
8560 }
8561
8562 if (!any)
8563 return true;
8564
8565 FOR_EACH_SSA_TREE_OPERAND (op, stmt_info->stmt, i, SSA_OP_USE)
8566 {
8567 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
8568 if (!gimple_nop_p (def_stmt)
8569 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
8570 {
8571 gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
8572 gsi_remove (&gsi, false);
8573 gsi_insert_on_edge_immediate (loop_preheader_edge (loop), def_stmt);
8574 }
8575 }
8576
8577 return true;
8578 }
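/* For example (hypothetical GIMPLE), with STMT_INFO being the load below:

     <loop>:
       a_1 = n_2(D) + 1;
       x_3 = arr[a_1];        <-- STMT_INFO

   the def a_1 = n_2(D) + 1 is moved onto the preheader edge so that
   STMT_INFO itself can be hoisted afterwards. A PHI def, or a def whose
   own operands are defined inside LOOP, makes us return false instead.  */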
8579
8580 /* vectorizable_load.
8581
8582 Check if STMT_INFO reads a non-scalar data-ref (array/pointer/structure)
8583 that can be vectorized.
8584 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
8585 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
8586 Return true if STMT_INFO is vectorizable in this way. */
8587
8588 static bool
8589 vectorizable_load (vec_info *vinfo,
8590 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
8591 gimple **vec_stmt, slp_tree slp_node,
8592 stmt_vector_for_cost *cost_vec)
8593 {
8594 tree scalar_dest;
8595 tree vec_dest = NULL;
8596 tree data_ref = NULL;
8597 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
8598 class loop *loop = NULL;
8599 class loop *containing_loop = gimple_bb (stmt_info->stmt)->loop_father;
8600 bool nested_in_vect_loop = false;
8601 tree elem_type;
8602 tree new_temp;
8603 machine_mode mode;
8604 tree dummy;
8605 tree dataref_ptr = NULL_TREE;
8606 tree dataref_offset = NULL_TREE;
8607 gimple *ptr_incr = NULL;
8608 int ncopies;
8609 int i, j;
8610 unsigned int group_size;
8611 poly_uint64 group_gap_adj;
8612 tree msq = NULL_TREE, lsq;
8613 tree realignment_token = NULL_TREE;
8614 gphi *phi = NULL;
8615 vec<tree> dr_chain = vNULL;
8616 bool grouped_load = false;
8617 stmt_vec_info first_stmt_info;
8618 stmt_vec_info first_stmt_info_for_drptr = NULL;
8619 bool compute_in_loop = false;
8620 class loop *at_loop;
8621 int vec_num;
8622 bool slp = (slp_node != NULL);
8623 bool slp_perm = false;
8624 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
8625 poly_uint64 vf;
8626 tree aggr_type;
8627 gather_scatter_info gs_info;
8628 tree ref_type;
8629 enum vect_def_type mask_dt = vect_unknown_def_type;
8630
8631 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
8632 return false;
8633
8634 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
8635 && ! vec_stmt)
8636 return false;
8637
8638 if (!STMT_VINFO_DATA_REF (stmt_info))
8639 return false;
8640
8641 tree mask = NULL_TREE, mask_vectype = NULL_TREE;
8642 int mask_index = -1;
8643 if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
8644 {
8645 scalar_dest = gimple_assign_lhs (assign);
8646 if (TREE_CODE (scalar_dest) != SSA_NAME)
8647 return false;
8648
8649 tree_code code = gimple_assign_rhs_code (assign);
8650 if (code != ARRAY_REF
8651 && code != BIT_FIELD_REF
8652 && code != INDIRECT_REF
8653 && code != COMPONENT_REF
8654 && code != IMAGPART_EXPR
8655 && code != REALPART_EXPR
8656 && code != MEM_REF
8657 && TREE_CODE_CLASS (code) != tcc_declaration)
8658 return false;
8659 }
8660 else
8661 {
8662 gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
8663 if (!call || !gimple_call_internal_p (call))
8664 return false;
8665
8666 internal_fn ifn = gimple_call_internal_fn (call);
8667 if (!internal_load_fn_p (ifn))
8668 return false;
8669
8670 scalar_dest = gimple_call_lhs (call);
8671 if (!scalar_dest)
8672 return false;
8673
8674 mask_index = internal_fn_mask_index (ifn);
8675 /* ??? For SLP the mask operand is always last. */
8676 if (mask_index >= 0 && slp_node)
8677 mask_index = SLP_TREE_CHILDREN (slp_node).length () - 1;
8678 if (mask_index >= 0
8679 && !vect_check_scalar_mask (vinfo, stmt_info, slp_node, mask_index,
8680 &mask, NULL, &mask_dt, &mask_vectype))
8681 return false;
8682 }
8683
8684 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
8685 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
8686
8687 if (loop_vinfo)
8688 {
8689 loop = LOOP_VINFO_LOOP (loop_vinfo);
8690 nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt_info);
8691 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
8692 }
8693 else
8694 vf = 1;
8695
8696 /* Multiple types in SLP are handled by creating the appropriate number of
8697 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
8698 case of SLP. */
8699 if (slp)
8700 ncopies = 1;
8701 else
8702 ncopies = vect_get_num_copies (loop_vinfo, vectype);
8703
8704 gcc_assert (ncopies >= 1);
8705
8706 /* FORNOW. This restriction should be relaxed. */
8707 if (nested_in_vect_loop && ncopies > 1)
8708 {
8709 if (dump_enabled_p ())
8710 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8711 "multiple types in nested loop.\n");
8712 return false;
8713 }
8714
8715 /* Invalidate assumptions made by dependence analysis when vectorization
8716 on the unrolled body effectively re-orders stmts. */
8717 if (ncopies > 1
8718 && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
8719 && maybe_gt (LOOP_VINFO_VECT_FACTOR (loop_vinfo),
8720 STMT_VINFO_MIN_NEG_DIST (stmt_info)))
8721 {
8722 if (dump_enabled_p ())
8723 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8724 "cannot perform implicit CSE when unrolling "
8725 "with negative dependence distance\n");
8726 return false;
8727 }
8728
8729 elem_type = TREE_TYPE (vectype);
8730 mode = TYPE_MODE (vectype);
8731
8732 /* FORNOW. In some cases we can vectorize even if the data-type is
8733 not supported (e.g. data copies). */
8734 if (optab_handler (mov_optab, mode) == CODE_FOR_nothing)
8735 {
8736 if (dump_enabled_p ())
8737 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8738 "Aligned load, but unsupported type.\n");
8739 return false;
8740 }
8741
8742 /* Check if the load is a part of an interleaving chain. */
8743 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
8744 {
8745 grouped_load = true;
8746 /* FORNOW */
8747 gcc_assert (!nested_in_vect_loop);
8748 gcc_assert (!STMT_VINFO_GATHER_SCATTER_P (stmt_info));
8749
8750 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
8751 group_size = DR_GROUP_SIZE (first_stmt_info);
8752
8753 /* Refuse non-SLP vectorization of SLP-only groups. */
8754 if (!slp && STMT_VINFO_SLP_VECT_ONLY (first_stmt_info))
8755 {
8756 if (dump_enabled_p ())
8757 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8758 "cannot vectorize load in non-SLP mode.\n");
8759 return false;
8760 }
8761
8762 if (slp && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
8763 {
8764 slp_perm = true;
8765
8766 if (!loop_vinfo)
8767 {
8768 /* In BB vectorization we may not actually use a loaded vector
8769 accessing elements in excess of DR_GROUP_SIZE. */
8770 stmt_vec_info group_info = SLP_TREE_SCALAR_STMTS (slp_node)[0];
8771 group_info = DR_GROUP_FIRST_ELEMENT (group_info);
8772 unsigned HOST_WIDE_INT nunits;
8773 unsigned j, k, maxk = 0;
8774 FOR_EACH_VEC_ELT (SLP_TREE_LOAD_PERMUTATION (slp_node), j, k)
8775 if (k > maxk)
8776 maxk = k;
8777 tree vectype = SLP_TREE_VECTYPE (slp_node);
8778 if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits)
8779 || maxk >= (DR_GROUP_SIZE (group_info) & ~(nunits - 1)))
8780 {
8781 if (dump_enabled_p ())
8782 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8783 "BB vectorization with gaps at the end of "
8784 "a load is not supported\n");
8785 return false;
8786 }
8787 }
8788
8789 auto_vec<tree> tem;
8790 unsigned n_perms;
8791 if (!vect_transform_slp_perm_load (vinfo, slp_node, tem, NULL, vf,
8792 true, &n_perms))
8793 {
8794 if (dump_enabled_p ())
8795 dump_printf_loc (MSG_MISSED_OPTIMIZATION,
8796 vect_location,
8797 "unsupported load permutation\n");
8798 return false;
8799 }
8800 }
8801
8802 /* Invalidate assumptions made by dependence analysis when vectorization
8803 on the unrolled body effectively re-orders stmts. */
8804 if (!PURE_SLP_STMT (stmt_info)
8805 && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
8806 && maybe_gt (LOOP_VINFO_VECT_FACTOR (loop_vinfo),
8807 STMT_VINFO_MIN_NEG_DIST (stmt_info)))
8808 {
8809 if (dump_enabled_p ())
8810 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8811 "cannot perform implicit CSE when performing "
8812 "group loads with negative dependence distance\n");
8813 return false;
8814 }
8815 }
8816 else
8817 group_size = 1;
8818
8819 vect_memory_access_type memory_access_type;
8820 enum dr_alignment_support alignment_support_scheme;
8821 int misalignment;
8822 poly_int64 poffset;
8823 if (!get_load_store_type (vinfo, stmt_info, vectype, slp_node, mask, VLS_LOAD,
8824 ncopies, &memory_access_type, &poffset,
8825 &alignment_support_scheme, &misalignment, &gs_info))
8826 return false;
8827
8828 if (mask)
8829 {
8830 if (memory_access_type == VMAT_CONTIGUOUS)
8831 {
8832 machine_mode vec_mode = TYPE_MODE (vectype);
8833 if (!VECTOR_MODE_P (vec_mode)
8834 || !can_vec_mask_load_store_p (vec_mode,
8835 TYPE_MODE (mask_vectype), true))
8836 return false;
8837 }
8838 else if (memory_access_type != VMAT_LOAD_STORE_LANES
8839 && memory_access_type != VMAT_GATHER_SCATTER)
8840 {
8841 if (dump_enabled_p ())
8842 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8843 "unsupported access type for masked load.\n");
8844 return false;
8845 }
8846 else if (memory_access_type == VMAT_GATHER_SCATTER
8847 && gs_info.ifn == IFN_LAST
8848 && !gs_info.decl)
8849 {
8850 if (dump_enabled_p ())
8851 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8852 "unsupported masked emulated gather.\n");
8853 return false;
8854 }
8855 }
8856
8857 if (!vec_stmt) /* transformation not required. */
8858 {
8859 if (slp_node
8860 && mask
8861 && !vect_maybe_update_slp_op_vectype (SLP_TREE_CHILDREN (slp_node)[0],
8862 mask_vectype))
8863 {
8864 if (dump_enabled_p ())
8865 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8866 "incompatible vector types for invariants\n");
8867 return false;
8868 }
8869
8870 if (!slp)
8871 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;
8872
8873 if (loop_vinfo
8874 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo))
8875 check_load_store_for_partial_vectors (loop_vinfo, vectype, VLS_LOAD,
8876 group_size, memory_access_type,
8877 ncopies, &gs_info, mask);
8878
8879 if (dump_enabled_p ()
8880 && memory_access_type != VMAT_ELEMENTWISE
8881 && memory_access_type != VMAT_GATHER_SCATTER
8882 && alignment_support_scheme != dr_aligned)
8883 dump_printf_loc (MSG_NOTE, vect_location,
8884 "Vectorizing an unaligned access.\n");
8885
8886 STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
8887 vect_model_load_cost (vinfo, stmt_info, ncopies, vf, memory_access_type,
8888 alignment_support_scheme, misalignment,
8889 &gs_info, slp_node, cost_vec);
8890 return true;
8891 }
8892
8893 if (!slp)
8894 gcc_assert (memory_access_type
8895 == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info));
8896
8897 if (dump_enabled_p ())
8898 dump_printf_loc (MSG_NOTE, vect_location,
8899 "transform load. ncopies = %d\n", ncopies);
8900
8901 /* Transform. */
8902
8903 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info), *first_dr_info = NULL;
8904 ensure_base_align (dr_info);
8905
8906 if (memory_access_type == VMAT_GATHER_SCATTER && gs_info.decl)
8907 {
8908 vect_build_gather_load_calls (vinfo,
8909 stmt_info, gsi, vec_stmt, &gs_info, mask);
8910 return true;
8911 }
8912
8913 if (memory_access_type == VMAT_INVARIANT)
8914 {
8915 gcc_assert (!grouped_load && !mask && !bb_vinfo);
8916 /* If we have versioned for aliasing or the loop doesn't
8917 have any data dependencies that would preclude this,
8918 then we are sure this is a loop invariant load and
8919 thus we can insert it on the preheader edge. */
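/* Sketch of the resulting code (illustrative): for

     for (i = 0; i < n; i++)
       ... = *q;              <-- invariant load

   the scalar load is emitted on the preheader edge, and each of the
   NCOPIES vector defs below is built by splatting its (copied) lhs, so
   no load remains inside the vectorized loop body.  */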
8920 bool hoist_p = (LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo)
8921 && !nested_in_vect_loop
8922 && hoist_defs_of_uses (stmt_info, loop));
8923 if (hoist_p)
8924 {
8925 gassign *stmt = as_a <gassign *> (stmt_info->stmt);
8926 if (dump_enabled_p ())
8927 dump_printf_loc (MSG_NOTE, vect_location,
8928 "hoisting out of the vectorized loop: %G", stmt);
8929 scalar_dest = copy_ssa_name (scalar_dest);
8930 tree rhs = unshare_expr (gimple_assign_rhs1 (stmt));
8931 gsi_insert_on_edge_immediate
8932 (loop_preheader_edge (loop),
8933 gimple_build_assign (scalar_dest, rhs));
8934 }
8935 /* These copies are all equivalent, but currently the representation
8936 requires a separate STMT_VINFO_VEC_STMT for each one. */
8937 gimple_stmt_iterator gsi2 = *gsi;
8938 gsi_next (&gsi2);
8939 for (j = 0; j < ncopies; j++)
8940 {
8941 if (hoist_p)
8942 new_temp = vect_init_vector (vinfo, stmt_info, scalar_dest,
8943 vectype, NULL);
8944 else
8945 new_temp = vect_init_vector (vinfo, stmt_info, scalar_dest,
8946 vectype, &gsi2);
8947 gimple *new_stmt = SSA_NAME_DEF_STMT (new_temp);
8948 if (slp)
8949 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
8950 else
8951 {
8952 if (j == 0)
8953 *vec_stmt = new_stmt;
8954 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
8955 }
8956 }
8957 return true;
8958 }
8959
8960 if (memory_access_type == VMAT_ELEMENTWISE
8961 || memory_access_type == VMAT_STRIDED_SLP)
8962 {
8963 gimple_stmt_iterator incr_gsi;
8964 bool insert_after;
8965 tree offvar;
8966 tree ivstep;
8967 tree running_off;
8968 vec<constructor_elt, va_gc> *v = NULL;
8969 tree stride_base, stride_step, alias_off;
8970 /* Checked by get_load_store_type. */
8971 unsigned int const_nunits = nunits.to_constant ();
8972 unsigned HOST_WIDE_INT cst_offset = 0;
8973 tree dr_offset;
8974
8975 gcc_assert (!LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo));
8976 gcc_assert (!nested_in_vect_loop);
8977
8978 if (grouped_load)
8979 {
8980 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
8981 first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
8982 }
8983 else
8984 {
8985 first_stmt_info = stmt_info;
8986 first_dr_info = dr_info;
8987 }
8988 if (slp && grouped_load)
8989 {
8990 group_size = DR_GROUP_SIZE (first_stmt_info);
8991 ref_type = get_group_alias_ptr_type (first_stmt_info);
8992 }
8993 else
8994 {
8995 if (grouped_load)
8996 cst_offset
8997 = (tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)))
8998 * vect_get_place_in_interleaving_chain (stmt_info,
8999 first_stmt_info));
9000 group_size = 1;
9001 ref_type = reference_alias_ptr_type (DR_REF (dr_info->dr));
9002 }
9003
9004 dr_offset = get_dr_vinfo_offset (vinfo, first_dr_info);
9005 stride_base
9006 = fold_build_pointer_plus
9007 (DR_BASE_ADDRESS (first_dr_info->dr),
9008 size_binop (PLUS_EXPR,
9009 convert_to_ptrofftype (dr_offset),
9010 convert_to_ptrofftype (DR_INIT (first_dr_info->dr))));
9011 stride_step = fold_convert (sizetype, DR_STEP (first_dr_info->dr));
9012
9013 /* For a load with a loop-invariant stride that is not a power of 2
9014 (i.e. not a grouped access), like so:
9015
9016 for (i = 0; i < n; i += stride)
9017 ... = array[i];
9018
9019 we generate a new induction variable and new accesses to
9020 form a new vector (or vectors, depending on ncopies):
9021
9022 for (j = 0; ; j += VF*stride)
9023 tmp1 = array[j];
9024 tmp2 = array[j + stride];
9025 ...
9026 vectemp = {tmp1, tmp2, ...}
9027 */
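/* E.g. (an illustrative instance of the scheme above) with stride = 3
   and VF = 4:

     for (j = 0; ; j += 4*3)
       tmp1 = array[j];
       tmp2 = array[j + 3];
       tmp3 = array[j + 6];
       tmp4 = array[j + 9];
       vectemp = {tmp1, tmp2, tmp3, tmp4};  */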
9028
9029 ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (stride_step), stride_step,
9030 build_int_cst (TREE_TYPE (stride_step), vf));
9031
9032 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
9033
9034 stride_base = cse_and_gimplify_to_preheader (loop_vinfo, stride_base);
9035 ivstep = cse_and_gimplify_to_preheader (loop_vinfo, ivstep);
9036 create_iv (stride_base, ivstep, NULL,
9037 loop, &incr_gsi, insert_after,
9038 &offvar, NULL);
9039
9040 stride_step = cse_and_gimplify_to_preheader (loop_vinfo, stride_step);
9041
9042 running_off = offvar;
9043 alias_off = build_int_cst (ref_type, 0);
9044 int nloads = const_nunits;
9045 int lnel = 1;
9046 tree ltype = TREE_TYPE (vectype);
9047 tree lvectype = vectype;
9048 auto_vec<tree> dr_chain;
9049 if (memory_access_type == VMAT_STRIDED_SLP)
9050 {
9051 if (group_size < const_nunits)
9052 {
9053 /* First check if vec_init optab supports construction from vector
9054 elts directly. Otherwise avoid emitting a constructor of
9055 vector elements by performing the loads using an integer type
9056 of the same size, constructing a vector of those and then
9057 re-interpreting it as the original vector type. This avoids a
9058 huge runtime penalty due to the general inability to perform
9059 store forwarding from smaller stores to a larger load. */
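/* For instance (hypothetical types): for a V8QI vectype and
   group_size == 2, vector_vector_composition_type below may return
   V4HI with PTYPE HImode; each 2-element group is then loaded as one
   HImode scalar, four such scalars are collected into a V4HI, and the
   result is VIEW_CONVERTed back to V8QI.  */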
9060 tree ptype;
9061 tree vtype
9062 = vector_vector_composition_type (vectype,
9063 const_nunits / group_size,
9064 &ptype);
9065 if (vtype != NULL_TREE)
9066 {
9067 nloads = const_nunits / group_size;
9068 lnel = group_size;
9069 lvectype = vtype;
9070 ltype = ptype;
9071 }
9072 }
9073 else
9074 {
9075 nloads = 1;
9076 lnel = const_nunits;
9077 ltype = vectype;
9078 }
9079 ltype = build_aligned_type (ltype, TYPE_ALIGN (TREE_TYPE (vectype)));
9080 }
9081 /* Load vector(1) scalar_type if the vectype holds just one element. */
9082 else if (nloads == 1)
9083 ltype = vectype;
9084
9085 if (slp)
9086 {
9087 /* For SLP permutation support we need to load the whole group,
9088 not only the number of vector stmts the permutation result
9089 fits in. */
9090 if (slp_perm)
9091 {
9092 /* We don't yet generate SLP_TREE_LOAD_PERMUTATIONs for
9093 variable VF. */
9094 unsigned int const_vf = vf.to_constant ();
9095 ncopies = CEIL (group_size * const_vf, const_nunits);
9096 dr_chain.create (ncopies);
9097 }
9098 else
9099 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
9100 }
9101 unsigned int group_el = 0;
9102 unsigned HOST_WIDE_INT
9103 elsz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
9104 for (j = 0; j < ncopies; j++)
9105 {
9106 if (nloads > 1)
9107 vec_alloc (v, nloads);
9108 gimple *new_stmt = NULL;
9109 for (i = 0; i < nloads; i++)
9110 {
9111 tree this_off = build_int_cst (TREE_TYPE (alias_off),
9112 group_el * elsz + cst_offset);
9113 tree data_ref = build2 (MEM_REF, ltype, running_off, this_off);
9114 vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
9115 new_stmt = gimple_build_assign (make_ssa_name (ltype), data_ref);
9116 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
9117 if (nloads > 1)
9118 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
9119 gimple_assign_lhs (new_stmt));
9120
9121 group_el += lnel;
9122 if (! slp
9123 || group_el == group_size)
9124 {
9125 tree newoff = copy_ssa_name (running_off);
9126 gimple *incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
9127 running_off, stride_step);
9128 vect_finish_stmt_generation (vinfo, stmt_info, incr, gsi);
9129
9130 running_off = newoff;
9131 group_el = 0;
9132 }
9133 }
9134 if (nloads > 1)
9135 {
9136 tree vec_inv = build_constructor (lvectype, v);
9137 new_temp = vect_init_vector (vinfo, stmt_info,
9138 vec_inv, lvectype, gsi);
9139 new_stmt = SSA_NAME_DEF_STMT (new_temp);
9140 if (lvectype != vectype)
9141 {
9142 new_stmt = gimple_build_assign (make_ssa_name (vectype),
9143 VIEW_CONVERT_EXPR,
9144 build1 (VIEW_CONVERT_EXPR,
9145 vectype, new_temp));
9146 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
9147 }
9148 }
9149
9150 if (slp)
9151 {
9152 if (slp_perm)
9153 dr_chain.quick_push (gimple_assign_lhs (new_stmt));
9154 else
9155 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
9156 }
9157 else
9158 {
9159 if (j == 0)
9160 *vec_stmt = new_stmt;
9161 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
9162 }
9163 }
9164 if (slp_perm)
9165 {
9166 unsigned n_perms;
9167 vect_transform_slp_perm_load (vinfo, slp_node, dr_chain, gsi, vf,
9168 false, &n_perms);
9169 }
9170 return true;
9171 }
9172
9173 if (memory_access_type == VMAT_GATHER_SCATTER
9174 || (!slp && memory_access_type == VMAT_CONTIGUOUS))
9175 grouped_load = false;
9176
9177 if (grouped_load)
9178 {
9179 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
9180 group_size = DR_GROUP_SIZE (first_stmt_info);
9181 /* For SLP vectorization we directly vectorize a subchain
9182 without permutation. */
9183 if (slp && ! SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
9184 first_stmt_info = SLP_TREE_SCALAR_STMTS (slp_node)[0];
9185 /* For BB vectorization always use the first stmt to base
9186 the data ref pointer on. */
9187 if (bb_vinfo)
9188 first_stmt_info_for_drptr
9189 = vect_find_first_scalar_stmt_in_slp (slp_node);
9190
9191 /* Check if the chain of loads is already vectorized. */
9192 if (STMT_VINFO_VEC_STMTS (first_stmt_info).exists ()
9193 /* For SLP we would need to copy over SLP_TREE_VEC_STMTS.
9194 ??? But we can only do so if there is exactly one
9195 as we have no way to get at the rest. Leave the CSE
9196 opportunity alone.
9197 ??? With the group load eventually participating
9198 in multiple different permutations (having multiple
9199 slp nodes which refer to the same group) the CSE
9200 is even wrong code. See PR56270. */
9201 && !slp)
9202 {
9203 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
9204 return true;
9205 }
9206 first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
9207 group_gap_adj = 0;
9208
9209 /* VEC_NUM is the number of vect stmts to be created for this group. */
9210 if (slp)
9211 {
9212 grouped_load = false;
9213 /* If an SLP permutation is from N elements to N elements,
9214 and if one vector holds a whole number of N, we can load
9215 the inputs to the permutation in the same way as an
9216 unpermuted sequence. In other cases we need to load the
9217 whole group, not only the number of vector stmts the
9218 permutation result fits in. */
9219 unsigned scalar_lanes = SLP_TREE_LANES (slp_node);
9220 if (slp_perm
9221 && (group_size != scalar_lanes
9222 || !multiple_p (nunits, group_size)))
9223 {
9224 /* We don't yet generate such SLP_TREE_LOAD_PERMUTATIONs for
9225 variable VF; see vect_transform_slp_perm_load. */
9226 unsigned int const_vf = vf.to_constant ();
9227 unsigned int const_nunits = nunits.to_constant ();
9228 vec_num = CEIL (group_size * const_vf, const_nunits);
9229 group_gap_adj = vf * group_size - nunits * vec_num;
9230 }
9231 else
9232 {
9233 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
9234 group_gap_adj
9235 = group_size - scalar_lanes;
9236 }
9237 }
9238 else
9239 vec_num = group_size;
9240
9241 ref_type = get_group_alias_ptr_type (first_stmt_info);
9242 }
9243 else
9244 {
9245 first_stmt_info = stmt_info;
9246 first_dr_info = dr_info;
9247 group_size = vec_num = 1;
9248 group_gap_adj = 0;
9249 ref_type = reference_alias_ptr_type (DR_REF (first_dr_info->dr));
9250 }
9251
9252 gcc_assert (alignment_support_scheme);
9253 vec_loop_masks *loop_masks
9254 = (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
9255 ? &LOOP_VINFO_MASKS (loop_vinfo)
9256 : NULL);
9257 vec_loop_lens *loop_lens
9258 = (loop_vinfo && LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo)
9259 ? &LOOP_VINFO_LENS (loop_vinfo)
9260 : NULL);
9261
9262 /* Shouldn't use the length-based approach if fully masked. */
9263 gcc_assert (!loop_lens || !loop_masks);
9264
9265 /* Targets with store-lane instructions must not require explicit
9266 realignment. vect_supportable_dr_alignment always returns either
9267 dr_aligned or dr_unaligned_supported for masked operations. */
9268 gcc_assert ((memory_access_type != VMAT_LOAD_STORE_LANES
9269 && !mask
9270 && !loop_masks)
9271 || alignment_support_scheme == dr_aligned
9272 || alignment_support_scheme == dr_unaligned_supported);
9273
9274 /* In case the vectorization factor (VF) is bigger than the number
9275 of elements that we can fit in a vectype (nunits), we have to generate
9276 more than one vector stmt; i.e., we need to "unroll" the
9277 vector stmt by a factor VF/nunits. In doing so, we record a pointer
9278 from one copy of the vector stmt to the next, in the field
9279 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
9280 stages to find the correct vector defs to be used when vectorizing
9281 stmts that use the defs of the current stmt. The example below
9282 illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
9283 need to create 4 vectorized stmts):
9284
9285 before vectorization:
9286 RELATED_STMT VEC_STMT
9287 S1: x = memref - -
9288 S2: z = x + 1 - -
9289
9290 step 1: vectorize stmt S1:
9291 We first create the vector stmt VS1_0, and, as usual, record a
9292 pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
9293 Next, we create the vector stmt VS1_1, and record a pointer to
9294 it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
9295 Similarly, for VS1_2 and VS1_3. This is the resulting chain of
9296 stmts and pointers:
9297 RELATED_STMT VEC_STMT
9298 VS1_0: vx0 = memref0 VS1_1 -
9299 VS1_1: vx1 = memref1 VS1_2 -
9300 VS1_2: vx2 = memref2 VS1_3 -
9301 VS1_3: vx3 = memref3 - -
9302 S1: x = load - VS1_0
9303 S2: z = x + 1 - -
9304 */
9305
9306 /* In case of interleaving (non-unit grouped access):
9307
9308 S1: x2 = &base + 2
9309 S2: x0 = &base
9310 S3: x1 = &base + 1
9311 S4: x3 = &base + 3
9312
9313 Vectorized loads are created in the order of memory accesses
9314 starting from the access of the first stmt of the chain:
9315
9316 VS1: vx0 = &base
9317 VS2: vx1 = &base + vec_size*1
9318 VS3: vx3 = &base + vec_size*2
9319 VS4: vx4 = &base + vec_size*3
9320
9321 Then permutation statements are generated:
9322
9323 VS5: vx5 = VEC_PERM_EXPR < vx0, vx1, { 0, 2, ..., i*2 } >
9324 VS6: vx6 = VEC_PERM_EXPR < vx0, vx1, { 1, 3, ..., i*2+1 } >
9325 ...
9326
9327 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
9328 (the order of the data-refs in the output of vect_permute_load_chain
9329 corresponds to the order of scalar stmts in the interleaving chain - see
9330 the documentation of vect_permute_load_chain()).
9331 The generation of permutation stmts and recording them in
9332 STMT_VINFO_VEC_STMT is done in vect_transform_grouped_load().
9333
9334 In case of both multiple types and interleaving, the vector loads and
9335 permutation stmts above are created for every copy. The result vector
9336 stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
9337 corresponding STMT_VINFO_RELATED_STMT for the next copies. */
9338
9339 /* If the data reference is aligned (dr_aligned) or potentially unaligned
9340 on a target that supports unaligned accesses (dr_unaligned_supported)
9341 we generate the following code:
9342 p = initial_addr;
9343 indx = 0;
9344 loop {
9345 p = p + indx * vectype_size;
9346 vec_dest = *(p);
9347 indx = indx + 1;
9348 }
9349
9350 Otherwise, the data reference is potentially unaligned on a target that
9351 does not support unaligned accesses (dr_explicit_realign_optimized) -
9352 then generate the following code, in which the data in each iteration is
9353 obtained by two vector loads, one from the previous iteration, and one
9354 from the current iteration:
9355 p1 = initial_addr;
9356 msq_init = *(floor(p1))
9357 p2 = initial_addr + VS - 1;
9358 realignment_token = call target_builtin;
9359 indx = 0;
9360 loop {
9361 p2 = p2 + indx * vectype_size
9362 lsq = *(floor(p2))
9363 vec_dest = realign_load (msq, lsq, realignment_token)
9364 indx = indx + 1;
9365 msq = lsq;
9366 } */
9367
9368 /* If the misalignment remains the same throughout the execution of the
9369 loop, we can create the init_addr and permutation mask at the loop
9370 preheader. Otherwise, it needs to be created inside the loop.
9371 This can only occur when vectorizing memory accesses in the inner-loop
9372 nested within an outer-loop that is being vectorized. */
9373
9374 if (nested_in_vect_loop
9375 && !multiple_p (DR_STEP_ALIGNMENT (dr_info->dr),
9376 GET_MODE_SIZE (TYPE_MODE (vectype))))
9377 {
9378 gcc_assert (alignment_support_scheme != dr_explicit_realign_optimized);
9379 compute_in_loop = true;
9380 }
9381
9382 bool diff_first_stmt_info
9383 = first_stmt_info_for_drptr && first_stmt_info != first_stmt_info_for_drptr;
9384
9385 tree offset = NULL_TREE;
9386 if ((alignment_support_scheme == dr_explicit_realign_optimized
9387 || alignment_support_scheme == dr_explicit_realign)
9388 && !compute_in_loop)
9389 {
9390 /* If we have a different first_stmt_info, we can't set up realignment
9391 here, since we can't guarantee that first_stmt_info's DR has been
9392 initialized yet; use first_stmt_info_for_drptr's DR instead, bumping
9393 it by the distance from first_stmt_info's DR as below. */
9394 if (!diff_first_stmt_info)
9395 msq = vect_setup_realignment (vinfo,
9396 first_stmt_info, gsi, &realignment_token,
9397 alignment_support_scheme, NULL_TREE,
9398 &at_loop);
9399 if (alignment_support_scheme == dr_explicit_realign_optimized)
9400 {
9401 phi = as_a <gphi *> (SSA_NAME_DEF_STMT (msq));
9402 offset = size_binop (MINUS_EXPR, TYPE_SIZE_UNIT (vectype),
9403 size_one_node);
9404 gcc_assert (!first_stmt_info_for_drptr);
9405 }
9406 }
9407 else
9408 at_loop = loop;
9409
9410 if (!known_eq (poffset, 0))
9411 offset = (offset
9412 ? size_binop (PLUS_EXPR, offset, size_int (poffset))
9413 : size_int (poffset));
9414
9415 tree bump;
9416 tree vec_offset = NULL_TREE;
9417 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
9418 {
9419 aggr_type = NULL_TREE;
9420 bump = NULL_TREE;
9421 }
9422 else if (memory_access_type == VMAT_GATHER_SCATTER)
9423 {
9424 aggr_type = elem_type;
9425 vect_get_strided_load_store_ops (stmt_info, loop_vinfo, &gs_info,
9426 &bump, &vec_offset);
9427 }
9428 else
9429 {
9430 if (memory_access_type == VMAT_LOAD_STORE_LANES)
9431 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
9432 else
9433 aggr_type = vectype;
9434 bump = vect_get_data_ptr_increment (vinfo, dr_info, aggr_type,
9435 memory_access_type);
9436 }
9437
9438 vec<tree> vec_offsets = vNULL;
9439 auto_vec<tree> vec_masks;
9440 if (mask)
9441 {
9442 if (slp_node)
9443 vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[mask_index],
9444 &vec_masks);
9445 else
9446 vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies, mask,
9447 &vec_masks, mask_vectype);
9448 }
9449 tree vec_mask = NULL_TREE;
9450 poly_uint64 group_elt = 0;
9451 for (j = 0; j < ncopies; j++)
9452 {
9453 /* 1. Create the vector or array pointer update chain. */
9454 if (j == 0)
9455 {
9456 bool simd_lane_access_p
9457 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) != 0;
9458 if (simd_lane_access_p
9459 && TREE_CODE (DR_BASE_ADDRESS (first_dr_info->dr)) == ADDR_EXPR
9460 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr_info->dr), 0))
9461 && integer_zerop (get_dr_vinfo_offset (vinfo, first_dr_info))
9462 && integer_zerop (DR_INIT (first_dr_info->dr))
9463 && alias_sets_conflict_p (get_alias_set (aggr_type),
9464 get_alias_set (TREE_TYPE (ref_type)))
9465 && (alignment_support_scheme == dr_aligned
9466 || alignment_support_scheme == dr_unaligned_supported))
9467 {
9468 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr_info->dr));
9469 dataref_offset = build_int_cst (ref_type, 0);
9470 }
9471 else if (diff_first_stmt_info)
9472 {
9473 dataref_ptr
9474 = vect_create_data_ref_ptr (vinfo, first_stmt_info_for_drptr,
9475 aggr_type, at_loop, offset, &dummy,
9476 gsi, &ptr_incr, simd_lane_access_p,
9477 bump);
9478 /* Adjust the pointer by the difference to first_stmt. */
9479 data_reference_p ptrdr
9480 = STMT_VINFO_DATA_REF (first_stmt_info_for_drptr);
9481 tree diff
9482 = fold_convert (sizetype,
9483 size_binop (MINUS_EXPR,
9484 DR_INIT (first_dr_info->dr),
9485 DR_INIT (ptrdr)));
9486 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
9487 stmt_info, diff);
9488 if (alignment_support_scheme == dr_explicit_realign)
9489 {
9490 msq = vect_setup_realignment (vinfo,
9491 first_stmt_info_for_drptr, gsi,
9492 &realignment_token,
9493 alignment_support_scheme,
9494 dataref_ptr, &at_loop);
9495 gcc_assert (!compute_in_loop);
9496 }
9497 }
9498 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
9499 {
9500 vect_get_gather_scatter_ops (loop_vinfo, loop, stmt_info,
9501 slp_node, &gs_info, &dataref_ptr,
9502 &vec_offsets);
9503 }
9504 else
9505 dataref_ptr
9506 = vect_create_data_ref_ptr (vinfo, first_stmt_info, aggr_type,
9507 at_loop,
9508 offset, &dummy, gsi, &ptr_incr,
9509 simd_lane_access_p, bump);
9510 if (mask)
9511 vec_mask = vec_masks[0];
9512 }
9513 else
9514 {
9515 if (dataref_offset)
9516 dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset,
9517 bump);
9518 else if (!STMT_VINFO_GATHER_SCATTER_P (stmt_info))
9519 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
9520 stmt_info, bump);
9521 if (mask)
9522 vec_mask = vec_masks[j];
9523 }
9524
9525 if (grouped_load || slp_perm)
9526 dr_chain.create (vec_num);
9527
9528 gimple *new_stmt = NULL;
9529 if (memory_access_type == VMAT_LOAD_STORE_LANES)
9530 {
9531 tree vec_array;
9532
9533 vec_array = create_vector_array (vectype, vec_num);
9534
9535 tree final_mask = NULL_TREE;
9536 if (loop_masks)
9537 final_mask = vect_get_loop_mask (gsi, loop_masks, ncopies,
9538 vectype, j);
9539 if (vec_mask)
9540 final_mask = prepare_vec_mask (loop_vinfo, mask_vectype,
9541 final_mask, vec_mask, gsi);
9542
9543 gcall *call;
9544 if (final_mask)
9545 {
9546 /* Emit:
9547 VEC_ARRAY = MASK_LOAD_LANES (DATAREF_PTR, ALIAS_PTR,
9548 VEC_MASK). */
9549 unsigned int align = TYPE_ALIGN (TREE_TYPE (vectype));
9550 tree alias_ptr = build_int_cst (ref_type, align);
9551 call = gimple_build_call_internal (IFN_MASK_LOAD_LANES, 3,
9552 dataref_ptr, alias_ptr,
9553 final_mask);
9554 }
9555 else
9556 {
9557 /* Emit:
9558 VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]). */
9559 data_ref = create_array_ref (aggr_type, dataref_ptr, ref_type);
9560 call = gimple_build_call_internal (IFN_LOAD_LANES, 1, data_ref);
9561 }
9562 gimple_call_set_lhs (call, vec_array);
9563 gimple_call_set_nothrow (call, true);
9564 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
9565 new_stmt = call;
9566
9567 /* Extract each vector into an SSA_NAME. */
9568 for (i = 0; i < vec_num; i++)
9569 {
9570 new_temp = read_vector_array (vinfo, stmt_info, gsi, scalar_dest,
9571 vec_array, i);
9572 dr_chain.quick_push (new_temp);
9573 }
9574
9575 /* Record the mapping between SSA_NAMEs and statements. */
9576 vect_record_grouped_load_vectors (vinfo, stmt_info, dr_chain);
9577
9578 /* Record that VEC_ARRAY is now dead. */
9579 vect_clobber_variable (vinfo, stmt_info, gsi, vec_array);
9580 }
9581 else
9582 {
9583 for (i = 0; i < vec_num; i++)
9584 {
9585 tree final_mask = NULL_TREE;
9586 if (loop_masks
9587 && memory_access_type != VMAT_INVARIANT)
9588 final_mask = vect_get_loop_mask (gsi, loop_masks,
9589 vec_num * ncopies,
9590 vectype, vec_num * j + i);
9591 if (vec_mask)
9592 final_mask = prepare_vec_mask (loop_vinfo, mask_vectype,
9593 final_mask, vec_mask, gsi);
9594
9595 if (i > 0)
9596 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr,
9597 gsi, stmt_info, bump);
9598
9599 /* 2. Create the vector-load in the loop. */
9600 switch (alignment_support_scheme)
9601 {
9602 case dr_aligned:
9603 case dr_unaligned_supported:
9604 {
9605 unsigned int misalign;
9606 unsigned HOST_WIDE_INT align;
9607
9608 if (memory_access_type == VMAT_GATHER_SCATTER
9609 && gs_info.ifn != IFN_LAST)
9610 {
9611 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
9612 vec_offset = vec_offsets[j];
9613 tree zero = build_zero_cst (vectype);
9614 tree scale = size_int (gs_info.scale);
9615 gcall *call;
9616 if (final_mask)
9617 call = gimple_build_call_internal
9618 (IFN_MASK_GATHER_LOAD, 5, dataref_ptr,
9619 vec_offset, scale, zero, final_mask);
9620 else
9621 call = gimple_build_call_internal
9622 (IFN_GATHER_LOAD, 4, dataref_ptr,
9623 vec_offset, scale, zero);
9624 gimple_call_set_nothrow (call, true);
9625 new_stmt = call;
9626 data_ref = NULL_TREE;
9627 break;
9628 }
9629 else if (memory_access_type == VMAT_GATHER_SCATTER)
9630 {
9631 /* Emulated gather-scatter. */
9632 gcc_assert (!final_mask);
9633 unsigned HOST_WIDE_INT const_nunits
9634 = nunits.to_constant ();
9635 unsigned HOST_WIDE_INT const_offset_nunits
9636 = TYPE_VECTOR_SUBPARTS (gs_info.offset_vectype)
9637 .to_constant ();
9638 vec<constructor_elt, va_gc> *ctor_elts;
9639 vec_alloc (ctor_elts, const_nunits);
9640 gimple_seq stmts = NULL;
9641 /* We support offset vectors with more elements
9642 than the data vector for now. */
9643 unsigned HOST_WIDE_INT factor
9644 = const_offset_nunits / const_nunits;
9645 vec_offset = vec_offsets[j / factor];
9646 unsigned elt_offset = (j % factor) * const_nunits;
9647 tree idx_type = TREE_TYPE (TREE_TYPE (vec_offset));
9648 tree scale = size_int (gs_info.scale);
9649 align
9650 = get_object_alignment (DR_REF (first_dr_info->dr));
9651 tree ltype = build_aligned_type (TREE_TYPE (vectype),
9652 align);
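/* Emulate the gather one lane at a time (a sketch, in GIMPLE-like
   form, of what the loop below emits):

     idx_k = BIT_FIELD_REF <vec_offset, bits (idx_type), k*bits (idx_type)>;
     ptr_k = dataref_ptr + (sizetype) idx_k * scale;
     elt_k = MEM[(ltype *) ptr_k];

   each elt_k being appended to a CONSTRUCTOR for the result vector.  */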
9653 for (unsigned k = 0; k < const_nunits; ++k)
9654 {
9655 tree boff = size_binop (MULT_EXPR,
9656 TYPE_SIZE (idx_type),
9657 bitsize_int
9658 (k + elt_offset));
9659 tree idx = gimple_build (&stmts, BIT_FIELD_REF,
9660 idx_type, vec_offset,
9661 TYPE_SIZE (idx_type),
9662 boff);
9663 idx = gimple_convert (&stmts, sizetype, idx);
9664 idx = gimple_build (&stmts, MULT_EXPR,
9665 sizetype, idx, scale);
9666 tree ptr = gimple_build (&stmts, PLUS_EXPR,
9667 TREE_TYPE (dataref_ptr),
9668 dataref_ptr, idx);
9669 ptr = gimple_convert (&stmts, ptr_type_node, ptr);
9670 tree elt = make_ssa_name (TREE_TYPE (vectype));
9671 tree ref = build2 (MEM_REF, ltype, ptr,
9672 build_int_cst (ref_type, 0));
9673 new_stmt = gimple_build_assign (elt, ref);
9674 gimple_seq_add_stmt (&stmts, new_stmt);
9675 CONSTRUCTOR_APPEND_ELT (ctor_elts, NULL_TREE, elt);
9676 }
9677 gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
9678 new_stmt = gimple_build_assign (NULL_TREE,
9679 build_constructor
9680 (vectype, ctor_elts));
9681 data_ref = NULL_TREE;
9682 break;
9683 }
9684
9685 align =
9686 known_alignment (DR_TARGET_ALIGNMENT (first_dr_info));
9687 if (alignment_support_scheme == dr_aligned)
9688 misalign = 0;
9689 else if (misalignment == DR_MISALIGNMENT_UNKNOWN)
9690 {
9691 align = dr_alignment
9692 (vect_dr_behavior (vinfo, first_dr_info));
9693 misalign = 0;
9694 }
9695 else
9696 misalign = misalignment;
9697 if (dataref_offset == NULL_TREE
9698 && TREE_CODE (dataref_ptr) == SSA_NAME)
9699 set_ptr_info_alignment (get_ptr_info (dataref_ptr),
9700 align, misalign);
9701 align = least_bit_hwi (misalign | align);
9702
9703 if (final_mask)
9704 {
9705 tree ptr = build_int_cst (ref_type,
9706 align * BITS_PER_UNIT);
9707 gcall *call
9708 = gimple_build_call_internal (IFN_MASK_LOAD, 3,
9709 dataref_ptr, ptr,
9710 final_mask);
9711 gimple_call_set_nothrow (call, true);
9712 new_stmt = call;
9713 data_ref = NULL_TREE;
9714 }
9715 else if (loop_lens && memory_access_type != VMAT_INVARIANT)
9716 {
9717 tree final_len
9718 = vect_get_loop_len (loop_vinfo, loop_lens,
9719 vec_num * ncopies,
9720 vec_num * j + i);
9721 tree ptr = build_int_cst (ref_type,
9722 align * BITS_PER_UNIT);
9723 gcall *call
9724 = gimple_build_call_internal (IFN_LEN_LOAD, 3,
9725 dataref_ptr, ptr,
9726 final_len);
9727 gimple_call_set_nothrow (call, true);
9728 new_stmt = call;
9729 data_ref = NULL_TREE;
9730
9731 /* Need conversion if it's wrapped with VnQI. */
9732 machine_mode vmode = TYPE_MODE (vectype);
9733 opt_machine_mode new_ovmode
9734 = get_len_load_store_mode (vmode, true);
9735 machine_mode new_vmode = new_ovmode.require ();
9736 if (vmode != new_vmode)
9737 {
9738 tree qi_type = unsigned_intQI_type_node;
9739 tree new_vtype
9740 = build_vector_type_for_mode (qi_type, new_vmode);
9741 tree var = vect_get_new_ssa_name (new_vtype,
9742 vect_simple_var);
9743 gimple_set_lhs (call, var);
9744 vect_finish_stmt_generation (vinfo, stmt_info, call,
9745 gsi);
9746 tree op = build1 (VIEW_CONVERT_EXPR, vectype, var);
9747 new_stmt
9748 = gimple_build_assign (vec_dest,
9749 VIEW_CONVERT_EXPR, op);
9750 }
9751 }
9752 else
9753 {
9754 tree ltype = vectype;
9755 tree new_vtype = NULL_TREE;
9756 unsigned HOST_WIDE_INT gap
9757 = DR_GROUP_GAP (first_stmt_info);
9758 unsigned int vect_align
9759 = vect_known_alignment_in_bytes (first_dr_info,
9760 vectype);
9761 unsigned int scalar_dr_size
9762 = vect_get_scalar_dr_size (first_dr_info);
9763 /* If there's no peeling for gaps but we have a gap
9764 with slp loads then load the lower half of the
9765 vector only. See get_group_load_store_type for
9766 when we apply this optimization. */
9767 if (slp
9768 && loop_vinfo
9769 && !LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)
9770 && gap != 0
9771 && known_eq (nunits, (group_size - gap) * 2)
9772 && known_eq (nunits, group_size)
9773 && gap >= (vect_align / scalar_dr_size))
9774 {
9775 tree half_vtype;
9776 new_vtype
9777 = vector_vector_composition_type (vectype, 2,
9778 &half_vtype);
9779 if (new_vtype != NULL_TREE)
9780 ltype = half_vtype;
9781 }
9782 tree offset
9783 = (dataref_offset ? dataref_offset
9784 : build_int_cst (ref_type, 0));
9785 if (ltype != vectype
9786 && memory_access_type == VMAT_CONTIGUOUS_REVERSE)
9787 {
9788 unsigned HOST_WIDE_INT gap_offset
9789 = gap * tree_to_uhwi (TYPE_SIZE_UNIT (elem_type));
9790 tree gapcst = build_int_cst (ref_type, gap_offset);
9791 offset = size_binop (PLUS_EXPR, offset, gapcst);
9792 }
9793 data_ref
9794 = fold_build2 (MEM_REF, ltype, dataref_ptr, offset);
9795 if (alignment_support_scheme == dr_aligned)
9796 ;
9797 else
9798 TREE_TYPE (data_ref)
9799 = build_aligned_type (TREE_TYPE (data_ref),
9800 align * BITS_PER_UNIT);
9801 if (ltype != vectype)
9802 {
9803 vect_copy_ref_info (data_ref,
9804 DR_REF (first_dr_info->dr));
9805 tree tem = make_ssa_name (ltype);
9806 new_stmt = gimple_build_assign (tem, data_ref);
9807 vect_finish_stmt_generation (vinfo, stmt_info,
9808 new_stmt, gsi);
9809 data_ref = NULL;
9810 vec<constructor_elt, va_gc> *v;
9811 vec_alloc (v, 2);
9812 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
9813 {
9814 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
9815 build_zero_cst (ltype));
9816 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, tem);
9817 }
9818 else
9819 {
9820 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, tem);
9821 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
9822 build_zero_cst (ltype));
9823 }
9824 gcc_assert (new_vtype != NULL_TREE);
9825 if (new_vtype == vectype)
9826 new_stmt = gimple_build_assign (
9827 vec_dest, build_constructor (vectype, v));
9828 else
9829 {
9830 tree new_vname = make_ssa_name (new_vtype);
9831 new_stmt = gimple_build_assign (
9832 new_vname, build_constructor (new_vtype, v));
9833 vect_finish_stmt_generation (vinfo, stmt_info,
9834 new_stmt, gsi);
9835 new_stmt = gimple_build_assign (
9836 vec_dest, build1 (VIEW_CONVERT_EXPR, vectype,
9837 new_vname));
9838 }
9839 }
9840 }
9841 break;
9842 }
9843 case dr_explicit_realign:
9844 {
9845 tree ptr, bump;
9846
9847 tree vs = size_int (TYPE_VECTOR_SUBPARTS (vectype));
9848
9849 if (compute_in_loop)
9850 msq = vect_setup_realignment (vinfo, first_stmt_info, gsi,
9851 &realignment_token,
9852 dr_explicit_realign,
9853 dataref_ptr, NULL);
9854
9855 if (TREE_CODE (dataref_ptr) == SSA_NAME)
9856 ptr = copy_ssa_name (dataref_ptr);
9857 else
9858 ptr = make_ssa_name (TREE_TYPE (dataref_ptr));
9859 /* For explicit realign the target alignment should be
9860 known at compile time. */
9861 unsigned HOST_WIDE_INT align =
9862 DR_TARGET_ALIGNMENT (first_dr_info).to_constant ();
9863 new_stmt = gimple_build_assign
9864 (ptr, BIT_AND_EXPR, dataref_ptr,
9865 build_int_cst
9866 (TREE_TYPE (dataref_ptr),
9867 -(HOST_WIDE_INT) align));
9868 vect_finish_stmt_generation (vinfo, stmt_info,
9869 new_stmt, gsi);
9870 data_ref
9871 = build2 (MEM_REF, vectype, ptr,
9872 build_int_cst (ref_type, 0));
9873 vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
9874 vec_dest = vect_create_destination_var (scalar_dest,
9875 vectype);
9876 new_stmt = gimple_build_assign (vec_dest, data_ref);
9877 new_temp = make_ssa_name (vec_dest, new_stmt);
9878 gimple_assign_set_lhs (new_stmt, new_temp);
9879 gimple_move_vops (new_stmt, stmt_info->stmt);
9880 vect_finish_stmt_generation (vinfo, stmt_info,
9881 new_stmt, gsi);
9882 msq = new_temp;
9883
9884 bump = size_binop (MULT_EXPR, vs,
9885 TYPE_SIZE_UNIT (elem_type));
9886 bump = size_binop (MINUS_EXPR, bump, size_one_node);
9887 ptr = bump_vector_ptr (vinfo, dataref_ptr, NULL, gsi,
9888 stmt_info, bump);
9889 new_stmt = gimple_build_assign
9890 (NULL_TREE, BIT_AND_EXPR, ptr,
9891 build_int_cst
9892 (TREE_TYPE (ptr), -(HOST_WIDE_INT) align));
9893 ptr = copy_ssa_name (ptr, new_stmt);
9894 gimple_assign_set_lhs (new_stmt, ptr);
9895 vect_finish_stmt_generation (vinfo, stmt_info,
9896 new_stmt, gsi);
9897 data_ref
9898 = build2 (MEM_REF, vectype, ptr,
9899 build_int_cst (ref_type, 0));
9900 break;
9901 }
9902 case dr_explicit_realign_optimized:
9903 {
9904 if (TREE_CODE (dataref_ptr) == SSA_NAME)
9905 new_temp = copy_ssa_name (dataref_ptr);
9906 else
9907 new_temp = make_ssa_name (TREE_TYPE (dataref_ptr));
9908 // We should only be doing this if we know the target
9909 // alignment at compile time.
9910 unsigned HOST_WIDE_INT align =
9911 DR_TARGET_ALIGNMENT (first_dr_info).to_constant ();
9912 new_stmt = gimple_build_assign
9913 (new_temp, BIT_AND_EXPR, dataref_ptr,
9914 build_int_cst (TREE_TYPE (dataref_ptr),
9915 -(HOST_WIDE_INT) align));
9916 vect_finish_stmt_generation (vinfo, stmt_info,
9917 new_stmt, gsi);
9918 data_ref
9919 = build2 (MEM_REF, vectype, new_temp,
9920 build_int_cst (ref_type, 0));
9921 break;
9922 }
9923 default:
9924 gcc_unreachable ();
9925 }
9926 vec_dest = vect_create_destination_var (scalar_dest, vectype);
9927 /* DATA_REF is null if we've already built the statement. */
9928 if (data_ref)
9929 {
9930 vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
9931 new_stmt = gimple_build_assign (vec_dest, data_ref);
9932 }
9933 new_temp = make_ssa_name (vec_dest, new_stmt);
9934 gimple_set_lhs (new_stmt, new_temp);
9935 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
9936
9937 /* 3. Handle explicit realignment if necessary/supported.
9938 Create in loop:
9939 vec_dest = realign_load (msq, lsq, realignment_token) */
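/* A sketch of the code this emits for dr_explicit_realign, with a
   misaligned pointer P and a 16-byte target alignment (the constants
   are for illustration, not taken from the surrounding code):

     msq = *(P & -16);               aligned vector at or below P
     lsq = *((P + VS - 1) & -16);    next aligned vector, VS bytes on
     vec = REALIGN_LOAD <msq, lsq, realignment_token>;

   The two aligned loads straddle the unaligned vector and
   REALIGN_LOAD extracts it.  */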
9940 if (alignment_support_scheme == dr_explicit_realign_optimized
9941 || alignment_support_scheme == dr_explicit_realign)
9942 {
9943 lsq = gimple_assign_lhs (new_stmt);
9944 if (!realignment_token)
9945 realignment_token = dataref_ptr;
9946 vec_dest = vect_create_destination_var (scalar_dest, vectype);
9947 new_stmt = gimple_build_assign (vec_dest, REALIGN_LOAD_EXPR,
9948 msq, lsq, realignment_token);
9949 new_temp = make_ssa_name (vec_dest, new_stmt);
9950 gimple_assign_set_lhs (new_stmt, new_temp);
9951 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
9952
9953 if (alignment_support_scheme == dr_explicit_realign_optimized)
9954 {
9955 gcc_assert (phi);
9956 if (i == vec_num - 1 && j == ncopies - 1)
9957 add_phi_arg (phi, lsq,
9958 loop_latch_edge (containing_loop),
9959 UNKNOWN_LOCATION);
9960 msq = lsq;
9961 }
9962 }
9963
9964 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
9965 {
9966 tree perm_mask = perm_mask_for_reverse (vectype);
9967 new_temp = permute_vec_elements (vinfo, new_temp, new_temp,
9968 perm_mask, stmt_info, gsi);
9969 new_stmt = SSA_NAME_DEF_STMT (new_temp);
9970 }
9971
9972 /* Collect vector loads and later create their permutation in
9973 vect_transform_grouped_load (). */
9974 if (grouped_load || slp_perm)
9975 dr_chain.quick_push (new_temp);
9976
9977 /* Store vector loads in the corresponding SLP_NODE. */
9978 if (slp && !slp_perm)
9979 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
9980
9981 /* With an SLP permutation we load the gaps as well; without
9982 one we need to skip the gaps after we manage to fully load
9983 all elements.  group_gap_adj is DR_GROUP_SIZE here.  */
9984 group_elt += nunits;
9985 if (maybe_ne (group_gap_adj, 0U)
9986 && !slp_perm
9987 && known_eq (group_elt, group_size - group_gap_adj))
9988 {
9989 poly_wide_int bump_val
9990 = (wi::to_wide (TYPE_SIZE_UNIT (elem_type))
9991 * group_gap_adj);
9992 if (tree_int_cst_sgn
9993 (vect_dr_behavior (vinfo, dr_info)->step) == -1)
9994 bump_val = -bump_val;
9995 tree bump = wide_int_to_tree (sizetype, bump_val);
9996 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr,
9997 gsi, stmt_info, bump);
9998 group_elt = 0;
9999 }
10000 }
10001 /* Bump the vector pointer to account for a gap or for excess
10002 elements loaded for a permuted SLP load. */
10003 if (maybe_ne (group_gap_adj, 0U) && slp_perm)
10004 {
10005 poly_wide_int bump_val
10006 = (wi::to_wide (TYPE_SIZE_UNIT (elem_type))
10007 * group_gap_adj);
10008 if (tree_int_cst_sgn
10009 (vect_dr_behavior (vinfo, dr_info)->step) == -1)
10010 bump_val = -bump_val;
10011 tree bump = wide_int_to_tree (sizetype, bump_val);
10012 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
10013 stmt_info, bump);
10014 }
10015 }
10016
10017 if (slp && !slp_perm)
10018 continue;
10019
10020 if (slp_perm)
10021 {
10022 unsigned n_perms;
10023 /* For SLP we know we've seen all possible uses of dr_chain so
10024 direct vect_transform_slp_perm_load to DCE the unused parts.
10025 ??? This is a hack to prevent compile-time issues as seen
10026 in PR101120 and friends. */
10027 bool ok = vect_transform_slp_perm_load (vinfo, slp_node, dr_chain,
10028 gsi, vf, false, &n_perms,
10029 nullptr, true);
10030 gcc_assert (ok);
10031 }
10032 else
10033 {
10034 if (grouped_load)
10035 {
10036 if (memory_access_type != VMAT_LOAD_STORE_LANES)
10037 vect_transform_grouped_load (vinfo, stmt_info, dr_chain,
10038 group_size, gsi);
10039 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
10040 }
10041 else
10042 {
10043 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
10044 }
10045 }
10046 dr_chain.release ();
10047 }
10048 if (!slp)
10049 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
10050
10051 return true;
10052 }
10053
10054 /* Function vect_is_simple_cond.
10055
10056 Input:
10057 LOOP - the loop that is being vectorized.
10058 COND - Condition that is checked for simple use.
10059
10060 Output:
10061 *COMP_VECTYPE - the vector type for the comparison.
10062 *DTS - The def types for the arguments of the comparison
10063
10064 Returns whether a COND can be vectorized.  Checks whether
10065 condition operands are supportable using vect_is_simple_use.  */
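/* For illustration (all names made up), both of

     _5 = a_1 < b_2 ? c_3 : d_4;   COND is the comparison a_1 < b_2
     _6 = mask_7 ? c_3 : d_4;      COND is the boolean SSA name mask_7

   are handled, provided the operands pass vect_is_simple_use.  */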
10066
10067 static bool
10068 vect_is_simple_cond (tree cond, vec_info *vinfo, stmt_vec_info stmt_info,
10069 slp_tree slp_node, tree *comp_vectype,
10070 enum vect_def_type *dts, tree vectype)
10071 {
10072 tree lhs, rhs;
10073 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
10074 slp_tree slp_op;
10075
10076 /* Mask case. */
10077 if (TREE_CODE (cond) == SSA_NAME
10078 && VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (cond)))
10079 {
10080 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 0, &cond,
10081 &slp_op, &dts[0], comp_vectype)
10082 || !*comp_vectype
10083 || !VECTOR_BOOLEAN_TYPE_P (*comp_vectype))
10084 return false;
10085 return true;
10086 }
10087
10088 if (!COMPARISON_CLASS_P (cond))
10089 return false;
10090
10091 lhs = TREE_OPERAND (cond, 0);
10092 rhs = TREE_OPERAND (cond, 1);
10093
10094 if (TREE_CODE (lhs) == SSA_NAME)
10095 {
10096 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 0,
10097 &lhs, &slp_op, &dts[0], &vectype1))
10098 return false;
10099 }
10100 else if (TREE_CODE (lhs) == INTEGER_CST || TREE_CODE (lhs) == REAL_CST
10101 || TREE_CODE (lhs) == FIXED_CST)
10102 dts[0] = vect_constant_def;
10103 else
10104 return false;
10105
10106 if (TREE_CODE (rhs) == SSA_NAME)
10107 {
10108 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 1,
10109 &rhs, &slp_op, &dts[1], &vectype2))
10110 return false;
10111 }
10112 else if (TREE_CODE (rhs) == INTEGER_CST || TREE_CODE (rhs) == REAL_CST
10113 || TREE_CODE (rhs) == FIXED_CST)
10114 dts[1] = vect_constant_def;
10115 else
10116 return false;
10117
10118 if (vectype1 && vectype2
10119 && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1),
10120 TYPE_VECTOR_SUBPARTS (vectype2)))
10121 return false;
10122
10123 *comp_vectype = vectype1 ? vectype1 : vectype2;
10124 /* Invariant comparison. */
10125 if (! *comp_vectype)
10126 {
10127 tree scalar_type = TREE_TYPE (lhs);
10128 if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type))
10129 *comp_vectype = truth_type_for (vectype);
10130 else
10131 {
10132 /* If we can widen the comparison to match vectype do so. */
10133 if (INTEGRAL_TYPE_P (scalar_type)
10134 && !slp_node
10135 && tree_int_cst_lt (TYPE_SIZE (scalar_type),
10136 TYPE_SIZE (TREE_TYPE (vectype))))
10137 scalar_type = build_nonstandard_integer_type
10138 (vector_element_bits (vectype), TYPE_UNSIGNED (scalar_type));
10139 *comp_vectype = get_vectype_for_scalar_type (vinfo, scalar_type,
10140 slp_node);
10141 }
10142 }
10143
10144 return true;
10145 }
10146
10147 /* vectorizable_condition.
10148
10149 Check if STMT_INFO is a conditional modify expression that can be vectorized.
10150 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
10151 stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
10152 at GSI.
10153
10154 When STMT_INFO is vectorized as a nested cycle, for_reduction is true.
10155
10156 Return true if STMT_INFO is vectorizable in this way. */
10157
10158 static bool
10159 vectorizable_condition (vec_info *vinfo,
10160 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
10161 gimple **vec_stmt,
10162 slp_tree slp_node, stmt_vector_for_cost *cost_vec)
10163 {
10164 tree scalar_dest = NULL_TREE;
10165 tree vec_dest = NULL_TREE;
10166 tree cond_expr, cond_expr0 = NULL_TREE, cond_expr1 = NULL_TREE;
10167 tree then_clause, else_clause;
10168 tree comp_vectype = NULL_TREE;
10169 tree vec_cond_lhs = NULL_TREE, vec_cond_rhs = NULL_TREE;
10170 tree vec_then_clause = NULL_TREE, vec_else_clause = NULL_TREE;
10171 tree vec_compare;
10172 tree new_temp;
10173 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
10174 enum vect_def_type dts[4]
10175 = {vect_unknown_def_type, vect_unknown_def_type,
10176 vect_unknown_def_type, vect_unknown_def_type};
10177 int ndts = 4;
10178 int ncopies;
10179 int vec_num;
10180 enum tree_code code, cond_code, bitop1 = NOP_EXPR, bitop2 = NOP_EXPR;
10181 int i;
10182 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
10183 vec<tree> vec_oprnds0 = vNULL;
10184 vec<tree> vec_oprnds1 = vNULL;
10185 vec<tree> vec_oprnds2 = vNULL;
10186 vec<tree> vec_oprnds3 = vNULL;
10187 tree vec_cmp_type;
10188 bool masked = false;
10189
10190 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
10191 return false;
10192
10193 /* Is this a vectorizable conditional operation?  */
10194 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
10195 if (!stmt)
10196 return false;
10197
10198 code = gimple_assign_rhs_code (stmt);
10199 if (code != COND_EXPR)
10200 return false;
10201
10202 stmt_vec_info reduc_info = NULL;
10203 int reduc_index = -1;
10204 vect_reduction_type reduction_type = TREE_CODE_REDUCTION;
10205 bool for_reduction
10206 = STMT_VINFO_REDUC_DEF (vect_orig_stmt (stmt_info)) != NULL;
10207 if (for_reduction)
10208 {
10209 if (STMT_SLP_TYPE (stmt_info))
10210 return false;
10211 reduc_info = info_for_reduction (vinfo, stmt_info);
10212 reduction_type = STMT_VINFO_REDUC_TYPE (reduc_info);
10213 reduc_index = STMT_VINFO_REDUC_IDX (stmt_info);
10214 gcc_assert (reduction_type != EXTRACT_LAST_REDUCTION
10215 || reduc_index != -1);
10216 }
10217 else
10218 {
10219 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
10220 return false;
10221 }
10222
10223 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
10224 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
10225
10226 if (slp_node)
10227 {
10228 ncopies = 1;
10229 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
10230 }
10231 else
10232 {
10233 ncopies = vect_get_num_copies (loop_vinfo, vectype);
10234 vec_num = 1;
10235 }
10236
10237 gcc_assert (ncopies >= 1);
10238 if (for_reduction && ncopies > 1)
10239 return false; /* FORNOW */
10240
10241 cond_expr = gimple_assign_rhs1 (stmt);
10242
10243 if (!vect_is_simple_cond (cond_expr, vinfo, stmt_info, slp_node,
10244 &comp_vectype, &dts[0], vectype)
10245 || !comp_vectype)
10246 return false;
10247
10248 unsigned op_adjust = COMPARISON_CLASS_P (cond_expr) ? 1 : 0;
10249 slp_tree then_slp_node, else_slp_node;
10250 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 1 + op_adjust,
10251 &then_clause, &then_slp_node, &dts[2], &vectype1))
10252 return false;
10253 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 2 + op_adjust,
10254 &else_clause, &else_slp_node, &dts[3], &vectype2))
10255 return false;
10256
10257 if (vectype1 && !useless_type_conversion_p (vectype, vectype1))
10258 return false;
10259
10260 if (vectype2 && !useless_type_conversion_p (vectype, vectype2))
10261 return false;
10262
10263 masked = !COMPARISON_CLASS_P (cond_expr);
10264 vec_cmp_type = truth_type_for (comp_vectype);
10265
10266 if (vec_cmp_type == NULL_TREE)
10267 return false;
10268
10269 cond_code = TREE_CODE (cond_expr);
10270 if (!masked)
10271 {
10272 cond_expr0 = TREE_OPERAND (cond_expr, 0);
10273 cond_expr1 = TREE_OPERAND (cond_expr, 1);
10274 }
10275
10276 /* For conditional reductions, the "then" value needs to be the candidate
10277 value calculated by this iteration while the "else" value needs to be
10278 the result carried over from previous iterations. If the COND_EXPR
10279 is the other way around, we need to swap it. */
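/* An illustrative example: with reduc_index == 1 in

     res_1 = x_2 < y_3 ? res_0 : z_4;

   the carried value res_0 sits in the "then" position, so below we
   invert the comparison to x_2 >= y_3 and swap the clauses to reach
   the canonical "candidate if true, carried value if false" form.  */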
10280 bool must_invert_cmp_result = false;
10281 if (reduction_type == EXTRACT_LAST_REDUCTION && reduc_index == 1)
10282 {
10283 if (masked)
10284 must_invert_cmp_result = true;
10285 else
10286 {
10287 bool honor_nans = HONOR_NANS (TREE_TYPE (cond_expr0));
10288 tree_code new_code = invert_tree_comparison (cond_code, honor_nans);
10289 if (new_code == ERROR_MARK)
10290 must_invert_cmp_result = true;
10291 else
10292 {
10293 cond_code = new_code;
10294 /* Make sure we don't accidentally use the old condition. */
10295 cond_expr = NULL_TREE;
10296 }
10297 }
10298 std::swap (then_clause, else_clause);
10299 }
10300
10301 if (!masked && VECTOR_BOOLEAN_TYPE_P (comp_vectype))
10302 {
10303 /* Boolean values may have another representation in vectors
10304 and therefore we prefer bit operations over comparison for
10305 them (which also works for scalar masks). We store opcodes
10306 to use in bitop1 and bitop2. Statement is vectorized as
10307 BITOP2 (rhs1 BITOP1 rhs2) or rhs1 BITOP2 (BITOP1 rhs2)
10308 depending on bitop1 and bitop2 arity. */
10309 switch (cond_code)
10310 {
10311 case GT_EXPR:
10312 bitop1 = BIT_NOT_EXPR;
10313 bitop2 = BIT_AND_EXPR;
10314 break;
10315 case GE_EXPR:
10316 bitop1 = BIT_NOT_EXPR;
10317 bitop2 = BIT_IOR_EXPR;
10318 break;
10319 case LT_EXPR:
10320 bitop1 = BIT_NOT_EXPR;
10321 bitop2 = BIT_AND_EXPR;
10322 std::swap (cond_expr0, cond_expr1);
10323 break;
10324 case LE_EXPR:
10325 bitop1 = BIT_NOT_EXPR;
10326 bitop2 = BIT_IOR_EXPR;
10327 std::swap (cond_expr0, cond_expr1);
10328 break;
10329 case NE_EXPR:
10330 bitop1 = BIT_XOR_EXPR;
10331 break;
10332 case EQ_EXPR:
10333 bitop1 = BIT_XOR_EXPR;
10334 bitop2 = BIT_NOT_EXPR;
10335 break;
10336 default:
10337 return false;
10338 }
10339 cond_code = SSA_NAME;
10340 }
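/* A concrete instance of the mapping above (names illustrative):
   for vector booleans a_1 > b_2 becomes a_1 & ~b_2; bitop1
   (BIT_NOT_EXPR) is applied to b_2 first and bitop2 (BIT_AND_EXPR)
   then combines the result with a_1, matching the scalar semantics
   of 1 > 0.  */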
10341
10342 if (TREE_CODE_CLASS (cond_code) == tcc_comparison
10343 && reduction_type == EXTRACT_LAST_REDUCTION
10344 && !expand_vec_cmp_expr_p (comp_vectype, vec_cmp_type, cond_code))
10345 {
10346 if (dump_enabled_p ())
10347 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10348 "reduction comparison operation not supported.\n");
10349 return false;
10350 }
10351
10352 if (!vec_stmt)
10353 {
10354 if (bitop1 != NOP_EXPR)
10355 {
10356 machine_mode mode = TYPE_MODE (comp_vectype);
10357 optab optab;
10358
10359 optab = optab_for_tree_code (bitop1, comp_vectype, optab_default);
10360 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
10361 return false;
10362
10363 if (bitop2 != NOP_EXPR)
10364 {
10365 optab = optab_for_tree_code (bitop2, comp_vectype,
10366 optab_default);
10367 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
10368 return false;
10369 }
10370 }
10371
10372 vect_cost_for_stmt kind = vector_stmt;
10373 if (reduction_type == EXTRACT_LAST_REDUCTION)
10374 /* Count one reduction-like operation per vector. */
10375 kind = vec_to_scalar;
10376 else if (!expand_vec_cond_expr_p (vectype, comp_vectype, cond_code))
10377 return false;
10378
10379 if (slp_node
10380 && (!vect_maybe_update_slp_op_vectype
10381 (SLP_TREE_CHILDREN (slp_node)[0], comp_vectype)
10382 || (op_adjust == 1
10383 && !vect_maybe_update_slp_op_vectype
10384 (SLP_TREE_CHILDREN (slp_node)[1], comp_vectype))
10385 || !vect_maybe_update_slp_op_vectype (then_slp_node, vectype)
10386 || !vect_maybe_update_slp_op_vectype (else_slp_node, vectype)))
10387 {
10388 if (dump_enabled_p ())
10389 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10390 "incompatible vector types for invariants\n");
10391 return false;
10392 }
10393
10394 if (loop_vinfo && for_reduction
10395 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo))
10396 {
10397 if (reduction_type == EXTRACT_LAST_REDUCTION)
10398 vect_record_loop_mask (loop_vinfo, &LOOP_VINFO_MASKS (loop_vinfo),
10399 ncopies * vec_num, vectype, NULL);
10400 /* Extra inactive lanes should be safe for vect_nested_cycle. */
10401 else if (STMT_VINFO_DEF_TYPE (reduc_info) != vect_nested_cycle)
10402 {
10403 if (dump_enabled_p ())
10404 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10405 "conditional reduction prevents the use"
10406 " of partial vectors.\n");
10407 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
10408 }
10409 }
10410
10411 STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type;
10412 vect_model_simple_cost (vinfo, stmt_info, ncopies, dts, ndts, slp_node,
10413 cost_vec, kind);
10414 return true;
10415 }
10416
10417 /* Transform. */
10418
10419 /* Handle def. */
10420 scalar_dest = gimple_assign_lhs (stmt);
10421 if (reduction_type != EXTRACT_LAST_REDUCTION)
10422 vec_dest = vect_create_destination_var (scalar_dest, vectype);
10423
10424 bool swap_cond_operands = false;
10425
10426 /* See whether another part of the vectorized code applies a loop
10427 mask to the condition, or to its inverse. */
10428
10429 vec_loop_masks *masks = NULL;
10430 if (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo))
10431 {
10432 if (reduction_type == EXTRACT_LAST_REDUCTION)
10433 masks = &LOOP_VINFO_MASKS (loop_vinfo);
10434 else
10435 {
10436 scalar_cond_masked_key cond (cond_expr, ncopies);
10437 if (loop_vinfo->scalar_cond_masked_set.contains (cond))
10438 masks = &LOOP_VINFO_MASKS (loop_vinfo);
10439 else
10440 {
10441 bool honor_nans = HONOR_NANS (TREE_TYPE (cond.op0));
10442 tree_code orig_code = cond.code;
10443 cond.code = invert_tree_comparison (cond.code, honor_nans);
10444 if (loop_vinfo->scalar_cond_masked_set.contains (cond))
10445 {
10446 masks = &LOOP_VINFO_MASKS (loop_vinfo);
10447 cond_code = cond.code;
10448 swap_cond_operands = true;
10449 }
10450 else
10451 {
10452 /* Try the inverse of the current mask. We check if the
10453 inverse mask is live and if so we generate a negate of
10454 the current mask such that we still honor NaNs. */
10455 cond.inverted_p = true;
10456 cond.code = orig_code;
10457 if (loop_vinfo->scalar_cond_masked_set.contains (cond))
10458 {
10459 bitop1 = orig_code;
10460 bitop2 = BIT_NOT_EXPR;
10461 masks = &LOOP_VINFO_MASKS (loop_vinfo);
10462 cond_code = cond.code;
10463 swap_cond_operands = true;
10464 }
10465 }
10466 }
10467 }
10468 }
10469
10470 /* Handle cond expr. */
10471 if (masked)
10472 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
10473 cond_expr, &vec_oprnds0, comp_vectype,
10474 then_clause, &vec_oprnds2, vectype,
10475 reduction_type != EXTRACT_LAST_REDUCTION
10476 ? else_clause : NULL, &vec_oprnds3, vectype);
10477 else
10478 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
10479 cond_expr0, &vec_oprnds0, comp_vectype,
10480 cond_expr1, &vec_oprnds1, comp_vectype,
10481 then_clause, &vec_oprnds2, vectype,
10482 reduction_type != EXTRACT_LAST_REDUCTION
10483 ? else_clause : NULL, &vec_oprnds3, vectype);
10484
10485 /* Arguments are ready. Create the new vector stmt. */
10486 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_cond_lhs)
10487 {
10488 vec_then_clause = vec_oprnds2[i];
10489 if (reduction_type != EXTRACT_LAST_REDUCTION)
10490 vec_else_clause = vec_oprnds3[i];
10491
10492 if (swap_cond_operands)
10493 std::swap (vec_then_clause, vec_else_clause);
10494
10495 if (masked)
10496 vec_compare = vec_cond_lhs;
10497 else
10498 {
10499 vec_cond_rhs = vec_oprnds1[i];
10500 if (bitop1 == NOP_EXPR)
10501 {
10502 gimple_seq stmts = NULL;
10503 vec_compare = gimple_build (&stmts, cond_code, vec_cmp_type,
10504 vec_cond_lhs, vec_cond_rhs);
10505 gsi_insert_before (gsi, stmts, GSI_SAME_STMT);
10506 }
10507 else
10508 {
10509 new_temp = make_ssa_name (vec_cmp_type);
10510 gassign *new_stmt;
10511 if (bitop1 == BIT_NOT_EXPR)
10512 new_stmt = gimple_build_assign (new_temp, bitop1,
10513 vec_cond_rhs);
10514 else
10515 new_stmt
10516 = gimple_build_assign (new_temp, bitop1, vec_cond_lhs,
10517 vec_cond_rhs);
10518 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
10519 if (bitop2 == NOP_EXPR)
10520 vec_compare = new_temp;
10521 else if (bitop2 == BIT_NOT_EXPR)
10522 {
10523 /* Instead of doing ~x ? y : z do x ? z : y. */
10524 vec_compare = new_temp;
10525 std::swap (vec_then_clause, vec_else_clause);
10526 }
10527 else
10528 {
10529 vec_compare = make_ssa_name (vec_cmp_type);
10530 new_stmt
10531 = gimple_build_assign (vec_compare, bitop2,
10532 vec_cond_lhs, new_temp);
10533 vect_finish_stmt_generation (vinfo, stmt_info,
10534 new_stmt, gsi);
10535 }
10536 }
10537 }
10538
10539 /* If we decided to apply a loop mask to the result of the vector
10540 comparison, AND the comparison with the mask now. Later passes
10541 should then be able to reuse the AND results between multiple
10542 vector statements.
10543
10544 For example:
10545 for (int i = 0; i < 100; ++i)
10546 x[i] = y[i] ? z[i] : 10;
10547
10548 results in following optimized GIMPLE:
10549
10550 mask__35.8_43 = vect__4.7_41 != { 0, ... };
10551 vec_mask_and_46 = loop_mask_40 & mask__35.8_43;
10552 _19 = &MEM[base: z_12(D), index: ivtmp_56, step: 4, offset: 0B];
10553 vect_iftmp.11_47 = .MASK_LOAD (_19, 4B, vec_mask_and_46);
10554 vect_iftmp.12_52 = VEC_COND_EXPR <vec_mask_and_46,
10555 vect_iftmp.11_47, { 10, ... }>;
10556
10557 instead of using masked and unmasked forms of
10558 vec != { 0, ... } (masked in the MASK_LOAD,
10559 unmasked in the VEC_COND_EXPR). */
10560
10561 /* Force vec_compare to be an SSA_NAME rather than a comparison,
10562 in cases where that's necessary. */
10563
10564 if (masks || reduction_type == EXTRACT_LAST_REDUCTION)
10565 {
10566 if (!is_gimple_val (vec_compare))
10567 {
10568 tree vec_compare_name = make_ssa_name (vec_cmp_type);
10569 gassign *new_stmt = gimple_build_assign (vec_compare_name,
10570 vec_compare);
10571 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
10572 vec_compare = vec_compare_name;
10573 }
10574
10575 if (must_invert_cmp_result)
10576 {
10577 tree vec_compare_name = make_ssa_name (vec_cmp_type);
10578 gassign *new_stmt = gimple_build_assign (vec_compare_name,
10579 BIT_NOT_EXPR,
10580 vec_compare);
10581 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
10582 vec_compare = vec_compare_name;
10583 }
10584
10585 if (masks)
10586 {
10587 tree loop_mask
10588 = vect_get_loop_mask (gsi, masks, vec_num * ncopies,
10589 vectype, i);
10590 tree tmp2 = make_ssa_name (vec_cmp_type);
10591 gassign *g
10592 = gimple_build_assign (tmp2, BIT_AND_EXPR, vec_compare,
10593 loop_mask);
10594 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
10595 vec_compare = tmp2;
10596 }
10597 }
10598
10599 gimple *new_stmt;
10600 if (reduction_type == EXTRACT_LAST_REDUCTION)
10601 {
10602 gimple *old_stmt = vect_orig_stmt (stmt_info)->stmt;
10603 tree lhs = gimple_get_lhs (old_stmt);
10604 new_stmt = gimple_build_call_internal
10605 (IFN_FOLD_EXTRACT_LAST, 3, else_clause, vec_compare,
10606 vec_then_clause);
10607 gimple_call_set_lhs (new_stmt, lhs);
10608 SSA_NAME_DEF_STMT (lhs) = new_stmt;
10609 if (old_stmt == gsi_stmt (*gsi))
10610 vect_finish_replace_stmt (vinfo, stmt_info, new_stmt);
10611 else
10612 {
10613 /* In this case we're moving the definition to later in the
10614 block. That doesn't matter because the only uses of the
10615 lhs are in phi statements. */
10616 gimple_stmt_iterator old_gsi = gsi_for_stmt (old_stmt);
10617 gsi_remove (&old_gsi, true);
10618 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
10619 }
10620 }
10621 else
10622 {
10623 new_temp = make_ssa_name (vec_dest);
10624 new_stmt = gimple_build_assign (new_temp, VEC_COND_EXPR, vec_compare,
10625 vec_then_clause, vec_else_clause);
10626 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
10627 }
10628 if (slp_node)
10629 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
10630 else
10631 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
10632 }
10633
10634 if (!slp_node)
10635 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
10636
10637 vec_oprnds0.release ();
10638 vec_oprnds1.release ();
10639 vec_oprnds2.release ();
10640 vec_oprnds3.release ();
10641
10642 return true;
10643 }
10644
10645 /* vectorizable_comparison.
10646
10647 Check if STMT_INFO is a comparison expression that can be vectorized.
10648 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
10649 comparison, put it in VEC_STMT, and insert it at GSI.
10650
10651 Return true if STMT_INFO is vectorizable in this way. */
10652
10653 static bool
10654 vectorizable_comparison (vec_info *vinfo,
10655 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
10656 gimple **vec_stmt,
10657 slp_tree slp_node, stmt_vector_for_cost *cost_vec)
10658 {
10659 tree lhs, rhs1, rhs2;
10660 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
10661 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
10662 tree vec_rhs1 = NULL_TREE, vec_rhs2 = NULL_TREE;
10663 tree new_temp;
10664 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
10665 enum vect_def_type dts[2] = {vect_unknown_def_type, vect_unknown_def_type};
10666 int ndts = 2;
10667 poly_uint64 nunits;
10668 int ncopies;
10669 enum tree_code code, bitop1 = NOP_EXPR, bitop2 = NOP_EXPR;
10670 int i;
10671 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
10672 vec<tree> vec_oprnds0 = vNULL;
10673 vec<tree> vec_oprnds1 = vNULL;
10674 tree mask_type;
10675 tree mask;
10676
10677 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
10678 return false;
10679
10680 if (!vectype || !VECTOR_BOOLEAN_TYPE_P (vectype))
10681 return false;
10682
10683 mask_type = vectype;
10684 nunits = TYPE_VECTOR_SUBPARTS (vectype);
10685
10686 if (slp_node)
10687 ncopies = 1;
10688 else
10689 ncopies = vect_get_num_copies (loop_vinfo, vectype);
10690
10691 gcc_assert (ncopies >= 1);
10692 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
10693 return false;
10694
10695 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
10696 if (!stmt)
10697 return false;
10698
10699 code = gimple_assign_rhs_code (stmt);
10700
10701 if (TREE_CODE_CLASS (code) != tcc_comparison)
10702 return false;
10703
10704 slp_tree slp_rhs1, slp_rhs2;
10705 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
10706 0, &rhs1, &slp_rhs1, &dts[0], &vectype1))
10707 return false;
10708
10709 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
10710 1, &rhs2, &slp_rhs2, &dts[1], &vectype2))
10711 return false;
10712
10713 if (vectype1 && vectype2
10714 && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1),
10715 TYPE_VECTOR_SUBPARTS (vectype2)))
10716 return false;
10717
10718 vectype = vectype1 ? vectype1 : vectype2;
10719
10720 /* Invariant comparison. */
10721 if (!vectype)
10722 {
10723 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs1)))
10724 vectype = mask_type;
10725 else
10726 vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (rhs1),
10727 slp_node);
10728 if (!vectype || maybe_ne (TYPE_VECTOR_SUBPARTS (vectype), nunits))
10729 return false;
10730 }
10731 else if (maybe_ne (nunits, TYPE_VECTOR_SUBPARTS (vectype)))
10732 return false;
10733
10734 /* Can't compare mask and non-mask types. */
10735 if (vectype1 && vectype2
10736 && (VECTOR_BOOLEAN_TYPE_P (vectype1) ^ VECTOR_BOOLEAN_TYPE_P (vectype2)))
10737 return false;
10738
10739 /* Boolean values may have another representation in vectors
10740 and therefore we prefer bit operations over comparison for
10741 them (which also works for scalar masks). We store opcodes
10742 to use in bitop1 and bitop2. Statement is vectorized as
10743 BITOP2 (rhs1 BITOP1 rhs2) or
10744 rhs1 BITOP2 (BITOP1 rhs2)
10745 depending on bitop1 and bitop2 arity. */
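/* E.g. for vector booleans (names illustrative) a_1 <= b_2 is emitted
   with the operands swapped as b_2 | ~a_1 (bitop1 = BIT_NOT_EXPR,
   bitop2 = BIT_IOR_EXPR), the bitwise reading of "a implies b".  */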
10746 bool swap_p = false;
10747 if (VECTOR_BOOLEAN_TYPE_P (vectype))
10748 {
10749 if (code == GT_EXPR)
10750 {
10751 bitop1 = BIT_NOT_EXPR;
10752 bitop2 = BIT_AND_EXPR;
10753 }
10754 else if (code == GE_EXPR)
10755 {
10756 bitop1 = BIT_NOT_EXPR;
10757 bitop2 = BIT_IOR_EXPR;
10758 }
10759 else if (code == LT_EXPR)
10760 {
10761 bitop1 = BIT_NOT_EXPR;
10762 bitop2 = BIT_AND_EXPR;
10763 swap_p = true;
10764 }
10765 else if (code == LE_EXPR)
10766 {
10767 bitop1 = BIT_NOT_EXPR;
10768 bitop2 = BIT_IOR_EXPR;
10769 swap_p = true;
10770 }
10771 else
10772 {
10773 bitop1 = BIT_XOR_EXPR;
10774 if (code == EQ_EXPR)
10775 bitop2 = BIT_NOT_EXPR;
10776 }
10777 }
10778
10779 if (!vec_stmt)
10780 {
10781 if (bitop1 == NOP_EXPR)
10782 {
10783 if (!expand_vec_cmp_expr_p (vectype, mask_type, code))
10784 return false;
10785 }
10786 else
10787 {
10788 machine_mode mode = TYPE_MODE (vectype);
10789 optab optab;
10790
10791 optab = optab_for_tree_code (bitop1, vectype, optab_default);
10792 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
10793 return false;
10794
10795 if (bitop2 != NOP_EXPR)
10796 {
10797 optab = optab_for_tree_code (bitop2, vectype, optab_default);
10798 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
10799 return false;
10800 }
10801 }
10802
10803 /* Put types on constant and invariant SLP children. */
10804 if (slp_node
10805 && (!vect_maybe_update_slp_op_vectype (slp_rhs1, vectype)
10806 || !vect_maybe_update_slp_op_vectype (slp_rhs2, vectype)))
10807 {
10808 if (dump_enabled_p ())
10809 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10810 "incompatible vector types for invariants\n");
10811 return false;
10812 }
10813
10814 STMT_VINFO_TYPE (stmt_info) = comparison_vec_info_type;
10815 vect_model_simple_cost (vinfo, stmt_info,
10816 ncopies * (1 + (bitop2 != NOP_EXPR)),
10817 dts, ndts, slp_node, cost_vec);
10818 return true;
10819 }
10820
10821 /* Transform. */
10822
10823 /* Handle def. */
10824 lhs = gimple_assign_lhs (stmt);
10825 mask = vect_create_destination_var (lhs, mask_type);
10826
10827 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
10828 rhs1, &vec_oprnds0, vectype,
10829 rhs2, &vec_oprnds1, vectype);
10830 if (swap_p)
10831 std::swap (vec_oprnds0, vec_oprnds1);
10832
10833 /* Arguments are ready. Create the new vector stmt. */
10834 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_rhs1)
10835 {
10836 gimple *new_stmt;
10837 vec_rhs2 = vec_oprnds1[i];
10838
10839 new_temp = make_ssa_name (mask);
10840 if (bitop1 == NOP_EXPR)
10841 {
10842 new_stmt = gimple_build_assign (new_temp, code,
10843 vec_rhs1, vec_rhs2);
10844 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
10845 }
10846 else
10847 {
10848 if (bitop1 == BIT_NOT_EXPR)
10849 new_stmt = gimple_build_assign (new_temp, bitop1, vec_rhs2);
10850 else
10851 new_stmt = gimple_build_assign (new_temp, bitop1, vec_rhs1,
10852 vec_rhs2);
10853 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
10854 if (bitop2 != NOP_EXPR)
10855 {
10856 tree res = make_ssa_name (mask);
10857 if (bitop2 == BIT_NOT_EXPR)
10858 new_stmt = gimple_build_assign (res, bitop2, new_temp);
10859 else
10860 new_stmt = gimple_build_assign (res, bitop2, vec_rhs1,
10861 new_temp);
10862 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
10863 }
10864 }
10865 if (slp_node)
10866 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
10867 else
10868 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
10869 }
10870
10871 if (!slp_node)
10872 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
10873
10874 vec_oprnds0.release ();
10875 vec_oprnds1.release ();
10876
10877 return true;
10878 }
10879
10880 /* If SLP_NODE is nonnull, return true if vectorizable_live_operation
10881 can handle all live statements in the node. Otherwise return true
10882 if STMT_INFO is not live or if vectorizable_live_operation can handle it.
10883 GSI and VEC_STMT_P are as for vectorizable_live_operation. */
10884
10885 static bool
10886 can_vectorize_live_stmts (vec_info *vinfo,
10887 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
10888 slp_tree slp_node, slp_instance slp_node_instance,
10889 bool vec_stmt_p,
10890 stmt_vector_for_cost *cost_vec)
10891 {
10892 if (slp_node)
10893 {
10894 stmt_vec_info slp_stmt_info;
10895 unsigned int i;
10896 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (slp_node), i, slp_stmt_info)
10897 {
10898 if (STMT_VINFO_LIVE_P (slp_stmt_info)
10899 && !vectorizable_live_operation (vinfo,
10900 slp_stmt_info, gsi, slp_node,
10901 slp_node_instance, i,
10902 vec_stmt_p, cost_vec))
10903 return false;
10904 }
10905 }
10906 else if (STMT_VINFO_LIVE_P (stmt_info)
10907 && !vectorizable_live_operation (vinfo, stmt_info, gsi,
10908 slp_node, slp_node_instance, -1,
10909 vec_stmt_p, cost_vec))
10910 return false;
10911
10912 return true;
10913 }
10914
10915 /* Make sure the statement is vectorizable. */
10916
10917 opt_result
10918 vect_analyze_stmt (vec_info *vinfo,
10919 stmt_vec_info stmt_info, bool *need_to_vectorize,
10920 slp_tree node, slp_instance node_instance,
10921 stmt_vector_for_cost *cost_vec)
10922 {
10923 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
10924 enum vect_relevant relevance = STMT_VINFO_RELEVANT (stmt_info);
10925 bool ok;
10926 gimple_seq pattern_def_seq;
10927
10928 if (dump_enabled_p ())
10929 dump_printf_loc (MSG_NOTE, vect_location, "==> examining statement: %G",
10930 stmt_info->stmt);
10931
10932 if (gimple_has_volatile_ops (stmt_info->stmt))
10933 return opt_result::failure_at (stmt_info->stmt,
10934 "not vectorized:"
10935 " stmt has volatile operands: %G\n",
10936 stmt_info->stmt);
10937
10938 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
10939 && node == NULL
10940 && (pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info)))
10941 {
10942 gimple_stmt_iterator si;
10943
10944 for (si = gsi_start (pattern_def_seq); !gsi_end_p (si); gsi_next (&si))
10945 {
10946 stmt_vec_info pattern_def_stmt_info
10947 = vinfo->lookup_stmt (gsi_stmt (si));
10948 if (STMT_VINFO_RELEVANT_P (pattern_def_stmt_info)
10949 || STMT_VINFO_LIVE_P (pattern_def_stmt_info))
10950 {
10951 /* Analyze def stmt of STMT if it's a pattern stmt. */
10952 if (dump_enabled_p ())
10953 dump_printf_loc (MSG_NOTE, vect_location,
10954 "==> examining pattern def statement: %G",
10955 pattern_def_stmt_info->stmt);
10956
10957 opt_result res
10958 = vect_analyze_stmt (vinfo, pattern_def_stmt_info,
10959 need_to_vectorize, node, node_instance,
10960 cost_vec);
10961 if (!res)
10962 return res;
10963 }
10964 }
10965 }
10966
10967 /* Skip stmts that do not need to be vectorized. In loops this is expected
10968 to include:
10969 - the COND_EXPR which is the loop exit condition
10970 - any LABEL_EXPRs in the loop
10971 - computations that are used only for array indexing or loop control.
10972 In basic blocks we only analyze statements that are a part of some SLP
10973 instance, therefore, all the statements are relevant.
10974
10975 A pattern statement needs to be analyzed instead of the original
10976 statement if the original statement is not relevant.  Otherwise, we
10977 analyze both statements.  In basic blocks we are called from within
10978 some SLP instance traversal; don't analyze pattern stmts there, as
10979 the pattern stmts will already be part of an SLP instance.  */
10980
10981 stmt_vec_info pattern_stmt_info = STMT_VINFO_RELATED_STMT (stmt_info);
10982 if (!STMT_VINFO_RELEVANT_P (stmt_info)
10983 && !STMT_VINFO_LIVE_P (stmt_info))
10984 {
10985 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
10986 && pattern_stmt_info
10987 && (STMT_VINFO_RELEVANT_P (pattern_stmt_info)
10988 || STMT_VINFO_LIVE_P (pattern_stmt_info)))
10989 {
10990 /* Analyze PATTERN_STMT instead of the original stmt. */
10991 stmt_info = pattern_stmt_info;
10992 if (dump_enabled_p ())
10993 dump_printf_loc (MSG_NOTE, vect_location,
10994 "==> examining pattern statement: %G",
10995 stmt_info->stmt);
10996 }
10997 else
10998 {
10999 if (dump_enabled_p ())
11000 dump_printf_loc (MSG_NOTE, vect_location, "irrelevant.\n");
11001
11002 return opt_result::success ();
11003 }
11004 }
11005 else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
11006 && node == NULL
11007 && pattern_stmt_info
11008 && (STMT_VINFO_RELEVANT_P (pattern_stmt_info)
11009 || STMT_VINFO_LIVE_P (pattern_stmt_info)))
11010 {
11011 /* Analyze PATTERN_STMT too. */
11012 if (dump_enabled_p ())
11013 dump_printf_loc (MSG_NOTE, vect_location,
11014 "==> examining pattern statement: %G",
11015 pattern_stmt_info->stmt);
11016
11017 opt_result res
11018 = vect_analyze_stmt (vinfo, pattern_stmt_info, need_to_vectorize, node,
11019 node_instance, cost_vec);
11020 if (!res)
11021 return res;
11022 }
11023
11024 switch (STMT_VINFO_DEF_TYPE (stmt_info))
11025 {
11026 case vect_internal_def:
11027 break;
11028
11029 case vect_reduction_def:
11030 case vect_nested_cycle:
11031 gcc_assert (!bb_vinfo
11032 && (relevance == vect_used_in_outer
11033 || relevance == vect_used_in_outer_by_reduction
11034 || relevance == vect_used_by_reduction
11035 || relevance == vect_unused_in_scope
11036 || relevance == vect_used_only_live));
11037 break;
11038
11039 case vect_induction_def:
11040 gcc_assert (!bb_vinfo);
11041 break;
11042
11043 case vect_constant_def:
11044 case vect_external_def:
11045 case vect_unknown_def_type:
11046 default:
11047 gcc_unreachable ();
11048 }
11049
11050 tree saved_vectype = STMT_VINFO_VECTYPE (stmt_info);
11051 if (node)
11052 STMT_VINFO_VECTYPE (stmt_info) = SLP_TREE_VECTYPE (node);
11053
11054 if (STMT_VINFO_RELEVANT_P (stmt_info))
11055 {
11056 gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
11057 gcc_assert (STMT_VINFO_VECTYPE (stmt_info)
11058 || (call && gimple_call_lhs (call) == NULL_TREE));
11059 *need_to_vectorize = true;
11060 }
11061
11062 if (PURE_SLP_STMT (stmt_info) && !node)
11063 {
11064 if (dump_enabled_p ())
11065 dump_printf_loc (MSG_NOTE, vect_location,
11066 "handled only by SLP analysis\n");
11067 return opt_result::success ();
11068 }
11069
11070 ok = true;
11071 if (!bb_vinfo
11072 && (STMT_VINFO_RELEVANT_P (stmt_info)
11073 || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def))
11074 /* Prefer vectorizable_call over vectorizable_simd_clone_call so
11075 -mveclibabi= takes precedence over library functions with
11076 the simd attribute. */
11077 ok = (vectorizable_call (vinfo, stmt_info, NULL, NULL, node, cost_vec)
11078 || vectorizable_simd_clone_call (vinfo, stmt_info, NULL, NULL, node,
11079 cost_vec)
11080 || vectorizable_conversion (vinfo, stmt_info,
11081 NULL, NULL, node, cost_vec)
11082 || vectorizable_operation (vinfo, stmt_info,
11083 NULL, NULL, node, cost_vec)
11084 || vectorizable_assignment (vinfo, stmt_info,
11085 NULL, NULL, node, cost_vec)
11086 || vectorizable_load (vinfo, stmt_info, NULL, NULL, node, cost_vec)
11087 || vectorizable_store (vinfo, stmt_info, NULL, NULL, node, cost_vec)
11088 || vectorizable_reduction (as_a <loop_vec_info> (vinfo), stmt_info,
11089 node, node_instance, cost_vec)
11090 || vectorizable_induction (as_a <loop_vec_info> (vinfo), stmt_info,
11091 NULL, node, cost_vec)
11092 || vectorizable_shift (vinfo, stmt_info, NULL, NULL, node, cost_vec)
11093 || vectorizable_condition (vinfo, stmt_info,
11094 NULL, NULL, node, cost_vec)
11095 || vectorizable_comparison (vinfo, stmt_info, NULL, NULL, node,
11096 cost_vec)
11097 || vectorizable_lc_phi (as_a <loop_vec_info> (vinfo),
11098 stmt_info, NULL, node));
11099 else
11100 {
11101 if (bb_vinfo)
11102 ok = (vectorizable_call (vinfo, stmt_info, NULL, NULL, node, cost_vec)
11103 || vectorizable_simd_clone_call (vinfo, stmt_info,
11104 NULL, NULL, node, cost_vec)
11105 || vectorizable_conversion (vinfo, stmt_info, NULL, NULL, node,
11106 cost_vec)
11107 || vectorizable_shift (vinfo, stmt_info,
11108 NULL, NULL, node, cost_vec)
11109 || vectorizable_operation (vinfo, stmt_info,
11110 NULL, NULL, node, cost_vec)
11111 || vectorizable_assignment (vinfo, stmt_info, NULL, NULL, node,
11112 cost_vec)
11113 || vectorizable_load (vinfo, stmt_info,
11114 NULL, NULL, node, cost_vec)
11115 || vectorizable_store (vinfo, stmt_info,
11116 NULL, NULL, node, cost_vec)
11117 || vectorizable_condition (vinfo, stmt_info,
11118 NULL, NULL, node, cost_vec)
11119 || vectorizable_comparison (vinfo, stmt_info, NULL, NULL, node,
11120 cost_vec)
11121 || vectorizable_phi (vinfo, stmt_info, NULL, node, cost_vec));
11122 }
11123
11124 if (node)
11125 STMT_VINFO_VECTYPE (stmt_info) = saved_vectype;
11126
11127 if (!ok)
11128 return opt_result::failure_at (stmt_info->stmt,
11129 "not vectorized:"
11130 " relevant stmt not supported: %G",
11131 stmt_info->stmt);
11132
11133 /* Stmts that are (also) "live" (i.e. that are used outside of the loop)
11134 need extra handling, except for vectorizable reductions. */
11135 if (!bb_vinfo
11136 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
11137 && STMT_VINFO_TYPE (stmt_info) != lc_phi_info_type
11138 && !can_vectorize_live_stmts (as_a <loop_vec_info> (vinfo),
11139 stmt_info, NULL, node, node_instance,
11140 false, cost_vec))
11141 return opt_result::failure_at (stmt_info->stmt,
11142 "not vectorized:"
11143 " live stmt not supported: %G",
11144 stmt_info->stmt);
11145
11146 return opt_result::success ();
11147 }
11148
11149
11150 /* Function vect_transform_stmt.
11151
11152 Create a vectorized stmt to replace STMT_INFO, and insert it at GSI. */
11153
11154 bool
11155 vect_transform_stmt (vec_info *vinfo,
11156 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
11157 slp_tree slp_node, slp_instance slp_node_instance)
11158 {
11159 bool is_store = false;
11160 gimple *vec_stmt = NULL;
11161 bool done;
11162
11163 gcc_assert (slp_node || !PURE_SLP_STMT (stmt_info));
11164
11165 tree saved_vectype = STMT_VINFO_VECTYPE (stmt_info);
11166 if (slp_node)
11167 STMT_VINFO_VECTYPE (stmt_info) = SLP_TREE_VECTYPE (slp_node);
11168
11169 switch (STMT_VINFO_TYPE (stmt_info))
11170 {
11171 case type_demotion_vec_info_type:
11172 case type_promotion_vec_info_type:
11173 case type_conversion_vec_info_type:
11174 done = vectorizable_conversion (vinfo, stmt_info,
11175 gsi, &vec_stmt, slp_node, NULL);
11176 gcc_assert (done);
11177 break;
11178
11179 case induc_vec_info_type:
11180 done = vectorizable_induction (as_a <loop_vec_info> (vinfo),
11181 stmt_info, &vec_stmt, slp_node,
11182 NULL);
11183 gcc_assert (done);
11184 break;
11185
11186 case shift_vec_info_type:
11187 done = vectorizable_shift (vinfo, stmt_info,
11188 gsi, &vec_stmt, slp_node, NULL);
11189 gcc_assert (done);
11190 break;
11191
11192 case op_vec_info_type:
11193 done = vectorizable_operation (vinfo, stmt_info, gsi, &vec_stmt, slp_node,
11194 NULL);
11195 gcc_assert (done);
11196 break;
11197
11198 case assignment_vec_info_type:
11199 done = vectorizable_assignment (vinfo, stmt_info,
11200 gsi, &vec_stmt, slp_node, NULL);
11201 gcc_assert (done);
11202 break;
11203
11204 case load_vec_info_type:
11205 done = vectorizable_load (vinfo, stmt_info, gsi, &vec_stmt, slp_node,
11206 NULL);
11207 gcc_assert (done);
11208 break;
11209
11210 case store_vec_info_type:
11211 done = vectorizable_store (vinfo, stmt_info,
11212 gsi, &vec_stmt, slp_node, NULL);
11213 gcc_assert (done);
11214 if (STMT_VINFO_GROUPED_ACCESS (stmt_info) && !slp_node)
11215 {
11216 /* In case of interleaving, the whole chain is vectorized when the
11217 last store in the chain is reached. Store stmts before the last
11218 one are skipped, and their vec_stmt_info shouldn't be freed
11219 meanwhile. */
11220 stmt_vec_info group_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
11221 if (DR_GROUP_STORE_COUNT (group_info) == DR_GROUP_SIZE (group_info))
11222 is_store = true;
11223 }
11224 else
11225 is_store = true;
11226 break;
11227
11228 case condition_vec_info_type:
11229 done = vectorizable_condition (vinfo, stmt_info,
11230 gsi, &vec_stmt, slp_node, NULL);
11231 gcc_assert (done);
11232 break;
11233
11234 case comparison_vec_info_type:
11235 done = vectorizable_comparison (vinfo, stmt_info, gsi, &vec_stmt,
11236 slp_node, NULL);
11237 gcc_assert (done);
11238 break;
11239
11240 case call_vec_info_type:
11241 done = vectorizable_call (vinfo, stmt_info,
11242 gsi, &vec_stmt, slp_node, NULL);
11243 break;
11244
11245 case call_simd_clone_vec_info_type:
11246 done = vectorizable_simd_clone_call (vinfo, stmt_info, gsi, &vec_stmt,
11247 slp_node, NULL);
11248 break;
11249
11250 case reduc_vec_info_type:
11251 done = vect_transform_reduction (as_a <loop_vec_info> (vinfo), stmt_info,
11252 gsi, &vec_stmt, slp_node);
11253 gcc_assert (done);
11254 break;
11255
11256 case cycle_phi_info_type:
11257 done = vect_transform_cycle_phi (as_a <loop_vec_info> (vinfo), stmt_info,
11258 &vec_stmt, slp_node, slp_node_instance);
11259 gcc_assert (done);
11260 break;
11261
11262 case lc_phi_info_type:
11263 done = vectorizable_lc_phi (as_a <loop_vec_info> (vinfo),
11264 stmt_info, &vec_stmt, slp_node);
11265 gcc_assert (done);
11266 break;
11267
11268 case phi_info_type:
11269 done = vectorizable_phi (vinfo, stmt_info, &vec_stmt, slp_node, NULL);
11270 gcc_assert (done);
11271 break;
11272
11273 default:
11274 if (!STMT_VINFO_LIVE_P (stmt_info))
11275 {
11276 if (dump_enabled_p ())
11277 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
11278 "stmt not supported.\n");
11279 gcc_unreachable ();
11280 }
11281 done = true;
11282 }
11283
11284 if (!slp_node && vec_stmt)
11285 gcc_assert (STMT_VINFO_VEC_STMTS (stmt_info).exists ());
11286
11287 if (STMT_VINFO_TYPE (stmt_info) != store_vec_info_type)
11288 {
11289 /* Handle stmts whose DEF is used outside the loop-nest that is
11290 being vectorized. */
11291 done = can_vectorize_live_stmts (vinfo, stmt_info, gsi, slp_node,
11292 slp_node_instance, true, NULL);
11293 gcc_assert (done);
11294 }
11295
11296 if (slp_node)
11297 STMT_VINFO_VECTYPE (stmt_info) = saved_vectype;
11298
11299 return is_store;
11300 }
11301
11302
11303 /* Remove a group of stores (for SLP or interleaving), free their
11304 stmt_vec_info. */
11305
11306 void
11307 vect_remove_stores (vec_info *vinfo, stmt_vec_info first_stmt_info)
11308 {
11309 stmt_vec_info next_stmt_info = first_stmt_info;
11310
11311 while (next_stmt_info)
11312 {
11313 stmt_vec_info tmp = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
11314 next_stmt_info = vect_orig_stmt (next_stmt_info);
11315 /* Free the attached stmt_vec_info and remove the stmt. */
11316 vinfo->remove_stmt (next_stmt_info);
11317 next_stmt_info = tmp;
11318 }
11319 }
11320
11321 /* If NUNITS is nonzero, return a vector type that contains NUNITS
11322 elements of type SCALAR_TYPE, or null if the target doesn't support
11323 such a type.
11324
11325 If NUNITS is zero, return a vector type that contains elements of
11326 type SCALAR_TYPE, choosing whichever vector size the target prefers.
11327
11328 If PREVAILING_MODE is VOIDmode, we have not yet chosen a vector mode
11329 for this vectorization region and want to "autodetect" the best choice.
11330 Otherwise, PREVAILING_MODE is a previously-chosen vector TYPE_MODE
11331 and we want the new type to be interoperable with it. PREVAILING_MODE
11332 in this case can be a scalar integer mode or a vector mode; when it
11333 is a vector mode, the function acts like a tree-level version of
11334 related_vector_mode. */
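/* Illustrative uses (the modes are examples, not requirements):
   PREVAILING_MODE == VOIDmode with NUNITS == 0 autodetects the
   target's preferred vector type for SCALAR_TYPE, while passing an
   existing vector mode such as V16QImode together with a 32-bit int
   SCALAR_TYPE would, on a target with 128-bit vectors, be expected
   to yield a V4SI-style type interoperable with it.  */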
11335
11336 tree
11337 get_related_vectype_for_scalar_type (machine_mode prevailing_mode,
11338 tree scalar_type, poly_uint64 nunits)
11339 {
11340 tree orig_scalar_type = scalar_type;
11341 scalar_mode inner_mode;
11342 machine_mode simd_mode;
11343 tree vectype;
11344
11345 if (!is_int_mode (TYPE_MODE (scalar_type), &inner_mode)
11346 && !is_float_mode (TYPE_MODE (scalar_type), &inner_mode))
11347 return NULL_TREE;
11348
11349 unsigned int nbytes = GET_MODE_SIZE (inner_mode);
11350
11351 /* For vector types of elements whose mode precision doesn't
11352 match their type's precision we use an element type of mode
11353 precision. The vectorization routines will have to make sure
11354 they support the proper result truncation/extension.
11355 We also make sure to build vector types with INTEGER_TYPE
11356 component type only. */
11357 if (INTEGRAL_TYPE_P (scalar_type)
11358 && (GET_MODE_BITSIZE (inner_mode) != TYPE_PRECISION (scalar_type)
11359 || TREE_CODE (scalar_type) != INTEGER_TYPE))
11360 scalar_type = build_nonstandard_integer_type (GET_MODE_BITSIZE (inner_mode),
11361 TYPE_UNSIGNED (scalar_type));
11362
11363 /* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
11364 When the component mode passes the above test simply use a type
11365 corresponding to that mode. The theory is that any use that
11366 would cause problems with this will disable vectorization anyway. */
11367 else if (!SCALAR_FLOAT_TYPE_P (scalar_type)
11368 && !INTEGRAL_TYPE_P (scalar_type))
11369 scalar_type = lang_hooks.types.type_for_mode (inner_mode, 1);
11370
11371 /* We can't build a vector type of elements with alignment bigger than
11372 their size. */
11373 else if (nbytes < TYPE_ALIGN_UNIT (scalar_type))
11374 scalar_type = lang_hooks.types.type_for_mode (inner_mode,
11375 TYPE_UNSIGNED (scalar_type));
11376
11377 /* If we fell back to using the mode, fail if there was
11378 no scalar type for it. */
11379 if (scalar_type == NULL_TREE)
11380 return NULL_TREE;
11381
11382 /* If no prevailing mode was supplied, use the mode the target prefers.
11383 Otherwise lookup a vector mode based on the prevailing mode. */
11384 if (prevailing_mode == VOIDmode)
11385 {
11386 gcc_assert (known_eq (nunits, 0U));
11387 simd_mode = targetm.vectorize.preferred_simd_mode (inner_mode);
11388 if (SCALAR_INT_MODE_P (simd_mode))
11389 {
11390 /* Traditional behavior is not to take the integer mode
11391 literally, but simply to use it as a way of determining
11392 the vector size. It is up to mode_for_vector to decide
11393 what the TYPE_MODE should be.
11394
11395 Note that nunits == 1 is allowed in order to support single
11396 element vector types. */
11397 if (!multiple_p (GET_MODE_SIZE (simd_mode), nbytes, &nunits)
11398 || !mode_for_vector (inner_mode, nunits).exists (&simd_mode))
11399 return NULL_TREE;
11400 }
11401 }
11402 else if (SCALAR_INT_MODE_P (prevailing_mode)
11403 || !related_vector_mode (prevailing_mode,
11404 inner_mode, nunits).exists (&simd_mode))
11405 {
11406 /* Fall back to using mode_for_vector, mostly in the hope of being
11407 able to use an integer mode. */
11408 if (known_eq (nunits, 0U)
11409 && !multiple_p (GET_MODE_SIZE (prevailing_mode), nbytes, &nunits))
11410 return NULL_TREE;
11411
11412 if (!mode_for_vector (inner_mode, nunits).exists (&simd_mode))
11413 return NULL_TREE;
11414 }
11415
11416 vectype = build_vector_type_for_mode (scalar_type, simd_mode);
11417
11418 /* In cases where the mode was chosen by mode_for_vector, check that
11419 the target actually supports the chosen mode, or that it at least
11420 allows the vector mode to be replaced by a like-sized integer. */
11421 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
11422 && !INTEGRAL_MODE_P (TYPE_MODE (vectype)))
11423 return NULL_TREE;
11424
11425 /* Re-attach the address-space qualifier if we canonicalized the scalar
11426 type. */
11427 if (TYPE_ADDR_SPACE (orig_scalar_type) != TYPE_ADDR_SPACE (vectype))
11428 return build_qualified_type
11429 (vectype, KEEP_QUAL_ADDR_SPACE (TYPE_QUALS (orig_scalar_type)));
11430
11431 return vectype;
11432 }
11433
11434 /* Function get_vectype_for_scalar_type.
11435
11436 Returns the vector type corresponding to SCALAR_TYPE as supported
11437 by the target. If GROUP_SIZE is nonzero and we're performing BB
11438 vectorization, make sure that the number of elements in the vector
11439 is no bigger than GROUP_SIZE. */
11440
11441 tree
11442 get_vectype_for_scalar_type (vec_info *vinfo, tree scalar_type,
11443 unsigned int group_size)
11444 {
11445 /* For BB vectorization, we should always have a group size once we've
11446 constructed the SLP tree; the only valid uses of zero GROUP_SIZEs
11447 are tentative requests during things like early data reference
11448 analysis and pattern recognition. */
11449 if (is_a <bb_vec_info> (vinfo))
11450 gcc_assert (vinfo->slp_instances.is_empty () || group_size != 0);
11451 else
11452 group_size = 0;
11453
11454 tree vectype = get_related_vectype_for_scalar_type (vinfo->vector_mode,
11455 scalar_type);
11456 if (vectype && vinfo->vector_mode == VOIDmode)
11457 vinfo->vector_mode = TYPE_MODE (vectype);
11458
11459 /* Register the natural choice of vector type, before the group size
11460 has been applied. */
11461 if (vectype)
11462 vinfo->used_vector_modes.add (TYPE_MODE (vectype));
11463
11464 /* If the natural choice of vector type doesn't satisfy GROUP_SIZE,
11465 try again with an explicit number of elements. */
11466 if (vectype
11467 && group_size
11468 && maybe_ge (TYPE_VECTOR_SUBPARTS (vectype), group_size))
11469 {
11470 /* Start with the biggest number of units that fits within
11471 GROUP_SIZE and halve it until we find a valid vector type.
11472 Usually either the first attempt will succeed or all will
11473 fail (in the latter case because GROUP_SIZE is too small
11474 for the target), but it's possible that a target could have
11475 a hole between supported vector types.
11476
11477 If GROUP_SIZE is not a power of 2, this has the effect of
11478 trying the largest power of 2 that fits within the group,
11479 even though the group is not a multiple of that vector size.
11480 The BB vectorizer will then try to carve up the group into
11481 smaller pieces. */
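/* Worked example, sizes for illustration only: for GROUP_SIZE == 6,
   floor_log2 gives nunits == 4; if no four-element vector exists for
   SCALAR_TYPE the loop retries with nunits == 2 before giving up.  */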
11482 unsigned int nunits = 1 << floor_log2 (group_size);
11483 do
11484 {
11485 vectype = get_related_vectype_for_scalar_type (vinfo->vector_mode,
11486 scalar_type, nunits);
11487 nunits /= 2;
11488 }
11489 while (nunits > 1 && !vectype);
11490 }
11491
11492 return vectype;
11493 }
11494
11495 /* Return the vector type corresponding to SCALAR_TYPE as supported
11496 by the target. NODE, if nonnull, is the SLP tree node that will
11497 use the returned vector type. */
11498
11499 tree
11500 get_vectype_for_scalar_type (vec_info *vinfo, tree scalar_type, slp_tree node)
11501 {
11502 unsigned int group_size = 0;
11503 if (node)
11504 group_size = SLP_TREE_LANES (node);
11505 return get_vectype_for_scalar_type (vinfo, scalar_type, group_size);
11506 }
11507
11508 /* Function get_mask_type_for_scalar_type.
11509
11510 Returns the mask type corresponding to the result of a comparison
11511 of vectors of the specified SCALAR_TYPE, as supported by the target.
11512 If GROUP_SIZE is nonzero and we're performing BB vectorization,
11513 make sure that the number of elements in the vector is no bigger
11514 than GROUP_SIZE. */
11515
11516 tree
11517 get_mask_type_for_scalar_type (vec_info *vinfo, tree scalar_type,
11518 unsigned int group_size)
11519 {
11520 tree vectype = get_vectype_for_scalar_type (vinfo, scalar_type, group_size);
11521
11522 if (!vectype)
11523 return NULL;
11524
11525 return truth_type_for (vectype);
11526 }
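
/* Hedged note on the helper above: the mask type comes from
   truth_type_for, so e.g. a V4SI comparison result would map to the
   target's 4-lane boolean vector type (a dedicated mask mode on
   AVX-512/SVE-style targets, or an integer boolean vector elsewhere).  */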
11527
11528 /* Function get_same_sized_vectype
11529
11530 Returns a vector type corresponding to SCALAR_TYPE of size
11531 VECTOR_TYPE if supported by the target. */
11532
11533 tree
11534 get_same_sized_vectype (tree scalar_type, tree vector_type)
11535 {
11536 if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type))
11537 return truth_type_for (vector_type);
11538
11539 poly_uint64 nunits;
11540 if (!multiple_p (GET_MODE_SIZE (TYPE_MODE (vector_type)),
11541 GET_MODE_SIZE (TYPE_MODE (scalar_type)), &nunits))
11542 return NULL_TREE;
11543
11544 return get_related_vectype_for_scalar_type (TYPE_MODE (vector_type),
11545 scalar_type, nunits);
11546 }
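
/* Hedged sketch of the unit-count arithmetic above with concrete byte
   sizes: a same-sized vector type exists only when the scalar size
   divides the vector size evenly.  The values are illustrative, not
   target queries.  */

static unsigned int
same_sized_nunits_sketch (unsigned int vector_bytes, unsigned int scalar_bytes)
{
  if (vector_bytes % scalar_bytes != 0)
    /* No same-sized vector type; the real code returns NULL_TREE.  */
    return 0;
  /* E.g. a 16-byte vector over 8-byte doubles gives 2 units (V2DF).  */
  return vector_bytes / scalar_bytes;
}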
11547
11548 /* Return true if replacing LOOP_VINFO->vector_mode with VECTOR_MODE
11549 would not change the chosen vector modes. */
11550
11551 bool
11552 vect_chooses_same_modes_p (vec_info *vinfo, machine_mode vector_mode)
11553 {
11554 for (vec_info::mode_set::iterator i = vinfo->used_vector_modes.begin ();
11555 i != vinfo->used_vector_modes.end (); ++i)
11556 if (!VECTOR_MODE_P (*i)
11557 || related_vector_mode (vector_mode, GET_MODE_INNER (*i), 0) != *i)
11558 return false;
11559 return true;
11560 }
11561
11562 /* Function vect_is_simple_use.
11563
11564 Input:
11565 VINFO - the vect info of the loop or basic block that is being vectorized.
11566 OPERAND - operand in the loop or bb.
11567 Output:
11568 DEF_STMT_INFO_OUT (optional) - information about the defining stmt in
11569 case OPERAND is an SSA_NAME that is defined in the vectorizable region
11570 DEF_STMT_OUT (optional) - the defining stmt in case OPERAND is an SSA_NAME;
11571 the definition could be anywhere in the function
11572 DT - the type of definition
11573
11574 Returns whether a stmt with OPERAND can be vectorized.
11575 For loops, supportable operands are constants, loop invariants, and operands
11576 that are defined by the current iteration of the loop. Unsupportable
11577 operands are those that are defined by a previous iteration of the loop (as
11578 is the case in reduction/induction computations).
11579 For basic blocks, supportable operands are constants and bb invariants.
11580 For now, operands defined outside the basic block are not supported. */
11581
11582 bool
11583 vect_is_simple_use (tree operand, vec_info *vinfo, enum vect_def_type *dt,
11584 stmt_vec_info *def_stmt_info_out, gimple **def_stmt_out)
11585 {
11586 if (def_stmt_info_out)
11587 *def_stmt_info_out = NULL;
11588 if (def_stmt_out)
11589 *def_stmt_out = NULL;
11590 *dt = vect_unknown_def_type;
11591
11592 if (dump_enabled_p ())
11593 {
11594 dump_printf_loc (MSG_NOTE, vect_location,
11595 "vect_is_simple_use: operand ");
11596 if (TREE_CODE (operand) == SSA_NAME
11597 && !SSA_NAME_IS_DEFAULT_DEF (operand))
11598 dump_gimple_expr (MSG_NOTE, TDF_SLIM, SSA_NAME_DEF_STMT (operand), 0);
11599 else
11600 dump_generic_expr (MSG_NOTE, TDF_SLIM, operand);
11601 }
11602
11603 if (CONSTANT_CLASS_P (operand))
11604 *dt = vect_constant_def;
11605 else if (is_gimple_min_invariant (operand))
11606 *dt = vect_external_def;
11607 else if (TREE_CODE (operand) != SSA_NAME)
11608 *dt = vect_unknown_def_type;
11609 else if (SSA_NAME_IS_DEFAULT_DEF (operand))
11610 *dt = vect_external_def;
11611 else
11612 {
11613 gimple *def_stmt = SSA_NAME_DEF_STMT (operand);
11614 stmt_vec_info stmt_vinfo = vinfo->lookup_def (operand);
11615 if (!stmt_vinfo)
11616 *dt = vect_external_def;
11617 else
11618 {
11619 stmt_vinfo = vect_stmt_to_vectorize (stmt_vinfo);
11620 def_stmt = stmt_vinfo->stmt;
11621 *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
11622 if (def_stmt_info_out)
11623 *def_stmt_info_out = stmt_vinfo;
11624 }
11625 if (def_stmt_out)
11626 *def_stmt_out = def_stmt;
11627 }
11628
11629 if (dump_enabled_p ())
11630 {
11631 dump_printf (MSG_NOTE, ", type of def: ");
11632 switch (*dt)
11633 {
11634 case vect_uninitialized_def:
11635 dump_printf (MSG_NOTE, "uninitialized\n");
11636 break;
11637 case vect_constant_def:
11638 dump_printf (MSG_NOTE, "constant\n");
11639 break;
11640 case vect_external_def:
11641 dump_printf (MSG_NOTE, "external\n");
11642 break;
11643 case vect_internal_def:
11644 dump_printf (MSG_NOTE, "internal\n");
11645 break;
11646 case vect_induction_def:
11647 dump_printf (MSG_NOTE, "induction\n");
11648 break;
11649 case vect_reduction_def:
11650 dump_printf (MSG_NOTE, "reduction\n");
11651 break;
11652 case vect_double_reduction_def:
11653 dump_printf (MSG_NOTE, "double reduction\n");
11654 break;
11655 case vect_nested_cycle:
11656 dump_printf (MSG_NOTE, "nested cycle\n");
11657 break;
11658 case vect_unknown_def_type:
11659 dump_printf (MSG_NOTE, "unknown\n");
11660 break;
11661 }
11662 }
11663
11664 if (*dt == vect_unknown_def_type)
11665 {
11666 if (dump_enabled_p ())
11667 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
11668 "Unsupported pattern.\n");
11669 return false;
11670 }
11671
11672 return true;
11673 }
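
/* Hedged usage sketch (not called anywhere in this file): classify both
   operands of a binary assignment the way the vectorizable_* routines
   typically do.  The API calls are real; the wrapper itself and its
   reliance on the defaulted trailing arguments declared in
   tree-vectorizer.h are illustrative only.  */

static bool
classify_binary_operands_sketch (vec_info *vinfo, gassign *assign)
{
  for (unsigned int i = 1; i <= 2; i++)
    {
      enum vect_def_type dt;
      tree op = gimple_op (assign, i);
      if (!vect_is_simple_use (op, vinfo, &dt))
	return false;  /* E.g. an unknown definition type.  */
      /* Constant and external defs will need invariant vector
	 construction; internal defs come from a vectorized stmt.  */
    }
  return true;
}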
11674
11675 /* Function vect_is_simple_use.
11676
11677 Same as vect_is_simple_use but also determines the vector operand
11678 type of OPERAND and stores it to *VECTYPE. If the definition of
11679 OPERAND is vect_uninitialized_def, vect_constant_def or
11680 vect_external_def *VECTYPE will be set to NULL_TREE and the caller
11681    is responsible for computing the best-suited vector type for the
11682 scalar operand. */
11683
11684 bool
11685 vect_is_simple_use (tree operand, vec_info *vinfo, enum vect_def_type *dt,
11686 tree *vectype, stmt_vec_info *def_stmt_info_out,
11687 gimple **def_stmt_out)
11688 {
11689 stmt_vec_info def_stmt_info;
11690 gimple *def_stmt;
11691 if (!vect_is_simple_use (operand, vinfo, dt, &def_stmt_info, &def_stmt))
11692 return false;
11693
11694 if (def_stmt_out)
11695 *def_stmt_out = def_stmt;
11696 if (def_stmt_info_out)
11697 *def_stmt_info_out = def_stmt_info;
11698
11699 /* Now get a vector type if the def is internal, otherwise supply
11700 NULL_TREE and leave it up to the caller to figure out a proper
11701 type for the use stmt. */
11702 if (*dt == vect_internal_def
11703 || *dt == vect_induction_def
11704 || *dt == vect_reduction_def
11705 || *dt == vect_double_reduction_def
11706 || *dt == vect_nested_cycle)
11707 {
11708 *vectype = STMT_VINFO_VECTYPE (def_stmt_info);
11709 gcc_assert (*vectype != NULL_TREE);
11710 if (dump_enabled_p ())
11711 dump_printf_loc (MSG_NOTE, vect_location,
11712 "vect_is_simple_use: vectype %T\n", *vectype);
11713 }
11714 else if (*dt == vect_uninitialized_def
11715 || *dt == vect_constant_def
11716 || *dt == vect_external_def)
11717 *vectype = NULL_TREE;
11718 else
11719 gcc_unreachable ();
11720
11721 return true;
11722 }
11723
11724 /* Function vect_is_simple_use.
11725
11726 Same as vect_is_simple_use but determines the operand by operand
11727    Same as vect_is_simple_use but determines the operand from its
11728    position OPERAND in either STMT or SLP_NODE, filling in *OP
11729
11730 bool
11731 vect_is_simple_use (vec_info *vinfo, stmt_vec_info stmt, slp_tree slp_node,
11732 unsigned operand, tree *op, slp_tree *slp_def,
11733 enum vect_def_type *dt,
11734 tree *vectype, stmt_vec_info *def_stmt_info_out)
11735 {
11736 if (slp_node)
11737 {
11738 slp_tree child = SLP_TREE_CHILDREN (slp_node)[operand];
11739 *slp_def = child;
11740 *vectype = SLP_TREE_VECTYPE (child);
11741 if (SLP_TREE_DEF_TYPE (child) == vect_internal_def)
11742 {
11743 *op = gimple_get_lhs (SLP_TREE_REPRESENTATIVE (child)->stmt);
11744 return vect_is_simple_use (*op, vinfo, dt, def_stmt_info_out);
11745 }
11746 else
11747 {
11748 if (def_stmt_info_out)
11749 *def_stmt_info_out = NULL;
11750 *op = SLP_TREE_SCALAR_OPS (child)[0];
11751 *dt = SLP_TREE_DEF_TYPE (child);
11752 return true;
11753 }
11754 }
11755 else
11756 {
11757 *slp_def = NULL;
11758 if (gassign *ass = dyn_cast <gassign *> (stmt->stmt))
11759 {
11760 if (gimple_assign_rhs_code (ass) == COND_EXPR
11761 && COMPARISON_CLASS_P (gimple_assign_rhs1 (ass)))
11762 {
11763 if (operand < 2)
11764 *op = TREE_OPERAND (gimple_assign_rhs1 (ass), operand);
11765 else
11766 *op = gimple_op (ass, operand);
11767 }
11768 else if (gimple_assign_rhs_code (ass) == VIEW_CONVERT_EXPR)
11769 *op = TREE_OPERAND (gimple_assign_rhs1 (ass), 0);
11770 else
11771 *op = gimple_op (ass, operand + 1);
11772 }
11773 else if (gcall *call = dyn_cast <gcall *> (stmt->stmt))
11774 *op = gimple_call_arg (call, operand);
11775 else
11776 gcc_unreachable ();
11777 return vect_is_simple_use (*op, vinfo, dt, vectype, def_stmt_info_out);
11778 }
11779 }
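
/* Worked example of the OPERAND numbering used above, derived from the
   code: for a COND_EXPR assignment lhs = a < b ? c : d, OPERAND 0 and 1
   select the comparison operands a and b, while 2 and 3 select c and d;
   for an ordinary assignment lhs = x + y, OPERAND I selects gimple
   operand I + 1, i.e. 0 -> x and 1 -> y; for calls, OPERAND I selects
   call argument I.  */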
11780
11781 /* If OP is not NULL and is external or constant update its vector
11782 type with VECTYPE. Returns true if successful or false if not,
11783 for example when conflicting vector types are present. */
11784
11785 bool
11786 vect_maybe_update_slp_op_vectype (slp_tree op, tree vectype)
11787 {
11788 if (!op || SLP_TREE_DEF_TYPE (op) == vect_internal_def)
11789 return true;
11790 if (SLP_TREE_VECTYPE (op))
11791 return types_compatible_p (SLP_TREE_VECTYPE (op), vectype);
11792 SLP_TREE_VECTYPE (op) = vectype;
11793 return true;
11794 }
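
/* Hedged usage note for the helper above: a typical vectorizable_*
   routine calls vect_maybe_update_slp_op_vectype for each SLP child
   after choosing a vector type, and fails the SLP attempt if a child
   already carries an incompatible vector type.  */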
11795
11796 /* Function supportable_widening_operation
11797
11798 Check whether an operation represented by the code CODE is a
11799 widening operation that is supported by the target platform in
11800 vector form (i.e., when operating on arguments of type VECTYPE_IN
11801 producing a result of type VECTYPE_OUT).
11802
11803    Widening operations we currently support are NOP (CONVERT), FLOAT,
11804    FIX_TRUNC, WIDEN_MULT, WIDEN_PLUS, WIDEN_MINUS, WIDEN_LSHIFT,
11805    DOT_PROD and SAD.  This function checks if these operations are
11806    supported by the target either via vector tree-codes or target builtins.
11807
11808 Output:
11809 - CODE1 and CODE2 are codes of vector operations to be used when
11810 vectorizing the operation, if available.
11811 - MULTI_STEP_CVT determines the number of required intermediate steps in
11812 case of multi-step conversion (like char->short->int - in that case
11813 MULTI_STEP_CVT will be 1).
11814 - INTERM_TYPES contains the intermediate type required to perform the
11815 widening operation (short in the above example). */
11816
11817 bool
11818 supportable_widening_operation (vec_info *vinfo,
11819 enum tree_code code, stmt_vec_info stmt_info,
11820 tree vectype_out, tree vectype_in,
11821 enum tree_code *code1, enum tree_code *code2,
11822 int *multi_step_cvt,
11823 vec<tree> *interm_types)
11824 {
11825 loop_vec_info loop_info = dyn_cast <loop_vec_info> (vinfo);
11826 class loop *vect_loop = NULL;
11827 machine_mode vec_mode;
11828 enum insn_code icode1, icode2;
11829 optab optab1, optab2;
11830 tree vectype = vectype_in;
11831 tree wide_vectype = vectype_out;
11832 enum tree_code c1, c2;
11833 int i;
11834 tree prev_type, intermediate_type;
11835 machine_mode intermediate_mode, prev_mode;
11836 optab optab3, optab4;
11837
11838 *multi_step_cvt = 0;
11839 if (loop_info)
11840 vect_loop = LOOP_VINFO_LOOP (loop_info);
11841
11842 switch (code)
11843 {
11844 case WIDEN_MULT_EXPR:
11845 /* The result of a vectorized widening operation usually requires
11846 two vectors (because the widened results do not fit into one vector).
11847 The generated vector results would normally be expected to be
11848 generated in the same order as in the original scalar computation,
11849 i.e. if 8 results are generated in each vector iteration, they are
11850 to be organized as follows:
11851 vect1: [res1,res2,res3,res4],
11852 vect2: [res5,res6,res7,res8].
11853
11854 However, in the special case that the result of the widening
11855 operation is used in a reduction computation only, the order doesn't
11856 matter (because when vectorizing a reduction we change the order of
11857 the computation). Some targets can take advantage of this and
11858 generate more efficient code. For example, targets like Altivec,
11859 that support widen_mult using a sequence of {mult_even,mult_odd}
11860 generate the following vectors:
11861 vect1: [res1,res3,res5,res7],
11862 vect2: [res2,res4,res6,res8].
11863
11864 When vectorizing outer-loops, we execute the inner-loop sequentially
11865 (each vectorized inner-loop iteration contributes to VF outer-loop
11866	 iterations in parallel).  We therefore don't allow changing the
11867 order of the computation in the inner-loop during outer-loop
11868 vectorization. */
11869 /* TODO: Another case in which order doesn't *really* matter is when we
11870 widen and then contract again, e.g. (short)((int)x * y >> 8).
11871 Normally, pack_trunc performs an even/odd permute, whereas the
11872 repack from an even/odd expansion would be an interleave, which
11873 would be significantly simpler for e.g. AVX2. */
11874 /* In any case, in order to avoid duplicating the code below, recurse
11875 on VEC_WIDEN_MULT_EVEN_EXPR. If it succeeds, all the return values
11876 are properly set up for the caller. If we fail, we'll continue with
11877 a VEC_WIDEN_MULT_LO/HI_EXPR check. */
11878 if (vect_loop
11879 && STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction
11880 && !nested_in_vect_loop_p (vect_loop, stmt_info)
11881 && supportable_widening_operation (vinfo, VEC_WIDEN_MULT_EVEN_EXPR,
11882 stmt_info, vectype_out,
11883 vectype_in, code1, code2,
11884 multi_step_cvt, interm_types))
11885 {
11886 /* Elements in a vector with vect_used_by_reduction property cannot
11887 be reordered if the use chain with this property does not have the
11888	 same operation.  One such example is s += a * b, where elements
11889 in a and b cannot be reordered. Here we check if the vector defined
11890 by STMT is only directly used in the reduction statement. */
11891 tree lhs = gimple_assign_lhs (stmt_info->stmt);
11892 stmt_vec_info use_stmt_info = loop_info->lookup_single_use (lhs);
11893 if (use_stmt_info
11894 && STMT_VINFO_DEF_TYPE (use_stmt_info) == vect_reduction_def)
11895 return true;
11896 }
11897 c1 = VEC_WIDEN_MULT_LO_EXPR;
11898 c2 = VEC_WIDEN_MULT_HI_EXPR;
11899 break;
11900
11901 case DOT_PROD_EXPR:
11902 c1 = DOT_PROD_EXPR;
11903 c2 = DOT_PROD_EXPR;
11904 break;
11905
11906 case SAD_EXPR:
11907 c1 = SAD_EXPR;
11908 c2 = SAD_EXPR;
11909 break;
11910
11911 case VEC_WIDEN_MULT_EVEN_EXPR:
11912 /* Support the recursion induced just above. */
11913 c1 = VEC_WIDEN_MULT_EVEN_EXPR;
11914 c2 = VEC_WIDEN_MULT_ODD_EXPR;
11915 break;
11916
11917 case WIDEN_LSHIFT_EXPR:
11918 c1 = VEC_WIDEN_LSHIFT_LO_EXPR;
11919 c2 = VEC_WIDEN_LSHIFT_HI_EXPR;
11920 break;
11921
11922 case WIDEN_PLUS_EXPR:
11923 c1 = VEC_WIDEN_PLUS_LO_EXPR;
11924 c2 = VEC_WIDEN_PLUS_HI_EXPR;
11925 break;
11926
11927 case WIDEN_MINUS_EXPR:
11928 c1 = VEC_WIDEN_MINUS_LO_EXPR;
11929 c2 = VEC_WIDEN_MINUS_HI_EXPR;
11930 break;
11931
11932 CASE_CONVERT:
11933 c1 = VEC_UNPACK_LO_EXPR;
11934 c2 = VEC_UNPACK_HI_EXPR;
11935 break;
11936
11937 case FLOAT_EXPR:
11938 c1 = VEC_UNPACK_FLOAT_LO_EXPR;
11939 c2 = VEC_UNPACK_FLOAT_HI_EXPR;
11940 break;
11941
11942 case FIX_TRUNC_EXPR:
11943 c1 = VEC_UNPACK_FIX_TRUNC_LO_EXPR;
11944 c2 = VEC_UNPACK_FIX_TRUNC_HI_EXPR;
11945 break;
11946
11947 default:
11948 gcc_unreachable ();
11949 }
11950
11951 if (BYTES_BIG_ENDIAN && c1 != VEC_WIDEN_MULT_EVEN_EXPR)
11952 std::swap (c1, c2);
11953
11954 if (code == FIX_TRUNC_EXPR)
11955 {
11956       /* The signedness is determined from the output operand.  */
11957 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
11958 optab2 = optab_for_tree_code (c2, vectype_out, optab_default);
11959 }
11960 else if (CONVERT_EXPR_CODE_P (code)
11961 && VECTOR_BOOLEAN_TYPE_P (wide_vectype)
11962 && VECTOR_BOOLEAN_TYPE_P (vectype)
11963 && TYPE_MODE (wide_vectype) == TYPE_MODE (vectype)
11964 && SCALAR_INT_MODE_P (TYPE_MODE (vectype)))
11965 {
11966 /* If the input and result modes are the same, a different optab
11967 is needed where we pass in the number of units in vectype. */
11968 optab1 = vec_unpacks_sbool_lo_optab;
11969 optab2 = vec_unpacks_sbool_hi_optab;
11970 }
11971 else
11972 {
11973 optab1 = optab_for_tree_code (c1, vectype, optab_default);
11974 optab2 = optab_for_tree_code (c2, vectype, optab_default);
11975 }
11976
11977 if (!optab1 || !optab2)
11978 return false;
11979
11980 vec_mode = TYPE_MODE (vectype);
11981 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing
11982 || (icode2 = optab_handler (optab2, vec_mode)) == CODE_FOR_nothing)
11983 return false;
11984
11985 *code1 = c1;
11986 *code2 = c2;
11987
11988 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
11989 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
11990 {
11991 if (!VECTOR_BOOLEAN_TYPE_P (vectype))
11992 return true;
11993       /* For scalar masks we may have different boolean
11994	 vector types having the same QImode.  Thus we add an
11995	 additional check on the number of elements.  */
11996 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype),
11997 TYPE_VECTOR_SUBPARTS (wide_vectype) * 2))
11998 return true;
11999 }
12000
12001 /* Check if it's a multi-step conversion that can be done using intermediate
12002 types. */
12003
12004 prev_type = vectype;
12005 prev_mode = vec_mode;
12006
12007 if (!CONVERT_EXPR_CODE_P (code))
12008 return false;
12009
12010   /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
12011      intermediate steps in the promotion sequence.  We try
12012      MAX_INTERM_CVT_STEPS to get to WIDE_VECTYPE, and fail if we do
12013      not.  */
12014 interm_types->create (MAX_INTERM_CVT_STEPS);
12015 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
12016 {
12017 intermediate_mode = insn_data[icode1].operand[0].mode;
12018 if (VECTOR_BOOLEAN_TYPE_P (prev_type))
12019 intermediate_type
12020 = vect_halve_mask_nunits (prev_type, intermediate_mode);
12021 else
12022 intermediate_type
12023 = lang_hooks.types.type_for_mode (intermediate_mode,
12024 TYPE_UNSIGNED (prev_type));
12025
12026 if (VECTOR_BOOLEAN_TYPE_P (intermediate_type)
12027 && VECTOR_BOOLEAN_TYPE_P (prev_type)
12028 && intermediate_mode == prev_mode
12029 && SCALAR_INT_MODE_P (prev_mode))
12030 {
12031 /* If the input and result modes are the same, a different optab
12032 is needed where we pass in the number of units in vectype. */
12033 optab3 = vec_unpacks_sbool_lo_optab;
12034 optab4 = vec_unpacks_sbool_hi_optab;
12035 }
12036 else
12037 {
12038 optab3 = optab_for_tree_code (c1, intermediate_type, optab_default);
12039 optab4 = optab_for_tree_code (c2, intermediate_type, optab_default);
12040 }
12041
12042 if (!optab3 || !optab4
12043 || (icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing
12044 || insn_data[icode1].operand[0].mode != intermediate_mode
12045 || (icode2 = optab_handler (optab2, prev_mode)) == CODE_FOR_nothing
12046 || insn_data[icode2].operand[0].mode != intermediate_mode
12047 || ((icode1 = optab_handler (optab3, intermediate_mode))
12048 == CODE_FOR_nothing)
12049 || ((icode2 = optab_handler (optab4, intermediate_mode))
12050 == CODE_FOR_nothing))
12051 break;
12052
12053 interm_types->quick_push (intermediate_type);
12054 (*multi_step_cvt)++;
12055
12056 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
12057 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
12058 {
12059 if (!VECTOR_BOOLEAN_TYPE_P (vectype))
12060 return true;
12061 if (known_eq (TYPE_VECTOR_SUBPARTS (intermediate_type),
12062 TYPE_VECTOR_SUBPARTS (wide_vectype) * 2))
12063 return true;
12064 }
12065
12066 prev_type = intermediate_type;
12067 prev_mode = intermediate_mode;
12068 }
12069
12070 interm_types->release ();
12071 return false;
12072 }
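
/* Hedged usage sketch of the routine above: query a two-step widening
   char -> int conversion.  On a target with 128-bit vectors this would
   typically succeed with *CODE1/*CODE2 = VEC_UNPACK_LO/HI_EXPR,
   MULTI_STEP_CVT == 1 and INTERM_TYPES holding the intermediate short
   vector type.  The V16QI/V4SI parameters are placeholders for whatever
   vector types the target actually provides; not called anywhere.  */

static bool
query_char_to_int_widening_sketch (vec_info *vinfo, stmt_vec_info stmt_info,
				   tree v16qi, tree v4si)
{
  enum tree_code code1, code2;
  int multi_step_cvt;
  vec<tree> interm_types = vNULL;
  bool ok = supportable_widening_operation (vinfo, NOP_EXPR, stmt_info,
					    v4si, v16qi, &code1, &code2,
					    &multi_step_cvt, &interm_types);
  interm_types.release ();
  return ok;
}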
12073
12074
12075 /* Function supportable_narrowing_operation
12076
12077 Check whether an operation represented by the code CODE is a
12078 narrowing operation that is supported by the target platform in
12079 vector form (i.e., when operating on arguments of type VECTYPE_IN
12080 and producing a result of type VECTYPE_OUT).
12081
12082 Narrowing operations we currently support are NOP (CONVERT), FIX_TRUNC
12083 and FLOAT. This function checks if these operations are supported by
12084 the target platform directly via vector tree-codes.
12085
12086 Output:
12087 - CODE1 is the code of a vector operation to be used when
12088 vectorizing the operation, if available.
12089 - MULTI_STEP_CVT determines the number of required intermediate steps in
12090 case of multi-step conversion (like int->short->char - in that case
12091 MULTI_STEP_CVT will be 1).
12092 - INTERM_TYPES contains the intermediate type required to perform the
12093 narrowing operation (short in the above example). */
12094
12095 bool
12096 supportable_narrowing_operation (enum tree_code code,
12097 tree vectype_out, tree vectype_in,
12098 enum tree_code *code1, int *multi_step_cvt,
12099 vec<tree> *interm_types)
12100 {
12101 machine_mode vec_mode;
12102 enum insn_code icode1;
12103 optab optab1, interm_optab;
12104 tree vectype = vectype_in;
12105 tree narrow_vectype = vectype_out;
12106 enum tree_code c1;
12107 tree intermediate_type, prev_type;
12108 machine_mode intermediate_mode, prev_mode;
12109 int i;
12110 bool uns;
12111
12112 *multi_step_cvt = 0;
12113 switch (code)
12114 {
12115 CASE_CONVERT:
12116 c1 = VEC_PACK_TRUNC_EXPR;
12117 if (VECTOR_BOOLEAN_TYPE_P (narrow_vectype)
12118 && VECTOR_BOOLEAN_TYPE_P (vectype)
12119 && TYPE_MODE (narrow_vectype) == TYPE_MODE (vectype)
12120 && SCALAR_INT_MODE_P (TYPE_MODE (vectype)))
12121 optab1 = vec_pack_sbool_trunc_optab;
12122 else
12123 optab1 = optab_for_tree_code (c1, vectype, optab_default);
12124 break;
12125
12126 case FIX_TRUNC_EXPR:
12127 c1 = VEC_PACK_FIX_TRUNC_EXPR;
12128       /* The signedness is determined from the output operand.  */
12129 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
12130 break;
12131
12132 case FLOAT_EXPR:
12133 c1 = VEC_PACK_FLOAT_EXPR;
12134 optab1 = optab_for_tree_code (c1, vectype, optab_default);
12135 break;
12136
12137 default:
12138 gcc_unreachable ();
12139 }
12140
12141 if (!optab1)
12142 return false;
12143
12144 vec_mode = TYPE_MODE (vectype);
12145 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing)
12146 return false;
12147
12148 *code1 = c1;
12149
12150 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
12151 {
12152 if (!VECTOR_BOOLEAN_TYPE_P (vectype))
12153 return true;
12154       /* For scalar masks we may have different boolean
12155	 vector types having the same QImode.  Thus we add an
12156	 additional check on the number of elements.  */
12157 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype) * 2,
12158 TYPE_VECTOR_SUBPARTS (narrow_vectype)))
12159 return true;
12160 }
12161
12162 if (code == FLOAT_EXPR)
12163 return false;
12164
12165 /* Check if it's a multi-step conversion that can be done using intermediate
12166 types. */
12167 prev_mode = vec_mode;
12168 prev_type = vectype;
12169 if (code == FIX_TRUNC_EXPR)
12170 uns = TYPE_UNSIGNED (vectype_out);
12171 else
12172 uns = TYPE_UNSIGNED (vectype);
12173
12174   /* For multi-step FIX_TRUNC_EXPR prefer a signed floating-point to
12175      integer conversion over an unsigned one, as unsigned FIX_TRUNC_EXPR
12176      is often more costly than signed.  */
12177 if (code == FIX_TRUNC_EXPR && uns)
12178 {
12179 enum insn_code icode2;
12180
12181 intermediate_type
12182 = lang_hooks.types.type_for_mode (TYPE_MODE (vectype_out), 0);
12183 interm_optab
12184 = optab_for_tree_code (c1, intermediate_type, optab_default);
12185 if (interm_optab != unknown_optab
12186 && (icode2 = optab_handler (optab1, vec_mode)) != CODE_FOR_nothing
12187 && insn_data[icode1].operand[0].mode
12188 == insn_data[icode2].operand[0].mode)
12189 {
12190 uns = false;
12191 optab1 = interm_optab;
12192 icode1 = icode2;
12193 }
12194 }
12195
12196   /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
12197      intermediate steps in the demotion sequence.  We try
12198      MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do not.  */
12199 interm_types->create (MAX_INTERM_CVT_STEPS);
12200 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
12201 {
12202 intermediate_mode = insn_data[icode1].operand[0].mode;
12203 if (VECTOR_BOOLEAN_TYPE_P (prev_type))
12204 intermediate_type
12205 = vect_double_mask_nunits (prev_type, intermediate_mode);
12206 else
12207 intermediate_type
12208 = lang_hooks.types.type_for_mode (intermediate_mode, uns);
12209 if (VECTOR_BOOLEAN_TYPE_P (intermediate_type)
12210 && VECTOR_BOOLEAN_TYPE_P (prev_type)
12211 && intermediate_mode == prev_mode
12212 && SCALAR_INT_MODE_P (prev_mode))
12213 interm_optab = vec_pack_sbool_trunc_optab;
12214 else
12215 interm_optab
12216 = optab_for_tree_code (VEC_PACK_TRUNC_EXPR, intermediate_type,
12217 optab_default);
12218 if (!interm_optab
12219 || ((icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing)
12220 || insn_data[icode1].operand[0].mode != intermediate_mode
12221 || ((icode1 = optab_handler (interm_optab, intermediate_mode))
12222 == CODE_FOR_nothing))
12223 break;
12224
12225 interm_types->quick_push (intermediate_type);
12226 (*multi_step_cvt)++;
12227
12228 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
12229 {
12230 if (!VECTOR_BOOLEAN_TYPE_P (vectype))
12231 return true;
12232 if (known_eq (TYPE_VECTOR_SUBPARTS (intermediate_type) * 2,
12233 TYPE_VECTOR_SUBPARTS (narrow_vectype)))
12234 return true;
12235 }
12236
12237 prev_mode = intermediate_mode;
12238 prev_type = intermediate_type;
12239 optab1 = interm_optab;
12240 }
12241
12242 interm_types->release ();
12243 return false;
12244 }
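
/* Worked example for the multi-step path above (hedged; the exact types
   are target-dependent): narrowing int -> char on a 128-bit target packs
   pairs of V4SI into V8HI and pairs of V8HI into V16QI, so *CODE1 is
   VEC_PACK_TRUNC_EXPR, MULTI_STEP_CVT is 1 and INTERM_TYPES holds the
   intermediate V8HI type.  */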
12245
12246 /* Generate and return a vector mask of MASK_TYPE such that
12247 mask[I] is true iff J + START_INDEX < END_INDEX for all J <= I.
12248 Add the statements to SEQ. */
12249
12250 tree
12251 vect_gen_while (gimple_seq *seq, tree mask_type, tree start_index,
12252 tree end_index, const char *name)
12253 {
12254 tree cmp_type = TREE_TYPE (start_index);
12255 gcc_checking_assert (direct_internal_fn_supported_p (IFN_WHILE_ULT,
12256 cmp_type, mask_type,
12257 OPTIMIZE_FOR_SPEED));
12258 gcall *call = gimple_build_call_internal (IFN_WHILE_ULT, 3,
12259 start_index, end_index,
12260 build_zero_cst (mask_type));
12261 tree tmp;
12262 if (name)
12263 tmp = make_temp_ssa_name (mask_type, NULL, name);
12264 else
12265 tmp = make_ssa_name (mask_type);
12266 gimple_call_set_lhs (call, tmp);
12267 gimple_seq_add_stmt (seq, call);
12268 return tmp;
12269 }
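
/* Standalone sketch of the scalar semantics of the IFN_WHILE_ULT mask
   built above: lane I of the result is true iff START_INDEX + I is
   below END_INDEX.  Plain C illustration, not vectorizer code.  */

static void
while_ult_mask_sketch (unsigned long start_index, unsigned long end_index,
		       bool *mask, unsigned int nlanes)
{
  for (unsigned int i = 0; i < nlanes; i++)
    mask[i] = start_index + i < end_index;
}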
12270
12271 /* Generate a vector mask of type MASK_TYPE for which index I is false iff
12272 J + START_INDEX < END_INDEX for all J <= I. Add the statements to SEQ. */
12273
12274 tree
12275 vect_gen_while_not (gimple_seq *seq, tree mask_type, tree start_index,
12276 tree end_index)
12277 {
12278 tree tmp = vect_gen_while (seq, mask_type, start_index, end_index);
12279 return gimple_build (seq, BIT_NOT_EXPR, mask_type, tmp);
12280 }
12281
12282 /* Try to compute the vector types required to vectorize STMT_INFO,
12283 returning true on success and false if vectorization isn't possible.
12284 If GROUP_SIZE is nonzero and we're performing BB vectorization,
12285    make sure that the number of elements in the vectors is no bigger
12286 than GROUP_SIZE.
12287
12288 On success:
12289
12290 - Set *STMT_VECTYPE_OUT to:
12291 - NULL_TREE if the statement doesn't need to be vectorized;
12292 - the equivalent of STMT_VINFO_VECTYPE otherwise.
12293
12294 - Set *NUNITS_VECTYPE_OUT to the vector type that contains the maximum
12295 number of units needed to vectorize STMT_INFO, or NULL_TREE if the
12296 statement does not help to determine the overall number of units. */
12297
12298 opt_result
12299 vect_get_vector_types_for_stmt (vec_info *vinfo, stmt_vec_info stmt_info,
12300 tree *stmt_vectype_out,
12301 tree *nunits_vectype_out,
12302 unsigned int group_size)
12303 {
12304 gimple *stmt = stmt_info->stmt;
12305
12306 /* For BB vectorization, we should always have a group size once we've
12307 constructed the SLP tree; the only valid uses of zero GROUP_SIZEs
12308 are tentative requests during things like early data reference
12309 analysis and pattern recognition. */
12310 if (is_a <bb_vec_info> (vinfo))
12311 gcc_assert (vinfo->slp_instances.is_empty () || group_size != 0);
12312 else
12313 group_size = 0;
12314
12315 *stmt_vectype_out = NULL_TREE;
12316 *nunits_vectype_out = NULL_TREE;
12317
12318 if (gimple_get_lhs (stmt) == NULL_TREE
12319 /* MASK_STORE has no lhs, but is ok. */
12320 && !gimple_call_internal_p (stmt, IFN_MASK_STORE))
12321 {
12322 if (is_a <gcall *> (stmt))
12323 {
12324 /* Ignore calls with no lhs. These must be calls to
12325	     #pragma omp simd functions, and the vectorization factor
12326	     they really need can't be determined until
12327	     vectorizable_simd_clone_call.  */
12328 if (dump_enabled_p ())
12329 dump_printf_loc (MSG_NOTE, vect_location,
12330 "defer to SIMD clone analysis.\n");
12331 return opt_result::success ();
12332 }
12333
12334 return opt_result::failure_at (stmt,
12335 "not vectorized: irregular stmt.%G", stmt);
12336 }
12337
12338 tree vectype;
12339 tree scalar_type = NULL_TREE;
12340 if (group_size == 0 && STMT_VINFO_VECTYPE (stmt_info))
12341 {
12342 vectype = STMT_VINFO_VECTYPE (stmt_info);
12343 if (dump_enabled_p ())
12344 dump_printf_loc (MSG_NOTE, vect_location,
12345 "precomputed vectype: %T\n", vectype);
12346 }
12347 else if (vect_use_mask_type_p (stmt_info))
12348 {
12349 unsigned int precision = stmt_info->mask_precision;
12350 scalar_type = build_nonstandard_integer_type (precision, 1);
12351 vectype = get_mask_type_for_scalar_type (vinfo, scalar_type, group_size);
12352 if (!vectype)
12353 return opt_result::failure_at (stmt, "not vectorized: unsupported"
12354 " data-type %T\n", scalar_type);
12355 if (dump_enabled_p ())
12356 dump_printf_loc (MSG_NOTE, vect_location, "vectype: %T\n", vectype);
12357 }
12358 else
12359 {
12360 if (data_reference *dr = STMT_VINFO_DATA_REF (stmt_info))
12361 scalar_type = TREE_TYPE (DR_REF (dr));
12362 else if (gimple_call_internal_p (stmt, IFN_MASK_STORE))
12363 scalar_type = TREE_TYPE (gimple_call_arg (stmt, 3));
12364 else
12365 scalar_type = TREE_TYPE (gimple_get_lhs (stmt));
12366
12367 if (dump_enabled_p ())
12368 {
12369 if (group_size)
12370 dump_printf_loc (MSG_NOTE, vect_location,
12371 "get vectype for scalar type (group size %d):"
12372 " %T\n", group_size, scalar_type);
12373 else
12374 dump_printf_loc (MSG_NOTE, vect_location,
12375 "get vectype for scalar type: %T\n", scalar_type);
12376 }
12377 vectype = get_vectype_for_scalar_type (vinfo, scalar_type, group_size);
12378 if (!vectype)
12379 return opt_result::failure_at (stmt,
12380 "not vectorized:"
12381 " unsupported data-type %T\n",
12382 scalar_type);
12383
12384 if (dump_enabled_p ())
12385 dump_printf_loc (MSG_NOTE, vect_location, "vectype: %T\n", vectype);
12386 }
12387
12388 if (scalar_type && VECTOR_MODE_P (TYPE_MODE (scalar_type)))
12389 return opt_result::failure_at (stmt,
12390 "not vectorized: vector stmt in loop:%G",
12391 stmt);
12392
12393 *stmt_vectype_out = vectype;
12394
12395 /* Don't try to compute scalar types if the stmt produces a boolean
12396 vector; use the existing vector type instead. */
12397 tree nunits_vectype = vectype;
12398 if (!VECTOR_BOOLEAN_TYPE_P (vectype))
12399 {
12400 /* The number of units is set according to the smallest scalar
12401 type (or the largest vector size, but we only support one
12402 vector size per vectorization). */
12403 scalar_type = vect_get_smallest_scalar_type (stmt_info,
12404 TREE_TYPE (vectype));
12405 if (scalar_type != TREE_TYPE (vectype))
12406 {
12407 if (dump_enabled_p ())
12408 dump_printf_loc (MSG_NOTE, vect_location,
12409 "get vectype for smallest scalar type: %T\n",
12410 scalar_type);
12411 nunits_vectype = get_vectype_for_scalar_type (vinfo, scalar_type,
12412 group_size);
12413 if (!nunits_vectype)
12414 return opt_result::failure_at
12415 (stmt, "not vectorized: unsupported data-type %T\n",
12416 scalar_type);
12417 if (dump_enabled_p ())
12418 dump_printf_loc (MSG_NOTE, vect_location, "nunits vectype: %T\n",
12419 nunits_vectype);
12420 }
12421 }
12422
12423 if (!multiple_p (TYPE_VECTOR_SUBPARTS (nunits_vectype),
12424 TYPE_VECTOR_SUBPARTS (*stmt_vectype_out)))
12425 return opt_result::failure_at (stmt,
12426 "Not vectorized: Incompatible number "
12427 "of vector subparts between %T and %T\n",
12428 nunits_vectype, *stmt_vectype_out);
12429
12430 if (dump_enabled_p ())
12431 {
12432 dump_printf_loc (MSG_NOTE, vect_location, "nunits = ");
12433 dump_dec (MSG_NOTE, TYPE_VECTOR_SUBPARTS (nunits_vectype));
12434 dump_printf (MSG_NOTE, "\n");
12435 }
12436
12437 *nunits_vectype_out = nunits_vectype;
12438 return opt_result::success ();
12439 }
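
/* Hedged example of the two output types above: for a widening multiply
   such as int_res = (int) short_a * (int) short_b, *STMT_VECTYPE_OUT
   would be the int vector type (e.g. V4SI) while the smallest scalar
   type involved is short, so *NUNITS_VECTYPE_OUT would be the
   wider-lane short vector type (e.g. V8HI); the multiple_p check above
   then confirms that 8 is a multiple of 4.  */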
12440
12441 /* Generate and return a statement sequence that sets the vector length LEN to:
12442
12443 min_of_start_and_end = min (START_INDEX, END_INDEX);
12444 left_len = END_INDEX - min_of_start_and_end;
12445 rhs = min (left_len, LEN_LIMIT);
12446 LEN = rhs;
12447
12448 Note: the cost of the code generated by this function is modeled
12449 by vect_estimate_min_profitable_iters, so changes here may need
12450 corresponding changes there. */
12451
12452 gimple_seq
12453 vect_gen_len (tree len, tree start_index, tree end_index, tree len_limit)
12454 {
12455 gimple_seq stmts = NULL;
12456 tree len_type = TREE_TYPE (len);
12457 gcc_assert (TREE_TYPE (start_index) == len_type);
12458
12459 tree min = gimple_build (&stmts, MIN_EXPR, len_type, start_index, end_index);
12460 tree left_len = gimple_build (&stmts, MINUS_EXPR, len_type, end_index, min);
12461 tree rhs = gimple_build (&stmts, MIN_EXPR, len_type, left_len, len_limit);
12462 gimple* stmt = gimple_build_assign (len, rhs);
12463 gimple_seq_add_stmt (&stmts, stmt);
12464
12465 return stmts;
12466 }
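
/* Standalone sketch of what the generated sequence above computes, in
   plain C: the active vector length is the remaining iteration count
   clamped to the target's length limit.  Illustration only.  */

static unsigned long
vect_len_sketch (unsigned long start_index, unsigned long end_index,
		 unsigned long len_limit)
{
  unsigned long min_of_start_and_end
    = start_index < end_index ? start_index : end_index;
  unsigned long left_len = end_index - min_of_start_and_end;
  return left_len < len_limit ? left_len : len_limit;
}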
12467
12468