1 /* Statement Analysis and Transformation for Vectorization
2 Copyright (C) 2003-2020 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
4 and Ira Rosen <irar@il.ibm.com>
5
6 This file is part of GCC.
7
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
12
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
17
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
21
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "backend.h"
26 #include "target.h"
27 #include "rtl.h"
28 #include "tree.h"
29 #include "gimple.h"
30 #include "ssa.h"
31 #include "optabs-tree.h"
32 #include "insn-config.h"
33 #include "recog.h" /* FIXME: for insn_data */
34 #include "cgraph.h"
35 #include "dumpfile.h"
36 #include "alias.h"
37 #include "fold-const.h"
38 #include "stor-layout.h"
39 #include "tree-eh.h"
40 #include "gimplify.h"
41 #include "gimple-iterator.h"
42 #include "gimplify-me.h"
43 #include "tree-cfg.h"
44 #include "tree-ssa-loop-manip.h"
45 #include "cfgloop.h"
46 #include "explow.h"
47 #include "tree-ssa-loop.h"
48 #include "tree-scalar-evolution.h"
49 #include "tree-vectorizer.h"
50 #include "builtins.h"
51 #include "internal-fn.h"
52 #include "tree-vector-builder.h"
53 #include "vec-perm-indices.h"
54 #include "tree-ssa-loop-niter.h"
55 #include "gimple-fold.h"
56 #include "regs.h"
57 #include "attribs.h"
58
59 /* For lang_hooks.types.type_for_mode. */
60 #include "langhooks.h"
61
62 /* Return the vectorized type for the given statement. */
63
64 tree
65 stmt_vectype (class _stmt_vec_info *stmt_info)
66 {
67 return STMT_VINFO_VECTYPE (stmt_info);
68 }
69
70 /* Return TRUE iff the given statement is in an inner loop relative to
71 the loop being vectorized. */
72 bool
73 stmt_in_inner_loop_p (class _stmt_vec_info *stmt_info)
74 {
75 gimple *stmt = STMT_VINFO_STMT (stmt_info);
76 basic_block bb = gimple_bb (stmt);
77 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
78 class loop* loop;
79
80 if (!loop_vinfo)
81 return false;
82
83 loop = LOOP_VINFO_LOOP (loop_vinfo);
84
85 return (bb->loop_father == loop->inner);
86 }
87
88 /* Record the cost of a statement, either by directly informing the
89 target model or by saving it in a vector for later processing.
90 Return a preliminary estimate of the statement's cost. */
91
92 unsigned
93 record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
94 enum vect_cost_for_stmt kind, stmt_vec_info stmt_info,
95 int misalign, enum vect_cost_model_location where)
96 {
97 if ((kind == vector_load || kind == unaligned_load)
98 && STMT_VINFO_GATHER_SCATTER_P (stmt_info))
99 kind = vector_gather_load;
100 if ((kind == vector_store || kind == unaligned_store)
101 && STMT_VINFO_GATHER_SCATTER_P (stmt_info))
102 kind = vector_scatter_store;
103
104 stmt_info_for_cost si = { count, kind, where, stmt_info, misalign };
105 body_cost_vec->safe_push (si);
106
107 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
108 return (unsigned)
109 (builtin_vectorization_cost (kind, vectype, misalign) * count);
110 }
111
112 /* Return a variable of type ELEM_TYPE[NELEMS]. */
113
114 static tree
115 create_vector_array (tree elem_type, unsigned HOST_WIDE_INT nelems)
116 {
117 return create_tmp_var (build_array_type_nelts (elem_type, nelems),
118 "vect_array");
119 }
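/* For example (illustrative only, not from the original sources): with a
   "vector(4) int" ELEM_TYPE and NELEMS == 2, the call above creates a
   temporary named "vect_array" whose type is "vector(4) int[2]". */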
120
121 /* ARRAY is an array of vectors created by create_vector_array.
122 Return an SSA_NAME for the vector in index N. The reference
123 is part of the vectorization of STMT_INFO and the vector is associated
124 with scalar destination SCALAR_DEST. */
125
126 static tree
127 read_vector_array (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
128 tree scalar_dest, tree array, unsigned HOST_WIDE_INT n)
129 {
130 tree vect_type, vect, vect_name, array_ref;
131 gimple *new_stmt;
132
133 gcc_assert (TREE_CODE (TREE_TYPE (array)) == ARRAY_TYPE);
134 vect_type = TREE_TYPE (TREE_TYPE (array));
135 vect = vect_create_destination_var (scalar_dest, vect_type);
136 array_ref = build4 (ARRAY_REF, vect_type, array,
137 build_int_cst (size_type_node, n),
138 NULL_TREE, NULL_TREE);
139
140 new_stmt = gimple_build_assign (vect, array_ref);
141 vect_name = make_ssa_name (vect, new_stmt);
142 gimple_assign_set_lhs (new_stmt, vect_name);
143 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
144
145 return vect_name;
146 }
147
148 /* ARRAY is an array of vectors created by create_vector_array.
149 Emit code to store SSA_NAME VECT in index N of the array.
150 The store is part of the vectorization of STMT_INFO. */
151
152 static void
153 write_vector_array (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
154 tree vect, tree array, unsigned HOST_WIDE_INT n)
155 {
156 tree array_ref;
157 gimple *new_stmt;
158
159 array_ref = build4 (ARRAY_REF, TREE_TYPE (vect), array,
160 build_int_cst (size_type_node, n),
161 NULL_TREE, NULL_TREE);
162
163 new_stmt = gimple_build_assign (array_ref, vect);
164 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
165 }
166
167 /* PTR is a pointer to an array of type TYPE. Return a representation
168 of *PTR. The memory reference replaces those in FIRST_DR
169 (and its group). */
170
171 static tree
172 create_array_ref (tree type, tree ptr, tree alias_ptr_type)
173 {
174 tree mem_ref;
175
176 mem_ref = build2 (MEM_REF, type, ptr, build_int_cst (alias_ptr_type, 0));
177 /* Arrays have the same alignment as their type. */
178 set_ptr_info_alignment (get_ptr_info (ptr), TYPE_ALIGN_UNIT (type), 0);
179 return mem_ref;
180 }
181
182 /* Add a clobber of variable VAR to the vectorization of STMT_INFO.
183 Emit the clobber before *GSI. */
184
185 static void
186 vect_clobber_variable (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
187 tree var)
188 {
189 tree clobber = build_clobber (TREE_TYPE (var));
190 gimple *new_stmt = gimple_build_assign (var, clobber);
191 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
192 }
193
194 /* Utility functions used by vect_mark_stmts_to_be_vectorized. */
195
196 /* Function vect_mark_relevant.
197
198 Mark STMT_INFO as "relevant for vectorization" and add it to WORKLIST. */
199
200 static void
201 vect_mark_relevant (vec<stmt_vec_info> *worklist, stmt_vec_info stmt_info,
202 enum vect_relevant relevant, bool live_p)
203 {
204 enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
205 bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);
206
207 if (dump_enabled_p ())
208 dump_printf_loc (MSG_NOTE, vect_location,
209 "mark relevant %d, live %d: %G", relevant, live_p,
210 stmt_info->stmt);
211
212 /* If this stmt is an original stmt in a pattern, we might need to mark its
213 related pattern stmt instead of the original stmt. However, such stmts
214 may have their own uses that are not in any pattern, in such cases the
215 stmt itself should be marked. */
216 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
217 {
218 /* This is the last stmt in a sequence that was detected as a
219 pattern that can potentially be vectorized. Don't mark the stmt
220 as relevant/live because it's not going to be vectorized.
221 Instead mark the pattern-stmt that replaces it. */
222
223 if (dump_enabled_p ())
224 dump_printf_loc (MSG_NOTE, vect_location,
225 "last stmt in pattern. don't mark"
226 " relevant/live.\n");
227 stmt_vec_info old_stmt_info = stmt_info;
228 stmt_info = STMT_VINFO_RELATED_STMT (stmt_info);
229 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == old_stmt_info);
230 save_relevant = STMT_VINFO_RELEVANT (stmt_info);
231 save_live_p = STMT_VINFO_LIVE_P (stmt_info);
232 }
233
234 STMT_VINFO_LIVE_P (stmt_info) |= live_p;
235 if (relevant > STMT_VINFO_RELEVANT (stmt_info))
236 STMT_VINFO_RELEVANT (stmt_info) = relevant;
237
238 if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
239 && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
240 {
241 if (dump_enabled_p ())
242 dump_printf_loc (MSG_NOTE, vect_location,
243 "already marked relevant/live.\n");
244 return;
245 }
246
247 worklist->safe_push (stmt_info);
248 }
249
250
251 /* Function is_simple_and_all_uses_invariant
252
253 Return true if STMT_INFO is simple and all uses of it are invariant. */
254
255 bool
256 is_simple_and_all_uses_invariant (stmt_vec_info stmt_info,
257 loop_vec_info loop_vinfo)
258 {
259 tree op;
260 ssa_op_iter iter;
261
262 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
263 if (!stmt)
264 return false;
265
266 FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_USE)
267 {
268 enum vect_def_type dt = vect_uninitialized_def;
269
270 if (!vect_is_simple_use (op, loop_vinfo, &dt))
271 {
272 if (dump_enabled_p ())
273 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
274 "use not simple.\n");
275 return false;
276 }
277
278 if (dt != vect_external_def && dt != vect_constant_def)
279 return false;
280 }
281 return true;
282 }
283
284 /* Function vect_stmt_relevant_p.
285
286 Return true if STMT_INFO, in the loop that is represented by LOOP_VINFO,
287 is "relevant for vectorization".
288
289 A stmt is considered "relevant for vectorization" if:
290 - it has uses outside the loop.
291 - it has vdefs (it alters memory).
292 - it is a control stmt in the loop (except for the exit condition).
293
294 CHECKME: what other side effects would the vectorizer allow? */
295
296 static bool
297 vect_stmt_relevant_p (stmt_vec_info stmt_info, loop_vec_info loop_vinfo,
298 enum vect_relevant *relevant, bool *live_p)
299 {
300 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
301 ssa_op_iter op_iter;
302 imm_use_iterator imm_iter;
303 use_operand_p use_p;
304 def_operand_p def_p;
305
306 *relevant = vect_unused_in_scope;
307 *live_p = false;
308
309 /* cond stmt other than loop exit cond. */
310 if (is_ctrl_stmt (stmt_info->stmt)
311 && STMT_VINFO_TYPE (stmt_info) != loop_exit_ctrl_vec_info_type)
312 *relevant = vect_used_in_scope;
313
314 /* changing memory. */
315 if (gimple_code (stmt_info->stmt) != GIMPLE_PHI)
316 if (gimple_vdef (stmt_info->stmt)
317 && !gimple_clobber_p (stmt_info->stmt))
318 {
319 if (dump_enabled_p ())
320 dump_printf_loc (MSG_NOTE, vect_location,
321 "vec_stmt_relevant_p: stmt has vdefs.\n");
322 *relevant = vect_used_in_scope;
323 }
324
325 /* uses outside the loop. */
326 FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt_info->stmt, op_iter, SSA_OP_DEF)
327 {
328 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
329 {
330 basic_block bb = gimple_bb (USE_STMT (use_p));
331 if (!flow_bb_inside_loop_p (loop, bb))
332 {
333 if (is_gimple_debug (USE_STMT (use_p)))
334 continue;
335
336 if (dump_enabled_p ())
337 dump_printf_loc (MSG_NOTE, vect_location,
338 "vec_stmt_relevant_p: used out of loop.\n");
339
340 /* We expect all such uses to be in the loop exit phis
341 (because of loop-closed SSA form) */
342 gcc_assert (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI);
343 gcc_assert (bb == single_exit (loop)->dest);
344
345 *live_p = true;
346 }
347 }
348 }
349
350 if (*live_p && *relevant == vect_unused_in_scope
351 && !is_simple_and_all_uses_invariant (stmt_info, loop_vinfo))
352 {
353 if (dump_enabled_p ())
354 dump_printf_loc (MSG_NOTE, vect_location,
355 "vec_stmt_relevant_p: stmt live but not relevant.\n");
356 *relevant = vect_used_only_live;
357 }
358
359 return (*live_p || *relevant);
360 }
361
362
363 /* Function exist_non_indexing_operands_for_use_p
364
365 USE is one of the uses attached to STMT_INFO. Check if USE is
366 used in STMT_INFO for anything other than indexing an array. */
367
368 static bool
369 exist_non_indexing_operands_for_use_p (tree use, stmt_vec_info stmt_info)
370 {
371 tree operand;
372
373 /* USE corresponds to some operand in STMT. If there is no data
374 reference in STMT, then any operand that corresponds to USE
375 is not indexing an array. */
376 if (!STMT_VINFO_DATA_REF (stmt_info))
377 return true;
378
379 /* STMT has a data_ref. FORNOW this means that it's in one of
380 the following forms:
381 -1- ARRAY_REF = var
382 -2- var = ARRAY_REF
383 (This should have been verified in analyze_data_refs).
384
385 'var' in the second case corresponds to a def, not a use,
386 so USE cannot correspond to any operands that are not used
387 for array indexing.
388
389 Therefore, all we need to check is if STMT falls into the
390 first case, and whether var corresponds to USE. */
391
392 gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
393 if (!assign || !gimple_assign_copy_p (assign))
394 {
395 gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
396 if (call && gimple_call_internal_p (call))
397 {
398 internal_fn ifn = gimple_call_internal_fn (call);
399 int mask_index = internal_fn_mask_index (ifn);
400 if (mask_index >= 0
401 && use == gimple_call_arg (call, mask_index))
402 return true;
403 int stored_value_index = internal_fn_stored_value_index (ifn);
404 if (stored_value_index >= 0
405 && use == gimple_call_arg (call, stored_value_index))
406 return true;
407 if (internal_gather_scatter_fn_p (ifn)
408 && use == gimple_call_arg (call, 1))
409 return true;
410 }
411 return false;
412 }
413
414 if (TREE_CODE (gimple_assign_lhs (assign)) == SSA_NAME)
415 return false;
416 operand = gimple_assign_rhs1 (assign);
417 if (TREE_CODE (operand) != SSA_NAME)
418 return false;
419
420 if (operand == use)
421 return true;
422
423 return false;
424 }
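/* For example (hypothetical GIMPLE, for illustration only): in the store
   "a[i_1] = x_2" the use "x_2" is the stored value, so the check above
   returns true for it, while "i_1" is used only to index the array and
   yields false. */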
425
426
427 /*
428 Function process_use.
429
430 Inputs:
431 - a USE in STMT_VINFO in a loop represented by LOOP_VINFO
432 - RELEVANT - enum value to be set in the STMT_VINFO of the stmt
433 that defined USE. This is done by calling mark_relevant and passing it
434 the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
435 - FORCE is true if exist_non_indexing_operands_for_use_p check shouldn't
436 be performed.
437
438 Outputs:
439 Generally, LIVE_P and RELEVANT are used to define the liveness and
440 relevance info of the DEF_STMT of this USE:
441 STMT_VINFO_LIVE_P (DEF_stmt_vinfo) <-- live_p
442 STMT_VINFO_RELEVANT (DEF_stmt_vinfo) <-- relevant
443 Exceptions:
444 - case 1: If USE is used only for address computations (e.g. array indexing),
445 which do not need to be directly vectorized, then the liveness/relevance
446 of the respective DEF_STMT is left unchanged.
447 - case 2: If STMT_VINFO is a reduction phi and DEF_STMT is a reduction stmt,
448 we skip DEF_STMT because it had already been processed.
449 - case 3: If DEF_STMT and STMT_VINFO are in different nests, then
450 "relevant" will be modified accordingly.
451
452 Return true if everything is as expected. Return false otherwise. */
453
454 static opt_result
455 process_use (stmt_vec_info stmt_vinfo, tree use, loop_vec_info loop_vinfo,
456 enum vect_relevant relevant, vec<stmt_vec_info> *worklist,
457 bool force)
458 {
459 stmt_vec_info dstmt_vinfo;
460 enum vect_def_type dt;
461
462 /* case 1: we are only interested in uses that need to be vectorized. Uses
463 that are used for address computation are not considered relevant. */
464 if (!force && !exist_non_indexing_operands_for_use_p (use, stmt_vinfo))
465 return opt_result::success ();
466
467 if (!vect_is_simple_use (use, loop_vinfo, &dt, &dstmt_vinfo))
468 return opt_result::failure_at (stmt_vinfo->stmt,
469 "not vectorized:"
470 " unsupported use in stmt.\n");
471
472 if (!dstmt_vinfo)
473 return opt_result::success ();
474
475 basic_block def_bb = gimple_bb (dstmt_vinfo->stmt);
476 basic_block bb = gimple_bb (stmt_vinfo->stmt);
477
478 /* case 2: A reduction phi (STMT) defined by a reduction stmt (DSTMT_VINFO).
479 We have to force the stmt live since the epilogue loop needs it to
480 continue computing the reduction. */
481 if (gimple_code (stmt_vinfo->stmt) == GIMPLE_PHI
482 && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
483 && gimple_code (dstmt_vinfo->stmt) != GIMPLE_PHI
484 && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def
485 && bb->loop_father == def_bb->loop_father)
486 {
487 if (dump_enabled_p ())
488 dump_printf_loc (MSG_NOTE, vect_location,
489 "reduc-stmt defining reduc-phi in the same nest.\n");
490 vect_mark_relevant (worklist, dstmt_vinfo, relevant, true);
491 return opt_result::success ();
492 }
493
494 /* case 3a: outer-loop stmt defining an inner-loop stmt:
495 outer-loop-header-bb:
496 d = dstmt_vinfo
497 inner-loop:
498 stmt # use (d)
499 outer-loop-tail-bb:
500 ... */
501 if (flow_loop_nested_p (def_bb->loop_father, bb->loop_father))
502 {
503 if (dump_enabled_p ())
504 dump_printf_loc (MSG_NOTE, vect_location,
505 "outer-loop def-stmt defining inner-loop stmt.\n");
506
507 switch (relevant)
508 {
509 case vect_unused_in_scope:
510 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_nested_cycle) ?
511 vect_used_in_scope : vect_unused_in_scope;
512 break;
513
514 case vect_used_in_outer_by_reduction:
515 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
516 relevant = vect_used_by_reduction;
517 break;
518
519 case vect_used_in_outer:
520 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
521 relevant = vect_used_in_scope;
522 break;
523
524 case vect_used_in_scope:
525 break;
526
527 default:
528 gcc_unreachable ();
529 }
530 }
531
532 /* case 3b: inner-loop stmt defining an outer-loop stmt:
533 outer-loop-header-bb:
534 ...
535 inner-loop:
536 d = dstmt_vinfo
537 outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
538 stmt # use (d) */
539 else if (flow_loop_nested_p (bb->loop_father, def_bb->loop_father))
540 {
541 if (dump_enabled_p ())
542 dump_printf_loc (MSG_NOTE, vect_location,
543 "inner-loop def-stmt defining outer-loop stmt.\n");
544
545 switch (relevant)
546 {
547 case vect_unused_in_scope:
548 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
549 || STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_double_reduction_def) ?
550 vect_used_in_outer_by_reduction : vect_unused_in_scope;
551 break;
552
553 case vect_used_by_reduction:
554 case vect_used_only_live:
555 relevant = vect_used_in_outer_by_reduction;
556 break;
557
558 case vect_used_in_scope:
559 relevant = vect_used_in_outer;
560 break;
561
562 default:
563 gcc_unreachable ();
564 }
565 }
566 /* We are also not interested in uses on loop PHI backedges that are
567 inductions. Otherwise we'll needlessly vectorize the IV increment
568 and cause hybrid SLP for SLP inductions. Unless the PHI is live
569 of course. */
570 else if (gimple_code (stmt_vinfo->stmt) == GIMPLE_PHI
571 && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_induction_def
572 && ! STMT_VINFO_LIVE_P (stmt_vinfo)
573 && (PHI_ARG_DEF_FROM_EDGE (stmt_vinfo->stmt,
574 loop_latch_edge (bb->loop_father))
575 == use))
576 {
577 if (dump_enabled_p ())
578 dump_printf_loc (MSG_NOTE, vect_location,
579 "induction value on backedge.\n");
580 return opt_result::success ();
581 }
582
583
584 vect_mark_relevant (worklist, dstmt_vinfo, relevant, false);
585 return opt_result::success ();
586 }
587
588
589 /* Function vect_mark_stmts_to_be_vectorized.
590
591 Not all stmts in the loop need to be vectorized. For example:
592
593 for i...
594 for j...
595 1. T0 = i + j
596 2. T1 = a[T0]
597
598 3. j = j + 1
599
600 Stmts 1 and 3 do not need to be vectorized, because loop control and
601 addressing of vectorized data-refs are handled differently.
602
603 This pass detects such stmts. */
604
605 opt_result
606 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo, bool *fatal)
607 {
608 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
609 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
610 unsigned int nbbs = loop->num_nodes;
611 gimple_stmt_iterator si;
612 unsigned int i;
613 basic_block bb;
614 bool live_p;
615 enum vect_relevant relevant;
616
617 DUMP_VECT_SCOPE ("vect_mark_stmts_to_be_vectorized");
618
619 auto_vec<stmt_vec_info, 64> worklist;
620
621 /* 1. Init worklist. */
622 for (i = 0; i < nbbs; i++)
623 {
624 bb = bbs[i];
625 for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
626 {
627 stmt_vec_info phi_info = loop_vinfo->lookup_stmt (gsi_stmt (si));
628 if (dump_enabled_p ())
629 dump_printf_loc (MSG_NOTE, vect_location, "init: phi relevant? %G",
630 phi_info->stmt);
631
632 if (vect_stmt_relevant_p (phi_info, loop_vinfo, &relevant, &live_p))
633 vect_mark_relevant (&worklist, phi_info, relevant, live_p);
634 }
635 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
636 {
637 stmt_vec_info stmt_info = loop_vinfo->lookup_stmt (gsi_stmt (si));
638 if (dump_enabled_p ())
639 dump_printf_loc (MSG_NOTE, vect_location,
640 "init: stmt relevant? %G", stmt_info->stmt);
641
642 if (vect_stmt_relevant_p (stmt_info, loop_vinfo, &relevant, &live_p))
643 vect_mark_relevant (&worklist, stmt_info, relevant, live_p);
644 }
645 }
646
647 /* 2. Process_worklist */
648 while (worklist.length () > 0)
649 {
650 use_operand_p use_p;
651 ssa_op_iter iter;
652
653 stmt_vec_info stmt_vinfo = worklist.pop ();
654 if (dump_enabled_p ())
655 dump_printf_loc (MSG_NOTE, vect_location,
656 "worklist: examine stmt: %G", stmt_vinfo->stmt);
657
658 /* Examine the USEs of STMT. For each USE, mark the stmt that defines it
659 (DEF_STMT) as relevant/irrelevant according to the relevance property
660 of STMT. */
661 relevant = STMT_VINFO_RELEVANT (stmt_vinfo);
662
663 /* Generally, the relevance property of STMT (in STMT_VINFO_RELEVANT) is
664 propagated as is to the DEF_STMTs of its USEs.
665
666 One exception is when STMT has been identified as defining a reduction
667 variable; in this case we set the relevance to vect_used_by_reduction.
668 This is because we distinguish between two kinds of relevant stmts -
669 those that are used by a reduction computation, and those that are
670 (also) used by a regular computation. This allows us later on to
671 identify stmts that are used solely by a reduction, and therefore the
672 order of the results that they produce does not have to be kept. */
673
674 switch (STMT_VINFO_DEF_TYPE (stmt_vinfo))
675 {
676 case vect_reduction_def:
677 gcc_assert (relevant != vect_unused_in_scope);
678 if (relevant != vect_unused_in_scope
679 && relevant != vect_used_in_scope
680 && relevant != vect_used_by_reduction
681 && relevant != vect_used_only_live)
682 return opt_result::failure_at
683 (stmt_vinfo->stmt, "unsupported use of reduction.\n");
684 break;
685
686 case vect_nested_cycle:
687 if (relevant != vect_unused_in_scope
688 && relevant != vect_used_in_outer_by_reduction
689 && relevant != vect_used_in_outer)
690 return opt_result::failure_at
691 (stmt_vinfo->stmt, "unsupported use of nested cycle.\n");
692 break;
693
694 case vect_double_reduction_def:
695 if (relevant != vect_unused_in_scope
696 && relevant != vect_used_by_reduction
697 && relevant != vect_used_only_live)
698 return opt_result::failure_at
699 (stmt_vinfo->stmt, "unsupported use of double reduction.\n");
700 break;
701
702 default:
703 break;
704 }
705
706 if (is_pattern_stmt_p (stmt_vinfo))
707 {
708 /* Pattern statements are not inserted into the code, so
709 FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
710 have to scan the RHS or function arguments instead. */
711 if (gassign *assign = dyn_cast <gassign *> (stmt_vinfo->stmt))
712 {
713 enum tree_code rhs_code = gimple_assign_rhs_code (assign);
714 tree op = gimple_assign_rhs1 (assign);
715
716 i = 1;
717 if (rhs_code == COND_EXPR && COMPARISON_CLASS_P (op))
718 {
719 opt_result res
720 = process_use (stmt_vinfo, TREE_OPERAND (op, 0),
721 loop_vinfo, relevant, &worklist, false);
722 if (!res)
723 return res;
724 res = process_use (stmt_vinfo, TREE_OPERAND (op, 1),
725 loop_vinfo, relevant, &worklist, false);
726 if (!res)
727 return res;
728 i = 2;
729 }
730 for (; i < gimple_num_ops (assign); i++)
731 {
732 op = gimple_op (assign, i);
733 if (TREE_CODE (op) == SSA_NAME)
734 {
735 opt_result res
736 = process_use (stmt_vinfo, op, loop_vinfo, relevant,
737 &worklist, false);
738 if (!res)
739 return res;
740 }
741 }
742 }
743 else if (gcall *call = dyn_cast <gcall *> (stmt_vinfo->stmt))
744 {
745 for (i = 0; i < gimple_call_num_args (call); i++)
746 {
747 tree arg = gimple_call_arg (call, i);
748 opt_result res
749 = process_use (stmt_vinfo, arg, loop_vinfo, relevant,
750 &worklist, false);
751 if (!res)
752 return res;
753 }
754 }
755 }
756 else
757 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt_vinfo->stmt, iter, SSA_OP_USE)
758 {
759 tree op = USE_FROM_PTR (use_p);
760 opt_result res
761 = process_use (stmt_vinfo, op, loop_vinfo, relevant,
762 &worklist, false);
763 if (!res)
764 return res;
765 }
766
767 if (STMT_VINFO_GATHER_SCATTER_P (stmt_vinfo))
768 {
769 gather_scatter_info gs_info;
770 if (!vect_check_gather_scatter (stmt_vinfo, loop_vinfo, &gs_info))
771 gcc_unreachable ();
772 opt_result res
773 = process_use (stmt_vinfo, gs_info.offset, loop_vinfo, relevant,
774 &worklist, true);
775 if (!res)
776 {
777 if (fatal)
778 *fatal = false;
779 return res;
780 }
781 }
782 } /* while worklist */
783
784 return opt_result::success ();
785 }
786
787 /* Compute the prologue cost for invariant or constant operands. */
788
789 static unsigned
790 vect_prologue_cost_for_slp_op (slp_tree node, stmt_vec_info stmt_info,
791 unsigned opno, enum vect_def_type dt,
792 stmt_vector_for_cost *cost_vec)
793 {
794 vec_info *vinfo = stmt_info->vinfo;
795 gimple *stmt = SLP_TREE_SCALAR_STMTS (node)[0]->stmt;
796 tree op = gimple_op (stmt, opno);
797 unsigned prologue_cost = 0;
798
799 /* Without looking at the actual initializer a vector of
800 constants can be implemented as a load from the constant pool.
801 When all elements are the same we can use a splat. */
802 tree vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op), node);
803 unsigned group_size = SLP_TREE_SCALAR_STMTS (node).length ();
804 unsigned num_vects_to_check;
805 unsigned HOST_WIDE_INT const_nunits;
806 unsigned nelt_limit;
807 if (TYPE_VECTOR_SUBPARTS (vectype).is_constant (&const_nunits)
808 && ! multiple_p (const_nunits, group_size))
809 {
810 num_vects_to_check = SLP_TREE_NUMBER_OF_VEC_STMTS (node);
811 nelt_limit = const_nunits;
812 }
813 else
814 {
815 /* If either the vector has variable length or the vectors
816 are composed of repeated whole groups we only need to
817 cost construction once. All vectors will be the same. */
818 num_vects_to_check = 1;
819 nelt_limit = group_size;
820 }
821 tree elt = NULL_TREE;
822 unsigned nelt = 0;
823 for (unsigned j = 0; j < num_vects_to_check * nelt_limit; ++j)
824 {
825 unsigned si = j % group_size;
826 if (nelt == 0)
827 elt = gimple_op (SLP_TREE_SCALAR_STMTS (node)[si]->stmt, opno);
828 /* ??? We're just tracking whether all operands of a single
829 vector initializer are the same, ideally we'd check if
830 we emitted the same one already. */
831 else if (elt != gimple_op (SLP_TREE_SCALAR_STMTS (node)[si]->stmt,
832 opno))
833 elt = NULL_TREE;
834 nelt++;
835 if (nelt == nelt_limit)
836 {
837 /* ??? We need to pass down stmt_info for a vector type
838 even if it points to the wrong stmt. */
839 prologue_cost += record_stmt_cost
840 (cost_vec, 1,
841 dt == vect_external_def
842 ? (elt ? scalar_to_vec : vec_construct)
843 : vector_load,
844 stmt_info, 0, vect_prologue);
845 nelt = 0;
846 }
847 }
848
849 return prologue_cost;
850 }
851
852 /* Function vect_model_simple_cost.
853
854 Models cost for simple operations, i.e. those that only emit ncopies of a
855 single op. Right now, this does not account for multiple insns that could
856 be generated for the single vector op. We will handle that shortly. */
857
858 static void
859 vect_model_simple_cost (stmt_vec_info stmt_info, int ncopies,
860 enum vect_def_type *dt,
861 int ndts,
862 slp_tree node,
863 stmt_vector_for_cost *cost_vec,
864 vect_cost_for_stmt kind = vector_stmt)
865 {
866 int inside_cost = 0, prologue_cost = 0;
867
868 gcc_assert (cost_vec != NULL);
869
870 /* ??? Somehow we need to fix this at the callers. */
871 if (node)
872 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (node);
873
874 if (node)
875 {
876 /* Scan operands and account for prologue cost of constants/externals.
877 ??? This over-estimates cost for multiple uses and should be
878 re-engineered. */
879 gimple *stmt = SLP_TREE_SCALAR_STMTS (node)[0]->stmt;
880 tree lhs = gimple_get_lhs (stmt);
881 for (unsigned i = 0; i < gimple_num_ops (stmt); ++i)
882 {
883 tree op = gimple_op (stmt, i);
884 enum vect_def_type dt;
885 if (!op || op == lhs)
886 continue;
887 if (vect_is_simple_use (op, stmt_info->vinfo, &dt)
888 && (dt == vect_constant_def || dt == vect_external_def))
889 prologue_cost += vect_prologue_cost_for_slp_op (node, stmt_info,
890 i, dt, cost_vec);
891 }
892 }
893 else
894 /* Cost the "broadcast" of a scalar operand into a vector operand.
895 Use scalar_to_vec to cost the broadcast, as elsewhere in the vector
896 cost model. */
897 for (int i = 0; i < ndts; i++)
898 if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
899 prologue_cost += record_stmt_cost (cost_vec, 1, scalar_to_vec,
900 stmt_info, 0, vect_prologue);
901
902 /* Adjust for two-operator SLP nodes. */
903 if (node && SLP_TREE_TWO_OPERATORS (node))
904 {
905 ncopies *= 2;
906 inside_cost += record_stmt_cost (cost_vec, ncopies, vec_perm,
907 stmt_info, 0, vect_body);
908 }
909
910 /* Pass the inside-of-loop statements to the target-specific cost model. */
911 inside_cost += record_stmt_cost (cost_vec, ncopies, kind,
912 stmt_info, 0, vect_body);
913
914 if (dump_enabled_p ())
915 dump_printf_loc (MSG_NOTE, vect_location,
916 "vect_model_simple_cost: inside_cost = %d, "
917 "prologue_cost = %d .\n", inside_cost, prologue_cost);
918 }
919
920
921 /* Model cost for type demotion and promotion operations. PWR is
922 normally zero for single-step promotions and demotions. It will be
923 one if two-step promotion/demotion is required, and so on. NCOPIES
924 is the number of vector results (and thus number of instructions)
925 for the narrowest end of the operation chain. Each additional
926 step doubles the number of instructions required. */
927
928 static void
929 vect_model_promotion_demotion_cost (stmt_vec_info stmt_info,
930 enum vect_def_type *dt,
931 unsigned int ncopies, int pwr,
932 stmt_vector_for_cost *cost_vec)
933 {
934 int i;
935 int inside_cost = 0, prologue_cost = 0;
936
937 for (i = 0; i < pwr + 1; i++)
938 {
939 inside_cost += record_stmt_cost (cost_vec, ncopies, vec_promote_demote,
940 stmt_info, 0, vect_body);
941 ncopies *= 2;
942 }
943
944 /* FORNOW: Assuming maximum 2 args per stmt. */
945 for (i = 0; i < 2; i++)
946 if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
947 prologue_cost += record_stmt_cost (cost_vec, 1, vector_stmt,
948 stmt_info, 0, vect_prologue);
949
950 if (dump_enabled_p ())
951 dump_printf_loc (MSG_NOTE, vect_location,
952 "vect_model_promotion_demotion_cost: inside_cost = %d, "
953 "prologue_cost = %d .\n", inside_cost, prologue_cost);
954 }
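/* Worked example (illustrative only): for a two-step promotion PWR is 1, so
   the loop above runs twice; with NCOPIES == 2 it records 2 and then 4
   vec_promote_demote stmts, i.e. 6 in total for the loop body. */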
955
956 /* Returns true if the current function returns DECL. */
957
958 static bool
959 cfun_returns (tree decl)
960 {
961 edge_iterator ei;
962 edge e;
963 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
964 {
965 greturn *ret = safe_dyn_cast <greturn *> (last_stmt (e->src));
966 if (!ret)
967 continue;
968 if (gimple_return_retval (ret) == decl)
969 return true;
970 /* We often end up with an aggregate copy to the result decl,
971 handle that case as well. First skip intermediate clobbers
972 though. */
973 gimple *def = ret;
974 do
975 {
976 def = SSA_NAME_DEF_STMT (gimple_vuse (def));
977 }
978 while (gimple_clobber_p (def));
979 if (is_a <gassign *> (def)
980 && gimple_assign_lhs (def) == gimple_return_retval (ret)
981 && gimple_assign_rhs1 (def) == decl)
982 return true;
983 }
984 return false;
985 }
986
987 /* Function vect_model_store_cost
988
989 Models cost for stores. In the case of grouped accesses, one access
990 has the overhead of the grouped access attributed to it. */
991
992 static void
993 vect_model_store_cost (stmt_vec_info stmt_info, int ncopies,
994 enum vect_def_type dt,
995 vect_memory_access_type memory_access_type,
996 vec_load_store_type vls_type, slp_tree slp_node,
997 stmt_vector_for_cost *cost_vec)
998 {
999 unsigned int inside_cost = 0, prologue_cost = 0;
1000 stmt_vec_info first_stmt_info = stmt_info;
1001 bool grouped_access_p = STMT_VINFO_GROUPED_ACCESS (stmt_info);
1002
1003 /* ??? Somehow we need to fix this at the callers. */
1004 if (slp_node)
1005 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
1006
1007 if (vls_type == VLS_STORE_INVARIANT)
1008 {
1009 if (slp_node)
1010 prologue_cost += vect_prologue_cost_for_slp_op (slp_node, stmt_info,
1011 1, dt, cost_vec);
1012 else
1013 prologue_cost += record_stmt_cost (cost_vec, 1, scalar_to_vec,
1014 stmt_info, 0, vect_prologue);
1015 }
1016
1017 /* Grouped stores update all elements in the group at once,
1018 so we want the DR for the first statement. */
1019 if (!slp_node && grouped_access_p)
1020 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
1021
1022 /* True if we should include any once-per-group costs as well as
1023 the cost of the statement itself. For SLP we only get called
1024 once per group anyhow. */
1025 bool first_stmt_p = (first_stmt_info == stmt_info);
1026
1027 /* We assume that the cost of a single store-lanes instruction is
1028 equivalent to the cost of DR_GROUP_SIZE separate stores. If a grouped
1029 access is instead being provided by a permute-and-store operation,
1030 include the cost of the permutes. */
1031 if (first_stmt_p
1032 && memory_access_type == VMAT_CONTIGUOUS_PERMUTE)
1033 {
1034 /* Uses high and low interleave or shuffle operations for each
1035 needed permute. */
1036 int group_size = DR_GROUP_SIZE (first_stmt_info);
1037 int nstmts = ncopies * ceil_log2 (group_size) * group_size;
1038 inside_cost = record_stmt_cost (cost_vec, nstmts, vec_perm,
1039 stmt_info, 0, vect_body);
1040
1041 if (dump_enabled_p ())
1042 dump_printf_loc (MSG_NOTE, vect_location,
1043 "vect_model_store_cost: strided group_size = %d .\n",
1044 group_size);
1045 }
1046
1047 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1048 /* Costs of the stores. */
1049 if (memory_access_type == VMAT_ELEMENTWISE
1050 || memory_access_type == VMAT_GATHER_SCATTER)
1051 {
1052 /* N scalar stores plus extracting the elements. */
1053 unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
1054 inside_cost += record_stmt_cost (cost_vec,
1055 ncopies * assumed_nunits,
1056 scalar_store, stmt_info, 0, vect_body);
1057 }
1058 else
1059 vect_get_store_cost (stmt_info, ncopies, &inside_cost, cost_vec);
1060
1061 if (memory_access_type == VMAT_ELEMENTWISE
1062 || memory_access_type == VMAT_STRIDED_SLP)
1063 {
1064 /* N scalar stores plus extracting the elements. */
1065 unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
1066 inside_cost += record_stmt_cost (cost_vec,
1067 ncopies * assumed_nunits,
1068 vec_to_scalar, stmt_info, 0, vect_body);
1069 }
1070
1071 /* When vectorizing a store into the function result assign
1072 a penalty if the function returns in a multi-register location.
1073 In this case we assume we'll end up with having to spill the
1074 vector result and do piecewise loads as a conservative estimate. */
1075 tree base = get_base_address (STMT_VINFO_DATA_REF (stmt_info)->ref);
1076 if (base
1077 && (TREE_CODE (base) == RESULT_DECL
1078 || (DECL_P (base) && cfun_returns (base)))
1079 && !aggregate_value_p (base, cfun->decl))
1080 {
1081 rtx reg = hard_function_value (TREE_TYPE (base), cfun->decl, 0, 1);
1082 /* ??? Handle PARALLEL in some way. */
1083 if (REG_P (reg))
1084 {
1085 int nregs = hard_regno_nregs (REGNO (reg), GET_MODE (reg));
1086 /* Assume that a single reg-reg move is possible and cheap,
1087 do not account for vector to gp register move cost. */
1088 if (nregs > 1)
1089 {
1090 /* Spill. */
1091 prologue_cost += record_stmt_cost (cost_vec, ncopies,
1092 vector_store,
1093 stmt_info, 0, vect_epilogue);
1094 /* Loads. */
1095 prologue_cost += record_stmt_cost (cost_vec, ncopies * nregs,
1096 scalar_load,
1097 stmt_info, 0, vect_epilogue);
1098 }
1099 }
1100 }
1101
1102 if (dump_enabled_p ())
1103 dump_printf_loc (MSG_NOTE, vect_location,
1104 "vect_model_store_cost: inside_cost = %d, "
1105 "prologue_cost = %d .\n", inside_cost, prologue_cost);
1106 }
1107
1108
1109 /* Calculate cost of DR's memory access. */
1110 void
1111 vect_get_store_cost (stmt_vec_info stmt_info, int ncopies,
1112 unsigned int *inside_cost,
1113 stmt_vector_for_cost *body_cost_vec)
1114 {
1115 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
1116 int alignment_support_scheme
1117 = vect_supportable_dr_alignment (dr_info, false);
1118
1119 switch (alignment_support_scheme)
1120 {
1121 case dr_aligned:
1122 {
1123 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1124 vector_store, stmt_info, 0,
1125 vect_body);
1126
1127 if (dump_enabled_p ())
1128 dump_printf_loc (MSG_NOTE, vect_location,
1129 "vect_model_store_cost: aligned.\n");
1130 break;
1131 }
1132
1133 case dr_unaligned_supported:
1134 {
1135 /* Here, we assign an additional cost for the unaligned store. */
1136 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1137 unaligned_store, stmt_info,
1138 DR_MISALIGNMENT (dr_info),
1139 vect_body);
1140 if (dump_enabled_p ())
1141 dump_printf_loc (MSG_NOTE, vect_location,
1142 "vect_model_store_cost: unaligned supported by "
1143 "hardware.\n");
1144 break;
1145 }
1146
1147 case dr_unaligned_unsupported:
1148 {
1149 *inside_cost = VECT_MAX_COST;
1150
1151 if (dump_enabled_p ())
1152 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1153 "vect_model_store_cost: unsupported access.\n");
1154 break;
1155 }
1156
1157 default:
1158 gcc_unreachable ();
1159 }
1160 }
1161
1162
1163 /* Function vect_model_load_cost
1164
1165 Models cost for loads. In the case of grouped accesses, one access has
1166 the overhead of the grouped access attributed to it. Since unaligned
1167 accesses are supported for loads, we also account for the costs of the
1168 access scheme chosen. */
1169
1170 static void
1171 vect_model_load_cost (stmt_vec_info stmt_info, unsigned ncopies,
1172 vect_memory_access_type memory_access_type,
1173 slp_instance instance,
1174 slp_tree slp_node,
1175 stmt_vector_for_cost *cost_vec)
1176 {
1177 unsigned int inside_cost = 0, prologue_cost = 0;
1178 bool grouped_access_p = STMT_VINFO_GROUPED_ACCESS (stmt_info);
1179
1180 gcc_assert (cost_vec);
1181
1182 /* ??? Somehow we need to fix this at the callers. */
1183 if (slp_node)
1184 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
1185
1186 if (slp_node && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
1187 {
1188 /* If the load is permuted then the alignment is determined by
1189 the first group element not by the first scalar stmt DR. */
1190 stmt_vec_info first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
1191 /* Record the cost for the permutation. */
1192 unsigned n_perms;
1193 unsigned assumed_nunits
1194 = vect_nunits_for_cost (STMT_VINFO_VECTYPE (first_stmt_info));
1195 unsigned slp_vf = (ncopies * assumed_nunits) / instance->group_size;
1196 vect_transform_slp_perm_load (slp_node, vNULL, NULL,
1197 slp_vf, instance, true,
1198 &n_perms);
1199 inside_cost += record_stmt_cost (cost_vec, n_perms, vec_perm,
1200 first_stmt_info, 0, vect_body);
1201 /* And adjust the number of loads performed. This handles
1202 redundancies as well as loads that are later dead. */
1203 auto_sbitmap perm (DR_GROUP_SIZE (first_stmt_info));
1204 bitmap_clear (perm);
1205 for (unsigned i = 0;
1206 i < SLP_TREE_LOAD_PERMUTATION (slp_node).length (); ++i)
1207 bitmap_set_bit (perm, SLP_TREE_LOAD_PERMUTATION (slp_node)[i]);
1208 ncopies = 0;
1209 bool load_seen = false;
1210 for (unsigned i = 0; i < DR_GROUP_SIZE (first_stmt_info); ++i)
1211 {
1212 if (i % assumed_nunits == 0)
1213 {
1214 if (load_seen)
1215 ncopies++;
1216 load_seen = false;
1217 }
1218 if (bitmap_bit_p (perm, i))
1219 load_seen = true;
1220 }
1221 if (load_seen)
1222 ncopies++;
1223 gcc_assert (ncopies
1224 <= (DR_GROUP_SIZE (first_stmt_info)
1225 - DR_GROUP_GAP (first_stmt_info)
1226 + assumed_nunits - 1) / assumed_nunits);
1227 }
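/* Illustrative example (not from the original comments): with
   DR_GROUP_SIZE 4, ASSUMED_NUNITS 2 and a load permutation using only
   elements 0 and 1, the recount above leaves NCOPIES at 1 because the
   second half of the group is never loaded. */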
1228
1229 /* Grouped loads read all elements in the group at once,
1230 so we want the DR for the first statement. */
1231 stmt_vec_info first_stmt_info = stmt_info;
1232 if (!slp_node && grouped_access_p)
1233 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
1234
1235 /* True if we should include any once-per-group costs as well as
1236 the cost of the statement itself. For SLP we only get called
1237 once per group anyhow. */
1238 bool first_stmt_p = (first_stmt_info == stmt_info);
1239
1240 /* We assume that the cost of a single load-lanes instruction is
1241 equivalent to the cost of DR_GROUP_SIZE separate loads. If a grouped
1242 access is instead being provided by a load-and-permute operation,
1243 include the cost of the permutes. */
1244 if (first_stmt_p
1245 && memory_access_type == VMAT_CONTIGUOUS_PERMUTE)
1246 {
1247 /* Uses even and odd extract operations or shuffle operations
1248 for each needed permute. */
1249 int group_size = DR_GROUP_SIZE (first_stmt_info);
1250 int nstmts = ncopies * ceil_log2 (group_size) * group_size;
1251 inside_cost += record_stmt_cost (cost_vec, nstmts, vec_perm,
1252 stmt_info, 0, vect_body);
1253
1254 if (dump_enabled_p ())
1255 dump_printf_loc (MSG_NOTE, vect_location,
1256 "vect_model_load_cost: strided group_size = %d .\n",
1257 group_size);
1258 }
1259
1260 /* The loads themselves. */
1261 if (memory_access_type == VMAT_ELEMENTWISE
1262 || memory_access_type == VMAT_GATHER_SCATTER)
1263 {
1264 /* N scalar loads plus gathering them into a vector. */
1265 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1266 unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
1267 inside_cost += record_stmt_cost (cost_vec,
1268 ncopies * assumed_nunits,
1269 scalar_load, stmt_info, 0, vect_body);
1270 }
1271 else
1272 vect_get_load_cost (stmt_info, ncopies, first_stmt_p,
1273 &inside_cost, &prologue_cost,
1274 cost_vec, cost_vec, true);
1275 if (memory_access_type == VMAT_ELEMENTWISE
1276 || memory_access_type == VMAT_STRIDED_SLP)
1277 inside_cost += record_stmt_cost (cost_vec, ncopies, vec_construct,
1278 stmt_info, 0, vect_body);
1279
1280 if (dump_enabled_p ())
1281 dump_printf_loc (MSG_NOTE, vect_location,
1282 "vect_model_load_cost: inside_cost = %d, "
1283 "prologue_cost = %d .\n", inside_cost, prologue_cost);
1284 }
1285
1286
1287 /* Calculate cost of DR's memory access. */
1288 void
1289 vect_get_load_cost (stmt_vec_info stmt_info, int ncopies,
1290 bool add_realign_cost, unsigned int *inside_cost,
1291 unsigned int *prologue_cost,
1292 stmt_vector_for_cost *prologue_cost_vec,
1293 stmt_vector_for_cost *body_cost_vec,
1294 bool record_prologue_costs)
1295 {
1296 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
1297 int alignment_support_scheme
1298 = vect_supportable_dr_alignment (dr_info, false);
1299
1300 switch (alignment_support_scheme)
1301 {
1302 case dr_aligned:
1303 {
1304 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1305 stmt_info, 0, vect_body);
1306
1307 if (dump_enabled_p ())
1308 dump_printf_loc (MSG_NOTE, vect_location,
1309 "vect_model_load_cost: aligned.\n");
1310
1311 break;
1312 }
1313 case dr_unaligned_supported:
1314 {
1315 /* Here, we assign an additional cost for the unaligned load. */
1316 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1317 unaligned_load, stmt_info,
1318 DR_MISALIGNMENT (dr_info),
1319 vect_body);
1320
1321 if (dump_enabled_p ())
1322 dump_printf_loc (MSG_NOTE, vect_location,
1323 "vect_model_load_cost: unaligned supported by "
1324 "hardware.\n");
1325
1326 break;
1327 }
1328 case dr_explicit_realign:
1329 {
1330 *inside_cost += record_stmt_cost (body_cost_vec, ncopies * 2,
1331 vector_load, stmt_info, 0, vect_body);
1332 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1333 vec_perm, stmt_info, 0, vect_body);
1334
1335 /* FIXME: If the misalignment remains fixed across the iterations of
1336 the containing loop, the following cost should be added to the
1337 prologue costs. */
1338 if (targetm.vectorize.builtin_mask_for_load)
1339 *inside_cost += record_stmt_cost (body_cost_vec, 1, vector_stmt,
1340 stmt_info, 0, vect_body);
1341
1342 if (dump_enabled_p ())
1343 dump_printf_loc (MSG_NOTE, vect_location,
1344 "vect_model_load_cost: explicit realign\n");
1345
1346 break;
1347 }
1348 case dr_explicit_realign_optimized:
1349 {
1350 if (dump_enabled_p ())
1351 dump_printf_loc (MSG_NOTE, vect_location,
1352 "vect_model_load_cost: unaligned software "
1353 "pipelined.\n");
1354
1355 /* Unaligned software pipeline has a load of an address, an initial
1356 load, and possibly a mask operation to "prime" the loop. However,
1357 if this is an access in a group of loads, which provide grouped
1358 access, then the above cost should only be considered for one
1359 access in the group. Inside the loop, there is a load op
1360 and a realignment op. */
1361
1362 if (add_realign_cost && record_prologue_costs)
1363 {
1364 *prologue_cost += record_stmt_cost (prologue_cost_vec, 2,
1365 vector_stmt, stmt_info,
1366 0, vect_prologue);
1367 if (targetm.vectorize.builtin_mask_for_load)
1368 *prologue_cost += record_stmt_cost (prologue_cost_vec, 1,
1369 vector_stmt, stmt_info,
1370 0, vect_prologue);
1371 }
1372
1373 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1374 stmt_info, 0, vect_body);
1375 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_perm,
1376 stmt_info, 0, vect_body);
1377
1378 if (dump_enabled_p ())
1379 dump_printf_loc (MSG_NOTE, vect_location,
1380 "vect_model_load_cost: explicit realign optimized"
1381 "\n");
1382
1383 break;
1384 }
1385
1386 case dr_unaligned_unsupported:
1387 {
1388 *inside_cost = VECT_MAX_COST;
1389
1390 if (dump_enabled_p ())
1391 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1392 "vect_model_load_cost: unsupported access.\n");
1393 break;
1394 }
1395
1396 default:
1397 gcc_unreachable ();
1398 }
1399 }
1400
1401 /* Insert the new stmt NEW_STMT at *GSI or at the appropriate place in
1402 the loop preheader for the vectorized stmt STMT_VINFO. */
1403
1404 static void
1405 vect_init_vector_1 (stmt_vec_info stmt_vinfo, gimple *new_stmt,
1406 gimple_stmt_iterator *gsi)
1407 {
1408 if (gsi)
1409 vect_finish_stmt_generation (stmt_vinfo, new_stmt, gsi);
1410 else
1411 {
1412 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1413
1414 if (loop_vinfo)
1415 {
1416 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1417 basic_block new_bb;
1418 edge pe;
1419
1420 if (nested_in_vect_loop_p (loop, stmt_vinfo))
1421 loop = loop->inner;
1422
1423 pe = loop_preheader_edge (loop);
1424 new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
1425 gcc_assert (!new_bb);
1426 }
1427 else
1428 {
1429 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
1430 basic_block bb;
1431 gimple_stmt_iterator gsi_bb_start;
1432
1433 gcc_assert (bb_vinfo);
1434 bb = BB_VINFO_BB (bb_vinfo);
1435 gsi_bb_start = gsi_after_labels (bb);
1436 gsi_insert_before (&gsi_bb_start, new_stmt, GSI_SAME_STMT);
1437 }
1438 }
1439
1440 if (dump_enabled_p ())
1441 dump_printf_loc (MSG_NOTE, vect_location,
1442 "created new init_stmt: %G", new_stmt);
1443 }
1444
1445 /* Function vect_init_vector.
1446
1447 Insert a new stmt (INIT_STMT) that initializes a new variable of type
1448 TYPE with the value VAL. If TYPE is a vector type and VAL does not have
1449 vector type, a vector with all elements equal to VAL is created first.
1450 Place the initialization at GSI if it is not NULL. Otherwise, place the
1451 initialization at the loop preheader.
1452 Return the DEF of INIT_STMT.
1453 It will be used in the vectorization of STMT_INFO. */
1454
1455 tree
1456 vect_init_vector (stmt_vec_info stmt_info, tree val, tree type,
1457 gimple_stmt_iterator *gsi)
1458 {
1459 gimple *init_stmt;
1460 tree new_temp;
1461
1462 /* We abuse this function to push something to an SSA name with initial 'val'. */
1463 if (! useless_type_conversion_p (type, TREE_TYPE (val)))
1464 {
1465 gcc_assert (TREE_CODE (type) == VECTOR_TYPE);
1466 if (! types_compatible_p (TREE_TYPE (type), TREE_TYPE (val)))
1467 {
1468 /* Scalar boolean value should be transformed into
1469 all zeros or all ones value before building a vector. */
1470 if (VECTOR_BOOLEAN_TYPE_P (type))
1471 {
1472 tree true_val = build_all_ones_cst (TREE_TYPE (type));
1473 tree false_val = build_zero_cst (TREE_TYPE (type));
1474
1475 if (CONSTANT_CLASS_P (val))
1476 val = integer_zerop (val) ? false_val : true_val;
1477 else
1478 {
1479 new_temp = make_ssa_name (TREE_TYPE (type));
1480 init_stmt = gimple_build_assign (new_temp, COND_EXPR,
1481 val, true_val, false_val);
1482 vect_init_vector_1 (stmt_info, init_stmt, gsi);
1483 val = new_temp;
1484 }
1485 }
1486 else
1487 {
1488 gimple_seq stmts = NULL;
1489 if (! INTEGRAL_TYPE_P (TREE_TYPE (val)))
1490 val = gimple_build (&stmts, VIEW_CONVERT_EXPR,
1491 TREE_TYPE (type), val);
1492 else
1493 /* ??? Condition vectorization expects us to do
1494 promotion of invariant/external defs. */
1495 val = gimple_convert (&stmts, TREE_TYPE (type), val);
1496 for (gimple_stmt_iterator gsi2 = gsi_start (stmts);
1497 !gsi_end_p (gsi2); )
1498 {
1499 init_stmt = gsi_stmt (gsi2);
1500 gsi_remove (&gsi2, false);
1501 vect_init_vector_1 (stmt_info, init_stmt, gsi);
1502 }
1503 }
1504 }
1505 val = build_vector_from_val (type, val);
1506 }
1507
1508 new_temp = vect_get_new_ssa_name (type, vect_simple_var, "cst_");
1509 init_stmt = gimple_build_assign (new_temp, val);
1510 vect_init_vector_1 (stmt_info, init_stmt, gsi);
1511 return new_temp;
1512 }
1513
1514 /* Function vect_get_vec_def_for_operand_1.
1515
1516 For a defining stmt DEF_STMT_INFO of a scalar stmt, return a vector def
1517 with type DT that will be used in the vectorized stmt. */
1518
1519 tree
1520 vect_get_vec_def_for_operand_1 (stmt_vec_info def_stmt_info,
1521 enum vect_def_type dt)
1522 {
1523 tree vec_oprnd;
1524 stmt_vec_info vec_stmt_info;
1525
1526 switch (dt)
1527 {
1528 /* operand is a constant or a loop invariant. */
1529 case vect_constant_def:
1530 case vect_external_def:
1531 /* Code should use vect_get_vec_def_for_operand. */
1532 gcc_unreachable ();
1533
1534 /* Operand is defined by a loop header phi. In case of nested
1535 cycles we also may have uses of the backedge def. */
1536 case vect_reduction_def:
1537 case vect_double_reduction_def:
1538 case vect_nested_cycle:
1539 case vect_induction_def:
1540 gcc_assert (gimple_code (def_stmt_info->stmt) == GIMPLE_PHI
1541 || dt == vect_nested_cycle);
1542 /* Fallthru. */
1543
1544 /* operand is defined inside the loop. */
1545 case vect_internal_def:
1546 {
1547 /* Get the def from the vectorized stmt. */
1548 vec_stmt_info = STMT_VINFO_VEC_STMT (def_stmt_info);
1549 /* Get vectorized pattern statement. */
1550 if (!vec_stmt_info
1551 && STMT_VINFO_IN_PATTERN_P (def_stmt_info)
1552 && !STMT_VINFO_RELEVANT (def_stmt_info))
1553 vec_stmt_info = (STMT_VINFO_VEC_STMT
1554 (STMT_VINFO_RELATED_STMT (def_stmt_info)));
1555 gcc_assert (vec_stmt_info);
1556 if (gphi *phi = dyn_cast <gphi *> (vec_stmt_info->stmt))
1557 vec_oprnd = PHI_RESULT (phi);
1558 else
1559 vec_oprnd = gimple_get_lhs (vec_stmt_info->stmt);
1560 return vec_oprnd;
1561 }
1562
1563 default:
1564 gcc_unreachable ();
1565 }
1566 }
1567
1568
1569 /* Function vect_get_vec_def_for_operand.
1570
1571 OP is an operand in STMT_VINFO. This function returns a (vector) def
1572 that will be used in the vectorized stmt for STMT_VINFO.
1573
1574 In the case that OP is an SSA_NAME which is defined in the loop, then
1575 STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.
1576
1577 In case OP is an invariant or constant, a new stmt that creates a vector def
1578 needs to be introduced. VECTYPE may be used to specify a required type for
1579 vector invariant. */
1580
1581 tree
1582 vect_get_vec_def_for_operand (tree op, stmt_vec_info stmt_vinfo, tree vectype)
1583 {
1584 gimple *def_stmt;
1585 enum vect_def_type dt;
1586 bool is_simple_use;
1587 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1588
1589 if (dump_enabled_p ())
1590 dump_printf_loc (MSG_NOTE, vect_location,
1591 "vect_get_vec_def_for_operand: %T\n", op);
1592
1593 stmt_vec_info def_stmt_info;
1594 is_simple_use = vect_is_simple_use (op, loop_vinfo, &dt,
1595 &def_stmt_info, &def_stmt);
1596 gcc_assert (is_simple_use);
1597 if (def_stmt && dump_enabled_p ())
1598 dump_printf_loc (MSG_NOTE, vect_location, " def_stmt = %G", def_stmt);
1599
1600 if (dt == vect_constant_def || dt == vect_external_def)
1601 {
1602 tree stmt_vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
1603 tree vector_type;
1604
1605 if (vectype)
1606 vector_type = vectype;
1607 else if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op))
1608 && VECTOR_BOOLEAN_TYPE_P (stmt_vectype))
1609 vector_type = truth_type_for (stmt_vectype);
1610 else
1611 vector_type = get_vectype_for_scalar_type (loop_vinfo, TREE_TYPE (op));
1612
1613 gcc_assert (vector_type);
1614 return vect_init_vector (stmt_vinfo, op, vector_type, NULL);
1615 }
1616 else
1617 return vect_get_vec_def_for_operand_1 (def_stmt_info, dt);
1618 }
1619
1620
1621 /* Function vect_get_vec_def_for_stmt_copy
1622
1623 Return a vector-def for an operand. This function is used when the
1624 vectorized stmt to be created (by the caller to this function) is a "copy"
1625 created in case the vectorized result cannot fit in one vector, and several
1626 copies of the vector-stmt are required. In this case the vector-def is
1627 retrieved from the vector stmt recorded in the STMT_VINFO_RELATED_STMT field
1628 of the stmt that defines VEC_OPRND. VINFO describes the vectorization.
1629
1630 Context:
1631 In case the vectorization factor (VF) is bigger than the number
1632 of elements that can fit in a vectype (nunits), we have to generate
1633 more than one vector stmt to vectorize the scalar stmt. This situation
1634 arises when there are multiple data-types operated upon in the loop; the
1635 smallest data-type determines the VF, and as a result, when vectorizing
1636 stmts operating on wider types we need to create 'VF/nunits' "copies" of the
1637 vector stmt (each computing a vector of 'nunits' results, and together
1638 computing 'VF' results in each iteration). This function is called when
1639 vectorizing such a stmt (e.g. vectorizing S2 in the illustration below, in
1640 which VF=16 and nunits=4, so the number of copies required is 4):
1641
1642 scalar stmt: vectorized into: STMT_VINFO_RELATED_STMT
1643
1644 S1: x = load VS1.0: vx.0 = memref0 VS1.1
1645 VS1.1: vx.1 = memref1 VS1.2
1646 VS1.2: vx.2 = memref2 VS1.3
1647 VS1.3: vx.3 = memref3
1648
1649 S2: z = x + ... VSnew.0: vz0 = vx.0 + ... VSnew.1
1650 VSnew.1: vz1 = vx.1 + ... VSnew.2
1651 VSnew.2: vz2 = vx.2 + ... VSnew.3
1652 VSnew.3: vz3 = vx.3 + ...
1653
1654 The vectorization of S1 is explained in vectorizable_load.
1655 The vectorization of S2:
1656 To create the first vector-stmt out of the 4 copies - VSnew.0 -
1657 the function 'vect_get_vec_def_for_operand' is called to
1658 get the relevant vector-def for each operand of S2. For operand x it
1659 returns the vector-def 'vx.0'.
1660
1661 To create the remaining copies of the vector-stmt (VSnew.j), this
1662 function is called to get the relevant vector-def for each operand. It is
1663 obtained from the respective VS1.j stmt, which is recorded in the
1664 STMT_VINFO_RELATED_STMT field of the stmt that defines VEC_OPRND.
1665
1666 For example, to obtain the vector-def 'vx.1' in order to create the
1667 vector stmt 'VSnew.1', this function is called with VEC_OPRND='vx.0'.
1668 Given 'vx0' we obtain the stmt that defines it ('VS1.0'); from the
1669 STMT_VINFO_RELATED_STMT field of 'VS1.0' we obtain the next copy - 'VS1.1',
1670 and return its def ('vx.1').
1671 Overall, to create the above sequence this function will be called 3 times:
1672 vx.1 = vect_get_vec_def_for_stmt_copy (vinfo, vx.0);
1673 vx.2 = vect_get_vec_def_for_stmt_copy (vinfo, vx.1);
1674 vx.3 = vect_get_vec_def_for_stmt_copy (vinfo, vx.2); */
1675
1676 tree
1677 vect_get_vec_def_for_stmt_copy (vec_info *vinfo, tree vec_oprnd)
1678 {
1679 stmt_vec_info def_stmt_info = vinfo->lookup_def (vec_oprnd);
1680 if (!def_stmt_info)
1681 /* Do nothing; can reuse same def. */
1682 return vec_oprnd;
1683
1684 def_stmt_info = STMT_VINFO_RELATED_STMT (def_stmt_info);
1685 gcc_assert (def_stmt_info);
1686 if (gphi *phi = dyn_cast <gphi *> (def_stmt_info->stmt))
1687 vec_oprnd = PHI_RESULT (phi);
1688 else
1689 vec_oprnd = gimple_get_lhs (def_stmt_info->stmt);
1690 return vec_oprnd;
1691 }
1692
1693
1694 /* Get vectorized definitions for the operands to create a copy of an original
1695 stmt. See vect_get_vec_def_for_stmt_copy () for details. */
1696
1697 void
1698 vect_get_vec_defs_for_stmt_copy (vec_info *vinfo,
1699 vec<tree> *vec_oprnds0,
1700 vec<tree> *vec_oprnds1)
1701 {
1702 tree vec_oprnd = vec_oprnds0->pop ();
1703
1704 vec_oprnd = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd);
1705 vec_oprnds0->quick_push (vec_oprnd);
1706
1707 if (vec_oprnds1 && vec_oprnds1->length ())
1708 {
1709 vec_oprnd = vec_oprnds1->pop ();
1710 vec_oprnd = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd);
1711 vec_oprnds1->quick_push (vec_oprnd);
1712 }
1713 }
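
/* A minimal sketch of how callers typically drive the two routines above when
   NCOPIES copies of a vector stmt are needed (J, OP0 and VEC_OPRNDS0 are
   illustrative names only; vectorizable_bswap further down is a real instance
   of this pattern):

     for (j = 0; j < ncopies; j++)
       {
	 if (j == 0)
	   vect_get_vec_defs (op0, NULL, stmt_info, &vec_oprnds0, NULL,
			      slp_node);
	 else
	   vect_get_vec_defs_for_stmt_copy (vinfo, &vec_oprnds0, NULL);
	 ... build the J'th copy of the vector stmt from VEC_OPRNDS0 ...
       }  */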
1714
1715
1716 /* Get vectorized definitions for OP0 and OP1. */
1717
1718 void
1719 vect_get_vec_defs (tree op0, tree op1, stmt_vec_info stmt_info,
1720 vec<tree> *vec_oprnds0,
1721 vec<tree> *vec_oprnds1,
1722 slp_tree slp_node)
1723 {
1724 if (slp_node)
1725 {
1726 auto_vec<vec<tree> > vec_defs (SLP_TREE_CHILDREN (slp_node).length ());
1727 vect_get_slp_defs (slp_node, &vec_defs, op1 ? 2 : 1);
1728 *vec_oprnds0 = vec_defs[0];
1729 if (op1)
1730 *vec_oprnds1 = vec_defs[1];
1731 }
1732 else
1733 {
1734 tree vec_oprnd;
1735
1736 vec_oprnds0->create (1);
1737 vec_oprnd = vect_get_vec_def_for_operand (op0, stmt_info);
1738 vec_oprnds0->quick_push (vec_oprnd);
1739
1740 if (op1)
1741 {
1742 vec_oprnds1->create (1);
1743 vec_oprnd = vect_get_vec_def_for_operand (op1, stmt_info);
1744 vec_oprnds1->quick_push (vec_oprnd);
1745 }
1746 }
1747 }
1748
1749 /* Helper function called by vect_finish_replace_stmt and
1750 vect_finish_stmt_generation. Set the location of the new
1751 statement and create and return a stmt_vec_info for it. */
1752
1753 static stmt_vec_info
1754 vect_finish_stmt_generation_1 (stmt_vec_info stmt_info, gimple *vec_stmt)
1755 {
1756 vec_info *vinfo = stmt_info->vinfo;
1757
1758 stmt_vec_info vec_stmt_info = vinfo->add_stmt (vec_stmt);
1759
1760 if (dump_enabled_p ())
1761 dump_printf_loc (MSG_NOTE, vect_location, "add new stmt: %G", vec_stmt);
1762
1763 gimple_set_location (vec_stmt, gimple_location (stmt_info->stmt));
1764
1765 /* While EH edges will generally prevent vectorization, stmt might
1766 e.g. be in a must-not-throw region. Ensure newly created stmts
1767 that could throw are part of the same region. */
1768 int lp_nr = lookup_stmt_eh_lp (stmt_info->stmt);
1769 if (lp_nr != 0 && stmt_could_throw_p (cfun, vec_stmt))
1770 add_stmt_to_eh_lp (vec_stmt, lp_nr);
1771
1772 return vec_stmt_info;
1773 }
1774
1775 /* Replace the scalar statement STMT_INFO with a new vector statement VEC_STMT,
1776 which sets the same scalar result as STMT_INFO did. Create and return a
1777 stmt_vec_info for VEC_STMT. */
1778
1779 stmt_vec_info
1780 vect_finish_replace_stmt (stmt_vec_info stmt_info, gimple *vec_stmt)
1781 {
1782 gimple *scalar_stmt = vect_orig_stmt (stmt_info)->stmt;
1783 gcc_assert (gimple_get_lhs (scalar_stmt) == gimple_get_lhs (vec_stmt));
1784
1785 gimple_stmt_iterator gsi = gsi_for_stmt (scalar_stmt);
1786 gsi_replace (&gsi, vec_stmt, true);
1787
1788 return vect_finish_stmt_generation_1 (stmt_info, vec_stmt);
1789 }
1790
1791 /* Add VEC_STMT to the vectorized implementation of STMT_INFO and insert it
1792 before *GSI. Create and return a stmt_vec_info for VEC_STMT. */
1793
1794 stmt_vec_info
1795 vect_finish_stmt_generation (stmt_vec_info stmt_info, gimple *vec_stmt,
1796 gimple_stmt_iterator *gsi)
1797 {
1798 gcc_assert (gimple_code (stmt_info->stmt) != GIMPLE_LABEL);
1799
1800 if (!gsi_end_p (*gsi)
1801 && gimple_has_mem_ops (vec_stmt))
1802 {
1803 gimple *at_stmt = gsi_stmt (*gsi);
1804 tree vuse = gimple_vuse (at_stmt);
1805 if (vuse && TREE_CODE (vuse) == SSA_NAME)
1806 {
1807 tree vdef = gimple_vdef (at_stmt);
1808 gimple_set_vuse (vec_stmt, gimple_vuse (at_stmt));
1809 /* If we have an SSA vuse and insert a store, update virtual
1810 SSA form to avoid triggering the renamer. Do so only
1811 if we can easily see all uses - which is what almost always
1812 happens with the way vectorized stmts are inserted. */
1813 if ((vdef && TREE_CODE (vdef) == SSA_NAME)
1814 && ((is_gimple_assign (vec_stmt)
1815 && !is_gimple_reg (gimple_assign_lhs (vec_stmt)))
1816 || (is_gimple_call (vec_stmt)
1817 && !(gimple_call_flags (vec_stmt)
1818 & (ECF_CONST|ECF_PURE|ECF_NOVOPS)))))
1819 {
1820 tree new_vdef = copy_ssa_name (vuse, vec_stmt);
1821 gimple_set_vdef (vec_stmt, new_vdef);
1822 SET_USE (gimple_vuse_op (at_stmt), new_vdef);
1823 }
1824 }
1825 }
1826 gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);
1827 return vect_finish_stmt_generation_1 (stmt_info, vec_stmt);
1828 }
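
/* For instance, when a vectorized store is inserted before the scalar store
   at *GSI, the code above gives the new store the scalar store's current
   VUSE, creates a fresh virtual definition for the new store and rewires the
   scalar store to use that fresh VDEF, so the virtual SSA chain
   ... -> new vector store -> scalar store -> ... stays well-formed without
   invoking the SSA renamer.  */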
1829
1830 /* We want to vectorize a call to combined function CFN with function
1831 decl FNDECL, using VECTYPE_OUT as the type of the output and VECTYPE_IN
1832 as the types of all inputs. Check whether this is possible using
1833 an internal function, returning its code if so or IFN_LAST if not. */
1834
1835 static internal_fn
1836 vectorizable_internal_function (combined_fn cfn, tree fndecl,
1837 tree vectype_out, tree vectype_in)
1838 {
1839 internal_fn ifn;
1840 if (internal_fn_p (cfn))
1841 ifn = as_internal_fn (cfn);
1842 else
1843 ifn = associated_internal_fn (fndecl);
1844 if (ifn != IFN_LAST && direct_internal_fn_p (ifn))
1845 {
1846 const direct_internal_fn_info &info = direct_internal_fn (ifn);
1847 if (info.vectorizable)
1848 {
1849 tree type0 = (info.type0 < 0 ? vectype_out : vectype_in);
1850 tree type1 = (info.type1 < 0 ? vectype_out : vectype_in);
1851 if (direct_internal_fn_supported_p (ifn, tree_pair (type0, type1),
1852 OPTIMIZE_FOR_SPEED))
1853 return ifn;
1854 }
1855 }
1856 return IFN_LAST;
1857 }
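
/* For example (assuming the target provides the corresponding optab): a call
   to sqrt gives CFN_BUILT_IN_SQRT, for which associated_internal_fn returns
   the directly-mapped IFN_SQRT; with VECTYPE_OUT = VECTYPE_IN = V2DF the
   function returns IFN_SQRT if direct_internal_fn_supported_p confirms the
   target has a V2DF sqrt pattern, and IFN_LAST otherwise.  */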
1858
1859
1860 static tree permute_vec_elements (tree, tree, tree, stmt_vec_info,
1861 gimple_stmt_iterator *);
1862
1863 /* Check whether a load or store statement in the loop described by
1864 LOOP_VINFO is possible in a fully-masked loop. This is testing
1865 whether the vectorizer pass has the appropriate support, as well as
1866 whether the target does.
1867
1868 VLS_TYPE says whether the statement is a load or store and VECTYPE
1869 is the type of the vector being loaded or stored. MEMORY_ACCESS_TYPE
1870 says how the load or store is going to be implemented and GROUP_SIZE
1871 is the number of load or store statements in the containing group.
1872 If the access is a gather load or scatter store, GS_INFO describes
1873 its arguments. If the load or store is conditional, SCALAR_MASK is the
1874 condition under which it occurs.
1875
1876 Clear LOOP_VINFO_CAN_FULLY_MASK_P if a fully-masked loop is not
1877 supported, otherwise record the required mask types. */
1878
1879 static void
1880 check_load_store_masking (loop_vec_info loop_vinfo, tree vectype,
1881 vec_load_store_type vls_type, int group_size,
1882 vect_memory_access_type memory_access_type,
1883 gather_scatter_info *gs_info, tree scalar_mask)
1884 {
1885 /* Invariant loads need no special support. */
1886 if (memory_access_type == VMAT_INVARIANT)
1887 return;
1888
1889 vec_loop_masks *masks = &LOOP_VINFO_MASKS (loop_vinfo);
1890 machine_mode vecmode = TYPE_MODE (vectype);
1891 bool is_load = (vls_type == VLS_LOAD);
1892 if (memory_access_type == VMAT_LOAD_STORE_LANES)
1893 {
1894 if (is_load
1895 ? !vect_load_lanes_supported (vectype, group_size, true)
1896 : !vect_store_lanes_supported (vectype, group_size, true))
1897 {
1898 if (dump_enabled_p ())
1899 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1900 "can't use a fully-masked loop because the"
1901 " target doesn't have an appropriate masked"
1902 " load/store-lanes instruction.\n");
1903 LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
1904 return;
1905 }
1906 unsigned int ncopies = vect_get_num_copies (loop_vinfo, vectype);
1907 vect_record_loop_mask (loop_vinfo, masks, ncopies, vectype, scalar_mask);
1908 return;
1909 }
1910
1911 if (memory_access_type == VMAT_GATHER_SCATTER)
1912 {
1913 internal_fn ifn = (is_load
1914 ? IFN_MASK_GATHER_LOAD
1915 : IFN_MASK_SCATTER_STORE);
1916 if (!internal_gather_scatter_fn_supported_p (ifn, vectype,
1917 gs_info->memory_type,
1918 gs_info->offset_vectype,
1919 gs_info->scale))
1920 {
1921 if (dump_enabled_p ())
1922 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1923 "can't use a fully-masked loop because the"
1924 " target doesn't have an appropriate masked"
1925 " gather load or scatter store instruction.\n");
1926 LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
1927 return;
1928 }
1929 unsigned int ncopies = vect_get_num_copies (loop_vinfo, vectype);
1930 vect_record_loop_mask (loop_vinfo, masks, ncopies, vectype, scalar_mask);
1931 return;
1932 }
1933
1934 if (memory_access_type != VMAT_CONTIGUOUS
1935 && memory_access_type != VMAT_CONTIGUOUS_PERMUTE)
1936 {
1937 /* Element X of the data must come from iteration i * VF + X of the
1938 scalar loop. We need more work to support other mappings. */
1939 if (dump_enabled_p ())
1940 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1941 "can't use a fully-masked loop because an access"
1942 " isn't contiguous.\n");
1943 LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
1944 return;
1945 }
1946
1947 machine_mode mask_mode;
1948 if (!VECTOR_MODE_P (vecmode)
1949 || !targetm.vectorize.get_mask_mode (vecmode).exists (&mask_mode)
1950 || !can_vec_mask_load_store_p (vecmode, mask_mode, is_load))
1951 {
1952 if (dump_enabled_p ())
1953 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1954 "can't use a fully-masked loop because the target"
1955 " doesn't have the appropriate masked load or"
1956 " store.\n");
1957 LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
1958 return;
1959 }
1960 /* We might load more scalars than we need for permuting SLP loads.
1961 We checked in get_group_load_store_type that the extra elements
1962 don't leak into a new vector. */
1963 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
1964 poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1965 unsigned int nvectors;
1966 if (can_div_away_from_zero_p (group_size * vf, nunits, &nvectors))
1967 vect_record_loop_mask (loop_vinfo, masks, nvectors, vectype, scalar_mask);
1968 else
1969 gcc_unreachable ();
1970 }
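
/* A worked example for the final (contiguous) case above: with a group of
   GROUP_SIZE = 2 loads, a vectorization factor VF = 8 and NUNITS = 4
   (values chosen for illustration), GROUP_SIZE * VF = 16 scalar elements are
   loaded per iteration, so can_div_away_from_zero_p gives NVECTORS = 16 / 4
   = 4 and four masks of VECTYPE's mask type are recorded in the loop's mask
   set.  */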
1971
1972 /* Return the mask input to a masked load or store. VEC_MASK is the vectorized
1973 form of the scalar mask condition and LOOP_MASK, if nonnull, is the mask
1974 that needs to be applied to all loads and stores in a vectorized loop.
1975 Return VEC_MASK if LOOP_MASK is null, otherwise return VEC_MASK & LOOP_MASK.
1976
1977 MASK_TYPE is the type of both masks. If new statements are needed,
1978 insert them before GSI. */
1979
1980 static tree
1981 prepare_load_store_mask (tree mask_type, tree loop_mask, tree vec_mask,
1982 gimple_stmt_iterator *gsi)
1983 {
1984 gcc_assert (useless_type_conversion_p (mask_type, TREE_TYPE (vec_mask)));
1985 if (!loop_mask)
1986 return vec_mask;
1987
1988 gcc_assert (TREE_TYPE (loop_mask) == mask_type);
1989 tree and_res = make_temp_ssa_name (mask_type, NULL, "vec_mask_and");
1990 gimple *and_stmt = gimple_build_assign (and_res, BIT_AND_EXPR,
1991 vec_mask, loop_mask);
1992 gsi_insert_before (gsi, and_stmt, GSI_SAME_STMT);
1993 return and_res;
1994 }
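
/* For example, with a nonnull LOOP_MASK the statement emitted before GSI is
   simply a bitwise AND of the two masks, along the lines of

     vec_mask_and_N = vec_mask_M & loop_mask_K;

   (SSA names illustrative); the result is what callers pass as the mask
   operand of the masked load or store.  */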
1995
1996 /* Determine whether we can use a gather load or scatter store to vectorize
1997 strided load or store STMT_INFO by truncating the current offset to a
1998 smaller width. We need to be able to construct an offset vector:
1999
2000 { 0, X, X*2, X*3, ... }
2001
2002 without loss of precision, where X is STMT_INFO's DR_STEP.
2003
2004 Return true if this is possible, describing the gather load or scatter
2005 store in GS_INFO. MASKED_P is true if the load or store is conditional. */
2006
2007 static bool
2008 vect_truncate_gather_scatter_offset (stmt_vec_info stmt_info,
2009 loop_vec_info loop_vinfo, bool masked_p,
2010 gather_scatter_info *gs_info)
2011 {
2012 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
2013 data_reference *dr = dr_info->dr;
2014 tree step = DR_STEP (dr);
2015 if (TREE_CODE (step) != INTEGER_CST)
2016 {
2017 /* ??? Perhaps we could use range information here? */
2018 if (dump_enabled_p ())
2019 dump_printf_loc (MSG_NOTE, vect_location,
2020 "cannot truncate variable step.\n");
2021 return false;
2022 }
2023
2024 /* Get the number of bits in an element. */
2025 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2026 scalar_mode element_mode = SCALAR_TYPE_MODE (TREE_TYPE (vectype));
2027 unsigned int element_bits = GET_MODE_BITSIZE (element_mode);
2028
2029 /* Set COUNT to the upper limit on the number of elements - 1.
2030 Start with the maximum vectorization factor. */
2031 unsigned HOST_WIDE_INT count = vect_max_vf (loop_vinfo) - 1;
2032
2033 /* Try lowering COUNT to the number of scalar latch iterations. */
2034 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2035 widest_int max_iters;
2036 if (max_loop_iterations (loop, &max_iters)
2037 && max_iters < count)
2038 count = max_iters.to_shwi ();
2039
2040 /* Try scales of 1 and the element size. */
2041 int scales[] = { 1, vect_get_scalar_dr_size (dr_info) };
2042 wi::overflow_type overflow = wi::OVF_NONE;
2043 for (int i = 0; i < 2; ++i)
2044 {
2045 int scale = scales[i];
2046 widest_int factor;
2047 if (!wi::multiple_of_p (wi::to_widest (step), scale, SIGNED, &factor))
2048 continue;
2049
2050 /* Determine the minimum precision of (COUNT - 1) * STEP / SCALE. */
2051 widest_int range = wi::mul (count, factor, SIGNED, &overflow);
2052 if (overflow)
2053 continue;
2054 signop sign = range >= 0 ? UNSIGNED : SIGNED;
2055 unsigned int min_offset_bits = wi::min_precision (range, sign);
2056
2057 /* Find the narrowest viable offset type. */
2058 unsigned int offset_bits = 1U << ceil_log2 (min_offset_bits);
2059 tree offset_type = build_nonstandard_integer_type (offset_bits,
2060 sign == UNSIGNED);
2061
2062 /* See whether the target supports the operation with an offset
2063 no narrower than OFFSET_TYPE. */
2064 tree memory_type = TREE_TYPE (DR_REF (dr));
2065 if (!vect_gather_scatter_fn_p (loop_vinfo, DR_IS_READ (dr), masked_p,
2066 vectype, memory_type, offset_type, scale,
2067 &gs_info->ifn, &gs_info->offset_vectype))
2068 continue;
2069
2070 gs_info->decl = NULL_TREE;
2071 /* Logically the sum of DR_BASE_ADDRESS, DR_INIT and DR_OFFSET,
2072 but we don't need to store that here. */
2073 gs_info->base = NULL_TREE;
2074 gs_info->element_type = TREE_TYPE (vectype);
2075 gs_info->offset = fold_convert (offset_type, step);
2076 gs_info->offset_dt = vect_constant_def;
2077 gs_info->scale = scale;
2078 gs_info->memory_type = memory_type;
2079 return true;
2080 }
2081
2082 if (overflow && dump_enabled_p ())
2083 dump_printf_loc (MSG_NOTE, vect_location,
2084 "truncating gather/scatter offset to %d bits"
2085 " might change its value.\n", element_bits);
2086
2087 return false;
2088 }
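
/* A worked example of the reasoning above (numbers illustrative only):
   suppose DR_STEP is 20 bytes, the element size is 4 bytes and COUNT ends up
   as 255.  For SCALE = 4 the step factor is 20 / 4 = 5, so the largest
   offset is 255 * 5 = 1275, which needs 11 bits; the narrowest power-of-two
   offset width is therefore 16 bits, and we ask whether the target supports
   the gather or scatter with a 16-bit unsigned offset and scale 4.  */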
2089
2090 /* Return true if we can use gather/scatter internal functions to
2091 vectorize STMT_INFO, which is a grouped or strided load or store.
2092 MASKED_P is true if load or store is conditional. When returning
2093 true, fill in GS_INFO with the information required to perform the
2094 operation. */
2095
2096 static bool
2097 vect_use_strided_gather_scatters_p (stmt_vec_info stmt_info,
2098 loop_vec_info loop_vinfo, bool masked_p,
2099 gather_scatter_info *gs_info)
2100 {
2101 if (!vect_check_gather_scatter (stmt_info, loop_vinfo, gs_info)
2102 || gs_info->decl)
2103 return vect_truncate_gather_scatter_offset (stmt_info, loop_vinfo,
2104 masked_p, gs_info);
2105
2106 tree old_offset_type = TREE_TYPE (gs_info->offset);
2107 tree new_offset_type = TREE_TYPE (gs_info->offset_vectype);
2108
2109 gcc_assert (TYPE_PRECISION (new_offset_type)
2110 >= TYPE_PRECISION (old_offset_type));
2111 gs_info->offset = fold_convert (new_offset_type, gs_info->offset);
2112
2113 if (dump_enabled_p ())
2114 dump_printf_loc (MSG_NOTE, vect_location,
2115 "using gather/scatter for strided/grouped access,"
2116 " scale = %d\n", gs_info->scale);
2117
2118 return true;
2119 }
2120
2121 /* STMT_INFO is a non-strided load or store, meaning that it accesses
2122 elements with a known constant step. Return -1 if that step
2123 is negative, 0 if it is zero, and 1 if it is greater than zero. */
2124
2125 static int
2126 compare_step_with_zero (stmt_vec_info stmt_info)
2127 {
2128 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
2129 return tree_int_cst_compare (vect_dr_behavior (dr_info)->step,
2130 size_zero_node);
2131 }
2132
2133 /* If the target supports a permute mask that reverses the elements in
2134 a vector of type VECTYPE, return that mask, otherwise return null. */
2135
2136 static tree
2137 perm_mask_for_reverse (tree vectype)
2138 {
2139 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2140
2141 /* The encoding has a single stepped pattern. */
2142 vec_perm_builder sel (nunits, 1, 3);
2143 for (int i = 0; i < 3; ++i)
2144 sel.quick_push (nunits - 1 - i);
2145
2146 vec_perm_indices indices (sel, 1, nunits);
2147 if (!can_vec_perm_const_p (TYPE_MODE (vectype), indices))
2148 return NULL_TREE;
2149 return vect_gen_perm_mask_checked (vectype, indices);
2150 }
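
/* For example, for VECTYPE = V4SI the three elements pushed above are
   3, 2, 1 (a single stepped pattern starting at NUNITS - 1 and decreasing
   by 1), which expands to the full reversal selector { 3, 2, 1, 0 }; for
   V8HI the same encoding expands to { 7, 6, 5, 4, 3, 2, 1, 0 }.  */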
2151
2152 /* A subroutine of get_load_store_type, with a subset of the same
2153 arguments. Handle the case where STMT_INFO is a load or store that
2154 accesses consecutive elements with a negative step. */
2155
2156 static vect_memory_access_type
2157 get_negative_load_store_type (stmt_vec_info stmt_info, tree vectype,
2158 vec_load_store_type vls_type,
2159 unsigned int ncopies)
2160 {
2161 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
2162 dr_alignment_support alignment_support_scheme;
2163
2164 if (ncopies > 1)
2165 {
2166 if (dump_enabled_p ())
2167 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2168 "multiple types with negative step.\n");
2169 return VMAT_ELEMENTWISE;
2170 }
2171
2172 alignment_support_scheme = vect_supportable_dr_alignment (dr_info, false);
2173 if (alignment_support_scheme != dr_aligned
2174 && alignment_support_scheme != dr_unaligned_supported)
2175 {
2176 if (dump_enabled_p ())
2177 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2178 "negative step but alignment required.\n");
2179 return VMAT_ELEMENTWISE;
2180 }
2181
2182 if (vls_type == VLS_STORE_INVARIANT)
2183 {
2184 if (dump_enabled_p ())
2185 dump_printf_loc (MSG_NOTE, vect_location,
2186 "negative step with invariant source;"
2187 " no permute needed.\n");
2188 return VMAT_CONTIGUOUS_DOWN;
2189 }
2190
2191 if (!perm_mask_for_reverse (vectype))
2192 {
2193 if (dump_enabled_p ())
2194 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2195 "negative step and reversing not supported.\n");
2196 return VMAT_ELEMENTWISE;
2197 }
2198
2199 return VMAT_CONTIGUOUS_REVERSE;
2200 }
2201
2202 /* STMT_INFO is either a masked or unconditional store. Return the value
2203 being stored. */
2204
2205 tree
2206 vect_get_store_rhs (stmt_vec_info stmt_info)
2207 {
2208 if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
2209 {
2210 gcc_assert (gimple_assign_single_p (assign));
2211 return gimple_assign_rhs1 (assign);
2212 }
2213 if (gcall *call = dyn_cast <gcall *> (stmt_info->stmt))
2214 {
2215 internal_fn ifn = gimple_call_internal_fn (call);
2216 int index = internal_fn_stored_value_index (ifn);
2217 gcc_assert (index >= 0);
2218 return gimple_call_arg (call, index);
2219 }
2220 gcc_unreachable ();
2221 }
2222
2223 /* Function VECTOR_VECTOR_COMPOSITION_TYPE
2224
2225    This function returns a vector type which can be composed from NELTS pieces,
2226    whose type is recorded in PTYPE.  VTYPE should be a vector type and have the
2227    same vector size as the returned vector.  It first checks whether the target
2228    supports a pieces-sized vector mode for the construction; if not, it then
2229    checks whether a pieces-sized scalar mode can be used instead.  It returns
2230    NULL_TREE if no suitable composition can be found.
2231
2232 For example, for (vtype=V16QI, nelts=4), we can probably get:
2233 - V16QI with PTYPE V4QI.
2234 - V4SI with PTYPE SI.
2235 - NULL_TREE. */
2236
2237 static tree
2238 vector_vector_composition_type (tree vtype, poly_uint64 nelts, tree *ptype)
2239 {
2240 gcc_assert (VECTOR_TYPE_P (vtype));
2241 gcc_assert (known_gt (nelts, 0U));
2242
2243 machine_mode vmode = TYPE_MODE (vtype);
2244 if (!VECTOR_MODE_P (vmode))
2245 return NULL_TREE;
2246
2247 poly_uint64 vbsize = GET_MODE_BITSIZE (vmode);
2248 unsigned int pbsize;
2249 if (constant_multiple_p (vbsize, nelts, &pbsize))
2250 {
2251 /* First check if vec_init optab supports construction from
2252 vector pieces directly. */
2253 scalar_mode elmode = SCALAR_TYPE_MODE (TREE_TYPE (vtype));
2254 poly_uint64 inelts = pbsize / GET_MODE_BITSIZE (elmode);
2255 machine_mode rmode;
2256 if (related_vector_mode (vmode, elmode, inelts).exists (&rmode)
2257 && (convert_optab_handler (vec_init_optab, vmode, rmode)
2258 != CODE_FOR_nothing))
2259 {
2260 *ptype = build_vector_type (TREE_TYPE (vtype), inelts);
2261 return vtype;
2262 }
2263
2264 /* Otherwise check if exists an integer type of the same piece size and
2265 if vec_init optab supports construction from it directly. */
2266 if (int_mode_for_size (pbsize, 0).exists (&elmode)
2267 && related_vector_mode (vmode, elmode, nelts).exists (&rmode)
2268 && (convert_optab_handler (vec_init_optab, rmode, elmode)
2269 != CODE_FOR_nothing))
2270 {
2271 *ptype = build_nonstandard_integer_type (pbsize, 1);
2272 return build_vector_type (*ptype, nelts);
2273 }
2274 }
2275
2276 return NULL_TREE;
2277 }
2278
2279 /* A subroutine of get_load_store_type, with a subset of the same
2280 arguments. Handle the case where STMT_INFO is part of a grouped load
2281 or store.
2282
2283 For stores, the statements in the group are all consecutive
2284 and there is no gap at the end. For loads, the statements in the
2285 group might not be consecutive; there can be gaps between statements
2286 as well as at the end. */
2287
2288 static bool
2289 get_group_load_store_type (stmt_vec_info stmt_info, tree vectype, bool slp,
2290 bool masked_p, vec_load_store_type vls_type,
2291 vect_memory_access_type *memory_access_type,
2292 gather_scatter_info *gs_info)
2293 {
2294 vec_info *vinfo = stmt_info->vinfo;
2295 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2296 class loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
2297 stmt_vec_info first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
2298 dr_vec_info *first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
2299 unsigned int group_size = DR_GROUP_SIZE (first_stmt_info);
2300 bool single_element_p = (stmt_info == first_stmt_info
2301 && !DR_GROUP_NEXT_ELEMENT (stmt_info));
2302 unsigned HOST_WIDE_INT gap = DR_GROUP_GAP (first_stmt_info);
2303 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2304
2305 /* True if the vectorized statements would access beyond the last
2306 statement in the group. */
2307 bool overrun_p = false;
2308
2309 /* True if we can cope with such overrun by peeling for gaps, so that
2310 there is at least one final scalar iteration after the vector loop. */
2311 bool can_overrun_p = (!masked_p
2312 && vls_type == VLS_LOAD
2313 && loop_vinfo
2314 && !loop->inner);
2315
2316 /* There can only be a gap at the end of the group if the stride is
2317 known at compile time. */
2318 gcc_assert (!STMT_VINFO_STRIDED_P (first_stmt_info) || gap == 0);
2319
2320 /* Stores can't yet have gaps. */
2321 gcc_assert (slp || vls_type == VLS_LOAD || gap == 0);
2322
2323 if (slp)
2324 {
2325 if (STMT_VINFO_STRIDED_P (first_stmt_info))
2326 {
2327 /* Try to use consecutive accesses of DR_GROUP_SIZE elements,
2328 separated by the stride, until we have a complete vector.
2329 Fall back to scalar accesses if that isn't possible. */
2330 if (multiple_p (nunits, group_size))
2331 *memory_access_type = VMAT_STRIDED_SLP;
2332 else
2333 *memory_access_type = VMAT_ELEMENTWISE;
2334 }
2335 else
2336 {
2337 overrun_p = loop_vinfo && gap != 0;
2338 if (overrun_p && vls_type != VLS_LOAD)
2339 {
2340 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2341 "Grouped store with gaps requires"
2342 " non-consecutive accesses\n");
2343 return false;
2344 }
2345 /* An overrun is fine if the trailing elements are smaller
2346 than the alignment boundary B. Every vector access will
2347 be a multiple of B and so we are guaranteed to access a
2348 non-gap element in the same B-sized block. */
2349 if (overrun_p
2350 && gap < (vect_known_alignment_in_bytes (first_dr_info)
2351 / vect_get_scalar_dr_size (first_dr_info)))
2352 overrun_p = false;
2353
2354 /* If the gap splits the vector in half and the target
2355 can do half-vector operations avoid the epilogue peeling
2356 by simply loading half of the vector only. Usually
2357 the construction with an upper zero half will be elided. */
2358 dr_alignment_support alignment_support_scheme;
2359 tree half_vtype;
2360 if (overrun_p
2361 && !masked_p
2362 && (((alignment_support_scheme
2363 = vect_supportable_dr_alignment (first_dr_info, false)))
2364 == dr_aligned
2365 || alignment_support_scheme == dr_unaligned_supported)
2366 && known_eq (nunits, (group_size - gap) * 2)
2367 && known_eq (nunits, group_size)
2368 && (vector_vector_composition_type (vectype, 2, &half_vtype)
2369 != NULL_TREE))
2370 overrun_p = false;
2371
2372 if (overrun_p && !can_overrun_p)
2373 {
2374 if (dump_enabled_p ())
2375 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2376 "Peeling for outer loop is not supported\n");
2377 return false;
2378 }
2379 int cmp = compare_step_with_zero (stmt_info);
2380 if (cmp < 0)
2381 *memory_access_type = get_negative_load_store_type
2382 (stmt_info, vectype, vls_type, 1);
2383 else
2384 {
2385 gcc_assert (!loop_vinfo || cmp > 0);
2386 *memory_access_type = VMAT_CONTIGUOUS;
2387 }
2388 }
2389 }
2390 else
2391 {
2392 /* We can always handle this case using elementwise accesses,
2393 but see if something more efficient is available. */
2394 *memory_access_type = VMAT_ELEMENTWISE;
2395
2396 /* If there is a gap at the end of the group then these optimizations
2397 would access excess elements in the last iteration. */
2398 bool would_overrun_p = (gap != 0);
2399 /* An overrun is fine if the trailing elements are smaller than the
2400 alignment boundary B. Every vector access will be a multiple of B
2401 and so we are guaranteed to access a non-gap element in the
2402 same B-sized block. */
2403 if (would_overrun_p
2404 && !masked_p
2405 && gap < (vect_known_alignment_in_bytes (first_dr_info)
2406 / vect_get_scalar_dr_size (first_dr_info)))
2407 would_overrun_p = false;
2408
2409 if (!STMT_VINFO_STRIDED_P (first_stmt_info)
2410 && (can_overrun_p || !would_overrun_p)
2411 && compare_step_with_zero (stmt_info) > 0)
2412 {
2413 /* First cope with the degenerate case of a single-element
2414 vector. */
2415 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype), 1U))
2416 ;
2417
2418 /* Otherwise try using LOAD/STORE_LANES. */
2419 else if (vls_type == VLS_LOAD
2420 ? vect_load_lanes_supported (vectype, group_size, masked_p)
2421 : vect_store_lanes_supported (vectype, group_size,
2422 masked_p))
2423 {
2424 *memory_access_type = VMAT_LOAD_STORE_LANES;
2425 overrun_p = would_overrun_p;
2426 }
2427
2428 /* If that fails, try using permuting loads. */
2429 else if (vls_type == VLS_LOAD
2430 ? vect_grouped_load_supported (vectype, single_element_p,
2431 group_size)
2432 : vect_grouped_store_supported (vectype, group_size))
2433 {
2434 *memory_access_type = VMAT_CONTIGUOUS_PERMUTE;
2435 overrun_p = would_overrun_p;
2436 }
2437 }
2438
2439       /* As a last resort, try using a gather load or scatter store.
2440
2441 ??? Although the code can handle all group sizes correctly,
2442 it probably isn't a win to use separate strided accesses based
2443 on nearby locations. Or, even if it's a win over scalar code,
2444 it might not be a win over vectorizing at a lower VF, if that
2445 allows us to use contiguous accesses. */
2446 if (*memory_access_type == VMAT_ELEMENTWISE
2447 && single_element_p
2448 && loop_vinfo
2449 && vect_use_strided_gather_scatters_p (stmt_info, loop_vinfo,
2450 masked_p, gs_info))
2451 *memory_access_type = VMAT_GATHER_SCATTER;
2452 }
2453
2454 if (vls_type != VLS_LOAD && first_stmt_info == stmt_info)
2455 {
2456       /* STMT_INFO is the leader of the group.  Check the operands of all the
2457 	 stmts of the group.  */
2458 stmt_vec_info next_stmt_info = DR_GROUP_NEXT_ELEMENT (stmt_info);
2459 while (next_stmt_info)
2460 {
2461 tree op = vect_get_store_rhs (next_stmt_info);
2462 enum vect_def_type dt;
2463 if (!vect_is_simple_use (op, vinfo, &dt))
2464 {
2465 if (dump_enabled_p ())
2466 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2467 "use not simple.\n");
2468 return false;
2469 }
2470 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
2471 }
2472 }
2473
2474 if (overrun_p)
2475 {
2476 gcc_assert (can_overrun_p);
2477 if (dump_enabled_p ())
2478 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2479 "Data access with gaps requires scalar "
2480 "epilogue loop\n");
2481 LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) = true;
2482 }
2483
2484 return true;
2485 }
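
/* An illustration of the overrun reasoning above (sizes made up): with a
   single-element interleaving group of 4-byte loads where GROUP_SIZE = 4 and
   GAP = 3, loading whole vectors reads 3 elements past the last scalar
   access.  If the first access is known to be 16-byte aligned then
   GAP (3) < 16 / 4 = 4, so every vector load stays within the same 16-byte
   block as a real element and no peeling for gaps is needed; otherwise a
   scalar epilogue is required (LOOP_VINFO_PEELING_FOR_GAPS) or the access is
   rejected.  */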
2486
2487 /* Analyze load or store statement STMT_INFO of type VLS_TYPE. Return true
2488 if there is a memory access type that the vectorized form can use,
2489 storing it in *MEMORY_ACCESS_TYPE if so. If we decide to use gathers
2490 or scatters, fill in GS_INFO accordingly.
2491
2492 SLP says whether we're performing SLP rather than loop vectorization.
2493 MASKED_P is true if the statement is conditional on a vectorized mask.
2494 VECTYPE is the vector type that the vectorized statements will use.
2495 NCOPIES is the number of vector statements that will be needed. */
2496
2497 static bool
2498 get_load_store_type (stmt_vec_info stmt_info, tree vectype, bool slp,
2499 bool masked_p, vec_load_store_type vls_type,
2500 unsigned int ncopies,
2501 vect_memory_access_type *memory_access_type,
2502 gather_scatter_info *gs_info)
2503 {
2504 vec_info *vinfo = stmt_info->vinfo;
2505 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2506 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2507 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
2508 {
2509 *memory_access_type = VMAT_GATHER_SCATTER;
2510 if (!vect_check_gather_scatter (stmt_info, loop_vinfo, gs_info))
2511 gcc_unreachable ();
2512 else if (!vect_is_simple_use (gs_info->offset, vinfo,
2513 &gs_info->offset_dt,
2514 &gs_info->offset_vectype))
2515 {
2516 if (dump_enabled_p ())
2517 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2518 "%s index use not simple.\n",
2519 vls_type == VLS_LOAD ? "gather" : "scatter");
2520 return false;
2521 }
2522 }
2523 else if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
2524 {
2525 if (!get_group_load_store_type (stmt_info, vectype, slp, masked_p,
2526 vls_type, memory_access_type, gs_info))
2527 return false;
2528 }
2529 else if (STMT_VINFO_STRIDED_P (stmt_info))
2530 {
2531 gcc_assert (!slp);
2532 if (loop_vinfo
2533 && vect_use_strided_gather_scatters_p (stmt_info, loop_vinfo,
2534 masked_p, gs_info))
2535 *memory_access_type = VMAT_GATHER_SCATTER;
2536 else
2537 *memory_access_type = VMAT_ELEMENTWISE;
2538 }
2539 else
2540 {
2541 int cmp = compare_step_with_zero (stmt_info);
2542 if (cmp < 0)
2543 *memory_access_type = get_negative_load_store_type
2544 (stmt_info, vectype, vls_type, ncopies);
2545 else if (cmp == 0)
2546 {
2547 gcc_assert (vls_type == VLS_LOAD);
2548 *memory_access_type = VMAT_INVARIANT;
2549 }
2550 else
2551 *memory_access_type = VMAT_CONTIGUOUS;
2552 }
2553
2554 if ((*memory_access_type == VMAT_ELEMENTWISE
2555 || *memory_access_type == VMAT_STRIDED_SLP)
2556 && !nunits.is_constant ())
2557 {
2558 if (dump_enabled_p ())
2559 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2560 "Not using elementwise accesses due to variable "
2561 "vectorization factor.\n");
2562 return false;
2563 }
2564
2565 /* FIXME: At the moment the cost model seems to underestimate the
2566 cost of using elementwise accesses. This check preserves the
2567 traditional behavior until that can be fixed. */
2568 stmt_vec_info first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
2569 if (!first_stmt_info)
2570 first_stmt_info = stmt_info;
2571 if (*memory_access_type == VMAT_ELEMENTWISE
2572 && !STMT_VINFO_STRIDED_P (first_stmt_info)
2573 && !(stmt_info == DR_GROUP_FIRST_ELEMENT (stmt_info)
2574 && !DR_GROUP_NEXT_ELEMENT (stmt_info)
2575 && !pow2p_hwi (DR_GROUP_SIZE (stmt_info))))
2576 {
2577 if (dump_enabled_p ())
2578 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2579 "not falling back to elementwise accesses\n");
2580 return false;
2581 }
2582 return true;
2583 }
2584
2585 /* Return true if boolean argument MASK is suitable for vectorizing
2586 conditional operation STMT_INFO. When returning true, store the type
2587 of the definition in *MASK_DT_OUT and the type of the vectorized mask
2588 in *MASK_VECTYPE_OUT. */
2589
2590 static bool
2591 vect_check_scalar_mask (stmt_vec_info stmt_info, tree mask,
2592 vect_def_type *mask_dt_out,
2593 tree *mask_vectype_out)
2594 {
2595 vec_info *vinfo = stmt_info->vinfo;
2596 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (mask)))
2597 {
2598 if (dump_enabled_p ())
2599 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2600 "mask argument is not a boolean.\n");
2601 return false;
2602 }
2603
2604 if (TREE_CODE (mask) != SSA_NAME)
2605 {
2606 if (dump_enabled_p ())
2607 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2608 "mask argument is not an SSA name.\n");
2609 return false;
2610 }
2611
2612 enum vect_def_type mask_dt;
2613 tree mask_vectype;
2614 if (!vect_is_simple_use (mask, stmt_info->vinfo, &mask_dt, &mask_vectype))
2615 {
2616 if (dump_enabled_p ())
2617 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2618 "mask use not simple.\n");
2619 return false;
2620 }
2621
2622 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2623 if (!mask_vectype)
2624 mask_vectype = get_mask_type_for_scalar_type (vinfo, TREE_TYPE (vectype));
2625
2626 if (!mask_vectype || !VECTOR_BOOLEAN_TYPE_P (mask_vectype))
2627 {
2628 if (dump_enabled_p ())
2629 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2630 "could not find an appropriate vector mask type.\n");
2631 return false;
2632 }
2633
2634 if (maybe_ne (TYPE_VECTOR_SUBPARTS (mask_vectype),
2635 TYPE_VECTOR_SUBPARTS (vectype)))
2636 {
2637 if (dump_enabled_p ())
2638 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2639 "vector mask type %T"
2640 " does not match vector data type %T.\n",
2641 mask_vectype, vectype);
2642
2643 return false;
2644 }
2645
2646 *mask_dt_out = mask_dt;
2647 *mask_vectype_out = mask_vectype;
2648 return true;
2649 }
2650
2651 /* Return true if stored value RHS is suitable for vectorizing store
2652 statement STMT_INFO. When returning true, store the type of the
2653 definition in *RHS_DT_OUT, the type of the vectorized store value in
2654 *RHS_VECTYPE_OUT and the type of the store in *VLS_TYPE_OUT. */
2655
2656 static bool
2657 vect_check_store_rhs (stmt_vec_info stmt_info, tree rhs,
2658 vect_def_type *rhs_dt_out, tree *rhs_vectype_out,
2659 vec_load_store_type *vls_type_out)
2660 {
2661   /* If this is a store of a constant, make sure
2662      native_encode_expr can handle it.  */
2663 if (CONSTANT_CLASS_P (rhs) && native_encode_expr (rhs, NULL, 64) == 0)
2664 {
2665 if (dump_enabled_p ())
2666 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2667 "cannot encode constant as a byte sequence.\n");
2668 return false;
2669 }
2670
2671 enum vect_def_type rhs_dt;
2672 tree rhs_vectype;
2673 if (!vect_is_simple_use (rhs, stmt_info->vinfo, &rhs_dt, &rhs_vectype))
2674 {
2675 if (dump_enabled_p ())
2676 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2677 "use not simple.\n");
2678 return false;
2679 }
2680
2681 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2682 if (rhs_vectype && !useless_type_conversion_p (vectype, rhs_vectype))
2683 {
2684 if (dump_enabled_p ())
2685 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2686 "incompatible vector types.\n");
2687 return false;
2688 }
2689
2690 *rhs_dt_out = rhs_dt;
2691 *rhs_vectype_out = rhs_vectype;
2692 if (rhs_dt == vect_constant_def || rhs_dt == vect_external_def)
2693 *vls_type_out = VLS_STORE_INVARIANT;
2694 else
2695 *vls_type_out = VLS_STORE;
2696 return true;
2697 }
2698
2699 /* Build an all-ones vector mask of type MASKTYPE while vectorizing STMT_INFO.
2700 Note that we support masks with floating-point type, in which case the
2701 floats are interpreted as a bitmask. */
2702
2703 static tree
2704 vect_build_all_ones_mask (stmt_vec_info stmt_info, tree masktype)
2705 {
2706 if (TREE_CODE (masktype) == INTEGER_TYPE)
2707 return build_int_cst (masktype, -1);
2708 else if (TREE_CODE (TREE_TYPE (masktype)) == INTEGER_TYPE)
2709 {
2710 tree mask = build_int_cst (TREE_TYPE (masktype), -1);
2711 mask = build_vector_from_val (masktype, mask);
2712 return vect_init_vector (stmt_info, mask, masktype, NULL);
2713 }
2714 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (masktype)))
2715 {
2716 REAL_VALUE_TYPE r;
2717 long tmp[6];
2718 for (int j = 0; j < 6; ++j)
2719 tmp[j] = -1;
2720 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (masktype)));
2721 tree mask = build_real (TREE_TYPE (masktype), r);
2722 mask = build_vector_from_val (masktype, mask);
2723 return vect_init_vector (stmt_info, mask, masktype, NULL);
2724 }
2725 gcc_unreachable ();
2726 }
2727
2728 /* Build an all-zero merge value of type VECTYPE while vectorizing
2729 STMT_INFO as a gather load. */
2730
2731 static tree
2732 vect_build_zero_merge_argument (stmt_vec_info stmt_info, tree vectype)
2733 {
2734 tree merge;
2735 if (TREE_CODE (TREE_TYPE (vectype)) == INTEGER_TYPE)
2736 merge = build_int_cst (TREE_TYPE (vectype), 0);
2737 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (vectype)))
2738 {
2739 REAL_VALUE_TYPE r;
2740 long tmp[6];
2741 for (int j = 0; j < 6; ++j)
2742 tmp[j] = 0;
2743 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (vectype)));
2744 merge = build_real (TREE_TYPE (vectype), r);
2745 }
2746 else
2747 gcc_unreachable ();
2748 merge = build_vector_from_val (vectype, merge);
2749 return vect_init_vector (stmt_info, merge, vectype, NULL);
2750 }
2751
2752 /* Build a gather load call while vectorizing STMT_INFO. Insert new
2753 instructions before GSI and add them to VEC_STMT. GS_INFO describes
2754 the gather load operation. If the load is conditional, MASK is the
2755 unvectorized condition and MASK_DT is its definition type, otherwise
2756 MASK is null. */
2757
2758 static void
2759 vect_build_gather_load_calls (stmt_vec_info stmt_info,
2760 gimple_stmt_iterator *gsi,
2761 stmt_vec_info *vec_stmt,
2762 gather_scatter_info *gs_info,
2763 tree mask)
2764 {
2765 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2766 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2767 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2768 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2769 int ncopies = vect_get_num_copies (loop_vinfo, vectype);
2770 edge pe = loop_preheader_edge (loop);
2771 enum { NARROW, NONE, WIDEN } modifier;
2772 poly_uint64 gather_off_nunits
2773 = TYPE_VECTOR_SUBPARTS (gs_info->offset_vectype);
2774
2775 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info->decl));
2776 tree rettype = TREE_TYPE (TREE_TYPE (gs_info->decl));
2777 tree srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2778 tree ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2779 tree idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2780 tree masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2781 tree scaletype = TREE_VALUE (arglist);
2782 tree real_masktype = masktype;
2783 gcc_checking_assert (types_compatible_p (srctype, rettype)
2784 && (!mask
2785 || TREE_CODE (masktype) == INTEGER_TYPE
2786 || types_compatible_p (srctype, masktype)));
2787 if (mask && TREE_CODE (masktype) == INTEGER_TYPE)
2788 masktype = truth_type_for (srctype);
2789
2790 tree mask_halftype = masktype;
2791 tree perm_mask = NULL_TREE;
2792 tree mask_perm_mask = NULL_TREE;
2793 if (known_eq (nunits, gather_off_nunits))
2794 modifier = NONE;
2795 else if (known_eq (nunits * 2, gather_off_nunits))
2796 {
2797 modifier = WIDEN;
2798
2799 /* Currently widening gathers and scatters are only supported for
2800 fixed-length vectors. */
2801 int count = gather_off_nunits.to_constant ();
2802 vec_perm_builder sel (count, count, 1);
2803 for (int i = 0; i < count; ++i)
2804 sel.quick_push (i | (count / 2));
2805
2806 vec_perm_indices indices (sel, 1, count);
2807 perm_mask = vect_gen_perm_mask_checked (gs_info->offset_vectype,
2808 indices);
2809 }
2810 else if (known_eq (nunits, gather_off_nunits * 2))
2811 {
2812 modifier = NARROW;
2813
2814 /* Currently narrowing gathers and scatters are only supported for
2815 fixed-length vectors. */
2816 int count = nunits.to_constant ();
2817 vec_perm_builder sel (count, count, 1);
2818 sel.quick_grow (count);
2819 for (int i = 0; i < count; ++i)
2820 sel[i] = i < count / 2 ? i : i + count / 2;
2821 vec_perm_indices indices (sel, 2, count);
2822 perm_mask = vect_gen_perm_mask_checked (vectype, indices);
2823
2824 ncopies *= 2;
2825
2826 if (mask && masktype == real_masktype)
2827 {
2828 for (int i = 0; i < count; ++i)
2829 sel[i] = i | (count / 2);
2830 indices.new_vector (sel, 2, count);
2831 mask_perm_mask = vect_gen_perm_mask_checked (masktype, indices);
2832 }
2833 else if (mask)
2834 mask_halftype = truth_type_for (gs_info->offset_vectype);
2835 }
2836 else
2837 gcc_unreachable ();
2838
2839 tree scalar_dest = gimple_get_lhs (stmt_info->stmt);
2840 tree vec_dest = vect_create_destination_var (scalar_dest, vectype);
2841
2842 tree ptr = fold_convert (ptrtype, gs_info->base);
2843 if (!is_gimple_min_invariant (ptr))
2844 {
2845 gimple_seq seq;
2846 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
2847 basic_block new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
2848 gcc_assert (!new_bb);
2849 }
2850
2851 tree scale = build_int_cst (scaletype, gs_info->scale);
2852
2853 tree vec_oprnd0 = NULL_TREE;
2854 tree vec_mask = NULL_TREE;
2855 tree src_op = NULL_TREE;
2856 tree mask_op = NULL_TREE;
2857 tree prev_res = NULL_TREE;
2858 stmt_vec_info prev_stmt_info = NULL;
2859
2860 if (!mask)
2861 {
2862 src_op = vect_build_zero_merge_argument (stmt_info, rettype);
2863 mask_op = vect_build_all_ones_mask (stmt_info, masktype);
2864 }
2865
2866 for (int j = 0; j < ncopies; ++j)
2867 {
2868 tree op, var;
2869 if (modifier == WIDEN && (j & 1))
2870 op = permute_vec_elements (vec_oprnd0, vec_oprnd0,
2871 perm_mask, stmt_info, gsi);
2872 else if (j == 0)
2873 op = vec_oprnd0
2874 = vect_get_vec_def_for_operand (gs_info->offset, stmt_info);
2875 else
2876 op = vec_oprnd0 = vect_get_vec_def_for_stmt_copy (loop_vinfo,
2877 vec_oprnd0);
2878
2879 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
2880 {
2881 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op)),
2882 TYPE_VECTOR_SUBPARTS (idxtype)));
2883 var = vect_get_new_ssa_name (idxtype, vect_simple_var);
2884 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
2885 gassign *new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
2886 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
2887 op = var;
2888 }
2889
2890 if (mask)
2891 {
2892 if (mask_perm_mask && (j & 1))
2893 mask_op = permute_vec_elements (mask_op, mask_op,
2894 mask_perm_mask, stmt_info, gsi);
2895 else
2896 {
2897 if (j == 0)
2898 vec_mask = vect_get_vec_def_for_operand (mask, stmt_info);
2899 else if (modifier != NARROW || (j & 1) == 0)
2900 vec_mask = vect_get_vec_def_for_stmt_copy (loop_vinfo,
2901 vec_mask);
2902
2903 mask_op = vec_mask;
2904 if (!useless_type_conversion_p (masktype, TREE_TYPE (vec_mask)))
2905 {
2906 poly_uint64 sub1 = TYPE_VECTOR_SUBPARTS (TREE_TYPE (mask_op));
2907 poly_uint64 sub2 = TYPE_VECTOR_SUBPARTS (masktype);
2908 gcc_assert (known_eq (sub1, sub2));
2909 var = vect_get_new_ssa_name (masktype, vect_simple_var);
2910 mask_op = build1 (VIEW_CONVERT_EXPR, masktype, mask_op);
2911 gassign *new_stmt
2912 = gimple_build_assign (var, VIEW_CONVERT_EXPR, mask_op);
2913 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
2914 mask_op = var;
2915 }
2916 }
2917 if (modifier == NARROW && masktype != real_masktype)
2918 {
2919 var = vect_get_new_ssa_name (mask_halftype, vect_simple_var);
2920 gassign *new_stmt
2921 = gimple_build_assign (var, (j & 1) ? VEC_UNPACK_HI_EXPR
2922 : VEC_UNPACK_LO_EXPR,
2923 mask_op);
2924 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
2925 mask_op = var;
2926 }
2927 src_op = mask_op;
2928 }
2929
2930 tree mask_arg = mask_op;
2931 if (masktype != real_masktype)
2932 {
2933 tree utype, optype = TREE_TYPE (mask_op);
2934 if (TYPE_MODE (real_masktype) == TYPE_MODE (optype))
2935 utype = real_masktype;
2936 else
2937 utype = lang_hooks.types.type_for_mode (TYPE_MODE (optype), 1);
2938 var = vect_get_new_ssa_name (utype, vect_scalar_var);
2939 mask_arg = build1 (VIEW_CONVERT_EXPR, utype, mask_op);
2940 gassign *new_stmt
2941 = gimple_build_assign (var, VIEW_CONVERT_EXPR, mask_arg);
2942 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
2943 mask_arg = var;
2944 if (!useless_type_conversion_p (real_masktype, utype))
2945 {
2946 gcc_assert (TYPE_PRECISION (utype)
2947 <= TYPE_PRECISION (real_masktype));
2948 var = vect_get_new_ssa_name (real_masktype, vect_scalar_var);
2949 new_stmt = gimple_build_assign (var, NOP_EXPR, mask_arg);
2950 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
2951 mask_arg = var;
2952 }
2953 src_op = build_zero_cst (srctype);
2954 }
2955 gcall *new_call = gimple_build_call (gs_info->decl, 5, src_op, ptr, op,
2956 mask_arg, scale);
2957
2958 stmt_vec_info new_stmt_info;
2959 if (!useless_type_conversion_p (vectype, rettype))
2960 {
2961 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (vectype),
2962 TYPE_VECTOR_SUBPARTS (rettype)));
2963 op = vect_get_new_ssa_name (rettype, vect_simple_var);
2964 gimple_call_set_lhs (new_call, op);
2965 vect_finish_stmt_generation (stmt_info, new_call, gsi);
2966 var = make_ssa_name (vec_dest);
2967 op = build1 (VIEW_CONVERT_EXPR, vectype, op);
2968 gassign *new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
2969 new_stmt_info
2970 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
2971 }
2972 else
2973 {
2974 var = make_ssa_name (vec_dest, new_call);
2975 gimple_call_set_lhs (new_call, var);
2976 new_stmt_info
2977 = vect_finish_stmt_generation (stmt_info, new_call, gsi);
2978 }
2979
2980 if (modifier == NARROW)
2981 {
2982 if ((j & 1) == 0)
2983 {
2984 prev_res = var;
2985 continue;
2986 }
2987 var = permute_vec_elements (prev_res, var, perm_mask,
2988 stmt_info, gsi);
2989 new_stmt_info = loop_vinfo->lookup_def (var);
2990 }
2991
2992 if (prev_stmt_info == NULL)
2993 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
2994 else
2995 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
2996 prev_stmt_info = new_stmt_info;
2997 }
2998 }
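
/* Illustration of the permute selectors built above, for small fixed counts:

   - WIDEN with GATHER_OFF_NUNITS = 8: SEL = { 4, 5, 6, 7, 4, 5, 6, 7 }, so
     odd-numbered copies move the high half of the offset vector into the low
     positions before issuing the next gather call.

   - NARROW with NUNITS = 4: SEL = { 0, 1, 4, 5 } over two input vectors, so
     permute_vec_elements concatenates the low halves of two consecutive call
     results into one full vector of type VECTYPE.  */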
2999
3000 /* Prepare the base and offset in GS_INFO for vectorization.
3001 Set *DATAREF_PTR to the loop-invariant base address and *VEC_OFFSET
3002 to the vectorized offset argument for the first copy of STMT_INFO.
3003 STMT_INFO is the statement described by GS_INFO and LOOP is the
3004 containing loop. */
3005
3006 static void
3007 vect_get_gather_scatter_ops (class loop *loop, stmt_vec_info stmt_info,
3008 gather_scatter_info *gs_info,
3009 tree *dataref_ptr, tree *vec_offset)
3010 {
3011 gimple_seq stmts = NULL;
3012 *dataref_ptr = force_gimple_operand (gs_info->base, &stmts, true, NULL_TREE);
3013 if (stmts != NULL)
3014 {
3015 basic_block new_bb;
3016 edge pe = loop_preheader_edge (loop);
3017 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
3018 gcc_assert (!new_bb);
3019 }
3020 *vec_offset = vect_get_vec_def_for_operand (gs_info->offset, stmt_info,
3021 gs_info->offset_vectype);
3022 }
3023
3024 /* Prepare to implement a grouped or strided load or store using
3025 the gather load or scatter store operation described by GS_INFO.
3026 STMT_INFO is the load or store statement.
3027
3028 Set *DATAREF_BUMP to the amount that should be added to the base
3029 address after each copy of the vectorized statement. Set *VEC_OFFSET
3030 to an invariant offset vector in which element I has the value
3031 I * DR_STEP / SCALE. */
3032
3033 static void
3034 vect_get_strided_load_store_ops (stmt_vec_info stmt_info,
3035 loop_vec_info loop_vinfo,
3036 gather_scatter_info *gs_info,
3037 tree *dataref_bump, tree *vec_offset)
3038 {
3039 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
3040 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3041 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3042 gimple_seq stmts;
3043
3044 tree bump = size_binop (MULT_EXPR,
3045 fold_convert (sizetype, unshare_expr (DR_STEP (dr))),
3046 size_int (TYPE_VECTOR_SUBPARTS (vectype)));
3047 *dataref_bump = force_gimple_operand (bump, &stmts, true, NULL_TREE);
3048 if (stmts)
3049 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
3050
3051 /* The offset given in GS_INFO can have pointer type, so use the element
3052 type of the vector instead. */
3053   tree offset_type = TREE_TYPE (gs_info->offset_vectype);
3055
3056 /* Calculate X = DR_STEP / SCALE and convert it to the appropriate type. */
3057 tree step = size_binop (EXACT_DIV_EXPR, unshare_expr (DR_STEP (dr)),
3058 ssize_int (gs_info->scale));
3059 step = fold_convert (offset_type, step);
3060 step = force_gimple_operand (step, &stmts, true, NULL_TREE);
3061
3062 /* Create {0, X, X*2, X*3, ...}. */
3063 *vec_offset = gimple_build (&stmts, VEC_SERIES_EXPR, gs_info->offset_vectype,
3064 build_zero_cst (offset_type), step);
3065 if (stmts)
3066 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
3067 }
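
/* Worked example (values illustrative): for a strided access with
   DR_STEP = 32 bytes, VECTYPE = V4SI and SCALE = 4, the code above sets
   *DATAREF_BUMP to 32 * 4 = 128 (the byte distance between consecutive
   copies of the vector statement) and X = 32 / 4 = 8, so *VEC_OFFSET
   becomes the invariant series { 0, 8, 16, 24 }.  */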
3068
3069 /* Return the amount that should be added to a vector pointer to move
3070 to the next or previous copy of AGGR_TYPE. DR_INFO is the data reference
3071 being vectorized and MEMORY_ACCESS_TYPE describes the type of
3072 vectorization. */
3073
3074 static tree
3075 vect_get_data_ptr_increment (dr_vec_info *dr_info, tree aggr_type,
3076 vect_memory_access_type memory_access_type)
3077 {
3078 if (memory_access_type == VMAT_INVARIANT)
3079 return size_zero_node;
3080
3081 tree iv_step = TYPE_SIZE_UNIT (aggr_type);
3082 tree step = vect_dr_behavior (dr_info)->step;
3083 if (tree_int_cst_sgn (step) == -1)
3084 iv_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (iv_step), iv_step);
3085 return iv_step;
3086 }
3087
3088 /* Check and perform vectorization of BUILT_IN_BSWAP{16,32,64}. */
3089
3090 static bool
3091 vectorizable_bswap (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
3092 stmt_vec_info *vec_stmt, slp_tree slp_node,
3093 tree vectype_in, stmt_vector_for_cost *cost_vec)
3094 {
3095 tree op, vectype;
3096 gcall *stmt = as_a <gcall *> (stmt_info->stmt);
3097 vec_info *vinfo = stmt_info->vinfo;
3098 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3099 unsigned ncopies;
3100
3101 op = gimple_call_arg (stmt, 0);
3102 vectype = STMT_VINFO_VECTYPE (stmt_info);
3103 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
3104
3105 /* Multiple types in SLP are handled by creating the appropriate number of
3106 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
3107 case of SLP. */
3108 if (slp_node)
3109 ncopies = 1;
3110 else
3111 ncopies = vect_get_num_copies (loop_vinfo, vectype);
3112
3113 gcc_assert (ncopies >= 1);
3114
3115 tree char_vectype = get_same_sized_vectype (char_type_node, vectype_in);
3116 if (! char_vectype)
3117 return false;
3118
3119 poly_uint64 num_bytes = TYPE_VECTOR_SUBPARTS (char_vectype);
3120 unsigned word_bytes;
3121 if (!constant_multiple_p (num_bytes, nunits, &word_bytes))
3122 return false;
3123
3124 /* The encoding uses one stepped pattern for each byte in the word. */
3125 vec_perm_builder elts (num_bytes, word_bytes, 3);
3126 for (unsigned i = 0; i < 3; ++i)
3127 for (unsigned j = 0; j < word_bytes; ++j)
3128 elts.quick_push ((i + 1) * word_bytes - j - 1);
3129
3130 vec_perm_indices indices (elts, 1, num_bytes);
3131 if (!can_vec_perm_const_p (TYPE_MODE (char_vectype), indices))
3132 return false;
3133
3134 if (! vec_stmt)
3135 {
3136 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
3137 DUMP_VECT_SCOPE ("vectorizable_bswap");
3138 if (! slp_node)
3139 {
3140 record_stmt_cost (cost_vec,
3141 1, vector_stmt, stmt_info, 0, vect_prologue);
3142 record_stmt_cost (cost_vec,
3143 ncopies, vec_perm, stmt_info, 0, vect_body);
3144 }
3145 return true;
3146 }
3147
3148 tree bswap_vconst = vec_perm_indices_to_tree (char_vectype, indices);
3149
3150 /* Transform. */
3151 vec<tree> vec_oprnds = vNULL;
3152 stmt_vec_info new_stmt_info = NULL;
3153 stmt_vec_info prev_stmt_info = NULL;
3154 for (unsigned j = 0; j < ncopies; j++)
3155 {
3156 /* Handle uses. */
3157 if (j == 0)
3158 vect_get_vec_defs (op, NULL, stmt_info, &vec_oprnds, NULL, slp_node);
3159 else
3160 vect_get_vec_defs_for_stmt_copy (vinfo, &vec_oprnds, NULL);
3161
3162 /* Arguments are ready. Create the new vector stmt. */
3163 unsigned i;
3164 tree vop;
3165 FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
3166 {
3167 gimple *new_stmt;
3168 tree tem = make_ssa_name (char_vectype);
3169 new_stmt = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
3170 char_vectype, vop));
3171 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
3172 tree tem2 = make_ssa_name (char_vectype);
3173 new_stmt = gimple_build_assign (tem2, VEC_PERM_EXPR,
3174 tem, tem, bswap_vconst);
3175 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
3176 tem = make_ssa_name (vectype);
3177 new_stmt = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
3178 vectype, tem2));
3179 new_stmt_info
3180 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
3181 if (slp_node)
3182 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
3183 }
3184
3185 if (slp_node)
3186 continue;
3187
3188 if (j == 0)
3189 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
3190 else
3191 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
3192
3193 prev_stmt_info = new_stmt_info;
3194 }
3195
3196 vec_oprnds.release ();
3197 return true;
3198 }
3199
3200 /* Return true if vector types VECTYPE_IN and VECTYPE_OUT have
3201 integer elements and if we can narrow VECTYPE_IN to VECTYPE_OUT
3202 in a single step. On success, store the binary pack code in
3203 *CONVERT_CODE. */
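/* For example (a sketch): with VECTYPE_IN V4SI and VECTYPE_OUT V8HI,
   a single VEC_PACK_TRUNC_EXPR combines two V4SI inputs into one V8HI
   result, so no intermediate types are needed and *CONVERT_CODE is set
   to that pack code.  */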
3204
3205 static bool
3206 simple_integer_narrowing (tree vectype_out, tree vectype_in,
3207 tree_code *convert_code)
3208 {
3209 if (!INTEGRAL_TYPE_P (TREE_TYPE (vectype_out))
3210 || !INTEGRAL_TYPE_P (TREE_TYPE (vectype_in)))
3211 return false;
3212
3213 tree_code code;
3214 int multi_step_cvt = 0;
3215 auto_vec <tree, 8> interm_types;
3216 if (!supportable_narrowing_operation (NOP_EXPR, vectype_out, vectype_in,
3217 &code, &multi_step_cvt, &interm_types)
3218 || multi_step_cvt)
3219 return false;
3220
3221 *convert_code = code;
3222 return true;
3223 }
3224
3225 /* Function vectorizable_call.
3226
3227 Check if STMT_INFO performs a function call that can be vectorized.
3228 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
3229 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
3230 Return true if STMT_INFO is vectorizable in this way. */
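/* For instance (an illustrative sketch): a loop-body call y = sqrtf (x)
   can be vectorized either as an internal function call on whole vectors
   (vy_ = .SQRT (vx_)) or, failing that, as a call to a target-provided
   vectorized builtin; both paths are tried below.  */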
3231
3232 static bool
3233 vectorizable_call (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
3234 stmt_vec_info *vec_stmt, slp_tree slp_node,
3235 stmt_vector_for_cost *cost_vec)
3236 {
3237 gcall *stmt;
3238 tree vec_dest;
3239 tree scalar_dest;
3240 tree op;
3241 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
3242 stmt_vec_info prev_stmt_info;
3243 tree vectype_out, vectype_in;
3244 poly_uint64 nunits_in;
3245 poly_uint64 nunits_out;
3246 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3247 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
3248 vec_info *vinfo = stmt_info->vinfo;
3249 tree fndecl, new_temp, rhs_type;
3250 enum vect_def_type dt[4]
3251 = { vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type,
3252 vect_unknown_def_type };
3253 tree vectypes[ARRAY_SIZE (dt)] = {};
3254 int ndts = ARRAY_SIZE (dt);
3255 int ncopies, j;
3256 auto_vec<tree, 8> vargs;
3257 auto_vec<tree, 8> orig_vargs;
3258 enum { NARROW, NONE, WIDEN } modifier;
3259 size_t i, nargs;
3260 tree lhs;
3261
3262 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3263 return false;
3264
3265 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
3266 && ! vec_stmt)
3267 return false;
3268
3269 /* Is STMT_INFO a vectorizable call? */
3270 stmt = dyn_cast <gcall *> (stmt_info->stmt);
3271 if (!stmt)
3272 return false;
3273
3274 if (gimple_call_internal_p (stmt)
3275 && (internal_load_fn_p (gimple_call_internal_fn (stmt))
3276 || internal_store_fn_p (gimple_call_internal_fn (stmt))))
3277 /* Handled by vectorizable_load and vectorizable_store. */
3278 return false;
3279
3280 if (gimple_call_lhs (stmt) == NULL_TREE
3281 || TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
3282 return false;
3283
3284 gcc_checking_assert (!stmt_can_throw_internal (cfun, stmt));
3285
3286 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
3287
3288 /* Process function arguments. */
3289 rhs_type = NULL_TREE;
3290 vectype_in = NULL_TREE;
3291 nargs = gimple_call_num_args (stmt);
3292
3293 /* Bail out if the function has more than four arguments; we do not have
3294 interesting builtin functions to vectorize with more than two arguments
3295 except for fma. No arguments is also not good. */
3296 if (nargs == 0 || nargs > 4)
3297 return false;
3298
3299 /* Ignore the arguments of IFN_GOMP_SIMD_LANE, they are magic. */
3300 combined_fn cfn = gimple_call_combined_fn (stmt);
3301 if (cfn == CFN_GOMP_SIMD_LANE)
3302 {
3303 nargs = 0;
3304 rhs_type = unsigned_type_node;
3305 }
3306
3307 int mask_opno = -1;
3308 if (internal_fn_p (cfn))
3309 mask_opno = internal_fn_mask_index (as_internal_fn (cfn));
3310
3311 for (i = 0; i < nargs; i++)
3312 {
3313 op = gimple_call_arg (stmt, i);
3314
3315 if ((int) i == mask_opno)
3316 {
3317 if (!vect_check_scalar_mask (stmt_info, op, &dt[i], &vectypes[i]))
3318 return false;
3319 continue;
3320 }
3321
3322 if (!vect_is_simple_use (op, vinfo, &dt[i], &vectypes[i]))
3323 {
3324 if (dump_enabled_p ())
3325 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3326 "use not simple.\n");
3327 return false;
3328 }
3329
3330 /* We can only handle calls with arguments of the same type. */
3331 if (rhs_type
3332 && !types_compatible_p (rhs_type, TREE_TYPE (op)))
3333 {
3334 if (dump_enabled_p ())
3335 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3336 "argument types differ.\n");
3337 return false;
3338 }
3339 if (!rhs_type)
3340 rhs_type = TREE_TYPE (op);
3341
3342 if (!vectype_in)
3343 vectype_in = vectypes[i];
3344 else if (vectypes[i]
3345 && !types_compatible_p (vectypes[i], vectype_in))
3346 {
3347 if (dump_enabled_p ())
3348 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3349 "argument vector types differ.\n");
3350 return false;
3351 }
3352 }
3353 /* If all arguments are external or constant defs, infer the vector type
3354 from the scalar type. */
3355 if (!vectype_in)
3356 vectype_in = get_vectype_for_scalar_type (vinfo, rhs_type, slp_node);
3357 if (vec_stmt)
3358 gcc_assert (vectype_in);
3359 if (!vectype_in)
3360 {
3361 if (dump_enabled_p ())
3362 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3363 "no vectype for scalar type %T\n", rhs_type);
3364
3365 return false;
3366 }
3367 /* FORNOW: we don't yet support mixtures of vector sizes for calls,
3368 just mixtures of nunits. E.g. DI->SI versions of __builtin_ctz*
3369 are traditionally vectorized as two VnDI->VnDI IFN_CTZs followed
3370 by a pack of the two vectors into an SI vector. We would need
3371 separate code to handle direct VnDI->VnSI IFN_CTZs. */
3372 if (TYPE_SIZE (vectype_in) != TYPE_SIZE (vectype_out))
3373 {
3374 if (dump_enabled_p ())
3375 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3376 "mismatched vector sizes %T and %T\n",
3377 vectype_in, vectype_out);
3378 return false;
3379 }
3380
3381 if (VECTOR_BOOLEAN_TYPE_P (vectype_out)
3382 != VECTOR_BOOLEAN_TYPE_P (vectype_in))
3383 {
3384 if (dump_enabled_p ())
3385 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3386 "mixed mask and nonmask vector types\n");
3387 return false;
3388 }
3389
3390 /* FORNOW */
3391 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
3392 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
3393 if (known_eq (nunits_in * 2, nunits_out))
3394 modifier = NARROW;
3395 else if (known_eq (nunits_out, nunits_in))
3396 modifier = NONE;
3397 else if (known_eq (nunits_out * 2, nunits_in))
3398 modifier = WIDEN;
3399 else
3400 return false;
3401
3402 /* We only handle functions that do not read or clobber memory. */
3403 if (gimple_vuse (stmt))
3404 {
3405 if (dump_enabled_p ())
3406 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3407 "function reads from or writes to memory.\n");
3408 return false;
3409 }
3410
3411 /* For now, we only vectorize functions if a target specific builtin
3412 is available. TODO -- in some cases, it might be profitable to
3413 insert the calls for pieces of the vector, in order to be able
3414 to vectorize other operations in the loop. */
3415 fndecl = NULL_TREE;
3416 internal_fn ifn = IFN_LAST;
3417 tree callee = gimple_call_fndecl (stmt);
3418
3419 /* First try using an internal function. */
3420 tree_code convert_code = ERROR_MARK;
3421 if (cfn != CFN_LAST
3422 && (modifier == NONE
3423 || (modifier == NARROW
3424 && simple_integer_narrowing (vectype_out, vectype_in,
3425 &convert_code))))
3426 ifn = vectorizable_internal_function (cfn, callee, vectype_out,
3427 vectype_in);
3428
3429 /* If that fails, try asking for a target-specific built-in function. */
3430 if (ifn == IFN_LAST)
3431 {
3432 if (cfn != CFN_LAST)
3433 fndecl = targetm.vectorize.builtin_vectorized_function
3434 (cfn, vectype_out, vectype_in);
3435 else if (callee && fndecl_built_in_p (callee, BUILT_IN_MD))
3436 fndecl = targetm.vectorize.builtin_md_vectorized_function
3437 (callee, vectype_out, vectype_in);
3438 }
3439
3440 if (ifn == IFN_LAST && !fndecl)
3441 {
3442 if (cfn == CFN_GOMP_SIMD_LANE
3443 && !slp_node
3444 && loop_vinfo
3445 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
3446 && TREE_CODE (gimple_call_arg (stmt, 0)) == SSA_NAME
3447 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
3448 == SSA_NAME_VAR (gimple_call_arg (stmt, 0)))
3449 {
3450 /* We can handle IFN_GOMP_SIMD_LANE by returning a
3451 { 0, 1, 2, ... vf - 1 } vector. */
3452 gcc_assert (nargs == 0);
3453 }
3454 else if (modifier == NONE
3455 && (gimple_call_builtin_p (stmt, BUILT_IN_BSWAP16)
3456 || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP32)
3457 || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP64)))
3458 return vectorizable_bswap (stmt_info, gsi, vec_stmt, slp_node,
3459 vectype_in, cost_vec);
3460 else
3461 {
3462 if (dump_enabled_p ())
3463 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3464 "function is not vectorizable.\n");
3465 return false;
3466 }
3467 }
3468
3469 if (slp_node)
3470 ncopies = 1;
3471 else if (modifier == NARROW && ifn == IFN_LAST)
3472 ncopies = vect_get_num_copies (loop_vinfo, vectype_out);
3473 else
3474 ncopies = vect_get_num_copies (loop_vinfo, vectype_in);
3475
3476 /* Sanity check: make sure that at least one copy of the vectorized stmt
3477 needs to be generated. */
3478 gcc_assert (ncopies >= 1);
3479
3480 vec_loop_masks *masks = (loop_vinfo ? &LOOP_VINFO_MASKS (loop_vinfo) : NULL);
3481 if (!vec_stmt) /* transformation not required. */
3482 {
3483 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
3484 DUMP_VECT_SCOPE ("vectorizable_call");
3485 vect_model_simple_cost (stmt_info, ncopies, dt, ndts, slp_node, cost_vec);
3486 if (ifn != IFN_LAST && modifier == NARROW && !slp_node)
3487 record_stmt_cost (cost_vec, ncopies / 2,
3488 vec_promote_demote, stmt_info, 0, vect_body);
3489
3490 if (loop_vinfo && mask_opno >= 0)
3491 {
3492 unsigned int nvectors = (slp_node
3493 ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node)
3494 : ncopies);
3495 tree scalar_mask = gimple_call_arg (stmt_info->stmt, mask_opno);
3496 vect_record_loop_mask (loop_vinfo, masks, nvectors,
3497 vectype_out, scalar_mask);
3498 }
3499 return true;
3500 }
3501
3502 /* Transform. */
3503
3504 if (dump_enabled_p ())
3505 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
3506
3507 /* Handle def. */
3508 scalar_dest = gimple_call_lhs (stmt);
3509 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
3510
3511 bool masked_loop_p = loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo);
3512
3513 stmt_vec_info new_stmt_info = NULL;
3514 prev_stmt_info = NULL;
3515 if (modifier == NONE || ifn != IFN_LAST)
3516 {
3517 tree prev_res = NULL_TREE;
3518 vargs.safe_grow (nargs);
3519 orig_vargs.safe_grow (nargs);
3520 for (j = 0; j < ncopies; ++j)
3521 {
3522 /* Build argument list for the vectorized call. */
3523 if (slp_node)
3524 {
3525 auto_vec<vec<tree> > vec_defs (nargs);
3526 vec<tree> vec_oprnds0;
3527
3528 vect_get_slp_defs (slp_node, &vec_defs);
3529 vec_oprnds0 = vec_defs[0];
3530
3531 /* Arguments are ready. Create the new vector stmt. */
3532 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_oprnd0)
3533 {
3534 size_t k;
3535 for (k = 0; k < nargs; k++)
3536 {
3537 vec<tree> vec_oprndsk = vec_defs[k];
3538 vargs[k] = vec_oprndsk[i];
3539 }
3540 if (modifier == NARROW)
3541 {
3542 /* We don't define any narrowing conditional functions
3543 at present. */
3544 gcc_assert (mask_opno < 0);
3545 tree half_res = make_ssa_name (vectype_in);
3546 gcall *call
3547 = gimple_build_call_internal_vec (ifn, vargs);
3548 gimple_call_set_lhs (call, half_res);
3549 gimple_call_set_nothrow (call, true);
3550 vect_finish_stmt_generation (stmt_info, call, gsi);
3551 if ((i & 1) == 0)
3552 {
3553 prev_res = half_res;
3554 continue;
3555 }
3556 new_temp = make_ssa_name (vec_dest);
3557 gimple *new_stmt
3558 = gimple_build_assign (new_temp, convert_code,
3559 prev_res, half_res);
3560 new_stmt_info
3561 = vect_finish_stmt_generation (stmt_info, new_stmt,
3562 gsi);
3563 }
3564 else
3565 {
3566 if (mask_opno >= 0 && masked_loop_p)
3567 {
3568 unsigned int vec_num = vec_oprnds0.length ();
3569 /* Always true for SLP. */
3570 gcc_assert (ncopies == 1);
3571 tree mask = vect_get_loop_mask (gsi, masks, vec_num,
3572 vectype_out, i);
3573 vargs[mask_opno] = prepare_load_store_mask
3574 (TREE_TYPE (mask), mask, vargs[mask_opno], gsi);
3575 }
3576
3577 gcall *call;
3578 if (ifn != IFN_LAST)
3579 call = gimple_build_call_internal_vec (ifn, vargs);
3580 else
3581 call = gimple_build_call_vec (fndecl, vargs);
3582 new_temp = make_ssa_name (vec_dest, call);
3583 gimple_call_set_lhs (call, new_temp);
3584 gimple_call_set_nothrow (call, true);
3585 new_stmt_info
3586 = vect_finish_stmt_generation (stmt_info, call, gsi);
3587 }
3588 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
3589 }
3590
3591 for (i = 0; i < nargs; i++)
3592 {
3593 vec<tree> vec_oprndsi = vec_defs[i];
3594 vec_oprndsi.release ();
3595 }
3596 continue;
3597 }
3598
3599 for (i = 0; i < nargs; i++)
3600 {
3601 op = gimple_call_arg (stmt, i);
3602 if (j == 0)
3603 vec_oprnd0
3604 = vect_get_vec_def_for_operand (op, stmt_info, vectypes[i]);
3605 else
3606 vec_oprnd0
3607 = vect_get_vec_def_for_stmt_copy (vinfo, orig_vargs[i]);
3608
3609 orig_vargs[i] = vargs[i] = vec_oprnd0;
3610 }
3611
3612 if (mask_opno >= 0 && masked_loop_p)
3613 {
3614 tree mask = vect_get_loop_mask (gsi, masks, ncopies,
3615 vectype_out, j);
3616 vargs[mask_opno]
3617 = prepare_load_store_mask (TREE_TYPE (mask), mask,
3618 vargs[mask_opno], gsi);
3619 }
3620
3621 if (cfn == CFN_GOMP_SIMD_LANE)
3622 {
3623 tree cst = build_index_vector (vectype_out, j * nunits_out, 1);
3624 tree new_var
3625 = vect_get_new_ssa_name (vectype_out, vect_simple_var, "cst_");
3626 gimple *init_stmt = gimple_build_assign (new_var, cst);
3627 vect_init_vector_1 (stmt_info, init_stmt, NULL);
3628 new_temp = make_ssa_name (vec_dest);
3629 gimple *new_stmt = gimple_build_assign (new_temp, new_var);
3630 new_stmt_info
3631 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
3632 }
3633 else if (modifier == NARROW)
3634 {
3635 /* We don't define any narrowing conditional functions at
3636 present. */
3637 gcc_assert (mask_opno < 0);
3638 tree half_res = make_ssa_name (vectype_in);
3639 gcall *call = gimple_build_call_internal_vec (ifn, vargs);
3640 gimple_call_set_lhs (call, half_res);
3641 gimple_call_set_nothrow (call, true);
3642 vect_finish_stmt_generation (stmt_info, call, gsi);
3643 if ((j & 1) == 0)
3644 {
3645 prev_res = half_res;
3646 continue;
3647 }
3648 new_temp = make_ssa_name (vec_dest);
3649 gassign *new_stmt = gimple_build_assign (new_temp, convert_code,
3650 prev_res, half_res);
3651 new_stmt_info
3652 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
3653 }
3654 else
3655 {
3656 gcall *call;
3657 if (ifn != IFN_LAST)
3658 call = gimple_build_call_internal_vec (ifn, vargs);
3659 else
3660 call = gimple_build_call_vec (fndecl, vargs);
3661 new_temp = make_ssa_name (vec_dest, call);
3662 gimple_call_set_lhs (call, new_temp);
3663 gimple_call_set_nothrow (call, true);
3664 new_stmt_info
3665 = vect_finish_stmt_generation (stmt_info, call, gsi);
3666 }
3667
3668 if (j == (modifier == NARROW ? 1 : 0))
3669 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
3670 else
3671 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
3672
3673 prev_stmt_info = new_stmt_info;
3674 }
3675 }
3676 else if (modifier == NARROW)
3677 {
3678 /* We don't define any narrowing conditional functions at present. */
3679 gcc_assert (mask_opno < 0);
3680 for (j = 0; j < ncopies; ++j)
3681 {
3682 /* Build argument list for the vectorized call. */
3683 if (j == 0)
3684 vargs.create (nargs * 2);
3685 else
3686 vargs.truncate (0);
3687
3688 if (slp_node)
3689 {
3690 auto_vec<vec<tree> > vec_defs (nargs);
3691 vec<tree> vec_oprnds0;
3692
3693 vect_get_slp_defs (slp_node, &vec_defs);
3694 vec_oprnds0 = vec_defs[0];
3695
3696 /* Arguments are ready. Create the new vector stmt. */
3697 for (i = 0; vec_oprnds0.iterate (i, &vec_oprnd0); i += 2)
3698 {
3699 size_t k;
3700 vargs.truncate (0);
3701 for (k = 0; k < nargs; k++)
3702 {
3703 vec<tree> vec_oprndsk = vec_defs[k];
3704 vargs.quick_push (vec_oprndsk[i]);
3705 vargs.quick_push (vec_oprndsk[i + 1]);
3706 }
3707 gcall *call;
3708 if (ifn != IFN_LAST)
3709 call = gimple_build_call_internal_vec (ifn, vargs);
3710 else
3711 call = gimple_build_call_vec (fndecl, vargs);
3712 new_temp = make_ssa_name (vec_dest, call);
3713 gimple_call_set_lhs (call, new_temp);
3714 gimple_call_set_nothrow (call, true);
3715 new_stmt_info
3716 = vect_finish_stmt_generation (stmt_info, call, gsi);
3717 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
3718 }
3719
3720 for (i = 0; i < nargs; i++)
3721 {
3722 vec<tree> vec_oprndsi = vec_defs[i];
3723 vec_oprndsi.release ();
3724 }
3725 continue;
3726 }
3727
3728 for (i = 0; i < nargs; i++)
3729 {
3730 op = gimple_call_arg (stmt, i);
3731 if (j == 0)
3732 {
3733 vec_oprnd0
3734 = vect_get_vec_def_for_operand (op, stmt_info,
3735 vectypes[i]);
3736 vec_oprnd1
3737 = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd0);
3738 }
3739 else
3740 {
3741 vec_oprnd1 = gimple_call_arg (new_stmt_info->stmt,
3742 2 * i + 1);
3743 vec_oprnd0
3744 = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd1);
3745 vec_oprnd1
3746 = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd0);
3747 }
3748
3749 vargs.quick_push (vec_oprnd0);
3750 vargs.quick_push (vec_oprnd1);
3751 }
3752
3753 gcall *new_stmt = gimple_build_call_vec (fndecl, vargs);
3754 new_temp = make_ssa_name (vec_dest, new_stmt);
3755 gimple_call_set_lhs (new_stmt, new_temp);
3756 new_stmt_info
3757 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
3758
3759 if (j == 0)
3760 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt_info;
3761 else
3762 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
3763
3764 prev_stmt_info = new_stmt_info;
3765 }
3766
3767 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
3768 }
3769 else
3770 /* No current target implements this case. */
3771 return false;
3772
3773 vargs.release ();
3774
3775 /* The call in STMT might prevent it from being removed in dce.
3776 We however cannot remove it here, due to the way the ssa name
3777 it defines is mapped to the new definition. So just replace
3778 rhs of the statement with something harmless. */
3779
3780 if (slp_node)
3781 return true;
3782
3783 stmt_info = vect_orig_stmt (stmt_info);
3784 lhs = gimple_get_lhs (stmt_info->stmt);
3785
3786 gassign *new_stmt
3787 = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
3788 vinfo->replace_stmt (gsi, stmt_info, new_stmt);
3789
3790 return true;
3791 }
3792
3793
3794 struct simd_call_arg_info
3795 {
3796 tree vectype;  /* Vector type of the argument, if any.  */
3797 tree op;  /* Base of a linear argument; reused as scratch during transform.  */
3798 HOST_WIDE_INT linear_step;  /* Step of a linear argument, else 0.  */
3799 enum vect_def_type dt;  /* Def type of the argument.  */
3800 unsigned int align;  /* Known pointer alignment in bytes, else 0.  */
3801 bool simd_lane_linear;  /* True if linear within a simd lane only.  */
3802 };
3803
3804 /* Helper function of vectorizable_simd_clone_call. If OP, an SSA_NAME,
3805 is linear within a simd lane (but not within the whole loop), note it in
3806 *ARGINFO. */
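/* A hedged sketch of the pattern this recognizes:
     _1 = GOMP_SIMD_LANE (simduid.0_5);
     _2 = (sizetype) _1;
     _3 = _2 * 4;
     op_6 = &array + _3;
   OP advances by 4 bytes per simd lane rather than per loop iteration,
   so ARGINFO records base &array, linear_step 4 and
   simd_lane_linear = true.  */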
3807
3808 static void
3809 vect_simd_lane_linear (tree op, class loop *loop,
3810 struct simd_call_arg_info *arginfo)
3811 {
3812 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
3813
3814 if (!is_gimple_assign (def_stmt)
3815 || gimple_assign_rhs_code (def_stmt) != POINTER_PLUS_EXPR
3816 || !is_gimple_min_invariant (gimple_assign_rhs1 (def_stmt)))
3817 return;
3818
3819 tree base = gimple_assign_rhs1 (def_stmt);
3820 HOST_WIDE_INT linear_step = 0;
3821 tree v = gimple_assign_rhs2 (def_stmt);
3822 while (TREE_CODE (v) == SSA_NAME)
3823 {
3824 tree t;
3825 def_stmt = SSA_NAME_DEF_STMT (v);
3826 if (is_gimple_assign (def_stmt))
3827 switch (gimple_assign_rhs_code (def_stmt))
3828 {
3829 case PLUS_EXPR:
3830 t = gimple_assign_rhs2 (def_stmt);
3831 if (linear_step || TREE_CODE (t) != INTEGER_CST)
3832 return;
3833 base = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (base), base, t);
3834 v = gimple_assign_rhs1 (def_stmt);
3835 continue;
3836 case MULT_EXPR:
3837 t = gimple_assign_rhs2 (def_stmt);
3838 if (linear_step || !tree_fits_shwi_p (t) || integer_zerop (t))
3839 return;
3840 linear_step = tree_to_shwi (t);
3841 v = gimple_assign_rhs1 (def_stmt);
3842 continue;
3843 CASE_CONVERT:
3844 t = gimple_assign_rhs1 (def_stmt);
3845 if (TREE_CODE (TREE_TYPE (t)) != INTEGER_TYPE
3846 || (TYPE_PRECISION (TREE_TYPE (v))
3847 < TYPE_PRECISION (TREE_TYPE (t))))
3848 return;
3849 if (!linear_step)
3850 linear_step = 1;
3851 v = t;
3852 continue;
3853 default:
3854 return;
3855 }
3856 else if (gimple_call_internal_p (def_stmt, IFN_GOMP_SIMD_LANE)
3857 && loop->simduid
3858 && TREE_CODE (gimple_call_arg (def_stmt, 0)) == SSA_NAME
3859 && (SSA_NAME_VAR (gimple_call_arg (def_stmt, 0))
3860 == loop->simduid))
3861 {
3862 if (!linear_step)
3863 linear_step = 1;
3864 arginfo->linear_step = linear_step;
3865 arginfo->op = base;
3866 arginfo->simd_lane_linear = true;
3867 return;
3868 }
3869 }
3870 }
3871
3872 /* Return the number of elements in vector type VECTYPE, which is associated
3873 with a SIMD clone. At present these vectors always have a constant
3874 length. */
3875
3876 static unsigned HOST_WIDE_INT
3877 simd_clone_subparts (tree vectype)
3878 {
3879 return TYPE_VECTOR_SUBPARTS (vectype).to_constant ();
3880 }
3881
3882 /* Function vectorizable_simd_clone_call.
3883
3884 Check if STMT_INFO performs a function call that can be vectorized
3885 by calling a simd clone of the function.
3886 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
3887 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
3888 Return true if STMT_INFO is vectorizable in this way. */
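/* A hedged example: for a function declared with
     #pragma omp declare simd notinbranch
     double f (double x);
   a clone with simdlen 4 takes a V4DF argument and returns a V4DF.
   With a vectorization factor of 8, NCOPIES is 8 / 4 = 2, so each
   scalar call is replaced by two calls to the chosen clone.  */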
3889
3890 static bool
3891 vectorizable_simd_clone_call (stmt_vec_info stmt_info,
3892 gimple_stmt_iterator *gsi,
3893 stmt_vec_info *vec_stmt, slp_tree slp_node,
3894 stmt_vector_for_cost *)
3895 {
3896 tree vec_dest;
3897 tree scalar_dest;
3898 tree op, type;
3899 tree vec_oprnd0 = NULL_TREE;
3900 stmt_vec_info prev_stmt_info;
3901 tree vectype;
3902 unsigned int nunits;
3903 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3904 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
3905 vec_info *vinfo = stmt_info->vinfo;
3906 class loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
3907 tree fndecl, new_temp;
3908 int ncopies, j;
3909 auto_vec<simd_call_arg_info> arginfo;
3910 vec<tree> vargs = vNULL;
3911 size_t i, nargs;
3912 tree lhs, rtype, ratype;
3913 vec<constructor_elt, va_gc> *ret_ctor_elts = NULL;
3914
3915 /* Is STMT a vectorizable call? */
3916 gcall *stmt = dyn_cast <gcall *> (stmt_info->stmt);
3917 if (!stmt)
3918 return false;
3919
3920 fndecl = gimple_call_fndecl (stmt);
3921 if (fndecl == NULL_TREE)
3922 return false;
3923
3924 struct cgraph_node *node = cgraph_node::get (fndecl);
3925 if (node == NULL || node->simd_clones == NULL)
3926 return false;
3927
3928 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3929 return false;
3930
3931 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
3932 && ! vec_stmt)
3933 return false;
3934
3935 if (gimple_call_lhs (stmt)
3936 && TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
3937 return false;
3938
3939 gcc_checking_assert (!stmt_can_throw_internal (cfun, stmt));
3940
3941 vectype = STMT_VINFO_VECTYPE (stmt_info);
3942
3943 if (loop_vinfo && nested_in_vect_loop_p (loop, stmt_info))
3944 return false;
3945
3946 /* FORNOW */
3947 if (slp_node)
3948 return false;
3949
3950 /* Process function arguments. */
3951 nargs = gimple_call_num_args (stmt);
3952
3953 /* Bail out if the function has zero arguments. */
3954 if (nargs == 0)
3955 return false;
3956
3957 arginfo.reserve (nargs, true);
3958
3959 for (i = 0; i < nargs; i++)
3960 {
3961 simd_call_arg_info thisarginfo;
3962 affine_iv iv;
3963
3964 thisarginfo.linear_step = 0;
3965 thisarginfo.align = 0;
3966 thisarginfo.op = NULL_TREE;
3967 thisarginfo.simd_lane_linear = false;
3968
3969 op = gimple_call_arg (stmt, i);
3970 if (!vect_is_simple_use (op, vinfo, &thisarginfo.dt,
3971 &thisarginfo.vectype)
3972 || thisarginfo.dt == vect_uninitialized_def)
3973 {
3974 if (dump_enabled_p ())
3975 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3976 "use not simple.\n");
3977 return false;
3978 }
3979
3980 if (thisarginfo.dt == vect_constant_def
3981 || thisarginfo.dt == vect_external_def)
3982 gcc_assert (thisarginfo.vectype == NULL_TREE);
3983 else
3984 {
3985 gcc_assert (thisarginfo.vectype != NULL_TREE);
3986 if (VECTOR_BOOLEAN_TYPE_P (thisarginfo.vectype))
3987 {
3988 if (dump_enabled_p ())
3989 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3990 "vector mask arguments are not supported\n");
3991 return false;
3992 }
3993 }
3994
3995 /* For linear arguments, the analyze phase should have saved
3996 the base and step in STMT_VINFO_SIMD_CLONE_INFO. */
3997 if (i * 3 + 4 <= STMT_VINFO_SIMD_CLONE_INFO (stmt_info).length ()
3998 && STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2])
3999 {
4000 gcc_assert (vec_stmt);
4001 thisarginfo.linear_step
4002 = tree_to_shwi (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2]);
4003 thisarginfo.op
4004 = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 1];
4005 thisarginfo.simd_lane_linear
4006 = (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 3]
4007 == boolean_true_node);
4008 /* If loop has been peeled for alignment, we need to adjust it. */
4009 tree n1 = LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo);
4010 tree n2 = LOOP_VINFO_NITERS (loop_vinfo);
4011 if (n1 != n2 && !thisarginfo.simd_lane_linear)
4012 {
4013 tree bias = fold_build2 (MINUS_EXPR, TREE_TYPE (n1), n1, n2);
4014 tree step = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2];
4015 tree opt = TREE_TYPE (thisarginfo.op);
4016 bias = fold_convert (TREE_TYPE (step), bias);
4017 bias = fold_build2 (MULT_EXPR, TREE_TYPE (step), bias, step);
4018 thisarginfo.op
4019 = fold_build2 (POINTER_TYPE_P (opt)
4020 ? POINTER_PLUS_EXPR : PLUS_EXPR, opt,
4021 thisarginfo.op, bias);
4022 }
4023 }
4024 else if (!vec_stmt
4025 && thisarginfo.dt != vect_constant_def
4026 && thisarginfo.dt != vect_external_def
4027 && loop_vinfo
4028 && TREE_CODE (op) == SSA_NAME
4029 && simple_iv (loop, loop_containing_stmt (stmt), op,
4030 &iv, false)
4031 && tree_fits_shwi_p (iv.step))
4032 {
4033 thisarginfo.linear_step = tree_to_shwi (iv.step);
4034 thisarginfo.op = iv.base;
4035 }
4036 else if ((thisarginfo.dt == vect_constant_def
4037 || thisarginfo.dt == vect_external_def)
4038 && POINTER_TYPE_P (TREE_TYPE (op)))
4039 thisarginfo.align = get_pointer_alignment (op) / BITS_PER_UNIT;
4040 /* Addresses of array elements indexed by GOMP_SIMD_LANE are
4041 linear too. */
4042 if (POINTER_TYPE_P (TREE_TYPE (op))
4043 && !thisarginfo.linear_step
4044 && !vec_stmt
4045 && thisarginfo.dt != vect_constant_def
4046 && thisarginfo.dt != vect_external_def
4047 && loop_vinfo
4048 && !slp_node
4049 && TREE_CODE (op) == SSA_NAME)
4050 vect_simd_lane_linear (op, loop, &thisarginfo);
4051
4052 arginfo.quick_push (thisarginfo);
4053 }
4054
4055 unsigned HOST_WIDE_INT vf;
4056 if (!LOOP_VINFO_VECT_FACTOR (loop_vinfo).is_constant (&vf))
4057 {
4058 if (dump_enabled_p ())
4059 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4060 "not considering SIMD clones; not yet supported"
4061 " for variable-width vectors.\n");
4062 return false;
4063 }
4064
4065 unsigned int badness = 0;
4066 struct cgraph_node *bestn = NULL;
4067 if (STMT_VINFO_SIMD_CLONE_INFO (stmt_info).exists ())
4068 bestn = cgraph_node::get (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[0]);
4069 else
4070 for (struct cgraph_node *n = node->simd_clones; n != NULL;
4071 n = n->simdclone->next_clone)
4072 {
4073 unsigned int this_badness = 0;
4074 if (n->simdclone->simdlen > vf
4075 || n->simdclone->nargs != nargs)
4076 continue;
4077 if (n->simdclone->simdlen < vf)
4078 this_badness += (exact_log2 (vf)
4079 - exact_log2 (n->simdclone->simdlen)) * 1024;
4080 if (n->simdclone->inbranch)
4081 this_badness += 2048;
4082 int target_badness = targetm.simd_clone.usable (n);
4083 if (target_badness < 0)
4084 continue;
4085 this_badness += target_badness * 512;
4086 /* FORNOW: Have to add code to add the mask argument. */
4087 if (n->simdclone->inbranch)
4088 continue;
4089 for (i = 0; i < nargs; i++)
4090 {
4091 switch (n->simdclone->args[i].arg_type)
4092 {
4093 case SIMD_CLONE_ARG_TYPE_VECTOR:
4094 if (!useless_type_conversion_p
4095 (n->simdclone->args[i].orig_type,
4096 TREE_TYPE (gimple_call_arg (stmt, i))))
4097 i = -1;
4098 else if (arginfo[i].dt == vect_constant_def
4099 || arginfo[i].dt == vect_external_def
4100 || arginfo[i].linear_step)
4101 this_badness += 64;
4102 break;
4103 case SIMD_CLONE_ARG_TYPE_UNIFORM:
4104 if (arginfo[i].dt != vect_constant_def
4105 && arginfo[i].dt != vect_external_def)
4106 i = -1;
4107 break;
4108 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
4109 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
4110 if (arginfo[i].dt == vect_constant_def
4111 || arginfo[i].dt == vect_external_def
4112 || (arginfo[i].linear_step
4113 != n->simdclone->args[i].linear_step))
4114 i = -1;
4115 break;
4116 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
4117 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
4118 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP:
4119 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP:
4120 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
4121 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
4122 /* FORNOW */
4123 i = -1;
4124 break;
4125 case SIMD_CLONE_ARG_TYPE_MASK:
4126 gcc_unreachable ();
4127 }
4128 if (i == (size_t) -1)
4129 break;
4130 if (n->simdclone->args[i].alignment > arginfo[i].align)
4131 {
4132 i = -1;
4133 break;
4134 }
4135 if (arginfo[i].align)
4136 this_badness += (exact_log2 (arginfo[i].align)
4137 - exact_log2 (n->simdclone->args[i].alignment));
4138 }
4139 if (i == (size_t) -1)
4140 continue;
4141 if (bestn == NULL || this_badness < badness)
4142 {
4143 bestn = n;
4144 badness = this_badness;
4145 }
4146 }
4147
4148 if (bestn == NULL)
4149 return false;
4150
4151 for (i = 0; i < nargs; i++)
4152 if ((arginfo[i].dt == vect_constant_def
4153 || arginfo[i].dt == vect_external_def)
4154 && bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_VECTOR)
4155 {
4156 tree arg_type = TREE_TYPE (gimple_call_arg (stmt, i));
4157 arginfo[i].vectype = get_vectype_for_scalar_type (vinfo, arg_type,
4158 slp_node);
4159 if (arginfo[i].vectype == NULL
4160 || (simd_clone_subparts (arginfo[i].vectype)
4161 > bestn->simdclone->simdlen))
4162 return false;
4163 }
4164
4165 fndecl = bestn->decl;
4166 nunits = bestn->simdclone->simdlen;
4167 ncopies = vf / nunits;
4168
4169 /* If the function isn't const, only allow it in simd loops where the user
4170 has asserted that at least nunits consecutive iterations can be
4171 performed using SIMD instructions. */
4172 if ((loop == NULL || (unsigned) loop->safelen < nunits)
4173 && gimple_vuse (stmt))
4174 return false;
4175
4176 /* Sanity check: make sure that at least one copy of the vectorized stmt
4177 needs to be generated. */
4178 gcc_assert (ncopies >= 1);
4179
4180 if (!vec_stmt) /* transformation not required. */
4181 {
4182 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (bestn->decl);
4183 for (i = 0; i < nargs; i++)
4184 if ((bestn->simdclone->args[i].arg_type
4185 == SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP)
4186 || (bestn->simdclone->args[i].arg_type
4187 == SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP))
4188 {
4189 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_grow_cleared (i * 3
4190 + 1);
4191 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (arginfo[i].op);
4192 tree lst = POINTER_TYPE_P (TREE_TYPE (arginfo[i].op))
4193 ? size_type_node : TREE_TYPE (arginfo[i].op);
4194 tree ls = build_int_cst (lst, arginfo[i].linear_step);
4195 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (ls);
4196 tree sll = arginfo[i].simd_lane_linear
4197 ? boolean_true_node : boolean_false_node;
4198 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (sll);
4199 }
4200 STMT_VINFO_TYPE (stmt_info) = call_simd_clone_vec_info_type;
4201 DUMP_VECT_SCOPE ("vectorizable_simd_clone_call");
4202 /* vect_model_simple_cost (stmt_info, ncopies, dt, slp_node, cost_vec); */
4203 return true;
4204 }
4205
4206 /* Transform. */
4207
4208 if (dump_enabled_p ())
4209 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
4210
4211 /* Handle def. */
4212 scalar_dest = gimple_call_lhs (stmt);
4213 vec_dest = NULL_TREE;
4214 rtype = NULL_TREE;
4215 ratype = NULL_TREE;
4216 if (scalar_dest)
4217 {
4218 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4219 rtype = TREE_TYPE (TREE_TYPE (fndecl));
4220 if (TREE_CODE (rtype) == ARRAY_TYPE)
4221 {
4222 ratype = rtype;
4223 rtype = TREE_TYPE (ratype);
4224 }
4225 }
4226
4227 prev_stmt_info = NULL;
4228 for (j = 0; j < ncopies; ++j)
4229 {
4230 /* Build argument list for the vectorized call. */
4231 if (j == 0)
4232 vargs.create (nargs);
4233 else
4234 vargs.truncate (0);
4235
4236 for (i = 0; i < nargs; i++)
4237 {
4238 unsigned int k, l, m, o;
4239 tree atype;
4240 op = gimple_call_arg (stmt, i);
4241 switch (bestn->simdclone->args[i].arg_type)
4242 {
4243 case SIMD_CLONE_ARG_TYPE_VECTOR:
4244 atype = bestn->simdclone->args[i].vector_type;
4245 o = nunits / simd_clone_subparts (atype);
4246 for (m = j * o; m < (j + 1) * o; m++)
4247 {
4248 if (simd_clone_subparts (atype)
4249 < simd_clone_subparts (arginfo[i].vectype))
4250 {
4251 poly_uint64 prec = GET_MODE_BITSIZE (TYPE_MODE (atype));
4252 k = (simd_clone_subparts (arginfo[i].vectype)
4253 / simd_clone_subparts (atype));
4254 gcc_assert ((k & (k - 1)) == 0);
4255 if (m == 0)
4256 vec_oprnd0
4257 = vect_get_vec_def_for_operand (op, stmt_info);
4258 else
4259 {
4260 vec_oprnd0 = arginfo[i].op;
4261 if ((m & (k - 1)) == 0)
4262 vec_oprnd0
4263 = vect_get_vec_def_for_stmt_copy (vinfo,
4264 vec_oprnd0);
4265 }
4266 arginfo[i].op = vec_oprnd0;
4267 vec_oprnd0
4268 = build3 (BIT_FIELD_REF, atype, vec_oprnd0,
4269 bitsize_int (prec),
4270 bitsize_int ((m & (k - 1)) * prec));
4271 gassign *new_stmt
4272 = gimple_build_assign (make_ssa_name (atype),
4273 vec_oprnd0);
4274 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
4275 vargs.safe_push (gimple_assign_lhs (new_stmt));
4276 }
4277 else
4278 {
4279 k = (simd_clone_subparts (atype)
4280 / simd_clone_subparts (arginfo[i].vectype));
4281 gcc_assert ((k & (k - 1)) == 0);
4282 vec<constructor_elt, va_gc> *ctor_elts;
4283 if (k != 1)
4284 vec_alloc (ctor_elts, k);
4285 else
4286 ctor_elts = NULL;
4287 for (l = 0; l < k; l++)
4288 {
4289 if (m == 0 && l == 0)
4290 vec_oprnd0
4291 = vect_get_vec_def_for_operand (op, stmt_info);
4292 else
4293 vec_oprnd0
4294 = vect_get_vec_def_for_stmt_copy (vinfo,
4295 arginfo[i].op);
4296 arginfo[i].op = vec_oprnd0;
4297 if (k == 1)
4298 break;
4299 CONSTRUCTOR_APPEND_ELT (ctor_elts, NULL_TREE,
4300 vec_oprnd0);
4301 }
4302 if (k == 1)
4303 vargs.safe_push (vec_oprnd0);
4304 else
4305 {
4306 vec_oprnd0 = build_constructor (atype, ctor_elts);
4307 gassign *new_stmt
4308 = gimple_build_assign (make_ssa_name (atype),
4309 vec_oprnd0);
4310 vect_finish_stmt_generation (stmt_info, new_stmt,
4311 gsi);
4312 vargs.safe_push (gimple_assign_lhs (new_stmt));
4313 }
4314 }
4315 }
4316 break;
4317 case SIMD_CLONE_ARG_TYPE_UNIFORM:
4318 vargs.safe_push (op);
4319 break;
4320 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
4321 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
4322 if (j == 0)
4323 {
4324 gimple_seq stmts;
4325 arginfo[i].op
4326 = force_gimple_operand (unshare_expr (arginfo[i].op),
4327 &stmts, true, NULL_TREE);
4328 if (stmts != NULL)
4329 {
4330 basic_block new_bb;
4331 edge pe = loop_preheader_edge (loop);
4332 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
4333 gcc_assert (!new_bb);
4334 }
4335 if (arginfo[i].simd_lane_linear)
4336 {
4337 vargs.safe_push (arginfo[i].op);
4338 break;
4339 }
4340 tree phi_res = copy_ssa_name (op);
4341 gphi *new_phi = create_phi_node (phi_res, loop->header);
4342 loop_vinfo->add_stmt (new_phi);
4343 add_phi_arg (new_phi, arginfo[i].op,
4344 loop_preheader_edge (loop), UNKNOWN_LOCATION);
4345 enum tree_code code
4346 = POINTER_TYPE_P (TREE_TYPE (op))
4347 ? POINTER_PLUS_EXPR : PLUS_EXPR;
4348 tree type = POINTER_TYPE_P (TREE_TYPE (op))
4349 ? sizetype : TREE_TYPE (op);
4350 widest_int cst
4351 = wi::mul (bestn->simdclone->args[i].linear_step,
4352 ncopies * nunits);
4353 tree tcst = wide_int_to_tree (type, cst);
4354 tree phi_arg = copy_ssa_name (op);
4355 gassign *new_stmt
4356 = gimple_build_assign (phi_arg, code, phi_res, tcst);
4357 gimple_stmt_iterator si = gsi_after_labels (loop->header);
4358 gsi_insert_after (&si, new_stmt, GSI_NEW_STMT);
4359 loop_vinfo->add_stmt (new_stmt);
4360 add_phi_arg (new_phi, phi_arg, loop_latch_edge (loop),
4361 UNKNOWN_LOCATION);
4362 arginfo[i].op = phi_res;
4363 vargs.safe_push (phi_res);
4364 }
4365 else
4366 {
4367 enum tree_code code
4368 = POINTER_TYPE_P (TREE_TYPE (op))
4369 ? POINTER_PLUS_EXPR : PLUS_EXPR;
4370 tree type = POINTER_TYPE_P (TREE_TYPE (op))
4371 ? sizetype : TREE_TYPE (op);
4372 widest_int cst
4373 = wi::mul (bestn->simdclone->args[i].linear_step,
4374 j * nunits);
4375 tree tcst = wide_int_to_tree (type, cst);
4376 new_temp = make_ssa_name (TREE_TYPE (op));
4377 gassign *new_stmt
4378 = gimple_build_assign (new_temp, code,
4379 arginfo[i].op, tcst);
4380 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
4381 vargs.safe_push (new_temp);
4382 }
4383 break;
4384 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
4385 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP:
4386 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
4387 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP:
4388 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
4389 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
4390 default:
4391 gcc_unreachable ();
4392 }
4393 }
4394
4395 gcall *new_call = gimple_build_call_vec (fndecl, vargs);
4396 if (vec_dest)
4397 {
4398 gcc_assert (ratype || simd_clone_subparts (rtype) == nunits);
4399 if (ratype)
4400 new_temp = create_tmp_var (ratype);
4401 else if (simd_clone_subparts (vectype)
4402 == simd_clone_subparts (rtype))
4403 new_temp = make_ssa_name (vec_dest, new_call);
4404 else
4405 new_temp = make_ssa_name (rtype, new_call);
4406 gimple_call_set_lhs (new_call, new_temp);
4407 }
4408 stmt_vec_info new_stmt_info
4409 = vect_finish_stmt_generation (stmt_info, new_call, gsi);
4410
4411 if (vec_dest)
4412 {
4413 if (simd_clone_subparts (vectype) < nunits)
4414 {
4415 unsigned int k, l;
4416 poly_uint64 prec = GET_MODE_BITSIZE (TYPE_MODE (vectype));
4417 poly_uint64 bytes = GET_MODE_SIZE (TYPE_MODE (vectype));
4418 k = nunits / simd_clone_subparts (vectype);
4419 gcc_assert ((k & (k - 1)) == 0);
4420 for (l = 0; l < k; l++)
4421 {
4422 tree t;
4423 if (ratype)
4424 {
4425 t = build_fold_addr_expr (new_temp);
4426 t = build2 (MEM_REF, vectype, t,
4427 build_int_cst (TREE_TYPE (t), l * bytes));
4428 }
4429 else
4430 t = build3 (BIT_FIELD_REF, vectype, new_temp,
4431 bitsize_int (prec), bitsize_int (l * prec));
4432 gimple *new_stmt
4433 = gimple_build_assign (make_ssa_name (vectype), t);
4434 new_stmt_info
4435 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
4436
4437 if (j == 0 && l == 0)
4438 STMT_VINFO_VEC_STMT (stmt_info)
4439 = *vec_stmt = new_stmt_info;
4440 else
4441 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
4442
4443 prev_stmt_info = new_stmt_info;
4444 }
4445
4446 if (ratype)
4447 vect_clobber_variable (stmt_info, gsi, new_temp);
4448 continue;
4449 }
4450 else if (simd_clone_subparts (vectype) > nunits)
4451 {
4452 unsigned int k = (simd_clone_subparts (vectype)
4453 / simd_clone_subparts (rtype));
4454 gcc_assert ((k & (k - 1)) == 0);
4455 if ((j & (k - 1)) == 0)
4456 vec_alloc (ret_ctor_elts, k);
4457 if (ratype)
4458 {
4459 unsigned int m, o = nunits / simd_clone_subparts (rtype);
4460 for (m = 0; m < o; m++)
4461 {
4462 tree tem = build4 (ARRAY_REF, rtype, new_temp,
4463 size_int (m), NULL_TREE, NULL_TREE);
4464 gimple *new_stmt
4465 = gimple_build_assign (make_ssa_name (rtype), tem);
4466 new_stmt_info
4467 = vect_finish_stmt_generation (stmt_info, new_stmt,
4468 gsi);
4469 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE,
4470 gimple_assign_lhs (new_stmt));
4471 }
4472 vect_clobber_variable (stmt_info, gsi, new_temp);
4473 }
4474 else
4475 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE, new_temp);
4476 if ((j & (k - 1)) != k - 1)
4477 continue;
4478 vec_oprnd0 = build_constructor (vectype, ret_ctor_elts);
4479 gimple *new_stmt
4480 = gimple_build_assign (make_ssa_name (vec_dest), vec_oprnd0);
4481 new_stmt_info
4482 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
4483
4484 if ((unsigned) j == k - 1)
4485 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
4486 else
4487 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
4488
4489 prev_stmt_info = new_stmt_info;
4490 continue;
4491 }
4492 else if (ratype)
4493 {
4494 tree t = build_fold_addr_expr (new_temp);
4495 t = build2 (MEM_REF, vectype, t,
4496 build_int_cst (TREE_TYPE (t), 0));
4497 gimple *new_stmt
4498 = gimple_build_assign (make_ssa_name (vec_dest), t);
4499 new_stmt_info
4500 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
4501 vect_clobber_variable (stmt_info, gsi, new_temp);
4502 }
4503 }
4504
4505 if (j == 0)
4506 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
4507 else
4508 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
4509
4510 prev_stmt_info = new_stmt_info;
4511 }
4512
4513 vargs.release ();
4514
4515 /* The call in STMT might prevent it from being removed in dce.
4516 We however cannot remove it here, due to the way the ssa name
4517 it defines is mapped to the new definition. So just replace
4518 rhs of the statement with something harmless. */
4519
4520 if (slp_node)
4521 return true;
4522
4523 gimple *new_stmt;
4524 if (scalar_dest)
4525 {
4526 type = TREE_TYPE (scalar_dest);
4527 lhs = gimple_call_lhs (vect_orig_stmt (stmt_info)->stmt);
4528 new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
4529 }
4530 else
4531 new_stmt = gimple_build_nop ();
4532 vinfo->replace_stmt (gsi, vect_orig_stmt (stmt_info), new_stmt);
4533 unlink_stmt_vdef (stmt);
4534
4535 return true;
4536 }
4537
4538
4539 /* Function vect_gen_widened_results_half
4540
4541 Create a vector stmt whose code, number of operands, and result variable
4542 are CODE, OP_TYPE, and VEC_DEST, and whose operands are VEC_OPRND0 and
4543 VEC_OPRND1 (VEC_OPRND1 is ignored unless the operation is a binary_op).
4544 The new vector stmt is to be inserted at GSI. STMT_INFO is the original
4545 scalar stmt that we are vectorizing. */
4547
4548 static gimple *
4549 vect_gen_widened_results_half (enum tree_code code,
4550 tree vec_oprnd0, tree vec_oprnd1, int op_type,
4551 tree vec_dest, gimple_stmt_iterator *gsi,
4552 stmt_vec_info stmt_info)
4553 {
4554 gimple *new_stmt;
4555 tree new_temp;
4556
4557 /* Generate half of the widened result: */
4558 gcc_assert (op_type == TREE_CODE_LENGTH (code));
4559 if (op_type != binary_op)
4560 vec_oprnd1 = NULL;
4561 new_stmt = gimple_build_assign (vec_dest, code, vec_oprnd0, vec_oprnd1);
4562 new_temp = make_ssa_name (vec_dest, new_stmt);
4563 gimple_assign_set_lhs (new_stmt, new_temp);
4564 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
4565
4566 return new_stmt;
4567 }
4568
4569
4570 /* Get vectorized definitions for loop-based vectorization of STMT_INFO.
4571 For the first operand we call vect_get_vec_def_for_operand (with OPRND
4572 containing the scalar operand), and for the rest we get a copy with
4573 vect_get_vec_def_for_stmt_copy() using the previous vector definition
4574 (stored in OPRND). See vect_get_vec_def_for_stmt_copy() for details.
4575 The vectors are collected into VEC_OPRNDS. */
4576
4577 static void
4578 vect_get_loop_based_defs (tree *oprnd, stmt_vec_info stmt_info,
4579 vec<tree> *vec_oprnds, int multi_step_cvt)
4580 {
4581 vec_info *vinfo = stmt_info->vinfo;
4582 tree vec_oprnd;
4583
4584 /* Get first vector operand. */
4585 /* All the vector operands except the very first one (that is scalar oprnd)
4586 are stmt copies. */
4587 if (TREE_CODE (TREE_TYPE (*oprnd)) != VECTOR_TYPE)
4588 vec_oprnd = vect_get_vec_def_for_operand (*oprnd, stmt_info);
4589 else
4590 vec_oprnd = vect_get_vec_def_for_stmt_copy (vinfo, *oprnd);
4591
4592 vec_oprnds->quick_push (vec_oprnd);
4593
4594 /* Get second vector operand. */
4595 vec_oprnd = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd);
4596 vec_oprnds->quick_push (vec_oprnd);
4597
4598 *oprnd = vec_oprnd;
4599
4600 /* For conversion in multiple steps, continue to get operands
4601 recursively. */
4602 if (multi_step_cvt)
4603 vect_get_loop_based_defs (oprnd, stmt_info, vec_oprnds,
4604 multi_step_cvt - 1);
4605 }
4606
4607
4608 /* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
4609 For multi-step conversions store the resulting vectors and call the function
4610 recursively. */
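/* For instance (a sketch): narrowing four V4SI vectors to V16QI takes
   two levels: the first level packs pairs of V4SI into two V8HI vectors,
   and the recursive call packs those two V8HI into the final V16QI using
   VEC_PACK_TRUNC_EXPR.  */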
4611
4612 static void
4613 vect_create_vectorized_demotion_stmts (vec<tree> *vec_oprnds,
4614 int multi_step_cvt,
4615 stmt_vec_info stmt_info,
4616 vec<tree> vec_dsts,
4617 gimple_stmt_iterator *gsi,
4618 slp_tree slp_node, enum tree_code code,
4619 stmt_vec_info *prev_stmt_info)
4620 {
4621 unsigned int i;
4622 tree vop0, vop1, new_tmp, vec_dest;
4623
4624 vec_dest = vec_dsts.pop ();
4625
4626 for (i = 0; i < vec_oprnds->length (); i += 2)
4627 {
4628 /* Create demotion operation. */
4629 vop0 = (*vec_oprnds)[i];
4630 vop1 = (*vec_oprnds)[i + 1];
4631 gassign *new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
4632 new_tmp = make_ssa_name (vec_dest, new_stmt);
4633 gimple_assign_set_lhs (new_stmt, new_tmp);
4634 stmt_vec_info new_stmt_info
4635 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
4636
4637 if (multi_step_cvt)
4638 /* Store the resulting vector for next recursive call. */
4639 (*vec_oprnds)[i/2] = new_tmp;
4640 else
4641 {
4642 /* This is the last step of the conversion sequence. Store the
4643 vectors in SLP_NODE or in vector info of the scalar statement
4644 (or in STMT_VINFO_RELATED_STMT chain). */
4645 if (slp_node)
4646 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
4647 else
4648 {
4649 if (!*prev_stmt_info)
4650 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt_info;
4651 else
4652 STMT_VINFO_RELATED_STMT (*prev_stmt_info) = new_stmt_info;
4653
4654 *prev_stmt_info = new_stmt_info;
4655 }
4656 }
4657 }
4658
4659 /* For multi-step demotion operations we first generate demotion operations
4660 from the source type to the intermediate types, and then combine the
4661 results (stored in VEC_OPRNDS) in a demotion operation to the destination
4662 type. */
4663 if (multi_step_cvt)
4664 {
4665 /* At each level of recursion we have half of the operands we had at the
4666 previous level. */
4667 vec_oprnds->truncate ((i+1)/2);
4668 vect_create_vectorized_demotion_stmts (vec_oprnds, multi_step_cvt - 1,
4669 stmt_info, vec_dsts, gsi,
4670 slp_node, VEC_PACK_TRUNC_EXPR,
4671 prev_stmt_info);
4672 }
4673
4674 vec_dsts.quick_push (vec_dest);
4675 }
4676
4677
4678 /* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
4679 and VEC_OPRNDS1, for a binary operation associated with scalar statement
4680 STMT_INFO. For multi-step conversions store the resulting vectors and
4681 call the function recursively. */
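/* For example (an illustrative sketch): widening a V8HI multiplication
   to V4SI uses CODE1 = VEC_WIDEN_MULT_LO_EXPR and
   CODE2 = VEC_WIDEN_MULT_HI_EXPR, so each pair of V8HI operands yields
   two V4SI halves, which are pushed back into VEC_OPRNDS0 for any
   further widening step.  */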
4682
4683 static void
4684 vect_create_vectorized_promotion_stmts (vec<tree> *vec_oprnds0,
4685 vec<tree> *vec_oprnds1,
4686 stmt_vec_info stmt_info, tree vec_dest,
4687 gimple_stmt_iterator *gsi,
4688 enum tree_code code1,
4689 enum tree_code code2, int op_type)
4690 {
4691 int i;
4692 tree vop0, vop1, new_tmp1, new_tmp2;
4693 gimple *new_stmt1, *new_stmt2;
4694 vec<tree> vec_tmp = vNULL;
4695
4696 vec_tmp.create (vec_oprnds0->length () * 2);
4697 FOR_EACH_VEC_ELT (*vec_oprnds0, i, vop0)
4698 {
4699 if (op_type == binary_op)
4700 vop1 = (*vec_oprnds1)[i];
4701 else
4702 vop1 = NULL_TREE;
4703
4704 /* Generate the two halves of promotion operation. */
4705 new_stmt1 = vect_gen_widened_results_half (code1, vop0, vop1,
4706 op_type, vec_dest, gsi,
4707 stmt_info);
4708 new_stmt2 = vect_gen_widened_results_half (code2, vop0, vop1,
4709 op_type, vec_dest, gsi,
4710 stmt_info);
4711 if (is_gimple_call (new_stmt1))
4712 {
4713 new_tmp1 = gimple_call_lhs (new_stmt1);
4714 new_tmp2 = gimple_call_lhs (new_stmt2);
4715 }
4716 else
4717 {
4718 new_tmp1 = gimple_assign_lhs (new_stmt1);
4719 new_tmp2 = gimple_assign_lhs (new_stmt2);
4720 }
4721
4722 /* Store the results for the next step. */
4723 vec_tmp.quick_push (new_tmp1);
4724 vec_tmp.quick_push (new_tmp2);
4725 }
4726
4727 vec_oprnds0->release ();
4728 *vec_oprnds0 = vec_tmp;
4729 }
4730
4731
4732 /* Check if STMT_INFO performs a conversion operation that can be vectorized.
4733 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
4734 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
4735 Return true if STMT_INFO is vectorizable in this way. */
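/* Illustrative cases (a sketch): a conversion between int and unsigned
   int is the NONE case below; int -> short is NARROW (two input vectors
   are packed into one narrower-element vector); short -> int is WIDEN
   (one input vector is unpacked into two wider-element vectors).  */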
4736
4737 static bool
4738 vectorizable_conversion (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
4739 stmt_vec_info *vec_stmt, slp_tree slp_node,
4740 stmt_vector_for_cost *cost_vec)
4741 {
4742 tree vec_dest;
4743 tree scalar_dest;
4744 tree op0, op1 = NULL_TREE;
4745 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
4746 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4747 enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
4748 enum tree_code codecvt1 = ERROR_MARK, codecvt2 = ERROR_MARK;
4749 tree new_temp;
4750 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
4751 int ndts = 2;
4752 stmt_vec_info prev_stmt_info;
4753 poly_uint64 nunits_in;
4754 poly_uint64 nunits_out;
4755 tree vectype_out, vectype_in;
4756 int ncopies, i, j;
4757 tree lhs_type, rhs_type;
4758 enum { NARROW, NONE, WIDEN } modifier;
4759 vec<tree> vec_oprnds0 = vNULL;
4760 vec<tree> vec_oprnds1 = vNULL;
4761 tree vop0;
4762 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
4763 vec_info *vinfo = stmt_info->vinfo;
4764 int multi_step_cvt = 0;
4765 vec<tree> interm_types = vNULL;
4766 tree last_oprnd, intermediate_type, cvt_type = NULL_TREE;
4767 int op_type;
4768 unsigned short fltsz;
4769
4770 /* Is STMT a vectorizable conversion? */
4771
4772 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4773 return false;
4774
4775 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
4776 && ! vec_stmt)
4777 return false;
4778
4779 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
4780 if (!stmt)
4781 return false;
4782
4783 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
4784 return false;
4785
4786 code = gimple_assign_rhs_code (stmt);
4787 if (!CONVERT_EXPR_CODE_P (code)
4788 && code != FIX_TRUNC_EXPR
4789 && code != FLOAT_EXPR
4790 && code != WIDEN_MULT_EXPR
4791 && code != WIDEN_LSHIFT_EXPR)
4792 return false;
4793
4794 op_type = TREE_CODE_LENGTH (code);
4795
4796 /* Check types of lhs and rhs. */
4797 scalar_dest = gimple_assign_lhs (stmt);
4798 lhs_type = TREE_TYPE (scalar_dest);
4799 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
4800
4801 op0 = gimple_assign_rhs1 (stmt);
4802 rhs_type = TREE_TYPE (op0);
4803
4804 if ((code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
4805 && !((INTEGRAL_TYPE_P (lhs_type)
4806 && INTEGRAL_TYPE_P (rhs_type))
4807 || (SCALAR_FLOAT_TYPE_P (lhs_type)
4808 && SCALAR_FLOAT_TYPE_P (rhs_type))))
4809 return false;
4810
4811 if (!VECTOR_BOOLEAN_TYPE_P (vectype_out)
4812 && ((INTEGRAL_TYPE_P (lhs_type)
4813 && !type_has_mode_precision_p (lhs_type))
4814 || (INTEGRAL_TYPE_P (rhs_type)
4815 && !type_has_mode_precision_p (rhs_type))))
4816 {
4817 if (dump_enabled_p ())
4818 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4819 "type conversion to/from bit-precision unsupported."
4820 "\n");
4821 return false;
4822 }
4823
4824 /* Check the operands of the operation. */
4825 if (!vect_is_simple_use (op0, vinfo, &dt[0], &vectype_in))
4826 {
4827 if (dump_enabled_p ())
4828 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4829 "use not simple.\n");
4830 return false;
4831 }
4832 if (op_type == binary_op)
4833 {
4834 bool ok;
4835
4836 op1 = gimple_assign_rhs2 (stmt);
4837 gcc_assert (code == WIDEN_MULT_EXPR || code == WIDEN_LSHIFT_EXPR);
4838 /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
4839 OP1. */
4840 if (CONSTANT_CLASS_P (op0))
4841 ok = vect_is_simple_use (op1, vinfo, &dt[1], &vectype_in);
4842 else
4843 ok = vect_is_simple_use (op1, vinfo, &dt[1]);
4844
4845 if (!ok)
4846 {
4847 if (dump_enabled_p ())
4848 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4849 "use not simple.\n");
4850 return false;
4851 }
4852 }
4853
4854 /* If op0 is an external or constant def, infer the vector type
4855 from the scalar type. */
4856 if (!vectype_in)
4857 vectype_in = get_vectype_for_scalar_type (vinfo, rhs_type, slp_node);
4858 if (vec_stmt)
4859 gcc_assert (vectype_in);
4860 if (!vectype_in)
4861 {
4862 if (dump_enabled_p ())
4863 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4864 "no vectype for scalar type %T\n", rhs_type);
4865
4866 return false;
4867 }
4868
4869 if (VECTOR_BOOLEAN_TYPE_P (vectype_out)
4870 && !VECTOR_BOOLEAN_TYPE_P (vectype_in))
4871 {
4872 if (dump_enabled_p ())
4873 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4874 "can't convert between boolean and non "
4875 "boolean vectors %T\n", rhs_type);
4876
4877 return false;
4878 }
4879
4880 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
4881 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
4882 if (known_eq (nunits_out, nunits_in))
4883 modifier = NONE;
4884 else if (multiple_p (nunits_out, nunits_in))
4885 modifier = NARROW;
4886 else
4887 {
4888 gcc_checking_assert (multiple_p (nunits_in, nunits_out));
4889 modifier = WIDEN;
4890 }
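   /* Editorial illustration (not part of the original sources): assuming the
      usual same-sized vector types, V4SI -> V4SF (int to float) has equal
      subparts and is classified NONE; int -> short yields more output than
      input elements (e.g. V4SI -> V8HI) and is NARROW; short -> int is the
      reverse and is WIDEN.  For instance the loop

	short in[N];
	int out[N];
	for (int i = 0; i < N; i++)
	  out[i] = in[i];

      is a WIDEN conversion: each vector of 8 shorts is unpacked into two
      vectors of 4 ints.  */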
4891
4892 /* Multiple types in SLP are handled by creating the appropriate number of
4893 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4894 case of SLP. */
4895 if (slp_node)
4896 ncopies = 1;
4897 else if (modifier == NARROW)
4898 ncopies = vect_get_num_copies (loop_vinfo, vectype_out);
4899 else
4900 ncopies = vect_get_num_copies (loop_vinfo, vectype_in);
4901
4902 /* Sanity check: make sure that at least one copy of the vectorized stmt
4903 needs to be generated. */
4904 gcc_assert (ncopies >= 1);
4905
4906 bool found_mode = false;
4907 scalar_mode lhs_mode = SCALAR_TYPE_MODE (lhs_type);
4908 scalar_mode rhs_mode = SCALAR_TYPE_MODE (rhs_type);
4909 opt_scalar_mode rhs_mode_iter;
4910
4911 /* Supportable by target? */
4912 switch (modifier)
4913 {
4914 case NONE:
4915 if (code != FIX_TRUNC_EXPR
4916 && code != FLOAT_EXPR
4917 && !CONVERT_EXPR_CODE_P (code))
4918 return false;
4919 if (supportable_convert_operation (code, vectype_out, vectype_in, &code1))
4920 break;
4921 /* FALLTHRU */
4922 unsupported:
4923 if (dump_enabled_p ())
4924 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4925 "conversion not supported by target.\n");
4926 return false;
4927
4928 case WIDEN:
4929 if (supportable_widening_operation (code, stmt_info, vectype_out,
4930 vectype_in, &code1, &code2,
4931 &multi_step_cvt, &interm_types))
4932 {
4933 /* Binary widening operation can only be supported directly by the
4934 architecture. */
4935 gcc_assert (!(multi_step_cvt && op_type == binary_op));
4936 break;
4937 }
4938
4939 if (code != FLOAT_EXPR
4940 || GET_MODE_SIZE (lhs_mode) <= GET_MODE_SIZE (rhs_mode))
4941 goto unsupported;
4942
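      /* Editorial sketch of the search below (hypothetical target support):
	 when the widening integer-to-float conversion is not handled
	 directly, e.g. for

	   short in[N];
	   double out[N];
	   for (int i = 0; i < N; i++)
	     out[i] = in[i];

	 the loop looks for an intermediate integer mode: the shorts may
	 first be widened to int with NOP_EXPR unpacks (CODE1/CODE2) and the
	 ints then converted to double by a widening FLOAT_EXPR
	 (CODECVT1/CODECVT2); if only a full-width (long long -> double)
	 conversion exists, the search continues with that mode and a plain
	 conversion in CODECVT1 instead.  */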
4943 fltsz = GET_MODE_SIZE (lhs_mode);
4944 FOR_EACH_2XWIDER_MODE (rhs_mode_iter, rhs_mode)
4945 {
4946 rhs_mode = rhs_mode_iter.require ();
4947 if (GET_MODE_SIZE (rhs_mode) > fltsz)
4948 break;
4949
4950 cvt_type
4951 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
4952 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
4953 if (cvt_type == NULL_TREE)
4954 goto unsupported;
4955
4956 if (GET_MODE_SIZE (rhs_mode) == fltsz)
4957 {
4958 if (!supportable_convert_operation (code, vectype_out,
4959 cvt_type, &codecvt1))
4960 goto unsupported;
4961 }
4962 else if (!supportable_widening_operation (code, stmt_info,
4963 vectype_out, cvt_type,
4964 &codecvt1, &codecvt2,
4965 &multi_step_cvt,
4966 &interm_types))
4967 continue;
4968 else
4969 gcc_assert (multi_step_cvt == 0);
4970
4971 if (supportable_widening_operation (NOP_EXPR, stmt_info, cvt_type,
4972 vectype_in, &code1, &code2,
4973 &multi_step_cvt, &interm_types))
4974 {
4975 found_mode = true;
4976 break;
4977 }
4978 }
4979
4980 if (!found_mode)
4981 goto unsupported;
4982
4983 if (GET_MODE_SIZE (rhs_mode) == fltsz)
4984 codecvt2 = ERROR_MARK;
4985 else
4986 {
4987 multi_step_cvt++;
4988 interm_types.safe_push (cvt_type);
4989 cvt_type = NULL_TREE;
4990 }
4991 break;
4992
4993 case NARROW:
4994 gcc_assert (op_type == unary_op);
4995 if (supportable_narrowing_operation (code, vectype_out, vectype_in,
4996 &code1, &multi_step_cvt,
4997 &interm_types))
4998 break;
4999
5000 if (code != FIX_TRUNC_EXPR
5001 || GET_MODE_SIZE (lhs_mode) >= GET_MODE_SIZE (rhs_mode))
5002 goto unsupported;
5003
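      /* Editorial sketch (hypothetical target support): the analogous
	 two-step fallback for float-to-integer narrowing, e.g.

	   double in[N];
	   short out[N];
	   for (int i = 0; i < N; i++)
	     out[i] = in[i];

	 first FIX_TRUNCs each double vector to a same-sized integer vector
	 (CODECVT1) and then packs those integers down to short with
	 NOP_EXPR narrowing steps (CODE1).  */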
5004 cvt_type
5005 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
5006 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
5007 if (cvt_type == NULL_TREE)
5008 goto unsupported;
5009 if (!supportable_convert_operation (code, cvt_type, vectype_in,
5010 &codecvt1))
5011 goto unsupported;
5012 if (supportable_narrowing_operation (NOP_EXPR, vectype_out, cvt_type,
5013 &code1, &multi_step_cvt,
5014 &interm_types))
5015 break;
5016 goto unsupported;
5017
5018 default:
5019 gcc_unreachable ();
5020 }
5021
5022 if (!vec_stmt) /* transformation not required. */
5023 {
5024 DUMP_VECT_SCOPE ("vectorizable_conversion");
5025 if (modifier == NONE)
5026 {
5027 STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type;
5028 vect_model_simple_cost (stmt_info, ncopies, dt, ndts, slp_node,
5029 cost_vec);
5030 }
5031 else if (modifier == NARROW)
5032 {
5033 STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type;
5034 /* The final packing step produces one vector result per copy. */
5035 unsigned int nvectors
5036 = (slp_node ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node) : ncopies);
5037 vect_model_promotion_demotion_cost (stmt_info, dt, nvectors,
5038 multi_step_cvt, cost_vec);
5039 }
5040 else
5041 {
5042 STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type;
5043 /* The initial unpacking step produces two vector results
5044 per copy. MULTI_STEP_CVT is 0 for a single conversion,
5045 so >> MULTI_STEP_CVT divides by 2^(number of steps - 1). */
5046 unsigned int nvectors
5047 = (slp_node
5048 ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node) >> multi_step_cvt
5049 : ncopies * 2);
5050 vect_model_promotion_demotion_cost (stmt_info, dt, nvectors,
5051 multi_step_cvt, cost_vec);
5052 }
5053 interm_types.release ();
5054 return true;
5055 }
5056
5057 /* Transform. */
5058 if (dump_enabled_p ())
5059 dump_printf_loc (MSG_NOTE, vect_location,
5060 "transform conversion. ncopies = %d.\n", ncopies);
5061
5062 if (op_type == binary_op)
5063 {
5064 if (CONSTANT_CLASS_P (op0))
5065 op0 = fold_convert (TREE_TYPE (op1), op0);
5066 else if (CONSTANT_CLASS_P (op1))
5067 op1 = fold_convert (TREE_TYPE (op0), op1);
5068 }
5069
5070 /* In case of multi-step conversion, we first generate conversion operations
5071 to the intermediate types, and then from those types to the final one.
5072 We create vector destinations for the intermediate type (TYPES) received
5073 from supportable_*_operation, and store them in the correct order
5074 for future use in vect_create_vectorized_*_stmts (). */
5075 auto_vec<tree> vec_dsts (multi_step_cvt + 1);
5076 vec_dest = vect_create_destination_var (scalar_dest,
5077 (cvt_type && modifier == WIDEN)
5078 ? cvt_type : vectype_out);
5079 vec_dsts.quick_push (vec_dest);
5080
5081 if (multi_step_cvt)
5082 {
5083 for (i = interm_types.length () - 1;
5084 interm_types.iterate (i, &intermediate_type); i--)
5085 {
5086 vec_dest = vect_create_destination_var (scalar_dest,
5087 intermediate_type);
5088 vec_dsts.quick_push (vec_dest);
5089 }
5090 }
5091
5092 if (cvt_type)
5093 vec_dest = vect_create_destination_var (scalar_dest,
5094 modifier == WIDEN
5095 ? vectype_out : cvt_type);
5096
5097 if (!slp_node)
5098 {
5099 if (modifier == WIDEN)
5100 {
5101 vec_oprnds0.create (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1);
5102 if (op_type == binary_op)
5103 vec_oprnds1.create (1);
5104 }
5105 else if (modifier == NARROW)
5106 vec_oprnds0.create (
5107 2 * (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1));
5108 }
5109 else if (code == WIDEN_LSHIFT_EXPR)
5110 vec_oprnds1.create (slp_node->vec_stmts_size);
5111
5112 last_oprnd = op0;
5113 prev_stmt_info = NULL;
5114 switch (modifier)
5115 {
5116 case NONE:
5117 for (j = 0; j < ncopies; j++)
5118 {
5119 if (j == 0)
5120 vect_get_vec_defs (op0, NULL, stmt_info, &vec_oprnds0,
5121 NULL, slp_node);
5122 else
5123 vect_get_vec_defs_for_stmt_copy (vinfo, &vec_oprnds0, NULL);
5124
5125 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5126 {
5127 stmt_vec_info new_stmt_info;
5128 /* Arguments are ready, create the new vector stmt. */
5129 gcc_assert (TREE_CODE_LENGTH (code1) == unary_op);
5130 gassign *new_stmt = gimple_build_assign (vec_dest, code1, vop0);
5131 new_temp = make_ssa_name (vec_dest, new_stmt);
5132 gimple_assign_set_lhs (new_stmt, new_temp);
5133 new_stmt_info
5134 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
5135
5136 if (slp_node)
5137 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
5138 else
5139 {
5140 if (!prev_stmt_info)
5141 STMT_VINFO_VEC_STMT (stmt_info)
5142 = *vec_stmt = new_stmt_info;
5143 else
5144 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
5145 prev_stmt_info = new_stmt_info;
5146 }
5147 }
5148 }
5149 break;
5150
5151 case WIDEN:
5152 /* In case the vectorization factor (VF) is bigger than the number
5153 of elements that we can fit in a vectype (nunits), we have to
5154 generate more than one vector stmt - i.e. we need to "unroll"
5155 the vector stmt by a factor VF/nunits. */
5156 for (j = 0; j < ncopies; j++)
5157 {
5158 /* Handle uses. */
5159 if (j == 0)
5160 {
5161 if (slp_node)
5162 {
5163 if (code == WIDEN_LSHIFT_EXPR)
5164 {
5165 unsigned int k;
5166
5167 vec_oprnd1 = op1;
5168 /* Store vec_oprnd1 for every vector stmt to be created
5169 for SLP_NODE. We check during the analysis that all
5170 the shift arguments are the same. */
5171 for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
5172 vec_oprnds1.quick_push (vec_oprnd1);
5173
5174 vect_get_vec_defs (op0, NULL_TREE, stmt_info,
5175 &vec_oprnds0, NULL, slp_node);
5176 }
5177 else
5178 vect_get_vec_defs (op0, op1, stmt_info, &vec_oprnds0,
5179 &vec_oprnds1, slp_node);
5180 }
5181 else
5182 {
5183 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt_info);
5184 vec_oprnds0.quick_push (vec_oprnd0);
5185 if (op_type == binary_op)
5186 {
5187 if (code == WIDEN_LSHIFT_EXPR)
5188 vec_oprnd1 = op1;
5189 else
5190 vec_oprnd1
5191 = vect_get_vec_def_for_operand (op1, stmt_info);
5192 vec_oprnds1.quick_push (vec_oprnd1);
5193 }
5194 }
5195 }
5196 else
5197 {
5198 vec_oprnd0 = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd0);
5199 vec_oprnds0.truncate (0);
5200 vec_oprnds0.quick_push (vec_oprnd0);
5201 if (op_type == binary_op)
5202 {
5203 if (code == WIDEN_LSHIFT_EXPR)
5204 vec_oprnd1 = op1;
5205 else
5206 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (vinfo,
5207 vec_oprnd1);
5208 vec_oprnds1.truncate (0);
5209 vec_oprnds1.quick_push (vec_oprnd1);
5210 }
5211 }
5212
5213 /* Arguments are ready. Create the new vector stmts. */
5214 for (i = multi_step_cvt; i >= 0; i--)
5215 {
5216 tree this_dest = vec_dsts[i];
5217 enum tree_code c1 = code1, c2 = code2;
5218 if (i == 0 && codecvt2 != ERROR_MARK)
5219 {
5220 c1 = codecvt1;
5221 c2 = codecvt2;
5222 }
5223 vect_create_vectorized_promotion_stmts (&vec_oprnds0,
5224 &vec_oprnds1, stmt_info,
5225 this_dest, gsi,
5226 c1, c2, op_type);
5227 }
5228
5229 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5230 {
5231 stmt_vec_info new_stmt_info;
5232 if (cvt_type)
5233 {
5234 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
5235 new_temp = make_ssa_name (vec_dest);
5236 gassign *new_stmt
5237 = gimple_build_assign (new_temp, codecvt1, vop0);
5238 new_stmt_info
5239 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
5240 }
5241 else
5242 new_stmt_info = vinfo->lookup_def (vop0);
5243
5244 if (slp_node)
5245 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
5246 else
5247 {
5248 if (!prev_stmt_info)
5249 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt_info;
5250 else
5251 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
5252 prev_stmt_info = new_stmt_info;
5253 }
5254 }
5255 }
5256
5257 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
5258 break;
5259
5260 case NARROW:
5261 /* In case the vectorization factor (VF) is bigger than the number
5262 of elements that we can fit in a vectype (nunits), we have to
5263 generate more than one vector stmt - i.e. we need to "unroll"
5264 the vector stmt by a factor VF/nunits. */
5265 for (j = 0; j < ncopies; j++)
5266 {
5267 /* Handle uses. */
5268 if (slp_node)
5269 vect_get_vec_defs (op0, NULL_TREE, stmt_info, &vec_oprnds0, NULL,
5270 slp_node);
5271 else
5272 {
5273 vec_oprnds0.truncate (0);
5274 vect_get_loop_based_defs (&last_oprnd, stmt_info, &vec_oprnds0,
5275 vect_pow2 (multi_step_cvt) - 1);
5276 }
5277
5278 /* Arguments are ready. Create the new vector stmts. */
5279 if (cvt_type)
5280 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5281 {
5282 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
5283 new_temp = make_ssa_name (vec_dest);
5284 gassign *new_stmt
5285 = gimple_build_assign (new_temp, codecvt1, vop0);
5286 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
5287 vec_oprnds0[i] = new_temp;
5288 }
5289
5290 vect_create_vectorized_demotion_stmts (&vec_oprnds0, multi_step_cvt,
5291 stmt_info, vec_dsts, gsi,
5292 slp_node, code1,
5293 &prev_stmt_info);
5294 }
5295
5296 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
5297 break;
5298 }
5299
5300 vec_oprnds0.release ();
5301 vec_oprnds1.release ();
5302 interm_types.release ();
5303
5304 return true;
5305 }
5306
5307 /* Return true if we can assume from the scalar form of STMT_INFO that
5308 neither the scalar nor the vector forms will generate code. STMT_INFO
5309 is known not to involve a data reference. */
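/* Editorial example: a cast such as "unsigned int u = (unsigned int) s"
   with int S satisfies tree_nop_conversion_p (same precision and mode), so
   neither the scalar nor the vector form is expected to generate code,
   whereas "short h = (short) s" changes the precision and is not a nop
   conversion.  */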
5310
5311 bool
5312 vect_nop_conversion_p (stmt_vec_info stmt_info)
5313 {
5314 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
5315 if (!stmt)
5316 return false;
5317
5318 tree lhs = gimple_assign_lhs (stmt);
5319 tree_code code = gimple_assign_rhs_code (stmt);
5320 tree rhs = gimple_assign_rhs1 (stmt);
5321
5322 if (code == SSA_NAME || code == VIEW_CONVERT_EXPR)
5323 return true;
5324
5325 if (CONVERT_EXPR_CODE_P (code))
5326 return tree_nop_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs));
5327
5328 return false;
5329 }
5330
5331 /* Function vectorizable_assignment.
5332
5333 Check if STMT_INFO performs an assignment (copy) that can be vectorized.
5334 If VEC_STMT is also passed, vectorize the STMT_INFO: create a vectorized
5335 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
5336 Return true if STMT_INFO is vectorizable in this way. */
5337
5338 static bool
5339 vectorizable_assignment (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
5340 stmt_vec_info *vec_stmt, slp_tree slp_node,
5341 stmt_vector_for_cost *cost_vec)
5342 {
5343 tree vec_dest;
5344 tree scalar_dest;
5345 tree op;
5346 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
5347 tree new_temp;
5348 enum vect_def_type dt[1] = {vect_unknown_def_type};
5349 int ndts = 1;
5350 int ncopies;
5351 int i, j;
5352 vec<tree> vec_oprnds = vNULL;
5353 tree vop;
5354 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
5355 vec_info *vinfo = stmt_info->vinfo;
5356 stmt_vec_info prev_stmt_info = NULL;
5357 enum tree_code code;
5358 tree vectype_in;
5359
5360 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5361 return false;
5362
5363 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5364 && ! vec_stmt)
5365 return false;
5366
5367 /* Is vectorizable assignment? */
5368 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
5369 if (!stmt)
5370 return false;
5371
5372 scalar_dest = gimple_assign_lhs (stmt);
5373 if (TREE_CODE (scalar_dest) != SSA_NAME)
5374 return false;
5375
5376 code = gimple_assign_rhs_code (stmt);
5377 if (gimple_assign_single_p (stmt)
5378 || code == PAREN_EXPR
5379 || CONVERT_EXPR_CODE_P (code))
5380 op = gimple_assign_rhs1 (stmt);
5381 else
5382 return false;
5383
5384 if (code == VIEW_CONVERT_EXPR)
5385 op = TREE_OPERAND (op, 0);
5386
5387 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
5388 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
5389
5390 /* Multiple types in SLP are handled by creating the appropriate number of
5391 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5392 case of SLP. */
5393 if (slp_node)
5394 ncopies = 1;
5395 else
5396 ncopies = vect_get_num_copies (loop_vinfo, vectype);
5397
5398 gcc_assert (ncopies >= 1);
5399
5400 if (!vect_is_simple_use (op, vinfo, &dt[0], &vectype_in))
5401 {
5402 if (dump_enabled_p ())
5403 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5404 "use not simple.\n");
5405 return false;
5406 }
5407
5408 /* We can handle NOP_EXPR conversions that do not change the number
5409 of elements or the vector size. */
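  /* Editorial example: with 128-bit vectors, int -> unsigned int keeps the
     V4SI subparts and mode size and is handled here as a plain copy (via
     the VIEW_CONVERT_EXPR built below), while int -> short would need a
     V4SI -> V8HI change of element count and is rejected.  */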
5410 if ((CONVERT_EXPR_CODE_P (code)
5411 || code == VIEW_CONVERT_EXPR)
5412 && (!vectype_in
5413 || maybe_ne (TYPE_VECTOR_SUBPARTS (vectype_in), nunits)
5414 || maybe_ne (GET_MODE_SIZE (TYPE_MODE (vectype)),
5415 GET_MODE_SIZE (TYPE_MODE (vectype_in)))))
5416 return false;
5417
5418 /* We do not handle bit-precision changes. */
5419 if ((CONVERT_EXPR_CODE_P (code)
5420 || code == VIEW_CONVERT_EXPR)
5421 && INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
5422 && (!type_has_mode_precision_p (TREE_TYPE (scalar_dest))
5423 || !type_has_mode_precision_p (TREE_TYPE (op)))
5424 /* But a conversion that does not change the bit-pattern is ok. */
5425 && !((TYPE_PRECISION (TREE_TYPE (scalar_dest))
5426 > TYPE_PRECISION (TREE_TYPE (op)))
5427 && TYPE_UNSIGNED (TREE_TYPE (op)))
5428 /* Conversion between boolean types of different sizes is
5429 a simple assignment in case their vectypes are same
5430 boolean vectors. */
5431 && (!VECTOR_BOOLEAN_TYPE_P (vectype)
5432 || !VECTOR_BOOLEAN_TYPE_P (vectype_in)))
5433 {
5434 if (dump_enabled_p ())
5435 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5436 "type conversion to/from bit-precision "
5437 "unsupported.\n");
5438 return false;
5439 }
5440
5441 if (!vec_stmt) /* transformation not required. */
5442 {
5443 STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
5444 DUMP_VECT_SCOPE ("vectorizable_assignment");
5445 if (!vect_nop_conversion_p (stmt_info))
5446 vect_model_simple_cost (stmt_info, ncopies, dt, ndts, slp_node,
5447 cost_vec);
5448 return true;
5449 }
5450
5451 /* Transform. */
5452 if (dump_enabled_p ())
5453 dump_printf_loc (MSG_NOTE, vect_location, "transform assignment.\n");
5454
5455 /* Handle def. */
5456 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5457
5458 /* Handle use. */
5459 for (j = 0; j < ncopies; j++)
5460 {
5461 /* Handle uses. */
5462 if (j == 0)
5463 vect_get_vec_defs (op, NULL, stmt_info, &vec_oprnds, NULL, slp_node);
5464 else
5465 vect_get_vec_defs_for_stmt_copy (vinfo, &vec_oprnds, NULL);
5466
5467 /* Arguments are ready. create the new vector stmt. */
5468 stmt_vec_info new_stmt_info = NULL;
5469 FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
5470 {
5471 if (CONVERT_EXPR_CODE_P (code)
5472 || code == VIEW_CONVERT_EXPR)
5473 vop = build1 (VIEW_CONVERT_EXPR, vectype, vop);
5474 gassign *new_stmt = gimple_build_assign (vec_dest, vop);
5475 new_temp = make_ssa_name (vec_dest, new_stmt);
5476 gimple_assign_set_lhs (new_stmt, new_temp);
5477 new_stmt_info
5478 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
5479 if (slp_node)
5480 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
5481 }
5482
5483 if (slp_node)
5484 continue;
5485
5486 if (j == 0)
5487 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
5488 else
5489 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
5490
5491 prev_stmt_info = new_stmt_info;
5492 }
5493
5494 vec_oprnds.release ();
5495 return true;
5496 }
5497
5498
5499 /* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE
5500 either as shift by a scalar or by a vector. */
5501
5502 bool
5503 vect_supportable_shift (vec_info *vinfo, enum tree_code code, tree scalar_type)
5504 {
5505
5506 machine_mode vec_mode;
5507 optab optab;
5508 int icode;
5509 tree vectype;
5510
5511 vectype = get_vectype_for_scalar_type (vinfo, scalar_type);
5512 if (!vectype)
5513 return false;
5514
5515 optab = optab_for_tree_code (code, vectype, optab_scalar);
5516 if (!optab
5517 || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
5518 {
5519 optab = optab_for_tree_code (code, vectype, optab_vector);
5520 if (!optab
5521 || (optab_handler (optab, TYPE_MODE (vectype))
5522 == CODE_FOR_nothing))
5523 return false;
5524 }
5525
5526 vec_mode = TYPE_MODE (vectype);
5527 icode = (int) optab_handler (optab, vec_mode);
5528 if (icode == CODE_FOR_nothing)
5529 return false;
5530
5531 return true;
5532 }
5533
5534
5535 /* Function vectorizable_shift.
5536
5537 Check if STMT_INFO performs a shift operation that can be vectorized.
5538 If VEC_STMT is also passed, vectorize the STMT_INFO: create a vectorized
5539 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
5540 Return true if STMT_INFO is vectorizable in this way. */
5541
5542 static bool
5543 vectorizable_shift (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
5544 stmt_vec_info *vec_stmt, slp_tree slp_node,
5545 stmt_vector_for_cost *cost_vec)
5546 {
5547 tree vec_dest;
5548 tree scalar_dest;
5549 tree op0, op1 = NULL;
5550 tree vec_oprnd1 = NULL_TREE;
5551 tree vectype;
5552 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
5553 enum tree_code code;
5554 machine_mode vec_mode;
5555 tree new_temp;
5556 optab optab;
5557 int icode;
5558 machine_mode optab_op2_mode;
5559 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
5560 int ndts = 2;
5561 stmt_vec_info prev_stmt_info;
5562 poly_uint64 nunits_in;
5563 poly_uint64 nunits_out;
5564 tree vectype_out;
5565 tree op1_vectype;
5566 int ncopies;
5567 int j, i;
5568 vec<tree> vec_oprnds0 = vNULL;
5569 vec<tree> vec_oprnds1 = vNULL;
5570 tree vop0, vop1;
5571 unsigned int k;
5572 bool scalar_shift_arg = true;
5573 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
5574 vec_info *vinfo = stmt_info->vinfo;
5575 bool incompatible_op1_vectype_p = false;
5576
5577 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5578 return false;
5579
5580 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5581 && STMT_VINFO_DEF_TYPE (stmt_info) != vect_nested_cycle
5582 && ! vec_stmt)
5583 return false;
5584
5585 /* Is STMT a vectorizable binary/unary operation? */
5586 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
5587 if (!stmt)
5588 return false;
5589
5590 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
5591 return false;
5592
5593 code = gimple_assign_rhs_code (stmt);
5594
5595 if (!(code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
5596 || code == RROTATE_EXPR))
5597 return false;
5598
5599 scalar_dest = gimple_assign_lhs (stmt);
5600 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
5601 if (!type_has_mode_precision_p (TREE_TYPE (scalar_dest)))
5602 {
5603 if (dump_enabled_p ())
5604 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5605 "bit-precision shifts not supported.\n");
5606 return false;
5607 }
5608
5609 op0 = gimple_assign_rhs1 (stmt);
5610 if (!vect_is_simple_use (op0, vinfo, &dt[0], &vectype))
5611 {
5612 if (dump_enabled_p ())
5613 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5614 "use not simple.\n");
5615 return false;
5616 }
5617 /* If op0 is an external or constant def, infer the vector type
5618 from the scalar type. */
5619 if (!vectype)
5620 vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op0), slp_node);
5621 if (vec_stmt)
5622 gcc_assert (vectype);
5623 if (!vectype)
5624 {
5625 if (dump_enabled_p ())
5626 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5627 "no vectype for scalar type\n");
5628 return false;
5629 }
5630
5631 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
5632 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
5633 if (maybe_ne (nunits_out, nunits_in))
5634 return false;
5635
5636 op1 = gimple_assign_rhs2 (stmt);
5637 stmt_vec_info op1_def_stmt_info;
5638 if (!vect_is_simple_use (op1, vinfo, &dt[1], &op1_vectype,
5639 &op1_def_stmt_info))
5640 {
5641 if (dump_enabled_p ())
5642 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5643 "use not simple.\n");
5644 return false;
5645 }
5646
5647 /* Multiple types in SLP are handled by creating the appropriate number of
5648 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5649 case of SLP. */
5650 if (slp_node)
5651 ncopies = 1;
5652 else
5653 ncopies = vect_get_num_copies (loop_vinfo, vectype);
5654
5655 gcc_assert (ncopies >= 1);
5656
5657 /* Determine whether the shift amount is a vector, or scalar. If the
5658 shift/rotate amount is a vector, use the vector/vector shift optabs. */
5659
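  /* Editorial example: in "a[i] <<= s" with a constant or loop-invariant S
     the shift amount stays scalar and the vector-by-scalar optab can be
     used, whereas "a[i] <<= b[i]" makes DT[1] vect_internal_def and forces
     the vector-by-vector form.  */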
5660 if ((dt[1] == vect_internal_def
5661 || dt[1] == vect_induction_def
5662 || dt[1] == vect_nested_cycle)
5663 && !slp_node)
5664 scalar_shift_arg = false;
5665 else if (dt[1] == vect_constant_def
5666 || dt[1] == vect_external_def
5667 || dt[1] == vect_internal_def)
5668 {
5669 /* In SLP, we need to check whether the shift count is the same
5670 for all statements; in loops, a constant or invariant shift
5671 count is always a scalar shift. */
5672 if (slp_node)
5673 {
5674 vec<stmt_vec_info> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
5675 stmt_vec_info slpstmt_info;
5676
5677 FOR_EACH_VEC_ELT (stmts, k, slpstmt_info)
5678 {
5679 gassign *slpstmt = as_a <gassign *> (slpstmt_info->stmt);
5680 if (!operand_equal_p (gimple_assign_rhs2 (slpstmt), op1, 0))
5681 scalar_shift_arg = false;
5682 }
5683
5684 /* For internal SLP defs we have to make sure we see scalar stmts
5685 for all vector elements.
5686 ??? For different vectors we could resort to a different
5687 scalar shift operand but code-generation below simply always
5688 takes the first. */
5689 if (dt[1] == vect_internal_def
5690 && maybe_ne (nunits_out * SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node),
5691 stmts.length ()))
5692 scalar_shift_arg = false;
5693 }
5694
5695 /* If the shift amount is computed by a pattern stmt we cannot
5696 use the scalar amount directly thus give up and use a vector
5697 shift. */
5698 if (op1_def_stmt_info && is_pattern_stmt_p (op1_def_stmt_info))
5699 scalar_shift_arg = false;
5700 }
5701 else
5702 {
5703 if (dump_enabled_p ())
5704 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5705 "operand mode requires invariant argument.\n");
5706 return false;
5707 }
5708
5709 /* Vector shifted by vector. */
5710 bool was_scalar_shift_arg = scalar_shift_arg;
5711 if (!scalar_shift_arg)
5712 {
5713 optab = optab_for_tree_code (code, vectype, optab_vector);
5714 if (dump_enabled_p ())
5715 dump_printf_loc (MSG_NOTE, vect_location,
5716 "vector/vector shift/rotate found.\n");
5717
5718 if (!op1_vectype)
5719 op1_vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op1),
5720 slp_node);
5721 incompatible_op1_vectype_p
5722 = (op1_vectype == NULL_TREE
5723 || maybe_ne (TYPE_VECTOR_SUBPARTS (op1_vectype),
5724 TYPE_VECTOR_SUBPARTS (vectype))
5725 || TYPE_MODE (op1_vectype) != TYPE_MODE (vectype));
5726 if (incompatible_op1_vectype_p
5727 && (!slp_node
5728 || SLP_TREE_DEF_TYPE
5729 (SLP_TREE_CHILDREN (slp_node)[1]) != vect_constant_def))
5730 {
5731 if (dump_enabled_p ())
5732 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5733 "unusable type for last operand in"
5734 " vector/vector shift/rotate.\n");
5735 return false;
5736 }
5737 }
5738 /* See if the machine has a vector shifted by scalar insn and if not
5739 then see if it has a vector shifted by vector insn. */
5740 else
5741 {
5742 optab = optab_for_tree_code (code, vectype, optab_scalar);
5743 if (optab
5744 && optab_handler (optab, TYPE_MODE (vectype)) != CODE_FOR_nothing)
5745 {
5746 if (dump_enabled_p ())
5747 dump_printf_loc (MSG_NOTE, vect_location,
5748 "vector/scalar shift/rotate found.\n");
5749 }
5750 else
5751 {
5752 optab = optab_for_tree_code (code, vectype, optab_vector);
5753 if (optab
5754 && (optab_handler (optab, TYPE_MODE (vectype))
5755 != CODE_FOR_nothing))
5756 {
5757 scalar_shift_arg = false;
5758
5759 if (dump_enabled_p ())
5760 dump_printf_loc (MSG_NOTE, vect_location,
5761 "vector/vector shift/rotate found.\n");
5762
5763 /* Unlike the other binary operators, shifts/rotates have
5764 the rhs being int, instead of the same type as the lhs,
5765 so make sure the scalar is the right type if we are
5766 dealing with vectors of long long/long/short/char. */
5767 incompatible_op1_vectype_p
5768 = !tree_nop_conversion_p (TREE_TYPE (vectype),
5769 TREE_TYPE (op1));
5770 }
5771 }
5772 }
5773
5774 /* Supportable by target? */
5775 if (!optab)
5776 {
5777 if (dump_enabled_p ())
5778 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5779 "no optab.\n");
5780 return false;
5781 }
5782 vec_mode = TYPE_MODE (vectype);
5783 icode = (int) optab_handler (optab, vec_mode);
5784 if (icode == CODE_FOR_nothing)
5785 {
5786 if (dump_enabled_p ())
5787 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5788 "op not supported by target.\n");
5789 /* Check only during analysis. */
5790 if (maybe_ne (GET_MODE_SIZE (vec_mode), UNITS_PER_WORD)
5791 || (!vec_stmt
5792 && !vect_worthwhile_without_simd_p (vinfo, code)))
5793 return false;
5794 if (dump_enabled_p ())
5795 dump_printf_loc (MSG_NOTE, vect_location,
5796 "proceeding using word mode.\n");
5797 }
5798
5799 /* Worthwhile without SIMD support? Check only during analysis. */
5800 if (!vec_stmt
5801 && !VECTOR_MODE_P (TYPE_MODE (vectype))
5802 && !vect_worthwhile_without_simd_p (vinfo, code))
5803 {
5804 if (dump_enabled_p ())
5805 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5806 "not worthwhile without SIMD support.\n");
5807 return false;
5808 }
5809
5810 if (!vec_stmt) /* transformation not required. */
5811 {
5812 STMT_VINFO_TYPE (stmt_info) = shift_vec_info_type;
5813 DUMP_VECT_SCOPE ("vectorizable_shift");
5814 vect_model_simple_cost (stmt_info, ncopies, dt,
5815 scalar_shift_arg ? 1 : ndts, slp_node, cost_vec);
5816 return true;
5817 }
5818
5819 /* Transform. */
5820
5821 if (dump_enabled_p ())
5822 dump_printf_loc (MSG_NOTE, vect_location,
5823 "transform binary/unary operation.\n");
5824
5825 if (incompatible_op1_vectype_p && !slp_node)
5826 {
5827 op1 = fold_convert (TREE_TYPE (vectype), op1);
5828 if (dt[1] != vect_constant_def)
5829 op1 = vect_init_vector (stmt_info, op1,
5830 TREE_TYPE (vectype), NULL);
5831 }
5832
5833 /* Handle def. */
5834 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5835
5836 prev_stmt_info = NULL;
5837 for (j = 0; j < ncopies; j++)
5838 {
5839 /* Handle uses. */
5840 if (j == 0)
5841 {
5842 if (scalar_shift_arg)
5843 {
5844 /* Vector shl and shr insn patterns can be defined with scalar
5845 operand 2 (shift operand). In this case, use constant or loop
5846 invariant op1 directly, without extending it to vector mode
5847 first. */
5848 optab_op2_mode = insn_data[icode].operand[2].mode;
5849 if (!VECTOR_MODE_P (optab_op2_mode))
5850 {
5851 if (dump_enabled_p ())
5852 dump_printf_loc (MSG_NOTE, vect_location,
5853 "operand 1 using scalar mode.\n");
5854 vec_oprnd1 = op1;
5855 vec_oprnds1.create (slp_node ? slp_node->vec_stmts_size : 1);
5856 vec_oprnds1.quick_push (vec_oprnd1);
5857 if (slp_node)
5858 {
5859 /* Store vec_oprnd1 for every vector stmt to be created
5860 for SLP_NODE. We check during the analysis that all
5861 the shift arguments are the same.
5862 TODO: Allow different constants for different vector
5863 stmts generated for an SLP instance. */
5864 for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
5865 vec_oprnds1.quick_push (vec_oprnd1);
5866 }
5867 }
5868 }
5869 else if (slp_node && incompatible_op1_vectype_p)
5870 {
5871 if (was_scalar_shift_arg)
5872 {
5873 /* If the argument was the same in all lanes create
5874 the correctly typed vector shift amount directly. */
5875 op1 = fold_convert (TREE_TYPE (vectype), op1);
5876 op1 = vect_init_vector (stmt_info, op1, TREE_TYPE (vectype),
5877 !loop_vinfo ? gsi : NULL);
5878 vec_oprnd1 = vect_init_vector (stmt_info, op1, vectype,
5879 !loop_vinfo ? gsi : NULL);
5880 vec_oprnds1.create (slp_node->vec_stmts_size);
5881 for (k = 0; k < slp_node->vec_stmts_size; k++)
5882 vec_oprnds1.quick_push (vec_oprnd1);
5883 }
5884 else if (dt[1] == vect_constant_def)
5885 {
5886 /* Convert the scalar constant shift amounts in-place. */
5887 slp_tree shift = SLP_TREE_CHILDREN (slp_node)[1];
5888 gcc_assert (SLP_TREE_DEF_TYPE (shift) == vect_constant_def);
5889 for (unsigned i = 0;
5890 i < SLP_TREE_SCALAR_OPS (shift).length (); ++i)
5891 {
5892 SLP_TREE_SCALAR_OPS (shift)[i]
5893 = fold_convert (TREE_TYPE (vectype),
5894 SLP_TREE_SCALAR_OPS (shift)[i]);
5895 gcc_assert ((TREE_CODE (SLP_TREE_SCALAR_OPS (shift)[i])
5896 == INTEGER_CST));
5897 }
5898 }
5899 else
5900 gcc_assert (TYPE_MODE (op1_vectype) == TYPE_MODE (vectype));
5901 }
5902
5903 /* vec_oprnd1 is available if operand 1 should be of a scalar type
5904 (a special case for certain kinds of vector shifts); otherwise,
5905 operand 1 should be of a vector type (the usual case). */
5906 if (vec_oprnd1)
5907 vect_get_vec_defs (op0, NULL_TREE, stmt_info, &vec_oprnds0, NULL,
5908 slp_node);
5909 else
5910 vect_get_vec_defs (op0, op1, stmt_info, &vec_oprnds0, &vec_oprnds1,
5911 slp_node);
5912 }
5913 else
5914 vect_get_vec_defs_for_stmt_copy (vinfo, &vec_oprnds0, &vec_oprnds1);
5915
5916 /* Arguments are ready. Create the new vector stmt. */
5917 stmt_vec_info new_stmt_info = NULL;
5918 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5919 {
5920 vop1 = vec_oprnds1[i];
5921 gassign *new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
5922 new_temp = make_ssa_name (vec_dest, new_stmt);
5923 gimple_assign_set_lhs (new_stmt, new_temp);
5924 new_stmt_info
5925 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
5926 if (slp_node)
5927 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
5928 }
5929
5930 if (slp_node)
5931 continue;
5932
5933 if (j == 0)
5934 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
5935 else
5936 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
5937 prev_stmt_info = new_stmt_info;
5938 }
5939
5940 vec_oprnds0.release ();
5941 vec_oprnds1.release ();
5942
5943 return true;
5944 }
5945
5946
5947 /* Function vectorizable_operation.
5948
5949 Check if STMT_INFO performs a binary, unary or ternary operation that can
5950 be vectorized.
5951 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
5952 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
5953 Return true if STMT_INFO is vectorizable in this way. */
5954
5955 static bool
5956 vectorizable_operation (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
5957 stmt_vec_info *vec_stmt, slp_tree slp_node,
5958 stmt_vector_for_cost *cost_vec)
5959 {
5960 tree vec_dest;
5961 tree scalar_dest;
5962 tree op0, op1 = NULL_TREE, op2 = NULL_TREE;
5963 tree vectype;
5964 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
5965 enum tree_code code, orig_code;
5966 machine_mode vec_mode;
5967 tree new_temp;
5968 int op_type;
5969 optab optab;
5970 bool target_support_p;
5971 enum vect_def_type dt[3]
5972 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
5973 int ndts = 3;
5974 stmt_vec_info prev_stmt_info;
5975 poly_uint64 nunits_in;
5976 poly_uint64 nunits_out;
5977 tree vectype_out;
5978 int ncopies, vec_num;
5979 int j, i;
5980 vec<tree> vec_oprnds0 = vNULL;
5981 vec<tree> vec_oprnds1 = vNULL;
5982 vec<tree> vec_oprnds2 = vNULL;
5983 tree vop0, vop1, vop2;
5984 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
5985 vec_info *vinfo = stmt_info->vinfo;
5986
5987 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5988 return false;
5989
5990 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5991 && ! vec_stmt)
5992 return false;
5993
5994 /* Is STMT a vectorizable binary/unary operation? */
5995 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
5996 if (!stmt)
5997 return false;
5998
5999 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
6000 return false;
6001
6002 orig_code = code = gimple_assign_rhs_code (stmt);
6003
6004 /* Shifts are handled in vectorizable_shift. */
6005 if (code == LSHIFT_EXPR
6006 || code == RSHIFT_EXPR
6007 || code == LROTATE_EXPR
6008 || code == RROTATE_EXPR)
6009 return false;
6010
6011 /* Comparisons are handled in vectorizable_comparison. */
6012 if (TREE_CODE_CLASS (code) == tcc_comparison)
6013 return false;
6014
6015 /* Conditions are handled in vectorizable_condition. */
6016 if (code == COND_EXPR)
6017 return false;
6018
6019 /* For pointer addition and subtraction, we should use the normal
6020 plus and minus for the vector operation. */
6021 if (code == POINTER_PLUS_EXPR)
6022 code = PLUS_EXPR;
6023 if (code == POINTER_DIFF_EXPR)
6024 code = MINUS_EXPR;
6025
6026 /* Support only unary or binary operations. */
6027 op_type = TREE_CODE_LENGTH (code);
6028 if (op_type != unary_op && op_type != binary_op && op_type != ternary_op)
6029 {
6030 if (dump_enabled_p ())
6031 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6032 "num. args = %d (not unary/binary/ternary op).\n",
6033 op_type);
6034 return false;
6035 }
6036
6037 scalar_dest = gimple_assign_lhs (stmt);
6038 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
6039
6040 /* Most operations cannot handle bit-precision types without extra
6041 truncations. */
6042 bool mask_op_p = VECTOR_BOOLEAN_TYPE_P (vectype_out);
6043 if (!mask_op_p
6044 && !type_has_mode_precision_p (TREE_TYPE (scalar_dest))
6045 /* Exception are bitwise binary operations. */
6046 && code != BIT_IOR_EXPR
6047 && code != BIT_XOR_EXPR
6048 && code != BIT_AND_EXPR)
6049 {
6050 if (dump_enabled_p ())
6051 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6052 "bit-precision arithmetic not supported.\n");
6053 return false;
6054 }
6055
6056 op0 = gimple_assign_rhs1 (stmt);
6057 if (!vect_is_simple_use (op0, vinfo, &dt[0], &vectype))
6058 {
6059 if (dump_enabled_p ())
6060 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6061 "use not simple.\n");
6062 return false;
6063 }
6064 /* If op0 is an external or constant def, infer the vector type
6065 from the scalar type. */
6066 if (!vectype)
6067 {
6068 /* For a boolean type we cannot determine the vectype from an
6069 invariant value (we do not know whether it is a vector of
6070 booleans or a vector of integers). We use the output
6071 vectype because operations on booleans do not change the
6072 type. */
6073 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op0)))
6074 {
6075 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (scalar_dest)))
6076 {
6077 if (dump_enabled_p ())
6078 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6079 "not supported operation on bool value.\n");
6080 return false;
6081 }
6082 vectype = vectype_out;
6083 }
6084 else
6085 vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op0),
6086 slp_node);
6087 }
6088 if (vec_stmt)
6089 gcc_assert (vectype);
6090 if (!vectype)
6091 {
6092 if (dump_enabled_p ())
6093 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6094 "no vectype for scalar type %T\n",
6095 TREE_TYPE (op0));
6096
6097 return false;
6098 }
6099
6100 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
6101 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
6102 if (maybe_ne (nunits_out, nunits_in))
6103 return false;
6104
6105 tree vectype2 = NULL_TREE, vectype3 = NULL_TREE;
6106 if (op_type == binary_op || op_type == ternary_op)
6107 {
6108 op1 = gimple_assign_rhs2 (stmt);
6109 if (!vect_is_simple_use (op1, vinfo, &dt[1], &vectype2))
6110 {
6111 if (dump_enabled_p ())
6112 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6113 "use not simple.\n");
6114 return false;
6115 }
6116 }
6117 if (op_type == ternary_op)
6118 {
6119 op2 = gimple_assign_rhs3 (stmt);
6120 if (!vect_is_simple_use (op2, vinfo, &dt[2], &vectype3))
6121 {
6122 if (dump_enabled_p ())
6123 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6124 "use not simple.\n");
6125 return false;
6126 }
6127 }
6128
6129 /* Multiple types in SLP are handled by creating the appropriate number of
6130 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
6131 case of SLP. */
6132 if (slp_node)
6133 {
6134 ncopies = 1;
6135 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
6136 }
6137 else
6138 {
6139 ncopies = vect_get_num_copies (loop_vinfo, vectype);
6140 vec_num = 1;
6141 }
6142
6143 gcc_assert (ncopies >= 1);
6144
6145 /* Reject attempts to combine mask types with nonmask types, e.g. if
6146 we have an AND between a (nonmask) boolean loaded from memory and
6147 a (mask) boolean result of a comparison.
6148
6149 TODO: We could easily fix these cases up using pattern statements. */
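  /* Editorial illustration of the rejected case:

	_Bool *p;
	...
	flag = p[i] & (a[i] < b[i]);

     loads a nonmask boolean from memory and ANDs it with the mask result
     of the comparison, mixing the two boolean vector representations.  */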
6150 if (VECTOR_BOOLEAN_TYPE_P (vectype) != mask_op_p
6151 || (vectype2 && VECTOR_BOOLEAN_TYPE_P (vectype2) != mask_op_p)
6152 || (vectype3 && VECTOR_BOOLEAN_TYPE_P (vectype3) != mask_op_p))
6153 {
6154 if (dump_enabled_p ())
6155 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6156 "mixed mask and nonmask vector types\n");
6157 return false;
6158 }
6159
6160 /* Supportable by target? */
6161
6162 vec_mode = TYPE_MODE (vectype);
6163 if (code == MULT_HIGHPART_EXPR)
6164 target_support_p = can_mult_highpart_p (vec_mode, TYPE_UNSIGNED (vectype));
6165 else
6166 {
6167 optab = optab_for_tree_code (code, vectype, optab_default);
6168 if (!optab)
6169 {
6170 if (dump_enabled_p ())
6171 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6172 "no optab.\n");
6173 return false;
6174 }
6175 target_support_p = (optab_handler (optab, vec_mode)
6176 != CODE_FOR_nothing);
6177 }
6178
6179 if (!target_support_p)
6180 {
6181 if (dump_enabled_p ())
6182 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6183 "op not supported by target.\n");
6184 /* Check only during analysis. */
6185 if (maybe_ne (GET_MODE_SIZE (vec_mode), UNITS_PER_WORD)
6186 || (!vec_stmt && !vect_worthwhile_without_simd_p (vinfo, code)))
6187 return false;
6188 if (dump_enabled_p ())
6189 dump_printf_loc (MSG_NOTE, vect_location,
6190 "proceeding using word mode.\n");
6191 }
6192
6193 /* Worthwhile without SIMD support? Check only during analysis. */
6194 if (!VECTOR_MODE_P (vec_mode)
6195 && !vec_stmt
6196 && !vect_worthwhile_without_simd_p (vinfo, code))
6197 {
6198 if (dump_enabled_p ())
6199 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6200 "not worthwhile without SIMD support.\n");
6201 return false;
6202 }
6203
6204 int reduc_idx = STMT_VINFO_REDUC_IDX (stmt_info);
6205 vec_loop_masks *masks = (loop_vinfo ? &LOOP_VINFO_MASKS (loop_vinfo) : NULL);
6206 internal_fn cond_fn = get_conditional_internal_fn (code);
6207
6208 if (!vec_stmt) /* transformation not required. */
6209 {
6210 /* If this operation is part of a reduction, a fully-masked loop
6211 should only change the active lanes of the reduction chain,
6212 keeping the inactive lanes as-is. */
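      /* Editorial sketch (statement names assumed): a reduction such as
	 "sum += a[i]" in a fully-masked loop would then be emitted as

	   vect_sum = .COND_ADD (loop_mask, vect_sum, vect_a, vect_sum);

	 so that lanes disabled by LOOP_MASK simply pass the previous value
	 of the reduction through.  */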
6213 if (loop_vinfo
6214 && LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo)
6215 && reduc_idx >= 0)
6216 {
6217 if (cond_fn == IFN_LAST
6218 || !direct_internal_fn_supported_p (cond_fn, vectype,
6219 OPTIMIZE_FOR_SPEED))
6220 {
6221 if (dump_enabled_p ())
6222 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6223 "can't use a fully-masked loop because no"
6224 " conditional operation is available.\n");
6225 LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
6226 }
6227 else
6228 vect_record_loop_mask (loop_vinfo, masks, ncopies * vec_num,
6229 vectype, NULL);
6230 }
6231
6232 STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
6233 DUMP_VECT_SCOPE ("vectorizable_operation");
6234 vect_model_simple_cost (stmt_info, ncopies, dt, ndts, slp_node, cost_vec);
6235 return true;
6236 }
6237
6238 /* Transform. */
6239
6240 if (dump_enabled_p ())
6241 dump_printf_loc (MSG_NOTE, vect_location,
6242 "transform binary/unary operation.\n");
6243
6244 bool masked_loop_p = loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo);
6245
6246 /* POINTER_DIFF_EXPR has pointer arguments which are vectorized as
6247 vectors with unsigned elements, but the result is signed. So, we
6248 need to compute the MINUS_EXPR into vectype temporary and
6249 VIEW_CONVERT_EXPR it into the final vectype_out result. */
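  /* Editorial example: for "ptrdiff_t d = p - q;" the MINUS_EXPR is emitted
     on the unsigned pointer-sized vector type and the signed VECTYPE_OUT
     result comes from the extra VIEW_CONVERT_EXPR assignment into
     VEC_CVT_DEST generated further down.  */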
6250 tree vec_cvt_dest = NULL_TREE;
6251 if (orig_code == POINTER_DIFF_EXPR)
6252 {
6253 vec_dest = vect_create_destination_var (scalar_dest, vectype);
6254 vec_cvt_dest = vect_create_destination_var (scalar_dest, vectype_out);
6255 }
6256 /* Handle def. */
6257 else
6258 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
6259
6260 /* In case the vectorization factor (VF) is bigger than the number
6261 of elements that we can fit in a vectype (nunits), we have to generate
6262 more than one vector stmt - i.e. we need to "unroll" the
6263 vector stmt by a factor VF/nunits. In doing so, we record a pointer
6264 from one copy of the vector stmt to the next, in the field
6265 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
6266 stages to find the correct vector defs to be used when vectorizing
6267 stmts that use the defs of the current stmt. The example below
6268 illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
6269 we need to create 4 vectorized stmts):
6270
6271 before vectorization:
6272 RELATED_STMT VEC_STMT
6273 S1: x = memref - -
6274 S2: z = x + 1 - -
6275
6276 step 1: vectorize stmt S1 (done in vectorizable_load. See more details
6277 there):
6278 RELATED_STMT VEC_STMT
6279 VS1_0: vx0 = memref0 VS1_1 -
6280 VS1_1: vx1 = memref1 VS1_2 -
6281 VS1_2: vx2 = memref2 VS1_3 -
6282 VS1_3: vx3 = memref3 - -
6283 S1: x = load - VS1_0
6284 S2: z = x + 1 - -
6285
6286 step2: vectorize stmt S2 (done here):
6287 To vectorize stmt S2 we first need to find the relevant vector
6288 def for the first operand 'x'. This is, as usual, obtained from
6289 the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
6290 that defines 'x' (S1). This way we find the stmt VS1_0, and the
6291 relevant vector def 'vx0'. Having found 'vx0' we can generate
6292 the vector stmt VS2_0, and as usual, record it in the
6293 STMT_VINFO_VEC_STMT of stmt S2.
6294 When creating the second copy (VS2_1), we obtain the relevant vector
6295 def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
6296 stmt VS1_0. This way we find the stmt VS1_1 and the relevant
6297 vector def 'vx1'. Using 'vx1' we create stmt VS2_1 and record a
6298 pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
6299 Similarly when creating stmts VS2_2 and VS2_3. This is the resulting
6300 chain of stmts and pointers:
6301 RELATED_STMT VEC_STMT
6302 VS1_0: vx0 = memref0 VS1_1 -
6303 VS1_1: vx1 = memref1 VS1_2 -
6304 VS1_2: vx2 = memref2 VS1_3 -
6305 VS1_3: vx3 = memref3 - -
6306 S1: x = load - VS1_0
6307 VS2_0: vz0 = vx0 + v1 VS2_1 -
6308 VS2_1: vz1 = vx1 + v1 VS2_2 -
6309 VS2_2: vz2 = vx2 + v1 VS2_3 -
6310 VS2_3: vz3 = vx3 + v1 - -
6311 S2: z = x + 1 - VS2_0 */
6312
6313 prev_stmt_info = NULL;
6314 for (j = 0; j < ncopies; j++)
6315 {
6316 /* Handle uses. */
6317 if (j == 0)
6318 {
6319 if (op_type == binary_op)
6320 vect_get_vec_defs (op0, op1, stmt_info, &vec_oprnds0, &vec_oprnds1,
6321 slp_node);
6322 else if (op_type == ternary_op)
6323 {
6324 if (slp_node)
6325 {
6326 auto_vec<vec<tree> > vec_defs(3);
6327 vect_get_slp_defs (slp_node, &vec_defs);
6328 vec_oprnds0 = vec_defs[0];
6329 vec_oprnds1 = vec_defs[1];
6330 vec_oprnds2 = vec_defs[2];
6331 }
6332 else
6333 {
6334 vect_get_vec_defs (op0, op1, stmt_info, &vec_oprnds0,
6335 &vec_oprnds1, NULL);
6336 vect_get_vec_defs (op2, NULL_TREE, stmt_info, &vec_oprnds2,
6337 NULL, NULL);
6338 }
6339 }
6340 else
6341 vect_get_vec_defs (op0, NULL_TREE, stmt_info, &vec_oprnds0, NULL,
6342 slp_node);
6343 }
6344 else
6345 {
6346 vect_get_vec_defs_for_stmt_copy (vinfo, &vec_oprnds0, &vec_oprnds1);
6347 if (op_type == ternary_op)
6348 {
6349 tree vec_oprnd = vec_oprnds2.pop ();
6350 vec_oprnds2.quick_push (vect_get_vec_def_for_stmt_copy (vinfo,
6351 vec_oprnd));
6352 }
6353 }
6354
6355 /* Arguments are ready. Create the new vector stmt. */
6356 stmt_vec_info new_stmt_info = NULL;
6357 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
6358 {
6359 vop1 = ((op_type == binary_op || op_type == ternary_op)
6360 ? vec_oprnds1[i] : NULL_TREE);
6361 vop2 = ((op_type == ternary_op)
6362 ? vec_oprnds2[i] : NULL_TREE);
6363 if (masked_loop_p && reduc_idx >= 0)
6364 {
6365 /* Perform the operation on active elements only and take
6366 inactive elements from the reduction chain input. */
6367 gcc_assert (!vop2);
6368 vop2 = reduc_idx == 1 ? vop1 : vop0;
6369 tree mask = vect_get_loop_mask (gsi, masks, vec_num * ncopies,
6370 vectype, i * ncopies + j);
6371 gcall *call = gimple_build_call_internal (cond_fn, 4, mask,
6372 vop0, vop1, vop2);
6373 new_temp = make_ssa_name (vec_dest, call);
6374 gimple_call_set_lhs (call, new_temp);
6375 gimple_call_set_nothrow (call, true);
6376 new_stmt_info
6377 = vect_finish_stmt_generation (stmt_info, call, gsi);
6378 }
6379 else
6380 {
6381 gassign *new_stmt = gimple_build_assign (vec_dest, code,
6382 vop0, vop1, vop2);
6383 new_temp = make_ssa_name (vec_dest, new_stmt);
6384 gimple_assign_set_lhs (new_stmt, new_temp);
6385 new_stmt_info
6386 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
6387 if (vec_cvt_dest)
6388 {
6389 new_temp = build1 (VIEW_CONVERT_EXPR, vectype_out, new_temp);
6390 gassign *new_stmt
6391 = gimple_build_assign (vec_cvt_dest, VIEW_CONVERT_EXPR,
6392 new_temp);
6393 new_temp = make_ssa_name (vec_cvt_dest, new_stmt);
6394 gimple_assign_set_lhs (new_stmt, new_temp);
6395 new_stmt_info
6396 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
6397 }
6398 }
6399 if (slp_node)
6400 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
6401 }
6402
6403 if (slp_node)
6404 continue;
6405
6406 if (j == 0)
6407 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
6408 else
6409 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
6410 prev_stmt_info = new_stmt_info;
6411 }
6412
6413 vec_oprnds0.release ();
6414 vec_oprnds1.release ();
6415 vec_oprnds2.release ();
6416
6417 return true;
6418 }
6419
6420 /* A helper function to ensure data reference DR_INFO's base alignment. */
6421
6422 static void
6423 ensure_base_align (dr_vec_info *dr_info)
6424 {
6425 if (dr_info->misalignment == DR_MISALIGNMENT_UNINITIALIZED)
6426 return;
6427
6428 if (dr_info->base_misaligned)
6429 {
6430 tree base_decl = dr_info->base_decl;
6431
6432 // We should only be able to increase the alignment of a base object if
6433 // we know what its new alignment should be at compile time.
6434 unsigned HOST_WIDE_INT align_base_to =
6435 DR_TARGET_ALIGNMENT (dr_info).to_constant () * BITS_PER_UNIT;
6436
6437 if (decl_in_symtab_p (base_decl))
6438 symtab_node::get (base_decl)->increase_alignment (align_base_to);
6439 else if (DECL_ALIGN (base_decl) < align_base_to)
6440 {
6441 SET_DECL_ALIGN (base_decl, align_base_to);
6442 DECL_USER_ALIGN (base_decl) = 1;
6443 }
6444 dr_info->base_misaligned = false;
6445 }
6446 }
6447
6448
6449 /* Function get_group_alias_ptr_type.
6450
6451 Return the alias type for the group starting at FIRST_STMT_INFO. */
6452
6453 static tree
6454 get_group_alias_ptr_type (stmt_vec_info first_stmt_info)
6455 {
6456 struct data_reference *first_dr, *next_dr;
6457
6458 first_dr = STMT_VINFO_DATA_REF (first_stmt_info);
6459 stmt_vec_info next_stmt_info = DR_GROUP_NEXT_ELEMENT (first_stmt_info);
6460 while (next_stmt_info)
6461 {
6462 next_dr = STMT_VINFO_DATA_REF (next_stmt_info);
6463 if (get_alias_set (DR_REF (first_dr))
6464 != get_alias_set (DR_REF (next_dr)))
6465 {
6466 if (dump_enabled_p ())
6467 dump_printf_loc (MSG_NOTE, vect_location,
6468 "conflicting alias set types.\n");
6469 return ptr_type_node;
6470 }
6471 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
6472 }
6473 return reference_alias_ptr_type (DR_REF (first_dr));
6474 }
6475
6476
6477 /* Function scan_operand_equal_p.
6478
6479 Helper function for check_scan_store. Compare two references
6480 with .GOMP_SIMD_LANE bases. */
6481
6482 static bool
6483 scan_operand_equal_p (tree ref1, tree ref2)
6484 {
6485 tree ref[2] = { ref1, ref2 };
6486 poly_int64 bitsize[2], bitpos[2];
6487 tree offset[2], base[2];
6488 for (int i = 0; i < 2; ++i)
6489 {
6490 machine_mode mode;
6491 int unsignedp, reversep, volatilep = 0;
6492 base[i] = get_inner_reference (ref[i], &bitsize[i], &bitpos[i],
6493 &offset[i], &mode, &unsignedp,
6494 &reversep, &volatilep);
6495 if (reversep || volatilep || maybe_ne (bitpos[i], 0))
6496 return false;
6497 if (TREE_CODE (base[i]) == MEM_REF
6498 && offset[i] == NULL_TREE
6499 && TREE_CODE (TREE_OPERAND (base[i], 0)) == SSA_NAME)
6500 {
6501 gimple *def_stmt = SSA_NAME_DEF_STMT (TREE_OPERAND (base[i], 0));
6502 if (is_gimple_assign (def_stmt)
6503 && gimple_assign_rhs_code (def_stmt) == POINTER_PLUS_EXPR
6504 && TREE_CODE (gimple_assign_rhs1 (def_stmt)) == ADDR_EXPR
6505 && TREE_CODE (gimple_assign_rhs2 (def_stmt)) == SSA_NAME)
6506 {
6507 if (maybe_ne (mem_ref_offset (base[i]), 0))
6508 return false;
6509 base[i] = TREE_OPERAND (gimple_assign_rhs1 (def_stmt), 0);
6510 offset[i] = gimple_assign_rhs2 (def_stmt);
6511 }
6512 }
6513 }
6514
6515 if (!operand_equal_p (base[0], base[1], 0))
6516 return false;
6517 if (maybe_ne (bitsize[0], bitsize[1]))
6518 return false;
6519 if (offset[0] != offset[1])
6520 {
6521 if (!offset[0] || !offset[1])
6522 return false;
6523 if (!operand_equal_p (offset[0], offset[1], 0))
6524 {
6525 tree step[2];
6526 for (int i = 0; i < 2; ++i)
6527 {
6528 step[i] = integer_one_node;
6529 if (TREE_CODE (offset[i]) == SSA_NAME)
6530 {
6531 gimple *def_stmt = SSA_NAME_DEF_STMT (offset[i]);
6532 if (is_gimple_assign (def_stmt)
6533 && gimple_assign_rhs_code (def_stmt) == MULT_EXPR
6534 && (TREE_CODE (gimple_assign_rhs2 (def_stmt))
6535 == INTEGER_CST))
6536 {
6537 step[i] = gimple_assign_rhs2 (def_stmt);
6538 offset[i] = gimple_assign_rhs1 (def_stmt);
6539 }
6540 }
6541 else if (TREE_CODE (offset[i]) == MULT_EXPR)
6542 {
6543 step[i] = TREE_OPERAND (offset[i], 1);
6544 offset[i] = TREE_OPERAND (offset[i], 0);
6545 }
6546 tree rhs1 = NULL_TREE;
6547 if (TREE_CODE (offset[i]) == SSA_NAME)
6548 {
6549 gimple *def_stmt = SSA_NAME_DEF_STMT (offset[i]);
6550 if (gimple_assign_cast_p (def_stmt))
6551 rhs1 = gimple_assign_rhs1 (def_stmt);
6552 }
6553 else if (CONVERT_EXPR_P (offset[i]))
6554 rhs1 = TREE_OPERAND (offset[i], 0);
6555 if (rhs1
6556 && INTEGRAL_TYPE_P (TREE_TYPE (rhs1))
6557 && INTEGRAL_TYPE_P (TREE_TYPE (offset[i]))
6558 && (TYPE_PRECISION (TREE_TYPE (offset[i]))
6559 >= TYPE_PRECISION (TREE_TYPE (rhs1))))
6560 offset[i] = rhs1;
6561 }
6562 if (!operand_equal_p (offset[0], offset[1], 0)
6563 || !operand_equal_p (step[0], step[1], 0))
6564 return false;
6565 }
6566 }
6567 return true;
6568 }
6569
6570
6571 enum scan_store_kind {
6572 /* Normal permutation. */
6573 scan_store_kind_perm,
6574
6575 /* Whole vector left shift permutation with zero init. */
6576 scan_store_kind_lshift_zero,
6577
6578 /* Whole vector left shift permutation and VEC_COND_EXPR. */
6579 scan_store_kind_lshift_cond
6580 };
6581
6582 /* Function scan_store_can_perm_p.
6583
6584 Verify if we can perform the needed permutations or whole vector shifts.
6585 Return -1 on failure, otherwise exact log2 of vectype's nunits.
6586 USE_WHOLE_VECTOR is a vector of enum scan_store_kind which operation
6587 to do at each step. */
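/* Editorial illustration (for nunits == 4, so units_log2 == 2): the loop
   below checks the permutation masks

     step 0:  { 0, 4, 5, 6 }   (lane 0 of the first input, lanes 0-2 of the
				second)
     step 1:  { 0, 1, 4, 5 }   (lanes 0-1 of the first, lanes 0-1 of the
				second)
     final:   { 3, 3, 3, 3 }   (broadcast of the last lane)

   and falls back to whole-vector shifts, optionally combined with a
   VEC_COND_EXPR when INIT is not an all-zero constant, for any step whose
   permutation is not supported.  */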
6588
6589 static int
6590 scan_store_can_perm_p (tree vectype, tree init,
6591 vec<enum scan_store_kind> *use_whole_vector = NULL)
6592 {
6593 enum machine_mode vec_mode = TYPE_MODE (vectype);
6594 unsigned HOST_WIDE_INT nunits;
6595 if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits))
6596 return -1;
6597 int units_log2 = exact_log2 (nunits);
6598 if (units_log2 <= 0)
6599 return -1;
6600
6601 int i;
6602 enum scan_store_kind whole_vector_shift_kind = scan_store_kind_perm;
6603 for (i = 0; i <= units_log2; ++i)
6604 {
6605 unsigned HOST_WIDE_INT j, k;
6606 enum scan_store_kind kind = scan_store_kind_perm;
6607 vec_perm_builder sel (nunits, nunits, 1);
6608 sel.quick_grow (nunits);
6609 if (i == units_log2)
6610 {
6611 for (j = 0; j < nunits; ++j)
6612 sel[j] = nunits - 1;
6613 }
6614 else
6615 {
6616 for (j = 0; j < (HOST_WIDE_INT_1U << i); ++j)
6617 sel[j] = j;
6618 for (k = 0; j < nunits; ++j, ++k)
6619 sel[j] = nunits + k;
6620 }
6621 vec_perm_indices indices (sel, i == units_log2 ? 1 : 2, nunits);
6622 if (!can_vec_perm_const_p (vec_mode, indices))
6623 {
6624 if (i == units_log2)
6625 return -1;
6626
6627 if (whole_vector_shift_kind == scan_store_kind_perm)
6628 {
6629 if (optab_handler (vec_shl_optab, vec_mode) == CODE_FOR_nothing)
6630 return -1;
6631 whole_vector_shift_kind = scan_store_kind_lshift_zero;
6632 /* Whole vector shifts shift in zeros, so if init is an all-zero
6633 constant, there is no need to do anything further. */
6634 if ((TREE_CODE (init) != INTEGER_CST
6635 && TREE_CODE (init) != REAL_CST)
6636 || !initializer_zerop (init))
6637 {
6638 tree masktype = truth_type_for (vectype);
6639 if (!expand_vec_cond_expr_p (vectype, masktype, VECTOR_CST))
6640 return -1;
6641 whole_vector_shift_kind = scan_store_kind_lshift_cond;
6642 }
6643 }
6644 kind = whole_vector_shift_kind;
6645 }
6646 if (use_whole_vector)
6647 {
6648 if (kind != scan_store_kind_perm && use_whole_vector->is_empty ())
6649 use_whole_vector->safe_grow_cleared (i);
6650 if (kind != scan_store_kind_perm || !use_whole_vector->is_empty ())
6651 use_whole_vector->safe_push (kind);
6652 }
6653 }
6654
6655 return units_log2;
6656 }
6657
6658
6659 /* Function check_scan_store.
6660
6661 Check magic stores for #pragma omp scan {in,ex}clusive reductions. */
6662
6663 static bool
6664 check_scan_store (stmt_vec_info stmt_info, tree vectype,
6665 enum vect_def_type rhs_dt, bool slp, tree mask,
6666 vect_memory_access_type memory_access_type)
6667 {
6668 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
6669 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
6670 tree ref_type;
6671
6672 gcc_assert (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) > 1);
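/* Only the simplest form is supported: a non-SLP, unmasked, contiguous
   store directly into a DECL base with zero offset and init, in a loop
   that is not fully masked, with no grouped access and whose alias set
   conflicts with the vector type's.  */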
6673 if (slp
6674 || mask
6675 || memory_access_type != VMAT_CONTIGUOUS
6676 || TREE_CODE (DR_BASE_ADDRESS (dr_info->dr)) != ADDR_EXPR
6677 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0))
6678 || loop_vinfo == NULL
6679 || LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
6680 || STMT_VINFO_GROUPED_ACCESS (stmt_info)
6681 || !integer_zerop (get_dr_vinfo_offset (dr_info))
6682 || !integer_zerop (DR_INIT (dr_info->dr))
6683 || !(ref_type = reference_alias_ptr_type (DR_REF (dr_info->dr)))
6684 || !alias_sets_conflict_p (get_alias_set (vectype),
6685 get_alias_set (TREE_TYPE (ref_type))))
6686 {
6687 if (dump_enabled_p ())
6688 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6689 "unsupported OpenMP scan store.\n");
6690 return false;
6691 }
6692
6693 /* We need to pattern match code built by OpenMP lowering and simplified
6694 by following optimizations into something we can handle.
6695 #pragma omp simd reduction(inscan,+:r)
6696 for (...)
6697 {
6698 r += something ();
6699 #pragma omp scan inclusive (r)
6700 use (r);
6701 }
6702 shall have body with:
6703 // Initialization for input phase, store the reduction initializer:
6704 _20 = .GOMP_SIMD_LANE (simduid.3_14(D), 0);
6705 _21 = .GOMP_SIMD_LANE (simduid.3_14(D), 1);
6706 D.2042[_21] = 0;
6707 // Actual input phase:
6708 ...
6709 r.0_5 = D.2042[_20];
6710 _6 = _4 + r.0_5;
6711 D.2042[_20] = _6;
6712 // Initialization for scan phase:
6713 _25 = .GOMP_SIMD_LANE (simduid.3_14(D), 2);
6714 _26 = D.2043[_25];
6715 _27 = D.2042[_25];
6716 _28 = _26 + _27;
6717 D.2043[_25] = _28;
6718 D.2042[_25] = _28;
6719 // Actual scan phase:
6720 ...
6721 r.1_8 = D.2042[_20];
6722 ...
6723 The "omp simd array" variable D.2042 holds the privatized copy used
6724 inside of the loop and D.2043 is another one that holds copies of
6725 the current original list item. The separate GOMP_SIMD_LANE ifn
6726 kinds are there in order to allow optimizing the initializer store
6727 and combiner sequence, e.g. if it is originally some C++-ish user
6728 defined reduction, while still allowing the vectorizer to pattern
6729 recognize it and turn it into the appropriate vectorized scan.
6730
6731 For exclusive scan, this is slightly different:
6732 #pragma omp simd reduction(inscan,+:r)
6733 for (...)
6734 {
6735 use (r);
6736 #pragma omp scan exclusive (r)
6737 r += something ();
6738 }
6739 shall have body with:
6740 // Initialization for input phase, store the reduction initializer:
6741 _20 = .GOMP_SIMD_LANE (simduid.3_14(D), 0);
6742 _21 = .GOMP_SIMD_LANE (simduid.3_14(D), 1);
6743 D.2042[_21] = 0;
6744 // Actual input phase:
6745 ...
6746 r.0_5 = D.2042[_20];
6747 _6 = _4 + r.0_5;
6748 D.2042[_20] = _6;
6749 // Initialization for scan phase:
6750 _25 = .GOMP_SIMD_LANE (simduid.3_14(D), 3);
6751 _26 = D.2043[_25];
6752 D.2044[_25] = _26;
6753 _27 = D.2042[_25];
6754 _28 = _26 + _27;
6755 D.2043[_25] = _28;
6756 // Actual scan phase:
6757 ...
6758 r.1_8 = D.2044[_20];
6759 ... */
6760
6761 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 2)
6762 {
6763 /* Match the D.2042[_21] = 0; store above. Just require that
6764 it is a constant or external definition store. */
6765 if (rhs_dt != vect_constant_def && rhs_dt != vect_external_def)
6766 {
6767 fail_init:
6768 if (dump_enabled_p ())
6769 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6770 "unsupported OpenMP scan initializer store.\n");
6771 return false;
6772 }
6773
6774 if (! loop_vinfo->scan_map)
6775 loop_vinfo->scan_map = new hash_map<tree, tree>;
6776 tree var = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
6777 tree &cached = loop_vinfo->scan_map->get_or_insert (var);
6778 if (cached)
6779 goto fail_init;
6780 cached = gimple_assign_rhs1 (STMT_VINFO_STMT (stmt_info));
6781
6782 /* These stores can be vectorized normally. */
6783 return true;
6784 }
6785
6786 if (rhs_dt != vect_internal_def)
6787 {
6788 fail:
6789 if (dump_enabled_p ())
6790 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6791 "unsupported OpenMP scan combiner pattern.\n");
6792 return false;
6793 }
6794
6795 gimple *stmt = STMT_VINFO_STMT (stmt_info);
6796 tree rhs = gimple_assign_rhs1 (stmt);
6797 if (TREE_CODE (rhs) != SSA_NAME)
6798 goto fail;
6799
6800 gimple *other_store_stmt = NULL;
6801 tree var = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
6802 bool inscan_var_store
6803 = lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var)) != NULL;
6804
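/* Per the examples above, stores with SIMD_LANE_ACCESS_P == 3 belong to
   the inclusive scan phase and == 4 to the exclusive scan phase.
   INSCAN_VAR_STORE distinguishes the store into the "omp simd inscan"
   marked array from the store into the other privatized array.  */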
6805 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4)
6806 {
6807 if (!inscan_var_store)
6808 {
6809 use_operand_p use_p;
6810 imm_use_iterator iter;
6811 FOR_EACH_IMM_USE_FAST (use_p, iter, rhs)
6812 {
6813 gimple *use_stmt = USE_STMT (use_p);
6814 if (use_stmt == stmt || is_gimple_debug (use_stmt))
6815 continue;
6816 if (gimple_bb (use_stmt) != gimple_bb (stmt)
6817 || !is_gimple_assign (use_stmt)
6818 || gimple_assign_rhs_class (use_stmt) != GIMPLE_BINARY_RHS
6819 || other_store_stmt
6820 || TREE_CODE (gimple_assign_lhs (use_stmt)) != SSA_NAME)
6821 goto fail;
6822 other_store_stmt = use_stmt;
6823 }
6824 if (other_store_stmt == NULL)
6825 goto fail;
6826 rhs = gimple_assign_lhs (other_store_stmt);
6827 if (!single_imm_use (rhs, &use_p, &other_store_stmt))
6828 goto fail;
6829 }
6830 }
6831 else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 3)
6832 {
6833 use_operand_p use_p;
6834 imm_use_iterator iter;
6835 FOR_EACH_IMM_USE_FAST (use_p, iter, rhs)
6836 {
6837 gimple *use_stmt = USE_STMT (use_p);
6838 if (use_stmt == stmt || is_gimple_debug (use_stmt))
6839 continue;
6840 if (other_store_stmt)
6841 goto fail;
6842 other_store_stmt = use_stmt;
6843 }
6844 }
6845 else
6846 goto fail;
6847
6848 gimple *def_stmt = SSA_NAME_DEF_STMT (rhs);
6849 if (gimple_bb (def_stmt) != gimple_bb (stmt)
6850 || !is_gimple_assign (def_stmt)
6851 || gimple_assign_rhs_class (def_stmt) != GIMPLE_BINARY_RHS)
6852 goto fail;
6853
6854 enum tree_code code = gimple_assign_rhs_code (def_stmt);
6855 /* For pointer addition, we should use the normal plus for the vector
6856 operation. */
6857 switch (code)
6858 {
6859 case POINTER_PLUS_EXPR:
6860 code = PLUS_EXPR;
6861 break;
6862 case MULT_HIGHPART_EXPR:
6863 goto fail;
6864 default:
6865 break;
6866 }
6867 if (TREE_CODE_LENGTH (code) != binary_op || !commutative_tree_code (code))
6868 goto fail;
6869
6870 tree rhs1 = gimple_assign_rhs1 (def_stmt);
6871 tree rhs2 = gimple_assign_rhs2 (def_stmt);
6872 if (TREE_CODE (rhs1) != SSA_NAME || TREE_CODE (rhs2) != SSA_NAME)
6873 goto fail;
6874
6875 gimple *load1_stmt = SSA_NAME_DEF_STMT (rhs1);
6876 gimple *load2_stmt = SSA_NAME_DEF_STMT (rhs2);
6877 if (gimple_bb (load1_stmt) != gimple_bb (stmt)
6878 || !gimple_assign_load_p (load1_stmt)
6879 || gimple_bb (load2_stmt) != gimple_bb (stmt)
6880 || !gimple_assign_load_p (load2_stmt))
6881 goto fail;
6882
6883 stmt_vec_info load1_stmt_info = loop_vinfo->lookup_stmt (load1_stmt);
6884 stmt_vec_info load2_stmt_info = loop_vinfo->lookup_stmt (load2_stmt);
6885 if (load1_stmt_info == NULL
6886 || load2_stmt_info == NULL
6887 || (STMT_VINFO_SIMD_LANE_ACCESS_P (load1_stmt_info)
6888 != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info))
6889 || (STMT_VINFO_SIMD_LANE_ACCESS_P (load2_stmt_info)
6890 != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info)))
6891 goto fail;
6892
6893 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && inscan_var_store)
6894 {
6895 dr_vec_info *load1_dr_info = STMT_VINFO_DR_INFO (load1_stmt_info);
6896 if (TREE_CODE (DR_BASE_ADDRESS (load1_dr_info->dr)) != ADDR_EXPR
6897 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info->dr), 0)))
6898 goto fail;
6899 tree var1 = TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info->dr), 0);
6900 tree lrhs;
6901 if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1)))
6902 lrhs = rhs1;
6903 else
6904 lrhs = rhs2;
6905 use_operand_p use_p;
6906 imm_use_iterator iter;
6907 FOR_EACH_IMM_USE_FAST (use_p, iter, lrhs)
6908 {
6909 gimple *use_stmt = USE_STMT (use_p);
6910 if (use_stmt == def_stmt || is_gimple_debug (use_stmt))
6911 continue;
6912 if (other_store_stmt)
6913 goto fail;
6914 other_store_stmt = use_stmt;
6915 }
6916 }
6917
6918 if (other_store_stmt == NULL)
6919 goto fail;
6920 if (gimple_bb (other_store_stmt) != gimple_bb (stmt)
6921 || !gimple_store_p (other_store_stmt))
6922 goto fail;
6923
6924 stmt_vec_info other_store_stmt_info
6925 = loop_vinfo->lookup_stmt (other_store_stmt);
6926 if (other_store_stmt_info == NULL
6927 || (STMT_VINFO_SIMD_LANE_ACCESS_P (other_store_stmt_info)
6928 != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info)))
6929 goto fail;
6930
6931 gimple *stmt1 = stmt;
6932 gimple *stmt2 = other_store_stmt;
6933 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && !inscan_var_store)
6934 std::swap (stmt1, stmt2);
6935 if (scan_operand_equal_p (gimple_assign_lhs (stmt1),
6936 gimple_assign_rhs1 (load2_stmt)))
6937 {
6938 std::swap (rhs1, rhs2);
6939 std::swap (load1_stmt, load2_stmt);
6940 std::swap (load1_stmt_info, load2_stmt_info);
6941 }
6942 if (!scan_operand_equal_p (gimple_assign_lhs (stmt1),
6943 gimple_assign_rhs1 (load1_stmt)))
6944 goto fail;
6945
6946 tree var3 = NULL_TREE;
6947 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 3
6948 && !scan_operand_equal_p (gimple_assign_lhs (stmt2),
6949 gimple_assign_rhs1 (load2_stmt)))
6950 goto fail;
6951 else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4)
6952 {
6953 dr_vec_info *load2_dr_info = STMT_VINFO_DR_INFO (load2_stmt_info);
6954 if (TREE_CODE (DR_BASE_ADDRESS (load2_dr_info->dr)) != ADDR_EXPR
6955 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info->dr), 0)))
6956 goto fail;
6957 var3 = TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info->dr), 0);
6958 if (!lookup_attribute ("omp simd array", DECL_ATTRIBUTES (var3))
6959 || lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var3))
6960 || lookup_attribute ("omp simd inscan exclusive",
6961 DECL_ATTRIBUTES (var3)))
6962 goto fail;
6963 }
6964
6965 dr_vec_info *other_dr_info = STMT_VINFO_DR_INFO (other_store_stmt_info);
6966 if (TREE_CODE (DR_BASE_ADDRESS (other_dr_info->dr)) != ADDR_EXPR
6967 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (other_dr_info->dr), 0)))
6968 goto fail;
6969
6970 tree var1 = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
6971 tree var2 = TREE_OPERAND (DR_BASE_ADDRESS (other_dr_info->dr), 0);
6972 if (!lookup_attribute ("omp simd array", DECL_ATTRIBUTES (var1))
6973 || !lookup_attribute ("omp simd array", DECL_ATTRIBUTES (var2))
6974 || (!lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1)))
6975 == (!lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var2))))
6976 goto fail;
6977
6978 if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1)))
6979 std::swap (var1, var2);
6980
6981 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4)
6982 {
6983 if (!lookup_attribute ("omp simd inscan exclusive",
6984 DECL_ATTRIBUTES (var1)))
6985 goto fail;
6986 var1 = var3;
6987 }
6988
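/* The initializer store (SIMD_LANE_ACCESS_P == 2) must have recorded its
   rhs in scan_map earlier; that value supplies the vectorized initial
   vector (_40 in the example below).  */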
6989 if (loop_vinfo->scan_map == NULL)
6990 goto fail;
6991 tree *init = loop_vinfo->scan_map->get (var1);
6992 if (init == NULL)
6993 goto fail;
6994
6995 /* The IL is as expected, now check if we can actually vectorize it.
6996 Inclusive scan:
6997 _26 = D.2043[_25];
6998 _27 = D.2042[_25];
6999 _28 = _26 + _27;
7000 D.2043[_25] = _28;
7001 D.2042[_25] = _28;
7002 should be vectorized as (where _40 is the vectorized rhs
7003 from the D.2042[_21] = 0; store):
7004 _30 = MEM <vector(8) int> [(int *)&D.2043];
7005 _31 = MEM <vector(8) int> [(int *)&D.2042];
7006 _32 = VEC_PERM_EXPR <_40, _31, { 0, 8, 9, 10, 11, 12, 13, 14 }>;
7007 _33 = _31 + _32;
7008 // _33 = { _31[0], _31[0]+_31[1], _31[1]+_31[2], ..., _31[6]+_31[7] };
7009 _34 = VEC_PERM_EXPR <_40, _33, { 0, 1, 8, 9, 10, 11, 12, 13 }>;
7010 _35 = _33 + _34;
7011 // _35 = { _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
7012 // _31[1]+.._31[4], ... _31[4]+.._31[7] };
7013 _36 = VEC_PERM_EXPR <_40, _35, { 0, 1, 2, 3, 8, 9, 10, 11 }>;
7014 _37 = _35 + _36;
7015 // _37 = { _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
7016 // _31[0]+.._31[4], ... _31[0]+.._31[7] };
7017 _38 = _30 + _37;
7018 _39 = VEC_PERM_EXPR <_38, _38, { 7, 7, 7, 7, 7, 7, 7, 7 }>;
7019 MEM <vector(8) int> [(int *)&D.2043] = _39;
7020 MEM <vector(8) int> [(int *)&D.2042] = _38;
7021 Exclusive scan:
7022 _26 = D.2043[_25];
7023 D.2044[_25] = _26;
7024 _27 = D.2042[_25];
7025 _28 = _26 + _27;
7026 D.2043[_25] = _28;
7027 should be vectorized as (where _40 is the vectorized rhs
7028 from the D.2042[_21] = 0; store):
7029 _30 = MEM <vector(8) int> [(int *)&D.2043];
7030 _31 = MEM <vector(8) int> [(int *)&D.2042];
7031 _32 = VEC_PERM_EXPR <_40, _31, { 0, 8, 9, 10, 11, 12, 13, 14 }>;
7032 _33 = VEC_PERM_EXPR <_40, _32, { 0, 8, 9, 10, 11, 12, 13, 14 }>;
7033 _34 = _32 + _33;
7034 // _34 = { 0, _31[0], _31[0]+_31[1], _31[1]+_31[2], _31[2]+_31[3],
7035 // _31[3]+_31[4], ... _31[5]+.._31[6] };
7036 _35 = VEC_PERM_EXPR <_40, _34, { 0, 1, 8, 9, 10, 11, 12, 13 }>;
7037 _36 = _34 + _35;
7038 // _36 = { 0, _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
7039 // _31[1]+.._31[4], ... _31[3]+.._31[6] };
7040 _37 = VEC_PERM_EXPR <_40, _36, { 0, 1, 2, 3, 8, 9, 10, 11 }>;
7041 _38 = _36 + _37;
7042 // _38 = { 0, _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
7043 // _31[0]+.._31[4], ... _31[0]+.._31[6] };
7044 _39 = _30 + _38;
7045 _50 = _31 + _39;
7046 _51 = VEC_PERM_EXPR <_50, _50, { 7, 7, 7, 7, 7, 7, 7, 7 }>;
7047 MEM <vector(8) int> [(int *)&D.2044] = _39;
7048 MEM <vector(8) int> [(int *)&D.2042] = _51; */
7049 enum machine_mode vec_mode = TYPE_MODE (vectype);
7050 optab optab = optab_for_tree_code (code, vectype, optab_default);
7051 if (!optab || optab_handler (optab, vec_mode) == CODE_FOR_nothing)
7052 goto fail;
7053
7054 int units_log2 = scan_store_can_perm_p (vectype, *init);
7055 if (units_log2 == -1)
7056 goto fail;
7057
7058 return true;
7059 }
7060
7061
7062 /* Function vectorizable_scan_store.
7063
7064 Helper of vectorizable_store, arguments like on vectorizable_store.
7065 Handle only the transformation, checking is done in check_scan_store. */
7066
7067 static bool
7068 vectorizable_scan_store (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
7069 stmt_vec_info *vec_stmt, int ncopies)
7070 {
7071 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
7072 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
7073 tree ref_type = reference_alias_ptr_type (DR_REF (dr_info->dr));
7074 vec_info *vinfo = stmt_info->vinfo;
7075 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
7076
7077 if (dump_enabled_p ())
7078 dump_printf_loc (MSG_NOTE, vect_location,
7079 "transform scan store. ncopies = %d\n", ncopies);
7080
7081 gimple *stmt = STMT_VINFO_STMT (stmt_info);
7082 tree rhs = gimple_assign_rhs1 (stmt);
7083 gcc_assert (TREE_CODE (rhs) == SSA_NAME);
7084
7085 tree var = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
7086 bool inscan_var_store
7087 = lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var)) != NULL;
7088
7089 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && !inscan_var_store)
7090 {
7091 use_operand_p use_p;
7092 imm_use_iterator iter;
7093 FOR_EACH_IMM_USE_FAST (use_p, iter, rhs)
7094 {
7095 gimple *use_stmt = USE_STMT (use_p);
7096 if (use_stmt == stmt || is_gimple_debug (use_stmt))
7097 continue;
7098 rhs = gimple_assign_lhs (use_stmt);
7099 break;
7100 }
7101 }
7102
7103 gimple *def_stmt = SSA_NAME_DEF_STMT (rhs);
7104 enum tree_code code = gimple_assign_rhs_code (def_stmt);
7105 if (code == POINTER_PLUS_EXPR)
7106 code = PLUS_EXPR;
7107 gcc_assert (TREE_CODE_LENGTH (code) == binary_op
7108 && commutative_tree_code (code));
7109 tree rhs1 = gimple_assign_rhs1 (def_stmt);
7110 tree rhs2 = gimple_assign_rhs2 (def_stmt);
7111 gcc_assert (TREE_CODE (rhs1) == SSA_NAME && TREE_CODE (rhs2) == SSA_NAME);
7112 gimple *load1_stmt = SSA_NAME_DEF_STMT (rhs1);
7113 gimple *load2_stmt = SSA_NAME_DEF_STMT (rhs2);
7114 stmt_vec_info load1_stmt_info = loop_vinfo->lookup_stmt (load1_stmt);
7115 stmt_vec_info load2_stmt_info = loop_vinfo->lookup_stmt (load2_stmt);
7116 dr_vec_info *load1_dr_info = STMT_VINFO_DR_INFO (load1_stmt_info);
7117 dr_vec_info *load2_dr_info = STMT_VINFO_DR_INFO (load2_stmt_info);
7118 tree var1 = TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info->dr), 0);
7119 tree var2 = TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info->dr), 0);
7120
7121 if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1)))
7122 {
7123 std::swap (rhs1, rhs2);
7124 std::swap (var1, var2);
7125 std::swap (load1_dr_info, load2_dr_info);
7126 }
7127
7128 tree *init = loop_vinfo->scan_map->get (var1);
7129 gcc_assert (init);
7130
7131 unsigned HOST_WIDE_INT nunits;
7132 if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits))
7133 gcc_unreachable ();
7134 auto_vec<enum scan_store_kind, 16> use_whole_vector;
7135 int units_log2 = scan_store_can_perm_p (vectype, *init, &use_whole_vector);
7136 gcc_assert (units_log2 > 0);
7137 auto_vec<tree, 16> perms;
7138 perms.quick_grow (units_log2 + 1);
7139 tree zero_vec = NULL_TREE, masktype = NULL_TREE;
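/* Rebuild the permutation selectors that scan_store_can_perm_p
   validated: one mask per doubling step plus a final mask broadcasting
   the last lane.  For steps expanded as whole vector shifts the mask is
   built with vect_gen_perm_mask_any, since the permutation itself need
   not be directly supported by the target.  */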
7140 for (int i = 0; i <= units_log2; ++i)
7141 {
7142 unsigned HOST_WIDE_INT j, k;
7143 vec_perm_builder sel (nunits, nunits, 1);
7144 sel.quick_grow (nunits);
7145 if (i == units_log2)
7146 for (j = 0; j < nunits; ++j)
7147 sel[j] = nunits - 1;
7148 else
7149 {
7150 for (j = 0; j < (HOST_WIDE_INT_1U << i); ++j)
7151 sel[j] = j;
7152 for (k = 0; j < nunits; ++j, ++k)
7153 sel[j] = nunits + k;
7154 }
7155 vec_perm_indices indices (sel, i == units_log2 ? 1 : 2, nunits);
7156 if (!use_whole_vector.is_empty ()
7157 && use_whole_vector[i] != scan_store_kind_perm)
7158 {
7159 if (zero_vec == NULL_TREE)
7160 zero_vec = build_zero_cst (vectype);
7161 if (masktype == NULL_TREE
7162 && use_whole_vector[i] == scan_store_kind_lshift_cond)
7163 masktype = truth_type_for (vectype);
7164 perms[i] = vect_gen_perm_mask_any (vectype, indices);
7165 }
7166 else
7167 perms[i] = vect_gen_perm_mask_checked (vectype, indices);
7168 }
7169
7170 stmt_vec_info prev_stmt_info = NULL;
7171 tree vec_oprnd1 = NULL_TREE;
7172 tree vec_oprnd2 = NULL_TREE;
7173 tree vec_oprnd3 = NULL_TREE;
7174 tree dataref_ptr = DR_BASE_ADDRESS (dr_info->dr);
7175 tree dataref_offset = build_int_cst (ref_type, 0);
7176 tree bump = vect_get_data_ptr_increment (dr_info, vectype, VMAT_CONTIGUOUS);
7177 tree ldataref_ptr = NULL_TREE;
7178 tree orig = NULL_TREE;
7179 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && !inscan_var_store)
7180 ldataref_ptr = DR_BASE_ADDRESS (load1_dr_info->dr);
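/* Emit the scan for each vector copy: form the per-lane prefix sums
   with UNITS_LOG2 permute-and-combine steps, accumulate ORIG (the
   running total carried over from the previous copy), store the result
   and broadcast its last lane to become the next ORIG.  */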
7181 for (int j = 0; j < ncopies; j++)
7182 {
7183 stmt_vec_info new_stmt_info;
7184 if (j == 0)
7185 {
7186 vec_oprnd1 = vect_get_vec_def_for_operand (*init, stmt_info);
7187 if (ldataref_ptr == NULL)
7188 vec_oprnd2 = vect_get_vec_def_for_operand (rhs1, stmt_info);
7189 vec_oprnd3 = vect_get_vec_def_for_operand (rhs2, stmt_info);
7190 orig = vec_oprnd3;
7191 }
7192 else
7193 {
7194 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd1);
7195 if (ldataref_ptr == NULL)
7196 vec_oprnd2 = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd2);
7197 vec_oprnd3 = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd3);
7198 if (!inscan_var_store)
7199 dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset, bump);
7200 }
7201
7202 if (ldataref_ptr)
7203 {
7204 vec_oprnd2 = make_ssa_name (vectype);
7205 tree data_ref = fold_build2 (MEM_REF, vectype,
7206 unshare_expr (ldataref_ptr),
7207 dataref_offset);
7208 vect_copy_ref_info (data_ref, DR_REF (load1_dr_info->dr));
7209 gimple *g = gimple_build_assign (vec_oprnd2, data_ref);
7210 new_stmt_info = vect_finish_stmt_generation (stmt_info, g, gsi);
7211 if (prev_stmt_info == NULL)
7212 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
7213 else
7214 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
7215 prev_stmt_info = new_stmt_info;
7216 }
7217
7218 tree v = vec_oprnd2;
7219 for (int i = 0; i < units_log2; ++i)
7220 {
7221 tree new_temp = make_ssa_name (vectype);
7222 gimple *g = gimple_build_assign (new_temp, VEC_PERM_EXPR,
7223 (zero_vec
7224 && (use_whole_vector[i]
7225 != scan_store_kind_perm))
7226 ? zero_vec : vec_oprnd1, v,
7227 perms[i]);
7228 new_stmt_info = vect_finish_stmt_generation (stmt_info, g, gsi);
7229 if (prev_stmt_info == NULL)
7230 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
7231 else
7232 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
7233 prev_stmt_info = new_stmt_info;
7234
7235 if (zero_vec && use_whole_vector[i] == scan_store_kind_lshift_cond)
7236 {
7237 /* Whole vector shift shifted in zero bits, but if *init
7238 is not initializer_zerop, we need to replace those elements
7239 with elements from vec_oprnd1. */
7240 tree_vector_builder vb (masktype, nunits, 1);
7241 for (unsigned HOST_WIDE_INT k = 0; k < nunits; ++k)
7242 vb.quick_push (k < (HOST_WIDE_INT_1U << i)
7243 ? boolean_false_node : boolean_true_node);
7244
7245 tree new_temp2 = make_ssa_name (vectype);
7246 g = gimple_build_assign (new_temp2, VEC_COND_EXPR, vb.build (),
7247 new_temp, vec_oprnd1);
7248 new_stmt_info = vect_finish_stmt_generation (stmt_info, g, gsi);
7249 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
7250 prev_stmt_info = new_stmt_info;
7251 new_temp = new_temp2;
7252 }
7253
7254 /* For exclusive scan, perform the perms[i] permutation once
7255 more. */
7256 if (i == 0
7257 && STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4
7258 && v == vec_oprnd2)
7259 {
7260 v = new_temp;
7261 --i;
7262 continue;
7263 }
7264
7265 tree new_temp2 = make_ssa_name (vectype);
7266 g = gimple_build_assign (new_temp2, code, v, new_temp);
7267 new_stmt_info = vect_finish_stmt_generation (stmt_info, g, gsi);
7268 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
7269 prev_stmt_info = new_stmt_info;
7270
7271 v = new_temp2;
7272 }
7273
7274 tree new_temp = make_ssa_name (vectype);
7275 gimple *g = gimple_build_assign (new_temp, code, orig, v);
7276 new_stmt_info = vect_finish_stmt_generation (stmt_info, g, gsi);
7277 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
7278 prev_stmt_info = new_stmt_info;
7279
7280 tree last_perm_arg = new_temp;
7281 /* For exclusive scan, new_temp computed above is the exclusive scan
7282 prefix sum. Turn it into inclusive prefix sum for the broadcast
7283 of the last element into orig. */
7284 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4)
7285 {
7286 last_perm_arg = make_ssa_name (vectype);
7287 g = gimple_build_assign (last_perm_arg, code, new_temp, vec_oprnd2);
7288 new_stmt_info = vect_finish_stmt_generation (stmt_info, g, gsi);
7289 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
7290 prev_stmt_info = new_stmt_info;
7291 }
7292
7293 orig = make_ssa_name (vectype);
7294 g = gimple_build_assign (orig, VEC_PERM_EXPR, last_perm_arg,
7295 last_perm_arg, perms[units_log2]);
7296 new_stmt_info = vect_finish_stmt_generation (stmt_info, g, gsi);
7297 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
7298 prev_stmt_info = new_stmt_info;
7299
7300 if (!inscan_var_store)
7301 {
7302 tree data_ref = fold_build2 (MEM_REF, vectype,
7303 unshare_expr (dataref_ptr),
7304 dataref_offset);
7305 vect_copy_ref_info (data_ref, DR_REF (dr_info->dr));
7306 g = gimple_build_assign (data_ref, new_temp);
7307 new_stmt_info = vect_finish_stmt_generation (stmt_info, g, gsi);
7308 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
7309 prev_stmt_info = new_stmt_info;
7310 }
7311 }
7312
7313 if (inscan_var_store)
7314 for (int j = 0; j < ncopies; j++)
7315 {
7316 if (j != 0)
7317 dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset, bump);
7318
7319 tree data_ref = fold_build2 (MEM_REF, vectype,
7320 unshare_expr (dataref_ptr),
7321 dataref_offset);
7322 vect_copy_ref_info (data_ref, DR_REF (dr_info->dr));
7323 gimple *g = gimple_build_assign (data_ref, orig);
7324 stmt_vec_info new_stmt_info
7325 = vect_finish_stmt_generation (stmt_info, g, gsi);
7326 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
7327 prev_stmt_info = new_stmt_info;
7328 }
7329 return true;
7330 }
7331
7332
7333 /* Function vectorizable_store.
7334
7335 Check if STMT_INFO defines a non scalar data-ref (array/pointer/structure)
7336 that can be vectorized.
7337 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
7338 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
7339 Return true if STMT_INFO is vectorizable in this way. */
7340
7341 static bool
7342 vectorizable_store (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
7343 stmt_vec_info *vec_stmt, slp_tree slp_node,
7344 stmt_vector_for_cost *cost_vec)
7345 {
7346 tree data_ref;
7347 tree op;
7348 tree vec_oprnd = NULL_TREE;
7349 tree elem_type;
7350 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
7351 class loop *loop = NULL;
7352 machine_mode vec_mode;
7353 tree dummy;
7354 enum dr_alignment_support alignment_support_scheme;
7355 enum vect_def_type rhs_dt = vect_unknown_def_type;
7356 enum vect_def_type mask_dt = vect_unknown_def_type;
7357 stmt_vec_info prev_stmt_info = NULL;
7358 tree dataref_ptr = NULL_TREE;
7359 tree dataref_offset = NULL_TREE;
7360 gimple *ptr_incr = NULL;
7361 int ncopies;
7362 int j;
7363 stmt_vec_info first_stmt_info;
7364 bool grouped_store;
7365 unsigned int group_size, i;
7366 vec<tree> oprnds = vNULL;
7367 vec<tree> result_chain = vNULL;
7368 tree offset = NULL_TREE;
7369 vec<tree> vec_oprnds = vNULL;
7370 bool slp = (slp_node != NULL);
7371 unsigned int vec_num;
7372 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
7373 vec_info *vinfo = stmt_info->vinfo;
7374 tree aggr_type;
7375 gather_scatter_info gs_info;
7376 poly_uint64 vf;
7377 vec_load_store_type vls_type;
7378 tree ref_type;
7379
7380 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
7381 return false;
7382
7383 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
7384 && ! vec_stmt)
7385 return false;
7386
7387 /* Is vectorizable store? */
7388
7389 tree mask = NULL_TREE, mask_vectype = NULL_TREE;
7390 if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
7391 {
7392 tree scalar_dest = gimple_assign_lhs (assign);
7393 if (TREE_CODE (scalar_dest) == VIEW_CONVERT_EXPR
7394 && is_pattern_stmt_p (stmt_info))
7395 scalar_dest = TREE_OPERAND (scalar_dest, 0);
7396 if (TREE_CODE (scalar_dest) != ARRAY_REF
7397 && TREE_CODE (scalar_dest) != BIT_FIELD_REF
7398 && TREE_CODE (scalar_dest) != INDIRECT_REF
7399 && TREE_CODE (scalar_dest) != COMPONENT_REF
7400 && TREE_CODE (scalar_dest) != IMAGPART_EXPR
7401 && TREE_CODE (scalar_dest) != REALPART_EXPR
7402 && TREE_CODE (scalar_dest) != MEM_REF)
7403 return false;
7404 }
7405 else
7406 {
7407 gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
7408 if (!call || !gimple_call_internal_p (call))
7409 return false;
7410
7411 internal_fn ifn = gimple_call_internal_fn (call);
7412 if (!internal_store_fn_p (ifn))
7413 return false;
7414
7415 if (slp_node != NULL)
7416 {
7417 if (dump_enabled_p ())
7418 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7419 "SLP of masked stores not supported.\n");
7420 return false;
7421 }
7422
7423 int mask_index = internal_fn_mask_index (ifn);
7424 if (mask_index >= 0)
7425 {
7426 mask = gimple_call_arg (call, mask_index);
7427 if (!vect_check_scalar_mask (stmt_info, mask, &mask_dt,
7428 &mask_vectype))
7429 return false;
7430 }
7431 }
7432
7433 op = vect_get_store_rhs (stmt_info);
7434
7435 /* Cannot have hybrid store SLP -- that would mean storing to the
7436 same location twice. */
7437 gcc_assert (slp == PURE_SLP_STMT (stmt_info));
7438
7439 tree vectype = STMT_VINFO_VECTYPE (stmt_info), rhs_vectype = NULL_TREE;
7440 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
7441
7442 if (loop_vinfo)
7443 {
7444 loop = LOOP_VINFO_LOOP (loop_vinfo);
7445 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
7446 }
7447 else
7448 vf = 1;
7449
7450 /* Multiple types in SLP are handled by creating the appropriate number of
7451 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
7452 case of SLP. */
7453 if (slp)
7454 ncopies = 1;
7455 else
7456 ncopies = vect_get_num_copies (loop_vinfo, vectype);
7457
7458 gcc_assert (ncopies >= 1);
7459
7460 /* FORNOW. This restriction should be relaxed. */
7461 if (loop && nested_in_vect_loop_p (loop, stmt_info) && ncopies > 1)
7462 {
7463 if (dump_enabled_p ())
7464 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7465 "multiple types in nested loop.\n");
7466 return false;
7467 }
7468
7469 if (!vect_check_store_rhs (stmt_info, op, &rhs_dt, &rhs_vectype, &vls_type))
7470 return false;
7471
7472 elem_type = TREE_TYPE (vectype);
7473 vec_mode = TYPE_MODE (vectype);
7474
7475 if (!STMT_VINFO_DATA_REF (stmt_info))
7476 return false;
7477
7478 vect_memory_access_type memory_access_type;
7479 if (!get_load_store_type (stmt_info, vectype, slp, mask, vls_type, ncopies,
7480 &memory_access_type, &gs_info))
7481 return false;
7482
7483 if (mask)
7484 {
7485 if (memory_access_type == VMAT_CONTIGUOUS)
7486 {
7487 if (!VECTOR_MODE_P (vec_mode)
7488 || !can_vec_mask_load_store_p (vec_mode,
7489 TYPE_MODE (mask_vectype), false))
7490 return false;
7491 }
7492 else if (memory_access_type != VMAT_LOAD_STORE_LANES
7493 && (memory_access_type != VMAT_GATHER_SCATTER
7494 || (gs_info.decl && !VECTOR_BOOLEAN_TYPE_P (mask_vectype))))
7495 {
7496 if (dump_enabled_p ())
7497 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7498 "unsupported access type for masked store.\n");
7499 return false;
7500 }
7501 }
7502 else
7503 {
7504 /* FORNOW. In some cases we can vectorize even if the data type is not
7505 supported (e.g. array initialization with 0). */
7506 if (optab_handler (mov_optab, vec_mode) == CODE_FOR_nothing)
7507 return false;
7508 }
7509
7510 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info), *first_dr_info = NULL;
7511 grouped_store = (STMT_VINFO_GROUPED_ACCESS (stmt_info)
7512 && memory_access_type != VMAT_GATHER_SCATTER
7513 && (slp || memory_access_type != VMAT_CONTIGUOUS));
7514 if (grouped_store)
7515 {
7516 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
7517 first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
7518 group_size = DR_GROUP_SIZE (first_stmt_info);
7519 }
7520 else
7521 {
7522 first_stmt_info = stmt_info;
7523 first_dr_info = dr_info;
7524 group_size = vec_num = 1;
7525 }
7526
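/* Magic stores for #pragma omp scan need extra verification during
   analysis; see check_scan_store.  */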
7527 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) > 1 && !vec_stmt)
7528 {
7529 if (!check_scan_store (stmt_info, vectype, rhs_dt, slp, mask,
7530 memory_access_type))
7531 return false;
7532 }
7533
7534 if (!vec_stmt) /* transformation not required. */
7535 {
7536 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;
7537
7538 if (loop_vinfo
7539 && LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo))
7540 check_load_store_masking (loop_vinfo, vectype, vls_type, group_size,
7541 memory_access_type, &gs_info, mask);
7542
7543 STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
7544 vect_model_store_cost (stmt_info, ncopies, rhs_dt, memory_access_type,
7545 vls_type, slp_node, cost_vec);
7546 return true;
7547 }
7548 gcc_assert (memory_access_type == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info));
7549
7550 /* Transform. */
7551
7552 ensure_base_align (dr_info);
7553
7554 if (memory_access_type == VMAT_GATHER_SCATTER && gs_info.decl)
7555 {
7556 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE, src;
7557 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info.decl));
7558 tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
7559 tree ptr, var, scale, vec_mask;
7560 tree mask_arg = NULL_TREE, mask_op = NULL_TREE, perm_mask = NULL_TREE;
7561 tree mask_halfvectype = mask_vectype;
7562 edge pe = loop_preheader_edge (loop);
7563 gimple_seq seq;
7564 basic_block new_bb;
7565 enum { NARROW, NONE, WIDEN } modifier;
7566 poly_uint64 scatter_off_nunits
7567 = TYPE_VECTOR_SUBPARTS (gs_info.offset_vectype);
7568
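/* If the offset vector has twice as many elements as the data vector,
   one offset vector serves two data vectors and the odd copies permute
   its high half down (WIDEN).  If the data vector has twice as many
   elements, each one is scattered with two builtin calls, NCOPIES is
   doubled and the odd copies permute the high half of the data down
   (NARROW).  */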
7569 if (known_eq (nunits, scatter_off_nunits))
7570 modifier = NONE;
7571 else if (known_eq (nunits * 2, scatter_off_nunits))
7572 {
7573 modifier = WIDEN;
7574
7575 /* Currently gathers and scatters are only supported for
7576 fixed-length vectors. */
7577 unsigned int count = scatter_off_nunits.to_constant ();
7578 vec_perm_builder sel (count, count, 1);
7579 for (i = 0; i < (unsigned int) count; ++i)
7580 sel.quick_push (i | (count / 2));
7581
7582 vec_perm_indices indices (sel, 1, count);
7583 perm_mask = vect_gen_perm_mask_checked (gs_info.offset_vectype,
7584 indices);
7585 gcc_assert (perm_mask != NULL_TREE);
7586 }
7587 else if (known_eq (nunits, scatter_off_nunits * 2))
7588 {
7589 modifier = NARROW;
7590
7591 /* Currently gathers and scatters are only supported for
7592 fixed-length vectors. */
7593 unsigned int count = nunits.to_constant ();
7594 vec_perm_builder sel (count, count, 1);
7595 for (i = 0; i < (unsigned int) count; ++i)
7596 sel.quick_push (i | (count / 2));
7597
7598 vec_perm_indices indices (sel, 2, count);
7599 perm_mask = vect_gen_perm_mask_checked (vectype, indices);
7600 gcc_assert (perm_mask != NULL_TREE);
7601 ncopies *= 2;
7602
7603 if (mask)
7604 mask_halfvectype = truth_type_for (gs_info.offset_vectype);
7605 }
7606 else
7607 gcc_unreachable ();
7608
7609 rettype = TREE_TYPE (TREE_TYPE (gs_info.decl));
7610 ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
7611 masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
7612 idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
7613 srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
7614 scaletype = TREE_VALUE (arglist);
7615
7616 gcc_checking_assert (TREE_CODE (masktype) == INTEGER_TYPE
7617 && TREE_CODE (rettype) == VOID_TYPE);
7618
7619 ptr = fold_convert (ptrtype, gs_info.base);
7620 if (!is_gimple_min_invariant (ptr))
7621 {
7622 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
7623 new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
7624 gcc_assert (!new_bb);
7625 }
7626
7627 if (mask == NULL_TREE)
7628 {
7629 mask_arg = build_int_cst (masktype, -1);
7630 mask_arg = vect_init_vector (stmt_info, mask_arg, masktype, NULL);
7631 }
7632
7633 scale = build_int_cst (scaletype, gs_info.scale);
7634
7635 prev_stmt_info = NULL;
7636 for (j = 0; j < ncopies; ++j)
7637 {
7638 if (j == 0)
7639 {
7640 src = vec_oprnd1 = vect_get_vec_def_for_operand (op, stmt_info);
7641 op = vec_oprnd0 = vect_get_vec_def_for_operand (gs_info.offset,
7642 stmt_info);
7643 if (mask)
7644 {
7645 tree mask_vectype = truth_type_for (vectype);
7646 mask_op = vec_mask
7647 = vect_get_vec_def_for_operand (mask,
7648 stmt_info, mask_vectype);
7649 }
7650 }
7651 else if (modifier != NONE && (j & 1))
7652 {
7653 if (modifier == WIDEN)
7654 {
7655 src
7656 = vec_oprnd1 = vect_get_vec_def_for_stmt_copy (vinfo,
7657 vec_oprnd1);
7658 op = permute_vec_elements (vec_oprnd0, vec_oprnd0, perm_mask,
7659 stmt_info, gsi);
7660 if (mask)
7661 mask_op
7662 = vec_mask = vect_get_vec_def_for_stmt_copy (vinfo,
7663 vec_mask);
7664 }
7665 else if (modifier == NARROW)
7666 {
7667 src = permute_vec_elements (vec_oprnd1, vec_oprnd1, perm_mask,
7668 stmt_info, gsi);
7669 op = vec_oprnd0 = vect_get_vec_def_for_stmt_copy (vinfo,
7670 vec_oprnd0);
7671 }
7672 else
7673 gcc_unreachable ();
7674 }
7675 else
7676 {
7677 src = vec_oprnd1 = vect_get_vec_def_for_stmt_copy (vinfo,
7678 vec_oprnd1);
7679 op = vec_oprnd0 = vect_get_vec_def_for_stmt_copy (vinfo,
7680 vec_oprnd0);
7681 if (mask)
7682 mask_op = vec_mask = vect_get_vec_def_for_stmt_copy (vinfo,
7683 vec_mask);
7684 }
7685
7686 if (!useless_type_conversion_p (srctype, TREE_TYPE (src)))
7687 {
7688 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (src)),
7689 TYPE_VECTOR_SUBPARTS (srctype)));
7690 var = vect_get_new_ssa_name (srctype, vect_simple_var);
7691 src = build1 (VIEW_CONVERT_EXPR, srctype, src);
7692 gassign *new_stmt
7693 = gimple_build_assign (var, VIEW_CONVERT_EXPR, src);
7694 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
7695 src = var;
7696 }
7697
7698 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
7699 {
7700 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op)),
7701 TYPE_VECTOR_SUBPARTS (idxtype)));
7702 var = vect_get_new_ssa_name (idxtype, vect_simple_var);
7703 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
7704 gassign *new_stmt
7705 = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
7706 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
7707 op = var;
7708 }
7709
7710 if (mask)
7711 {
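/* The builtin expects an integer mask operand: if the offsets are
   narrower than the data (NARROW), first unpack the relevant half of
   the vector mask, then VIEW_CONVERT it to a same-sized integer and
   widen that to MASKTYPE if needed.  */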
7712 tree utype;
7713 mask_arg = mask_op;
7714 if (modifier == NARROW)
7715 {
7716 var = vect_get_new_ssa_name (mask_halfvectype,
7717 vect_simple_var);
7718 gassign *new_stmt
7719 = gimple_build_assign (var, (j & 1) ? VEC_UNPACK_HI_EXPR
7720 : VEC_UNPACK_LO_EXPR,
7721 mask_op);
7722 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
7723 mask_arg = var;
7724 }
7725 tree optype = TREE_TYPE (mask_arg);
7726 if (TYPE_MODE (masktype) == TYPE_MODE (optype))
7727 utype = masktype;
7728 else
7729 utype = lang_hooks.types.type_for_mode (TYPE_MODE (optype), 1);
7730 var = vect_get_new_ssa_name (utype, vect_scalar_var);
7731 mask_arg = build1 (VIEW_CONVERT_EXPR, utype, mask_arg);
7732 gassign *new_stmt
7733 = gimple_build_assign (var, VIEW_CONVERT_EXPR, mask_arg);
7734 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
7735 mask_arg = var;
7736 if (!useless_type_conversion_p (masktype, utype))
7737 {
7738 gcc_assert (TYPE_PRECISION (utype)
7739 <= TYPE_PRECISION (masktype));
7740 var = vect_get_new_ssa_name (masktype, vect_scalar_var);
7741 new_stmt = gimple_build_assign (var, NOP_EXPR, mask_arg);
7742 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
7743 mask_arg = var;
7744 }
7745 }
7746
7747 gcall *new_stmt
7748 = gimple_build_call (gs_info.decl, 5, ptr, mask_arg, op, src, scale);
7749 stmt_vec_info new_stmt_info
7750 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
7751
7752 if (prev_stmt_info == NULL)
7753 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
7754 else
7755 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
7756 prev_stmt_info = new_stmt_info;
7757 }
7758 return true;
7759 }
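/* OpenMP scan stores (SIMD_LANE_ACCESS_P of 3 or 4) are handled by the
   dedicated helper; the scan initializer stores (== 2) fall through and
   are vectorized like ordinary stores.  */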
7760 else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) >= 3)
7761 return vectorizable_scan_store (stmt_info, gsi, vec_stmt, ncopies);
7762
7763 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
7764 DR_GROUP_STORE_COUNT (DR_GROUP_FIRST_ELEMENT (stmt_info))++;
7765
7766 if (grouped_store)
7767 {
7768 /* FORNOW */
7769 gcc_assert (!loop || !nested_in_vect_loop_p (loop, stmt_info));
7770
7771 /* We vectorize all the stmts of the interleaving group when we
7772 reach the last stmt in the group. */
7773 if (DR_GROUP_STORE_COUNT (first_stmt_info)
7774 < DR_GROUP_SIZE (first_stmt_info)
7775 && !slp)
7776 {
7777 *vec_stmt = NULL;
7778 return true;
7779 }
7780
7781 if (slp)
7782 {
7783 grouped_store = false;
7784 /* VEC_NUM is the number of vect stmts to be created for this
7785 group. */
7786 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
7787 first_stmt_info = SLP_TREE_SCALAR_STMTS (slp_node)[0];
7788 gcc_assert (DR_GROUP_FIRST_ELEMENT (first_stmt_info)
7789 == first_stmt_info);
7790 first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
7791 op = vect_get_store_rhs (first_stmt_info);
7792 }
7793 else
7794 /* VEC_NUM is the number of vect stmts to be created for this
7795 group. */
7796 vec_num = group_size;
7797
7798 ref_type = get_group_alias_ptr_type (first_stmt_info);
7799 }
7800 else
7801 ref_type = reference_alias_ptr_type (DR_REF (first_dr_info->dr));
7802
7803 if (dump_enabled_p ())
7804 dump_printf_loc (MSG_NOTE, vect_location,
7805 "transform store. ncopies = %d\n", ncopies);
7806
7807 if (memory_access_type == VMAT_ELEMENTWISE
7808 || memory_access_type == VMAT_STRIDED_SLP)
7809 {
7810 gimple_stmt_iterator incr_gsi;
7811 bool insert_after;
7812 gimple *incr;
7813 tree offvar;
7814 tree ivstep;
7815 tree running_off;
7816 tree stride_base, stride_step, alias_off;
7817 tree vec_oprnd;
7818 tree dr_offset;
7819 unsigned int g;
7820 /* Checked by get_load_store_type. */
7821 unsigned int const_nunits = nunits.to_constant ();
7822
7823 gcc_assert (!LOOP_VINFO_FULLY_MASKED_P (loop_vinfo));
7824 gcc_assert (!nested_in_vect_loop_p (loop, stmt_info));
7825
7826 dr_offset = get_dr_vinfo_offset (first_dr_info);
7827 stride_base
7828 = fold_build_pointer_plus
7829 (DR_BASE_ADDRESS (first_dr_info->dr),
7830 size_binop (PLUS_EXPR,
7831 convert_to_ptrofftype (dr_offset),
7832 convert_to_ptrofftype (DR_INIT (first_dr_info->dr))));
7833 stride_step = fold_convert (sizetype, DR_STEP (first_dr_info->dr));
7834
7835 /* For a store with loop-invariant (but other than power-of-2)
7836 stride (i.e. not a grouped access) like so:
7837
7838 for (i = 0; i < n; i += stride)
7839 array[i] = ...;
7840
7841 we generate a new induction variable and new stores from
7842 the components of the (vectorized) rhs:
7843
7844 for (j = 0; ; j += VF*stride)
7845 vectemp = ...;
7846 tmp1 = vectemp[0];
7847 array[j] = tmp1;
7848 tmp2 = vectemp[1];
7849 array[j + stride] = tmp2;
7850 ...
7851 */
7852
7853 unsigned nstores = const_nunits;
7854 unsigned lnel = 1;
7855 tree ltype = elem_type;
7856 tree lvectype = vectype;
7857 if (slp)
7858 {
7859 if (group_size < const_nunits
7860 && const_nunits % group_size == 0)
7861 {
7862 nstores = const_nunits / group_size;
7863 lnel = group_size;
7864 ltype = build_vector_type (elem_type, group_size);
7865 lvectype = vectype;
7866
7867 /* First check whether the vec_extract optab supports extraction
7868 of the vector elts directly; if not, try the fallbacks below. */
7869 scalar_mode elmode = SCALAR_TYPE_MODE (elem_type);
7870 machine_mode vmode;
7871 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
7872 || !related_vector_mode (TYPE_MODE (vectype), elmode,
7873 group_size).exists (&vmode)
7874 || (convert_optab_handler (vec_extract_optab,
7875 TYPE_MODE (vectype), vmode)
7876 == CODE_FOR_nothing))
7877 {
7878 /* Try to avoid emitting an extract of vector elements
7879 by performing the extracts using an integer type of the
7880 same size, extracting from a vector of those and then
7881 re-interpreting it as the original vector type if
7882 supported. */
7883 unsigned lsize
7884 = group_size * GET_MODE_BITSIZE (elmode);
7885 unsigned int lnunits = const_nunits / group_size;
7886 /* If we can't construct such a vector fall back to
7887 element extracts from the original vector type and
7888 element size stores. */
7889 if (int_mode_for_size (lsize, 0).exists (&elmode)
7890 && VECTOR_MODE_P (TYPE_MODE (vectype))
7891 && related_vector_mode (TYPE_MODE (vectype), elmode,
7892 lnunits).exists (&vmode)
7893 && (convert_optab_handler (vec_extract_optab,
7894 vmode, elmode)
7895 != CODE_FOR_nothing))
7896 {
7897 nstores = lnunits;
7898 lnel = group_size;
7899 ltype = build_nonstandard_integer_type (lsize, 1);
7900 lvectype = build_vector_type (ltype, nstores);
7901 }
7902 /* Else fall back to vector extraction anyway.
7903 Fewer stores are more important than avoiding spilling
7904 of the vector we extract from. Compared to the
7905 construction case in vectorizable_load no store-forwarding
7906 issue exists here for reasonable archs. */
7907 }
7908 }
7909 else if (group_size >= const_nunits
7910 && group_size % const_nunits == 0)
7911 {
7912 nstores = 1;
7913 lnel = const_nunits;
7914 ltype = vectype;
7915 lvectype = vectype;
7916 }
7917 ltype = build_aligned_type (ltype, TYPE_ALIGN (elem_type));
7918 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
7919 }
7920
7921 ivstep = stride_step;
7922 ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (ivstep), ivstep,
7923 build_int_cst (TREE_TYPE (ivstep), vf));
7924
7925 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
7926
7927 stride_base = cse_and_gimplify_to_preheader (loop_vinfo, stride_base);
7928 ivstep = cse_and_gimplify_to_preheader (loop_vinfo, ivstep);
7929 create_iv (stride_base, ivstep, NULL,
7930 loop, &incr_gsi, insert_after,
7931 &offvar, NULL);
7932 incr = gsi_stmt (incr_gsi);
7933 loop_vinfo->add_stmt (incr);
7934
7935 stride_step = cse_and_gimplify_to_preheader (loop_vinfo, stride_step);
7936
7937 prev_stmt_info = NULL;
7938 alias_off = build_int_cst (ref_type, 0);
7939 stmt_vec_info next_stmt_info = first_stmt_info;
7940 for (g = 0; g < group_size; g++)
7941 {
7942 running_off = offvar;
7943 if (g)
7944 {
7945 tree size = TYPE_SIZE_UNIT (ltype);
7946 tree pos = fold_build2 (MULT_EXPR, sizetype, size_int (g),
7947 size);
7948 tree newoff = copy_ssa_name (running_off, NULL);
7949 incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
7950 running_off, pos);
7951 vect_finish_stmt_generation (stmt_info, incr, gsi);
7952 running_off = newoff;
7953 }
7954 unsigned int group_el = 0;
7955 unsigned HOST_WIDE_INT
7956 elsz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
7957 for (j = 0; j < ncopies; j++)
7958 {
7959 /* We've set op and dt above, from vect_get_store_rhs,
7960 and first_stmt_info == stmt_info. */
7961 if (j == 0)
7962 {
7963 if (slp)
7964 {
7965 vect_get_vec_defs (op, NULL_TREE, stmt_info,
7966 &vec_oprnds, NULL, slp_node);
7967 vec_oprnd = vec_oprnds[0];
7968 }
7969 else
7970 {
7971 op = vect_get_store_rhs (next_stmt_info);
7972 vec_oprnd = vect_get_vec_def_for_operand
7973 (op, next_stmt_info);
7974 }
7975 }
7976 else
7977 {
7978 if (slp)
7979 vec_oprnd = vec_oprnds[j];
7980 else
7981 vec_oprnd = vect_get_vec_def_for_stmt_copy (vinfo,
7982 vec_oprnd);
7983 }
7984 /* Pun the vector to extract from if necessary. */
7985 if (lvectype != vectype)
7986 {
7987 tree tem = make_ssa_name (lvectype);
7988 gimple *pun
7989 = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
7990 lvectype, vec_oprnd));
7991 vect_finish_stmt_generation (stmt_info, pun, gsi);
7992 vec_oprnd = tem;
7993 }
7994 for (i = 0; i < nstores; i++)
7995 {
7996 tree newref, newoff;
7997 gimple *incr, *assign;
7998 tree size = TYPE_SIZE (ltype);
7999 /* Extract the i'th component. */
8000 tree pos = fold_build2 (MULT_EXPR, bitsizetype,
8001 bitsize_int (i), size);
8002 tree elem = fold_build3 (BIT_FIELD_REF, ltype, vec_oprnd,
8003 size, pos);
8004
8005 elem = force_gimple_operand_gsi (gsi, elem, true,
8006 NULL_TREE, true,
8007 GSI_SAME_STMT);
8008
8009 tree this_off = build_int_cst (TREE_TYPE (alias_off),
8010 group_el * elsz);
8011 newref = build2 (MEM_REF, ltype,
8012 running_off, this_off);
8013 vect_copy_ref_info (newref, DR_REF (first_dr_info->dr));
8014
8015 /* And store it to *running_off. */
8016 assign = gimple_build_assign (newref, elem);
8017 stmt_vec_info assign_info
8018 = vect_finish_stmt_generation (stmt_info, assign, gsi);
8019
8020 group_el += lnel;
8021 if (! slp
8022 || group_el == group_size)
8023 {
8024 newoff = copy_ssa_name (running_off, NULL);
8025 incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
8026 running_off, stride_step);
8027 vect_finish_stmt_generation (stmt_info, incr, gsi);
8028
8029 running_off = newoff;
8030 group_el = 0;
8031 }
8032 if (g == group_size - 1
8033 && !slp)
8034 {
8035 if (j == 0 && i == 0)
8036 STMT_VINFO_VEC_STMT (stmt_info)
8037 = *vec_stmt = assign_info;
8038 else
8039 STMT_VINFO_RELATED_STMT (prev_stmt_info) = assign_info;
8040 prev_stmt_info = assign_info;
8041 }
8042 }
8043 }
8044 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
8045 if (slp)
8046 break;
8047 }
8048
8049 vec_oprnds.release ();
8050 return true;
8051 }
8052
8053 auto_vec<tree> dr_chain (group_size);
8054 oprnds.create (group_size);
8055
8056 /* Gather-scatter accesses perform only component accesses; alignment
8057 is irrelevant for them. */
8058 if (memory_access_type == VMAT_GATHER_SCATTER)
8059 alignment_support_scheme = dr_unaligned_supported;
8060 else
8061 alignment_support_scheme
8062 = vect_supportable_dr_alignment (first_dr_info, false);
8063
8064 gcc_assert (alignment_support_scheme);
8065 vec_loop_masks *loop_masks
8066 = (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
8067 ? &LOOP_VINFO_MASKS (loop_vinfo)
8068 : NULL);
8069 /* Targets with store-lane instructions must not require explicit
8070 realignment. vect_supportable_dr_alignment always returns either
8071 dr_aligned or dr_unaligned_supported for masked operations. */
8072 gcc_assert ((memory_access_type != VMAT_LOAD_STORE_LANES
8073 && !mask
8074 && !loop_masks)
8075 || alignment_support_scheme == dr_aligned
8076 || alignment_support_scheme == dr_unaligned_supported);
8077
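/* For a contiguous access with a negative step the first vector access
   covers the NUNITS elements ending at the first scalar element, so
   bias the initial address by -(nunits - 1) elements to point at its
   lowest-addressed element.  */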
8078 if (memory_access_type == VMAT_CONTIGUOUS_DOWN
8079 || memory_access_type == VMAT_CONTIGUOUS_REVERSE)
8080 offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
8081
8082 tree bump;
8083 tree vec_offset = NULL_TREE;
8084 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
8085 {
8086 aggr_type = NULL_TREE;
8087 bump = NULL_TREE;
8088 }
8089 else if (memory_access_type == VMAT_GATHER_SCATTER)
8090 {
8091 aggr_type = elem_type;
8092 vect_get_strided_load_store_ops (stmt_info, loop_vinfo, &gs_info,
8093 &bump, &vec_offset);
8094 }
8095 else
8096 {
8097 if (memory_access_type == VMAT_LOAD_STORE_LANES)
8098 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
8099 else
8100 aggr_type = vectype;
8101 bump = vect_get_data_ptr_increment (dr_info, aggr_type,
8102 memory_access_type);
8103 }
8104
8105 if (mask)
8106 LOOP_VINFO_HAS_MASK_STORE (loop_vinfo) = true;
8107
8108 /* In case the vectorization factor (VF) is bigger than the number
8109 of elements that we can fit in a vectype (nunits), we have to generate
8110 more than one vector stmt - i.e - we need to "unroll" the
8111 vector stmt by a factor VF/nunits. For more details see documentation in
8112 vect_get_vec_def_for_copy_stmt. */
8113
8114 /* In case of interleaving (non-unit grouped access):
8115
8116 S1: &base + 2 = x2
8117 S2: &base = x0
8118 S3: &base + 1 = x1
8119 S4: &base + 3 = x3
8120
8121 We create vectorized stores starting from base address (the access of the
8122 first stmt in the chain (S2 in the above example), when the last store stmt
8123 of the chain (S4) is reached:
8124
8125 VS1: &base = vx2
8126 VS2: &base + vec_size*1 = vx0
8127 VS3: &base + vec_size*2 = vx1
8128 VS4: &base + vec_size*3 = vx3
8129
8130 Then permutation statements are generated:
8131
8132 VS5: vx5 = VEC_PERM_EXPR < vx0, vx3, {0, 8, 1, 9, 2, 10, 3, 11} >
8133 VS6: vx6 = VEC_PERM_EXPR < vx0, vx3, {4, 12, 5, 13, 6, 14, 7, 15} >
8134 ...
8135
8136 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
8137 (the order of the data-refs in the output of vect_permute_store_chain
8138 corresponds to the order of scalar stmts in the interleaving chain - see
8139 the documentation of vect_permute_store_chain()).
8140
8141 In case of both multiple types and interleaving, above vector stores and
8142 permutation stmts are created for every copy. The result vector stmts are
8143 put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
8144 STMT_VINFO_RELATED_STMT for the next copies.
8145 */
8146
8147 prev_stmt_info = NULL;
8148 tree vec_mask = NULL_TREE;
8149 for (j = 0; j < ncopies; j++)
8150 {
8151 stmt_vec_info new_stmt_info;
8152 if (j == 0)
8153 {
8154 if (slp)
8155 {
8156 /* Get vectorized arguments for SLP_NODE. */
8157 vect_get_vec_defs (op, NULL_TREE, stmt_info, &vec_oprnds,
8158 NULL, slp_node);
8159
8160 vec_oprnd = vec_oprnds[0];
8161 }
8162 else
8163 {
8164 /* For interleaved stores we collect vectorized defs for all the
8165 stores in the group in DR_CHAIN and OPRNDS. DR_CHAIN is then
8166 used as an input to vect_permute_store_chain(), and OPRNDS as
8167 an input to vect_get_vec_def_for_stmt_copy() for the next copy.
8168
8169 If the store is not grouped, DR_GROUP_SIZE is 1, and DR_CHAIN and
8170 OPRNDS are of size 1. */
8171 stmt_vec_info next_stmt_info = first_stmt_info;
8172 for (i = 0; i < group_size; i++)
8173 {
8174 /* Since gaps are not supported for interleaved stores,
8175 DR_GROUP_SIZE is the exact number of stmts in the chain.
8176 Therefore, NEXT_STMT_INFO can't be NULL_TREE. In case
8177 that there is no interleaving, DR_GROUP_SIZE is 1,
8178 and only one iteration of the loop will be executed. */
8179 op = vect_get_store_rhs (next_stmt_info);
8180 vec_oprnd = vect_get_vec_def_for_operand
8181 (op, next_stmt_info);
8182 dr_chain.quick_push (vec_oprnd);
8183 oprnds.quick_push (vec_oprnd);
8184 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
8185 }
8186 if (mask)
8187 vec_mask = vect_get_vec_def_for_operand (mask, stmt_info,
8188 mask_vectype);
8189 }
8190
8191 /* We should have caught mismatched types earlier. */
8192 gcc_assert (useless_type_conversion_p (vectype,
8193 TREE_TYPE (vec_oprnd)));
8194 bool simd_lane_access_p
8195 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) != 0;
8196 if (simd_lane_access_p
8197 && !loop_masks
8198 && TREE_CODE (DR_BASE_ADDRESS (first_dr_info->dr)) == ADDR_EXPR
8199 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr_info->dr), 0))
8200 && integer_zerop (get_dr_vinfo_offset (first_dr_info))
8201 && integer_zerop (DR_INIT (first_dr_info->dr))
8202 && alias_sets_conflict_p (get_alias_set (aggr_type),
8203 get_alias_set (TREE_TYPE (ref_type))))
8204 {
8205 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr_info->dr));
8206 dataref_offset = build_int_cst (ref_type, 0);
8207 }
8208 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
8209 vect_get_gather_scatter_ops (loop, stmt_info, &gs_info,
8210 &dataref_ptr, &vec_offset);
8211 else
8212 dataref_ptr
8213 = vect_create_data_ref_ptr (first_stmt_info, aggr_type,
8214 simd_lane_access_p ? loop : NULL,
8215 offset, &dummy, gsi, &ptr_incr,
8216 simd_lane_access_p, NULL_TREE, bump);
8217 }
8218 else
8219 {
8220 /* For interleaved stores we created vectorized defs for all the
8221 defs stored in OPRNDS in the previous iteration (previous copy).
8222 DR_CHAIN is then used as an input to vect_permute_store_chain(),
8223 and OPRNDS as an input to vect_get_vec_def_for_stmt_copy() for the
8224 next copy.
8225 If the store is not grouped, DR_GROUP_SIZE is 1, and DR_CHAIN and
8226 OPRNDS are of size 1. */
8227 for (i = 0; i < group_size; i++)
8228 {
8229 op = oprnds[i];
8230 vec_oprnd = vect_get_vec_def_for_stmt_copy (vinfo, op);
8231 dr_chain[i] = vec_oprnd;
8232 oprnds[i] = vec_oprnd;
8233 }
8234 if (mask)
8235 vec_mask = vect_get_vec_def_for_stmt_copy (vinfo, vec_mask);
8236 if (dataref_offset)
8237 dataref_offset
8238 = int_const_binop (PLUS_EXPR, dataref_offset, bump);
8239 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
8240 vec_offset = vect_get_vec_def_for_stmt_copy (vinfo, vec_offset);
8241 else
8242 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
8243 stmt_info, bump);
8244 }
8245
8246 if (memory_access_type == VMAT_LOAD_STORE_LANES)
8247 {
8248 tree vec_array;
8249
8250 /* Get an array into which we can store the individual vectors. */
8251 vec_array = create_vector_array (vectype, vec_num);
8252
8253 /* Invalidate the current contents of VEC_ARRAY. This should
8254 become an RTL clobber too, which prevents the vector registers
8255 from being upward-exposed. */
8256 vect_clobber_variable (stmt_info, gsi, vec_array);
8257
8258 /* Store the individual vectors into the array. */
8259 for (i = 0; i < vec_num; i++)
8260 {
8261 vec_oprnd = dr_chain[i];
8262 write_vector_array (stmt_info, gsi, vec_oprnd, vec_array, i);
8263 }
8264
8265 tree final_mask = NULL;
8266 if (loop_masks)
8267 final_mask = vect_get_loop_mask (gsi, loop_masks, ncopies,
8268 vectype, j);
8269 if (vec_mask)
8270 final_mask = prepare_load_store_mask (mask_vectype, final_mask,
8271 vec_mask, gsi);
8272
8273 gcall *call;
8274 if (final_mask)
8275 {
8276 /* Emit:
8277 MASK_STORE_LANES (DATAREF_PTR, ALIAS_PTR, VEC_MASK,
8278 VEC_ARRAY). */
8279 unsigned int align = TYPE_ALIGN (TREE_TYPE (vectype));
8280 tree alias_ptr = build_int_cst (ref_type, align);
8281 call = gimple_build_call_internal (IFN_MASK_STORE_LANES, 4,
8282 dataref_ptr, alias_ptr,
8283 final_mask, vec_array);
8284 }
8285 else
8286 {
8287 /* Emit:
8288 MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY). */
8289 data_ref = create_array_ref (aggr_type, dataref_ptr, ref_type);
8290 call = gimple_build_call_internal (IFN_STORE_LANES, 1,
8291 vec_array);
8292 gimple_call_set_lhs (call, data_ref);
8293 }
8294 gimple_call_set_nothrow (call, true);
8295 new_stmt_info = vect_finish_stmt_generation (stmt_info, call, gsi);
8296
8297 /* Record that VEC_ARRAY is now dead. */
8298 vect_clobber_variable (stmt_info, gsi, vec_array);
8299 }
8300 else
8301 {
8302 new_stmt_info = NULL;
8303 if (grouped_store)
8304 {
8305 if (j == 0)
8306 result_chain.create (group_size);
8307 /* Permute. */
8308 vect_permute_store_chain (dr_chain, group_size, stmt_info, gsi,
8309 &result_chain);
8310 }
8311
8312 stmt_vec_info next_stmt_info = first_stmt_info;
8313 for (i = 0; i < vec_num; i++)
8314 {
8315 unsigned misalign;
8316 unsigned HOST_WIDE_INT align;
8317
8318 tree final_mask = NULL_TREE;
8319 if (loop_masks)
8320 final_mask = vect_get_loop_mask (gsi, loop_masks,
8321 vec_num * ncopies,
8322 vectype, vec_num * j + i);
8323 if (vec_mask)
8324 final_mask = prepare_load_store_mask (mask_vectype, final_mask,
8325 vec_mask, gsi);
8326
8327 if (memory_access_type == VMAT_GATHER_SCATTER)
8328 {
8329 tree scale = size_int (gs_info.scale);
8330 gcall *call;
8331 if (loop_masks)
8332 call = gimple_build_call_internal
8333 (IFN_MASK_SCATTER_STORE, 5, dataref_ptr, vec_offset,
8334 scale, vec_oprnd, final_mask);
8335 else
8336 call = gimple_build_call_internal
8337 (IFN_SCATTER_STORE, 4, dataref_ptr, vec_offset,
8338 scale, vec_oprnd);
8339 gimple_call_set_nothrow (call, true);
8340 new_stmt_info
8341 = vect_finish_stmt_generation (stmt_info, call, gsi);
8342 break;
8343 }
8344
8345 if (i > 0)
8346 /* Bump the vector pointer. */
8347 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
8348 stmt_info, bump);
8349
8350 if (slp)
8351 vec_oprnd = vec_oprnds[i];
8352 else if (grouped_store)
8353 /* For grouped stores vectorized defs are interleaved in
8354 vect_permute_store_chain(). */
8355 vec_oprnd = result_chain[i];
8356
8357 align = known_alignment (DR_TARGET_ALIGNMENT (first_dr_info));
8358 if (aligned_access_p (first_dr_info))
8359 misalign = 0;
8360 else if (DR_MISALIGNMENT (first_dr_info) == -1)
8361 {
8362 align = dr_alignment (vect_dr_behavior (first_dr_info));
8363 misalign = 0;
8364 }
8365 else
8366 misalign = DR_MISALIGNMENT (first_dr_info);
8367 if (dataref_offset == NULL_TREE
8368 && TREE_CODE (dataref_ptr) == SSA_NAME)
8369 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
8370 misalign);
8371
8372 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
8373 {
8374 tree perm_mask = perm_mask_for_reverse (vectype);
8375 tree perm_dest = vect_create_destination_var
8376 (vect_get_store_rhs (stmt_info), vectype);
8377 tree new_temp = make_ssa_name (perm_dest);
8378
8379 /* Generate the permute statement. */
8380 gimple *perm_stmt
8381 = gimple_build_assign (new_temp, VEC_PERM_EXPR, vec_oprnd,
8382 vec_oprnd, perm_mask);
8383 vect_finish_stmt_generation (stmt_info, perm_stmt, gsi);
8384
8385 perm_stmt = SSA_NAME_DEF_STMT (new_temp);
8386 vec_oprnd = new_temp;
8387 }
8388
8389 /* Arguments are ready. Create the new vector stmt. */
8390 if (final_mask)
8391 {
8392 align = least_bit_hwi (misalign | align);
8393 tree ptr = build_int_cst (ref_type, align * BITS_PER_UNIT);
8394 gcall *call
8395 = gimple_build_call_internal (IFN_MASK_STORE, 4,
8396 dataref_ptr, ptr,
8397 final_mask, vec_oprnd);
8398 gimple_call_set_nothrow (call, true);
8399 new_stmt_info
8400 = vect_finish_stmt_generation (stmt_info, call, gsi);
8401 }
8402 else
8403 {
8404 data_ref = fold_build2 (MEM_REF, vectype,
8405 dataref_ptr,
8406 dataref_offset
8407 ? dataref_offset
8408 : build_int_cst (ref_type, 0));
8409 if (aligned_access_p (first_dr_info))
8410 ;
8411 else if (DR_MISALIGNMENT (first_dr_info) == -1)
8412 TREE_TYPE (data_ref)
8413 = build_aligned_type (TREE_TYPE (data_ref),
8414 align * BITS_PER_UNIT);
8415 else
8416 TREE_TYPE (data_ref)
8417 = build_aligned_type (TREE_TYPE (data_ref),
8418 TYPE_ALIGN (elem_type));
8419 vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
8420 gassign *new_stmt
8421 = gimple_build_assign (data_ref, vec_oprnd);
8422 new_stmt_info
8423 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
8424 }
8425
8426 if (slp)
8427 continue;
8428
8429 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
8430 if (!next_stmt_info)
8431 break;
8432 }
8433 }
8434 if (!slp)
8435 {
8436 if (j == 0)
8437 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
8438 else
8439 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
8440 prev_stmt_info = new_stmt_info;
8441 }
8442 }
8443
8444 oprnds.release ();
8445 result_chain.release ();
8446 vec_oprnds.release ();
8447
8448 return true;
8449 }
8450
8451 /* Given a vector type VECTYPE, turns permutation SEL into the equivalent
8452 VECTOR_CST mask. No checks are made that the target platform supports the
8453 mask, so callers may wish to test can_vec_perm_const_p separately, or use
8454 vect_gen_perm_mask_checked. */
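
   For example (illustrative only): for a 4-element vector type and
   SEL = { 3, 2, 1, 0 }, the result is a VECTOR_CST of type
   vector(4) ssizetype with elements { 3, 2, 1, 0 }, suitable as the
   selector of a VEC_PERM_EXPR that reverses the input vector:

     vx' = VEC_PERM_EXPR <vx, vx, { 3, 2, 1, 0 }>;  */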
8455
8456 tree
8457 vect_gen_perm_mask_any (tree vectype, const vec_perm_indices &sel)
8458 {
8459 tree mask_type;
8460
8461 poly_uint64 nunits = sel.length ();
8462 gcc_assert (known_eq (nunits, TYPE_VECTOR_SUBPARTS (vectype)));
8463
8464 mask_type = build_vector_type (ssizetype, nunits);
8465 return vec_perm_indices_to_tree (mask_type, sel);
8466 }
8467
8468 /* Checked version of vect_gen_perm_mask_any. Asserts can_vec_perm_const_p,
8469 i.e. that the target supports the pattern _for arbitrary input vectors_. */
8470
8471 tree
8472 vect_gen_perm_mask_checked (tree vectype, const vec_perm_indices &sel)
8473 {
8474 gcc_assert (can_vec_perm_const_p (TYPE_MODE (vectype), sel));
8475 return vect_gen_perm_mask_any (vectype, sel);
8476 }
8477
8478 /* Given vector variables X and Y that were generated for the scalar
8479    STMT_INFO, generate instructions to permute the vector elements of X and Y
8480    using permutation mask MASK_VEC, insert them at *GSI and return the
8481    permuted vector variable.  */
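
/* Illustrative example: the selector indexes into the concatenation of
   X and Y, so with 4-element vectors MASK_VEC = { 0, 4, 1, 5 } produces

     perm = VEC_PERM_EXPR <x, y, { 0, 4, 1, 5 }>
          = { x[0], y[0], x[1], y[1] }

   i.e. indices 0..3 select from X and indices 4..7 select from Y.  */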
8482
8483 static tree
8484 permute_vec_elements (tree x, tree y, tree mask_vec, stmt_vec_info stmt_info,
8485 gimple_stmt_iterator *gsi)
8486 {
8487 tree vectype = TREE_TYPE (x);
8488 tree perm_dest, data_ref;
8489 gimple *perm_stmt;
8490
8491 tree scalar_dest = gimple_get_lhs (stmt_info->stmt);
8492 if (scalar_dest && TREE_CODE (scalar_dest) == SSA_NAME)
8493 perm_dest = vect_create_destination_var (scalar_dest, vectype);
8494 else
8495 perm_dest = vect_get_new_vect_var (vectype, vect_simple_var, NULL);
8496 data_ref = make_ssa_name (perm_dest);
8497
8498 /* Generate the permute statement. */
8499 perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, x, y, mask_vec);
8500 vect_finish_stmt_generation (stmt_info, perm_stmt, gsi);
8501
8502 return data_ref;
8503 }
8504
8505 /* Hoist the definitions of all SSA uses on STMT_INFO out of the loop LOOP,
8506    inserting them on the loop's preheader edge.  Returns true if we
8507    were successful in doing so (and thus STMT_INFO can then be moved),
8508    otherwise returns false.  */
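
/* For example (illustrative only): given the invariant load

     loop:
       p_5 = base_2(D) + 16;
       x_6 = *p_5;

   the definition of p_5 only uses SSA names defined outside the loop,
   so it is moved to the preheader edge, after which the load
   x_6 = *p_5 itself can be hoisted by the caller.  */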
8509
8510 static bool
8511 hoist_defs_of_uses (stmt_vec_info stmt_info, class loop *loop)
8512 {
8513 ssa_op_iter i;
8514 tree op;
8515 bool any = false;
8516
8517 FOR_EACH_SSA_TREE_OPERAND (op, stmt_info->stmt, i, SSA_OP_USE)
8518 {
8519 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
8520 if (!gimple_nop_p (def_stmt)
8521 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
8522 {
8523 /* Make sure we don't need to recurse. While we could do
8524 so in simple cases when there are more complex use webs
8525 we don't have an easy way to preserve stmt order to fulfil
8526 dependencies within them. */
8527 tree op2;
8528 ssa_op_iter i2;
8529 if (gimple_code (def_stmt) == GIMPLE_PHI)
8530 return false;
8531 FOR_EACH_SSA_TREE_OPERAND (op2, def_stmt, i2, SSA_OP_USE)
8532 {
8533 gimple *def_stmt2 = SSA_NAME_DEF_STMT (op2);
8534 if (!gimple_nop_p (def_stmt2)
8535 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt2)))
8536 return false;
8537 }
8538 any = true;
8539 }
8540 }
8541
8542 if (!any)
8543 return true;
8544
8545 FOR_EACH_SSA_TREE_OPERAND (op, stmt_info->stmt, i, SSA_OP_USE)
8546 {
8547 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
8548 if (!gimple_nop_p (def_stmt)
8549 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
8550 {
8551 gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
8552 gsi_remove (&gsi, false);
8553 gsi_insert_on_edge_immediate (loop_preheader_edge (loop), def_stmt);
8554 }
8555 }
8556
8557 return true;
8558 }
8559
8560 /* vectorizable_load.
8561
8562    Check if STMT_INFO reads a non-scalar data-ref (array/pointer/structure)
8563 that can be vectorized.
8564 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
8565 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
8566 Return true if STMT_INFO is vectorizable in this way. */
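
/* A minimal illustration (assuming a 4-element vector type): the scalar
   load

     S: x_1 = a[i];

   is replaced by a vector load such as

     VS: vx_2 = MEM <vector(4) int> [(int *)vectp_a];

   which is recorded in STMT_VINFO_VEC_STMT (and returned in VEC_STMT).  */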
8567
8568 static bool
8569 vectorizable_load (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
8570 stmt_vec_info *vec_stmt, slp_tree slp_node,
8571 slp_instance slp_node_instance,
8572 stmt_vector_for_cost *cost_vec)
8573 {
8574 tree scalar_dest;
8575 tree vec_dest = NULL;
8576 tree data_ref = NULL;
8577 stmt_vec_info prev_stmt_info;
8578 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
8579 class loop *loop = NULL;
8580 class loop *containing_loop = gimple_bb (stmt_info->stmt)->loop_father;
8581 bool nested_in_vect_loop = false;
8582 tree elem_type;
8583 tree new_temp;
8584 machine_mode mode;
8585 tree dummy;
8586 enum dr_alignment_support alignment_support_scheme;
8587 tree dataref_ptr = NULL_TREE;
8588 tree dataref_offset = NULL_TREE;
8589 gimple *ptr_incr = NULL;
8590 int ncopies;
8591 int i, j;
8592 unsigned int group_size;
8593 poly_uint64 group_gap_adj;
8594 tree msq = NULL_TREE, lsq;
8595 tree offset = NULL_TREE;
8596 tree byte_offset = NULL_TREE;
8597 tree realignment_token = NULL_TREE;
8598 gphi *phi = NULL;
8599 vec<tree> dr_chain = vNULL;
8600 bool grouped_load = false;
8601 stmt_vec_info first_stmt_info;
8602 stmt_vec_info first_stmt_info_for_drptr = NULL;
8603 bool compute_in_loop = false;
8604 class loop *at_loop;
8605 int vec_num;
8606 bool slp = (slp_node != NULL);
8607 bool slp_perm = false;
8608 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
8609 poly_uint64 vf;
8610 tree aggr_type;
8611 gather_scatter_info gs_info;
8612 vec_info *vinfo = stmt_info->vinfo;
8613 tree ref_type;
8614 enum vect_def_type mask_dt = vect_unknown_def_type;
8615
8616 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
8617 return false;
8618
8619 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
8620 && ! vec_stmt)
8621 return false;
8622
8623 tree mask = NULL_TREE, mask_vectype = NULL_TREE;
8624 if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
8625 {
8626 scalar_dest = gimple_assign_lhs (assign);
8627 if (TREE_CODE (scalar_dest) != SSA_NAME)
8628 return false;
8629
8630 tree_code code = gimple_assign_rhs_code (assign);
8631 if (code != ARRAY_REF
8632 && code != BIT_FIELD_REF
8633 && code != INDIRECT_REF
8634 && code != COMPONENT_REF
8635 && code != IMAGPART_EXPR
8636 && code != REALPART_EXPR
8637 && code != MEM_REF
8638 && TREE_CODE_CLASS (code) != tcc_declaration)
8639 return false;
8640 }
8641 else
8642 {
8643 gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
8644 if (!call || !gimple_call_internal_p (call))
8645 return false;
8646
8647 internal_fn ifn = gimple_call_internal_fn (call);
8648 if (!internal_load_fn_p (ifn))
8649 return false;
8650
8651 scalar_dest = gimple_call_lhs (call);
8652 if (!scalar_dest)
8653 return false;
8654
8655 int mask_index = internal_fn_mask_index (ifn);
8656 if (mask_index >= 0)
8657 {
8658 mask = gimple_call_arg (call, mask_index);
8659 if (!vect_check_scalar_mask (stmt_info, mask, &mask_dt,
8660 &mask_vectype))
8661 return false;
8662 }
8663 }
8664
8665 if (!STMT_VINFO_DATA_REF (stmt_info))
8666 return false;
8667
8668 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
8669 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
8670
8671 if (loop_vinfo)
8672 {
8673 loop = LOOP_VINFO_LOOP (loop_vinfo);
8674 nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt_info);
8675 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
8676 }
8677 else
8678 vf = 1;
8679
8680 /* Multiple types in SLP are handled by creating the appropriate number of
8681 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
8682 case of SLP. */
8683 if (slp)
8684 ncopies = 1;
8685 else
8686 ncopies = vect_get_num_copies (loop_vinfo, vectype);
8687
8688 gcc_assert (ncopies >= 1);
8689
8690 /* FORNOW. This restriction should be relaxed. */
8691 if (nested_in_vect_loop && ncopies > 1)
8692 {
8693 if (dump_enabled_p ())
8694 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8695 "multiple types in nested loop.\n");
8696 return false;
8697 }
8698
8699 /* Invalidate assumptions made by dependence analysis when vectorization
8700 on the unrolled body effectively re-orders stmts. */
8701 if (ncopies > 1
8702 && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
8703 && maybe_gt (LOOP_VINFO_VECT_FACTOR (loop_vinfo),
8704 STMT_VINFO_MIN_NEG_DIST (stmt_info)))
8705 {
8706 if (dump_enabled_p ())
8707 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8708 "cannot perform implicit CSE when unrolling "
8709 "with negative dependence distance\n");
8710 return false;
8711 }
8712
8713 elem_type = TREE_TYPE (vectype);
8714 mode = TYPE_MODE (vectype);
8715
8716   /* FORNOW. In some cases we can vectorize even if the data type is not
8717      supported (e.g. data copies).  */
8718 if (optab_handler (mov_optab, mode) == CODE_FOR_nothing)
8719 {
8720 if (dump_enabled_p ())
8721 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8722 "Aligned load, but unsupported type.\n");
8723 return false;
8724 }
8725
8726 /* Check if the load is a part of an interleaving chain. */
8727 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
8728 {
8729 grouped_load = true;
8730 /* FORNOW */
8731 gcc_assert (!nested_in_vect_loop);
8732 gcc_assert (!STMT_VINFO_GATHER_SCATTER_P (stmt_info));
8733
8734 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
8735 group_size = DR_GROUP_SIZE (first_stmt_info);
8736
8737 /* Refuse non-SLP vectorization of SLP-only groups. */
8738 if (!slp && STMT_VINFO_SLP_VECT_ONLY (first_stmt_info))
8739 {
8740 if (dump_enabled_p ())
8741 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8742 "cannot vectorize load in non-SLP mode.\n");
8743 return false;
8744 }
8745
8746 if (slp && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
8747 slp_perm = true;
8748
8749 /* Invalidate assumptions made by dependence analysis when vectorization
8750 on the unrolled body effectively re-orders stmts. */
8751 if (!PURE_SLP_STMT (stmt_info)
8752 && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
8753 && maybe_gt (LOOP_VINFO_VECT_FACTOR (loop_vinfo),
8754 STMT_VINFO_MIN_NEG_DIST (stmt_info)))
8755 {
8756 if (dump_enabled_p ())
8757 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8758 "cannot perform implicit CSE when performing "
8759 "group loads with negative dependence distance\n");
8760 return false;
8761 }
8762 }
8763 else
8764 group_size = 1;
8765
8766 vect_memory_access_type memory_access_type;
8767 if (!get_load_store_type (stmt_info, vectype, slp, mask, VLS_LOAD, ncopies,
8768 &memory_access_type, &gs_info))
8769 return false;
8770
8771 if (mask)
8772 {
8773 if (memory_access_type == VMAT_CONTIGUOUS)
8774 {
8775 machine_mode vec_mode = TYPE_MODE (vectype);
8776 if (!VECTOR_MODE_P (vec_mode)
8777 || !can_vec_mask_load_store_p (vec_mode,
8778 TYPE_MODE (mask_vectype), true))
8779 return false;
8780 }
8781 else if (memory_access_type != VMAT_LOAD_STORE_LANES
8782 && memory_access_type != VMAT_GATHER_SCATTER)
8783 {
8784 if (dump_enabled_p ())
8785 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8786 "unsupported access type for masked load.\n");
8787 return false;
8788 }
8789 }
8790
8791 if (!vec_stmt) /* transformation not required. */
8792 {
8793 if (!slp)
8794 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;
8795
8796 if (loop_vinfo
8797 && LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo))
8798 check_load_store_masking (loop_vinfo, vectype, VLS_LOAD, group_size,
8799 memory_access_type, &gs_info, mask);
8800
8801 STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
8802 vect_model_load_cost (stmt_info, ncopies, memory_access_type,
8803 slp_node_instance, slp_node, cost_vec);
8804 return true;
8805 }
8806
8807 if (!slp)
8808 gcc_assert (memory_access_type
8809 == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info));
8810
8811 if (dump_enabled_p ())
8812 dump_printf_loc (MSG_NOTE, vect_location,
8813 "transform load. ncopies = %d\n", ncopies);
8814
8815 /* Transform. */
8816
8817 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info), *first_dr_info = NULL;
8818 ensure_base_align (dr_info);
8819
8820 if (memory_access_type == VMAT_GATHER_SCATTER && gs_info.decl)
8821 {
8822 vect_build_gather_load_calls (stmt_info, gsi, vec_stmt, &gs_info, mask);
8823 return true;
8824 }
8825
8826 if (memory_access_type == VMAT_INVARIANT)
8827 {
8828 gcc_assert (!grouped_load && !mask && !bb_vinfo);
8829 /* If we have versioned for aliasing or the loop doesn't
8830 have any data dependencies that would preclude this,
8831 then we are sure this is a loop invariant load and
8832 thus we can insert it on the preheader edge. */
8833 bool hoist_p = (LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo)
8834 && !nested_in_vect_loop
8835 && hoist_defs_of_uses (stmt_info, loop));
8836 if (hoist_p)
8837 {
8838 gassign *stmt = as_a <gassign *> (stmt_info->stmt);
8839 if (dump_enabled_p ())
8840 dump_printf_loc (MSG_NOTE, vect_location,
8841 "hoisting out of the vectorized loop: %G", stmt);
8842 scalar_dest = copy_ssa_name (scalar_dest);
8843 tree rhs = unshare_expr (gimple_assign_rhs1 (stmt));
8844 gsi_insert_on_edge_immediate
8845 (loop_preheader_edge (loop),
8846 gimple_build_assign (scalar_dest, rhs));
8847 }
8848 /* These copies are all equivalent, but currently the representation
8849 requires a separate STMT_VINFO_VEC_STMT for each one. */
8850 prev_stmt_info = NULL;
8851 gimple_stmt_iterator gsi2 = *gsi;
8852 gsi_next (&gsi2);
8853 for (j = 0; j < ncopies; j++)
8854 {
8855 stmt_vec_info new_stmt_info;
8856 if (hoist_p)
8857 {
8858 new_temp = vect_init_vector (stmt_info, scalar_dest,
8859 vectype, NULL);
8860 gimple *new_stmt = SSA_NAME_DEF_STMT (new_temp);
8861 new_stmt_info = vinfo->add_stmt (new_stmt);
8862 }
8863 else
8864 {
8865 new_temp = vect_init_vector (stmt_info, scalar_dest,
8866 vectype, &gsi2);
8867 new_stmt_info = vinfo->lookup_def (new_temp);
8868 }
8869 if (slp)
8870 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
8871 else if (j == 0)
8872 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
8873 else
8874 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
8875 prev_stmt_info = new_stmt_info;
8876 }
8877 return true;
8878 }
8879
8880 if (memory_access_type == VMAT_ELEMENTWISE
8881 || memory_access_type == VMAT_STRIDED_SLP)
8882 {
8883 gimple_stmt_iterator incr_gsi;
8884 bool insert_after;
8885 gimple *incr;
8886 tree offvar;
8887 tree ivstep;
8888 tree running_off;
8889 vec<constructor_elt, va_gc> *v = NULL;
8890 tree stride_base, stride_step, alias_off;
8891 /* Checked by get_load_store_type. */
8892 unsigned int const_nunits = nunits.to_constant ();
8893 unsigned HOST_WIDE_INT cst_offset = 0;
8894 tree dr_offset;
8895
8896 gcc_assert (!LOOP_VINFO_FULLY_MASKED_P (loop_vinfo));
8897 gcc_assert (!nested_in_vect_loop);
8898
8899 if (grouped_load)
8900 {
8901 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
8902 first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
8903 }
8904 else
8905 {
8906 first_stmt_info = stmt_info;
8907 first_dr_info = dr_info;
8908 }
8909 if (slp && grouped_load)
8910 {
8911 group_size = DR_GROUP_SIZE (first_stmt_info);
8912 ref_type = get_group_alias_ptr_type (first_stmt_info);
8913 }
8914 else
8915 {
8916 if (grouped_load)
8917 cst_offset
8918 = (tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)))
8919 * vect_get_place_in_interleaving_chain (stmt_info,
8920 first_stmt_info));
8921 group_size = 1;
8922 ref_type = reference_alias_ptr_type (DR_REF (dr_info->dr));
8923 }
8924
8925 dr_offset = get_dr_vinfo_offset (first_dr_info);
8926 stride_base
8927 = fold_build_pointer_plus
8928 (DR_BASE_ADDRESS (first_dr_info->dr),
8929 size_binop (PLUS_EXPR,
8930 convert_to_ptrofftype (dr_offset),
8931 convert_to_ptrofftype (DR_INIT (first_dr_info->dr))));
8932 stride_step = fold_convert (sizetype, DR_STEP (first_dr_info->dr));
8933
8934 /* For a load with loop-invariant (but other than power-of-2)
8935 stride (i.e. not a grouped access) like so:
8936
8937 for (i = 0; i < n; i += stride)
8938 ... = array[i];
8939
8940 we generate a new induction variable and new accesses to
8941 form a new vector (or vectors, depending on ncopies):
8942
8943 for (j = 0; ; j += VF*stride)
8944 tmp1 = array[j];
8945 tmp2 = array[j + stride];
8946 ...
8947 vectemp = {tmp1, tmp2, ...}
8948 */
8949
8950 ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (stride_step), stride_step,
8951 build_int_cst (TREE_TYPE (stride_step), vf));
8952
8953 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
8954
8955 stride_base = cse_and_gimplify_to_preheader (loop_vinfo, stride_base);
8956 ivstep = cse_and_gimplify_to_preheader (loop_vinfo, ivstep);
8957 create_iv (stride_base, ivstep, NULL,
8958 loop, &incr_gsi, insert_after,
8959 &offvar, NULL);
8960 incr = gsi_stmt (incr_gsi);
8961 loop_vinfo->add_stmt (incr);
8962
8963 stride_step = cse_and_gimplify_to_preheader (loop_vinfo, stride_step);
8964
8965 prev_stmt_info = NULL;
8966 running_off = offvar;
8967 alias_off = build_int_cst (ref_type, 0);
8968 int nloads = const_nunits;
8969 int lnel = 1;
8970 tree ltype = TREE_TYPE (vectype);
8971 tree lvectype = vectype;
8972 auto_vec<tree> dr_chain;
8973 if (memory_access_type == VMAT_STRIDED_SLP)
8974 {
8975 if (group_size < const_nunits)
8976 {
8977 /* First check if vec_init optab supports construction from vector
8978 elts directly. Otherwise avoid emitting a constructor of
8979 vector elements by performing the loads using an integer type
8980 of the same size, constructing a vector of those and then
8981 re-interpreting it as the original vector type. This avoids a
8982 huge runtime penalty due to the general inability to perform
8983 store forwarding from smaller stores to a larger load. */
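	      /* Illustrative example (assuming vector(4) int and
		 group_size == 2, taking the integer fallback path): instead
		 of four int loads feeding a vector(4) int constructor, emit
		 two 64-bit integer loads, build a two-element vector of
		 those integers and view-convert the result back to
		 vector(4) int.  */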
8984 tree ptype;
8985 tree vtype
8986 = vector_vector_composition_type (vectype,
8987 const_nunits / group_size,
8988 &ptype);
8989 if (vtype != NULL_TREE)
8990 {
8991 nloads = const_nunits / group_size;
8992 lnel = group_size;
8993 lvectype = vtype;
8994 ltype = ptype;
8995 }
8996 }
8997 else
8998 {
8999 nloads = 1;
9000 lnel = const_nunits;
9001 ltype = vectype;
9002 }
9003 ltype = build_aligned_type (ltype, TYPE_ALIGN (TREE_TYPE (vectype)));
9004 }
9005       /* If the vectype has just one element, load it as the full
9006 	 vector(1) scalar_type rather than the element type.  */
9006 else if (nloads == 1)
9007 ltype = vectype;
9008
9009 if (slp)
9010 {
9011 /* For SLP permutation support we need to load the whole group,
9012 not only the number of vector stmts the permutation result
9013 fits in. */
9014 if (slp_perm)
9015 {
9016 /* We don't yet generate SLP_TREE_LOAD_PERMUTATIONs for
9017 variable VF. */
9018 unsigned int const_vf = vf.to_constant ();
9019 ncopies = CEIL (group_size * const_vf, const_nunits);
9020 dr_chain.create (ncopies);
9021 }
9022 else
9023 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
9024 }
9025 unsigned int group_el = 0;
9026 unsigned HOST_WIDE_INT
9027 elsz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
9028 for (j = 0; j < ncopies; j++)
9029 {
9030 if (nloads > 1)
9031 vec_alloc (v, nloads);
9032 stmt_vec_info new_stmt_info = NULL;
9033 for (i = 0; i < nloads; i++)
9034 {
9035 tree this_off = build_int_cst (TREE_TYPE (alias_off),
9036 group_el * elsz + cst_offset);
9037 tree data_ref = build2 (MEM_REF, ltype, running_off, this_off);
9038 vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
9039 gassign *new_stmt
9040 = gimple_build_assign (make_ssa_name (ltype), data_ref);
9041 new_stmt_info
9042 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
9043 if (nloads > 1)
9044 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
9045 gimple_assign_lhs (new_stmt));
9046
9047 group_el += lnel;
9048 if (! slp
9049 || group_el == group_size)
9050 {
9051 tree newoff = copy_ssa_name (running_off);
9052 gimple *incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
9053 running_off, stride_step);
9054 vect_finish_stmt_generation (stmt_info, incr, gsi);
9055
9056 running_off = newoff;
9057 group_el = 0;
9058 }
9059 }
9060 if (nloads > 1)
9061 {
9062 tree vec_inv = build_constructor (lvectype, v);
9063 new_temp = vect_init_vector (stmt_info, vec_inv, lvectype, gsi);
9064 new_stmt_info = vinfo->lookup_def (new_temp);
9065 if (lvectype != vectype)
9066 {
9067 gassign *new_stmt
9068 = gimple_build_assign (make_ssa_name (vectype),
9069 VIEW_CONVERT_EXPR,
9070 build1 (VIEW_CONVERT_EXPR,
9071 vectype, new_temp));
9072 new_stmt_info
9073 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
9074 }
9075 }
9076
9077 if (slp)
9078 {
9079 if (slp_perm)
9080 dr_chain.quick_push (gimple_assign_lhs (new_stmt_info->stmt));
9081 else
9082 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
9083 }
9084 else
9085 {
9086 if (j == 0)
9087 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
9088 else
9089 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
9090 prev_stmt_info = new_stmt_info;
9091 }
9092 }
9093 if (slp_perm)
9094 {
9095 unsigned n_perms;
9096 vect_transform_slp_perm_load (slp_node, dr_chain, gsi, vf,
9097 slp_node_instance, false, &n_perms);
9098 }
9099 return true;
9100 }
9101
9102 if (memory_access_type == VMAT_GATHER_SCATTER
9103 || (!slp && memory_access_type == VMAT_CONTIGUOUS))
9104 grouped_load = false;
9105
9106 if (grouped_load)
9107 {
9108 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
9109 group_size = DR_GROUP_SIZE (first_stmt_info);
9110 /* For SLP vectorization we directly vectorize a subchain
9111 without permutation. */
9112 if (slp && ! SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
9113 first_stmt_info = SLP_TREE_SCALAR_STMTS (slp_node)[0];
9114 /* For BB vectorization always use the first stmt to base
9115 the data ref pointer on. */
9116 if (bb_vinfo)
9117 first_stmt_info_for_drptr = SLP_TREE_SCALAR_STMTS (slp_node)[0];
9118
9119 /* Check if the chain of loads is already vectorized. */
9120 if (STMT_VINFO_VEC_STMT (first_stmt_info)
9121 /* For SLP we would need to copy over SLP_TREE_VEC_STMTS.
9122 ??? But we can only do so if there is exactly one
9123 as we have no way to get at the rest. Leave the CSE
9124 opportunity alone.
9125 ??? With the group load eventually participating
9126 in multiple different permutations (having multiple
9127 slp nodes which refer to the same group) the CSE
9128 is even wrong code. See PR56270. */
9129 && !slp)
9130 {
9131 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
9132 return true;
9133 }
9134 first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
9135 group_gap_adj = 0;
9136
9137 /* VEC_NUM is the number of vect stmts to be created for this group. */
9138 if (slp)
9139 {
9140 grouped_load = false;
9141 /* If an SLP permutation is from N elements to N elements,
9142 and if one vector holds a whole number of N, we can load
9143 the inputs to the permutation in the same way as an
9144 unpermuted sequence. In other cases we need to load the
9145 whole group, not only the number of vector stmts the
9146 permutation result fits in. */
9147 if (slp_perm
9148 && (group_size != SLP_INSTANCE_GROUP_SIZE (slp_node_instance)
9149 || !multiple_p (nunits, group_size)))
9150 {
9151 /* We don't yet generate such SLP_TREE_LOAD_PERMUTATIONs for
9152 variable VF; see vect_transform_slp_perm_load. */
9153 unsigned int const_vf = vf.to_constant ();
9154 unsigned int const_nunits = nunits.to_constant ();
9155 vec_num = CEIL (group_size * const_vf, const_nunits);
9156 group_gap_adj = vf * group_size - nunits * vec_num;
9157 }
9158 else
9159 {
9160 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
9161 group_gap_adj
9162 = group_size - SLP_INSTANCE_GROUP_SIZE (slp_node_instance);
9163 }
9164 }
9165 else
9166 vec_num = group_size;
9167
9168 ref_type = get_group_alias_ptr_type (first_stmt_info);
9169 }
9170 else
9171 {
9172 first_stmt_info = stmt_info;
9173 first_dr_info = dr_info;
9174 group_size = vec_num = 1;
9175 group_gap_adj = 0;
9176 ref_type = reference_alias_ptr_type (DR_REF (first_dr_info->dr));
9177 }
9178
9179 /* Gather-scatter accesses perform only component accesses, alignment
9180 is irrelevant for them. */
9181 if (memory_access_type == VMAT_GATHER_SCATTER)
9182 alignment_support_scheme = dr_unaligned_supported;
9183 else
9184 alignment_support_scheme
9185 = vect_supportable_dr_alignment (first_dr_info, false);
9186
9187 gcc_assert (alignment_support_scheme);
9188 vec_loop_masks *loop_masks
9189 = (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
9190 ? &LOOP_VINFO_MASKS (loop_vinfo)
9191 : NULL);
9192   /* Targets with load-lane instructions must not require explicit
9193      realignment.  vect_supportable_dr_alignment always returns either
9194      dr_aligned or dr_unaligned_supported for masked operations.  */
9195 gcc_assert ((memory_access_type != VMAT_LOAD_STORE_LANES
9196 && !mask
9197 && !loop_masks)
9198 || alignment_support_scheme == dr_aligned
9199 || alignment_support_scheme == dr_unaligned_supported);
9200
9201 /* In case the vectorization factor (VF) is bigger than the number
9202 of elements that we can fit in a vectype (nunits), we have to generate
9203 more than one vector stmt - i.e - we need to "unroll" the
9204 vector stmt by a factor VF/nunits. In doing so, we record a pointer
9205 from one copy of the vector stmt to the next, in the field
9206 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
9207 stages to find the correct vector defs to be used when vectorizing
9208 stmts that use the defs of the current stmt. The example below
9209 illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
9210 need to create 4 vectorized stmts):
9211
9212 before vectorization:
9213 RELATED_STMT VEC_STMT
9214 S1: x = memref - -
9215 S2: z = x + 1 - -
9216
9217 step 1: vectorize stmt S1:
9218 We first create the vector stmt VS1_0, and, as usual, record a
9219 pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
9220 Next, we create the vector stmt VS1_1, and record a pointer to
9221 it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
9222 Similarly, for VS1_2 and VS1_3. This is the resulting chain of
9223 stmts and pointers:
9224 RELATED_STMT VEC_STMT
9225 VS1_0: vx0 = memref0 VS1_1 -
9226 VS1_1: vx1 = memref1 VS1_2 -
9227 VS1_2: vx2 = memref2 VS1_3 -
9228 VS1_3: vx3 = memref3 - -
9229 S1: x = load - VS1_0
9230 S2: z = x + 1 - -
9231
9232 See in documentation in vect_get_vec_def_for_stmt_copy for how the
9233 information we recorded in RELATED_STMT field is used to vectorize
9234 stmt S2. */
9235
9236 /* In case of interleaving (non-unit grouped access):
9237
9238 S1: x2 = &base + 2
9239 S2: x0 = &base
9240 S3: x1 = &base + 1
9241 S4: x3 = &base + 3
9242
9243 Vectorized loads are created in the order of memory accesses
9244 starting from the access of the first stmt of the chain:
9245
9246 VS1: vx0 = &base
9247 VS2: vx1 = &base + vec_size*1
9248 VS3: vx3 = &base + vec_size*2
9249 VS4: vx4 = &base + vec_size*3
9250
9251 Then permutation statements are generated:
9252
9253 VS5: vx5 = VEC_PERM_EXPR < vx0, vx1, { 0, 2, ..., i*2 } >
9254 VS6: vx6 = VEC_PERM_EXPR < vx0, vx1, { 1, 3, ..., i*2+1 } >
9255 ...
9256
9257 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
9258 (the order of the data-refs in the output of vect_permute_load_chain
9259 corresponds to the order of scalar stmts in the interleaving chain - see
9260 the documentation of vect_permute_load_chain()).
9261 The generation of permutation stmts and recording them in
9262 STMT_VINFO_VEC_STMT is done in vect_transform_grouped_load().
9263
9264 In case of both multiple types and interleaving, the vector loads and
9265 permutation stmts above are created for every copy. The result vector
9266 stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
9267 corresponding STMT_VINFO_RELATED_STMT for the next copies. */
9268
9269 /* If the data reference is aligned (dr_aligned) or potentially unaligned
9270 on a target that supports unaligned accesses (dr_unaligned_supported)
9271 we generate the following code:
9272 p = initial_addr;
9273 indx = 0;
9274 loop {
9275 p = p + indx * vectype_size;
9276 vec_dest = *(p);
9277 indx = indx + 1;
9278 }
9279
9280 Otherwise, the data reference is potentially unaligned on a target that
9281 does not support unaligned accesses (dr_explicit_realign_optimized) -
9282 then generate the following code, in which the data in each iteration is
9283 obtained by two vector loads, one from the previous iteration, and one
9284 from the current iteration:
9285 p1 = initial_addr;
9286 msq_init = *(floor(p1))
9287 p2 = initial_addr + VS - 1;
9288 realignment_token = call target_builtin;
9289 indx = 0;
9290 loop {
9291 p2 = p2 + indx * vectype_size
9292 lsq = *(floor(p2))
9293 vec_dest = realign_load (msq, lsq, realignment_token)
9294 indx = indx + 1;
9295 msq = lsq;
9296 } */
9297
9298 /* If the misalignment remains the same throughout the execution of the
9299 loop, we can create the init_addr and permutation mask at the loop
9300 preheader. Otherwise, it needs to be created inside the loop.
9301 This can only occur when vectorizing memory accesses in the inner-loop
9302 nested within an outer-loop that is being vectorized. */
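
  /* Illustrative example: when vectorizing the outer loop of

	for (i = 0; i < n; i++)
	  for (j = 0; j < 3; j++)
	    ... = a[i][j];

     the alignment of the inner access changes from one outer-loop
     iteration to the next whenever the row size is not a multiple of
     the vector size, so the realignment data must be computed inside
     the loop.  */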
9303
9304 if (nested_in_vect_loop
9305 && !multiple_p (DR_STEP_ALIGNMENT (dr_info->dr),
9306 GET_MODE_SIZE (TYPE_MODE (vectype))))
9307 {
9308 gcc_assert (alignment_support_scheme != dr_explicit_realign_optimized);
9309 compute_in_loop = true;
9310 }
9311
9312 bool diff_first_stmt_info
9313 = first_stmt_info_for_drptr && first_stmt_info != first_stmt_info_for_drptr;
9314
9315 if ((alignment_support_scheme == dr_explicit_realign_optimized
9316 || alignment_support_scheme == dr_explicit_realign)
9317 && !compute_in_loop)
9318 {
9319       /* If we have a different first_stmt_info, we can't set up realignment
9320 	 here, since we can't guarantee that first_stmt_info's DR has been
9321 	 initialized yet; instead use first_stmt_info_for_drptr's DR, bumping
9322 	 it by the distance from first_stmt_info's DR as below.  */
9323 if (!diff_first_stmt_info)
9324 msq = vect_setup_realignment (first_stmt_info, gsi, &realignment_token,
9325 alignment_support_scheme, NULL_TREE,
9326 &at_loop);
9327 if (alignment_support_scheme == dr_explicit_realign_optimized)
9328 {
9329 phi = as_a <gphi *> (SSA_NAME_DEF_STMT (msq));
9330 byte_offset = size_binop (MINUS_EXPR, TYPE_SIZE_UNIT (vectype),
9331 size_one_node);
9332 gcc_assert (!first_stmt_info_for_drptr);
9333 }
9334 }
9335 else
9336 at_loop = loop;
9337
9338 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
9339 offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
9340
9341 tree bump;
9342 tree vec_offset = NULL_TREE;
9343 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
9344 {
9345 aggr_type = NULL_TREE;
9346 bump = NULL_TREE;
9347 }
9348 else if (memory_access_type == VMAT_GATHER_SCATTER)
9349 {
9350 aggr_type = elem_type;
9351 vect_get_strided_load_store_ops (stmt_info, loop_vinfo, &gs_info,
9352 &bump, &vec_offset);
9353 }
9354 else
9355 {
9356 if (memory_access_type == VMAT_LOAD_STORE_LANES)
9357 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
9358 else
9359 aggr_type = vectype;
9360 bump = vect_get_data_ptr_increment (dr_info, aggr_type,
9361 memory_access_type);
9362 }
9363
9364 tree vec_mask = NULL_TREE;
9365 prev_stmt_info = NULL;
9366 poly_uint64 group_elt = 0;
9367 for (j = 0; j < ncopies; j++)
9368 {
9369 stmt_vec_info new_stmt_info = NULL;
9370 /* 1. Create the vector or array pointer update chain. */
9371 if (j == 0)
9372 {
9373 bool simd_lane_access_p
9374 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) != 0;
9375 if (simd_lane_access_p
9376 && TREE_CODE (DR_BASE_ADDRESS (first_dr_info->dr)) == ADDR_EXPR
9377 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr_info->dr), 0))
9378 && integer_zerop (get_dr_vinfo_offset (first_dr_info))
9379 && integer_zerop (DR_INIT (first_dr_info->dr))
9380 && alias_sets_conflict_p (get_alias_set (aggr_type),
9381 get_alias_set (TREE_TYPE (ref_type)))
9382 && (alignment_support_scheme == dr_aligned
9383 || alignment_support_scheme == dr_unaligned_supported))
9384 {
9385 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr_info->dr));
9386 dataref_offset = build_int_cst (ref_type, 0);
9387 }
9388 else if (diff_first_stmt_info)
9389 {
9390 dataref_ptr
9391 = vect_create_data_ref_ptr (first_stmt_info_for_drptr,
9392 aggr_type, at_loop, offset, &dummy,
9393 gsi, &ptr_incr, simd_lane_access_p,
9394 byte_offset, bump);
9395 /* Adjust the pointer by the difference to first_stmt. */
9396 data_reference_p ptrdr
9397 = STMT_VINFO_DATA_REF (first_stmt_info_for_drptr);
9398 tree diff
9399 = fold_convert (sizetype,
9400 size_binop (MINUS_EXPR,
9401 DR_INIT (first_dr_info->dr),
9402 DR_INIT (ptrdr)));
9403 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
9404 stmt_info, diff);
9405 if (alignment_support_scheme == dr_explicit_realign)
9406 {
9407 msq = vect_setup_realignment (first_stmt_info_for_drptr, gsi,
9408 &realignment_token,
9409 alignment_support_scheme,
9410 dataref_ptr, &at_loop);
9411 gcc_assert (!compute_in_loop);
9412 }
9413 }
9414 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
9415 vect_get_gather_scatter_ops (loop, stmt_info, &gs_info,
9416 &dataref_ptr, &vec_offset);
9417 else
9418 dataref_ptr
9419 = vect_create_data_ref_ptr (first_stmt_info, aggr_type, at_loop,
9420 offset, &dummy, gsi, &ptr_incr,
9421 simd_lane_access_p,
9422 byte_offset, bump);
9423 if (mask)
9424 {
9425 if (slp_node)
9426 {
9427 auto_vec<vec<tree> > vec_defs (1);
9428 vect_get_slp_defs (slp_node, &vec_defs);
9429 vec_mask = vec_defs[0][0];
9430 }
9431 else
9432 vec_mask = vect_get_vec_def_for_operand (mask, stmt_info,
9433 mask_vectype);
9434 }
9435 }
9436 else
9437 {
9438 if (dataref_offset)
9439 dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset,
9440 bump);
9441 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
9442 vec_offset = vect_get_vec_def_for_stmt_copy (vinfo, vec_offset);
9443 else
9444 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
9445 stmt_info, bump);
9446 if (mask)
9447 vec_mask = vect_get_vec_def_for_stmt_copy (vinfo, vec_mask);
9448 }
9449
9450 if (grouped_load || slp_perm)
9451 dr_chain.create (vec_num);
9452
9453 if (memory_access_type == VMAT_LOAD_STORE_LANES)
9454 {
9455 tree vec_array;
9456
9457 vec_array = create_vector_array (vectype, vec_num);
9458
9459 tree final_mask = NULL_TREE;
9460 if (loop_masks)
9461 final_mask = vect_get_loop_mask (gsi, loop_masks, ncopies,
9462 vectype, j);
9463 if (vec_mask)
9464 final_mask = prepare_load_store_mask (mask_vectype, final_mask,
9465 vec_mask, gsi);
9466
9467 gcall *call;
9468 if (final_mask)
9469 {
9470 /* Emit:
9471 VEC_ARRAY = MASK_LOAD_LANES (DATAREF_PTR, ALIAS_PTR,
9472 VEC_MASK). */
9473 unsigned int align = TYPE_ALIGN (TREE_TYPE (vectype));
9474 tree alias_ptr = build_int_cst (ref_type, align);
9475 call = gimple_build_call_internal (IFN_MASK_LOAD_LANES, 3,
9476 dataref_ptr, alias_ptr,
9477 final_mask);
9478 }
9479 else
9480 {
9481 /* Emit:
9482 VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]). */
9483 data_ref = create_array_ref (aggr_type, dataref_ptr, ref_type);
9484 call = gimple_build_call_internal (IFN_LOAD_LANES, 1, data_ref);
9485 }
9486 gimple_call_set_lhs (call, vec_array);
9487 gimple_call_set_nothrow (call, true);
9488 new_stmt_info = vect_finish_stmt_generation (stmt_info, call, gsi);
9489
9490 /* Extract each vector into an SSA_NAME. */
9491 for (i = 0; i < vec_num; i++)
9492 {
9493 new_temp = read_vector_array (stmt_info, gsi, scalar_dest,
9494 vec_array, i);
9495 dr_chain.quick_push (new_temp);
9496 }
9497
9498 /* Record the mapping between SSA_NAMEs and statements. */
9499 vect_record_grouped_load_vectors (stmt_info, dr_chain);
9500
9501 /* Record that VEC_ARRAY is now dead. */
9502 vect_clobber_variable (stmt_info, gsi, vec_array);
9503 }
9504 else
9505 {
9506 for (i = 0; i < vec_num; i++)
9507 {
9508 tree final_mask = NULL_TREE;
9509 if (loop_masks
9510 && memory_access_type != VMAT_INVARIANT)
9511 final_mask = vect_get_loop_mask (gsi, loop_masks,
9512 vec_num * ncopies,
9513 vectype, vec_num * j + i);
9514 if (vec_mask)
9515 final_mask = prepare_load_store_mask (mask_vectype, final_mask,
9516 vec_mask, gsi);
9517
9518 if (i > 0)
9519 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
9520 stmt_info, bump);
9521
9522 /* 2. Create the vector-load in the loop. */
9523 gimple *new_stmt = NULL;
9524 switch (alignment_support_scheme)
9525 {
9526 case dr_aligned:
9527 case dr_unaligned_supported:
9528 {
9529 unsigned int misalign;
9530 unsigned HOST_WIDE_INT align;
9531
9532 if (memory_access_type == VMAT_GATHER_SCATTER)
9533 {
9534 tree zero = build_zero_cst (vectype);
9535 tree scale = size_int (gs_info.scale);
9536 gcall *call;
9537 if (loop_masks)
9538 call = gimple_build_call_internal
9539 (IFN_MASK_GATHER_LOAD, 5, dataref_ptr,
9540 vec_offset, scale, zero, final_mask);
9541 else
9542 call = gimple_build_call_internal
9543 (IFN_GATHER_LOAD, 4, dataref_ptr,
9544 vec_offset, scale, zero);
9545 gimple_call_set_nothrow (call, true);
9546 new_stmt = call;
9547 data_ref = NULL_TREE;
9548 break;
9549 }
9550
9551 align =
9552 known_alignment (DR_TARGET_ALIGNMENT (first_dr_info));
9553 if (alignment_support_scheme == dr_aligned)
9554 {
9555 gcc_assert (aligned_access_p (first_dr_info));
9556 misalign = 0;
9557 }
9558 else if (DR_MISALIGNMENT (first_dr_info) == -1)
9559 {
9560 align = dr_alignment
9561 (vect_dr_behavior (first_dr_info));
9562 misalign = 0;
9563 }
9564 else
9565 misalign = DR_MISALIGNMENT (first_dr_info);
9566 if (dataref_offset == NULL_TREE
9567 && TREE_CODE (dataref_ptr) == SSA_NAME)
9568 set_ptr_info_alignment (get_ptr_info (dataref_ptr),
9569 align, misalign);
9570
9571 if (final_mask)
9572 {
9573 align = least_bit_hwi (misalign | align);
9574 tree ptr = build_int_cst (ref_type,
9575 align * BITS_PER_UNIT);
9576 gcall *call
9577 = gimple_build_call_internal (IFN_MASK_LOAD, 3,
9578 dataref_ptr, ptr,
9579 final_mask);
9580 gimple_call_set_nothrow (call, true);
9581 new_stmt = call;
9582 data_ref = NULL_TREE;
9583 }
9584 else
9585 {
9586 tree ltype = vectype;
9587 tree new_vtype = NULL_TREE;
9588 /* If there's no peeling for gaps but we have a gap
9589 with slp loads then load the lower half of the
9590 vector only. See get_group_load_store_type for
9591 when we apply this optimization. */
9592 if (slp
9593 && loop_vinfo
9594 && !LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)
9595 && DR_GROUP_GAP (first_stmt_info) != 0
9596 && known_eq (nunits,
9597 (group_size
9598 - DR_GROUP_GAP (first_stmt_info)) * 2)
9599 && known_eq (nunits, group_size))
9600 {
9601 tree half_vtype;
9602 new_vtype
9603 = vector_vector_composition_type (vectype, 2,
9604 &half_vtype);
9605 if (new_vtype != NULL_TREE)
9606 ltype = half_vtype;
9607 }
9608 tree offset
9609 = (dataref_offset ? dataref_offset
9610 : build_int_cst (ref_type, 0));
9611 if (ltype != vectype
9612 && memory_access_type == VMAT_CONTIGUOUS_REVERSE)
9613 {
9614 unsigned HOST_WIDE_INT gap
9615 = DR_GROUP_GAP (first_stmt_info);
9616 gap *= tree_to_uhwi (TYPE_SIZE_UNIT (elem_type));
9617 tree gapcst = build_int_cst (ref_type, gap);
9618 offset = size_binop (PLUS_EXPR, offset, gapcst);
9619 }
9620 data_ref
9621 = fold_build2 (MEM_REF, ltype, dataref_ptr, offset);
9622 if (alignment_support_scheme == dr_aligned)
9623 ;
9624 else if (DR_MISALIGNMENT (first_dr_info) == -1)
9625 TREE_TYPE (data_ref)
9626 = build_aligned_type (TREE_TYPE (data_ref),
9627 align * BITS_PER_UNIT);
9628 else
9629 TREE_TYPE (data_ref)
9630 = build_aligned_type (TREE_TYPE (data_ref),
9631 TYPE_ALIGN (elem_type));
9632 if (ltype != vectype)
9633 {
9634 vect_copy_ref_info (data_ref,
9635 DR_REF (first_dr_info->dr));
9636 tree tem = make_ssa_name (ltype);
9637 new_stmt = gimple_build_assign (tem, data_ref);
9638 vect_finish_stmt_generation (stmt_info, new_stmt,
9639 gsi);
9640 data_ref = NULL;
9641 vec<constructor_elt, va_gc> *v;
9642 vec_alloc (v, 2);
9643 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
9644 {
9645 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
9646 build_zero_cst (ltype));
9647 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, tem);
9648 }
9649 else
9650 {
9651 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, tem);
9652 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
9653 build_zero_cst (ltype));
9654 }
9655 gcc_assert (new_vtype != NULL_TREE);
9656 if (new_vtype == vectype)
9657 new_stmt = gimple_build_assign (
9658 vec_dest, build_constructor (vectype, v));
9659 else
9660 {
9661 tree new_vname = make_ssa_name (new_vtype);
9662 new_stmt = gimple_build_assign (
9663 new_vname, build_constructor (new_vtype, v));
9664 vect_finish_stmt_generation (stmt_info,
9665 new_stmt, gsi);
9666 new_stmt = gimple_build_assign (
9667 vec_dest, build1 (VIEW_CONVERT_EXPR, vectype,
9668 new_vname));
9669 }
9670 }
9671 }
9672 break;
9673 }
9674 case dr_explicit_realign:
9675 {
9676 tree ptr, bump;
9677
9678 tree vs = size_int (TYPE_VECTOR_SUBPARTS (vectype));
9679
9680 if (compute_in_loop)
9681 msq = vect_setup_realignment (first_stmt_info, gsi,
9682 &realignment_token,
9683 dr_explicit_realign,
9684 dataref_ptr, NULL);
9685
9686 if (TREE_CODE (dataref_ptr) == SSA_NAME)
9687 ptr = copy_ssa_name (dataref_ptr);
9688 else
9689 ptr = make_ssa_name (TREE_TYPE (dataref_ptr));
9690 // For explicit realign the target alignment should be
9691 // known at compile time.
9692 unsigned HOST_WIDE_INT align =
9693 DR_TARGET_ALIGNMENT (first_dr_info).to_constant ();
9694 new_stmt = gimple_build_assign
9695 (ptr, BIT_AND_EXPR, dataref_ptr,
9696 build_int_cst
9697 (TREE_TYPE (dataref_ptr),
9698 -(HOST_WIDE_INT) align));
9699 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
9700 data_ref
9701 = build2 (MEM_REF, vectype, ptr,
9702 build_int_cst (ref_type, 0));
9703 vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
9704 vec_dest = vect_create_destination_var (scalar_dest,
9705 vectype);
9706 new_stmt = gimple_build_assign (vec_dest, data_ref);
9707 new_temp = make_ssa_name (vec_dest, new_stmt);
9708 gimple_assign_set_lhs (new_stmt, new_temp);
9709 gimple_move_vops (new_stmt, stmt_info->stmt);
9710 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
9711 msq = new_temp;
9712
9713 bump = size_binop (MULT_EXPR, vs,
9714 TYPE_SIZE_UNIT (elem_type));
9715 bump = size_binop (MINUS_EXPR, bump, size_one_node);
9716 ptr = bump_vector_ptr (dataref_ptr, NULL, gsi,
9717 stmt_info, bump);
9718 new_stmt = gimple_build_assign
9719 (NULL_TREE, BIT_AND_EXPR, ptr,
9720 build_int_cst
9721 (TREE_TYPE (ptr), -(HOST_WIDE_INT) align));
9722 ptr = copy_ssa_name (ptr, new_stmt);
9723 gimple_assign_set_lhs (new_stmt, ptr);
9724 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
9725 data_ref
9726 = build2 (MEM_REF, vectype, ptr,
9727 build_int_cst (ref_type, 0));
9728 break;
9729 }
9730 case dr_explicit_realign_optimized:
9731 {
9732 if (TREE_CODE (dataref_ptr) == SSA_NAME)
9733 new_temp = copy_ssa_name (dataref_ptr);
9734 else
9735 new_temp = make_ssa_name (TREE_TYPE (dataref_ptr));
9736 // We should only be doing this if we know the target
9737 // alignment at compile time.
9738 unsigned HOST_WIDE_INT align =
9739 DR_TARGET_ALIGNMENT (first_dr_info).to_constant ();
9740 new_stmt = gimple_build_assign
9741 (new_temp, BIT_AND_EXPR, dataref_ptr,
9742 build_int_cst (TREE_TYPE (dataref_ptr),
9743 -(HOST_WIDE_INT) align));
9744 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
9745 data_ref
9746 = build2 (MEM_REF, vectype, new_temp,
9747 build_int_cst (ref_type, 0));
9748 break;
9749 }
9750 default:
9751 gcc_unreachable ();
9752 }
9753 vec_dest = vect_create_destination_var (scalar_dest, vectype);
9754 /* DATA_REF is null if we've already built the statement. */
9755 if (data_ref)
9756 {
9757 vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
9758 new_stmt = gimple_build_assign (vec_dest, data_ref);
9759 }
9760 new_temp = make_ssa_name (vec_dest, new_stmt);
9761 gimple_set_lhs (new_stmt, new_temp);
9762 new_stmt_info
9763 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
9764
9765 /* 3. Handle explicit realignment if necessary/supported.
9766 Create in loop:
9767 vec_dest = realign_load (msq, lsq, realignment_token) */
9768 if (alignment_support_scheme == dr_explicit_realign_optimized
9769 || alignment_support_scheme == dr_explicit_realign)
9770 {
9771 lsq = gimple_assign_lhs (new_stmt);
9772 if (!realignment_token)
9773 realignment_token = dataref_ptr;
9774 vec_dest = vect_create_destination_var (scalar_dest, vectype);
9775 new_stmt = gimple_build_assign (vec_dest, REALIGN_LOAD_EXPR,
9776 msq, lsq, realignment_token);
9777 new_temp = make_ssa_name (vec_dest, new_stmt);
9778 gimple_assign_set_lhs (new_stmt, new_temp);
9779 new_stmt_info
9780 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
9781
9782 if (alignment_support_scheme == dr_explicit_realign_optimized)
9783 {
9784 gcc_assert (phi);
9785 if (i == vec_num - 1 && j == ncopies - 1)
9786 add_phi_arg (phi, lsq,
9787 loop_latch_edge (containing_loop),
9788 UNKNOWN_LOCATION);
9789 msq = lsq;
9790 }
9791 }
9792
9793 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
9794 {
9795 tree perm_mask = perm_mask_for_reverse (vectype);
9796 new_temp = permute_vec_elements (new_temp, new_temp,
9797 perm_mask, stmt_info, gsi);
9798 new_stmt_info = vinfo->lookup_def (new_temp);
9799 }
9800
9801 /* Collect vector loads and later create their permutation in
9802 vect_transform_grouped_load (). */
9803 if (grouped_load || slp_perm)
9804 dr_chain.quick_push (new_temp);
9805
9806 /* Store vector loads in the corresponding SLP_NODE. */
9807 if (slp && !slp_perm)
9808 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
9809
9810 	      /* With an SLP permutation we load the gaps as well; without
9811 	         one we need to skip the gaps after we manage to fully load
9812 		 all elements.  group_gap_adj is DR_GROUP_SIZE here.  */
9813 group_elt += nunits;
9814 if (maybe_ne (group_gap_adj, 0U)
9815 && !slp_perm
9816 && known_eq (group_elt, group_size - group_gap_adj))
9817 {
9818 poly_wide_int bump_val
9819 = (wi::to_wide (TYPE_SIZE_UNIT (elem_type))
9820 * group_gap_adj);
9821 tree bump = wide_int_to_tree (sizetype, bump_val);
9822 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
9823 stmt_info, bump);
9824 group_elt = 0;
9825 }
9826 }
9827 /* Bump the vector pointer to account for a gap or for excess
9828 elements loaded for a permuted SLP load. */
9829 if (maybe_ne (group_gap_adj, 0U) && slp_perm)
9830 {
9831 poly_wide_int bump_val
9832 = (wi::to_wide (TYPE_SIZE_UNIT (elem_type))
9833 * group_gap_adj);
9834 tree bump = wide_int_to_tree (sizetype, bump_val);
9835 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
9836 stmt_info, bump);
9837 }
9838 }
9839
9840 if (slp && !slp_perm)
9841 continue;
9842
9843 if (slp_perm)
9844 {
9845 unsigned n_perms;
9846 if (!vect_transform_slp_perm_load (slp_node, dr_chain, gsi, vf,
9847 slp_node_instance, false,
9848 &n_perms))
9849 {
9850 dr_chain.release ();
9851 return false;
9852 }
9853 }
9854 else
9855 {
9856 if (grouped_load)
9857 {
9858 if (memory_access_type != VMAT_LOAD_STORE_LANES)
9859 vect_transform_grouped_load (stmt_info, dr_chain,
9860 group_size, gsi);
9861 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
9862 }
9863 else
9864 {
9865 if (j == 0)
9866 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
9867 else
9868 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
9869 prev_stmt_info = new_stmt_info;
9870 }
9871 }
9872 dr_chain.release ();
9873 }
9874
9875 return true;
9876 }
9877
9878 /* Function vect_is_simple_cond.
9879
9880 Input:
9881 LOOP - the loop that is being vectorized.
9882 COND - Condition that is checked for simple use.
9883
9884 Output:
9885 *COMP_VECTYPE - the vector type for the comparison.
9886 *DTS - The def types for the arguments of the comparison
9887
9888 Returns whether a COND can be vectorized. Checks whether
9889    condition operands are supportable using vect_is_simple_use.  */
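
/* For example (illustrative only): for COND = (x_1 < y_2) with integer
   operands defined inside the loop, both DTS entries are
   vect_internal_def and *COMP_VECTYPE is the vector type of the
   comparison operands (e.g. vector(4) int); for COND = (x_1 < 5) the
   constant operand yields dts[1] = vect_constant_def.  */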
9890
9891 static bool
9892 vect_is_simple_cond (tree cond, vec_info *vinfo, slp_tree slp_node,
9893 tree *comp_vectype, enum vect_def_type *dts,
9894 tree vectype)
9895 {
9896 tree lhs, rhs;
9897 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
9898
9899 /* Mask case. */
9900 if (TREE_CODE (cond) == SSA_NAME
9901 && VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (cond)))
9902 {
9903 if (!vect_is_simple_use (cond, vinfo, &dts[0], comp_vectype)
9904 || !*comp_vectype
9905 || !VECTOR_BOOLEAN_TYPE_P (*comp_vectype))
9906 return false;
9907 return true;
9908 }
9909
9910 if (!COMPARISON_CLASS_P (cond))
9911 return false;
9912
9913 lhs = TREE_OPERAND (cond, 0);
9914 rhs = TREE_OPERAND (cond, 1);
9915
9916 if (TREE_CODE (lhs) == SSA_NAME)
9917 {
9918 if (!vect_is_simple_use (lhs, vinfo, &dts[0], &vectype1))
9919 return false;
9920 }
9921 else if (TREE_CODE (lhs) == INTEGER_CST || TREE_CODE (lhs) == REAL_CST
9922 || TREE_CODE (lhs) == FIXED_CST)
9923 dts[0] = vect_constant_def;
9924 else
9925 return false;
9926
9927 if (TREE_CODE (rhs) == SSA_NAME)
9928 {
9929 if (!vect_is_simple_use (rhs, vinfo, &dts[1], &vectype2))
9930 return false;
9931 }
9932 else if (TREE_CODE (rhs) == INTEGER_CST || TREE_CODE (rhs) == REAL_CST
9933 || TREE_CODE (rhs) == FIXED_CST)
9934 dts[1] = vect_constant_def;
9935 else
9936 return false;
9937
9938 if (vectype1 && vectype2
9939 && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1),
9940 TYPE_VECTOR_SUBPARTS (vectype2)))
9941 return false;
9942
9943 *comp_vectype = vectype1 ? vectype1 : vectype2;
9944 /* Invariant comparison. */
9945 if (! *comp_vectype)
9946 {
9947 tree scalar_type = TREE_TYPE (lhs);
9948 if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type))
9949 *comp_vectype = truth_type_for (vectype);
9950 else
9951 {
9952 /* If we can widen the comparison to match vectype do so. */
9953 if (INTEGRAL_TYPE_P (scalar_type)
9954 && !slp_node
9955 && tree_int_cst_lt (TYPE_SIZE (scalar_type),
9956 TYPE_SIZE (TREE_TYPE (vectype))))
9957 scalar_type = build_nonstandard_integer_type
9958 (tree_to_uhwi (TYPE_SIZE (TREE_TYPE (vectype))),
9959 TYPE_UNSIGNED (scalar_type));
9960 *comp_vectype = get_vectype_for_scalar_type (vinfo, scalar_type,
9961 slp_node);
9962 }
9963 }
9964
9965 return true;
9966 }
9967
9968 /* vectorizable_condition.
9969
9970    Check if STMT_INFO is a conditional modify expression that can be
9971    vectorized.
9971 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
9972 stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
9973 at GSI.
9974
9975 When STMT_INFO is vectorized as a nested cycle, for_reduction is true.
9976
9977 Return true if STMT_INFO is vectorizable in this way. */
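
/* A minimal sketch of the transformation (illustrative only): the scalar
   statement

     S: z_3 = x_1 < y_2 ? a_4 : b_5;

   becomes something like

     vcmp_6 = vx_1 < vy_2;
     vz_7 = VEC_COND_EXPR <vcmp_6, va_4, vb_5>;  */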
9978
9979 static bool
9980 vectorizable_condition (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
9981 stmt_vec_info *vec_stmt,
9982 slp_tree slp_node, stmt_vector_for_cost *cost_vec)
9983 {
9984 vec_info *vinfo = stmt_info->vinfo;
9985 tree scalar_dest = NULL_TREE;
9986 tree vec_dest = NULL_TREE;
9987 tree cond_expr, cond_expr0 = NULL_TREE, cond_expr1 = NULL_TREE;
9988 tree then_clause, else_clause;
9989 tree comp_vectype = NULL_TREE;
9990 tree vec_cond_lhs = NULL_TREE, vec_cond_rhs = NULL_TREE;
9991 tree vec_then_clause = NULL_TREE, vec_else_clause = NULL_TREE;
9992 tree vec_compare;
9993 tree new_temp;
9994 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
9995 enum vect_def_type dts[4]
9996 = {vect_unknown_def_type, vect_unknown_def_type,
9997 vect_unknown_def_type, vect_unknown_def_type};
9998 int ndts = 4;
9999 int ncopies;
10000 int vec_num;
10001 enum tree_code code, cond_code, bitop1 = NOP_EXPR, bitop2 = NOP_EXPR;
10002 stmt_vec_info prev_stmt_info = NULL;
10003 int i, j;
10004 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
10005 vec<tree> vec_oprnds0 = vNULL;
10006 vec<tree> vec_oprnds1 = vNULL;
10007 vec<tree> vec_oprnds2 = vNULL;
10008 vec<tree> vec_oprnds3 = vNULL;
10009 tree vec_cmp_type;
10010 bool masked = false;
10011
10012 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
10013 return false;
10014
10015 /* Is vectorizable conditional operation? */
10016 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
10017 if (!stmt)
10018 return false;
10019
10020 code = gimple_assign_rhs_code (stmt);
10021 if (code != COND_EXPR)
10022 return false;
10023
10024 stmt_vec_info reduc_info = NULL;
10025 int reduc_index = -1;
10026 vect_reduction_type reduction_type = TREE_CODE_REDUCTION;
10027 bool for_reduction
10028 = STMT_VINFO_REDUC_DEF (vect_orig_stmt (stmt_info)) != NULL;
10029 if (for_reduction)
10030 {
10031 if (STMT_SLP_TYPE (stmt_info))
10032 return false;
10033 reduc_info = info_for_reduction (stmt_info);
10034 reduction_type = STMT_VINFO_REDUC_TYPE (reduc_info);
10035 reduc_index = STMT_VINFO_REDUC_IDX (stmt_info);
10036 gcc_assert (reduction_type != EXTRACT_LAST_REDUCTION
10037 || reduc_index != -1);
10038 }
10039 else
10040 {
10041 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
10042 return false;
10043
10044 /* FORNOW: only supported as part of a reduction. */
10045 if (STMT_VINFO_LIVE_P (stmt_info))
10046 {
10047 if (dump_enabled_p ())
10048 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10049 "value used after loop.\n");
10050 return false;
10051 }
10052 }
10053
10054 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
10055 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
10056
10057 if (slp_node)
10058 {
10059 ncopies = 1;
10060 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
10061 }
10062 else
10063 {
10064 ncopies = vect_get_num_copies (loop_vinfo, vectype);
10065 vec_num = 1;
10066 }
10067
10068 gcc_assert (ncopies >= 1);
10069 if (for_reduction && ncopies > 1)
10070 return false; /* FORNOW */
10071
10072 cond_expr = gimple_assign_rhs1 (stmt);
10073 then_clause = gimple_assign_rhs2 (stmt);
10074 else_clause = gimple_assign_rhs3 (stmt);
10075
10076 if (!vect_is_simple_cond (cond_expr, stmt_info->vinfo, slp_node,
10077 &comp_vectype, &dts[0], vectype)
10078 || !comp_vectype)
10079 return false;
10080
10081 if (!vect_is_simple_use (then_clause, stmt_info->vinfo, &dts[2], &vectype1))
10082 return false;
10083 if (!vect_is_simple_use (else_clause, stmt_info->vinfo, &dts[3], &vectype2))
10084 return false;
10085
10086 if (vectype1 && !useless_type_conversion_p (vectype, vectype1))
10087 return false;
10088
10089 if (vectype2 && !useless_type_conversion_p (vectype, vectype2))
10090 return false;
10091
10092 masked = !COMPARISON_CLASS_P (cond_expr);
10093 vec_cmp_type = truth_type_for (comp_vectype);
10094
10095 if (vec_cmp_type == NULL_TREE)
10096 return false;
10097
10098 cond_code = TREE_CODE (cond_expr);
10099 if (!masked)
10100 {
10101 cond_expr0 = TREE_OPERAND (cond_expr, 0);
10102 cond_expr1 = TREE_OPERAND (cond_expr, 1);
10103 }
10104
10105 /* For conditional reductions, the "then" value needs to be the candidate
10106 value calculated by this iteration while the "else" value needs to be
10107 the result carried over from previous iterations. If the COND_EXPR
10108 is the other way around, we need to swap it. */
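  /* As an illustration (the case the reduc_index == 1 check below detects,
     i.e. the carried-over value sits in the "then" operand):
     x = cond ? carry : new_val is rewritten as x = !cond ? new_val : carry,
     either by inverting the comparison code or, for mask operands, by
     recording must_invert_cmp_result.  */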
10109 bool must_invert_cmp_result = false;
10110 if (reduction_type == EXTRACT_LAST_REDUCTION && reduc_index == 1)
10111 {
10112 if (masked)
10113 must_invert_cmp_result = true;
10114 else
10115 {
10116 bool honor_nans = HONOR_NANS (TREE_TYPE (cond_expr0));
10117 tree_code new_code = invert_tree_comparison (cond_code, honor_nans);
10118 if (new_code == ERROR_MARK)
10119 must_invert_cmp_result = true;
10120 else
10121 {
10122 cond_code = new_code;
10123 /* Make sure we don't accidentally use the old condition. */
10124 cond_expr = NULL_TREE;
10125 }
10126 }
10127 std::swap (then_clause, else_clause);
10128 }
10129
10130 if (!masked && VECTOR_BOOLEAN_TYPE_P (comp_vectype))
10131 {
10132 /* Boolean values may have another representation in vectors
10133 and therefore we prefer bit operations over comparison for
10134 them (which also works for scalar masks). We store opcodes
10135 to use in bitop1 and bitop2. Statement is vectorized as
10136 BITOP2 (rhs1 BITOP1 rhs2) or rhs1 BITOP2 (BITOP1 rhs2)
10137 depending on bitop1 and bitop2 arity. */
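      /* For example, with boolean operands a > b becomes a & ~b
	 (bitop1 = BIT_NOT_EXPR, bitop2 = BIT_AND_EXPR) and a == b becomes
	 ~(a ^ b) (bitop1 = BIT_XOR_EXPR, bitop2 = BIT_NOT_EXPR); LT_EXPR
	 and LE_EXPR reuse the GT_EXPR/GE_EXPR mappings with the operands
	 swapped.  */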
10138 switch (cond_code)
10139 {
10140 case GT_EXPR:
10141 bitop1 = BIT_NOT_EXPR;
10142 bitop2 = BIT_AND_EXPR;
10143 break;
10144 case GE_EXPR:
10145 bitop1 = BIT_NOT_EXPR;
10146 bitop2 = BIT_IOR_EXPR;
10147 break;
10148 case LT_EXPR:
10149 bitop1 = BIT_NOT_EXPR;
10150 bitop2 = BIT_AND_EXPR;
10151 std::swap (cond_expr0, cond_expr1);
10152 break;
10153 case LE_EXPR:
10154 bitop1 = BIT_NOT_EXPR;
10155 bitop2 = BIT_IOR_EXPR;
10156 std::swap (cond_expr0, cond_expr1);
10157 break;
10158 case NE_EXPR:
10159 bitop1 = BIT_XOR_EXPR;
10160 break;
10161 case EQ_EXPR:
10162 bitop1 = BIT_XOR_EXPR;
10163 bitop2 = BIT_NOT_EXPR;
10164 break;
10165 default:
10166 return false;
10167 }
10168 cond_code = SSA_NAME;
10169 }
10170
10171 if (TREE_CODE_CLASS (cond_code) == tcc_comparison
10172 && reduction_type == EXTRACT_LAST_REDUCTION
10173 && !expand_vec_cmp_expr_p (comp_vectype, vec_cmp_type, cond_code))
10174 {
10175 if (dump_enabled_p ())
10176 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10177 "reduction comparison operation not supported.\n");
10178 return false;
10179 }
10180
10181 if (!vec_stmt)
10182 {
10183 if (bitop1 != NOP_EXPR)
10184 {
10185 machine_mode mode = TYPE_MODE (comp_vectype);
10186 optab optab;
10187
10188 optab = optab_for_tree_code (bitop1, comp_vectype, optab_default);
10189 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
10190 return false;
10191
10192 if (bitop2 != NOP_EXPR)
10193 {
10194 optab = optab_for_tree_code (bitop2, comp_vectype,
10195 optab_default);
10196 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
10197 return false;
10198 }
10199 }
10200
10201 if (loop_vinfo
10202 && LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo)
10203 && reduction_type == EXTRACT_LAST_REDUCTION)
10204 vect_record_loop_mask (loop_vinfo, &LOOP_VINFO_MASKS (loop_vinfo),
10205 ncopies * vec_num, vectype, NULL);
10206
10207 vect_cost_for_stmt kind = vector_stmt;
10208 if (reduction_type == EXTRACT_LAST_REDUCTION)
10209 /* Count one reduction-like operation per vector. */
10210 kind = vec_to_scalar;
10211 else if (!expand_vec_cond_expr_p (vectype, comp_vectype, cond_code))
10212 return false;
10213
10214 STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type;
10215 vect_model_simple_cost (stmt_info, ncopies, dts, ndts, slp_node,
10216 cost_vec, kind);
10217 return true;
10218 }
10219
10220 /* Transform. */
10221
10222 if (!slp_node)
10223 {
10224 vec_oprnds0.create (1);
10225 vec_oprnds1.create (1);
10226 vec_oprnds2.create (1);
10227 vec_oprnds3.create (1);
10228 }
10229
10230 /* Handle def. */
10231 scalar_dest = gimple_assign_lhs (stmt);
10232 if (reduction_type != EXTRACT_LAST_REDUCTION)
10233 vec_dest = vect_create_destination_var (scalar_dest, vectype);
10234
10235 /* Handle cond expr. */
10236 for (j = 0; j < ncopies; j++)
10237 {
10238 bool swap_cond_operands = false;
10239
10240 /* See whether another part of the vectorized code applies a loop
10241 mask to the condition, or to its inverse. */
10242
10243 vec_loop_masks *masks = NULL;
10244 if (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo))
10245 {
10246 if (reduction_type == EXTRACT_LAST_REDUCTION)
10247 masks = &LOOP_VINFO_MASKS (loop_vinfo);
10248 else
10249 {
10250 scalar_cond_masked_key cond (cond_expr, ncopies);
10251 if (loop_vinfo->scalar_cond_masked_set.contains (cond))
10252 masks = &LOOP_VINFO_MASKS (loop_vinfo);
10253 else
10254 {
10255 bool honor_nans = HONOR_NANS (TREE_TYPE (cond.op0));
10256 cond.code = invert_tree_comparison (cond.code, honor_nans);
10257 if (loop_vinfo->scalar_cond_masked_set.contains (cond))
10258 {
10259 masks = &LOOP_VINFO_MASKS (loop_vinfo);
10260 cond_code = cond.code;
10261 swap_cond_operands = true;
10262 }
10263 }
10264 }
10265 }
10266
10267 stmt_vec_info new_stmt_info = NULL;
10268 if (j == 0)
10269 {
10270 if (slp_node)
10271 {
10272 auto_vec<vec<tree>, 4> vec_defs;
10273 vect_get_slp_defs (slp_node, &vec_defs);
10274 vec_oprnds3 = vec_defs.pop ();
10275 vec_oprnds2 = vec_defs.pop ();
10276 if (!masked)
10277 vec_oprnds1 = vec_defs.pop ();
10278 vec_oprnds0 = vec_defs.pop ();
10279 }
10280 else
10281 {
10282 if (masked)
10283 {
10284 vec_cond_lhs
10285 = vect_get_vec_def_for_operand (cond_expr, stmt_info,
10286 comp_vectype);
10287 }
10288 else
10289 {
10290 vec_cond_lhs
10291 = vect_get_vec_def_for_operand (cond_expr0,
10292 stmt_info, comp_vectype);
10293 vec_cond_rhs
10294 = vect_get_vec_def_for_operand (cond_expr1,
10295 stmt_info, comp_vectype);
10296 }
10297 vec_then_clause = vect_get_vec_def_for_operand (then_clause,
10298 stmt_info);
10299 if (reduction_type != EXTRACT_LAST_REDUCTION)
10300 vec_else_clause = vect_get_vec_def_for_operand (else_clause,
10301 stmt_info);
10302 }
10303 }
10304 else
10305 {
10306 vec_cond_lhs
10307 = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnds0.pop ());
10308 if (!masked)
10309 vec_cond_rhs
10310 = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnds1.pop ());
10311
10312 vec_then_clause = vect_get_vec_def_for_stmt_copy (vinfo,
10313 vec_oprnds2.pop ());
10314 vec_else_clause = vect_get_vec_def_for_stmt_copy (vinfo,
10315 vec_oprnds3.pop ());
10316 }
10317
10318 if (!slp_node)
10319 {
10320 vec_oprnds0.quick_push (vec_cond_lhs);
10321 if (!masked)
10322 vec_oprnds1.quick_push (vec_cond_rhs);
10323 vec_oprnds2.quick_push (vec_then_clause);
10324 vec_oprnds3.quick_push (vec_else_clause);
10325 }
10326
10327 /* Arguments are ready. Create the new vector stmt. */
10328 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_cond_lhs)
10329 {
10330 vec_then_clause = vec_oprnds2[i];
10331 vec_else_clause = vec_oprnds3[i];
10332
10333 if (swap_cond_operands)
10334 std::swap (vec_then_clause, vec_else_clause);
10335
10336 if (masked)
10337 vec_compare = vec_cond_lhs;
10338 else
10339 {
10340 vec_cond_rhs = vec_oprnds1[i];
10341 if (bitop1 == NOP_EXPR)
10342 vec_compare = build2 (cond_code, vec_cmp_type,
10343 vec_cond_lhs, vec_cond_rhs);
10344 else
10345 {
10346 new_temp = make_ssa_name (vec_cmp_type);
10347 gassign *new_stmt;
10348 if (bitop1 == BIT_NOT_EXPR)
10349 new_stmt = gimple_build_assign (new_temp, bitop1,
10350 vec_cond_rhs);
10351 else
10352 new_stmt
10353 = gimple_build_assign (new_temp, bitop1, vec_cond_lhs,
10354 vec_cond_rhs);
10355 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
10356 if (bitop2 == NOP_EXPR)
10357 vec_compare = new_temp;
10358 else if (bitop2 == BIT_NOT_EXPR)
10359 {
10360 /* Instead of doing ~x ? y : z do x ? z : y. */
10361 vec_compare = new_temp;
10362 std::swap (vec_then_clause, vec_else_clause);
10363 }
10364 else
10365 {
10366 vec_compare = make_ssa_name (vec_cmp_type);
10367 new_stmt
10368 = gimple_build_assign (vec_compare, bitop2,
10369 vec_cond_lhs, new_temp);
10370 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
10371 }
10372 }
10373 }
10374
10375 /* If we decided to apply a loop mask to the result of the vector
10376 comparison, AND the comparison with the mask now. Later passes
10377 	 should then be able to reuse the AND results between multiple
10378 vector statements.
10379
10380 For example:
10381 for (int i = 0; i < 100; ++i)
10382 x[i] = y[i] ? z[i] : 10;
10383
10384 results in following optimized GIMPLE:
10385
10386 mask__35.8_43 = vect__4.7_41 != { 0, ... };
10387 vec_mask_and_46 = loop_mask_40 & mask__35.8_43;
10388 _19 = &MEM[base: z_12(D), index: ivtmp_56, step: 4, offset: 0B];
10389 vect_iftmp.11_47 = .MASK_LOAD (_19, 4B, vec_mask_and_46);
10390 vect_iftmp.12_52 = VEC_COND_EXPR <vec_mask_and_46,
10391 vect_iftmp.11_47, { 10, ... }>;
10392
10393 	 instead of using masked and unmasked forms of
10394 vec != { 0, ... } (masked in the MASK_LOAD,
10395 unmasked in the VEC_COND_EXPR). */
10396
10397 /* Force vec_compare to be an SSA_NAME rather than a comparison,
10398 in cases where that's necessary. */
10399
10400 if (masks || reduction_type == EXTRACT_LAST_REDUCTION)
10401 {
10402 if (!is_gimple_val (vec_compare))
10403 {
10404 tree vec_compare_name = make_ssa_name (vec_cmp_type);
10405 gassign *new_stmt = gimple_build_assign (vec_compare_name,
10406 vec_compare);
10407 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
10408 vec_compare = vec_compare_name;
10409 }
10410
10411 if (must_invert_cmp_result)
10412 {
10413 tree vec_compare_name = make_ssa_name (vec_cmp_type);
10414 gassign *new_stmt = gimple_build_assign (vec_compare_name,
10415 BIT_NOT_EXPR,
10416 vec_compare);
10417 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
10418 vec_compare = vec_compare_name;
10419 }
10420
10421 if (masks)
10422 {
10423 unsigned vec_num = vec_oprnds0.length ();
10424 tree loop_mask
10425 = vect_get_loop_mask (gsi, masks, vec_num * ncopies,
10426 vectype, vec_num * j + i);
10427 tree tmp2 = make_ssa_name (vec_cmp_type);
10428 gassign *g
10429 = gimple_build_assign (tmp2, BIT_AND_EXPR, vec_compare,
10430 loop_mask);
10431 vect_finish_stmt_generation (stmt_info, g, gsi);
10432 vec_compare = tmp2;
10433 }
10434 }
10435
10436 if (reduction_type == EXTRACT_LAST_REDUCTION)
10437 {
10438 gimple *old_stmt = vect_orig_stmt (stmt_info)->stmt;
10439 tree lhs = gimple_get_lhs (old_stmt);
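	      /* .FOLD_EXTRACT_LAST (else, mask, vec) yields the last
		 element of VEC for which MASK is set, or ELSE when no
		 element is active, which implements the conditional
		 reduction directly.  */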
10440 gcall *new_stmt = gimple_build_call_internal
10441 (IFN_FOLD_EXTRACT_LAST, 3, else_clause, vec_compare,
10442 vec_then_clause);
10443 gimple_call_set_lhs (new_stmt, lhs);
10444 SSA_NAME_DEF_STMT (lhs) = new_stmt;
10445 if (old_stmt == gsi_stmt (*gsi))
10446 new_stmt_info = vect_finish_replace_stmt (stmt_info, new_stmt);
10447 else
10448 {
10449 /* In this case we're moving the definition to later in the
10450 block. That doesn't matter because the only uses of the
10451 lhs are in phi statements. */
10452 gimple_stmt_iterator old_gsi = gsi_for_stmt (old_stmt);
10453 gsi_remove (&old_gsi, true);
10454 new_stmt_info
10455 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
10456 }
10457 }
10458 else
10459 {
10460 new_temp = make_ssa_name (vec_dest);
10461 gassign *new_stmt
10462 = gimple_build_assign (new_temp, VEC_COND_EXPR, vec_compare,
10463 vec_then_clause, vec_else_clause);
10464 new_stmt_info
10465 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
10466 }
10467 if (slp_node)
10468 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
10469 }
10470
10471 if (slp_node)
10472 continue;
10473
10474 if (j == 0)
10475 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
10476 else
10477 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
10478
10479 prev_stmt_info = new_stmt_info;
10480 }
10481
10482 vec_oprnds0.release ();
10483 vec_oprnds1.release ();
10484 vec_oprnds2.release ();
10485 vec_oprnds3.release ();
10486
10487 return true;
10488 }
10489
10490 /* vectorizable_comparison.
10491
10492    Check if STMT_INFO is a comparison expression that can be vectorized.
10493 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
10494 comparison, put it in VEC_STMT, and insert it at GSI.
10495
10496 Return true if STMT_INFO is vectorizable in this way. */
10497
10498 static bool
10499 vectorizable_comparison (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
10500 stmt_vec_info *vec_stmt,
10501 slp_tree slp_node, stmt_vector_for_cost *cost_vec)
10502 {
10503 vec_info *vinfo = stmt_info->vinfo;
10504 tree lhs, rhs1, rhs2;
10505 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
10506 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
10507 tree vec_rhs1 = NULL_TREE, vec_rhs2 = NULL_TREE;
10508 tree new_temp;
10509 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
10510 enum vect_def_type dts[2] = {vect_unknown_def_type, vect_unknown_def_type};
10511 int ndts = 2;
10512 poly_uint64 nunits;
10513 int ncopies;
10514 enum tree_code code, bitop1 = NOP_EXPR, bitop2 = NOP_EXPR;
10515 stmt_vec_info prev_stmt_info = NULL;
10516 int i, j;
10517 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
10518 vec<tree> vec_oprnds0 = vNULL;
10519 vec<tree> vec_oprnds1 = vNULL;
10520 tree mask_type;
10521 tree mask;
10522
10523 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
10524 return false;
10525
10526 if (!vectype || !VECTOR_BOOLEAN_TYPE_P (vectype))
10527 return false;
10528
10529 mask_type = vectype;
10530 nunits = TYPE_VECTOR_SUBPARTS (vectype);
10531
10532 if (slp_node)
10533 ncopies = 1;
10534 else
10535 ncopies = vect_get_num_copies (loop_vinfo, vectype);
10536
10537 gcc_assert (ncopies >= 1);
10538 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
10539 return false;
10540
10541 if (STMT_VINFO_LIVE_P (stmt_info))
10542 {
10543 if (dump_enabled_p ())
10544 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10545 "value used after loop.\n");
10546 return false;
10547 }
10548
10549 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
10550 if (!stmt)
10551 return false;
10552
10553 code = gimple_assign_rhs_code (stmt);
10554
10555 if (TREE_CODE_CLASS (code) != tcc_comparison)
10556 return false;
10557
10558 rhs1 = gimple_assign_rhs1 (stmt);
10559 rhs2 = gimple_assign_rhs2 (stmt);
10560
10561 if (!vect_is_simple_use (rhs1, stmt_info->vinfo, &dts[0], &vectype1))
10562 return false;
10563
10564 if (!vect_is_simple_use (rhs2, stmt_info->vinfo, &dts[1], &vectype2))
10565 return false;
10566
10567 if (vectype1 && vectype2
10568 && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1),
10569 TYPE_VECTOR_SUBPARTS (vectype2)))
10570 return false;
10571
10572 vectype = vectype1 ? vectype1 : vectype2;
10573
10574 /* Invariant comparison. */
10575 if (!vectype)
10576 {
10577 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs1)))
10578 vectype = mask_type;
10579 else
10580 vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (rhs1),
10581 slp_node);
10582 if (!vectype || maybe_ne (TYPE_VECTOR_SUBPARTS (vectype), nunits))
10583 return false;
10584 }
10585 else if (maybe_ne (nunits, TYPE_VECTOR_SUBPARTS (vectype)))
10586 return false;
10587
10588 /* Can't compare mask and non-mask types. */
10589 if (vectype1 && vectype2
10590 && (VECTOR_BOOLEAN_TYPE_P (vectype1) ^ VECTOR_BOOLEAN_TYPE_P (vectype2)))
10591 return false;
10592
10593 /* Boolean values may have another representation in vectors
10594 and therefore we prefer bit operations over comparison for
10595 them (which also works for scalar masks). We store opcodes
10596 to use in bitop1 and bitop2. Statement is vectorized as
10597 BITOP2 (rhs1 BITOP1 rhs2) or
10598 rhs1 BITOP2 (BITOP1 rhs2)
10599 depending on bitop1 and bitop2 arity. */
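  /* For example, a != b on boolean vectors becomes a ^ b, and swap_p
     lets LT_EXPR/LE_EXPR reuse the GT_EXPR/GE_EXPR mappings with the
     operands exchanged.  */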
10600 bool swap_p = false;
10601 if (VECTOR_BOOLEAN_TYPE_P (vectype))
10602 {
10603 if (code == GT_EXPR)
10604 {
10605 bitop1 = BIT_NOT_EXPR;
10606 bitop2 = BIT_AND_EXPR;
10607 }
10608 else if (code == GE_EXPR)
10609 {
10610 bitop1 = BIT_NOT_EXPR;
10611 bitop2 = BIT_IOR_EXPR;
10612 }
10613 else if (code == LT_EXPR)
10614 {
10615 bitop1 = BIT_NOT_EXPR;
10616 bitop2 = BIT_AND_EXPR;
10617 swap_p = true;
10618 }
10619 else if (code == LE_EXPR)
10620 {
10621 bitop1 = BIT_NOT_EXPR;
10622 bitop2 = BIT_IOR_EXPR;
10623 swap_p = true;
10624 }
10625 else
10626 {
10627 bitop1 = BIT_XOR_EXPR;
10628 if (code == EQ_EXPR)
10629 bitop2 = BIT_NOT_EXPR;
10630 }
10631 }
10632
10633 if (!vec_stmt)
10634 {
10635 if (bitop1 == NOP_EXPR)
10636 {
10637 if (!expand_vec_cmp_expr_p (vectype, mask_type, code))
10638 return false;
10639 }
10640 else
10641 {
10642 machine_mode mode = TYPE_MODE (vectype);
10643 optab optab;
10644
10645 optab = optab_for_tree_code (bitop1, vectype, optab_default);
10646 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
10647 return false;
10648
10649 if (bitop2 != NOP_EXPR)
10650 {
10651 optab = optab_for_tree_code (bitop2, vectype, optab_default);
10652 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
10653 return false;
10654 }
10655 }
10656
10657 STMT_VINFO_TYPE (stmt_info) = comparison_vec_info_type;
10658 vect_model_simple_cost (stmt_info, ncopies * (1 + (bitop2 != NOP_EXPR)),
10659 dts, ndts, slp_node, cost_vec);
10660 return true;
10661 }
10662
10663 /* Transform. */
10664 if (!slp_node)
10665 {
10666 vec_oprnds0.create (1);
10667 vec_oprnds1.create (1);
10668 }
10669
10670 /* Handle def. */
10671 lhs = gimple_assign_lhs (stmt);
10672 mask = vect_create_destination_var (lhs, mask_type);
10673
10674 /* Handle cmp expr. */
10675 for (j = 0; j < ncopies; j++)
10676 {
10677 stmt_vec_info new_stmt_info = NULL;
10678 if (j == 0)
10679 {
10680 if (slp_node)
10681 {
10682 auto_vec<vec<tree>, 2> vec_defs;
10683 vect_get_slp_defs (slp_node, &vec_defs);
10684 vec_oprnds1 = vec_defs.pop ();
10685 vec_oprnds0 = vec_defs.pop ();
10686 if (swap_p)
10687 std::swap (vec_oprnds0, vec_oprnds1);
10688 }
10689 else
10690 {
10691 vec_rhs1 = vect_get_vec_def_for_operand (rhs1, stmt_info,
10692 vectype);
10693 vec_rhs2 = vect_get_vec_def_for_operand (rhs2, stmt_info,
10694 vectype);
10695 }
10696 }
10697 else
10698 {
10699 vec_rhs1 = vect_get_vec_def_for_stmt_copy (vinfo,
10700 vec_oprnds0.pop ());
10701 vec_rhs2 = vect_get_vec_def_for_stmt_copy (vinfo,
10702 vec_oprnds1.pop ());
10703 }
10704
10705 if (!slp_node)
10706 {
10707 if (swap_p && j == 0)
10708 std::swap (vec_rhs1, vec_rhs2);
10709 vec_oprnds0.quick_push (vec_rhs1);
10710 vec_oprnds1.quick_push (vec_rhs2);
10711 }
10712
10713 /* Arguments are ready. Create the new vector stmt. */
10714 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_rhs1)
10715 {
10716 vec_rhs2 = vec_oprnds1[i];
10717
10718 new_temp = make_ssa_name (mask);
10719 if (bitop1 == NOP_EXPR)
10720 {
10721 gassign *new_stmt = gimple_build_assign (new_temp, code,
10722 vec_rhs1, vec_rhs2);
10723 new_stmt_info
10724 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
10725 }
10726 else
10727 {
10728 gassign *new_stmt;
10729 if (bitop1 == BIT_NOT_EXPR)
10730 new_stmt = gimple_build_assign (new_temp, bitop1, vec_rhs2);
10731 else
10732 new_stmt = gimple_build_assign (new_temp, bitop1, vec_rhs1,
10733 vec_rhs2);
10734 new_stmt_info
10735 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
10736 if (bitop2 != NOP_EXPR)
10737 {
10738 tree res = make_ssa_name (mask);
10739 if (bitop2 == BIT_NOT_EXPR)
10740 new_stmt = gimple_build_assign (res, bitop2, new_temp);
10741 else
10742 new_stmt = gimple_build_assign (res, bitop2, vec_rhs1,
10743 new_temp);
10744 new_stmt_info
10745 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
10746 }
10747 }
10748 if (slp_node)
10749 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
10750 }
10751
10752 if (slp_node)
10753 continue;
10754
10755 if (j == 0)
10756 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
10757 else
10758 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
10759
10760 prev_stmt_info = new_stmt_info;
10761 }
10762
10763 vec_oprnds0.release ();
10764 vec_oprnds1.release ();
10765
10766 return true;
10767 }
10768
10769 /* If SLP_NODE is nonnull, return true if vectorizable_live_operation
10770 can handle all live statements in the node. Otherwise return true
10771 if STMT_INFO is not live or if vectorizable_live_operation can handle it.
10772 GSI and VEC_STMT_P are as for vectorizable_live_operation. */
10773
10774 static bool
10775 can_vectorize_live_stmts (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
10776 slp_tree slp_node, slp_instance slp_node_instance,
10777 bool vec_stmt_p,
10778 stmt_vector_for_cost *cost_vec)
10779 {
10780 if (slp_node)
10781 {
10782 stmt_vec_info slp_stmt_info;
10783 unsigned int i;
10784 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (slp_node), i, slp_stmt_info)
10785 {
10786 if (STMT_VINFO_LIVE_P (slp_stmt_info)
10787 && !vectorizable_live_operation (slp_stmt_info, gsi, slp_node,
10788 slp_node_instance, i,
10789 vec_stmt_p, cost_vec))
10790 return false;
10791 }
10792 }
10793 else if (STMT_VINFO_LIVE_P (stmt_info)
10794 && !vectorizable_live_operation (stmt_info, gsi, slp_node,
10795 slp_node_instance, -1,
10796 vec_stmt_p, cost_vec))
10797 return false;
10798
10799 return true;
10800 }
10801
10802 /* Make sure the statement is vectorizable. */
10803
10804 opt_result
10805 vect_analyze_stmt (stmt_vec_info stmt_info, bool *need_to_vectorize,
10806 slp_tree node, slp_instance node_instance,
10807 stmt_vector_for_cost *cost_vec)
10808 {
10809 vec_info *vinfo = stmt_info->vinfo;
10810 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
10811 enum vect_relevant relevance = STMT_VINFO_RELEVANT (stmt_info);
10812 bool ok;
10813 gimple_seq pattern_def_seq;
10814
10815 if (dump_enabled_p ())
10816 dump_printf_loc (MSG_NOTE, vect_location, "==> examining statement: %G",
10817 stmt_info->stmt);
10818
10819 if (gimple_has_volatile_ops (stmt_info->stmt))
10820 return opt_result::failure_at (stmt_info->stmt,
10821 "not vectorized:"
10822 " stmt has volatile operands: %G\n",
10823 stmt_info->stmt);
10824
10825 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
10826 && node == NULL
10827 && (pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info)))
10828 {
10829 gimple_stmt_iterator si;
10830
10831 for (si = gsi_start (pattern_def_seq); !gsi_end_p (si); gsi_next (&si))
10832 {
10833 stmt_vec_info pattern_def_stmt_info
10834 = vinfo->lookup_stmt (gsi_stmt (si));
10835 if (STMT_VINFO_RELEVANT_P (pattern_def_stmt_info)
10836 || STMT_VINFO_LIVE_P (pattern_def_stmt_info))
10837 {
10838 /* Analyze def stmt of STMT if it's a pattern stmt. */
10839 if (dump_enabled_p ())
10840 dump_printf_loc (MSG_NOTE, vect_location,
10841 "==> examining pattern def statement: %G",
10842 pattern_def_stmt_info->stmt);
10843
10844 opt_result res
10845 = vect_analyze_stmt (pattern_def_stmt_info,
10846 need_to_vectorize, node, node_instance,
10847 cost_vec);
10848 if (!res)
10849 return res;
10850 }
10851 }
10852 }
10853
10854 /* Skip stmts that do not need to be vectorized. In loops this is expected
10855 to include:
10856 - the COND_EXPR which is the loop exit condition
10857 - any LABEL_EXPRs in the loop
10858 - computations that are used only for array indexing or loop control.
10859 In basic blocks we only analyze statements that are a part of some SLP
10860 instance, therefore, all the statements are relevant.
10861
10862    A pattern statement needs to be analyzed instead of the original statement
10863    if the original statement is not relevant.  Otherwise, we analyze both
10864    statements.  In basic blocks we are called from some SLP instance
10865    traversal, so we don't analyze pattern stmts instead; the pattern
10866    stmts are already part of an SLP instance.  */
10867
10868 stmt_vec_info pattern_stmt_info = STMT_VINFO_RELATED_STMT (stmt_info);
10869 if (!STMT_VINFO_RELEVANT_P (stmt_info)
10870 && !STMT_VINFO_LIVE_P (stmt_info))
10871 {
10872 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
10873 && pattern_stmt_info
10874 && (STMT_VINFO_RELEVANT_P (pattern_stmt_info)
10875 || STMT_VINFO_LIVE_P (pattern_stmt_info)))
10876 {
10877 /* Analyze PATTERN_STMT instead of the original stmt. */
10878 stmt_info = pattern_stmt_info;
10879 if (dump_enabled_p ())
10880 dump_printf_loc (MSG_NOTE, vect_location,
10881 "==> examining pattern statement: %G",
10882 stmt_info->stmt);
10883 }
10884 else
10885 {
10886 if (dump_enabled_p ())
10887 dump_printf_loc (MSG_NOTE, vect_location, "irrelevant.\n");
10888
10889 return opt_result::success ();
10890 }
10891 }
10892 else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
10893 && node == NULL
10894 && pattern_stmt_info
10895 && (STMT_VINFO_RELEVANT_P (pattern_stmt_info)
10896 || STMT_VINFO_LIVE_P (pattern_stmt_info)))
10897 {
10898 /* Analyze PATTERN_STMT too. */
10899 if (dump_enabled_p ())
10900 dump_printf_loc (MSG_NOTE, vect_location,
10901 "==> examining pattern statement: %G",
10902 pattern_stmt_info->stmt);
10903
10904 opt_result res
10905 = vect_analyze_stmt (pattern_stmt_info, need_to_vectorize, node,
10906 node_instance, cost_vec);
10907 if (!res)
10908 return res;
10909 }
10910
10911 switch (STMT_VINFO_DEF_TYPE (stmt_info))
10912 {
10913 case vect_internal_def:
10914 break;
10915
10916 case vect_reduction_def:
10917 case vect_nested_cycle:
10918 gcc_assert (!bb_vinfo
10919 && (relevance == vect_used_in_outer
10920 || relevance == vect_used_in_outer_by_reduction
10921 || relevance == vect_used_by_reduction
10922 || relevance == vect_unused_in_scope
10923 || relevance == vect_used_only_live));
10924 break;
10925
10926 case vect_induction_def:
10927 gcc_assert (!bb_vinfo);
10928 break;
10929
10930 case vect_constant_def:
10931 case vect_external_def:
10932 case vect_unknown_def_type:
10933 default:
10934 gcc_unreachable ();
10935 }
10936
10937 if (STMT_VINFO_RELEVANT_P (stmt_info))
10938 {
10939 tree type = gimple_expr_type (stmt_info->stmt);
10940 gcc_assert (!VECTOR_MODE_P (TYPE_MODE (type)));
10941 gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
10942 gcc_assert (STMT_VINFO_VECTYPE (stmt_info)
10943 || (call && gimple_call_lhs (call) == NULL_TREE));
10944 *need_to_vectorize = true;
10945 }
10946
10947 if (PURE_SLP_STMT (stmt_info) && !node)
10948 {
10949 if (dump_enabled_p ())
10950 dump_printf_loc (MSG_NOTE, vect_location,
10951 "handled only by SLP analysis\n");
10952 return opt_result::success ();
10953 }
10954
10955 ok = true;
10956 if (!bb_vinfo
10957 && (STMT_VINFO_RELEVANT_P (stmt_info)
10958 || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def))
10959 /* Prefer vectorizable_call over vectorizable_simd_clone_call so
10960 -mveclibabi= takes preference over library functions with
10961 the simd attribute. */
10962 ok = (vectorizable_call (stmt_info, NULL, NULL, node, cost_vec)
10963 || vectorizable_simd_clone_call (stmt_info, NULL, NULL, node,
10964 cost_vec)
10965 || vectorizable_conversion (stmt_info, NULL, NULL, node, cost_vec)
10966 || vectorizable_operation (stmt_info, NULL, NULL, node, cost_vec)
10967 || vectorizable_assignment (stmt_info, NULL, NULL, node, cost_vec)
10968 || vectorizable_load (stmt_info, NULL, NULL, node, node_instance,
10969 cost_vec)
10970 || vectorizable_store (stmt_info, NULL, NULL, node, cost_vec)
10971 || vectorizable_reduction (stmt_info, node, node_instance, cost_vec)
10972 || vectorizable_induction (stmt_info, NULL, NULL, node, cost_vec)
10973 || vectorizable_shift (stmt_info, NULL, NULL, node, cost_vec)
10974 || vectorizable_condition (stmt_info, NULL, NULL, node, cost_vec)
10975 || vectorizable_comparison (stmt_info, NULL, NULL, node,
10976 cost_vec)
10977 || vectorizable_lc_phi (stmt_info, NULL, node));
10978 else
10979 {
10980 if (bb_vinfo)
10981 ok = (vectorizable_call (stmt_info, NULL, NULL, node, cost_vec)
10982 || vectorizable_simd_clone_call (stmt_info, NULL, NULL, node,
10983 cost_vec)
10984 || vectorizable_conversion (stmt_info, NULL, NULL, node,
10985 cost_vec)
10986 || vectorizable_shift (stmt_info, NULL, NULL, node, cost_vec)
10987 || vectorizable_operation (stmt_info, NULL, NULL, node, cost_vec)
10988 || vectorizable_assignment (stmt_info, NULL, NULL, node,
10989 cost_vec)
10990 || vectorizable_load (stmt_info, NULL, NULL, node, node_instance,
10991 cost_vec)
10992 || vectorizable_store (stmt_info, NULL, NULL, node, cost_vec)
10993 || vectorizable_condition (stmt_info, NULL, NULL, node, cost_vec)
10994 || vectorizable_comparison (stmt_info, NULL, NULL, node,
10995 cost_vec));
10996 }
10997
10998 if (!ok)
10999 return opt_result::failure_at (stmt_info->stmt,
11000 "not vectorized:"
11001 " relevant stmt not supported: %G",
11002 stmt_info->stmt);
11003
11004 /* Stmts that are (also) "live" (i.e. - that are used out of the loop)
11005 need extra handling, except for vectorizable reductions. */
11006 if (!bb_vinfo
11007 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
11008 && STMT_VINFO_TYPE (stmt_info) != lc_phi_info_type
11009 && !can_vectorize_live_stmts (stmt_info, NULL, node, node_instance,
11010 false, cost_vec))
11011 return opt_result::failure_at (stmt_info->stmt,
11012 "not vectorized:"
11013 " live stmt not supported: %G",
11014 stmt_info->stmt);
11015
11016 return opt_result::success ();
11017 }
11018
11019
11020 /* Function vect_transform_stmt.
11021
11022 Create a vectorized stmt to replace STMT_INFO, and insert it at GSI. */
11023
11024 bool
11025 vect_transform_stmt (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
11026 slp_tree slp_node, slp_instance slp_node_instance)
11027 {
11028 vec_info *vinfo = stmt_info->vinfo;
11029 bool is_store = false;
11030 stmt_vec_info vec_stmt = NULL;
11031 bool done;
11032
11033 gcc_assert (slp_node || !PURE_SLP_STMT (stmt_info));
11034 stmt_vec_info old_vec_stmt_info = STMT_VINFO_VEC_STMT (stmt_info);
11035
11036 bool nested_p = (STMT_VINFO_LOOP_VINFO (stmt_info)
11037 && nested_in_vect_loop_p
11038 (LOOP_VINFO_LOOP (STMT_VINFO_LOOP_VINFO (stmt_info)),
11039 stmt_info));
11040
11041 gimple *stmt = stmt_info->stmt;
11042 switch (STMT_VINFO_TYPE (stmt_info))
11043 {
11044 case type_demotion_vec_info_type:
11045 case type_promotion_vec_info_type:
11046 case type_conversion_vec_info_type:
11047 done = vectorizable_conversion (stmt_info, gsi, &vec_stmt, slp_node,
11048 NULL);
11049 gcc_assert (done);
11050 break;
11051
11052 case induc_vec_info_type:
11053 done = vectorizable_induction (stmt_info, gsi, &vec_stmt, slp_node,
11054 NULL);
11055 gcc_assert (done);
11056 break;
11057
11058 case shift_vec_info_type:
11059 done = vectorizable_shift (stmt_info, gsi, &vec_stmt, slp_node, NULL);
11060 gcc_assert (done);
11061 break;
11062
11063 case op_vec_info_type:
11064 done = vectorizable_operation (stmt_info, gsi, &vec_stmt, slp_node,
11065 NULL);
11066 gcc_assert (done);
11067 break;
11068
11069 case assignment_vec_info_type:
11070 done = vectorizable_assignment (stmt_info, gsi, &vec_stmt, slp_node,
11071 NULL);
11072 gcc_assert (done);
11073 break;
11074
11075 case load_vec_info_type:
11076 done = vectorizable_load (stmt_info, gsi, &vec_stmt, slp_node,
11077 slp_node_instance, NULL);
11078 gcc_assert (done);
11079 break;
11080
11081 case store_vec_info_type:
11082 done = vectorizable_store (stmt_info, gsi, &vec_stmt, slp_node, NULL);
11083 gcc_assert (done);
11084 if (STMT_VINFO_GROUPED_ACCESS (stmt_info) && !slp_node)
11085 {
11086 /* In case of interleaving, the whole chain is vectorized when the
11087 last store in the chain is reached. Store stmts before the last
11088 	     one are skipped, and their vec_stmt_info shouldn't be freed
11089 meanwhile. */
11090 stmt_vec_info group_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
11091 if (DR_GROUP_STORE_COUNT (group_info) == DR_GROUP_SIZE (group_info))
11092 is_store = true;
11093 }
11094 else
11095 is_store = true;
11096 break;
11097
11098 case condition_vec_info_type:
11099 done = vectorizable_condition (stmt_info, gsi, &vec_stmt, slp_node, NULL);
11100 gcc_assert (done);
11101 break;
11102
11103 case comparison_vec_info_type:
11104 done = vectorizable_comparison (stmt_info, gsi, &vec_stmt,
11105 slp_node, NULL);
11106 gcc_assert (done);
11107 break;
11108
11109 case call_vec_info_type:
11110 done = vectorizable_call (stmt_info, gsi, &vec_stmt, slp_node, NULL);
11111 stmt = gsi_stmt (*gsi);
11112 break;
11113
11114 case call_simd_clone_vec_info_type:
11115 done = vectorizable_simd_clone_call (stmt_info, gsi, &vec_stmt,
11116 slp_node, NULL);
11117 stmt = gsi_stmt (*gsi);
11118 break;
11119
11120 case reduc_vec_info_type:
11121 done = vect_transform_reduction (stmt_info, gsi, &vec_stmt, slp_node);
11122 gcc_assert (done);
11123 break;
11124
11125 case cycle_phi_info_type:
11126 done = vect_transform_cycle_phi (stmt_info, &vec_stmt, slp_node,
11127 slp_node_instance);
11128 gcc_assert (done);
11129 break;
11130
11131 case lc_phi_info_type:
11132 done = vectorizable_lc_phi (stmt_info, &vec_stmt, slp_node);
11133 gcc_assert (done);
11134 break;
11135
11136 default:
11137 if (!STMT_VINFO_LIVE_P (stmt_info))
11138 {
11139 if (dump_enabled_p ())
11140 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
11141 "stmt not supported.\n");
11142 gcc_unreachable ();
11143 }
11144 }
11145
11146 /* Verify SLP vectorization doesn't mess with STMT_VINFO_VEC_STMT.
11147 This would break hybrid SLP vectorization. */
11148 if (slp_node)
11149 gcc_assert (!vec_stmt
11150 && STMT_VINFO_VEC_STMT (stmt_info) == old_vec_stmt_info);
11151
11152 /* Handle inner-loop stmts whose DEF is used in the loop-nest that
11153 is being vectorized, but outside the immediately enclosing loop. */
11154 if (vec_stmt
11155 && nested_p
11156 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
11157 && (STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_outer
11158 || STMT_VINFO_RELEVANT (stmt_info) ==
11159 vect_used_in_outer_by_reduction))
11160 {
11161 class loop *innerloop = LOOP_VINFO_LOOP (
11162 STMT_VINFO_LOOP_VINFO (stmt_info))->inner;
11163 imm_use_iterator imm_iter;
11164 use_operand_p use_p;
11165 tree scalar_dest;
11166
11167 if (dump_enabled_p ())
11168 dump_printf_loc (MSG_NOTE, vect_location,
11169 "Record the vdef for outer-loop vectorization.\n");
11170
11171       /* Find the relevant loop-exit phi-node, and record the vec_stmt there
11172 (to be used when vectorizing outer-loop stmts that use the DEF of
11173 STMT). */
11174 if (gimple_code (stmt) == GIMPLE_PHI)
11175 scalar_dest = PHI_RESULT (stmt);
11176 else
11177 scalar_dest = gimple_get_lhs (stmt);
11178
11179 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
11180 if (!flow_bb_inside_loop_p (innerloop, gimple_bb (USE_STMT (use_p))))
11181 {
11182 stmt_vec_info exit_phi_info
11183 = vinfo->lookup_stmt (USE_STMT (use_p));
11184 STMT_VINFO_VEC_STMT (exit_phi_info) = vec_stmt;
11185 }
11186 }
11187
11188 if (vec_stmt)
11189 STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;
11190
11191 if (STMT_VINFO_TYPE (stmt_info) == store_vec_info_type)
11192 return is_store;
11193
11194 /* Handle stmts whose DEF is used outside the loop-nest that is
11195 being vectorized. */
11196 done = can_vectorize_live_stmts (stmt_info, gsi, slp_node,
11197 slp_node_instance, true, NULL);
11198 gcc_assert (done);
11199
11200 return false;
11201 }
11202
11203
11204 /* Remove a group of stores (for SLP or interleaving), free their
11205 stmt_vec_info. */
11206
11207 void
11208 vect_remove_stores (stmt_vec_info first_stmt_info)
11209 {
11210 vec_info *vinfo = first_stmt_info->vinfo;
11211 stmt_vec_info next_stmt_info = first_stmt_info;
11212
11213 while (next_stmt_info)
11214 {
11215 stmt_vec_info tmp = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
11216 next_stmt_info = vect_orig_stmt (next_stmt_info);
11217 /* Free the attached stmt_vec_info and remove the stmt. */
11218 vinfo->remove_stmt (next_stmt_info);
11219 next_stmt_info = tmp;
11220 }
11221 }
11222
11223 /* If NUNITS is nonzero, return a vector type that contains NUNITS
11224 elements of type SCALAR_TYPE, or null if the target doesn't support
11225 such a type.
11226
11227 If NUNITS is zero, return a vector type that contains elements of
11228 type SCALAR_TYPE, choosing whichever vector size the target prefers.
11229
11230 If PREVAILING_MODE is VOIDmode, we have not yet chosen a vector mode
11231 for this vectorization region and want to "autodetect" the best choice.
11232 Otherwise, PREVAILING_MODE is a previously-chosen vector TYPE_MODE
11233 and we want the new type to be interoperable with it. PREVAILING_MODE
11234 in this case can be a scalar integer mode or a vector mode; when it
11235 is a vector mode, the function acts like a tree-level version of
11236 related_vector_mode. */
11237
11238 tree
11239 get_related_vectype_for_scalar_type (machine_mode prevailing_mode,
11240 tree scalar_type, poly_uint64 nunits)
11241 {
11242 tree orig_scalar_type = scalar_type;
11243 scalar_mode inner_mode;
11244 machine_mode simd_mode;
11245 tree vectype;
11246
11247 if (!is_int_mode (TYPE_MODE (scalar_type), &inner_mode)
11248 && !is_float_mode (TYPE_MODE (scalar_type), &inner_mode))
11249 return NULL_TREE;
11250
11251 unsigned int nbytes = GET_MODE_SIZE (inner_mode);
11252
11253 /* For vector types of elements whose mode precision doesn't
11254      match their type's precision we use an element type of mode
11255 precision. The vectorization routines will have to make sure
11256 they support the proper result truncation/extension.
11257 We also make sure to build vector types with INTEGER_TYPE
11258 component type only. */
11259 if (INTEGRAL_TYPE_P (scalar_type)
11260 && (GET_MODE_BITSIZE (inner_mode) != TYPE_PRECISION (scalar_type)
11261 || TREE_CODE (scalar_type) != INTEGER_TYPE))
11262 scalar_type = build_nonstandard_integer_type (GET_MODE_BITSIZE (inner_mode),
11263 TYPE_UNSIGNED (scalar_type));
11264
11265 /* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
11266      When the component mode passes the above test, simply use a type
11267 corresponding to that mode. The theory is that any use that
11268 would cause problems with this will disable vectorization anyway. */
11269 else if (!SCALAR_FLOAT_TYPE_P (scalar_type)
11270 && !INTEGRAL_TYPE_P (scalar_type))
11271 scalar_type = lang_hooks.types.type_for_mode (inner_mode, 1);
11272
11273 /* We can't build a vector type of elements with alignment bigger than
11274 their size. */
11275 else if (nbytes < TYPE_ALIGN_UNIT (scalar_type))
11276 scalar_type = lang_hooks.types.type_for_mode (inner_mode,
11277 TYPE_UNSIGNED (scalar_type));
11278
11279   /* If we fell back to using the mode, fail if there was
11280 no scalar type for it. */
11281 if (scalar_type == NULL_TREE)
11282 return NULL_TREE;
11283
11284 /* If no prevailing mode was supplied, use the mode the target prefers.
11285 Otherwise lookup a vector mode based on the prevailing mode. */
11286 if (prevailing_mode == VOIDmode)
11287 {
11288 gcc_assert (known_eq (nunits, 0U));
11289 simd_mode = targetm.vectorize.preferred_simd_mode (inner_mode);
11290 if (SCALAR_INT_MODE_P (simd_mode))
11291 {
11292 /* Traditional behavior is not to take the integer mode
11293 literally, but simply to use it as a way of determining
11294 the vector size. It is up to mode_for_vector to decide
11295 what the TYPE_MODE should be.
11296
11297 Note that nunits == 1 is allowed in order to support single
11298 element vector types. */
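	  /* For example, a preferred SIMD mode of DImode with QImode
	     elements gives nunits == 8, and mode_for_vector then looks
	     for an 8-element QImode vector mode.  */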
11299 if (!multiple_p (GET_MODE_SIZE (simd_mode), nbytes, &nunits)
11300 || !mode_for_vector (inner_mode, nunits).exists (&simd_mode))
11301 return NULL_TREE;
11302 }
11303 }
11304 else if (SCALAR_INT_MODE_P (prevailing_mode)
11305 || !related_vector_mode (prevailing_mode,
11306 inner_mode, nunits).exists (&simd_mode))
11307 {
11308 /* Fall back to using mode_for_vector, mostly in the hope of being
11309 able to use an integer mode. */
11310 if (known_eq (nunits, 0U)
11311 && !multiple_p (GET_MODE_SIZE (prevailing_mode), nbytes, &nunits))
11312 return NULL_TREE;
11313
11314 if (!mode_for_vector (inner_mode, nunits).exists (&simd_mode))
11315 return NULL_TREE;
11316 }
11317
11318 vectype = build_vector_type_for_mode (scalar_type, simd_mode);
11319
11320 /* In cases where the mode was chosen by mode_for_vector, check that
11321 the target actually supports the chosen mode, or that it at least
11322 allows the vector mode to be replaced by a like-sized integer. */
11323 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
11324 && !INTEGRAL_MODE_P (TYPE_MODE (vectype)))
11325 return NULL_TREE;
11326
11327 /* Re-attach the address-space qualifier if we canonicalized the scalar
11328 type. */
11329 if (TYPE_ADDR_SPACE (orig_scalar_type) != TYPE_ADDR_SPACE (vectype))
11330 return build_qualified_type
11331 (vectype, KEEP_QUAL_ADDR_SPACE (TYPE_QUALS (orig_scalar_type)));
11332
11333 return vectype;
11334 }
11335
11336 /* Function get_vectype_for_scalar_type.
11337
11338 Returns the vector type corresponding to SCALAR_TYPE as supported
11339 by the target. If GROUP_SIZE is nonzero and we're performing BB
11340 vectorization, make sure that the number of elements in the vector
11341 is no bigger than GROUP_SIZE. */
11342
11343 tree
11344 get_vectype_for_scalar_type (vec_info *vinfo, tree scalar_type,
11345 unsigned int group_size)
11346 {
11347 /* For BB vectorization, we should always have a group size once we've
11348 constructed the SLP tree; the only valid uses of zero GROUP_SIZEs
11349 are tentative requests during things like early data reference
11350 analysis and pattern recognition. */
11351 if (is_a <bb_vec_info> (vinfo))
11352 gcc_assert (vinfo->slp_instances.is_empty () || group_size != 0);
11353 else
11354 group_size = 0;
11355
11356 tree vectype = get_related_vectype_for_scalar_type (vinfo->vector_mode,
11357 scalar_type);
11358 if (vectype && vinfo->vector_mode == VOIDmode)
11359 vinfo->vector_mode = TYPE_MODE (vectype);
11360
11361 /* Register the natural choice of vector type, before the group size
11362 has been applied. */
11363 if (vectype)
11364 vinfo->used_vector_modes.add (TYPE_MODE (vectype));
11365
11366 /* If the natural choice of vector type doesn't satisfy GROUP_SIZE,
11367 try again with an explicit number of elements. */
11368 if (vectype
11369 && group_size
11370 && maybe_ge (TYPE_VECTOR_SUBPARTS (vectype), group_size))
11371 {
11372 /* Start with the biggest number of units that fits within
11373 GROUP_SIZE and halve it until we find a valid vector type.
11374 Usually either the first attempt will succeed or all will
11375 fail (in the latter case because GROUP_SIZE is too small
11376 for the target), but it's possible that a target could have
11377 a hole between supported vector types.
11378
11379 If GROUP_SIZE is not a power of 2, this has the effect of
11380 trying the largest power of 2 that fits within the group,
11381 even though the group is not a multiple of that vector size.
11382 The BB vectorizer will then try to carve up the group into
11383 smaller pieces. */
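      /* For example, GROUP_SIZE == 6 tries a 4-element vector type and
	 then a 2-element one before giving up.  */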
11384 unsigned int nunits = 1 << floor_log2 (group_size);
11385 do
11386 {
11387 vectype = get_related_vectype_for_scalar_type (vinfo->vector_mode,
11388 scalar_type, nunits);
11389 nunits /= 2;
11390 }
11391 while (nunits > 1 && !vectype);
11392 }
11393
11394 return vectype;
11395 }
11396
11397 /* Return the vector type corresponding to SCALAR_TYPE as supported
11398 by the target. NODE, if nonnull, is the SLP tree node that will
11399 use the returned vector type. */
11400
11401 tree
11402 get_vectype_for_scalar_type (vec_info *vinfo, tree scalar_type, slp_tree node)
11403 {
11404 unsigned int group_size = 0;
11405 if (node)
11406 {
11407 group_size = SLP_TREE_SCALAR_OPS (node).length ();
11408 if (group_size == 0)
11409 group_size = SLP_TREE_SCALAR_STMTS (node).length ();
11410 }
11411 return get_vectype_for_scalar_type (vinfo, scalar_type, group_size);
11412 }
11413
11414 /* Function get_mask_type_for_scalar_type.
11415
11416 Returns the mask type corresponding to a result of comparison
11417 of vectors of specified SCALAR_TYPE as supported by target.
11418 If GROUP_SIZE is nonzero and we're performing BB vectorization,
11419 make sure that the number of elements in the vector is no bigger
11420 than GROUP_SIZE. */
11421
11422 tree
11423 get_mask_type_for_scalar_type (vec_info *vinfo, tree scalar_type,
11424 unsigned int group_size)
11425 {
11426 tree vectype = get_vectype_for_scalar_type (vinfo, scalar_type, group_size);
11427
11428 if (!vectype)
11429 return NULL;
11430
11431 return truth_type_for (vectype);
11432 }
11433
11434 /* Function get_same_sized_vectype
11435
11436 Returns a vector type corresponding to SCALAR_TYPE of size
11437 VECTOR_TYPE if supported by the target. */
11438
11439 tree
11440 get_same_sized_vectype (tree scalar_type, tree vector_type)
11441 {
11442 if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type))
11443 return truth_type_for (vector_type);
11444
11445 poly_uint64 nunits;
11446 if (!multiple_p (GET_MODE_SIZE (TYPE_MODE (vector_type)),
11447 GET_MODE_SIZE (TYPE_MODE (scalar_type)), &nunits))
11448 return NULL_TREE;
11449
11450 return get_related_vectype_for_scalar_type (TYPE_MODE (vector_type),
11451 scalar_type, nunits);
11452 }
11453
11454 /* Return true if replacing LOOP_VINFO->vector_mode with VECTOR_MODE
11455 would not change the chosen vector modes. */
11456
11457 bool
11458 vect_chooses_same_modes_p (vec_info *vinfo, machine_mode vector_mode)
11459 {
11460 for (vec_info::mode_set::iterator i = vinfo->used_vector_modes.begin ();
11461 i != vinfo->used_vector_modes.end (); ++i)
11462 if (!VECTOR_MODE_P (*i)
11463 || related_vector_mode (vector_mode, GET_MODE_INNER (*i), 0) != *i)
11464 return false;
11465 return true;
11466 }
11467
11468 /* Function vect_is_simple_use.
11469
11470 Input:
11471 VINFO - the vect info of the loop or basic block that is being vectorized.
11472 OPERAND - operand in the loop or bb.
11473 Output:
11474 DEF_STMT_INFO_OUT (optional) - information about the defining stmt in
11475 case OPERAND is an SSA_NAME that is defined in the vectorizable region
11476 DEF_STMT_OUT (optional) - the defining stmt in case OPERAND is an SSA_NAME;
11477 the definition could be anywhere in the function
11478 DT - the type of definition
11479
11480 Returns whether a stmt with OPERAND can be vectorized.
11481 For loops, supportable operands are constants, loop invariants, and operands
11482 that are defined by the current iteration of the loop. Unsupportable
11483 operands are those that are defined by a previous iteration of the loop (as
11484 is the case in reduction/induction computations).
11485 For basic blocks, supportable operands are constants and bb invariants.
11486 For now, operands defined outside the basic block are not supported. */
11487
11488 bool
11489 vect_is_simple_use (tree operand, vec_info *vinfo, enum vect_def_type *dt,
11490 stmt_vec_info *def_stmt_info_out, gimple **def_stmt_out)
11491 {
11492 if (def_stmt_info_out)
11493 *def_stmt_info_out = NULL;
11494 if (def_stmt_out)
11495 *def_stmt_out = NULL;
11496 *dt = vect_unknown_def_type;
11497
11498 if (dump_enabled_p ())
11499 {
11500 dump_printf_loc (MSG_NOTE, vect_location,
11501 "vect_is_simple_use: operand ");
11502 if (TREE_CODE (operand) == SSA_NAME
11503 && !SSA_NAME_IS_DEFAULT_DEF (operand))
11504 dump_gimple_expr (MSG_NOTE, TDF_SLIM, SSA_NAME_DEF_STMT (operand), 0);
11505 else
11506 dump_generic_expr (MSG_NOTE, TDF_SLIM, operand);
11507 }
11508
11509 if (CONSTANT_CLASS_P (operand))
11510 *dt = vect_constant_def;
11511 else if (is_gimple_min_invariant (operand))
11512 *dt = vect_external_def;
11513 else if (TREE_CODE (operand) != SSA_NAME)
11514 *dt = vect_unknown_def_type;
11515 else if (SSA_NAME_IS_DEFAULT_DEF (operand))
11516 *dt = vect_external_def;
11517 else
11518 {
11519 gimple *def_stmt = SSA_NAME_DEF_STMT (operand);
11520 stmt_vec_info stmt_vinfo = vinfo->lookup_def (operand);
11521 if (!stmt_vinfo)
11522 *dt = vect_external_def;
11523 else
11524 {
11525 stmt_vinfo = vect_stmt_to_vectorize (stmt_vinfo);
11526 def_stmt = stmt_vinfo->stmt;
11527 switch (gimple_code (def_stmt))
11528 {
11529 case GIMPLE_PHI:
11530 case GIMPLE_ASSIGN:
11531 case GIMPLE_CALL:
11532 *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
11533 break;
11534 default:
11535 *dt = vect_unknown_def_type;
11536 break;
11537 }
11538 if (def_stmt_info_out)
11539 *def_stmt_info_out = stmt_vinfo;
11540 }
11541 if (def_stmt_out)
11542 *def_stmt_out = def_stmt;
11543 }
11544
11545 if (dump_enabled_p ())
11546 {
11547 dump_printf (MSG_NOTE, ", type of def: ");
11548 switch (*dt)
11549 {
11550 case vect_uninitialized_def:
11551 dump_printf (MSG_NOTE, "uninitialized\n");
11552 break;
11553 case vect_constant_def:
11554 dump_printf (MSG_NOTE, "constant\n");
11555 break;
11556 case vect_external_def:
11557 dump_printf (MSG_NOTE, "external\n");
11558 break;
11559 case vect_internal_def:
11560 dump_printf (MSG_NOTE, "internal\n");
11561 break;
11562 case vect_induction_def:
11563 dump_printf (MSG_NOTE, "induction\n");
11564 break;
11565 case vect_reduction_def:
11566 dump_printf (MSG_NOTE, "reduction\n");
11567 break;
11568 case vect_double_reduction_def:
11569 dump_printf (MSG_NOTE, "double reduction\n");
11570 break;
11571 case vect_nested_cycle:
11572 dump_printf (MSG_NOTE, "nested cycle\n");
11573 break;
11574 case vect_unknown_def_type:
11575 dump_printf (MSG_NOTE, "unknown\n");
11576 break;
11577 }
11578 }
11579
11580 if (*dt == vect_unknown_def_type)
11581 {
11582 if (dump_enabled_p ())
11583 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
11584 "Unsupported pattern.\n");
11585 return false;
11586 }
11587
11588 return true;
11589 }
11590
11591 /* Function vect_is_simple_use.
11592
11593 Same as vect_is_simple_use but also determines the vector operand
11594 type of OPERAND and stores it to *VECTYPE. If the definition of
11595 OPERAND is vect_uninitialized_def, vect_constant_def or
11596 vect_external_def *VECTYPE will be set to NULL_TREE and the caller
11597    is responsible for computing the best suited vector type for the
11598 scalar operand. */
11599
11600 bool
11601 vect_is_simple_use (tree operand, vec_info *vinfo, enum vect_def_type *dt,
11602 tree *vectype, stmt_vec_info *def_stmt_info_out,
11603 gimple **def_stmt_out)
11604 {
11605 stmt_vec_info def_stmt_info;
11606 gimple *def_stmt;
11607 if (!vect_is_simple_use (operand, vinfo, dt, &def_stmt_info, &def_stmt))
11608 return false;
11609
11610 if (def_stmt_out)
11611 *def_stmt_out = def_stmt;
11612 if (def_stmt_info_out)
11613 *def_stmt_info_out = def_stmt_info;
11614
11615 /* Now get a vector type if the def is internal, otherwise supply
11616 NULL_TREE and leave it up to the caller to figure out a proper
11617 type for the use stmt. */
11618 if (*dt == vect_internal_def
11619 || *dt == vect_induction_def
11620 || *dt == vect_reduction_def
11621 || *dt == vect_double_reduction_def
11622 || *dt == vect_nested_cycle)
11623 {
11624 *vectype = STMT_VINFO_VECTYPE (def_stmt_info);
11625 gcc_assert (*vectype != NULL_TREE);
11626 if (dump_enabled_p ())
11627 dump_printf_loc (MSG_NOTE, vect_location,
11628 "vect_is_simple_use: vectype %T\n", *vectype);
11629 }
11630 else if (*dt == vect_uninitialized_def
11631 || *dt == vect_constant_def
11632 || *dt == vect_external_def)
11633 *vectype = NULL_TREE;
11634 else
11635 gcc_unreachable ();
11636
11637 return true;
11638 }
11639
11640
11641 /* Function supportable_widening_operation
11642
11643 Check whether an operation represented by the code CODE is a
11644 widening operation that is supported by the target platform in
11645 vector form (i.e., when operating on arguments of type VECTYPE_IN
11646 producing a result of type VECTYPE_OUT).
11647
11648 Widening operations we currently support are NOP (CONVERT), FLOAT,
11649 FIX_TRUNC and WIDEN_MULT. This function checks if these operations
11650 are supported by the target platform either directly (via vector
11651 tree-codes), or via target builtins.
11652
11653 Output:
11654 - CODE1 and CODE2 are codes of vector operations to be used when
11655 vectorizing the operation, if available.
11656 - MULTI_STEP_CVT determines the number of required intermediate steps in
11657 case of multi-step conversion (like char->short->int - in that case
11658 MULTI_STEP_CVT will be 1).
11659 - INTERM_TYPES contains the intermediate type required to perform the
11660 widening operation (short in the above example). */
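/* A worked example (illustrative only, assuming 128-bit vectors and a
   target that provides the unpack optabs at every step): widening char
   to int, with VECTYPE_IN = vector(16) char and VECTYPE_OUT =
   vector(4) int, cannot be done in one step, so the function returns
   CODE1 = VEC_UNPACK_LO_EXPR, CODE2 = VEC_UNPACK_HI_EXPR,
   MULTI_STEP_CVT = 1 and INTERM_TYPES = { vector(8) short }: each input
   vector is first unpacked into two vector(8) short, and each of those
   is unpacked again into two vector(4) int.  */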
11661
11662 bool
11663 supportable_widening_operation (enum tree_code code, stmt_vec_info stmt_info,
11664 tree vectype_out, tree vectype_in,
11665 enum tree_code *code1, enum tree_code *code2,
11666 int *multi_step_cvt,
11667 vec<tree> *interm_types)
11668 {
11669 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
11670 class loop *vect_loop = NULL;
11671 machine_mode vec_mode;
11672 enum insn_code icode1, icode2;
11673 optab optab1, optab2;
11674 tree vectype = vectype_in;
11675 tree wide_vectype = vectype_out;
11676 enum tree_code c1, c2;
11677 int i;
11678 tree prev_type, intermediate_type;
11679 machine_mode intermediate_mode, prev_mode;
11680 optab optab3, optab4;
11681
11682 *multi_step_cvt = 0;
11683 if (loop_info)
11684 vect_loop = LOOP_VINFO_LOOP (loop_info);
11685
11686 switch (code)
11687 {
11688 case WIDEN_MULT_EXPR:
11689 /* The result of a vectorized widening operation usually requires
11690 two vectors (because the widened results do not fit into one vector).
11691 The generated vector results would normally be expected to be
11692 generated in the same order as in the original scalar computation,
11693 i.e. if 8 results are generated in each vector iteration, they are
11694 to be organized as follows:
11695 vect1: [res1,res2,res3,res4],
11696 vect2: [res5,res6,res7,res8].
11697
11698 However, in the special case that the result of the widening
11699 operation is used in a reduction computation only, the order doesn't
11700 matter (because when vectorizing a reduction we change the order of
11701 the computation). Some targets can take advantage of this and
11702 generate more efficient code. For example, targets like Altivec,
11703 that support widen_mult using a sequence of {mult_even,mult_odd}
11704 generate the following vectors:
11705 vect1: [res1,res3,res5,res7],
11706 vect2: [res2,res4,res6,res8].
11707
11708 When vectorizing outer-loops, we execute the inner-loop sequentially
11709 (each vectorized inner-loop iteration contributes to VF outer-loop
11710 iterations in parallel). We therefore don't allow changing the
11711 order of the computation in the inner-loop during outer-loop
11712 vectorization. */
11713 /* TODO: Another case in which order doesn't *really* matter is when we
11714 widen and then contract again, e.g. (short)((int)x * y >> 8).
11715 Normally, pack_trunc performs an even/odd permute, whereas the
11716 repack from an even/odd expansion would be an interleave, which
11717 would be significantly simpler for e.g. AVX2. */
11718 /* In any case, in order to avoid duplicating the code below, recurse
11719 on VEC_WIDEN_MULT_EVEN_EXPR. If it succeeds, all the return values
11720 are properly set up for the caller. If we fail, we'll continue with
11721 a VEC_WIDEN_MULT_LO/HI_EXPR check. */
11722 if (vect_loop
11723 && STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction
11724 && !nested_in_vect_loop_p (vect_loop, stmt_info)
11725 && supportable_widening_operation (VEC_WIDEN_MULT_EVEN_EXPR,
11726 stmt_info, vectype_out,
11727 vectype_in, code1, code2,
11728 multi_step_cvt, interm_types))
11729 {
11730 /* Elements in a vector with vect_used_by_reduction property cannot
11731 be reordered if the use chain with this property does not have the
11732 same operation. One such example is s += a * b, where elements
11733 in a and b cannot be reordered. Here we check if the vector defined
11734 by STMT is only directly used in the reduction statement. */
11735 tree lhs = gimple_assign_lhs (stmt_info->stmt);
11736 stmt_vec_info use_stmt_info = loop_info->lookup_single_use (lhs);
11737 if (use_stmt_info
11738 && STMT_VINFO_DEF_TYPE (use_stmt_info) == vect_reduction_def)
11739 return true;
11740 }
11741 c1 = VEC_WIDEN_MULT_LO_EXPR;
11742 c2 = VEC_WIDEN_MULT_HI_EXPR;
11743 break;
11744
11745 case DOT_PROD_EXPR:
11746 c1 = DOT_PROD_EXPR;
11747 c2 = DOT_PROD_EXPR;
11748 break;
11749
11750 case SAD_EXPR:
11751 c1 = SAD_EXPR;
11752 c2 = SAD_EXPR;
11753 break;
11754
11755 case VEC_WIDEN_MULT_EVEN_EXPR:
11756 /* Support the recursion induced just above. */
11757 c1 = VEC_WIDEN_MULT_EVEN_EXPR;
11758 c2 = VEC_WIDEN_MULT_ODD_EXPR;
11759 break;
11760
11761 case WIDEN_LSHIFT_EXPR:
11762 c1 = VEC_WIDEN_LSHIFT_LO_EXPR;
11763 c2 = VEC_WIDEN_LSHIFT_HI_EXPR;
11764 break;
11765
11766 CASE_CONVERT:
11767 c1 = VEC_UNPACK_LO_EXPR;
11768 c2 = VEC_UNPACK_HI_EXPR;
11769 break;
11770
11771 case FLOAT_EXPR:
11772 c1 = VEC_UNPACK_FLOAT_LO_EXPR;
11773 c2 = VEC_UNPACK_FLOAT_HI_EXPR;
11774 break;
11775
11776 case FIX_TRUNC_EXPR:
11777 c1 = VEC_UNPACK_FIX_TRUNC_LO_EXPR;
11778 c2 = VEC_UNPACK_FIX_TRUNC_HI_EXPR;
11779 break;
11780
11781 default:
11782 gcc_unreachable ();
11783 }
11784
11785 if (BYTES_BIG_ENDIAN && c1 != VEC_WIDEN_MULT_EVEN_EXPR)
11786 std::swap (c1, c2);
11787
11788 if (code == FIX_TRUNC_EXPR)
11789 {
11790 /* The signedness is determined from the output operand. */
11791 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
11792 optab2 = optab_for_tree_code (c2, vectype_out, optab_default);
11793 }
11794 else if (CONVERT_EXPR_CODE_P (code)
11795 && VECTOR_BOOLEAN_TYPE_P (wide_vectype)
11796 && VECTOR_BOOLEAN_TYPE_P (vectype)
11797 && TYPE_MODE (wide_vectype) == TYPE_MODE (vectype)
11798 && SCALAR_INT_MODE_P (TYPE_MODE (vectype)))
11799 {
11800 /* If the input and result modes are the same, a different optab
11801 is needed where we pass in the number of units in vectype. */
11802 optab1 = vec_unpacks_sbool_lo_optab;
11803 optab2 = vec_unpacks_sbool_hi_optab;
11804 }
11805 else
11806 {
11807 optab1 = optab_for_tree_code (c1, vectype, optab_default);
11808 optab2 = optab_for_tree_code (c2, vectype, optab_default);
11809 }
11810
11811 if (!optab1 || !optab2)
11812 return false;
11813
11814 vec_mode = TYPE_MODE (vectype);
11815 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing
11816 || (icode2 = optab_handler (optab2, vec_mode)) == CODE_FOR_nothing)
11817 return false;
11818
11819 *code1 = c1;
11820 *code2 = c2;
11821
11822 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
11823 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
11824 {
11825 if (!VECTOR_BOOLEAN_TYPE_P (vectype))
11826 return true;
11827 /* For scalar masks we may have different boolean
11828 vector types that share the same QImode. Thus we
11829 add an additional check on the number of elements. */
11830 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype),
11831 TYPE_VECTOR_SUBPARTS (wide_vectype) * 2))
11832 return true;
11833 }
11834
11835 /* Check if it's a multi-step conversion that can be done using intermediate
11836 types. */
11837
11838 prev_type = vectype;
11839 prev_mode = vec_mode;
11840
11841 if (!CONVERT_EXPR_CODE_P (code))
11842 return false;
11843
11844 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
11845 intermediate steps in the promotion sequence. We try
11846 MAX_INTERM_CVT_STEPS to get to WIDE_VECTYPE, and fail if we do
11847 not. */
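/* For instance (illustrative only, assuming the target supports each
   individual step), widening char to long long goes
   char -> short -> int -> long long, i.e. two intermediate types
   (short and int), which fits within the MAX_INTERM_CVT_STEPS limit;
   promotions needing more intermediate steps are rejected below.  */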
11848 interm_types->create (MAX_INTERM_CVT_STEPS);
11849 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
11850 {
11851 intermediate_mode = insn_data[icode1].operand[0].mode;
11852 if (VECTOR_BOOLEAN_TYPE_P (prev_type))
11853 intermediate_type
11854 = vect_halve_mask_nunits (prev_type, intermediate_mode);
11855 else
11856 intermediate_type
11857 = lang_hooks.types.type_for_mode (intermediate_mode,
11858 TYPE_UNSIGNED (prev_type));
11859
11860 if (VECTOR_BOOLEAN_TYPE_P (intermediate_type)
11861 && VECTOR_BOOLEAN_TYPE_P (prev_type)
11862 && intermediate_mode == prev_mode
11863 && SCALAR_INT_MODE_P (prev_mode))
11864 {
11865 /* If the input and result modes are the same, a different optab
11866 is needed where we pass in the number of units in vectype. */
11867 optab3 = vec_unpacks_sbool_lo_optab;
11868 optab4 = vec_unpacks_sbool_hi_optab;
11869 }
11870 else
11871 {
11872 optab3 = optab_for_tree_code (c1, intermediate_type, optab_default);
11873 optab4 = optab_for_tree_code (c2, intermediate_type, optab_default);
11874 }
11875
11876 if (!optab3 || !optab4
11877 || (icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing
11878 || insn_data[icode1].operand[0].mode != intermediate_mode
11879 || (icode2 = optab_handler (optab2, prev_mode)) == CODE_FOR_nothing
11880 || insn_data[icode2].operand[0].mode != intermediate_mode
11881 || ((icode1 = optab_handler (optab3, intermediate_mode))
11882 == CODE_FOR_nothing)
11883 || ((icode2 = optab_handler (optab4, intermediate_mode))
11884 == CODE_FOR_nothing))
11885 break;
11886
11887 interm_types->quick_push (intermediate_type);
11888 (*multi_step_cvt)++;
11889
11890 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
11891 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
11892 {
11893 if (!VECTOR_BOOLEAN_TYPE_P (vectype))
11894 return true;
11895 if (known_eq (TYPE_VECTOR_SUBPARTS (intermediate_type),
11896 TYPE_VECTOR_SUBPARTS (wide_vectype) * 2))
11897 return true;
11898 }
11899
11900 prev_type = intermediate_type;
11901 prev_mode = intermediate_mode;
11902 }
11903
11904 interm_types->release ();
11905 return false;
11906 }
11907
11908
11909 /* Function supportable_narrowing_operation
11910
11911 Check whether an operation represented by the code CODE is a
11912 narrowing operation that is supported by the target platform in
11913 vector form (i.e., when operating on arguments of type VECTYPE_IN
11914 and producing a result of type VECTYPE_OUT).
11915
11916 Narrowing operations we currently support are NOP (CONVERT), FIX_TRUNC
11917 and FLOAT. This function checks if these operations are supported by
11918 the target platform directly via vector tree-codes.
11919
11920 Output:
11921 - CODE1 is the code of a vector operation to be used when
11922 vectorizing the operation, if available.
11923 - MULTI_STEP_CVT determines the number of required intermediate steps in
11924 case of multi-step conversion (like int->short->char - in that case
11925 MULTI_STEP_CVT will be 1).
11926 - INTERM_TYPES contains the intermediate type required to perform the
11927 narrowing operation (short in the above example). */
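/* A worked example (illustrative only, assuming 128-bit vectors and a
   target that provides the pack-trunc optabs at every step): narrowing
   int to char, with VECTYPE_IN = vector(4) int and VECTYPE_OUT =
   vector(16) char, needs one intermediate step, so the function returns
   CODE1 = VEC_PACK_TRUNC_EXPR, MULTI_STEP_CVT = 1 and
   INTERM_TYPES = { vector(8) short }: pairs of vector(4) int are first
   packed into vector(8) short, and pairs of those are packed into
   vector(16) char.  */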
11928
11929 bool
11930 supportable_narrowing_operation (enum tree_code code,
11931 tree vectype_out, tree vectype_in,
11932 enum tree_code *code1, int *multi_step_cvt,
11933 vec<tree> *interm_types)
11934 {
11935 machine_mode vec_mode;
11936 enum insn_code icode1;
11937 optab optab1, interm_optab;
11938 tree vectype = vectype_in;
11939 tree narrow_vectype = vectype_out;
11940 enum tree_code c1;
11941 tree intermediate_type, prev_type;
11942 machine_mode intermediate_mode, prev_mode;
11943 int i;
11944 bool uns;
11945
11946 *multi_step_cvt = 0;
11947 switch (code)
11948 {
11949 CASE_CONVERT:
11950 c1 = VEC_PACK_TRUNC_EXPR;
11951 if (VECTOR_BOOLEAN_TYPE_P (narrow_vectype)
11952 && VECTOR_BOOLEAN_TYPE_P (vectype)
11953 && TYPE_MODE (narrow_vectype) == TYPE_MODE (vectype)
11954 && SCALAR_INT_MODE_P (TYPE_MODE (vectype)))
11955 optab1 = vec_pack_sbool_trunc_optab;
11956 else
11957 optab1 = optab_for_tree_code (c1, vectype, optab_default);
11958 break;
11959
11960 case FIX_TRUNC_EXPR:
11961 c1 = VEC_PACK_FIX_TRUNC_EXPR;
11962 /* The signedness is determined from the output operand. */
11963 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
11964 break;
11965
11966 case FLOAT_EXPR:
11967 c1 = VEC_PACK_FLOAT_EXPR;
11968 optab1 = optab_for_tree_code (c1, vectype, optab_default);
11969 break;
11970
11971 default:
11972 gcc_unreachable ();
11973 }
11974
11975 if (!optab1)
11976 return false;
11977
11978 vec_mode = TYPE_MODE (vectype);
11979 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing)
11980 return false;
11981
11982 *code1 = c1;
11983
11984 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
11985 {
11986 if (!VECTOR_BOOLEAN_TYPE_P (vectype))
11987 return true;
11988 /* For scalar masks we may have different boolean
11989 vector types that share the same QImode. Thus we
11990 add an additional check on the number of elements. */
11991 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype) * 2,
11992 TYPE_VECTOR_SUBPARTS (narrow_vectype)))
11993 return true;
11994 }
11995
11996 if (code == FLOAT_EXPR)
11997 return false;
11998
11999 /* Check if it's a multi-step conversion that can be done using intermediate
12000 types. */
12001 prev_mode = vec_mode;
12002 prev_type = vectype;
12003 if (code == FIX_TRUNC_EXPR)
12004 uns = TYPE_UNSIGNED (vectype_out);
12005 else
12006 uns = TYPE_UNSIGNED (vectype);
12007
12008 /* For multi-step FIX_TRUNC_EXPR prefer signed floating to integer
12009 conversion over unsigned, as unsigned FIX_TRUNC_EXPR is often more
12010 costly than signed. */
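/* For example (illustrative only), when converting a vector of floats to
   unsigned char in several steps, the initial float-to-integer step can
   use the signed variant and the later pack steps simply truncate the
   low bits, so the signedness of the intermediate conversion does not
   affect values representable in the final type while the signed
   conversion is typically cheaper.  */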
12011 if (code == FIX_TRUNC_EXPR && uns)
12012 {
12013 enum insn_code icode2;
12014
12015 intermediate_type
12016 = lang_hooks.types.type_for_mode (TYPE_MODE (vectype_out), 0);
12017 interm_optab
12018 = optab_for_tree_code (c1, intermediate_type, optab_default);
12019 if (interm_optab != unknown_optab
12020 && (icode2 = optab_handler (interm_optab, vec_mode)) != CODE_FOR_nothing
12021 && insn_data[icode1].operand[0].mode
12022 == insn_data[icode2].operand[0].mode)
12023 {
12024 uns = false;
12025 optab1 = interm_optab;
12026 icode1 = icode2;
12027 }
12028 }
12029
12030 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
12031 intermediate steps in the narrowing sequence. We try
12032 MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do not. */
12033 interm_types->create (MAX_INTERM_CVT_STEPS);
12034 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
12035 {
12036 intermediate_mode = insn_data[icode1].operand[0].mode;
12037 if (VECTOR_BOOLEAN_TYPE_P (prev_type))
12038 intermediate_type
12039 = vect_double_mask_nunits (prev_type, intermediate_mode);
12040 else
12041 intermediate_type
12042 = lang_hooks.types.type_for_mode (intermediate_mode, uns);
12043 if (VECTOR_BOOLEAN_TYPE_P (intermediate_type)
12044 && VECTOR_BOOLEAN_TYPE_P (prev_type)
12045 && intermediate_mode == prev_mode
12046 && SCALAR_INT_MODE_P (prev_mode))
12047 interm_optab = vec_pack_sbool_trunc_optab;
12048 else
12049 interm_optab
12050 = optab_for_tree_code (VEC_PACK_TRUNC_EXPR, intermediate_type,
12051 optab_default);
12052 if (!interm_optab
12053 || ((icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing)
12054 || insn_data[icode1].operand[0].mode != intermediate_mode
12055 || ((icode1 = optab_handler (interm_optab, intermediate_mode))
12056 == CODE_FOR_nothing))
12057 break;
12058
12059 interm_types->quick_push (intermediate_type);
12060 (*multi_step_cvt)++;
12061
12062 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
12063 {
12064 if (!VECTOR_BOOLEAN_TYPE_P (vectype))
12065 return true;
12066 if (known_eq (TYPE_VECTOR_SUBPARTS (intermediate_type) * 2,
12067 TYPE_VECTOR_SUBPARTS (narrow_vectype)))
12068 return true;
12069 }
12070
12071 prev_mode = intermediate_mode;
12072 prev_type = intermediate_type;
12073 optab1 = interm_optab;
12074 }
12075
12076 interm_types->release ();
12077 return false;
12078 }
12079
12080 /* Generate and return a statement that sets vector mask MASK such that
12081 MASK[I] is true iff J + START_INDEX < END_INDEX for all J <= I. */
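/* For example (illustrative only), with a 4-lane MASK, START_INDEX = i and
   END_INDEX = n, the IFN_WHILE_ULT call built below computes
   MASK = { i+0 < n, i+1 < n, i+2 < n, i+3 < n }, i.e. an all-true mask
   while a full vector's worth of iterations remains and a partially-true
   mask for the final, possibly incomplete, vector.  */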
12082
12083 gcall *
12084 vect_gen_while (tree mask, tree start_index, tree end_index)
12085 {
12086 tree cmp_type = TREE_TYPE (start_index);
12087 tree mask_type = TREE_TYPE (mask);
12088 gcc_checking_assert (direct_internal_fn_supported_p (IFN_WHILE_ULT,
12089 cmp_type, mask_type,
12090 OPTIMIZE_FOR_SPEED));
12091 gcall *call = gimple_build_call_internal (IFN_WHILE_ULT, 3,
12092 start_index, end_index,
12093 build_zero_cst (mask_type));
12094 gimple_call_set_lhs (call, mask);
12095 return call;
12096 }
12097
12098 /* Generate a vector mask of type MASK_TYPE for which index I is false iff
12099 J + START_INDEX < END_INDEX for all J <= I. Add the statements to SEQ. */
12100
12101 tree
12102 vect_gen_while_not (gimple_seq *seq, tree mask_type, tree start_index,
12103 tree end_index)
12104 {
12105 tree tmp = make_ssa_name (mask_type);
12106 gcall *call = vect_gen_while (tmp, start_index, end_index);
12107 gimple_seq_add_stmt (seq, call);
12108 return gimple_build (seq, BIT_NOT_EXPR, mask_type, tmp);
12109 }
12110
12111 /* Try to compute the vector types required to vectorize STMT_INFO,
12112 returning true on success and false if vectorization isn't possible.
12113 If GROUP_SIZE is nonzero and we're performing BB vectorization,
12114 make sure that the number of elements in the vectors is no bigger
12115 than GROUP_SIZE.
12116
12117 On success:
12118
12119 - Set *STMT_VECTYPE_OUT to:
12120 - NULL_TREE if the statement doesn't need to be vectorized;
12121 - the equivalent of STMT_VINFO_VECTYPE otherwise.
12122
12123 - Set *NUNITS_VECTYPE_OUT to the vector type that contains the maximum
12124 number of units needed to vectorize STMT_INFO, or NULL_TREE if the
12125 statement does not help to determine the overall number of units. */
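/* For example (illustrative only, assuming 128-bit vectors), for a widening
   statement such as int_x = (int) char_y, *STMT_VECTYPE_OUT would be
   vector(4) int while *NUNITS_VECTYPE_OUT would be vector(16) char, since
   the char operand is the smallest scalar type involved and therefore
   determines how many scalar iterations one vector covers.  */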
12126
12127 opt_result
12128 vect_get_vector_types_for_stmt (stmt_vec_info stmt_info,
12129 tree *stmt_vectype_out,
12130 tree *nunits_vectype_out,
12131 unsigned int group_size)
12132 {
12133 vec_info *vinfo = stmt_info->vinfo;
12134 gimple *stmt = stmt_info->stmt;
12135
12136 /* For BB vectorization, we should always have a group size once we've
12137 constructed the SLP tree; the only valid uses of zero GROUP_SIZEs
12138 are tentative requests during things like early data reference
12139 analysis and pattern recognition. */
12140 if (is_a <bb_vec_info> (vinfo))
12141 gcc_assert (vinfo->slp_instances.is_empty () || group_size != 0);
12142 else
12143 group_size = 0;
12144
12145 *stmt_vectype_out = NULL_TREE;
12146 *nunits_vectype_out = NULL_TREE;
12147
12148 if (gimple_get_lhs (stmt) == NULL_TREE
12149 /* MASK_STORE has no lhs, but is ok. */
12150 && !gimple_call_internal_p (stmt, IFN_MASK_STORE))
12151 {
12152 if (is_a <gcall *> (stmt))
12153 {
12154 /* Ignore calls with no lhs. These must be calls to
12155 #pragma omp simd functions, and what vectorization factor
12156 it really needs can't be determined until
12157 vectorizable_simd_clone_call. */
12158 if (dump_enabled_p ())
12159 dump_printf_loc (MSG_NOTE, vect_location,
12160 "defer to SIMD clone analysis.\n");
12161 return opt_result::success ();
12162 }
12163
12164 return opt_result::failure_at (stmt,
12165 "not vectorized: irregular stmt.%G", stmt);
12166 }
12167
12168 if (VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt))))
12169 return opt_result::failure_at (stmt,
12170 "not vectorized: vector stmt in loop:%G",
12171 stmt);
12172
12173 tree vectype;
12174 tree scalar_type = NULL_TREE;
12175 if (group_size == 0 && STMT_VINFO_VECTYPE (stmt_info))
12176 {
12177 vectype = STMT_VINFO_VECTYPE (stmt_info);
12178 if (dump_enabled_p ())
12179 dump_printf_loc (MSG_NOTE, vect_location,
12180 "precomputed vectype: %T\n", vectype);
12181 }
12182 else if (vect_use_mask_type_p (stmt_info))
12183 {
12184 unsigned int precision = stmt_info->mask_precision;
12185 scalar_type = build_nonstandard_integer_type (precision, 1);
12186 vectype = get_mask_type_for_scalar_type (vinfo, scalar_type, group_size);
12187 if (!vectype)
12188 return opt_result::failure_at (stmt, "not vectorized: unsupported"
12189 " data-type %T\n", scalar_type);
12190 if (dump_enabled_p ())
12191 dump_printf_loc (MSG_NOTE, vect_location, "vectype: %T\n", vectype);
12192 }
12193 else
12194 {
12195 if (data_reference *dr = STMT_VINFO_DATA_REF (stmt_info))
12196 scalar_type = TREE_TYPE (DR_REF (dr));
12197 else if (gimple_call_internal_p (stmt, IFN_MASK_STORE))
12198 scalar_type = TREE_TYPE (gimple_call_arg (stmt, 3));
12199 else
12200 scalar_type = TREE_TYPE (gimple_get_lhs (stmt));
12201
12202 if (dump_enabled_p ())
12203 {
12204 if (group_size)
12205 dump_printf_loc (MSG_NOTE, vect_location,
12206 "get vectype for scalar type (group size %d):"
12207 " %T\n", group_size, scalar_type);
12208 else
12209 dump_printf_loc (MSG_NOTE, vect_location,
12210 "get vectype for scalar type: %T\n", scalar_type);
12211 }
12212 vectype = get_vectype_for_scalar_type (vinfo, scalar_type, group_size);
12213 if (!vectype)
12214 return opt_result::failure_at (stmt,
12215 "not vectorized:"
12216 " unsupported data-type %T\n",
12217 scalar_type);
12218
12219 if (dump_enabled_p ())
12220 dump_printf_loc (MSG_NOTE, vect_location, "vectype: %T\n", vectype);
12221 }
12222 *stmt_vectype_out = vectype;
12223
12224 /* Don't try to compute scalar types if the stmt produces a boolean
12225 vector; use the existing vector type instead. */
12226 tree nunits_vectype = vectype;
12227 if (!VECTOR_BOOLEAN_TYPE_P (vectype))
12228 {
12229 /* The number of units is set according to the smallest scalar
12230 type (or the largest vector size, but we only support one
12231 vector size per vectorization). */
12232 HOST_WIDE_INT dummy;
12233 scalar_type = vect_get_smallest_scalar_type (stmt_info, &dummy, &dummy);
12234 if (scalar_type != TREE_TYPE (vectype))
12235 {
12236 if (dump_enabled_p ())
12237 dump_printf_loc (MSG_NOTE, vect_location,
12238 "get vectype for smallest scalar type: %T\n",
12239 scalar_type);
12240 nunits_vectype = get_vectype_for_scalar_type (vinfo, scalar_type,
12241 group_size);
12242 if (!nunits_vectype)
12243 return opt_result::failure_at
12244 (stmt, "not vectorized: unsupported data-type %T\n",
12245 scalar_type);
12246 if (dump_enabled_p ())
12247 dump_printf_loc (MSG_NOTE, vect_location, "nunits vectype: %T\n",
12248 nunits_vectype);
12249 }
12250 }
12251
12252 if (!multiple_p (TYPE_VECTOR_SUBPARTS (nunits_vectype),
12253 TYPE_VECTOR_SUBPARTS (*stmt_vectype_out)))
12254 return opt_result::failure_at (stmt,
12255 "Not vectorized: Incompatible number "
12256 "of vector subparts between %T and %T\n",
12257 nunits_vectype, *stmt_vectype_out);
12258
12259 if (dump_enabled_p ())
12260 {
12261 dump_printf_loc (MSG_NOTE, vect_location, "nunits = ");
12262 dump_dec (MSG_NOTE, TYPE_VECTOR_SUBPARTS (nunits_vectype));
12263 dump_printf (MSG_NOTE, "\n");
12264 }
12265
12266 *nunits_vectype_out = nunits_vectype;
12267 return opt_result::success ();
12268 }
12269