1 /* Statement Analysis and Transformation for Vectorization
2 Copyright (C) 2003-2018 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
4 and Ira Rosen <irar@il.ibm.com>
5
6 This file is part of GCC.
7
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
12
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
17
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
21
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "backend.h"
26 #include "target.h"
27 #include "rtl.h"
28 #include "tree.h"
29 #include "gimple.h"
30 #include "ssa.h"
31 #include "optabs-tree.h"
32 #include "insn-config.h"
33 #include "recog.h" /* FIXME: for insn_data */
34 #include "cgraph.h"
35 #include "dumpfile.h"
36 #include "alias.h"
37 #include "fold-const.h"
38 #include "stor-layout.h"
39 #include "tree-eh.h"
40 #include "gimplify.h"
41 #include "gimple-iterator.h"
42 #include "gimplify-me.h"
43 #include "tree-cfg.h"
44 #include "tree-ssa-loop-manip.h"
45 #include "cfgloop.h"
46 #include "tree-ssa-loop.h"
47 #include "tree-scalar-evolution.h"
48 #include "tree-vectorizer.h"
49 #include "builtins.h"
50 #include "internal-fn.h"
51 #include "tree-vector-builder.h"
52 #include "vec-perm-indices.h"
53 #include "tree-ssa-loop-niter.h"
54 #include "gimple-fold.h"
55
56 /* For lang_hooks.types.type_for_mode. */
57 #include "langhooks.h"
58
59 /* Return the vectorized type for the given statement. */
60
61 tree
62 stmt_vectype (struct _stmt_vec_info *stmt_info)
63 {
64 return STMT_VINFO_VECTYPE (stmt_info);
65 }
66
67 /* Return TRUE iff the given statement is in an inner loop relative to
68 the loop being vectorized. */
69 bool
70 stmt_in_inner_loop_p (struct _stmt_vec_info *stmt_info)
71 {
72 gimple *stmt = STMT_VINFO_STMT (stmt_info);
73 basic_block bb = gimple_bb (stmt);
74 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
75 struct loop* loop;
76
77 if (!loop_vinfo)
78 return false;
79
80 loop = LOOP_VINFO_LOOP (loop_vinfo);
81
82 return (bb->loop_father == loop->inner);
83 }
84
85 /* Record the cost of a statement, either by directly informing the
86 target model or by saving it in a vector for later processing.
87 Return a preliminary estimate of the statement's cost. */
88
89 unsigned
90 record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
91 enum vect_cost_for_stmt kind, stmt_vec_info stmt_info,
92 int misalign, enum vect_cost_model_location where)
93 {
94 if ((kind == vector_load || kind == unaligned_load)
95 && STMT_VINFO_GATHER_SCATTER_P (stmt_info))
96 kind = vector_gather_load;
97 if ((kind == vector_store || kind == unaligned_store)
98 && STMT_VINFO_GATHER_SCATTER_P (stmt_info))
99 kind = vector_scatter_store;
100 if (body_cost_vec)
101 {
102 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
103 stmt_info_for_cost si = { count, kind,
104 stmt_info ? STMT_VINFO_STMT (stmt_info) : NULL,
105 misalign };
106 body_cost_vec->safe_push (si);
107 return (unsigned)
108 (builtin_vectorization_cost (kind, vectype, misalign) * count);
109 }
110 else
111 return add_stmt_cost (stmt_info->vinfo->target_cost_data,
112 count, kind, stmt_info, misalign, where);
113 }
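/* Illustrative sketch (not part of the vectorizer itself): a caller that
   is still evaluating a vectorization candidate records into a local
   cost vector and releases it afterwards, e.g.

     stmt_vector_for_cost costs;
     costs.create (0);
     unsigned estimate = record_stmt_cost (&costs, ncopies, vector_stmt,
                                           stmt_info, 0, vect_body);
     ...
     costs.release ();

   whereas passing a NULL vector hands the cost straight to the target
   model through add_stmt_cost.  NCOPIES and STMT_INFO above are assumed
   to come from the caller's context.  */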
114
115 /* Return a variable of type ELEM_TYPE[NELEMS]. */
116
117 static tree
118 create_vector_array (tree elem_type, unsigned HOST_WIDE_INT nelems)
119 {
120 return create_tmp_var (build_array_type_nelts (elem_type, nelems),
121 "vect_array");
122 }
123
124 /* ARRAY is an array of vectors created by create_vector_array.
125 Return an SSA_NAME for the vector in index N. The reference
126 is part of the vectorization of STMT and the vector is associated
127 with scalar destination SCALAR_DEST. */
128
129 static tree
130 read_vector_array (gimple *stmt, gimple_stmt_iterator *gsi, tree scalar_dest,
131 tree array, unsigned HOST_WIDE_INT n)
132 {
133 tree vect_type, vect, vect_name, array_ref;
134 gimple *new_stmt;
135
136 gcc_assert (TREE_CODE (TREE_TYPE (array)) == ARRAY_TYPE);
137 vect_type = TREE_TYPE (TREE_TYPE (array));
138 vect = vect_create_destination_var (scalar_dest, vect_type);
139 array_ref = build4 (ARRAY_REF, vect_type, array,
140 build_int_cst (size_type_node, n),
141 NULL_TREE, NULL_TREE);
142
143 new_stmt = gimple_build_assign (vect, array_ref);
144 vect_name = make_ssa_name (vect, new_stmt);
145 gimple_assign_set_lhs (new_stmt, vect_name);
146 vect_finish_stmt_generation (stmt, new_stmt, gsi);
147
148 return vect_name;
149 }
150
151 /* ARRAY is an array of vectors created by create_vector_array.
152 Emit code to store SSA_NAME VECT in index N of the array.
153 The store is part of the vectorization of STMT. */
154
155 static void
156 write_vector_array (gimple *stmt, gimple_stmt_iterator *gsi, tree vect,
157 tree array, unsigned HOST_WIDE_INT n)
158 {
159 tree array_ref;
160 gimple *new_stmt;
161
162 array_ref = build4 (ARRAY_REF, TREE_TYPE (vect), array,
163 build_int_cst (size_type_node, n),
164 NULL_TREE, NULL_TREE);
165
166 new_stmt = gimple_build_assign (array_ref, vect);
167 vect_finish_stmt_generation (stmt, new_stmt, gsi);
168 }
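/* A sketch of how these array helpers combine (hypothetical caller,
   with a group size of 4 assumed): a load-lanes style expansion could
   allocate the temporary array, let the emitted instruction fill it,
   and then pull the individual vectors back out:

     tree array = create_vector_array (vectype, 4);
     ... emit the statement that writes ARRAY ...
     for (unsigned HOST_WIDE_INT i = 0; i < 4; i++)
       new_temp = read_vector_array (stmt, gsi, scalar_dest, array, i);

   write_vector_array is the mirror image used when storing the vectors
   of a group into such an array.  */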
169
170 /* PTR is a pointer to an array of type TYPE. Return a representation
171 of *PTR. The memory reference replaces those in FIRST_DR
172 (and its group). */
173
174 static tree
175 create_array_ref (tree type, tree ptr, tree alias_ptr_type)
176 {
177 tree mem_ref;
178
179 mem_ref = build2 (MEM_REF, type, ptr, build_int_cst (alias_ptr_type, 0));
180 /* Arrays have the same alignment as their type. */
181 set_ptr_info_alignment (get_ptr_info (ptr), TYPE_ALIGN_UNIT (type), 0);
182 return mem_ref;
183 }
184
185 /* Utility functions used by vect_mark_stmts_to_be_vectorized. */
186
187 /* Function vect_mark_relevant.
188
189 Mark STMT as "relevant for vectorization" and add it to WORKLIST. */
190
191 static void
192 vect_mark_relevant (vec<gimple *> *worklist, gimple *stmt,
193 enum vect_relevant relevant, bool live_p)
194 {
195 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
196 enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
197 bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);
198 gimple *pattern_stmt;
199
200 if (dump_enabled_p ())
201 {
202 dump_printf_loc (MSG_NOTE, vect_location,
203 "mark relevant %d, live %d: ", relevant, live_p);
204 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
205 }
206
207 /* If this stmt is an original stmt in a pattern, we might need to mark its
208 related pattern stmt instead of the original stmt. However, such stmts
209 may have their own uses that are not in any pattern; in such cases the
210 stmt itself should be marked. */
211 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
212 {
213 /* This is the last stmt in a sequence that was detected as a
214 pattern that can potentially be vectorized. Don't mark the stmt
215 as relevant/live because it's not going to be vectorized.
216 Instead mark the pattern-stmt that replaces it. */
217
218 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
219
220 if (dump_enabled_p ())
221 dump_printf_loc (MSG_NOTE, vect_location,
222 "last stmt in pattern. don't mark"
223 " relevant/live.\n");
224 stmt_info = vinfo_for_stmt (pattern_stmt);
225 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == stmt);
226 save_relevant = STMT_VINFO_RELEVANT (stmt_info);
227 save_live_p = STMT_VINFO_LIVE_P (stmt_info);
228 stmt = pattern_stmt;
229 }
230
231 STMT_VINFO_LIVE_P (stmt_info) |= live_p;
232 if (relevant > STMT_VINFO_RELEVANT (stmt_info))
233 STMT_VINFO_RELEVANT (stmt_info) = relevant;
234
235 if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
236 && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
237 {
238 if (dump_enabled_p ())
239 dump_printf_loc (MSG_NOTE, vect_location,
240 "already marked relevant/live.\n");
241 return;
242 }
243
244 worklist->safe_push (stmt);
245 }
246
247
248 /* Function is_simple_and_all_uses_invariant
249
250 Return true if STMT is simple and all uses of it are invariant. */
251
252 bool
253 is_simple_and_all_uses_invariant (gimple *stmt, loop_vec_info loop_vinfo)
254 {
255 tree op;
256 gimple *def_stmt;
257 ssa_op_iter iter;
258
259 if (!is_gimple_assign (stmt))
260 return false;
261
262 FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_USE)
263 {
264 enum vect_def_type dt = vect_uninitialized_def;
265
266 if (!vect_is_simple_use (op, loop_vinfo, &def_stmt, &dt))
267 {
268 if (dump_enabled_p ())
269 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
270 "use not simple.\n");
271 return false;
272 }
273
274 if (dt != vect_external_def && dt != vect_constant_def)
275 return false;
276 }
277 return true;
278 }
279
280 /* Function vect_stmt_relevant_p.
281
282 Return true if STMT, in the loop represented by LOOP_VINFO, is
283 "relevant for vectorization".
284
285 A stmt is considered "relevant for vectorization" if:
286 - it has uses outside the loop.
287 - it has vdefs (it alters memory).
288 - it is a control stmt in the loop (except for the exit condition).
289
290 CHECKME: what other side effects would the vectorizer allow? */
291
292 static bool
293 vect_stmt_relevant_p (gimple *stmt, loop_vec_info loop_vinfo,
294 enum vect_relevant *relevant, bool *live_p)
295 {
296 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
297 ssa_op_iter op_iter;
298 imm_use_iterator imm_iter;
299 use_operand_p use_p;
300 def_operand_p def_p;
301
302 *relevant = vect_unused_in_scope;
303 *live_p = false;
304
305 /* cond stmt other than loop exit cond. */
306 if (is_ctrl_stmt (stmt)
307 && STMT_VINFO_TYPE (vinfo_for_stmt (stmt))
308 != loop_exit_ctrl_vec_info_type)
309 *relevant = vect_used_in_scope;
310
311 /* changing memory. */
312 if (gimple_code (stmt) != GIMPLE_PHI)
313 if (gimple_vdef (stmt)
314 && !gimple_clobber_p (stmt))
315 {
316 if (dump_enabled_p ())
317 dump_printf_loc (MSG_NOTE, vect_location,
318 "vec_stmt_relevant_p: stmt has vdefs.\n");
319 *relevant = vect_used_in_scope;
320 }
321
322 /* uses outside the loop. */
323 FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, op_iter, SSA_OP_DEF)
324 {
325 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
326 {
327 basic_block bb = gimple_bb (USE_STMT (use_p));
328 if (!flow_bb_inside_loop_p (loop, bb))
329 {
330 if (dump_enabled_p ())
331 dump_printf_loc (MSG_NOTE, vect_location,
332 "vec_stmt_relevant_p: used out of loop.\n");
333
334 if (is_gimple_debug (USE_STMT (use_p)))
335 continue;
336
337 /* We expect all such uses to be in the loop exit phis
338 (because of loop closed form) */
339 gcc_assert (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI);
340 gcc_assert (bb == single_exit (loop)->dest);
341
342 *live_p = true;
343 }
344 }
345 }
346
347 if (*live_p && *relevant == vect_unused_in_scope
348 && !is_simple_and_all_uses_invariant (stmt, loop_vinfo))
349 {
350 if (dump_enabled_p ())
351 dump_printf_loc (MSG_NOTE, vect_location,
352 "vec_stmt_relevant_p: stmt live but not relevant.\n");
353 *relevant = vect_used_only_live;
354 }
355
356 return (*live_p || *relevant);
357 }
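/* Illustrative example (not taken from any testsuite): in

     for (i = 0; i < n; i++)
       {
         a[i] = b[i] + 1;
         sum = sum + b[i];
       }

   the store to a[i] is relevant because it has a vdef, while the
   statement computing SUM is live because its result is used in the
   loop-exit phi; with no other relevant use it ends up marked
   vect_used_only_live.  */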
358
359
360 /* Function exist_non_indexing_operands_for_use_p
361
362 USE is one of the uses attached to STMT. Check if USE is
363 used in STMT for anything other than indexing an array. */
364
365 static bool
366 exist_non_indexing_operands_for_use_p (tree use, gimple *stmt)
367 {
368 tree operand;
369 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
370
371 /* USE corresponds to some operand in STMT. If there is no data
372 reference in STMT, then any operand that corresponds to USE
373 is not indexing an array. */
374 if (!STMT_VINFO_DATA_REF (stmt_info))
375 return true;
376
377 /* STMT has a data_ref. FORNOW this means that it is one of
378 the following forms:
379 -1- ARRAY_REF = var
380 -2- var = ARRAY_REF
381 (This should have been verified in analyze_data_refs).
382
383 'var' in the second case corresponds to a def, not a use,
384 so USE cannot correspond to any operands that are not used
385 for array indexing.
386
387 Therefore, all we need to check is if STMT falls into the
388 first case, and whether var corresponds to USE. */
389
390 if (!gimple_assign_copy_p (stmt))
391 {
392 if (is_gimple_call (stmt)
393 && gimple_call_internal_p (stmt))
394 {
395 internal_fn ifn = gimple_call_internal_fn (stmt);
396 int mask_index = internal_fn_mask_index (ifn);
397 if (mask_index >= 0
398 && use == gimple_call_arg (stmt, mask_index))
399 return true;
400 int stored_value_index = internal_fn_stored_value_index (ifn);
401 if (stored_value_index >= 0
402 && use == gimple_call_arg (stmt, stored_value_index))
403 return true;
404 if (internal_gather_scatter_fn_p (ifn)
405 && use == gimple_call_arg (stmt, 1))
406 return true;
407 }
408 return false;
409 }
410
411 if (TREE_CODE (gimple_assign_lhs (stmt)) == SSA_NAME)
412 return false;
413 operand = gimple_assign_rhs1 (stmt);
414 if (TREE_CODE (operand) != SSA_NAME)
415 return false;
416
417 if (operand == use)
418 return true;
419
420 return false;
421 }
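/* For instance (purely illustrative): in "a[i_1] = x_2" the use of x_2
   is a non-indexing use, whereas i_1 only feeds the address
   computation; in "x_3 = a[i_1]" there is no non-indexing use, because
   x_3 is a def rather than a use.  */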
422
423
424 /*
425 Function process_use.
426
427 Inputs:
428 - a USE in STMT in a loop represented by LOOP_VINFO
429 - RELEVANT - enum value to be set in the STMT_VINFO of the stmt
430 that defined USE. This is done by calling mark_relevant and passing it
431 the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
432 - FORCE is true if exist_non_indexing_operands_for_use_p check shouldn't
433 be performed.
434
435 Outputs:
436 Generally, LIVE_P and RELEVANT are used to define the liveness and
437 relevance info of the DEF_STMT of this USE:
438 STMT_VINFO_LIVE_P (DEF_STMT_info) <-- live_p
439 STMT_VINFO_RELEVANT (DEF_STMT_info) <-- relevant
440 Exceptions:
441 - case 1: If USE is used only for address computations (e.g. array indexing),
442 which does not need to be directly vectorized, then the liveness/relevance
443 of the respective DEF_STMT is left unchanged.
444 - case 2: If STMT is a reduction phi and DEF_STMT is a reduction stmt, we
445 skip DEF_STMT because it has already been processed.
446 - case 3: If DEF_STMT and STMT are in different nests, then "relevant" will
447 be modified accordingly.
448
449 Return true if everything is as expected. Return false otherwise. */
450
451 static bool
452 process_use (gimple *stmt, tree use, loop_vec_info loop_vinfo,
453 enum vect_relevant relevant, vec<gimple *> *worklist,
454 bool force)
455 {
456 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
457 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
458 stmt_vec_info dstmt_vinfo;
459 basic_block bb, def_bb;
460 gimple *def_stmt;
461 enum vect_def_type dt;
462
463 /* case 1: we are only interested in uses that need to be vectorized. Uses
464 that are used for address computation are not considered relevant. */
465 if (!force && !exist_non_indexing_operands_for_use_p (use, stmt))
466 return true;
467
468 if (!vect_is_simple_use (use, loop_vinfo, &def_stmt, &dt))
469 {
470 if (dump_enabled_p ())
471 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
472 "not vectorized: unsupported use in stmt.\n");
473 return false;
474 }
475
476 if (!def_stmt || gimple_nop_p (def_stmt))
477 return true;
478
479 def_bb = gimple_bb (def_stmt);
480 if (!flow_bb_inside_loop_p (loop, def_bb))
481 {
482 if (dump_enabled_p ())
483 dump_printf_loc (MSG_NOTE, vect_location, "def_stmt is out of loop.\n");
484 return true;
485 }
486
487 /* case 2: A reduction phi (STMT) defined by a reduction stmt (DEF_STMT).
488 DEF_STMT must have already been processed, because this should be the
489 only way that STMT, which is a reduction-phi, was put in the worklist,
490 as there should be no other uses for DEF_STMT in the loop. So we just
491 check that everything is as expected, and we are done. */
492 dstmt_vinfo = vinfo_for_stmt (def_stmt);
493 bb = gimple_bb (stmt);
494 if (gimple_code (stmt) == GIMPLE_PHI
495 && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
496 && gimple_code (def_stmt) != GIMPLE_PHI
497 && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def
498 && bb->loop_father == def_bb->loop_father)
499 {
500 if (dump_enabled_p ())
501 dump_printf_loc (MSG_NOTE, vect_location,
502 "reduc-stmt defining reduc-phi in the same nest.\n");
503 if (STMT_VINFO_IN_PATTERN_P (dstmt_vinfo))
504 dstmt_vinfo = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (dstmt_vinfo));
505 gcc_assert (STMT_VINFO_RELEVANT (dstmt_vinfo) < vect_used_by_reduction);
506 gcc_assert (STMT_VINFO_LIVE_P (dstmt_vinfo)
507 || STMT_VINFO_RELEVANT (dstmt_vinfo) > vect_unused_in_scope);
508 return true;
509 }
510
511 /* case 3a: outer-loop stmt defining an inner-loop stmt:
512 outer-loop-header-bb:
513 d = def_stmt
514 inner-loop:
515 stmt # use (d)
516 outer-loop-tail-bb:
517 ... */
518 if (flow_loop_nested_p (def_bb->loop_father, bb->loop_father))
519 {
520 if (dump_enabled_p ())
521 dump_printf_loc (MSG_NOTE, vect_location,
522 "outer-loop def-stmt defining inner-loop stmt.\n");
523
524 switch (relevant)
525 {
526 case vect_unused_in_scope:
527 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_nested_cycle) ?
528 vect_used_in_scope : vect_unused_in_scope;
529 break;
530
531 case vect_used_in_outer_by_reduction:
532 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
533 relevant = vect_used_by_reduction;
534 break;
535
536 case vect_used_in_outer:
537 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
538 relevant = vect_used_in_scope;
539 break;
540
541 case vect_used_in_scope:
542 break;
543
544 default:
545 gcc_unreachable ();
546 }
547 }
548
549 /* case 3b: inner-loop stmt defining an outer-loop stmt:
550 outer-loop-header-bb:
551 ...
552 inner-loop:
553 d = def_stmt
554 outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
555 stmt # use (d) */
556 else if (flow_loop_nested_p (bb->loop_father, def_bb->loop_father))
557 {
558 if (dump_enabled_p ())
559 dump_printf_loc (MSG_NOTE, vect_location,
560 "inner-loop def-stmt defining outer-loop stmt.\n");
561
562 switch (relevant)
563 {
564 case vect_unused_in_scope:
565 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
566 || STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_double_reduction_def) ?
567 vect_used_in_outer_by_reduction : vect_unused_in_scope;
568 break;
569
570 case vect_used_by_reduction:
571 case vect_used_only_live:
572 relevant = vect_used_in_outer_by_reduction;
573 break;
574
575 case vect_used_in_scope:
576 relevant = vect_used_in_outer;
577 break;
578
579 default:
580 gcc_unreachable ();
581 }
582 }
583 /* We are also not interested in uses on loop PHI backedges that are
584 inductions. Otherwise we'll needlessly vectorize the IV increment
585 and cause hybrid SLP for SLP inductions. Unless the PHI is live
586 of course. */
587 else if (gimple_code (stmt) == GIMPLE_PHI
588 && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_induction_def
589 && ! STMT_VINFO_LIVE_P (stmt_vinfo)
590 && (PHI_ARG_DEF_FROM_EDGE (stmt, loop_latch_edge (bb->loop_father))
591 == use))
592 {
593 if (dump_enabled_p ())
594 dump_printf_loc (MSG_NOTE, vect_location,
595 "induction value on backedge.\n");
596 return true;
597 }
598
599
600 vect_mark_relevant (worklist, def_stmt, relevant, false);
601 return true;
602 }
603
604
605 /* Function vect_mark_stmts_to_be_vectorized.
606
607 Not all stmts in the loop need to be vectorized. For example:
608
609 for i...
610 for j...
611 1. T0 = i + j
612 2. T1 = a[T0]
613
614 3. j = j + 1
615
616 Stmts 1 and 3 do not need to be vectorized, because loop control and
617 addressing of vectorized data-refs are handled differently.
618
619 This pass detects such stmts. */
620
621 bool
622 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
623 {
624 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
625 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
626 unsigned int nbbs = loop->num_nodes;
627 gimple_stmt_iterator si;
628 gimple *stmt;
629 unsigned int i;
630 stmt_vec_info stmt_vinfo;
631 basic_block bb;
632 gimple *phi;
633 bool live_p;
634 enum vect_relevant relevant;
635
636 if (dump_enabled_p ())
637 dump_printf_loc (MSG_NOTE, vect_location,
638 "=== vect_mark_stmts_to_be_vectorized ===\n");
639
640 auto_vec<gimple *, 64> worklist;
641
642 /* 1. Init worklist. */
643 for (i = 0; i < nbbs; i++)
644 {
645 bb = bbs[i];
646 for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
647 {
648 phi = gsi_stmt (si);
649 if (dump_enabled_p ())
650 {
651 dump_printf_loc (MSG_NOTE, vect_location, "init: phi relevant? ");
652 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
653 }
654
655 if (vect_stmt_relevant_p (phi, loop_vinfo, &relevant, &live_p))
656 vect_mark_relevant (&worklist, phi, relevant, live_p);
657 }
658 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
659 {
660 stmt = gsi_stmt (si);
661 if (dump_enabled_p ())
662 {
663 dump_printf_loc (MSG_NOTE, vect_location, "init: stmt relevant? ");
664 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
665 }
666
667 if (vect_stmt_relevant_p (stmt, loop_vinfo, &relevant, &live_p))
668 vect_mark_relevant (&worklist, stmt, relevant, live_p);
669 }
670 }
671
672 /* 2. Process_worklist */
673 while (worklist.length () > 0)
674 {
675 use_operand_p use_p;
676 ssa_op_iter iter;
677
678 stmt = worklist.pop ();
679 if (dump_enabled_p ())
680 {
681 dump_printf_loc (MSG_NOTE, vect_location, "worklist: examine stmt: ");
682 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
683 }
684
685 /* Examine the USEs of STMT. For each USE, mark the stmt that defines it
686 (DEF_STMT) as relevant/irrelevant according to the relevance property
687 of STMT. */
688 stmt_vinfo = vinfo_for_stmt (stmt);
689 relevant = STMT_VINFO_RELEVANT (stmt_vinfo);
690
691 /* Generally, the relevance property of STMT (in STMT_VINFO_RELEVANT) is
692 propagated as is to the DEF_STMTs of its USEs.
693
694 One exception is when STMT has been identified as defining a reduction
695 variable; in this case we set the relevance to vect_used_by_reduction.
696 This is because we distinguish between two kinds of relevant stmts -
697 those that are used by a reduction computation, and those that are
698 (also) used by a regular computation. This allows us later on to
699 identify stmts that are used solely by a reduction, and therefore the
700 order of the results that they produce does not have to be kept. */
701
702 switch (STMT_VINFO_DEF_TYPE (stmt_vinfo))
703 {
704 case vect_reduction_def:
705 gcc_assert (relevant != vect_unused_in_scope);
706 if (relevant != vect_unused_in_scope
707 && relevant != vect_used_in_scope
708 && relevant != vect_used_by_reduction
709 && relevant != vect_used_only_live)
710 {
711 if (dump_enabled_p ())
712 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
713 "unsupported use of reduction.\n");
714 return false;
715 }
716 break;
717
718 case vect_nested_cycle:
719 if (relevant != vect_unused_in_scope
720 && relevant != vect_used_in_outer_by_reduction
721 && relevant != vect_used_in_outer)
722 {
723 if (dump_enabled_p ())
724 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
725 "unsupported use of nested cycle.\n");
726
727 return false;
728 }
729 break;
730
731 case vect_double_reduction_def:
732 if (relevant != vect_unused_in_scope
733 && relevant != vect_used_by_reduction
734 && relevant != vect_used_only_live)
735 {
736 if (dump_enabled_p ())
737 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
738 "unsupported use of double reduction.\n");
739
740 return false;
741 }
742 break;
743
744 default:
745 break;
746 }
747
748 if (is_pattern_stmt_p (stmt_vinfo))
749 {
750 /* Pattern statements are not inserted into the code, so
751 FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
752 have to scan the RHS or function arguments instead. */
753 if (is_gimple_assign (stmt))
754 {
755 enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
756 tree op = gimple_assign_rhs1 (stmt);
757
758 i = 1;
759 if (rhs_code == COND_EXPR && COMPARISON_CLASS_P (op))
760 {
761 if (!process_use (stmt, TREE_OPERAND (op, 0), loop_vinfo,
762 relevant, &worklist, false)
763 || !process_use (stmt, TREE_OPERAND (op, 1), loop_vinfo,
764 relevant, &worklist, false))
765 return false;
766 i = 2;
767 }
768 for (; i < gimple_num_ops (stmt); i++)
769 {
770 op = gimple_op (stmt, i);
771 if (TREE_CODE (op) == SSA_NAME
772 && !process_use (stmt, op, loop_vinfo, relevant,
773 &worklist, false))
774 return false;
775 }
776 }
777 else if (is_gimple_call (stmt))
778 {
779 for (i = 0; i < gimple_call_num_args (stmt); i++)
780 {
781 tree arg = gimple_call_arg (stmt, i);
782 if (!process_use (stmt, arg, loop_vinfo, relevant,
783 &worklist, false))
784 return false;
785 }
786 }
787 }
788 else
789 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
790 {
791 tree op = USE_FROM_PTR (use_p);
792 if (!process_use (stmt, op, loop_vinfo, relevant,
793 &worklist, false))
794 return false;
795 }
796
797 if (STMT_VINFO_GATHER_SCATTER_P (stmt_vinfo))
798 {
799 gather_scatter_info gs_info;
800 if (!vect_check_gather_scatter (stmt, loop_vinfo, &gs_info))
801 gcc_unreachable ();
802 if (!process_use (stmt, gs_info.offset, loop_vinfo, relevant,
803 &worklist, true))
804 return false;
805 }
806 } /* while worklist */
807
808 return true;
809 }
810
811
812 /* Function vect_model_simple_cost.
813
814 Models cost for simple operations, i.e. those that only emit ncopies of a
815 single op. Right now, this does not account for multiple insns that could
816 be generated for the single vector op. We will handle that shortly. */
817
818 void
819 vect_model_simple_cost (stmt_vec_info stmt_info, int ncopies,
820 enum vect_def_type *dt,
821 int ndts,
822 stmt_vector_for_cost *prologue_cost_vec,
823 stmt_vector_for_cost *body_cost_vec)
824 {
825 int i;
826 int inside_cost = 0, prologue_cost = 0;
827
828 /* The SLP costs were already calculated during SLP tree build. */
829 gcc_assert (!PURE_SLP_STMT (stmt_info));
830
831 /* Cost the "broadcast" of a scalar operand into a vector operand.
832 Use scalar_to_vec to cost the broadcast, as elsewhere in the vector
833 cost model. */
834 for (i = 0; i < ndts; i++)
835 if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
836 prologue_cost += record_stmt_cost (prologue_cost_vec, 1, scalar_to_vec,
837 stmt_info, 0, vect_prologue);
838
839 /* Pass the inside-of-loop statements to the target-specific cost model. */
840 inside_cost = record_stmt_cost (body_cost_vec, ncopies, vector_stmt,
841 stmt_info, 0, vect_body);
842
843 if (dump_enabled_p ())
844 dump_printf_loc (MSG_NOTE, vect_location,
845 "vect_model_simple_cost: inside_cost = %d, "
846 "prologue_cost = %d .\n", inside_cost, prologue_cost);
847 }
848
849
850 /* Model cost for type demotion and promotion operations. PWR is normally
851 zero for single-step promotions and demotions. It will be one if
852 two-step promotion/demotion is required, and so on. Each additional
853 step doubles the number of instructions required. */
854
855 static void
856 vect_model_promotion_demotion_cost (stmt_vec_info stmt_info,
857 enum vect_def_type *dt, int pwr)
858 {
859 int i, tmp;
860 int inside_cost = 0, prologue_cost = 0;
861 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
862 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
863 void *target_cost_data;
864
865 /* The SLP costs were already calculated during SLP tree build. */
866 gcc_assert (!PURE_SLP_STMT (stmt_info));
867
868 if (loop_vinfo)
869 target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
870 else
871 target_cost_data = BB_VINFO_TARGET_COST_DATA (bb_vinfo);
872
873 for (i = 0; i < pwr + 1; i++)
874 {
875 tmp = (STMT_VINFO_TYPE (stmt_info) == type_promotion_vec_info_type) ?
876 (i + 1) : i;
877 inside_cost += add_stmt_cost (target_cost_data, vect_pow2 (tmp),
878 vec_promote_demote, stmt_info, 0,
879 vect_body);
880 }
881
882 /* FORNOW: Assuming maximum 2 args per stmts. */
883 for (i = 0; i < 2; i++)
884 if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
885 prologue_cost += add_stmt_cost (target_cost_data, 1, vector_stmt,
886 stmt_info, 0, vect_prologue);
887
888 if (dump_enabled_p ())
889 dump_printf_loc (MSG_NOTE, vect_location,
890 "vect_model_promotion_demotion_cost: inside_cost = %d, "
891 "prologue_cost = %d .\n", inside_cost, prologue_cost);
892 }
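/* Worked example (illustrative): for a two-step promotion (PWR == 1)
   the loop above charges vect_pow2 (1) + vect_pow2 (2) = 2 + 4 = 6
   vec_promote_demote operations, while the corresponding two-step
   demotion charges vect_pow2 (0) + vect_pow2 (1) = 3.  */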
893
894 /* Function vect_model_store_cost
895
896 Models cost for stores. In the case of grouped accesses, one access
897 has the overhead of the grouped access attributed to it. */
898
899 void
900 vect_model_store_cost (stmt_vec_info stmt_info, int ncopies,
901 vect_memory_access_type memory_access_type,
902 vec_load_store_type vls_type, slp_tree slp_node,
903 stmt_vector_for_cost *prologue_cost_vec,
904 stmt_vector_for_cost *body_cost_vec)
905 {
906 unsigned int inside_cost = 0, prologue_cost = 0;
907 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
908 gimple *first_stmt = STMT_VINFO_STMT (stmt_info);
909 bool grouped_access_p = STMT_VINFO_GROUPED_ACCESS (stmt_info);
910
911 if (vls_type == VLS_STORE_INVARIANT)
912 prologue_cost += record_stmt_cost (prologue_cost_vec, 1, scalar_to_vec,
913 stmt_info, 0, vect_prologue);
914
915 /* Grouped stores update all elements in the group at once,
916 so we want the DR for the first statement. */
917 if (!slp_node && grouped_access_p)
918 {
919 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
920 dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
921 }
922
923 /* True if we should include any once-per-group costs as well as
924 the cost of the statement itself. For SLP we only get called
925 once per group anyhow. */
926 bool first_stmt_p = (first_stmt == STMT_VINFO_STMT (stmt_info));
927
928 /* We assume that the cost of a single store-lanes instruction is
929 equivalent to the cost of GROUP_SIZE separate stores. If a grouped
930 access is instead being provided by a permute-and-store operation,
931 include the cost of the permutes. */
932 if (first_stmt_p
933 && memory_access_type == VMAT_CONTIGUOUS_PERMUTE)
934 {
935 /* Uses high and low interleave or shuffle operations for each
936 needed permute. */
937 int group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
938 int nstmts = ncopies * ceil_log2 (group_size) * group_size;
939 inside_cost = record_stmt_cost (body_cost_vec, nstmts, vec_perm,
940 stmt_info, 0, vect_body);
941
942 if (dump_enabled_p ())
943 dump_printf_loc (MSG_NOTE, vect_location,
944 "vect_model_store_cost: strided group_size = %d .\n",
945 group_size);
946 }
947
948 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
949 /* Costs of the stores. */
950 if (memory_access_type == VMAT_ELEMENTWISE
951 || memory_access_type == VMAT_GATHER_SCATTER)
952 {
953 /* N scalar stores plus extracting the elements. */
954 unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
955 inside_cost += record_stmt_cost (body_cost_vec,
956 ncopies * assumed_nunits,
957 scalar_store, stmt_info, 0, vect_body);
958 }
959 else
960 vect_get_store_cost (dr, ncopies, &inside_cost, body_cost_vec);
961
962 if (memory_access_type == VMAT_ELEMENTWISE
963 || memory_access_type == VMAT_STRIDED_SLP)
964 {
965 /* Extracting the elements to be stored from their vectors. */
966 unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
967 inside_cost += record_stmt_cost (body_cost_vec,
968 ncopies * assumed_nunits,
969 vec_to_scalar, stmt_info, 0, vect_body);
970 }
971
972 if (dump_enabled_p ())
973 dump_printf_loc (MSG_NOTE, vect_location,
974 "vect_model_store_cost: inside_cost = %d, "
975 "prologue_cost = %d .\n", inside_cost, prologue_cost);
976 }
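/* Illustrative numbers (assumed, not measured): a contiguous-permute
   store group with GROUP_SIZE == 4 and NCOPIES == 2 is charged
   2 * ceil_log2 (4) * 4 = 16 vec_perm statements above, in addition to
   the cost of the vector stores themselves.  */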
977
978
979 /* Calculate cost of DR's memory access. */
980 void
981 vect_get_store_cost (struct data_reference *dr, int ncopies,
982 unsigned int *inside_cost,
983 stmt_vector_for_cost *body_cost_vec)
984 {
985 int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
986 gimple *stmt = DR_STMT (dr);
987 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
988
989 switch (alignment_support_scheme)
990 {
991 case dr_aligned:
992 {
993 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
994 vector_store, stmt_info, 0,
995 vect_body);
996
997 if (dump_enabled_p ())
998 dump_printf_loc (MSG_NOTE, vect_location,
999 "vect_model_store_cost: aligned.\n");
1000 break;
1001 }
1002
1003 case dr_unaligned_supported:
1004 {
1005 /* Here, we assign an additional cost for the unaligned store. */
1006 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1007 unaligned_store, stmt_info,
1008 DR_MISALIGNMENT (dr), vect_body);
1009 if (dump_enabled_p ())
1010 dump_printf_loc (MSG_NOTE, vect_location,
1011 "vect_model_store_cost: unaligned supported by "
1012 "hardware.\n");
1013 break;
1014 }
1015
1016 case dr_unaligned_unsupported:
1017 {
1018 *inside_cost = VECT_MAX_COST;
1019
1020 if (dump_enabled_p ())
1021 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1022 "vect_model_store_cost: unsupported access.\n");
1023 break;
1024 }
1025
1026 default:
1027 gcc_unreachable ();
1028 }
1029 }
1030
1031
1032 /* Function vect_model_load_cost
1033
1034 Models cost for loads. In the case of grouped accesses, one access has
1035 the overhead of the grouped access attributed to it. Since unaligned
1036 accesses are supported for loads, we also account for the costs of the
1037 access scheme chosen. */
1038
1039 void
1040 vect_model_load_cost (stmt_vec_info stmt_info, int ncopies,
1041 vect_memory_access_type memory_access_type,
1042 slp_tree slp_node,
1043 stmt_vector_for_cost *prologue_cost_vec,
1044 stmt_vector_for_cost *body_cost_vec)
1045 {
1046 gimple *first_stmt = STMT_VINFO_STMT (stmt_info);
1047 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
1048 unsigned int inside_cost = 0, prologue_cost = 0;
1049 bool grouped_access_p = STMT_VINFO_GROUPED_ACCESS (stmt_info);
1050
1051 /* Grouped loads read all elements in the group at once,
1052 so we want the DR for the first statement. */
1053 if (!slp_node && grouped_access_p)
1054 {
1055 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
1056 dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
1057 }
1058
1059 /* True if we should include any once-per-group costs as well as
1060 the cost of the statement itself. For SLP we only get called
1061 once per group anyhow. */
1062 bool first_stmt_p = (first_stmt == STMT_VINFO_STMT (stmt_info));
1063
1064 /* We assume that the cost of a single load-lanes instruction is
1065 equivalent to the cost of GROUP_SIZE separate loads. If a grouped
1066 access is instead being provided by a load-and-permute operation,
1067 include the cost of the permutes. */
1068 if (first_stmt_p
1069 && memory_access_type == VMAT_CONTIGUOUS_PERMUTE)
1070 {
1071 /* Uses even and odd extract operations or shuffle operations
1072 for each needed permute. */
1073 int group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
1074 int nstmts = ncopies * ceil_log2 (group_size) * group_size;
1075 inside_cost = record_stmt_cost (body_cost_vec, nstmts, vec_perm,
1076 stmt_info, 0, vect_body);
1077
1078 if (dump_enabled_p ())
1079 dump_printf_loc (MSG_NOTE, vect_location,
1080 "vect_model_load_cost: strided group_size = %d .\n",
1081 group_size);
1082 }
1083
1084 /* The loads themselves. */
1085 if (memory_access_type == VMAT_ELEMENTWISE
1086 || memory_access_type == VMAT_GATHER_SCATTER)
1087 {
1088 /* N scalar loads plus gathering them into a vector. */
1089 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1090 unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
1091 inside_cost += record_stmt_cost (body_cost_vec,
1092 ncopies * assumed_nunits,
1093 scalar_load, stmt_info, 0, vect_body);
1094 }
1095 else
1096 vect_get_load_cost (dr, ncopies, first_stmt_p,
1097 &inside_cost, &prologue_cost,
1098 prologue_cost_vec, body_cost_vec, true);
1099 if (memory_access_type == VMAT_ELEMENTWISE
1100 || memory_access_type == VMAT_STRIDED_SLP)
1101 inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_construct,
1102 stmt_info, 0, vect_body);
1103
1104 if (dump_enabled_p ())
1105 dump_printf_loc (MSG_NOTE, vect_location,
1106 "vect_model_load_cost: inside_cost = %d, "
1107 "prologue_cost = %d .\n", inside_cost, prologue_cost);
1108 }
1109
1110
1111 /* Calculate cost of DR's memory access. */
1112 void
1113 vect_get_load_cost (struct data_reference *dr, int ncopies,
1114 bool add_realign_cost, unsigned int *inside_cost,
1115 unsigned int *prologue_cost,
1116 stmt_vector_for_cost *prologue_cost_vec,
1117 stmt_vector_for_cost *body_cost_vec,
1118 bool record_prologue_costs)
1119 {
1120 int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
1121 gimple *stmt = DR_STMT (dr);
1122 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1123
1124 switch (alignment_support_scheme)
1125 {
1126 case dr_aligned:
1127 {
1128 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1129 stmt_info, 0, vect_body);
1130
1131 if (dump_enabled_p ())
1132 dump_printf_loc (MSG_NOTE, vect_location,
1133 "vect_model_load_cost: aligned.\n");
1134
1135 break;
1136 }
1137 case dr_unaligned_supported:
1138 {
1139 /* Here, we assign an additional cost for the unaligned load. */
1140 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1141 unaligned_load, stmt_info,
1142 DR_MISALIGNMENT (dr), vect_body);
1143
1144 if (dump_enabled_p ())
1145 dump_printf_loc (MSG_NOTE, vect_location,
1146 "vect_model_load_cost: unaligned supported by "
1147 "hardware.\n");
1148
1149 break;
1150 }
1151 case dr_explicit_realign:
1152 {
1153 *inside_cost += record_stmt_cost (body_cost_vec, ncopies * 2,
1154 vector_load, stmt_info, 0, vect_body);
1155 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1156 vec_perm, stmt_info, 0, vect_body);
1157
1158 /* FIXME: If the misalignment remains fixed across the iterations of
1159 the containing loop, the following cost should be added to the
1160 prologue costs. */
1161 if (targetm.vectorize.builtin_mask_for_load)
1162 *inside_cost += record_stmt_cost (body_cost_vec, 1, vector_stmt,
1163 stmt_info, 0, vect_body);
1164
1165 if (dump_enabled_p ())
1166 dump_printf_loc (MSG_NOTE, vect_location,
1167 "vect_model_load_cost: explicit realign\n");
1168
1169 break;
1170 }
1171 case dr_explicit_realign_optimized:
1172 {
1173 if (dump_enabled_p ())
1174 dump_printf_loc (MSG_NOTE, vect_location,
1175 "vect_model_load_cost: unaligned software "
1176 "pipelined.\n");
1177
1178 /* Unaligned software pipeline has a load of an address, an initial
1179 load, and possibly a mask operation to "prime" the loop. However,
1180 if this is an access in a group of loads, which provide grouped
1181 access, then the above cost should only be considered for one
1182 access in the group. Inside the loop, there is a load op
1183 and a realignment op. */
1184
1185 if (add_realign_cost && record_prologue_costs)
1186 {
1187 *prologue_cost += record_stmt_cost (prologue_cost_vec, 2,
1188 vector_stmt, stmt_info,
1189 0, vect_prologue);
1190 if (targetm.vectorize.builtin_mask_for_load)
1191 *prologue_cost += record_stmt_cost (prologue_cost_vec, 1,
1192 vector_stmt, stmt_info,
1193 0, vect_prologue);
1194 }
1195
1196 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1197 stmt_info, 0, vect_body);
1198 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_perm,
1199 stmt_info, 0, vect_body);
1200
1201 if (dump_enabled_p ())
1202 dump_printf_loc (MSG_NOTE, vect_location,
1203 "vect_model_load_cost: explicit realign optimized"
1204 "\n");
1205
1206 break;
1207 }
1208
1209 case dr_unaligned_unsupported:
1210 {
1211 *inside_cost = VECT_MAX_COST;
1212
1213 if (dump_enabled_p ())
1214 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1215 "vect_model_load_cost: unsupported access.\n");
1216 break;
1217 }
1218
1219 default:
1220 gcc_unreachable ();
1221 }
1222 }
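/* Illustrative totals: under dr_explicit_realign each copy is charged
   two vector_load operations and one vec_perm, plus a single
   vector_stmt when the target provides builtin_mask_for_load, so
   NCOPIES == 2 adds at least four loads and two permutes to the loop
   body.  */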
1223
1224 /* Insert the new stmt NEW_STMT at *GSI or at the appropriate place in
1225 the loop preheader for the vectorized stmt STMT. */
1226
1227 static void
1228 vect_init_vector_1 (gimple *stmt, gimple *new_stmt, gimple_stmt_iterator *gsi)
1229 {
1230 if (gsi)
1231 vect_finish_stmt_generation (stmt, new_stmt, gsi);
1232 else
1233 {
1234 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1235 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1236
1237 if (loop_vinfo)
1238 {
1239 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1240 basic_block new_bb;
1241 edge pe;
1242
1243 if (nested_in_vect_loop_p (loop, stmt))
1244 loop = loop->inner;
1245
1246 pe = loop_preheader_edge (loop);
1247 new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
1248 gcc_assert (!new_bb);
1249 }
1250 else
1251 {
1252 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
1253 basic_block bb;
1254 gimple_stmt_iterator gsi_bb_start;
1255
1256 gcc_assert (bb_vinfo);
1257 bb = BB_VINFO_BB (bb_vinfo);
1258 gsi_bb_start = gsi_after_labels (bb);
1259 gsi_insert_before (&gsi_bb_start, new_stmt, GSI_SAME_STMT);
1260 }
1261 }
1262
1263 if (dump_enabled_p ())
1264 {
1265 dump_printf_loc (MSG_NOTE, vect_location,
1266 "created new init_stmt: ");
1267 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, new_stmt, 0);
1268 }
1269 }
1270
1271 /* Function vect_init_vector.
1272
1273 Insert a new stmt (INIT_STMT) that initializes a new variable of type
1274 TYPE with the value VAL. If TYPE is a vector type and VAL does not have
1275 a vector type, a vector with all elements equal to VAL is created first.
1276 Place the initialization at GSI if it is not NULL. Otherwise, place the
1277 initialization at the loop preheader.
1278 Return the DEF of INIT_STMT.
1279 It will be used in the vectorization of STMT. */
1280
1281 tree
1282 vect_init_vector (gimple *stmt, tree val, tree type, gimple_stmt_iterator *gsi)
1283 {
1284 gimple *init_stmt;
1285 tree new_temp;
1286
1287 /* We also abuse this function to initialize a plain SSA name with the value VAL. */
1288 if (! useless_type_conversion_p (type, TREE_TYPE (val)))
1289 {
1290 gcc_assert (TREE_CODE (type) == VECTOR_TYPE);
1291 if (! types_compatible_p (TREE_TYPE (type), TREE_TYPE (val)))
1292 {
1293 /* A scalar boolean value should be transformed into an
1294 all-zeros or all-ones value before building a vector. */
1295 if (VECTOR_BOOLEAN_TYPE_P (type))
1296 {
1297 tree true_val = build_all_ones_cst (TREE_TYPE (type));
1298 tree false_val = build_zero_cst (TREE_TYPE (type));
1299
1300 if (CONSTANT_CLASS_P (val))
1301 val = integer_zerop (val) ? false_val : true_val;
1302 else
1303 {
1304 new_temp = make_ssa_name (TREE_TYPE (type));
1305 init_stmt = gimple_build_assign (new_temp, COND_EXPR,
1306 val, true_val, false_val);
1307 vect_init_vector_1 (stmt, init_stmt, gsi);
1308 val = new_temp;
1309 }
1310 }
1311 else if (CONSTANT_CLASS_P (val))
1312 val = fold_convert (TREE_TYPE (type), val);
1313 else
1314 {
1315 new_temp = make_ssa_name (TREE_TYPE (type));
1316 if (! INTEGRAL_TYPE_P (TREE_TYPE (val)))
1317 init_stmt = gimple_build_assign (new_temp,
1318 fold_build1 (VIEW_CONVERT_EXPR,
1319 TREE_TYPE (type),
1320 val));
1321 else
1322 init_stmt = gimple_build_assign (new_temp, NOP_EXPR, val);
1323 vect_init_vector_1 (stmt, init_stmt, gsi);
1324 val = new_temp;
1325 }
1326 }
1327 val = build_vector_from_val (type, val);
1328 }
1329
1330 new_temp = vect_get_new_ssa_name (type, vect_simple_var, "cst_");
1331 init_stmt = gimple_build_assign (new_temp, val);
1332 vect_init_vector_1 (stmt, init_stmt, gsi);
1333 return new_temp;
1334 }
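/* Usage sketch (hypothetical caller): to materialize an invariant
   operand OP as a vector of type VECTYPE in the loop preheader one
   would write

     tree vec_cst = vect_init_vector (stmt, op, vectype, NULL);

   and use VEC_CST as an operand of the vectorized statement; passing a
   gimple_stmt_iterator instead of NULL emits the initialization at
   that point instead.  */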
1335
1336 /* Function vect_get_vec_def_for_operand_1.
1337
1338 For a defining stmt DEF_STMT of a scalar stmt, return a vector def with type
1339 DT that will be used in the vectorized stmt. */
1340
1341 tree
1342 vect_get_vec_def_for_operand_1 (gimple *def_stmt, enum vect_def_type dt)
1343 {
1344 tree vec_oprnd;
1345 gimple *vec_stmt;
1346 stmt_vec_info def_stmt_info = NULL;
1347
1348 switch (dt)
1349 {
1350 /* operand is a constant or a loop invariant. */
1351 case vect_constant_def:
1352 case vect_external_def:
1353 /* Code should use vect_get_vec_def_for_operand. */
1354 gcc_unreachable ();
1355
1356 /* operand is defined inside the loop. */
1357 case vect_internal_def:
1358 {
1359 /* Get the def from the vectorized stmt. */
1360 def_stmt_info = vinfo_for_stmt (def_stmt);
1361
1362 vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
1363 /* Get vectorized pattern statement. */
1364 if (!vec_stmt
1365 && STMT_VINFO_IN_PATTERN_P (def_stmt_info)
1366 && !STMT_VINFO_RELEVANT (def_stmt_info))
1367 vec_stmt = STMT_VINFO_VEC_STMT (vinfo_for_stmt (
1368 STMT_VINFO_RELATED_STMT (def_stmt_info)));
1369 gcc_assert (vec_stmt);
1370 if (gimple_code (vec_stmt) == GIMPLE_PHI)
1371 vec_oprnd = PHI_RESULT (vec_stmt);
1372 else if (is_gimple_call (vec_stmt))
1373 vec_oprnd = gimple_call_lhs (vec_stmt);
1374 else
1375 vec_oprnd = gimple_assign_lhs (vec_stmt);
1376 return vec_oprnd;
1377 }
1378
1379 /* operand is defined by a loop header phi. */
1380 case vect_reduction_def:
1381 case vect_double_reduction_def:
1382 case vect_nested_cycle:
1383 case vect_induction_def:
1384 {
1385 gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);
1386
1387 /* Get the def from the vectorized stmt. */
1388 def_stmt_info = vinfo_for_stmt (def_stmt);
1389 vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
1390 if (gimple_code (vec_stmt) == GIMPLE_PHI)
1391 vec_oprnd = PHI_RESULT (vec_stmt);
1392 else
1393 vec_oprnd = gimple_get_lhs (vec_stmt);
1394 return vec_oprnd;
1395 }
1396
1397 default:
1398 gcc_unreachable ();
1399 }
1400 }
1401
1402
1403 /* Function vect_get_vec_def_for_operand.
1404
1405 OP is an operand in STMT. This function returns a (vector) def that will be
1406 used in the vectorized stmt for STMT.
1407
1408 In the case that OP is an SSA_NAME which is defined in the loop, then
1409 STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.
1410
1411 In case OP is an invariant or constant, a new stmt that creates a vector def
1412 needs to be introduced. VECTYPE may be used to specify a required type for
1413 vector invariant. */
1414
1415 tree
1416 vect_get_vec_def_for_operand (tree op, gimple *stmt, tree vectype)
1417 {
1418 gimple *def_stmt;
1419 enum vect_def_type dt;
1420 bool is_simple_use;
1421 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1422 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1423
1424 if (dump_enabled_p ())
1425 {
1426 dump_printf_loc (MSG_NOTE, vect_location,
1427 "vect_get_vec_def_for_operand: ");
1428 dump_generic_expr (MSG_NOTE, TDF_SLIM, op);
1429 dump_printf (MSG_NOTE, "\n");
1430 }
1431
1432 is_simple_use = vect_is_simple_use (op, loop_vinfo, &def_stmt, &dt);
1433 gcc_assert (is_simple_use);
1434 if (def_stmt && dump_enabled_p ())
1435 {
1436 dump_printf_loc (MSG_NOTE, vect_location, " def_stmt = ");
1437 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, def_stmt, 0);
1438 }
1439
1440 if (dt == vect_constant_def || dt == vect_external_def)
1441 {
1442 tree stmt_vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
1443 tree vector_type;
1444
1445 if (vectype)
1446 vector_type = vectype;
1447 else if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op))
1448 && VECTOR_BOOLEAN_TYPE_P (stmt_vectype))
1449 vector_type = build_same_sized_truth_vector_type (stmt_vectype);
1450 else
1451 vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
1452
1453 gcc_assert (vector_type);
1454 return vect_init_vector (stmt, op, vector_type, NULL);
1455 }
1456 else
1457 return vect_get_vec_def_for_operand_1 (def_stmt, dt);
1458 }
1459
1460
1461 /* Function vect_get_vec_def_for_stmt_copy
1462
1463 Return a vector-def for an operand. This function is used when the
1464 vectorized stmt to be created (by the caller to this function) is a "copy"
1465 created in case the vectorized result cannot fit in one vector, and several
1466 copies of the vector-stmt are required. In this case the vector-def is
1467 retrieved from the vector stmt recorded in the STMT_VINFO_RELATED_STMT field
1468 of the stmt that defines VEC_OPRND.
1469 DT is the type of the vector def VEC_OPRND.
1470
1471 Context:
1472 In case the vectorization factor (VF) is bigger than the number
1473 of elements that can fit in a vectype (nunits), we have to generate
1474 more than one vector stmt to vectorize the scalar stmt. This situation
1475 arises when there are multiple data-types operated upon in the loop; the
1476 smallest data-type determines the VF, and as a result, when vectorizing
1477 stmts operating on wider types we need to create 'VF/nunits' "copies" of the
1478 vector stmt (each computing a vector of 'nunits' results, and together
1479 computing 'VF' results in each iteration). This function is called when
1480 vectorizing such a stmt (e.g. vectorizing S2 in the illustration below, in
1481 which VF=16 and nunits=4, so the number of copies required is 4):
1482
1483 scalar stmt: vectorized into: STMT_VINFO_RELATED_STMT
1484
1485 S1: x = load VS1.0: vx.0 = memref0 VS1.1
1486 VS1.1: vx.1 = memref1 VS1.2
1487 VS1.2: vx.2 = memref2 VS1.3
1488 VS1.3: vx.3 = memref3
1489
1490 S2: z = x + ... VSnew.0: vz0 = vx.0 + ... VSnew.1
1491 VSnew.1: vz1 = vx.1 + ... VSnew.2
1492 VSnew.2: vz2 = vx.2 + ... VSnew.3
1493 VSnew.3: vz3 = vx.3 + ...
1494
1495 The vectorization of S1 is explained in vectorizable_load.
1496 The vectorization of S2:
1497 To create the first vector-stmt out of the 4 copies - VSnew.0 -
1498 the function 'vect_get_vec_def_for_operand' is called to
1499 get the relevant vector-def for each operand of S2. For operand x it
1500 returns the vector-def 'vx.0'.
1501
1502 To create the remaining copies of the vector-stmt (VSnew.j), this
1503 function is called to get the relevant vector-def for each operand. It is
1504 obtained from the respective VS1.j stmt, which is recorded in the
1505 STMT_VINFO_RELATED_STMT field of the stmt that defines VEC_OPRND.
1506
1507 For example, to obtain the vector-def 'vx.1' in order to create the
1508 vector stmt 'VSnew.1', this function is called with VEC_OPRND='vx.0'.
1509 Given 'vx0' we obtain the stmt that defines it ('VS1.0'); from the
1510 STMT_VINFO_RELATED_STMT field of 'VS1.0' we obtain the next copy - 'VS1.1',
1511 and return its def ('vx.1').
1512 Overall, to create the above sequence this function will be called 3 times:
1513 vx.1 = vect_get_vec_def_for_stmt_copy (dt, vx.0);
1514 vx.2 = vect_get_vec_def_for_stmt_copy (dt, vx.1);
1515 vx.3 = vect_get_vec_def_for_stmt_copy (dt, vx.2); */
1516
1517 tree
1518 vect_get_vec_def_for_stmt_copy (enum vect_def_type dt, tree vec_oprnd)
1519 {
1520 gimple *vec_stmt_for_operand;
1521 stmt_vec_info def_stmt_info;
1522
1523 /* Do nothing; can reuse same def. */
1524 if (dt == vect_external_def || dt == vect_constant_def )
1525 return vec_oprnd;
1526
1527 vec_stmt_for_operand = SSA_NAME_DEF_STMT (vec_oprnd);
1528 def_stmt_info = vinfo_for_stmt (vec_stmt_for_operand);
1529 gcc_assert (def_stmt_info);
1530 vec_stmt_for_operand = STMT_VINFO_RELATED_STMT (def_stmt_info);
1531 gcc_assert (vec_stmt_for_operand);
1532 if (gimple_code (vec_stmt_for_operand) == GIMPLE_PHI)
1533 vec_oprnd = PHI_RESULT (vec_stmt_for_operand);
1534 else
1535 vec_oprnd = gimple_get_lhs (vec_stmt_for_operand);
1536 return vec_oprnd;
1537 }
1538
1539
1540 /* Get vectorized definitions for the operands to create a copy of an original
1541 stmt. See vect_get_vec_def_for_stmt_copy () for details. */
1542
1543 void
1544 vect_get_vec_defs_for_stmt_copy (enum vect_def_type *dt,
1545 vec<tree> *vec_oprnds0,
1546 vec<tree> *vec_oprnds1)
1547 {
1548 tree vec_oprnd = vec_oprnds0->pop ();
1549
1550 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd);
1551 vec_oprnds0->quick_push (vec_oprnd);
1552
1553 if (vec_oprnds1 && vec_oprnds1->length ())
1554 {
1555 vec_oprnd = vec_oprnds1->pop ();
1556 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[1], vec_oprnd);
1557 vec_oprnds1->quick_push (vec_oprnd);
1558 }
1559 }
1560
1561
1562 /* Get vectorized definitions for OP0 and OP1. */
1563
1564 void
1565 vect_get_vec_defs (tree op0, tree op1, gimple *stmt,
1566 vec<tree> *vec_oprnds0,
1567 vec<tree> *vec_oprnds1,
1568 slp_tree slp_node)
1569 {
1570 if (slp_node)
1571 {
1572 int nops = (op1 == NULL_TREE) ? 1 : 2;
1573 auto_vec<tree> ops (nops);
1574 auto_vec<vec<tree> > vec_defs (nops);
1575
1576 ops.quick_push (op0);
1577 if (op1)
1578 ops.quick_push (op1);
1579
1580 vect_get_slp_defs (ops, slp_node, &vec_defs);
1581
1582 *vec_oprnds0 = vec_defs[0];
1583 if (op1)
1584 *vec_oprnds1 = vec_defs[1];
1585 }
1586 else
1587 {
1588 tree vec_oprnd;
1589
1590 vec_oprnds0->create (1);
1591 vec_oprnd = vect_get_vec_def_for_operand (op0, stmt);
1592 vec_oprnds0->quick_push (vec_oprnd);
1593
1594 if (op1)
1595 {
1596 vec_oprnds1->create (1);
1597 vec_oprnd = vect_get_vec_def_for_operand (op1, stmt);
1598 vec_oprnds1->quick_push (vec_oprnd);
1599 }
1600 }
1601 }
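/* Typical call pattern (a sketch based on the functions above): the
   first vector copy of a statement fetches its operands with

     vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
                        slp_node);

   and each further copy of a non-SLP statement advances them with

     vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);  */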
1602
1603 /* Helper function called by vect_finish_replace_stmt and
1604 vect_finish_stmt_generation. Set the location of the new
1605 statement and create a stmt_vec_info for it. */
1606
1607 static void
1608 vect_finish_stmt_generation_1 (gimple *stmt, gimple *vec_stmt)
1609 {
1610 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1611 vec_info *vinfo = stmt_info->vinfo;
1612
1613 set_vinfo_for_stmt (vec_stmt, new_stmt_vec_info (vec_stmt, vinfo));
1614
1615 if (dump_enabled_p ())
1616 {
1617 dump_printf_loc (MSG_NOTE, vect_location, "add new stmt: ");
1618 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, vec_stmt, 0);
1619 }
1620
1621 gimple_set_location (vec_stmt, gimple_location (stmt));
1622
1623 /* While EH edges will generally prevent vectorization, stmt might
1624 e.g. be in a must-not-throw region. Ensure newly created stmts
1625 that could throw are part of the same region. */
1626 int lp_nr = lookup_stmt_eh_lp (stmt);
1627 if (lp_nr != 0 && stmt_could_throw_p (vec_stmt))
1628 add_stmt_to_eh_lp (vec_stmt, lp_nr);
1629 }
1630
1631 /* Replace the scalar statement STMT with a new vector statement VEC_STMT,
1632 which sets the same scalar result as STMT did. */
1633
1634 void
1635 vect_finish_replace_stmt (gimple *stmt, gimple *vec_stmt)
1636 {
1637 gcc_assert (gimple_get_lhs (stmt) == gimple_get_lhs (vec_stmt));
1638
1639 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
1640 gsi_replace (&gsi, vec_stmt, true);
1641
1642 vect_finish_stmt_generation_1 (stmt, vec_stmt);
1643 }
1644
1645 /* Function vect_finish_stmt_generation.
1646
1647 Insert the new vector statement VEC_STMT for scalar statement STMT before GSI. */
1648
1649 void
1650 vect_finish_stmt_generation (gimple *stmt, gimple *vec_stmt,
1651 gimple_stmt_iterator *gsi)
1652 {
1653 gcc_assert (gimple_code (stmt) != GIMPLE_LABEL);
1654
1655 if (!gsi_end_p (*gsi)
1656 && gimple_has_mem_ops (vec_stmt))
1657 {
1658 gimple *at_stmt = gsi_stmt (*gsi);
1659 tree vuse = gimple_vuse (at_stmt);
1660 if (vuse && TREE_CODE (vuse) == SSA_NAME)
1661 {
1662 tree vdef = gimple_vdef (at_stmt);
1663 gimple_set_vuse (vec_stmt, gimple_vuse (at_stmt));
1664 /* If we have an SSA vuse and insert a store, update virtual
1665 SSA form to avoid triggering the renamer. Do so only
1666 if we can easily see all uses - which is what almost always
1667 happens with the way vectorized stmts are inserted. */
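/* For example (illustrative GIMPLE, SSA names hypothetical): inserting a
   vector store VS before AT_STMT rewires the virtual operands as

     before:  # .MEM_5 = VDEF <.MEM_4>  AT_STMT
     after:   # .MEM_7 = VDEF <.MEM_4>  VS
              # .MEM_5 = VDEF <.MEM_7>  AT_STMT  */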
1668 if ((vdef && TREE_CODE (vdef) == SSA_NAME)
1669 && ((is_gimple_assign (vec_stmt)
1670 && !is_gimple_reg (gimple_assign_lhs (vec_stmt)))
1671 || (is_gimple_call (vec_stmt)
1672 && !(gimple_call_flags (vec_stmt)
1673 & (ECF_CONST|ECF_PURE|ECF_NOVOPS)))))
1674 {
1675 tree new_vdef = copy_ssa_name (vuse, vec_stmt);
1676 gimple_set_vdef (vec_stmt, new_vdef);
1677 SET_USE (gimple_vuse_op (at_stmt), new_vdef);
1678 }
1679 }
1680 }
1681 gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);
1682 vect_finish_stmt_generation_1 (stmt, vec_stmt);
1683 }
1684
1685 /* We want to vectorize a call to combined function CFN with function
1686 decl FNDECL, using VECTYPE_OUT as the type of the output and VECTYPE_IN
1687 as the types of all inputs. Check whether this is possible using
1688 an internal function, returning its code if so or IFN_LAST if not. */
1689
1690 static internal_fn
1691 vectorizable_internal_function (combined_fn cfn, tree fndecl,
1692 tree vectype_out, tree vectype_in)
1693 {
1694 internal_fn ifn;
1695 if (internal_fn_p (cfn))
1696 ifn = as_internal_fn (cfn);
1697 else
1698 ifn = associated_internal_fn (fndecl);
1699 if (ifn != IFN_LAST && direct_internal_fn_p (ifn))
1700 {
1701 const direct_internal_fn_info &info = direct_internal_fn (ifn);
1702 if (info.vectorizable)
1703 {
1704 tree type0 = (info.type0 < 0 ? vectype_out : vectype_in);
1705 tree type1 = (info.type1 < 0 ? vectype_out : vectype_in);
1706 if (direct_internal_fn_supported_p (ifn, tree_pair (type0, type1),
1707 OPTIMIZE_FOR_SPEED))
1708 return ifn;
1709 }
1710 }
1711 return IFN_LAST;
1712 }
1713
1714
1715 static tree permute_vec_elements (tree, tree, tree, gimple *,
1716 gimple_stmt_iterator *);
1717
1718 /* Check whether a load or store statement in the loop described by
1719 LOOP_VINFO is possible in a fully-masked loop. This is testing
1720 whether the vectorizer pass has the appropriate support, as well as
1721 whether the target does.
1722
1723 VLS_TYPE says whether the statement is a load or store and VECTYPE
1724 is the type of the vector being loaded or stored. MEMORY_ACCESS_TYPE
1725 says how the load or store is going to be implemented and GROUP_SIZE
1726 is the number of load or store statements in the containing group.
1727 If the access is a gather load or scatter store, GS_INFO describes
1728 its arguments.
1729
1730 Clear LOOP_VINFO_CAN_FULLY_MASK_P if a fully-masked loop is not
1731 supported, otherwise record the required mask types. */
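/* An illustrative sketch of the kind of fully-masked code this enables
   (the internal functions are real, SSA names are hypothetical):

     loop_mask_1 = .WHILE_ULT (_index, _niters, { 0, ... });
     vect__2 = .MASK_LOAD (_addr, _align, loop_mask_1);
     .MASK_STORE (_addr2, _align, loop_mask_1, vect__2);

   The mask types recorded below determine how many such loop masks need
   to be computed per vector iteration.  */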
1732
1733 static void
1734 check_load_store_masking (loop_vec_info loop_vinfo, tree vectype,
1735 vec_load_store_type vls_type, int group_size,
1736 vect_memory_access_type memory_access_type,
1737 gather_scatter_info *gs_info)
1738 {
1739 /* Invariant loads need no special support. */
1740 if (memory_access_type == VMAT_INVARIANT)
1741 return;
1742
1743 vec_loop_masks *masks = &LOOP_VINFO_MASKS (loop_vinfo);
1744 machine_mode vecmode = TYPE_MODE (vectype);
1745 bool is_load = (vls_type == VLS_LOAD);
1746 if (memory_access_type == VMAT_LOAD_STORE_LANES)
1747 {
1748 if (is_load
1749 ? !vect_load_lanes_supported (vectype, group_size, true)
1750 : !vect_store_lanes_supported (vectype, group_size, true))
1751 {
1752 if (dump_enabled_p ())
1753 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1754 "can't use a fully-masked loop because the"
1755 " target doesn't have an appropriate masked"
1756 " load/store-lanes instruction.\n");
1757 LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
1758 return;
1759 }
1760 unsigned int ncopies = vect_get_num_copies (loop_vinfo, vectype);
1761 vect_record_loop_mask (loop_vinfo, masks, ncopies, vectype);
1762 return;
1763 }
1764
1765 if (memory_access_type == VMAT_GATHER_SCATTER)
1766 {
1767 internal_fn ifn = (is_load
1768 ? IFN_MASK_GATHER_LOAD
1769 : IFN_MASK_SCATTER_STORE);
1770 tree offset_type = TREE_TYPE (gs_info->offset);
1771 if (!internal_gather_scatter_fn_supported_p (ifn, vectype,
1772 gs_info->memory_type,
1773 TYPE_SIGN (offset_type),
1774 gs_info->scale))
1775 {
1776 if (dump_enabled_p ())
1777 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1778 "can't use a fully-masked loop because the"
1779 " target doesn't have an appropriate masked"
1780 " gather load or scatter store instruction.\n");
1781 LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
1782 return;
1783 }
1784 unsigned int ncopies = vect_get_num_copies (loop_vinfo, vectype);
1785 vect_record_loop_mask (loop_vinfo, masks, ncopies, vectype);
1786 return;
1787 }
1788
1789 if (memory_access_type != VMAT_CONTIGUOUS
1790 && memory_access_type != VMAT_CONTIGUOUS_PERMUTE)
1791 {
1792 /* Element X of the data must come from iteration i * VF + X of the
1793 scalar loop. We need more work to support other mappings. */
1794 if (dump_enabled_p ())
1795 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1796 "can't use a fully-masked loop because an access"
1797 " isn't contiguous.\n");
1798 LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
1799 return;
1800 }
1801
1802 machine_mode mask_mode;
1803 if (!(targetm.vectorize.get_mask_mode
1804 (GET_MODE_NUNITS (vecmode),
1805 GET_MODE_SIZE (vecmode)).exists (&mask_mode))
1806 || !can_vec_mask_load_store_p (vecmode, mask_mode, is_load))
1807 {
1808 if (dump_enabled_p ())
1809 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1810 "can't use a fully-masked loop because the target"
1811 " doesn't have the appropriate masked load or"
1812 " store.\n");
1813 LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
1814 return;
1815 }
1816 /* We might load more scalars than we need for permuting SLP loads.
1817 We checked in get_group_load_store_type that the extra elements
1818 don't leak into a new vector. */
1819 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
1820 poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1821 unsigned int nvectors;
1822 if (can_div_away_from_zero_p (group_size * vf, nunits, &nvectors))
1823 vect_record_loop_mask (loop_vinfo, masks, nvectors, vectype);
1824 else
1825 gcc_unreachable ();
1826 }
1827
1828 /* Return the mask input to a masked load or store. VEC_MASK is the vectorized
1829 form of the scalar mask condition and LOOP_MASK, if nonnull, is the mask
1830 that needs to be applied to all loads and stores in a vectorized loop.
1831 Return VEC_MASK if LOOP_MASK is null, otherwise return VEC_MASK & LOOP_MASK.
1832
1833 MASK_TYPE is the type of both masks. If new statements are needed,
1834 insert them before GSI. */
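/* For example (illustrative GIMPLE, SSA names hypothetical), when both
   masks are present the result is their conjunction, which then feeds the
   masked access:

     vec_mask_and_6 = vec_mask_4 & loop_mask_5;
     vect__7 = .MASK_LOAD (_addr, _align, vec_mask_and_6);  */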
1835
1836 static tree
1837 prepare_load_store_mask (tree mask_type, tree loop_mask, tree vec_mask,
1838 gimple_stmt_iterator *gsi)
1839 {
1840 gcc_assert (useless_type_conversion_p (mask_type, TREE_TYPE (vec_mask)));
1841 if (!loop_mask)
1842 return vec_mask;
1843
1844 gcc_assert (TREE_TYPE (loop_mask) == mask_type);
1845 tree and_res = make_temp_ssa_name (mask_type, NULL, "vec_mask_and");
1846 gimple *and_stmt = gimple_build_assign (and_res, BIT_AND_EXPR,
1847 vec_mask, loop_mask);
1848 gsi_insert_before (gsi, and_stmt, GSI_SAME_STMT);
1849 return and_res;
1850 }
1851
1852 /* Determine whether we can use a gather load or scatter store to vectorize
1853 strided load or store STMT by truncating the current offset to a smaller
1854 width. We need to be able to construct an offset vector:
1855
1856 { 0, X, X*2, X*3, ... }
1857
1858 without loss of precision, where X is STMT's DR_STEP.
1859
1860 Return true if this is possible, describing the gather load or scatter
1861 store in GS_INFO. MASKED_P is true if the load or store is conditional. */
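/* A worked example (illustrative numbers): with 4-byte elements
   (ELEMENT_BITS == 32), DR_STEP == 20 and at most 1000 latch iterations,
   trying SCALE == 4 gives X == 5.  The largest offset needed, 1000 * 5,
   easily fits in 32 bits, so the offset vector { 0, 5, 10, 15, ... } with
   scale 4 can be used.  */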
1862
1863 static bool
1864 vect_truncate_gather_scatter_offset (gimple *stmt, loop_vec_info loop_vinfo,
1865 bool masked_p,
1866 gather_scatter_info *gs_info)
1867 {
1868 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1869 data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
1870 tree step = DR_STEP (dr);
1871 if (TREE_CODE (step) != INTEGER_CST)
1872 {
1873 /* ??? Perhaps we could use range information here? */
1874 if (dump_enabled_p ())
1875 dump_printf_loc (MSG_NOTE, vect_location,
1876 "cannot truncate variable step.\n");
1877 return false;
1878 }
1879
1880 /* Get the number of bits in an element. */
1881 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1882 scalar_mode element_mode = SCALAR_TYPE_MODE (TREE_TYPE (vectype));
1883 unsigned int element_bits = GET_MODE_BITSIZE (element_mode);
1884
1885 /* Set COUNT to the upper limit on the number of elements - 1.
1886 Start with the maximum vectorization factor. */
1887 unsigned HOST_WIDE_INT count = vect_max_vf (loop_vinfo) - 1;
1888
1889 /* Try lowering COUNT to the number of scalar latch iterations. */
1890 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1891 widest_int max_iters;
1892 if (max_loop_iterations (loop, &max_iters)
1893 && max_iters < count)
1894 count = max_iters.to_shwi ();
1895
1896 /* Try scales of 1 and the element size. */
1897 int scales[] = { 1, vect_get_scalar_dr_size (dr) };
1898 bool overflow_p = false;
1899 for (int i = 0; i < 2; ++i)
1900 {
1901 int scale = scales[i];
1902 widest_int factor;
1903 if (!wi::multiple_of_p (wi::to_widest (step), scale, SIGNED, &factor))
1904 continue;
1905
1906 /* See whether we can calculate COUNT * STEP / SCALE
1907 in ELEMENT_BITS bits. */
1908 widest_int range = wi::mul (count, factor, SIGNED, &overflow_p);
1909 if (overflow_p)
1910 continue;
1911 signop sign = range >= 0 ? UNSIGNED : SIGNED;
1912 if (wi::min_precision (range, sign) > element_bits)
1913 {
1914 overflow_p = true;
1915 continue;
1916 }
1917
1918 /* See whether the target supports the operation. */
1919 tree memory_type = TREE_TYPE (DR_REF (dr));
1920 if (!vect_gather_scatter_fn_p (DR_IS_READ (dr), masked_p, vectype,
1921 memory_type, element_bits, sign, scale,
1922 &gs_info->ifn, &gs_info->element_type))
1923 continue;
1924
1925 tree offset_type = build_nonstandard_integer_type (element_bits,
1926 sign == UNSIGNED);
1927
1928 gs_info->decl = NULL_TREE;
1929 /* Logically the sum of DR_BASE_ADDRESS, DR_INIT and DR_OFFSET,
1930 but we don't need to store that here. */
1931 gs_info->base = NULL_TREE;
1932 gs_info->offset = fold_convert (offset_type, step);
1933 gs_info->offset_dt = vect_constant_def;
1934 gs_info->offset_vectype = NULL_TREE;
1935 gs_info->scale = scale;
1936 gs_info->memory_type = memory_type;
1937 return true;
1938 }
1939
1940 if (overflow_p && dump_enabled_p ())
1941 dump_printf_loc (MSG_NOTE, vect_location,
1942 "truncating gather/scatter offset to %d bits"
1943 " might change its value.\n", element_bits);
1944
1945 return false;
1946 }
1947
1948 /* Return true if we can use gather/scatter internal functions to
1949 vectorize STMT, which is a grouped or strided load or store.
1950 MASKED_P is true if the load or store is conditional. When returning
1951 true, fill in GS_INFO with the information required to perform the
1952 operation. */
1953
1954 static bool
1955 vect_use_strided_gather_scatters_p (gimple *stmt, loop_vec_info loop_vinfo,
1956 bool masked_p,
1957 gather_scatter_info *gs_info)
1958 {
1959 if (!vect_check_gather_scatter (stmt, loop_vinfo, gs_info)
1960 || gs_info->decl)
1961 return vect_truncate_gather_scatter_offset (stmt, loop_vinfo,
1962 masked_p, gs_info);
1963
1964 scalar_mode element_mode = SCALAR_TYPE_MODE (gs_info->element_type);
1965 unsigned int element_bits = GET_MODE_BITSIZE (element_mode);
1966 tree offset_type = TREE_TYPE (gs_info->offset);
1967 unsigned int offset_bits = TYPE_PRECISION (offset_type);
1968
1969 /* Enforced by vect_check_gather_scatter. */
1970 gcc_assert (element_bits >= offset_bits);
1971
1972 /* If the elements are wider than the offset, convert the offset to the
1973 same width, without changing its sign. */
1974 if (element_bits > offset_bits)
1975 {
1976 bool unsigned_p = TYPE_UNSIGNED (offset_type);
1977 offset_type = build_nonstandard_integer_type (element_bits, unsigned_p);
1978 gs_info->offset = fold_convert (offset_type, gs_info->offset);
1979 }
1980
1981 if (dump_enabled_p ())
1982 dump_printf_loc (MSG_NOTE, vect_location,
1983 "using gather/scatter for strided/grouped access,"
1984 " scale = %d\n", gs_info->scale);
1985
1986 return true;
1987 }
1988
1989 /* STMT is a non-strided load or store, meaning that it accesses
1990 elements with a known constant step. Return -1 if that step
1991 is negative, 0 if it is zero, and 1 if it is greater than zero. */
1992
1993 static int
1994 compare_step_with_zero (gimple *stmt)
1995 {
1996 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1997 data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
1998 return tree_int_cst_compare (vect_dr_behavior (dr)->step,
1999 size_zero_node);
2000 }
2001
2002 /* If the target supports a permute mask that reverses the elements in
2003 a vector of type VECTYPE, return that mask, otherwise return null. */
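/* For example (illustrative): reversing a 4-element vector needs the
   selector { 3, 2, 1, 0 }.  The builder below encodes this as the single
   stepped pattern { 3, 2, 1 }, which extends to { N-1, N-2, ..., 0 } for
   any (possibly variable) number of elements N.  */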
2004
2005 static tree
2006 perm_mask_for_reverse (tree vectype)
2007 {
2008 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2009
2010 /* The encoding has a single stepped pattern. */
2011 vec_perm_builder sel (nunits, 1, 3);
2012 for (int i = 0; i < 3; ++i)
2013 sel.quick_push (nunits - 1 - i);
2014
2015 vec_perm_indices indices (sel, 1, nunits);
2016 if (!can_vec_perm_const_p (TYPE_MODE (vectype), indices))
2017 return NULL_TREE;
2018 return vect_gen_perm_mask_checked (vectype, indices);
2019 }
2020
2021 /* A subroutine of get_load_store_type, with a subset of the same
2022 arguments. Handle the case where STMT is a load or store that
2023 accesses consecutive elements with a negative step. */
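/* For example (an illustrative scalar loop):

     for (int i = n - 1; i >= 0; --i)
       a[i] = b[i];

   accesses consecutive elements with a negative step.  If the target can
   reverse the elements of a vector, this becomes a contiguous access plus
   a permute (VMAT_CONTIGUOUS_REVERSE); otherwise we fall back to
   VMAT_ELEMENTWISE.  */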
2024
2025 static vect_memory_access_type
2026 get_negative_load_store_type (gimple *stmt, tree vectype,
2027 vec_load_store_type vls_type,
2028 unsigned int ncopies)
2029 {
2030 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2031 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2032 dr_alignment_support alignment_support_scheme;
2033
2034 if (ncopies > 1)
2035 {
2036 if (dump_enabled_p ())
2037 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2038 "multiple types with negative step.\n");
2039 return VMAT_ELEMENTWISE;
2040 }
2041
2042 alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
2043 if (alignment_support_scheme != dr_aligned
2044 && alignment_support_scheme != dr_unaligned_supported)
2045 {
2046 if (dump_enabled_p ())
2047 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2048 "negative step but alignment required.\n");
2049 return VMAT_ELEMENTWISE;
2050 }
2051
2052 if (vls_type == VLS_STORE_INVARIANT)
2053 {
2054 if (dump_enabled_p ())
2055 dump_printf_loc (MSG_NOTE, vect_location,
2056 "negative step with invariant source;"
2057 " no permute needed.\n");
2058 return VMAT_CONTIGUOUS_DOWN;
2059 }
2060
2061 if (!perm_mask_for_reverse (vectype))
2062 {
2063 if (dump_enabled_p ())
2064 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2065 "negative step and reversing not supported.\n");
2066 return VMAT_ELEMENTWISE;
2067 }
2068
2069 return VMAT_CONTIGUOUS_REVERSE;
2070 }
2071
2072 /* STMT is either a masked or unconditional store. Return the value
2073 being stored. */
2074
2075 tree
2076 vect_get_store_rhs (gimple *stmt)
2077 {
2078 if (gassign *assign = dyn_cast <gassign *> (stmt))
2079 {
2080 gcc_assert (gimple_assign_single_p (assign));
2081 return gimple_assign_rhs1 (assign);
2082 }
2083 if (gcall *call = dyn_cast <gcall *> (stmt))
2084 {
2085 internal_fn ifn = gimple_call_internal_fn (call);
2086 int index = internal_fn_stored_value_index (ifn);
2087 gcc_assert (index >= 0);
2088 return gimple_call_arg (stmt, index);
2089 }
2090 gcc_unreachable ();
2091 }
2092
2093 /* A subroutine of get_load_store_type, with a subset of the same
2094 arguments. Handle the case where STMT is part of a grouped load
2095 or store.
2096
2097 For stores, the statements in the group are all consecutive
2098 and there is no gap at the end. For loads, the statements in the
2099 group might not be consecutive; there can be gaps between statements
2100 as well as at the end. */
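/* For example (an illustrative scalar loop):

     for (int i = 0; i < n; ++i)
       {
         x[i] = a[3 * i];
         y[i] = a[3 * i + 1];
       }

   is a grouped load of two statements in which a[3 * i + 2] is never
   accessed, i.e. there is a gap of one element at the end of each group.  */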
2101
2102 static bool
2103 get_group_load_store_type (gimple *stmt, tree vectype, bool slp,
2104 bool masked_p, vec_load_store_type vls_type,
2105 vect_memory_access_type *memory_access_type,
2106 gather_scatter_info *gs_info)
2107 {
2108 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2109 vec_info *vinfo = stmt_info->vinfo;
2110 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2111 struct loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
2112 gimple *first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
2113 data_reference *first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
2114 unsigned int group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
2115 bool single_element_p = (stmt == first_stmt
2116 && !GROUP_NEXT_ELEMENT (stmt_info));
2117 unsigned HOST_WIDE_INT gap = GROUP_GAP (vinfo_for_stmt (first_stmt));
2118 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2119
2120 /* True if the vectorized statements would access beyond the last
2121 statement in the group. */
2122 bool overrun_p = false;
2123
2124 /* True if we can cope with such overrun by peeling for gaps, so that
2125 there is at least one final scalar iteration after the vector loop. */
2126 bool can_overrun_p = (!masked_p
2127 && vls_type == VLS_LOAD
2128 && loop_vinfo
2129 && !loop->inner);
2130
2131 /* There can only be a gap at the end of the group if the stride is
2132 known at compile time. */
2133 gcc_assert (!STMT_VINFO_STRIDED_P (stmt_info) || gap == 0);
2134
2135 /* Stores can't yet have gaps. */
2136 gcc_assert (slp || vls_type == VLS_LOAD || gap == 0);
2137
2138 if (slp)
2139 {
2140 if (STMT_VINFO_STRIDED_P (stmt_info))
2141 {
2142 /* Try to use consecutive accesses of GROUP_SIZE elements,
2143 separated by the stride, until we have a complete vector.
2144 Fall back to scalar accesses if that isn't possible. */
2145 if (multiple_p (nunits, group_size))
2146 *memory_access_type = VMAT_STRIDED_SLP;
2147 else
2148 *memory_access_type = VMAT_ELEMENTWISE;
2149 }
2150 else
2151 {
2152 overrun_p = loop_vinfo && gap != 0;
2153 if (overrun_p && vls_type != VLS_LOAD)
2154 {
2155 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2156 "Grouped store with gaps requires"
2157 " non-consecutive accesses\n");
2158 return false;
2159 }
2160 /* An overrun is fine if the trailing elements are smaller
2161 than the alignment boundary B. Every vector access will
2162 be a multiple of B and so we are guaranteed to access a
2163 non-gap element in the same B-sized block. */
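/* A worked example (illustrative numbers): with a known alignment of
   16 bytes and 4-byte elements there are four elements per 16-byte
   block, so a gap of up to three trailing elements still leaves every
   vector access touching a non-gap element in the same 16-byte block.  */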
2164 if (overrun_p
2165 && gap < (vect_known_alignment_in_bytes (first_dr)
2166 / vect_get_scalar_dr_size (first_dr)))
2167 overrun_p = false;
2168 if (overrun_p && !can_overrun_p)
2169 {
2170 if (dump_enabled_p ())
2171 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2172 "Peeling for outer loop is not supported\n");
2173 return false;
2174 }
2175 int cmp = compare_step_with_zero (stmt);
2176 if (cmp < 0)
2177 *memory_access_type = get_negative_load_store_type
2178 (stmt, vectype, vls_type, 1);
2179 else
2180 {
2181 gcc_assert (!loop_vinfo || cmp > 0);
2182 *memory_access_type = VMAT_CONTIGUOUS;
2183 }
2184 }
2185 }
2186 else
2187 {
2188 /* We can always handle this case using elementwise accesses,
2189 but see if something more efficient is available. */
2190 *memory_access_type = VMAT_ELEMENTWISE;
2191
2192 /* If there is a gap at the end of the group then these optimizations
2193 would access excess elements in the last iteration. */
2194 bool would_overrun_p = (gap != 0);
2195 /* An overrun is fine if the trailing elements are smaller than the
2196 alignment boundary B. Every vector access will be a multiple of B
2197 and so we are guaranteed to access a non-gap element in the
2198 same B-sized block. */
2199 if (would_overrun_p
2200 && !masked_p
2201 && gap < (vect_known_alignment_in_bytes (first_dr)
2202 / vect_get_scalar_dr_size (first_dr)))
2203 would_overrun_p = false;
2204
2205 if (!STMT_VINFO_STRIDED_P (stmt_info)
2206 && (can_overrun_p || !would_overrun_p)
2207 && compare_step_with_zero (stmt) > 0)
2208 {
2209 /* First cope with the degenerate case of a single-element
2210 vector. */
2211 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype), 1U))
2212 *memory_access_type = VMAT_CONTIGUOUS;
2213
2214 /* Otherwise try using LOAD/STORE_LANES. */
2215 if (*memory_access_type == VMAT_ELEMENTWISE
2216 && (vls_type == VLS_LOAD
2217 ? vect_load_lanes_supported (vectype, group_size, masked_p)
2218 : vect_store_lanes_supported (vectype, group_size,
2219 masked_p)))
2220 {
2221 *memory_access_type = VMAT_LOAD_STORE_LANES;
2222 overrun_p = would_overrun_p;
2223 }
2224
2225 /* If that fails, try using permuting loads. */
2226 if (*memory_access_type == VMAT_ELEMENTWISE
2227 && (vls_type == VLS_LOAD
2228 ? vect_grouped_load_supported (vectype, single_element_p,
2229 group_size)
2230 : vect_grouped_store_supported (vectype, group_size)))
2231 {
2232 *memory_access_type = VMAT_CONTIGUOUS_PERMUTE;
2233 overrun_p = would_overrun_p;
2234 }
2235 }
2236
2237 /* As a last resort, try using a gather load or scatter store.
2238
2239 ??? Although the code can handle all group sizes correctly,
2240 it probably isn't a win to use separate strided accesses based
2241 on nearby locations. Or, even if it's a win over scalar code,
2242 it might not be a win over vectorizing at a lower VF, if that
2243 allows us to use contiguous accesses. */
2244 if (*memory_access_type == VMAT_ELEMENTWISE
2245 && single_element_p
2246 && loop_vinfo
2247 && vect_use_strided_gather_scatters_p (stmt, loop_vinfo,
2248 masked_p, gs_info))
2249 *memory_access_type = VMAT_GATHER_SCATTER;
2250 }
2251
2252 if (vls_type != VLS_LOAD && first_stmt == stmt)
2253 {
2254 /* STMT is the leader of the group. Check the operands of all the
2255 stmts of the group. */
2256 gimple *next_stmt = GROUP_NEXT_ELEMENT (stmt_info);
2257 while (next_stmt)
2258 {
2259 tree op = vect_get_store_rhs (next_stmt);
2260 gimple *def_stmt;
2261 enum vect_def_type dt;
2262 if (!vect_is_simple_use (op, vinfo, &def_stmt, &dt))
2263 {
2264 if (dump_enabled_p ())
2265 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2266 "use not simple.\n");
2267 return false;
2268 }
2269 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
2270 }
2271 }
2272
2273 if (overrun_p)
2274 {
2275 gcc_assert (can_overrun_p);
2276 if (dump_enabled_p ())
2277 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2278 "Data access with gaps requires scalar "
2279 "epilogue loop\n");
2280 LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) = true;
2281 }
2282
2283 return true;
2284 }
2285
2286 /* Analyze load or store statement STMT of type VLS_TYPE. Return true
2287 if there is a memory access type that the vectorized form can use,
2288 storing it in *MEMORY_ACCESS_TYPE if so. If we decide to use gathers
2289 or scatters, fill in GS_INFO accordingly.
2290
2291 SLP says whether we're performing SLP rather than loop vectorization.
2292 MASKED_P is true if the statement is conditional on a vectorized mask.
2293 VECTYPE is the vector type that the vectorized statements will use.
2294 NCOPIES is the number of vector statements that will be needed. */
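/* Some illustrative classifications (with int *a, *b and loop index i):

     a[i]     contiguous access             -> VMAT_CONTIGUOUS
     a[i * s] variable-step strided access  -> VMAT_ELEMENTWISE or
                                               gather/scatter
     a[b[i]]  indexed access                -> VMAT_GATHER_SCATTER
     a[0]     loop-invariant load           -> VMAT_INVARIANT  */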
2295
2296 static bool
2297 get_load_store_type (gimple *stmt, tree vectype, bool slp, bool masked_p,
2298 vec_load_store_type vls_type, unsigned int ncopies,
2299 vect_memory_access_type *memory_access_type,
2300 gather_scatter_info *gs_info)
2301 {
2302 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2303 vec_info *vinfo = stmt_info->vinfo;
2304 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2305 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2306 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
2307 {
2308 *memory_access_type = VMAT_GATHER_SCATTER;
2309 gimple *def_stmt;
2310 if (!vect_check_gather_scatter (stmt, loop_vinfo, gs_info))
2311 gcc_unreachable ();
2312 else if (!vect_is_simple_use (gs_info->offset, vinfo, &def_stmt,
2313 &gs_info->offset_dt,
2314 &gs_info->offset_vectype))
2315 {
2316 if (dump_enabled_p ())
2317 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2318 "%s index use not simple.\n",
2319 vls_type == VLS_LOAD ? "gather" : "scatter");
2320 return false;
2321 }
2322 }
2323 else if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
2324 {
2325 if (!get_group_load_store_type (stmt, vectype, slp, masked_p, vls_type,
2326 memory_access_type, gs_info))
2327 return false;
2328 }
2329 else if (STMT_VINFO_STRIDED_P (stmt_info))
2330 {
2331 gcc_assert (!slp);
2332 if (loop_vinfo
2333 && vect_use_strided_gather_scatters_p (stmt, loop_vinfo,
2334 masked_p, gs_info))
2335 *memory_access_type = VMAT_GATHER_SCATTER;
2336 else
2337 *memory_access_type = VMAT_ELEMENTWISE;
2338 }
2339 else
2340 {
2341 int cmp = compare_step_with_zero (stmt);
2342 if (cmp < 0)
2343 *memory_access_type = get_negative_load_store_type
2344 (stmt, vectype, vls_type, ncopies);
2345 else if (cmp == 0)
2346 {
2347 gcc_assert (vls_type == VLS_LOAD);
2348 *memory_access_type = VMAT_INVARIANT;
2349 }
2350 else
2351 *memory_access_type = VMAT_CONTIGUOUS;
2352 }
2353
2354 if ((*memory_access_type == VMAT_ELEMENTWISE
2355 || *memory_access_type == VMAT_STRIDED_SLP)
2356 && !nunits.is_constant ())
2357 {
2358 if (dump_enabled_p ())
2359 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2360 "Not using elementwise accesses due to variable "
2361 "vectorization factor.\n");
2362 return false;
2363 }
2364
2365 /* FIXME: At the moment the cost model seems to underestimate the
2366 cost of using elementwise accesses. This check preserves the
2367 traditional behavior until that can be fixed. */
2368 if (*memory_access_type == VMAT_ELEMENTWISE
2369 && !STMT_VINFO_STRIDED_P (stmt_info)
2370 && !(stmt == GROUP_FIRST_ELEMENT (stmt_info)
2371 && !GROUP_NEXT_ELEMENT (stmt_info)
2372 && !pow2p_hwi (GROUP_SIZE (stmt_info))))
2373 {
2374 if (dump_enabled_p ())
2375 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2376 "not falling back to elementwise accesses\n");
2377 return false;
2378 }
2379 return true;
2380 }
2381
2382 /* Return true if boolean argument MASK is suitable for vectorizing
2383 conditional load or store STMT. When returning true, store the type
2384 of the definition in *MASK_DT_OUT and the type of the vectorized mask
2385 in *MASK_VECTYPE_OUT. */
2386
2387 static bool
2388 vect_check_load_store_mask (gimple *stmt, tree mask,
2389 vect_def_type *mask_dt_out,
2390 tree *mask_vectype_out)
2391 {
2392 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (mask)))
2393 {
2394 if (dump_enabled_p ())
2395 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2396 "mask argument is not a boolean.\n");
2397 return false;
2398 }
2399
2400 if (TREE_CODE (mask) != SSA_NAME)
2401 {
2402 if (dump_enabled_p ())
2403 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2404 "mask argument is not an SSA name.\n");
2405 return false;
2406 }
2407
2408 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2409 gimple *def_stmt;
2410 enum vect_def_type mask_dt;
2411 tree mask_vectype;
2412 if (!vect_is_simple_use (mask, stmt_info->vinfo, &def_stmt, &mask_dt,
2413 &mask_vectype))
2414 {
2415 if (dump_enabled_p ())
2416 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2417 "mask use not simple.\n");
2418 return false;
2419 }
2420
2421 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2422 if (!mask_vectype)
2423 mask_vectype = get_mask_type_for_scalar_type (TREE_TYPE (vectype));
2424
2425 if (!mask_vectype || !VECTOR_BOOLEAN_TYPE_P (mask_vectype))
2426 {
2427 if (dump_enabled_p ())
2428 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2429 "could not find an appropriate vector mask type.\n");
2430 return false;
2431 }
2432
2433 if (maybe_ne (TYPE_VECTOR_SUBPARTS (mask_vectype),
2434 TYPE_VECTOR_SUBPARTS (vectype)))
2435 {
2436 if (dump_enabled_p ())
2437 {
2438 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2439 "vector mask type ");
2440 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, mask_vectype);
2441 dump_printf (MSG_MISSED_OPTIMIZATION,
2442 " does not match vector data type ");
2443 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, vectype);
2444 dump_printf (MSG_MISSED_OPTIMIZATION, ".\n");
2445 }
2446 return false;
2447 }
2448
2449 *mask_dt_out = mask_dt;
2450 *mask_vectype_out = mask_vectype;
2451 return true;
2452 }
2453
2454 /* Return true if stored value RHS is suitable for vectorizing store
2455 statement STMT. When returning true, store the type of the
2456 definition in *RHS_DT_OUT, the type of the vectorized store value in
2457 *RHS_VECTYPE_OUT and the type of the store in *VLS_TYPE_OUT. */
2458
2459 static bool
2460 vect_check_store_rhs (gimple *stmt, tree rhs, vect_def_type *rhs_dt_out,
2461 tree *rhs_vectype_out, vec_load_store_type *vls_type_out)
2462 {
2463 /* If this is a store of a constant, make sure
2464 native_encode_expr can handle it. */
2465 if (CONSTANT_CLASS_P (rhs) && native_encode_expr (rhs, NULL, 64) == 0)
2466 {
2467 if (dump_enabled_p ())
2468 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2469 "cannot encode constant as a byte sequence.\n");
2470 return false;
2471 }
2472
2473 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2474 gimple *def_stmt;
2475 enum vect_def_type rhs_dt;
2476 tree rhs_vectype;
2477 if (!vect_is_simple_use (rhs, stmt_info->vinfo, &def_stmt, &rhs_dt,
2478 &rhs_vectype))
2479 {
2480 if (dump_enabled_p ())
2481 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2482 "use not simple.\n");
2483 return false;
2484 }
2485
2486 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2487 if (rhs_vectype && !useless_type_conversion_p (vectype, rhs_vectype))
2488 {
2489 if (dump_enabled_p ())
2490 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2491 "incompatible vector types.\n");
2492 return false;
2493 }
2494
2495 *rhs_dt_out = rhs_dt;
2496 *rhs_vectype_out = rhs_vectype;
2497 if (rhs_dt == vect_constant_def || rhs_dt == vect_external_def)
2498 *vls_type_out = VLS_STORE_INVARIANT;
2499 else
2500 *vls_type_out = VLS_STORE;
2501 return true;
2502 }
2503
2504 /* Build an all-ones vector mask of type MASKTYPE while vectorizing STMT.
2505 Note that we support masks with floating-point type, in which case the
2506 floats are interpreted as a bitmask. */
2507
2508 static tree
2509 vect_build_all_ones_mask (gimple *stmt, tree masktype)
2510 {
2511 if (TREE_CODE (masktype) == INTEGER_TYPE)
2512 return build_int_cst (masktype, -1);
2513 else if (TREE_CODE (TREE_TYPE (masktype)) == INTEGER_TYPE)
2514 {
2515 tree mask = build_int_cst (TREE_TYPE (masktype), -1);
2516 mask = build_vector_from_val (masktype, mask);
2517 return vect_init_vector (stmt, mask, masktype, NULL);
2518 }
2519 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (masktype)))
2520 {
2521 REAL_VALUE_TYPE r;
2522 long tmp[6];
2523 for (int j = 0; j < 6; ++j)
2524 tmp[j] = -1;
2525 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (masktype)));
2526 tree mask = build_real (TREE_TYPE (masktype), r);
2527 mask = build_vector_from_val (masktype, mask);
2528 return vect_init_vector (stmt, mask, masktype, NULL);
2529 }
2530 gcc_unreachable ();
2531 }
2532
2533 /* Build an all-zero merge value of type VECTYPE while vectorizing
2534 STMT as a gather load. */
2535
2536 static tree
2537 vect_build_zero_merge_argument (gimple *stmt, tree vectype)
2538 {
2539 tree merge;
2540 if (TREE_CODE (TREE_TYPE (vectype)) == INTEGER_TYPE)
2541 merge = build_int_cst (TREE_TYPE (vectype), 0);
2542 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (vectype)))
2543 {
2544 REAL_VALUE_TYPE r;
2545 long tmp[6];
2546 for (int j = 0; j < 6; ++j)
2547 tmp[j] = 0;
2548 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (vectype)));
2549 merge = build_real (TREE_TYPE (vectype), r);
2550 }
2551 else
2552 gcc_unreachable ();
2553 merge = build_vector_from_val (vectype, merge);
2554 return vect_init_vector (stmt, merge, vectype, NULL);
2555 }
2556
2557 /* Build a gather load call while vectorizing STMT. Insert new instructions
2558 before GSI and add them to VEC_STMT. GS_INFO describes the gather load
2559 operation. If the load is conditional, MASK is the unvectorized
2560 condition and MASK_DT is its definition type, otherwise MASK is null. */
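/* An illustrative sketch of a single generated call (names hypothetical;
   the callee is the target builtin recorded in GS_INFO->DECL):

     vect__1 = TARGET_GATHER (src_op, base_ptr, vec_offset, mask_op, scale);

   For the WIDEN and NARROW cases the offset or result vectors are first
   reshuffled with VEC_PERM_EXPRs built from PERM_MASK and MASK_PERM_MASK
   below.  */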
2561
2562 static void
2563 vect_build_gather_load_calls (gimple *stmt, gimple_stmt_iterator *gsi,
2564 gimple **vec_stmt, gather_scatter_info *gs_info,
2565 tree mask, vect_def_type mask_dt)
2566 {
2567 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2568 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2569 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2570 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2571 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2572 int ncopies = vect_get_num_copies (loop_vinfo, vectype);
2573 edge pe = loop_preheader_edge (loop);
2574 enum { NARROW, NONE, WIDEN } modifier;
2575 poly_uint64 gather_off_nunits
2576 = TYPE_VECTOR_SUBPARTS (gs_info->offset_vectype);
2577
2578 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info->decl));
2579 tree rettype = TREE_TYPE (TREE_TYPE (gs_info->decl));
2580 tree srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2581 tree ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2582 tree idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2583 tree masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2584 tree scaletype = TREE_VALUE (arglist);
2585 gcc_checking_assert (types_compatible_p (srctype, rettype)
2586 && (!mask || types_compatible_p (srctype, masktype)));
2587
2588 tree perm_mask = NULL_TREE;
2589 tree mask_perm_mask = NULL_TREE;
2590 if (known_eq (nunits, gather_off_nunits))
2591 modifier = NONE;
2592 else if (known_eq (nunits * 2, gather_off_nunits))
2593 {
2594 modifier = WIDEN;
2595
2596 /* Currently widening gathers and scatters are only supported for
2597 fixed-length vectors. */
2598 int count = gather_off_nunits.to_constant ();
2599 vec_perm_builder sel (count, count, 1);
2600 for (int i = 0; i < count; ++i)
2601 sel.quick_push (i | (count / 2));
2602
2603 vec_perm_indices indices (sel, 1, count);
2604 perm_mask = vect_gen_perm_mask_checked (gs_info->offset_vectype,
2605 indices);
2606 }
2607 else if (known_eq (nunits, gather_off_nunits * 2))
2608 {
2609 modifier = NARROW;
2610
2611 /* Currently narrowing gathers and scatters are only supported for
2612 fixed-length vectors. */
2613 int count = nunits.to_constant ();
2614 vec_perm_builder sel (count, count, 1);
2615 sel.quick_grow (count);
2616 for (int i = 0; i < count; ++i)
2617 sel[i] = i < count / 2 ? i : i + count / 2;
2618 vec_perm_indices indices (sel, 2, count);
2619 perm_mask = vect_gen_perm_mask_checked (vectype, indices);
2620
2621 ncopies *= 2;
2622
2623 if (mask)
2624 {
2625 for (int i = 0; i < count; ++i)
2626 sel[i] = i | (count / 2);
2627 indices.new_vector (sel, 2, count);
2628 mask_perm_mask = vect_gen_perm_mask_checked (masktype, indices);
2629 }
2630 }
2631 else
2632 gcc_unreachable ();
2633
2634 tree vec_dest = vect_create_destination_var (gimple_get_lhs (stmt),
2635 vectype);
2636
2637 tree ptr = fold_convert (ptrtype, gs_info->base);
2638 if (!is_gimple_min_invariant (ptr))
2639 {
2640 gimple_seq seq;
2641 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
2642 basic_block new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
2643 gcc_assert (!new_bb);
2644 }
2645
2646 tree scale = build_int_cst (scaletype, gs_info->scale);
2647
2648 tree vec_oprnd0 = NULL_TREE;
2649 tree vec_mask = NULL_TREE;
2650 tree src_op = NULL_TREE;
2651 tree mask_op = NULL_TREE;
2652 tree prev_res = NULL_TREE;
2653 stmt_vec_info prev_stmt_info = NULL;
2654
2655 if (!mask)
2656 {
2657 src_op = vect_build_zero_merge_argument (stmt, rettype);
2658 mask_op = vect_build_all_ones_mask (stmt, masktype);
2659 }
2660
2661 for (int j = 0; j < ncopies; ++j)
2662 {
2663 tree op, var;
2664 gimple *new_stmt;
2665 if (modifier == WIDEN && (j & 1))
2666 op = permute_vec_elements (vec_oprnd0, vec_oprnd0,
2667 perm_mask, stmt, gsi);
2668 else if (j == 0)
2669 op = vec_oprnd0
2670 = vect_get_vec_def_for_operand (gs_info->offset, stmt);
2671 else
2672 op = vec_oprnd0
2673 = vect_get_vec_def_for_stmt_copy (gs_info->offset_dt, vec_oprnd0);
2674
2675 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
2676 {
2677 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op)),
2678 TYPE_VECTOR_SUBPARTS (idxtype)));
2679 var = vect_get_new_ssa_name (idxtype, vect_simple_var);
2680 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
2681 new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
2682 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2683 op = var;
2684 }
2685
2686 if (mask)
2687 {
2688 if (mask_perm_mask && (j & 1))
2689 mask_op = permute_vec_elements (mask_op, mask_op,
2690 mask_perm_mask, stmt, gsi);
2691 else
2692 {
2693 if (j == 0)
2694 vec_mask = vect_get_vec_def_for_operand (mask, stmt);
2695 else
2696 vec_mask = vect_get_vec_def_for_stmt_copy (mask_dt, vec_mask);
2697
2698 mask_op = vec_mask;
2699 if (!useless_type_conversion_p (masktype, TREE_TYPE (vec_mask)))
2700 {
2701 gcc_assert
2702 (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (mask_op)),
2703 TYPE_VECTOR_SUBPARTS (masktype)));
2704 var = vect_get_new_ssa_name (masktype, vect_simple_var);
2705 mask_op = build1 (VIEW_CONVERT_EXPR, masktype, mask_op);
2706 new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR,
2707 mask_op);
2708 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2709 mask_op = var;
2710 }
2711 }
2712 src_op = mask_op;
2713 }
2714
2715 new_stmt = gimple_build_call (gs_info->decl, 5, src_op, ptr, op,
2716 mask_op, scale);
2717
2718 if (!useless_type_conversion_p (vectype, rettype))
2719 {
2720 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (vectype),
2721 TYPE_VECTOR_SUBPARTS (rettype)));
2722 op = vect_get_new_ssa_name (rettype, vect_simple_var);
2723 gimple_call_set_lhs (new_stmt, op);
2724 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2725 var = make_ssa_name (vec_dest);
2726 op = build1 (VIEW_CONVERT_EXPR, vectype, op);
2727 new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
2728 }
2729 else
2730 {
2731 var = make_ssa_name (vec_dest, new_stmt);
2732 gimple_call_set_lhs (new_stmt, var);
2733 }
2734
2735 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2736
2737 if (modifier == NARROW)
2738 {
2739 if ((j & 1) == 0)
2740 {
2741 prev_res = var;
2742 continue;
2743 }
2744 var = permute_vec_elements (prev_res, var, perm_mask, stmt, gsi);
2745 new_stmt = SSA_NAME_DEF_STMT (var);
2746 }
2747
2748 if (prev_stmt_info == NULL)
2749 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2750 else
2751 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2752 prev_stmt_info = vinfo_for_stmt (new_stmt);
2753 }
2754 }
2755
2756 /* Prepare the base and offset in GS_INFO for vectorization.
2757 Set *DATAREF_PTR to the loop-invariant base address and *VEC_OFFSET
2758 to the vectorized offset argument for the first copy of STMT. STMT
2759 is the statement described by GS_INFO and LOOP is the containing loop. */
2760
2761 static void
2762 vect_get_gather_scatter_ops (struct loop *loop, gimple *stmt,
2763 gather_scatter_info *gs_info,
2764 tree *dataref_ptr, tree *vec_offset)
2765 {
2766 gimple_seq stmts = NULL;
2767 *dataref_ptr = force_gimple_operand (gs_info->base, &stmts, true, NULL_TREE);
2768 if (stmts != NULL)
2769 {
2770 basic_block new_bb;
2771 edge pe = loop_preheader_edge (loop);
2772 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
2773 gcc_assert (!new_bb);
2774 }
2775 tree offset_type = TREE_TYPE (gs_info->offset);
2776 tree offset_vectype = get_vectype_for_scalar_type (offset_type);
2777 *vec_offset = vect_get_vec_def_for_operand (gs_info->offset, stmt,
2778 offset_vectype);
2779 }
2780
2781 /* Prepare to implement a grouped or strided load or store using
2782 the gather load or scatter store operation described by GS_INFO.
2783 STMT is the load or store statement.
2784
2785 Set *DATAREF_BUMP to the amount that should be added to the base
2786 address after each copy of the vectorized statement. Set *VEC_OFFSET
2787 to an invariant offset vector in which element I has the value
2788 I * DR_STEP / SCALE. */
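/* A worked example (illustrative numbers): with V4SI vectors, DR_STEP == 8
   and SCALE == 8, X == 1, so *VEC_OFFSET is { 0, 1, 2, 3 } and
   *DATAREF_BUMP is DR_STEP * 4 == 32 bytes per copy.  */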
2789
2790 static void
2791 vect_get_strided_load_store_ops (gimple *stmt, loop_vec_info loop_vinfo,
2792 gather_scatter_info *gs_info,
2793 tree *dataref_bump, tree *vec_offset)
2794 {
2795 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2796 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2797 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2798 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2799 gimple_seq stmts;
2800
2801 tree bump = size_binop (MULT_EXPR,
2802 fold_convert (sizetype, DR_STEP (dr)),
2803 size_int (TYPE_VECTOR_SUBPARTS (vectype)));
2804 *dataref_bump = force_gimple_operand (bump, &stmts, true, NULL_TREE);
2805 if (stmts)
2806 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
2807
2808 /* The offset given in GS_INFO can have pointer type, so use the element
2809 type of the vector instead. */
2810 tree offset_type = TREE_TYPE (gs_info->offset);
2811 tree offset_vectype = get_vectype_for_scalar_type (offset_type);
2812 offset_type = TREE_TYPE (offset_vectype);
2813
2814 /* Calculate X = DR_STEP / SCALE and convert it to the appropriate type. */
2815 tree step = size_binop (EXACT_DIV_EXPR, DR_STEP (dr),
2816 ssize_int (gs_info->scale));
2817 step = fold_convert (offset_type, step);
2818 step = force_gimple_operand (step, &stmts, true, NULL_TREE);
2819
2820 /* Create {0, X, X*2, X*3, ...}. */
2821 *vec_offset = gimple_build (&stmts, VEC_SERIES_EXPR, offset_vectype,
2822 build_zero_cst (offset_type), step);
2823 if (stmts)
2824 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
2825 }
2826
2827 /* Return the amount that should be added to a vector pointer to move
2828 to the next or previous copy of AGGR_TYPE. DR is the data reference
2829 being vectorized and MEMORY_ACCESS_TYPE describes the type of
2830 vectorization. */
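/* For example (illustrative): for a 16-byte AGGR_TYPE the increment is
   16 bytes, negated to -16 bytes for a negative-step (reversed) access,
   and zero for VMAT_INVARIANT.  */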
2831
2832 static tree
2833 vect_get_data_ptr_increment (data_reference *dr, tree aggr_type,
2834 vect_memory_access_type memory_access_type)
2835 {
2836 if (memory_access_type == VMAT_INVARIANT)
2837 return size_zero_node;
2838
2839 tree iv_step = TYPE_SIZE_UNIT (aggr_type);
2840 tree step = vect_dr_behavior (dr)->step;
2841 if (tree_int_cst_sgn (step) == -1)
2842 iv_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (iv_step), iv_step);
2843 return iv_step;
2844 }
2845
2846 /* Check and perform vectorization of BUILT_IN_BSWAP{16,32,64}. */
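/* For example (illustrative): __builtin_bswap32 on a V4SI vector is
   implemented by viewing the vector as 16 chars and permuting the bytes
   within each 4-byte word with the selector

     { 3, 2, 1, 0,  7, 6, 5, 4,  11, 10, 9, 8,  15, 14, 13, 12 }

   which the builder below encodes as four stepped patterns of three
   elements each.  */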
2847
2848 static bool
2849 vectorizable_bswap (gimple *stmt, gimple_stmt_iterator *gsi,
2850 gimple **vec_stmt, slp_tree slp_node,
2851 tree vectype_in, enum vect_def_type *dt)
2852 {
2853 tree op, vectype;
2854 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2855 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2856 unsigned ncopies;
2857 unsigned HOST_WIDE_INT nunits, num_bytes;
2858
2859 op = gimple_call_arg (stmt, 0);
2860 vectype = STMT_VINFO_VECTYPE (stmt_info);
2861
2862 if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits))
2863 return false;
2864
2865 /* Multiple types in SLP are handled by creating the appropriate number of
2866 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
2867 case of SLP. */
2868 if (slp_node)
2869 ncopies = 1;
2870 else
2871 ncopies = vect_get_num_copies (loop_vinfo, vectype);
2872
2873 gcc_assert (ncopies >= 1);
2874
2875 tree char_vectype = get_same_sized_vectype (char_type_node, vectype_in);
2876 if (! char_vectype)
2877 return false;
2878
2879 if (!TYPE_VECTOR_SUBPARTS (char_vectype).is_constant (&num_bytes))
2880 return false;
2881
2882 unsigned word_bytes = num_bytes / nunits;
2883
2884 /* The encoding uses one stepped pattern for each byte in the word. */
2885 vec_perm_builder elts (num_bytes, word_bytes, 3);
2886 for (unsigned i = 0; i < 3; ++i)
2887 for (unsigned j = 0; j < word_bytes; ++j)
2888 elts.quick_push ((i + 1) * word_bytes - j - 1);
2889
2890 vec_perm_indices indices (elts, 1, num_bytes);
2891 if (!can_vec_perm_const_p (TYPE_MODE (char_vectype), indices))
2892 return false;
2893
2894 if (! vec_stmt)
2895 {
2896 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
2897 if (dump_enabled_p ())
2898 dump_printf_loc (MSG_NOTE, vect_location, "=== vectorizable_bswap ==="
2899 "\n");
2900 if (! slp_node)
2901 {
2902 add_stmt_cost (stmt_info->vinfo->target_cost_data,
2903 1, vector_stmt, stmt_info, 0, vect_prologue);
2904 add_stmt_cost (stmt_info->vinfo->target_cost_data,
2905 ncopies, vec_perm, stmt_info, 0, vect_body);
2906 }
2907 return true;
2908 }
2909
2910 tree bswap_vconst = vec_perm_indices_to_tree (char_vectype, indices);
2911
2912 /* Transform. */
2913 vec<tree> vec_oprnds = vNULL;
2914 gimple *new_stmt = NULL;
2915 stmt_vec_info prev_stmt_info = NULL;
2916 for (unsigned j = 0; j < ncopies; j++)
2917 {
2918 /* Handle uses. */
2919 if (j == 0)
2920 vect_get_vec_defs (op, NULL, stmt, &vec_oprnds, NULL, slp_node);
2921 else
2922 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds, NULL);
2923
2924 /* Arguments are ready. create the new vector stmt. */
2925 unsigned i;
2926 tree vop;
2927 FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
2928 {
2929 tree tem = make_ssa_name (char_vectype);
2930 new_stmt = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
2931 char_vectype, vop));
2932 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2933 tree tem2 = make_ssa_name (char_vectype);
2934 new_stmt = gimple_build_assign (tem2, VEC_PERM_EXPR,
2935 tem, tem, bswap_vconst);
2936 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2937 tem = make_ssa_name (vectype);
2938 new_stmt = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
2939 vectype, tem2));
2940 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2941 if (slp_node)
2942 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
2943 }
2944
2945 if (slp_node)
2946 continue;
2947
2948 if (j == 0)
2949 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2950 else
2951 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2952
2953 prev_stmt_info = vinfo_for_stmt (new_stmt);
2954 }
2955
2956 vec_oprnds.release ();
2957 return true;
2958 }
2959
2960 /* Return true if vector types VECTYPE_IN and VECTYPE_OUT have
2961 integer elements and if we can narrow VECTYPE_IN to VECTYPE_OUT
2962 in a single step. On success, store the binary pack code in
2963 *CONVERT_CODE. */
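/* For example (illustrative): V4SI can be narrowed to V8HI in a single
   step, with VEC_PACK_TRUNC_EXPR packing two V4SI operands into one V8HI
   result, so *CONVERT_CODE would be VEC_PACK_TRUNC_EXPR.  Narrowings that
   need an intermediate type are rejected.  */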
2964
2965 static bool
2966 simple_integer_narrowing (tree vectype_out, tree vectype_in,
2967 tree_code *convert_code)
2968 {
2969 if (!INTEGRAL_TYPE_P (TREE_TYPE (vectype_out))
2970 || !INTEGRAL_TYPE_P (TREE_TYPE (vectype_in)))
2971 return false;
2972
2973 tree_code code;
2974 int multi_step_cvt = 0;
2975 auto_vec <tree, 8> interm_types;
2976 if (!supportable_narrowing_operation (NOP_EXPR, vectype_out, vectype_in,
2977 &code, &multi_step_cvt,
2978 &interm_types)
2979 || multi_step_cvt)
2980 return false;
2981
2982 *convert_code = code;
2983 return true;
2984 }
2985
2986 /* Function vectorizable_call.
2987
2988 Check if GS performs a function call that can be vectorized.
2989 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2990 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
2991 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2992
2993 static bool
2994 vectorizable_call (gimple *gs, gimple_stmt_iterator *gsi, gimple **vec_stmt,
2995 slp_tree slp_node)
2996 {
2997 gcall *stmt;
2998 tree vec_dest;
2999 tree scalar_dest;
3000 tree op, type;
3001 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
3002 stmt_vec_info stmt_info = vinfo_for_stmt (gs), prev_stmt_info;
3003 tree vectype_out, vectype_in;
3004 poly_uint64 nunits_in;
3005 poly_uint64 nunits_out;
3006 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3007 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
3008 vec_info *vinfo = stmt_info->vinfo;
3009 tree fndecl, new_temp, rhs_type;
3010 gimple *def_stmt;
3011 enum vect_def_type dt[3]
3012 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
3013 int ndts = 3;
3014 gimple *new_stmt = NULL;
3015 int ncopies, j;
3016 vec<tree> vargs = vNULL;
3017 enum { NARROW, NONE, WIDEN } modifier;
3018 size_t i, nargs;
3019 tree lhs;
3020
3021 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3022 return false;
3023
3024 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
3025 && ! vec_stmt)
3026 return false;
3027
3028 /* Is GS a vectorizable call? */
3029 stmt = dyn_cast <gcall *> (gs);
3030 if (!stmt)
3031 return false;
3032
3033 if (gimple_call_internal_p (stmt)
3034 && (internal_load_fn_p (gimple_call_internal_fn (stmt))
3035 || internal_store_fn_p (gimple_call_internal_fn (stmt))))
3036 /* Handled by vectorizable_load and vectorizable_store. */
3037 return false;
3038
3039 if (gimple_call_lhs (stmt) == NULL_TREE
3040 || TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
3041 return false;
3042
3043 gcc_checking_assert (!stmt_can_throw_internal (stmt));
3044
3045 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
3046
3047 /* Process function arguments. */
3048 rhs_type = NULL_TREE;
3049 vectype_in = NULL_TREE;
3050 nargs = gimple_call_num_args (stmt);
3051
3052 /* Bail out if the function has more than three arguments; we do not have
3053 interesting builtin functions to vectorize with more than two arguments
3054 except for fma. Calls with no arguments are not handled either. */
3055 if (nargs == 0 || nargs > 3)
3056 return false;
3057
3058 /* Ignore the argument of IFN_GOMP_SIMD_LANE, it is magic. */
3059 if (gimple_call_internal_p (stmt)
3060 && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE)
3061 {
3062 nargs = 0;
3063 rhs_type = unsigned_type_node;
3064 }
3065
3066 for (i = 0; i < nargs; i++)
3067 {
3068 tree opvectype;
3069
3070 op = gimple_call_arg (stmt, i);
3071
3072 /* We can only handle calls with arguments of the same type. */
3073 if (rhs_type
3074 && !types_compatible_p (rhs_type, TREE_TYPE (op)))
3075 {
3076 if (dump_enabled_p ())
3077 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3078 "argument types differ.\n");
3079 return false;
3080 }
3081 if (!rhs_type)
3082 rhs_type = TREE_TYPE (op);
3083
3084 if (!vect_is_simple_use (op, vinfo, &def_stmt, &dt[i], &opvectype))
3085 {
3086 if (dump_enabled_p ())
3087 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3088 "use not simple.\n");
3089 return false;
3090 }
3091
3092 if (!vectype_in)
3093 vectype_in = opvectype;
3094 else if (opvectype
3095 && opvectype != vectype_in)
3096 {
3097 if (dump_enabled_p ())
3098 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3099 "argument vector types differ.\n");
3100 return false;
3101 }
3102 }
3103 /* If all arguments are external or constant defs use a vector type with
3104 the same size as the output vector type. */
3105 if (!vectype_in)
3106 vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
3107 if (vec_stmt)
3108 gcc_assert (vectype_in);
3109 if (!vectype_in)
3110 {
3111 if (dump_enabled_p ())
3112 {
3113 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3114 "no vectype for scalar type ");
3115 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
3116 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
3117 }
3118
3119 return false;
3120 }
3121
3122 /* FORNOW */
3123 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
3124 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
3125 if (known_eq (nunits_in * 2, nunits_out))
3126 modifier = NARROW;
3127 else if (known_eq (nunits_out, nunits_in))
3128 modifier = NONE;
3129 else if (known_eq (nunits_out * 2, nunits_in))
3130 modifier = WIDEN;
3131 else
3132 return false;
3133
3134 /* We only handle functions that do not read or clobber memory. */
3135 if (gimple_vuse (stmt))
3136 {
3137 if (dump_enabled_p ())
3138 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3139 "function reads from or writes to memory.\n");
3140 return false;
3141 }
3142
3143 /* For now, we only vectorize functions if a target specific builtin
3144 is available. TODO -- in some cases, it might be profitable to
3145 insert the calls for pieces of the vector, in order to be able
3146 to vectorize other operations in the loop. */
3147 fndecl = NULL_TREE;
3148 internal_fn ifn = IFN_LAST;
3149 combined_fn cfn = gimple_call_combined_fn (stmt);
3150 tree callee = gimple_call_fndecl (stmt);
3151
3152 /* First try using an internal function. */
3153 tree_code convert_code = ERROR_MARK;
3154 if (cfn != CFN_LAST
3155 && (modifier == NONE
3156 || (modifier == NARROW
3157 && simple_integer_narrowing (vectype_out, vectype_in,
3158 &convert_code))))
3159 ifn = vectorizable_internal_function (cfn, callee, vectype_out,
3160 vectype_in);
3161
3162 /* If that fails, try asking for a target-specific built-in function. */
3163 if (ifn == IFN_LAST)
3164 {
3165 if (cfn != CFN_LAST)
3166 fndecl = targetm.vectorize.builtin_vectorized_function
3167 (cfn, vectype_out, vectype_in);
3168 else if (callee)
3169 fndecl = targetm.vectorize.builtin_md_vectorized_function
3170 (callee, vectype_out, vectype_in);
3171 }
3172
3173 if (ifn == IFN_LAST && !fndecl)
3174 {
3175 if (cfn == CFN_GOMP_SIMD_LANE
3176 && !slp_node
3177 && loop_vinfo
3178 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
3179 && TREE_CODE (gimple_call_arg (stmt, 0)) == SSA_NAME
3180 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
3181 == SSA_NAME_VAR (gimple_call_arg (stmt, 0)))
3182 {
3183 /* We can handle IFN_GOMP_SIMD_LANE by returning a
3184 { 0, 1, 2, ... vf - 1 } vector. */
3185 gcc_assert (nargs == 0);
3186 }
3187 else if (modifier == NONE
3188 && (gimple_call_builtin_p (stmt, BUILT_IN_BSWAP16)
3189 || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP32)
3190 || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP64)))
3191 return vectorizable_bswap (stmt, gsi, vec_stmt, slp_node,
3192 vectype_in, dt);
3193 else
3194 {
3195 if (dump_enabled_p ())
3196 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3197 "function is not vectorizable.\n");
3198 return false;
3199 }
3200 }
3201
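/* Determine the number of vector statements to generate.  For a NARROW
   call that has to use a target builtin (ifn == IFN_LAST) each vector call
   consumes two input vectors per argument and produces one output vector,
   so base NCOPIES on the output vector type; otherwise base it on the
   input vector type.  */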
3202 if (slp_node)
3203 ncopies = 1;
3204 else if (modifier == NARROW && ifn == IFN_LAST)
3205 ncopies = vect_get_num_copies (loop_vinfo, vectype_out);
3206 else
3207 ncopies = vect_get_num_copies (loop_vinfo, vectype_in);
3208
3209 /* Sanity check: make sure that at least one copy of the vectorized stmt
3210 needs to be generated. */
3211 gcc_assert (ncopies >= 1);
3212
3213 if (!vec_stmt) /* transformation not required. */
3214 {
3215 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
3216 if (dump_enabled_p ())
3217 dump_printf_loc (MSG_NOTE, vect_location, "=== vectorizable_call ==="
3218 "\n");
3219 if (!slp_node)
3220 {
3221 vect_model_simple_cost (stmt_info, ncopies, dt, ndts, NULL, NULL);
3222 if (ifn != IFN_LAST && modifier == NARROW && !slp_node)
3223 add_stmt_cost (stmt_info->vinfo->target_cost_data, ncopies / 2,
3224 vec_promote_demote, stmt_info, 0, vect_body);
3225 }
3226
3227 return true;
3228 }
3229
3230 /* Transform. */
3231
3232 if (dump_enabled_p ())
3233 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
3234
3235 /* Handle def. */
3236 scalar_dest = gimple_call_lhs (stmt);
3237 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
3238
3239 prev_stmt_info = NULL;
3240 if (modifier == NONE || ifn != IFN_LAST)
3241 {
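/* Generate NCOPIES copies of the vectorized call.  For a NARROW call
   implemented with an internal function, pairs of vector results are
   computed in the input vector type and then combined into a single
   output vector of narrower elements using CONVERT_CODE.  */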
3242 tree prev_res = NULL_TREE;
3243 for (j = 0; j < ncopies; ++j)
3244 {
3245 /* Build argument list for the vectorized call. */
3246 if (j == 0)
3247 vargs.create (nargs);
3248 else
3249 vargs.truncate (0);
3250
3251 if (slp_node)
3252 {
3253 auto_vec<vec<tree> > vec_defs (nargs);
3254 vec<tree> vec_oprnds0;
3255
3256 for (i = 0; i < nargs; i++)
3257 vargs.quick_push (gimple_call_arg (stmt, i));
3258 vect_get_slp_defs (vargs, slp_node, &vec_defs);
3259 vec_oprnds0 = vec_defs[0];
3260
3261 /* Arguments are ready. Create the new vector stmt. */
3262 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_oprnd0)
3263 {
3264 size_t k;
3265 for (k = 0; k < nargs; k++)
3266 {
3267 vec<tree> vec_oprndsk = vec_defs[k];
3268 vargs[k] = vec_oprndsk[i];
3269 }
3270 if (modifier == NARROW)
3271 {
3272 tree half_res = make_ssa_name (vectype_in);
3273 gcall *call
3274 = gimple_build_call_internal_vec (ifn, vargs);
3275 gimple_call_set_lhs (call, half_res);
3276 gimple_call_set_nothrow (call, true);
3277 new_stmt = call;
3278 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3279 if ((i & 1) == 0)
3280 {
3281 prev_res = half_res;
3282 continue;
3283 }
3284 new_temp = make_ssa_name (vec_dest);
3285 new_stmt = gimple_build_assign (new_temp, convert_code,
3286 prev_res, half_res);
3287 }
3288 else
3289 {
3290 gcall *call;
3291 if (ifn != IFN_LAST)
3292 call = gimple_build_call_internal_vec (ifn, vargs);
3293 else
3294 call = gimple_build_call_vec (fndecl, vargs);
3295 new_temp = make_ssa_name (vec_dest, call);
3296 gimple_call_set_lhs (call, new_temp);
3297 gimple_call_set_nothrow (call, true);
3298 new_stmt = call;
3299 }
3300 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3301 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
3302 }
3303
3304 for (i = 0; i < nargs; i++)
3305 {
3306 vec<tree> vec_oprndsi = vec_defs[i];
3307 vec_oprndsi.release ();
3308 }
3309 continue;
3310 }
3311
3312 for (i = 0; i < nargs; i++)
3313 {
3314 op = gimple_call_arg (stmt, i);
3315 if (j == 0)
3316 vec_oprnd0
3317 = vect_get_vec_def_for_operand (op, stmt);
3318 else
3319 {
3320 vec_oprnd0 = gimple_call_arg (new_stmt, i);
3321 vec_oprnd0
3322 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
3323 }
3324
3325 vargs.quick_push (vec_oprnd0);
3326 }
3327
3328 if (gimple_call_internal_p (stmt)
3329 && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE)
3330 {
3331 tree cst = build_index_vector (vectype_out, j * nunits_out, 1);
3332 tree new_var
3333 = vect_get_new_ssa_name (vectype_out, vect_simple_var, "cst_");
3334 gimple *init_stmt = gimple_build_assign (new_var, cst);
3335 vect_init_vector_1 (stmt, init_stmt, NULL);
3336 new_temp = make_ssa_name (vec_dest);
3337 new_stmt = gimple_build_assign (new_temp, new_var);
3338 }
3339 else if (modifier == NARROW)
3340 {
3341 tree half_res = make_ssa_name (vectype_in);
3342 gcall *call = gimple_build_call_internal_vec (ifn, vargs);
3343 gimple_call_set_lhs (call, half_res);
3344 gimple_call_set_nothrow (call, true);
3345 new_stmt = call;
3346 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3347 if ((j & 1) == 0)
3348 {
3349 prev_res = half_res;
3350 continue;
3351 }
3352 new_temp = make_ssa_name (vec_dest);
3353 new_stmt = gimple_build_assign (new_temp, convert_code,
3354 prev_res, half_res);
3355 }
3356 else
3357 {
3358 gcall *call;
3359 if (ifn != IFN_LAST)
3360 call = gimple_build_call_internal_vec (ifn, vargs);
3361 else
3362 call = gimple_build_call_vec (fndecl, vargs);
3363 new_temp = make_ssa_name (vec_dest, new_stmt);
3364 gimple_call_set_lhs (call, new_temp);
3365 gimple_call_set_nothrow (call, true);
3366 new_stmt = call;
3367 }
3368 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3369
3370 if (j == (modifier == NARROW ? 1 : 0))
3371 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3372 else
3373 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3374
3375 prev_stmt_info = vinfo_for_stmt (new_stmt);
3376 }
3377 }
3378 else if (modifier == NARROW)
3379 {
3380 for (j = 0; j < ncopies; ++j)
3381 {
3382 /* Build argument list for the vectorized call. */
3383 if (j == 0)
3384 vargs.create (nargs * 2);
3385 else
3386 vargs.truncate (0);
3387
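/* In this narrowing path each vectorized call takes two vector defs per
   scalar argument and produces a single vector of narrower elements.  */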
3388 if (slp_node)
3389 {
3390 auto_vec<vec<tree> > vec_defs (nargs);
3391 vec<tree> vec_oprnds0;
3392
3393 for (i = 0; i < nargs; i++)
3394 vargs.quick_push (gimple_call_arg (stmt, i));
3395 vect_get_slp_defs (vargs, slp_node, &vec_defs);
3396 vec_oprnds0 = vec_defs[0];
3397
3398 /* Arguments are ready. Create the new vector stmt. */
3399 for (i = 0; vec_oprnds0.iterate (i, &vec_oprnd0); i += 2)
3400 {
3401 size_t k;
3402 vargs.truncate (0);
3403 for (k = 0; k < nargs; k++)
3404 {
3405 vec<tree> vec_oprndsk = vec_defs[k];
3406 vargs.quick_push (vec_oprndsk[i]);
3407 vargs.quick_push (vec_oprndsk[i + 1]);
3408 }
3409 gcall *call;
3410 if (ifn != IFN_LAST)
3411 call = gimple_build_call_internal_vec (ifn, vargs);
3412 else
3413 call = gimple_build_call_vec (fndecl, vargs);
3414 new_temp = make_ssa_name (vec_dest, call);
3415 gimple_call_set_lhs (call, new_temp);
3416 gimple_call_set_nothrow (call, true);
3417 new_stmt = call;
3418 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3419 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
3420 }
3421
3422 for (i = 0; i < nargs; i++)
3423 {
3424 vec<tree> vec_oprndsi = vec_defs[i];
3425 vec_oprndsi.release ();
3426 }
3427 continue;
3428 }
3429
3430 for (i = 0; i < nargs; i++)
3431 {
3432 op = gimple_call_arg (stmt, i);
3433 if (j == 0)
3434 {
3435 vec_oprnd0
3436 = vect_get_vec_def_for_operand (op, stmt);
3437 vec_oprnd1
3438 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
3439 }
3440 else
3441 {
3442 vec_oprnd1 = gimple_call_arg (new_stmt, 2*i + 1);
3443 vec_oprnd0
3444 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd1);
3445 vec_oprnd1
3446 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
3447 }
3448
3449 vargs.quick_push (vec_oprnd0);
3450 vargs.quick_push (vec_oprnd1);
3451 }
3452
3453 new_stmt = gimple_build_call_vec (fndecl, vargs);
3454 new_temp = make_ssa_name (vec_dest, new_stmt);
3455 gimple_call_set_lhs (new_stmt, new_temp);
3456 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3457
3458 if (j == 0)
3459 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
3460 else
3461 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3462
3463 prev_stmt_info = vinfo_for_stmt (new_stmt);
3464 }
3465
3466 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
3467 }
3468 else
3469 /* No current target implements this case. */
3470 return false;
3471
3472 vargs.release ();
3473
3474 /* The call in STMT might prevent it from being removed in dce.
3475 However, we cannot remove it here, due to the way the SSA name
3476 it defines is mapped to the new definition.  So just replace the
3477 rhs of the statement with something harmless.  */
3478
3479 if (slp_node)
3480 return true;
3481
3482 type = TREE_TYPE (scalar_dest);
3483 if (is_pattern_stmt_p (stmt_info))
3484 lhs = gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info));
3485 else
3486 lhs = gimple_call_lhs (stmt);
3487
3488 new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
3489 set_vinfo_for_stmt (new_stmt, stmt_info);
3490 set_vinfo_for_stmt (stmt, NULL);
3491 STMT_VINFO_STMT (stmt_info) = new_stmt;
3492 gsi_replace (gsi, new_stmt, false);
3493
3494 return true;
3495 }
3496
3497
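/* Information about one argument of a call that is a candidate for
   vectorization via a simd clone: the vector type of its definition
   (VECTYPE), its (possibly adjusted) scalar value (OP), the step of a
   linear argument (LINEAR_STEP), how the argument is defined (DT), the
   known pointer alignment in bytes (ALIGN), and whether the argument is
   linear only within a simd lane (SIMD_LANE_LINEAR).  */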
3498 struct simd_call_arg_info
3499 {
3500 tree vectype;
3501 tree op;
3502 HOST_WIDE_INT linear_step;
3503 enum vect_def_type dt;
3504 unsigned int align;
3505 bool simd_lane_linear;
3506 };
3507
3508 /* Helper function of vectorizable_simd_clone_call.  If OP, an SSA_NAME,
3509 is linear within a simd lane (but not within the whole loop), note it
3510 in *ARGINFO.  */
3511
3512 static void
3513 vect_simd_lane_linear (tree op, struct loop *loop,
3514 struct simd_call_arg_info *arginfo)
3515 {
3516 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
3517
3518 if (!is_gimple_assign (def_stmt)
3519 || gimple_assign_rhs_code (def_stmt) != POINTER_PLUS_EXPR
3520 || !is_gimple_min_invariant (gimple_assign_rhs1 (def_stmt)))
3521 return;
3522
3523 tree base = gimple_assign_rhs1 (def_stmt);
3524 HOST_WIDE_INT linear_step = 0;
3525 tree v = gimple_assign_rhs2 (def_stmt);
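/* Walk the definition of V through additions of constants, a single
   multiplication by a constant and conversions, looking for the lane
   index produced by GOMP_SIMD_LANE; if it is found, OP has the form
   BASE + lane * LINEAR_STEP within each simd lane.  */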
3526 while (TREE_CODE (v) == SSA_NAME)
3527 {
3528 tree t;
3529 def_stmt = SSA_NAME_DEF_STMT (v);
3530 if (is_gimple_assign (def_stmt))
3531 switch (gimple_assign_rhs_code (def_stmt))
3532 {
3533 case PLUS_EXPR:
3534 t = gimple_assign_rhs2 (def_stmt);
3535 if (linear_step || TREE_CODE (t) != INTEGER_CST)
3536 return;
3537 base = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (base), base, t);
3538 v = gimple_assign_rhs1 (def_stmt);
3539 continue;
3540 case MULT_EXPR:
3541 t = gimple_assign_rhs2 (def_stmt);
3542 if (linear_step || !tree_fits_shwi_p (t) || integer_zerop (t))
3543 return;
3544 linear_step = tree_to_shwi (t);
3545 v = gimple_assign_rhs1 (def_stmt);
3546 continue;
3547 CASE_CONVERT:
3548 t = gimple_assign_rhs1 (def_stmt);
3549 if (TREE_CODE (TREE_TYPE (t)) != INTEGER_TYPE
3550 || (TYPE_PRECISION (TREE_TYPE (v))
3551 < TYPE_PRECISION (TREE_TYPE (t))))
3552 return;
3553 if (!linear_step)
3554 linear_step = 1;
3555 v = t;
3556 continue;
3557 default:
3558 return;
3559 }
3560 else if (gimple_call_internal_p (def_stmt, IFN_GOMP_SIMD_LANE)
3561 && loop->simduid
3562 && TREE_CODE (gimple_call_arg (def_stmt, 0)) == SSA_NAME
3563 && (SSA_NAME_VAR (gimple_call_arg (def_stmt, 0))
3564 == loop->simduid))
3565 {
3566 if (!linear_step)
3567 linear_step = 1;
3568 arginfo->linear_step = linear_step;
3569 arginfo->op = base;
3570 arginfo->simd_lane_linear = true;
3571 return;
3572 }
3573 }
3574 }
3575
3576 /* Return the number of elements in vector type VECTYPE, which is associated
3577 with a SIMD clone. At present these vectors always have a constant
3578 length. */
3579
3580 static unsigned HOST_WIDE_INT
3581 simd_clone_subparts (tree vectype)
3582 {
3583 return TYPE_VECTOR_SUBPARTS (vectype).to_constant ();
3584 }
3585
3586 /* Function vectorizable_simd_clone_call.
3587
3588 Check if STMT performs a function call that can be vectorized
3589 by calling a simd clone of the function.
3590 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3591 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
3592 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
3593
3594 static bool
3595 vectorizable_simd_clone_call (gimple *stmt, gimple_stmt_iterator *gsi,
3596 gimple **vec_stmt, slp_tree slp_node)
3597 {
3598 tree vec_dest;
3599 tree scalar_dest;
3600 tree op, type;
3601 tree vec_oprnd0 = NULL_TREE;
3602 stmt_vec_info stmt_info = vinfo_for_stmt (stmt), prev_stmt_info;
3603 tree vectype;
3604 unsigned int nunits;
3605 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3606 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
3607 vec_info *vinfo = stmt_info->vinfo;
3608 struct loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
3609 tree fndecl, new_temp;
3610 gimple *def_stmt;
3611 gimple *new_stmt = NULL;
3612 int ncopies, j;
3613 auto_vec<simd_call_arg_info> arginfo;
3614 vec<tree> vargs = vNULL;
3615 size_t i, nargs;
3616 tree lhs, rtype, ratype;
3617 vec<constructor_elt, va_gc> *ret_ctor_elts = NULL;
3618
3619 /* Is STMT a vectorizable call? */
3620 if (!is_gimple_call (stmt))
3621 return false;
3622
3623 fndecl = gimple_call_fndecl (stmt);
3624 if (fndecl == NULL_TREE)
3625 return false;
3626
3627 struct cgraph_node *node = cgraph_node::get (fndecl);
3628 if (node == NULL || node->simd_clones == NULL)
3629 return false;
3630
3631 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3632 return false;
3633
3634 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
3635 && ! vec_stmt)
3636 return false;
3637
3638 if (gimple_call_lhs (stmt)
3639 && TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
3640 return false;
3641
3642 gcc_checking_assert (!stmt_can_throw_internal (stmt));
3643
3644 vectype = STMT_VINFO_VECTYPE (stmt_info);
3645
3646 if (loop_vinfo && nested_in_vect_loop_p (loop, stmt))
3647 return false;
3648
3649 /* FORNOW: SLP is not supported.  */
3650 if (slp_node)
3651 return false;
3652
3653 /* Process function arguments. */
3654 nargs = gimple_call_num_args (stmt);
3655
3656 /* Bail out if the function has zero arguments. */
3657 if (nargs == 0)
3658 return false;
3659
3660 arginfo.reserve (nargs, true);
3661
3662 for (i = 0; i < nargs; i++)
3663 {
3664 simd_call_arg_info thisarginfo;
3665 affine_iv iv;
3666
3667 thisarginfo.linear_step = 0;
3668 thisarginfo.align = 0;
3669 thisarginfo.op = NULL_TREE;
3670 thisarginfo.simd_lane_linear = false;
3671
3672 op = gimple_call_arg (stmt, i);
3673 if (!vect_is_simple_use (op, vinfo, &def_stmt, &thisarginfo.dt,
3674 &thisarginfo.vectype)
3675 || thisarginfo.dt == vect_uninitialized_def)
3676 {
3677 if (dump_enabled_p ())
3678 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3679 "use not simple.\n");
3680 return false;
3681 }
3682
3683 if (thisarginfo.dt == vect_constant_def
3684 || thisarginfo.dt == vect_external_def)
3685 gcc_assert (thisarginfo.vectype == NULL_TREE);
3686 else
3687 gcc_assert (thisarginfo.vectype != NULL_TREE);
3688
3689 /* For linear arguments, the analysis phase should have saved
3690 the base and step in STMT_VINFO_SIMD_CLONE_INFO.  */
3691 if (i * 3 + 4 <= STMT_VINFO_SIMD_CLONE_INFO (stmt_info).length ()
3692 && STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2])
3693 {
3694 gcc_assert (vec_stmt);
3695 thisarginfo.linear_step
3696 = tree_to_shwi (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2]);
3697 thisarginfo.op
3698 = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 1];
3699 thisarginfo.simd_lane_linear
3700 = (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 3]
3701 == boolean_true_node);
3702 /* If the loop has been peeled for alignment, adjust the base accordingly.  */
3703 tree n1 = LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo);
3704 tree n2 = LOOP_VINFO_NITERS (loop_vinfo);
3705 if (n1 != n2 && !thisarginfo.simd_lane_linear)
3706 {
3707 tree bias = fold_build2 (MINUS_EXPR, TREE_TYPE (n1), n1, n2);
3708 tree step = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2];
3709 tree opt = TREE_TYPE (thisarginfo.op);
3710 bias = fold_convert (TREE_TYPE (step), bias);
3711 bias = fold_build2 (MULT_EXPR, TREE_TYPE (step), bias, step);
3712 thisarginfo.op
3713 = fold_build2 (POINTER_TYPE_P (opt)
3714 ? POINTER_PLUS_EXPR : PLUS_EXPR, opt,
3715 thisarginfo.op, bias);
3716 }
3717 }
3718 else if (!vec_stmt
3719 && thisarginfo.dt != vect_constant_def
3720 && thisarginfo.dt != vect_external_def
3721 && loop_vinfo
3722 && TREE_CODE (op) == SSA_NAME
3723 && simple_iv (loop, loop_containing_stmt (stmt), op,
3724 &iv, false)
3725 && tree_fits_shwi_p (iv.step))
3726 {
3727 thisarginfo.linear_step = tree_to_shwi (iv.step);
3728 thisarginfo.op = iv.base;
3729 }
3730 else if ((thisarginfo.dt == vect_constant_def
3731 || thisarginfo.dt == vect_external_def)
3732 && POINTER_TYPE_P (TREE_TYPE (op)))
3733 thisarginfo.align = get_pointer_alignment (op) / BITS_PER_UNIT;
3734 /* Addresses of array elements indexed by GOMP_SIMD_LANE are
3735 linear too. */
3736 if (POINTER_TYPE_P (TREE_TYPE (op))
3737 && !thisarginfo.linear_step
3738 && !vec_stmt
3739 && thisarginfo.dt != vect_constant_def
3740 && thisarginfo.dt != vect_external_def
3741 && loop_vinfo
3742 && !slp_node
3743 && TREE_CODE (op) == SSA_NAME)
3744 vect_simd_lane_linear (op, loop, &thisarginfo);
3745
3746 arginfo.quick_push (thisarginfo);
3747 }
3748
3749 unsigned HOST_WIDE_INT vf;
3750 if (!LOOP_VINFO_VECT_FACTOR (loop_vinfo).is_constant (&vf))
3751 {
3752 if (dump_enabled_p ())
3753 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3754 "not considering SIMD clones; not yet supported"
3755 " for variable-width vectors.\n");
3756 return false;
3757 }
3758
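/* Pick the simd clone with the lowest "badness".  A clone whose simdlen is
   smaller than the vectorization factor is penalized 1024 per halving, an
   inbranch (masked) clone 2048, and a clone the target considers suboptimal
   512 per unit of target badness; unsuitable argument kinds or alignments
   disqualify a clone entirely.  (Inbranch clones are in fact skipped for
   now, see the FORNOW below.)  */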
3759 unsigned int badness = 0;
3760 struct cgraph_node *bestn = NULL;
3761 if (STMT_VINFO_SIMD_CLONE_INFO (stmt_info).exists ())
3762 bestn = cgraph_node::get (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[0]);
3763 else
3764 for (struct cgraph_node *n = node->simd_clones; n != NULL;
3765 n = n->simdclone->next_clone)
3766 {
3767 unsigned int this_badness = 0;
3768 if (n->simdclone->simdlen > vf
3769 || n->simdclone->nargs != nargs)
3770 continue;
3771 if (n->simdclone->simdlen < vf)
3772 this_badness += (exact_log2 (vf)
3773 - exact_log2 (n->simdclone->simdlen)) * 1024;
3774 if (n->simdclone->inbranch)
3775 this_badness += 2048;
3776 int target_badness = targetm.simd_clone.usable (n);
3777 if (target_badness < 0)
3778 continue;
3779 this_badness += target_badness * 512;
3780 /* FORNOW: Have to add code to add the mask argument. */
3781 if (n->simdclone->inbranch)
3782 continue;
3783 for (i = 0; i < nargs; i++)
3784 {
3785 switch (n->simdclone->args[i].arg_type)
3786 {
3787 case SIMD_CLONE_ARG_TYPE_VECTOR:
3788 if (!useless_type_conversion_p
3789 (n->simdclone->args[i].orig_type,
3790 TREE_TYPE (gimple_call_arg (stmt, i))))
3791 i = -1;
3792 else if (arginfo[i].dt == vect_constant_def
3793 || arginfo[i].dt == vect_external_def
3794 || arginfo[i].linear_step)
3795 this_badness += 64;
3796 break;
3797 case SIMD_CLONE_ARG_TYPE_UNIFORM:
3798 if (arginfo[i].dt != vect_constant_def
3799 && arginfo[i].dt != vect_external_def)
3800 i = -1;
3801 break;
3802 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
3803 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
3804 if (arginfo[i].dt == vect_constant_def
3805 || arginfo[i].dt == vect_external_def
3806 || (arginfo[i].linear_step
3807 != n->simdclone->args[i].linear_step))
3808 i = -1;
3809 break;
3810 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
3811 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
3812 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP:
3813 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP:
3814 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
3815 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
3816 /* FORNOW */
3817 i = -1;
3818 break;
3819 case SIMD_CLONE_ARG_TYPE_MASK:
3820 gcc_unreachable ();
3821 }
3822 if (i == (size_t) -1)
3823 break;
3824 if (n->simdclone->args[i].alignment > arginfo[i].align)
3825 {
3826 i = -1;
3827 break;
3828 }
3829 if (arginfo[i].align)
3830 this_badness += (exact_log2 (arginfo[i].align)
3831 - exact_log2 (n->simdclone->args[i].alignment));
3832 }
3833 if (i == (size_t) -1)
3834 continue;
3835 if (bestn == NULL || this_badness < badness)
3836 {
3837 bestn = n;
3838 badness = this_badness;
3839 }
3840 }
3841
3842 if (bestn == NULL)
3843 return false;
3844
3845 for (i = 0; i < nargs; i++)
3846 if ((arginfo[i].dt == vect_constant_def
3847 || arginfo[i].dt == vect_external_def)
3848 && bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_VECTOR)
3849 {
3850 arginfo[i].vectype
3851 = get_vectype_for_scalar_type (TREE_TYPE (gimple_call_arg (stmt,
3852 i)));
3853 if (arginfo[i].vectype == NULL
3854 || (simd_clone_subparts (arginfo[i].vectype)
3855 > bestn->simdclone->simdlen))
3856 return false;
3857 }
3858
3859 fndecl = bestn->decl;
3860 nunits = bestn->simdclone->simdlen;
3861 ncopies = vf / nunits;
3862
3863 /* If the function isn't const, only allow it in simd loops where the
3864 user has asserted that at least nunits consecutive iterations can be
3865 performed using SIMD instructions.  */
3866 if ((loop == NULL || (unsigned) loop->safelen < nunits)
3867 && gimple_vuse (stmt))
3868 return false;
3869
3870 /* Sanity check: make sure that at least one copy of the vectorized stmt
3871 needs to be generated. */
3872 gcc_assert (ncopies >= 1);
3873
3874 if (!vec_stmt) /* transformation not required. */
3875 {
3876 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (bestn->decl);
3877 for (i = 0; i < nargs; i++)
3878 if ((bestn->simdclone->args[i].arg_type
3879 == SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP)
3880 || (bestn->simdclone->args[i].arg_type
3881 == SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP))
3882 {
3883 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_grow_cleared (i * 3
3884 + 1);
3885 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (arginfo[i].op);
3886 tree lst = POINTER_TYPE_P (TREE_TYPE (arginfo[i].op))
3887 ? size_type_node : TREE_TYPE (arginfo[i].op);
3888 tree ls = build_int_cst (lst, arginfo[i].linear_step);
3889 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (ls);
3890 tree sll = arginfo[i].simd_lane_linear
3891 ? boolean_true_node : boolean_false_node;
3892 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (sll);
3893 }
3894 STMT_VINFO_TYPE (stmt_info) = call_simd_clone_vec_info_type;
3895 if (dump_enabled_p ())
3896 dump_printf_loc (MSG_NOTE, vect_location,
3897 "=== vectorizable_simd_clone_call ===\n");
3898 /* vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL); */
3899 return true;
3900 }
3901
3902 /* Transform. */
3903
3904 if (dump_enabled_p ())
3905 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
3906
3907 /* Handle def. */
3908 scalar_dest = gimple_call_lhs (stmt);
3909 vec_dest = NULL_TREE;
3910 rtype = NULL_TREE;
3911 ratype = NULL_TREE;
3912 if (scalar_dest)
3913 {
3914 vec_dest = vect_create_destination_var (scalar_dest, vectype);
3915 rtype = TREE_TYPE (TREE_TYPE (fndecl));
3916 if (TREE_CODE (rtype) == ARRAY_TYPE)
3917 {
3918 ratype = rtype;
3919 rtype = TREE_TYPE (ratype);
3920 }
3921 }
3922
3923 prev_stmt_info = NULL;
3924 for (j = 0; j < ncopies; ++j)
3925 {
3926 /* Build argument list for the vectorized call. */
3927 if (j == 0)
3928 vargs.create (nargs);
3929 else
3930 vargs.truncate (0);
3931
3932 for (i = 0; i < nargs; i++)
3933 {
3934 unsigned int k, l, m, o;
3935 tree atype;
3936 op = gimple_call_arg (stmt, i);
3937 switch (bestn->simdclone->args[i].arg_type)
3938 {
3939 case SIMD_CLONE_ARG_TYPE_VECTOR:
3940 atype = bestn->simdclone->args[i].vector_type;
3941 o = nunits / simd_clone_subparts (atype);
3942 for (m = j * o; m < (j + 1) * o; m++)
3943 {
3944 if (simd_clone_subparts (atype)
3945 < simd_clone_subparts (arginfo[i].vectype))
3946 {
3947 poly_uint64 prec = GET_MODE_BITSIZE (TYPE_MODE (atype));
3948 k = (simd_clone_subparts (arginfo[i].vectype)
3949 / simd_clone_subparts (atype));
3950 gcc_assert ((k & (k - 1)) == 0);
3951 if (m == 0)
3952 vec_oprnd0
3953 = vect_get_vec_def_for_operand (op, stmt);
3954 else
3955 {
3956 vec_oprnd0 = arginfo[i].op;
3957 if ((m & (k - 1)) == 0)
3958 vec_oprnd0
3959 = vect_get_vec_def_for_stmt_copy (arginfo[i].dt,
3960 vec_oprnd0);
3961 }
3962 arginfo[i].op = vec_oprnd0;
3963 vec_oprnd0
3964 = build3 (BIT_FIELD_REF, atype, vec_oprnd0,
3965 bitsize_int (prec),
3966 bitsize_int ((m & (k - 1)) * prec));
3967 new_stmt
3968 = gimple_build_assign (make_ssa_name (atype),
3969 vec_oprnd0);
3970 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3971 vargs.safe_push (gimple_assign_lhs (new_stmt));
3972 }
3973 else
3974 {
3975 k = (simd_clone_subparts (atype)
3976 / simd_clone_subparts (arginfo[i].vectype));
3977 gcc_assert ((k & (k - 1)) == 0);
3978 vec<constructor_elt, va_gc> *ctor_elts;
3979 if (k != 1)
3980 vec_alloc (ctor_elts, k);
3981 else
3982 ctor_elts = NULL;
3983 for (l = 0; l < k; l++)
3984 {
3985 if (m == 0 && l == 0)
3986 vec_oprnd0
3987 = vect_get_vec_def_for_operand (op, stmt);
3988 else
3989 vec_oprnd0
3990 = vect_get_vec_def_for_stmt_copy (arginfo[i].dt,
3991 arginfo[i].op);
3992 arginfo[i].op = vec_oprnd0;
3993 if (k == 1)
3994 break;
3995 CONSTRUCTOR_APPEND_ELT (ctor_elts, NULL_TREE,
3996 vec_oprnd0);
3997 }
3998 if (k == 1)
3999 vargs.safe_push (vec_oprnd0);
4000 else
4001 {
4002 vec_oprnd0 = build_constructor (atype, ctor_elts);
4003 new_stmt
4004 = gimple_build_assign (make_ssa_name (atype),
4005 vec_oprnd0);
4006 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4007 vargs.safe_push (gimple_assign_lhs (new_stmt));
4008 }
4009 }
4010 }
4011 break;
4012 case SIMD_CLONE_ARG_TYPE_UNIFORM:
4013 vargs.safe_push (op);
4014 break;
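/* A linear argument with a constant step is passed as a scalar.  For the
   first copy build a PHI that starts at the incoming value and is advanced
   by LINEAR_STEP * NCOPIES * NUNITS on the loop latch; later copies in the
   same iteration add LINEAR_STEP * J * NUNITS to the PHI result.  For an
   argument that is only linear within a simd lane no PHI is needed; the
   precomputed base, adjusted the same way for later copies, is passed
   instead.  */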
4015 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
4016 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
4017 if (j == 0)
4018 {
4019 gimple_seq stmts;
4020 arginfo[i].op
4021 = force_gimple_operand (arginfo[i].op, &stmts, true,
4022 NULL_TREE);
4023 if (stmts != NULL)
4024 {
4025 basic_block new_bb;
4026 edge pe = loop_preheader_edge (loop);
4027 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
4028 gcc_assert (!new_bb);
4029 }
4030 if (arginfo[i].simd_lane_linear)
4031 {
4032 vargs.safe_push (arginfo[i].op);
4033 break;
4034 }
4035 tree phi_res = copy_ssa_name (op);
4036 gphi *new_phi = create_phi_node (phi_res, loop->header);
4037 set_vinfo_for_stmt (new_phi,
4038 new_stmt_vec_info (new_phi, loop_vinfo));
4039 add_phi_arg (new_phi, arginfo[i].op,
4040 loop_preheader_edge (loop), UNKNOWN_LOCATION);
4041 enum tree_code code
4042 = POINTER_TYPE_P (TREE_TYPE (op))
4043 ? POINTER_PLUS_EXPR : PLUS_EXPR;
4044 tree type = POINTER_TYPE_P (TREE_TYPE (op))
4045 ? sizetype : TREE_TYPE (op);
4046 widest_int cst
4047 = wi::mul (bestn->simdclone->args[i].linear_step,
4048 ncopies * nunits);
4049 tree tcst = wide_int_to_tree (type, cst);
4050 tree phi_arg = copy_ssa_name (op);
4051 new_stmt
4052 = gimple_build_assign (phi_arg, code, phi_res, tcst);
4053 gimple_stmt_iterator si = gsi_after_labels (loop->header);
4054 gsi_insert_after (&si, new_stmt, GSI_NEW_STMT);
4055 set_vinfo_for_stmt (new_stmt,
4056 new_stmt_vec_info (new_stmt, loop_vinfo));
4057 add_phi_arg (new_phi, phi_arg, loop_latch_edge (loop),
4058 UNKNOWN_LOCATION);
4059 arginfo[i].op = phi_res;
4060 vargs.safe_push (phi_res);
4061 }
4062 else
4063 {
4064 enum tree_code code
4065 = POINTER_TYPE_P (TREE_TYPE (op))
4066 ? POINTER_PLUS_EXPR : PLUS_EXPR;
4067 tree type = POINTER_TYPE_P (TREE_TYPE (op))
4068 ? sizetype : TREE_TYPE (op);
4069 widest_int cst
4070 = wi::mul (bestn->simdclone->args[i].linear_step,
4071 j * nunits);
4072 tree tcst = wide_int_to_tree (type, cst);
4073 new_temp = make_ssa_name (TREE_TYPE (op));
4074 new_stmt = gimple_build_assign (new_temp, code,
4075 arginfo[i].op, tcst);
4076 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4077 vargs.safe_push (new_temp);
4078 }
4079 break;
4080 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
4081 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP:
4082 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
4083 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP:
4084 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
4085 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
4086 default:
4087 gcc_unreachable ();
4088 }
4089 }
4090
4091 new_stmt = gimple_build_call_vec (fndecl, vargs);
4092 if (vec_dest)
4093 {
4094 gcc_assert (ratype || simd_clone_subparts (rtype) == nunits);
4095 if (ratype)
4096 new_temp = create_tmp_var (ratype);
4097 else if (simd_clone_subparts (vectype)
4098 == simd_clone_subparts (rtype))
4099 new_temp = make_ssa_name (vec_dest, new_stmt);
4100 else
4101 new_temp = make_ssa_name (rtype, new_stmt);
4102 gimple_call_set_lhs (new_stmt, new_temp);
4103 }
4104 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4105
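/* Massage the call result into the loop's vector type.  If one call covers
   more lanes than VECTYPE holds, split the result into several VECTYPE
   vectors (via BIT_FIELD_REFs, or loads from the returned array for an
   array return type).  If it covers fewer lanes, accumulate the results of
   several copies into a CONSTRUCTOR.  */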
4106 if (vec_dest)
4107 {
4108 if (simd_clone_subparts (vectype) < nunits)
4109 {
4110 unsigned int k, l;
4111 poly_uint64 prec = GET_MODE_BITSIZE (TYPE_MODE (vectype));
4112 poly_uint64 bytes = GET_MODE_SIZE (TYPE_MODE (vectype));
4113 k = nunits / simd_clone_subparts (vectype);
4114 gcc_assert ((k & (k - 1)) == 0);
4115 for (l = 0; l < k; l++)
4116 {
4117 tree t;
4118 if (ratype)
4119 {
4120 t = build_fold_addr_expr (new_temp);
4121 t = build2 (MEM_REF, vectype, t,
4122 build_int_cst (TREE_TYPE (t), l * bytes));
4123 }
4124 else
4125 t = build3 (BIT_FIELD_REF, vectype, new_temp,
4126 bitsize_int (prec), bitsize_int (l * prec));
4127 new_stmt
4128 = gimple_build_assign (make_ssa_name (vectype), t);
4129 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4130 if (j == 0 && l == 0)
4131 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4132 else
4133 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4134
4135 prev_stmt_info = vinfo_for_stmt (new_stmt);
4136 }
4137
4138 if (ratype)
4139 {
4140 tree clobber = build_constructor (ratype, NULL);
4141 TREE_THIS_VOLATILE (clobber) = 1;
4142 new_stmt = gimple_build_assign (new_temp, clobber);
4143 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4144 }
4145 continue;
4146 }
4147 else if (simd_clone_subparts (vectype) > nunits)
4148 {
4149 unsigned int k = (simd_clone_subparts (vectype)
4150 / simd_clone_subparts (rtype));
4151 gcc_assert ((k & (k - 1)) == 0);
4152 if ((j & (k - 1)) == 0)
4153 vec_alloc (ret_ctor_elts, k);
4154 if (ratype)
4155 {
4156 unsigned int m, o = nunits / simd_clone_subparts (rtype);
4157 for (m = 0; m < o; m++)
4158 {
4159 tree tem = build4 (ARRAY_REF, rtype, new_temp,
4160 size_int (m), NULL_TREE, NULL_TREE);
4161 new_stmt
4162 = gimple_build_assign (make_ssa_name (rtype), tem);
4163 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4164 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE,
4165 gimple_assign_lhs (new_stmt));
4166 }
4167 tree clobber = build_constructor (ratype, NULL);
4168 TREE_THIS_VOLATILE (clobber) = 1;
4169 new_stmt = gimple_build_assign (new_temp, clobber);
4170 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4171 }
4172 else
4173 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE, new_temp);
4174 if ((j & (k - 1)) != k - 1)
4175 continue;
4176 vec_oprnd0 = build_constructor (vectype, ret_ctor_elts);
4177 new_stmt
4178 = gimple_build_assign (make_ssa_name (vec_dest), vec_oprnd0);
4179 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4180
4181 if ((unsigned) j == k - 1)
4182 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4183 else
4184 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4185
4186 prev_stmt_info = vinfo_for_stmt (new_stmt);
4187 continue;
4188 }
4189 else if (ratype)
4190 {
4191 tree t = build_fold_addr_expr (new_temp);
4192 t = build2 (MEM_REF, vectype, t,
4193 build_int_cst (TREE_TYPE (t), 0));
4194 new_stmt
4195 = gimple_build_assign (make_ssa_name (vec_dest), t);
4196 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4197 tree clobber = build_constructor (ratype, NULL);
4198 TREE_THIS_VOLATILE (clobber) = 1;
4199 vect_finish_stmt_generation (stmt,
4200 gimple_build_assign (new_temp,
4201 clobber), gsi);
4202 }
4203 }
4204
4205 if (j == 0)
4206 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4207 else
4208 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4209
4210 prev_stmt_info = vinfo_for_stmt (new_stmt);
4211 }
4212
4213 vargs.release ();
4214
4215 /* The call in STMT might prevent it from being removed in dce.
4216 However, we cannot remove it here, due to the way the SSA name
4217 it defines is mapped to the new definition.  So just replace the
4218 rhs of the statement with something harmless.  */
4219
4220 if (slp_node)
4221 return true;
4222
4223 if (scalar_dest)
4224 {
4225 type = TREE_TYPE (scalar_dest);
4226 if (is_pattern_stmt_p (stmt_info))
4227 lhs = gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info));
4228 else
4229 lhs = gimple_call_lhs (stmt);
4230 new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
4231 }
4232 else
4233 new_stmt = gimple_build_nop ();
4234 set_vinfo_for_stmt (new_stmt, stmt_info);
4235 set_vinfo_for_stmt (stmt, NULL);
4236 STMT_VINFO_STMT (stmt_info) = new_stmt;
4237 gsi_replace (gsi, new_stmt, true);
4238 unlink_stmt_vdef (stmt);
4239
4240 return true;
4241 }
4242
4243
4244 /* Function vect_gen_widened_results_half
4245
4246 Create a vector stmt whose code is CODE, whose number of operands is
4247 OP_TYPE, whose result variable is VEC_DEST and whose operands are
4248 VEC_OPRND0 and VEC_OPRND1.  The new vector stmt is inserted at GSI.
4249 In the case that CODE is a CALL_EXPR, this means that a call to DECL
4250 needs to be created (DECL is a function-decl of a target builtin).
4251 STMT is the original scalar stmt that we are vectorizing. */
4252
4253 static gimple *
4254 vect_gen_widened_results_half (enum tree_code code,
4255 tree decl,
4256 tree vec_oprnd0, tree vec_oprnd1, int op_type,
4257 tree vec_dest, gimple_stmt_iterator *gsi,
4258 gimple *stmt)
4259 {
4260 gimple *new_stmt;
4261 tree new_temp;
4262
4263 /* Generate half of the widened result: */
4264 if (code == CALL_EXPR)
4265 {
4266 /* Target specific support */
4267 if (op_type == binary_op)
4268 new_stmt = gimple_build_call (decl, 2, vec_oprnd0, vec_oprnd1);
4269 else
4270 new_stmt = gimple_build_call (decl, 1, vec_oprnd0);
4271 new_temp = make_ssa_name (vec_dest, new_stmt);
4272 gimple_call_set_lhs (new_stmt, new_temp);
4273 }
4274 else
4275 {
4276 /* Generic support */
4277 gcc_assert (op_type == TREE_CODE_LENGTH (code));
4278 if (op_type != binary_op)
4279 vec_oprnd1 = NULL;
4280 new_stmt = gimple_build_assign (vec_dest, code, vec_oprnd0, vec_oprnd1);
4281 new_temp = make_ssa_name (vec_dest, new_stmt);
4282 gimple_assign_set_lhs (new_stmt, new_temp);
4283 }
4284 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4285
4286 return new_stmt;
4287 }
4288
4289
4290 /* Get vectorized definitions for loop-based vectorization. For the first
4291 operand we call vect_get_vec_def_for_operand() (with OPRND containing
4292 scalar operand), and for the rest we get a copy with
4293 vect_get_vec_def_for_stmt_copy() using the previous vector definition
4294 (stored in OPRND). See vect_get_vec_def_for_stmt_copy() for details.
4295 The vectors are collected into VEC_OPRNDS. */
4296
4297 static void
4298 vect_get_loop_based_defs (tree *oprnd, gimple *stmt, enum vect_def_type dt,
4299 vec<tree> *vec_oprnds, int multi_step_cvt)
4300 {
4301 tree vec_oprnd;
4302
4303 /* Get first vector operand. */
4304 /* All the vector operands except the very first one (that is scalar oprnd)
4305 are stmt copies. */
4306 if (TREE_CODE (TREE_TYPE (*oprnd)) != VECTOR_TYPE)
4307 vec_oprnd = vect_get_vec_def_for_operand (*oprnd, stmt);
4308 else
4309 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, *oprnd);
4310
4311 vec_oprnds->quick_push (vec_oprnd);
4312
4313 /* Get second vector operand. */
4314 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, vec_oprnd);
4315 vec_oprnds->quick_push (vec_oprnd);
4316
4317 *oprnd = vec_oprnd;
4318
4319 /* For conversion in multiple steps, continue to get operands
4320 recursively. */
4321 if (multi_step_cvt)
4322 vect_get_loop_based_defs (oprnd, stmt, dt, vec_oprnds, multi_step_cvt - 1);
4323 }
4324
4325
4326 /* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
4327 For multi-step conversions store the resulting vectors and call the function
4328 recursively. */
4329
4330 static void
4331 vect_create_vectorized_demotion_stmts (vec<tree> *vec_oprnds,
4332 int multi_step_cvt, gimple *stmt,
4333 vec<tree> vec_dsts,
4334 gimple_stmt_iterator *gsi,
4335 slp_tree slp_node, enum tree_code code,
4336 stmt_vec_info *prev_stmt_info)
4337 {
4338 unsigned int i;
4339 tree vop0, vop1, new_tmp, vec_dest;
4340 gimple *new_stmt;
4341 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4342
4343 vec_dest = vec_dsts.pop ();
4344
4345 for (i = 0; i < vec_oprnds->length (); i += 2)
4346 {
4347 /* Create demotion operation. */
4348 vop0 = (*vec_oprnds)[i];
4349 vop1 = (*vec_oprnds)[i + 1];
4350 new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
4351 new_tmp = make_ssa_name (vec_dest, new_stmt);
4352 gimple_assign_set_lhs (new_stmt, new_tmp);
4353 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4354
4355 if (multi_step_cvt)
4356 /* Store the resulting vector for next recursive call. */
4357 (*vec_oprnds)[i/2] = new_tmp;
4358 else
4359 {
4360 /* This is the last step of the conversion sequence. Store the
4361 vectors in SLP_NODE or in vector info of the scalar statement
4362 (or in STMT_VINFO_RELATED_STMT chain). */
4363 if (slp_node)
4364 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4365 else
4366 {
4367 if (!*prev_stmt_info)
4368 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
4369 else
4370 STMT_VINFO_RELATED_STMT (*prev_stmt_info) = new_stmt;
4371
4372 *prev_stmt_info = vinfo_for_stmt (new_stmt);
4373 }
4374 }
4375 }
4376
4377 /* For multi-step demotion operations we first generate demotion operations
4378 from the source type to the intermediate types, and then combine the
4379 results (stored in VEC_OPRNDS) with a further demotion operation to the
4380 destination type.  */
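/* For example, a two-step demotion from int to char elements first packs
   pairs of int vectors into short vectors in the loop above and then, in
   the recursive call below, packs pairs of those into char vectors.  */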
4381 if (multi_step_cvt)
4382 {
4383 /* At each level of recursion we have half of the operands we had at the
4384 previous level. */
4385 vec_oprnds->truncate ((i+1)/2);
4386 vect_create_vectorized_demotion_stmts (vec_oprnds, multi_step_cvt - 1,
4387 stmt, vec_dsts, gsi, slp_node,
4388 VEC_PACK_TRUNC_EXPR,
4389 prev_stmt_info);
4390 }
4391
4392 vec_dsts.quick_push (vec_dest);
4393 }
4394
4395
4396 /* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
4397 and VEC_OPRNDS1 (for binary operations).  For multi-step conversions, store
4398 the resulting vectors back in VEC_OPRNDS0 for the next promotion step.  */
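/* For example, widening char elements to int goes through short: each step
   splits every input vector into a low half and a high half of the next
   wider type (e.g. via VEC_UNPACK_LO_EXPR / VEC_UNPACK_HI_EXPR or a target
   builtin), so the number of vectors doubles at each step.  */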
4399
4400 static void
4401 vect_create_vectorized_promotion_stmts (vec<tree> *vec_oprnds0,
4402 vec<tree> *vec_oprnds1,
4403 gimple *stmt, tree vec_dest,
4404 gimple_stmt_iterator *gsi,
4405 enum tree_code code1,
4406 enum tree_code code2, tree decl1,
4407 tree decl2, int op_type)
4408 {
4409 int i;
4410 tree vop0, vop1, new_tmp1, new_tmp2;
4411 gimple *new_stmt1, *new_stmt2;
4412 vec<tree> vec_tmp = vNULL;
4413
4414 vec_tmp.create (vec_oprnds0->length () * 2);
4415 FOR_EACH_VEC_ELT (*vec_oprnds0, i, vop0)
4416 {
4417 if (op_type == binary_op)
4418 vop1 = (*vec_oprnds1)[i];
4419 else
4420 vop1 = NULL_TREE;
4421
4422 /* Generate the two halves of promotion operation. */
4423 new_stmt1 = vect_gen_widened_results_half (code1, decl1, vop0, vop1,
4424 op_type, vec_dest, gsi, stmt);
4425 new_stmt2 = vect_gen_widened_results_half (code2, decl2, vop0, vop1,
4426 op_type, vec_dest, gsi, stmt);
4427 if (is_gimple_call (new_stmt1))
4428 {
4429 new_tmp1 = gimple_call_lhs (new_stmt1);
4430 new_tmp2 = gimple_call_lhs (new_stmt2);
4431 }
4432 else
4433 {
4434 new_tmp1 = gimple_assign_lhs (new_stmt1);
4435 new_tmp2 = gimple_assign_lhs (new_stmt2);
4436 }
4437
4438 /* Store the results for the next step. */
4439 vec_tmp.quick_push (new_tmp1);
4440 vec_tmp.quick_push (new_tmp2);
4441 }
4442
4443 vec_oprnds0->release ();
4444 *vec_oprnds0 = vec_tmp;
4445 }
4446
4447
4448 /* Check if STMT performs a conversion operation that can be vectorized.
4449 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4450 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
4451 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4452
4453 static bool
4454 vectorizable_conversion (gimple *stmt, gimple_stmt_iterator *gsi,
4455 gimple **vec_stmt, slp_tree slp_node)
4456 {
4457 tree vec_dest;
4458 tree scalar_dest;
4459 tree op0, op1 = NULL_TREE;
4460 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
4461 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4462 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4463 enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
4464 enum tree_code codecvt1 = ERROR_MARK, codecvt2 = ERROR_MARK;
4465 tree decl1 = NULL_TREE, decl2 = NULL_TREE;
4466 tree new_temp;
4467 gimple *def_stmt;
4468 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
4469 int ndts = 2;
4470 gimple *new_stmt = NULL;
4471 stmt_vec_info prev_stmt_info;
4472 poly_uint64 nunits_in;
4473 poly_uint64 nunits_out;
4474 tree vectype_out, vectype_in;
4475 int ncopies, i, j;
4476 tree lhs_type, rhs_type;
4477 enum { NARROW, NONE, WIDEN } modifier;
4478 vec<tree> vec_oprnds0 = vNULL;
4479 vec<tree> vec_oprnds1 = vNULL;
4480 tree vop0;
4481 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
4482 vec_info *vinfo = stmt_info->vinfo;
4483 int multi_step_cvt = 0;
4484 vec<tree> interm_types = vNULL;
4485 tree last_oprnd, intermediate_type, cvt_type = NULL_TREE;
4486 int op_type;
4487 unsigned short fltsz;
4488
4489 /* Is STMT a vectorizable conversion? */
4490
4491 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4492 return false;
4493
4494 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
4495 && ! vec_stmt)
4496 return false;
4497
4498 if (!is_gimple_assign (stmt))
4499 return false;
4500
4501 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
4502 return false;
4503
4504 code = gimple_assign_rhs_code (stmt);
4505 if (!CONVERT_EXPR_CODE_P (code)
4506 && code != FIX_TRUNC_EXPR
4507 && code != FLOAT_EXPR
4508 && code != WIDEN_MULT_EXPR
4509 && code != WIDEN_LSHIFT_EXPR)
4510 return false;
4511
4512 op_type = TREE_CODE_LENGTH (code);
4513
4514 /* Check types of lhs and rhs. */
4515 scalar_dest = gimple_assign_lhs (stmt);
4516 lhs_type = TREE_TYPE (scalar_dest);
4517 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
4518
4519 op0 = gimple_assign_rhs1 (stmt);
4520 rhs_type = TREE_TYPE (op0);
4521
4522 if ((code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
4523 && !((INTEGRAL_TYPE_P (lhs_type)
4524 && INTEGRAL_TYPE_P (rhs_type))
4525 || (SCALAR_FLOAT_TYPE_P (lhs_type)
4526 && SCALAR_FLOAT_TYPE_P (rhs_type))))
4527 return false;
4528
4529 if (!VECTOR_BOOLEAN_TYPE_P (vectype_out)
4530 && ((INTEGRAL_TYPE_P (lhs_type)
4531 && !type_has_mode_precision_p (lhs_type))
4532 || (INTEGRAL_TYPE_P (rhs_type)
4533 && !type_has_mode_precision_p (rhs_type))))
4534 {
4535 if (dump_enabled_p ())
4536 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4537 "type conversion to/from bit-precision unsupported."
4538 "\n");
4539 return false;
4540 }
4541
4542 /* Check the operands of the operation. */
4543 if (!vect_is_simple_use (op0, vinfo, &def_stmt, &dt[0], &vectype_in))
4544 {
4545 if (dump_enabled_p ())
4546 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4547 "use not simple.\n");
4548 return false;
4549 }
4550 if (op_type == binary_op)
4551 {
4552 bool ok;
4553
4554 op1 = gimple_assign_rhs2 (stmt);
4555 gcc_assert (code == WIDEN_MULT_EXPR || code == WIDEN_LSHIFT_EXPR);
4556 /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
4557 OP1. */
4558 if (CONSTANT_CLASS_P (op0))
4559 ok = vect_is_simple_use (op1, vinfo, &def_stmt, &dt[1], &vectype_in);
4560 else
4561 ok = vect_is_simple_use (op1, vinfo, &def_stmt, &dt[1]);
4562
4563 if (!ok)
4564 {
4565 if (dump_enabled_p ())
4566 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4567 "use not simple.\n");
4568 return false;
4569 }
4570 }
4571
4572 /* If op0 is an external or constant def, use a vector type of
4573 the same size as the output vector type.  */
4574 if (!vectype_in)
4575 vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
4576 if (vec_stmt)
4577 gcc_assert (vectype_in);
4578 if (!vectype_in)
4579 {
4580 if (dump_enabled_p ())
4581 {
4582 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4583 "no vectype for scalar type ");
4584 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
4585 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
4586 }
4587
4588 return false;
4589 }
4590
4591 if (VECTOR_BOOLEAN_TYPE_P (vectype_out)
4592 && !VECTOR_BOOLEAN_TYPE_P (vectype_in))
4593 {
4594 if (dump_enabled_p ())
4595 {
4596 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4597 "can't convert between boolean and non "
4598 "boolean vectors");
4599 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
4600 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
4601 }
4602
4603 return false;
4604 }
4605
4606 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
4607 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
4608 if (known_eq (nunits_out, nunits_in))
4609 modifier = NONE;
4610 else if (multiple_p (nunits_out, nunits_in))
4611 modifier = NARROW;
4612 else
4613 {
4614 gcc_checking_assert (multiple_p (nunits_in, nunits_out));
4615 modifier = WIDEN;
4616 }
4617
4618 /* Multiple types in SLP are handled by creating the appropriate number of
4619 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4620 case of SLP. */
4621 if (slp_node)
4622 ncopies = 1;
4623 else if (modifier == NARROW)
4624 ncopies = vect_get_num_copies (loop_vinfo, vectype_out);
4625 else
4626 ncopies = vect_get_num_copies (loop_vinfo, vectype_in);
4627
4628 /* Sanity check: make sure that at least one copy of the vectorized stmt
4629 needs to be generated. */
4630 gcc_assert (ncopies >= 1);
4631
4632 bool found_mode = false;
4633 scalar_mode lhs_mode = SCALAR_TYPE_MODE (lhs_type);
4634 scalar_mode rhs_mode = SCALAR_TYPE_MODE (rhs_type);
4635 opt_scalar_mode rhs_mode_iter;
4636
4637 /* Supportable by target? */
4638 switch (modifier)
4639 {
4640 case NONE:
4641 if (code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
4642 return false;
4643 if (supportable_convert_operation (code, vectype_out, vectype_in,
4644 &decl1, &code1))
4645 break;
4646 /* FALLTHRU */
4647 unsupported:
4648 if (dump_enabled_p ())
4649 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4650 "conversion not supported by target.\n");
4651 return false;
4652
4653 case WIDEN:
4654 if (supportable_widening_operation (code, stmt, vectype_out, vectype_in,
4655 &code1, &code2, &multi_step_cvt,
4656 &interm_types))
4657 {
4658 /* Binary widening operation can only be supported directly by the
4659 architecture. */
4660 gcc_assert (!(multi_step_cvt && op_type == binary_op));
4661 break;
4662 }
4663
4664 if (code != FLOAT_EXPR
4665 || GET_MODE_SIZE (lhs_mode) <= GET_MODE_SIZE (rhs_mode))
4666 goto unsupported;
4667
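/* No direct support: for an integer-to-float widening try successively
   wider intermediate integer modes, widening the integer input step by
   step and then converting the widest intermediate type to the float
   result (e.g. short -> int -> double).  */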
4668 fltsz = GET_MODE_SIZE (lhs_mode);
4669 FOR_EACH_2XWIDER_MODE (rhs_mode_iter, rhs_mode)
4670 {
4671 rhs_mode = rhs_mode_iter.require ();
4672 if (GET_MODE_SIZE (rhs_mode) > fltsz)
4673 break;
4674
4675 cvt_type
4676 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
4677 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
4678 if (cvt_type == NULL_TREE)
4679 goto unsupported;
4680
4681 if (GET_MODE_SIZE (rhs_mode) == fltsz)
4682 {
4683 if (!supportable_convert_operation (code, vectype_out,
4684 cvt_type, &decl1, &codecvt1))
4685 goto unsupported;
4686 }
4687 else if (!supportable_widening_operation (code, stmt, vectype_out,
4688 cvt_type, &codecvt1,
4689 &codecvt2, &multi_step_cvt,
4690 &interm_types))
4691 continue;
4692 else
4693 gcc_assert (multi_step_cvt == 0);
4694
4695 if (supportable_widening_operation (NOP_EXPR, stmt, cvt_type,
4696 vectype_in, &code1, &code2,
4697 &multi_step_cvt, &interm_types))
4698 {
4699 found_mode = true;
4700 break;
4701 }
4702 }
4703
4704 if (!found_mode)
4705 goto unsupported;
4706
4707 if (GET_MODE_SIZE (rhs_mode) == fltsz)
4708 codecvt2 = ERROR_MARK;
4709 else
4710 {
4711 multi_step_cvt++;
4712 interm_types.safe_push (cvt_type);
4713 cvt_type = NULL_TREE;
4714 }
4715 break;
4716
4717 case NARROW:
4718 gcc_assert (op_type == unary_op);
4719 if (supportable_narrowing_operation (code, vectype_out, vectype_in,
4720 &code1, &multi_step_cvt,
4721 &interm_types))
4722 break;
4723
4724 if (code != FIX_TRUNC_EXPR
4725 || GET_MODE_SIZE (lhs_mode) >= GET_MODE_SIZE (rhs_mode))
4726 goto unsupported;
4727
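/* No direct support: split a float-to-integer narrowing into a FIX_TRUNC
   to an integer type of the same width as the float input followed by an
   integer narrowing (e.g. double -> long -> int).  */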
4728 cvt_type
4729 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
4730 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
4731 if (cvt_type == NULL_TREE)
4732 goto unsupported;
4733 if (!supportable_convert_operation (code, cvt_type, vectype_in,
4734 &decl1, &codecvt1))
4735 goto unsupported;
4736 if (supportable_narrowing_operation (NOP_EXPR, vectype_out, cvt_type,
4737 &code1, &multi_step_cvt,
4738 &interm_types))
4739 break;
4740 goto unsupported;
4741
4742 default:
4743 gcc_unreachable ();
4744 }
4745
4746 if (!vec_stmt) /* transformation not required. */
4747 {
4748 if (dump_enabled_p ())
4749 dump_printf_loc (MSG_NOTE, vect_location,
4750 "=== vectorizable_conversion ===\n");
4751 if (code == FIX_TRUNC_EXPR || code == FLOAT_EXPR)
4752 {
4753 STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type;
4754 if (!slp_node)
4755 vect_model_simple_cost (stmt_info, ncopies, dt, ndts, NULL, NULL);
4756 }
4757 else if (modifier == NARROW)
4758 {
4759 STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type;
4760 if (!slp_node)
4761 vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt);
4762 }
4763 else
4764 {
4765 STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type;
4766 if (!slp_node)
4767 vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt);
4768 }
4769 interm_types.release ();
4770 return true;
4771 }
4772
4773 /* Transform. */
4774 if (dump_enabled_p ())
4775 dump_printf_loc (MSG_NOTE, vect_location,
4776 "transform conversion. ncopies = %d.\n", ncopies);
4777
4778 if (op_type == binary_op)
4779 {
4780 if (CONSTANT_CLASS_P (op0))
4781 op0 = fold_convert (TREE_TYPE (op1), op0);
4782 else if (CONSTANT_CLASS_P (op1))
4783 op1 = fold_convert (TREE_TYPE (op0), op1);
4784 }
4785
4786 /* In case of multi-step conversion, we first generate conversion operations
4787 to the intermediate types, and then from those types to the final one.
4788 We create vector destinations for the intermediate type (TYPES) received
4789 from supportable_*_operation, and store them in the correct order
4790 for future use in vect_create_vectorized_*_stmts (). */
4791 auto_vec<tree> vec_dsts (multi_step_cvt + 1);
4792 vec_dest = vect_create_destination_var (scalar_dest,
4793 (cvt_type && modifier == WIDEN)
4794 ? cvt_type : vectype_out);
4795 vec_dsts.quick_push (vec_dest);
4796
4797 if (multi_step_cvt)
4798 {
4799 for (i = interm_types.length () - 1;
4800 interm_types.iterate (i, &intermediate_type); i--)
4801 {
4802 vec_dest = vect_create_destination_var (scalar_dest,
4803 intermediate_type);
4804 vec_dsts.quick_push (vec_dest);
4805 }
4806 }
4807
4808 if (cvt_type)
4809 vec_dest = vect_create_destination_var (scalar_dest,
4810 modifier == WIDEN
4811 ? vectype_out : cvt_type);
4812
4813 if (!slp_node)
4814 {
4815 if (modifier == WIDEN)
4816 {
4817 vec_oprnds0.create (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1);
4818 if (op_type == binary_op)
4819 vec_oprnds1.create (1);
4820 }
4821 else if (modifier == NARROW)
4822 vec_oprnds0.create (
4823 2 * (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1));
4824 }
4825 else if (code == WIDEN_LSHIFT_EXPR)
4826 vec_oprnds1.create (slp_node->vec_stmts_size);
4827
4828 last_oprnd = op0;
4829 prev_stmt_info = NULL;
4830 switch (modifier)
4831 {
4832 case NONE:
4833 for (j = 0; j < ncopies; j++)
4834 {
4835 if (j == 0)
4836 vect_get_vec_defs (op0, NULL, stmt, &vec_oprnds0, NULL, slp_node);
4837 else
4838 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, NULL);
4839
4840 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
4841 {
4842 /* Arguments are ready, create the new vector stmt. */
4843 if (code1 == CALL_EXPR)
4844 {
4845 new_stmt = gimple_build_call (decl1, 1, vop0);
4846 new_temp = make_ssa_name (vec_dest, new_stmt);
4847 gimple_call_set_lhs (new_stmt, new_temp);
4848 }
4849 else
4850 {
4851 gcc_assert (TREE_CODE_LENGTH (code1) == unary_op);
4852 new_stmt = gimple_build_assign (vec_dest, code1, vop0);
4853 new_temp = make_ssa_name (vec_dest, new_stmt);
4854 gimple_assign_set_lhs (new_stmt, new_temp);
4855 }
4856
4857 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4858 if (slp_node)
4859 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4860 else
4861 {
4862 if (!prev_stmt_info)
4863 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4864 else
4865 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4866 prev_stmt_info = vinfo_for_stmt (new_stmt);
4867 }
4868 }
4869 }
4870 break;
4871
4872 case WIDEN:
4873 /* In case the vectorization factor (VF) is bigger than the number
4874 of elements that we can fit in a vectype (nunits), we have to
4875 generate more than one vector stmt, i.e., we need to "unroll"
4876 the vector stmt by a factor VF/nunits. */
4877 for (j = 0; j < ncopies; j++)
4878 {
4879 /* Handle uses. */
4880 if (j == 0)
4881 {
4882 if (slp_node)
4883 {
4884 if (code == WIDEN_LSHIFT_EXPR)
4885 {
4886 unsigned int k;
4887
4888 vec_oprnd1 = op1;
4889 /* Store vec_oprnd1 for every vector stmt to be created
4890 for SLP_NODE. We check during the analysis that all
4891 the shift arguments are the same. */
4892 for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
4893 vec_oprnds1.quick_push (vec_oprnd1);
4894
4895 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
4896 slp_node);
4897 }
4898 else
4899 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0,
4900 &vec_oprnds1, slp_node);
4901 }
4902 else
4903 {
4904 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt);
4905 vec_oprnds0.quick_push (vec_oprnd0);
4906 if (op_type == binary_op)
4907 {
4908 if (code == WIDEN_LSHIFT_EXPR)
4909 vec_oprnd1 = op1;
4910 else
4911 vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt);
4912 vec_oprnds1.quick_push (vec_oprnd1);
4913 }
4914 }
4915 }
4916 else
4917 {
4918 vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
4919 vec_oprnds0.truncate (0);
4920 vec_oprnds0.quick_push (vec_oprnd0);
4921 if (op_type == binary_op)
4922 {
4923 if (code == WIDEN_LSHIFT_EXPR)
4924 vec_oprnd1 = op1;
4925 else
4926 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[1],
4927 vec_oprnd1);
4928 vec_oprnds1.truncate (0);
4929 vec_oprnds1.quick_push (vec_oprnd1);
4930 }
4931 }
4932
4933 /* Arguments are ready. Create the new vector stmts. */
4934 for (i = multi_step_cvt; i >= 0; i--)
4935 {
4936 tree this_dest = vec_dsts[i];
4937 enum tree_code c1 = code1, c2 = code2;
4938 if (i == 0 && codecvt2 != ERROR_MARK)
4939 {
4940 c1 = codecvt1;
4941 c2 = codecvt2;
4942 }
4943 vect_create_vectorized_promotion_stmts (&vec_oprnds0,
4944 &vec_oprnds1,
4945 stmt, this_dest, gsi,
4946 c1, c2, decl1, decl2,
4947 op_type);
4948 }
4949
4950 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
4951 {
4952 if (cvt_type)
4953 {
4954 if (codecvt1 == CALL_EXPR)
4955 {
4956 new_stmt = gimple_build_call (decl1, 1, vop0);
4957 new_temp = make_ssa_name (vec_dest, new_stmt);
4958 gimple_call_set_lhs (new_stmt, new_temp);
4959 }
4960 else
4961 {
4962 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
4963 new_temp = make_ssa_name (vec_dest);
4964 new_stmt = gimple_build_assign (new_temp, codecvt1,
4965 vop0);
4966 }
4967
4968 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4969 }
4970 else
4971 new_stmt = SSA_NAME_DEF_STMT (vop0);
4972
4973 if (slp_node)
4974 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4975 else
4976 {
4977 if (!prev_stmt_info)
4978 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
4979 else
4980 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4981 prev_stmt_info = vinfo_for_stmt (new_stmt);
4982 }
4983 }
4984 }
4985
4986 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
4987 break;
4988
4989 case NARROW:
4990 /* In case the vectorization factor (VF) is bigger than the number
4991 of elements that we can fit in a vectype (nunits), we have to
4992 	 generate more than one vector stmt - i.e., we need to "unroll"
4993 the vector stmt by a factor VF/nunits. */
4994 for (j = 0; j < ncopies; j++)
4995 {
4996 /* Handle uses. */
4997 if (slp_node)
4998 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
4999 slp_node);
5000 else
5001 {
5002 vec_oprnds0.truncate (0);
5003 vect_get_loop_based_defs (&last_oprnd, stmt, dt[0], &vec_oprnds0,
5004 vect_pow2 (multi_step_cvt) - 1);
5005 }
5006
5007 /* Arguments are ready. Create the new vector stmts. */
5008 if (cvt_type)
5009 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5010 {
5011 if (codecvt1 == CALL_EXPR)
5012 {
5013 new_stmt = gimple_build_call (decl1, 1, vop0);
5014 new_temp = make_ssa_name (vec_dest, new_stmt);
5015 gimple_call_set_lhs (new_stmt, new_temp);
5016 }
5017 else
5018 {
5019 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
5020 new_temp = make_ssa_name (vec_dest);
5021 new_stmt = gimple_build_assign (new_temp, codecvt1,
5022 vop0);
5023 }
5024
5025 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5026 vec_oprnds0[i] = new_temp;
5027 }
5028
5029 vect_create_vectorized_demotion_stmts (&vec_oprnds0, multi_step_cvt,
5030 stmt, vec_dsts, gsi,
5031 slp_node, code1,
5032 &prev_stmt_info);
5033 }
5034
5035 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
5036 break;
5037 }
5038
5039 vec_oprnds0.release ();
5040 vec_oprnds1.release ();
5041 interm_types.release ();
5042
5043 return true;
5044 }
5045
5046
5047 /* Function vectorizable_assignment.
5048
5049 Check if STMT performs an assignment (copy) that can be vectorized.
5050 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
5051 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
5052 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
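/* For example (an illustrative sketch; the SSA names below are made up):
   assuming a 4-lane vector type, a copy or a same-sized conversion such as

     S1: x_2 = y_1;
     S2: z_3 = (unsigned int) w_1;

   is turned into one plain vector copy per copy of the statement, where a
   conversion is expressed by wrapping the operand in a VIEW_CONVERT_EXPR
   to the destination vector type, since it is effectively a reinterpretation
   of the same bits:

     VS1: vect_x = vect_y;
     VS2: vect_z = VIEW_CONVERT_EXPR<vector(4) unsigned int>(vect_w);  */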
5053
5054 static bool
5055 vectorizable_assignment (gimple *stmt, gimple_stmt_iterator *gsi,
5056 gimple **vec_stmt, slp_tree slp_node)
5057 {
5058 tree vec_dest;
5059 tree scalar_dest;
5060 tree op;
5061 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5062 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
5063 tree new_temp;
5064 gimple *def_stmt;
5065 enum vect_def_type dt[1] = {vect_unknown_def_type};
5066 int ndts = 1;
5067 int ncopies;
5068 int i, j;
5069 vec<tree> vec_oprnds = vNULL;
5070 tree vop;
5071 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
5072 vec_info *vinfo = stmt_info->vinfo;
5073 gimple *new_stmt = NULL;
5074 stmt_vec_info prev_stmt_info = NULL;
5075 enum tree_code code;
5076 tree vectype_in;
5077
5078 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5079 return false;
5080
5081 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5082 && ! vec_stmt)
5083 return false;
5084
5085 /* Is vectorizable assignment? */
5086 if (!is_gimple_assign (stmt))
5087 return false;
5088
5089 scalar_dest = gimple_assign_lhs (stmt);
5090 if (TREE_CODE (scalar_dest) != SSA_NAME)
5091 return false;
5092
5093 code = gimple_assign_rhs_code (stmt);
5094 if (gimple_assign_single_p (stmt)
5095 || code == PAREN_EXPR
5096 || CONVERT_EXPR_CODE_P (code))
5097 op = gimple_assign_rhs1 (stmt);
5098 else
5099 return false;
5100
5101 if (code == VIEW_CONVERT_EXPR)
5102 op = TREE_OPERAND (op, 0);
5103
5104 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
5105 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
5106
5107 /* Multiple types in SLP are handled by creating the appropriate number of
5108 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5109 case of SLP. */
5110 if (slp_node)
5111 ncopies = 1;
5112 else
5113 ncopies = vect_get_num_copies (loop_vinfo, vectype);
5114
5115 gcc_assert (ncopies >= 1);
5116
5117 if (!vect_is_simple_use (op, vinfo, &def_stmt, &dt[0], &vectype_in))
5118 {
5119 if (dump_enabled_p ())
5120 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5121 "use not simple.\n");
5122 return false;
5123 }
5124
5125 /* We can handle NOP_EXPR conversions that do not change the number
5126 of elements or the vector size. */
5127 if ((CONVERT_EXPR_CODE_P (code)
5128 || code == VIEW_CONVERT_EXPR)
5129 && (!vectype_in
5130 || maybe_ne (TYPE_VECTOR_SUBPARTS (vectype_in), nunits)
5131 || maybe_ne (GET_MODE_SIZE (TYPE_MODE (vectype)),
5132 GET_MODE_SIZE (TYPE_MODE (vectype_in)))))
5133 return false;
5134
5135 /* We do not handle bit-precision changes. */
5136 if ((CONVERT_EXPR_CODE_P (code)
5137 || code == VIEW_CONVERT_EXPR)
5138 && INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
5139 && (!type_has_mode_precision_p (TREE_TYPE (scalar_dest))
5140 || !type_has_mode_precision_p (TREE_TYPE (op)))
5141 /* But a conversion that does not change the bit-pattern is ok. */
5142 && !((TYPE_PRECISION (TREE_TYPE (scalar_dest))
5143 > TYPE_PRECISION (TREE_TYPE (op)))
5144 && TYPE_UNSIGNED (TREE_TYPE (op)))
5145 /* Conversion between boolean types of different sizes is
5146 	 a simple assignment in case their vectypes are the same
5147 boolean vectors. */
5148 && (!VECTOR_BOOLEAN_TYPE_P (vectype)
5149 || !VECTOR_BOOLEAN_TYPE_P (vectype_in)))
5150 {
5151 if (dump_enabled_p ())
5152 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5153 "type conversion to/from bit-precision "
5154 "unsupported.\n");
5155 return false;
5156 }
5157
5158 if (!vec_stmt) /* transformation not required. */
5159 {
5160 STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
5161 if (dump_enabled_p ())
5162 dump_printf_loc (MSG_NOTE, vect_location,
5163 "=== vectorizable_assignment ===\n");
5164 if (!slp_node)
5165 vect_model_simple_cost (stmt_info, ncopies, dt, ndts, NULL, NULL);
5166 return true;
5167 }
5168
5169 /* Transform. */
5170 if (dump_enabled_p ())
5171 dump_printf_loc (MSG_NOTE, vect_location, "transform assignment.\n");
5172
5173 /* Handle def. */
5174 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5175
5176 /* Handle use. */
5177 for (j = 0; j < ncopies; j++)
5178 {
5179 /* Handle uses. */
5180 if (j == 0)
5181 vect_get_vec_defs (op, NULL, stmt, &vec_oprnds, NULL, slp_node);
5182 else
5183 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds, NULL);
5184
5185 /* Arguments are ready. Create the new vector stmt. */
5186 FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
5187 {
5188 if (CONVERT_EXPR_CODE_P (code)
5189 || code == VIEW_CONVERT_EXPR)
5190 vop = build1 (VIEW_CONVERT_EXPR, vectype, vop);
5191 new_stmt = gimple_build_assign (vec_dest, vop);
5192 new_temp = make_ssa_name (vec_dest, new_stmt);
5193 gimple_assign_set_lhs (new_stmt, new_temp);
5194 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5195 if (slp_node)
5196 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
5197 }
5198
5199 if (slp_node)
5200 continue;
5201
5202 if (j == 0)
5203 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
5204 else
5205 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
5206
5207 prev_stmt_info = vinfo_for_stmt (new_stmt);
5208 }
5209
5210 vec_oprnds.release ();
5211 return true;
5212 }
5213
5214
5215 /* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE
5216 either as shift by a scalar or by a vector. */
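/* For example (illustrative only): on a target that provides a
   vector-by-scalar shift pattern for its int vector mode, a query such as
   vect_supportable_shift (LSHIFT_EXPR, integer_type_node) succeeds via the
   optab_scalar lookup; if the target only has vector-by-vector shifts, the
   optab_vector fallback below is what makes the function return true.  */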
5217
5218 bool
5219 vect_supportable_shift (enum tree_code code, tree scalar_type)
5220 {
5221
5222 machine_mode vec_mode;
5223 optab optab;
5224 int icode;
5225 tree vectype;
5226
5227 vectype = get_vectype_for_scalar_type (scalar_type);
5228 if (!vectype)
5229 return false;
5230
5231 optab = optab_for_tree_code (code, vectype, optab_scalar);
5232 if (!optab
5233 || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
5234 {
5235 optab = optab_for_tree_code (code, vectype, optab_vector);
5236 if (!optab
5237 || (optab_handler (optab, TYPE_MODE (vectype))
5238 == CODE_FOR_nothing))
5239 return false;
5240 }
5241
5242 vec_mode = TYPE_MODE (vectype);
5243 icode = (int) optab_handler (optab, vec_mode);
5244 if (icode == CODE_FOR_nothing)
5245 return false;
5246
5247 return true;
5248 }
5249
5250
5251 /* Function vectorizable_shift.
5252
5253 Check if STMT performs a shift operation that can be vectorized.
5254 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
5255 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
5256 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
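/* For example (an illustrative sketch; names are made up): a loop statement

     S1: x_3 = y_2 << 3;

   with a 4-lane vector type is replaced by one vector shift per copy, using
   either the scalar shift amount directly (vector/scalar form) or a vector
   of shift amounts (vector/vector form), depending on which optab the
   target supports:

     VS1: vect_x = vect_y << 3;  */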
5257
5258 static bool
5259 vectorizable_shift (gimple *stmt, gimple_stmt_iterator *gsi,
5260 gimple **vec_stmt, slp_tree slp_node)
5261 {
5262 tree vec_dest;
5263 tree scalar_dest;
5264 tree op0, op1 = NULL;
5265 tree vec_oprnd1 = NULL_TREE;
5266 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5267 tree vectype;
5268 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
5269 enum tree_code code;
5270 machine_mode vec_mode;
5271 tree new_temp;
5272 optab optab;
5273 int icode;
5274 machine_mode optab_op2_mode;
5275 gimple *def_stmt;
5276 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
5277 int ndts = 2;
5278 gimple *new_stmt = NULL;
5279 stmt_vec_info prev_stmt_info;
5280 poly_uint64 nunits_in;
5281 poly_uint64 nunits_out;
5282 tree vectype_out;
5283 tree op1_vectype;
5284 int ncopies;
5285 int j, i;
5286 vec<tree> vec_oprnds0 = vNULL;
5287 vec<tree> vec_oprnds1 = vNULL;
5288 tree vop0, vop1;
5289 unsigned int k;
5290 bool scalar_shift_arg = true;
5291 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
5292 vec_info *vinfo = stmt_info->vinfo;
5293
5294 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5295 return false;
5296
5297 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5298 && ! vec_stmt)
5299 return false;
5300
5301 /* Is STMT a vectorizable binary/unary operation? */
5302 if (!is_gimple_assign (stmt))
5303 return false;
5304
5305 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
5306 return false;
5307
5308 code = gimple_assign_rhs_code (stmt);
5309
5310 if (!(code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
5311 || code == RROTATE_EXPR))
5312 return false;
5313
5314 scalar_dest = gimple_assign_lhs (stmt);
5315 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
5316 if (!type_has_mode_precision_p (TREE_TYPE (scalar_dest)))
5317 {
5318 if (dump_enabled_p ())
5319 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5320 "bit-precision shifts not supported.\n");
5321 return false;
5322 }
5323
5324 op0 = gimple_assign_rhs1 (stmt);
5325 if (!vect_is_simple_use (op0, vinfo, &def_stmt, &dt[0], &vectype))
5326 {
5327 if (dump_enabled_p ())
5328 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5329 "use not simple.\n");
5330 return false;
5331 }
5332 /* If op0 is an external or constant def use a vector type with
5333 the same size as the output vector type. */
5334 if (!vectype)
5335 vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
5336 if (vec_stmt)
5337 gcc_assert (vectype);
5338 if (!vectype)
5339 {
5340 if (dump_enabled_p ())
5341 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5342 "no vectype for scalar type\n");
5343 return false;
5344 }
5345
5346 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
5347 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
5348 if (maybe_ne (nunits_out, nunits_in))
5349 return false;
5350
5351 op1 = gimple_assign_rhs2 (stmt);
5352 if (!vect_is_simple_use (op1, vinfo, &def_stmt, &dt[1], &op1_vectype))
5353 {
5354 if (dump_enabled_p ())
5355 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5356 "use not simple.\n");
5357 return false;
5358 }
5359
5360 /* Multiple types in SLP are handled by creating the appropriate number of
5361 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5362 case of SLP. */
5363 if (slp_node)
5364 ncopies = 1;
5365 else
5366 ncopies = vect_get_num_copies (loop_vinfo, vectype);
5367
5368 gcc_assert (ncopies >= 1);
5369
5370 /* Determine whether the shift amount is a vector, or scalar. If the
5371 shift/rotate amount is a vector, use the vector/vector shift optabs. */
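   /* For example (illustrative only): in "a[i] = b[i] << 2" the shift
      amount is loop-invariant and the vector/scalar form can be used,
      whereas in "a[i] = b[i] << c[i]" the amount varies per element and
      the vector/vector form is required.  */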
5372
5373 if ((dt[1] == vect_internal_def
5374 || dt[1] == vect_induction_def)
5375 && !slp_node)
5376 scalar_shift_arg = false;
5377 else if (dt[1] == vect_constant_def
5378 || dt[1] == vect_external_def
5379 || dt[1] == vect_internal_def)
5380 {
5381 	  /* In SLP, we need to check whether the shift count is the same
5382 	     for all the scalar stmts; in loops, a constant or invariant
5383 	     shift count is always a scalar shift.  */
5384 if (slp_node)
5385 {
5386 vec<gimple *> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
5387 gimple *slpstmt;
5388
5389 FOR_EACH_VEC_ELT (stmts, k, slpstmt)
5390 if (!operand_equal_p (gimple_assign_rhs2 (slpstmt), op1, 0))
5391 scalar_shift_arg = false;
5392
5393 /* For internal SLP defs we have to make sure we see scalar stmts
5394 for all vector elements.
5395 ??? For different vectors we could resort to a different
5396 scalar shift operand but code-generation below simply always
5397 takes the first. */
5398 if (dt[1] == vect_internal_def
5399 && maybe_ne (nunits_out * SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node), stmts.length ()))
5400 scalar_shift_arg = false;
5401 }
5402
5403 /* If the shift amount is computed by a pattern stmt we cannot
5404 use the scalar amount directly thus give up and use a vector
5405 shift. */
5406 if (dt[1] == vect_internal_def)
5407 {
5408 gimple *def = SSA_NAME_DEF_STMT (op1);
5409 if (is_pattern_stmt_p (vinfo_for_stmt (def)))
5410 scalar_shift_arg = false;
5411 }
5412 }
5413 else
5414 {
5415 if (dump_enabled_p ())
5416 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5417 "operand mode requires invariant argument.\n");
5418 return false;
5419 }
5420
5421 /* Vector shifted by vector. */
5422 if (!scalar_shift_arg)
5423 {
5424 optab = optab_for_tree_code (code, vectype, optab_vector);
5425 if (dump_enabled_p ())
5426 dump_printf_loc (MSG_NOTE, vect_location,
5427 "vector/vector shift/rotate found.\n");
5428
5429 if (!op1_vectype)
5430 op1_vectype = get_same_sized_vectype (TREE_TYPE (op1), vectype_out);
5431 if (op1_vectype == NULL_TREE
5432 || TYPE_MODE (op1_vectype) != TYPE_MODE (vectype))
5433 {
5434 if (dump_enabled_p ())
5435 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5436 "unusable type for last operand in"
5437 " vector/vector shift/rotate.\n");
5438 return false;
5439 }
5440 }
5441 /* See if the machine has a vector shifted by scalar insn and if not
5442 then see if it has a vector shifted by vector insn. */
5443 else
5444 {
5445 optab = optab_for_tree_code (code, vectype, optab_scalar);
5446 if (optab
5447 && optab_handler (optab, TYPE_MODE (vectype)) != CODE_FOR_nothing)
5448 {
5449 if (dump_enabled_p ())
5450 dump_printf_loc (MSG_NOTE, vect_location,
5451 "vector/scalar shift/rotate found.\n");
5452 }
5453 else
5454 {
5455 optab = optab_for_tree_code (code, vectype, optab_vector);
5456 if (optab
5457 && (optab_handler (optab, TYPE_MODE (vectype))
5458 != CODE_FOR_nothing))
5459 {
5460 scalar_shift_arg = false;
5461
5462 if (dump_enabled_p ())
5463 dump_printf_loc (MSG_NOTE, vect_location,
5464 "vector/vector shift/rotate found.\n");
5465
5466 	      /* Unlike the other binary operators, shifts/rotates allow
5467 		 the rhs to be an int rather than the same type as the lhs,
5468 		 so make sure the scalar has the right type when we are
5469 		 dealing with vectors of long long/long/short/char.  */
5470 if (dt[1] == vect_constant_def)
5471 op1 = fold_convert (TREE_TYPE (vectype), op1);
5472 else if (!useless_type_conversion_p (TREE_TYPE (vectype),
5473 TREE_TYPE (op1)))
5474 {
5475 if (slp_node
5476 && TYPE_MODE (TREE_TYPE (vectype))
5477 != TYPE_MODE (TREE_TYPE (op1)))
5478 {
5479 if (dump_enabled_p ())
5480 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5481 "unusable type for last operand in"
5482 " vector/vector shift/rotate.\n");
5483 return false;
5484 }
5485 if (vec_stmt && !slp_node)
5486 {
5487 op1 = fold_convert (TREE_TYPE (vectype), op1);
5488 op1 = vect_init_vector (stmt, op1,
5489 TREE_TYPE (vectype), NULL);
5490 }
5491 }
5492 }
5493 }
5494 }
5495
5496 /* Supportable by target? */
5497 if (!optab)
5498 {
5499 if (dump_enabled_p ())
5500 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5501 "no optab.\n");
5502 return false;
5503 }
5504 vec_mode = TYPE_MODE (vectype);
5505 icode = (int) optab_handler (optab, vec_mode);
5506 if (icode == CODE_FOR_nothing)
5507 {
5508 if (dump_enabled_p ())
5509 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5510 "op not supported by target.\n");
5511 /* Check only during analysis. */
5512 if (maybe_ne (GET_MODE_SIZE (vec_mode), UNITS_PER_WORD)
5513 || (!vec_stmt
5514 && !vect_worthwhile_without_simd_p (vinfo, code)))
5515 return false;
5516 if (dump_enabled_p ())
5517 dump_printf_loc (MSG_NOTE, vect_location,
5518 "proceeding using word mode.\n");
5519 }
5520
5521 /* Worthwhile without SIMD support? Check only during analysis. */
5522 if (!vec_stmt
5523 && !VECTOR_MODE_P (TYPE_MODE (vectype))
5524 && !vect_worthwhile_without_simd_p (vinfo, code))
5525 {
5526 if (dump_enabled_p ())
5527 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5528 "not worthwhile without SIMD support.\n");
5529 return false;
5530 }
5531
5532 if (!vec_stmt) /* transformation not required. */
5533 {
5534 STMT_VINFO_TYPE (stmt_info) = shift_vec_info_type;
5535 if (dump_enabled_p ())
5536 dump_printf_loc (MSG_NOTE, vect_location,
5537 "=== vectorizable_shift ===\n");
5538 if (!slp_node)
5539 vect_model_simple_cost (stmt_info, ncopies, dt, ndts, NULL, NULL);
5540 return true;
5541 }
5542
5543 /* Transform. */
5544
5545 if (dump_enabled_p ())
5546 dump_printf_loc (MSG_NOTE, vect_location,
5547 "transform binary/unary operation.\n");
5548
5549 /* Handle def. */
5550 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5551
5552 prev_stmt_info = NULL;
5553 for (j = 0; j < ncopies; j++)
5554 {
5555 /* Handle uses. */
5556 if (j == 0)
5557 {
5558 if (scalar_shift_arg)
5559 {
5560 /* Vector shl and shr insn patterns can be defined with scalar
5561 operand 2 (shift operand). In this case, use constant or loop
5562 invariant op1 directly, without extending it to vector mode
5563 first. */
5564 optab_op2_mode = insn_data[icode].operand[2].mode;
5565 if (!VECTOR_MODE_P (optab_op2_mode))
5566 {
5567 if (dump_enabled_p ())
5568 dump_printf_loc (MSG_NOTE, vect_location,
5569 "operand 1 using scalar mode.\n");
5570 vec_oprnd1 = op1;
5571 vec_oprnds1.create (slp_node ? slp_node->vec_stmts_size : 1);
5572 vec_oprnds1.quick_push (vec_oprnd1);
5573 if (slp_node)
5574 {
5575 /* Store vec_oprnd1 for every vector stmt to be created
5576 for SLP_NODE. We check during the analysis that all
5577 the shift arguments are the same.
5578 TODO: Allow different constants for different vector
5579 stmts generated for an SLP instance. */
5580 for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
5581 vec_oprnds1.quick_push (vec_oprnd1);
5582 }
5583 }
5584 }
5585
5586 /* vec_oprnd1 is available if operand 1 should be of a scalar-type
5587 	     (a special case for certain kinds of vector shifts); otherwise,
5588 operand 1 should be of a vector type (the usual case). */
5589 if (vec_oprnd1)
5590 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
5591 slp_node);
5592 else
5593 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
5594 slp_node);
5595 }
5596 else
5597 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
5598
5599 /* Arguments are ready. Create the new vector stmt. */
5600 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5601 {
5602 vop1 = vec_oprnds1[i];
5603 new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
5604 new_temp = make_ssa_name (vec_dest, new_stmt);
5605 gimple_assign_set_lhs (new_stmt, new_temp);
5606 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5607 if (slp_node)
5608 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
5609 }
5610
5611 if (slp_node)
5612 continue;
5613
5614 if (j == 0)
5615 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
5616 else
5617 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
5618 prev_stmt_info = vinfo_for_stmt (new_stmt);
5619 }
5620
5621 vec_oprnds0.release ();
5622 vec_oprnds1.release ();
5623
5624 return true;
5625 }
5626
5627
5628 /* Function vectorizable_operation.
5629
5630 Check if STMT performs a binary, unary or ternary operation that can
5631 be vectorized.
5632 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
5633 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
5634 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
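/* For example (an illustrative sketch; names are made up): with a 4-lane
   vector type, a binary statement

     S1: z_3 = x_1 + y_2;

   is replaced by NCOPIES vector statements of the form

     VS1: vect_z = vect_x + vect_y;

   Unary and ternary operations are handled the same way, with one,
   respectively three, vector operands.  */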
5635
5636 static bool
5637 vectorizable_operation (gimple *stmt, gimple_stmt_iterator *gsi,
5638 gimple **vec_stmt, slp_tree slp_node)
5639 {
5640 tree vec_dest;
5641 tree scalar_dest;
5642 tree op0, op1 = NULL_TREE, op2 = NULL_TREE;
5643 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5644 tree vectype;
5645 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
5646 enum tree_code code, orig_code;
5647 machine_mode vec_mode;
5648 tree new_temp;
5649 int op_type;
5650 optab optab;
5651 bool target_support_p;
5652 gimple *def_stmt;
5653 enum vect_def_type dt[3]
5654 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
5655 int ndts = 3;
5656 gimple *new_stmt = NULL;
5657 stmt_vec_info prev_stmt_info;
5658 poly_uint64 nunits_in;
5659 poly_uint64 nunits_out;
5660 tree vectype_out;
5661 int ncopies;
5662 int j, i;
5663 vec<tree> vec_oprnds0 = vNULL;
5664 vec<tree> vec_oprnds1 = vNULL;
5665 vec<tree> vec_oprnds2 = vNULL;
5666 tree vop0, vop1, vop2;
5667 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
5668 vec_info *vinfo = stmt_info->vinfo;
5669
5670 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5671 return false;
5672
5673 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5674 && ! vec_stmt)
5675 return false;
5676
5677 /* Is STMT a vectorizable binary/unary operation? */
5678 if (!is_gimple_assign (stmt))
5679 return false;
5680
5681 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
5682 return false;
5683
5684 orig_code = code = gimple_assign_rhs_code (stmt);
5685
5686 /* For pointer addition and subtraction, we should use the normal
5687 plus and minus for the vector operation. */
5688 if (code == POINTER_PLUS_EXPR)
5689 code = PLUS_EXPR;
5690 if (code == POINTER_DIFF_EXPR)
5691 code = MINUS_EXPR;
5692
5693 /* Support only unary or binary operations. */
5694 op_type = TREE_CODE_LENGTH (code);
5695 if (op_type != unary_op && op_type != binary_op && op_type != ternary_op)
5696 {
5697 if (dump_enabled_p ())
5698 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5699 "num. args = %d (not unary/binary/ternary op).\n",
5700 op_type);
5701 return false;
5702 }
5703
5704 scalar_dest = gimple_assign_lhs (stmt);
5705 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
5706
5707 /* Most operations cannot handle bit-precision types without extra
5708 truncations. */
5709 if (!VECTOR_BOOLEAN_TYPE_P (vectype_out)
5710 && !type_has_mode_precision_p (TREE_TYPE (scalar_dest))
5711 /* Exceptions are bitwise binary operations. */
5712 && code != BIT_IOR_EXPR
5713 && code != BIT_XOR_EXPR
5714 && code != BIT_AND_EXPR)
5715 {
5716 if (dump_enabled_p ())
5717 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5718 "bit-precision arithmetic not supported.\n");
5719 return false;
5720 }
5721
5722 op0 = gimple_assign_rhs1 (stmt);
5723 if (!vect_is_simple_use (op0, vinfo, &def_stmt, &dt[0], &vectype))
5724 {
5725 if (dump_enabled_p ())
5726 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5727 "use not simple.\n");
5728 return false;
5729 }
5730 /* If op0 is an external or constant def use a vector type with
5731 the same size as the output vector type. */
5732 if (!vectype)
5733 {
5734 /* For a boolean type we cannot determine the vectype from an
5735 invariant value (we don't know whether it is a vector
5736 of booleans or a vector of integers). We use the output
5737 vectype because operations on booleans don't change
5738 the type. */
5739 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op0)))
5740 {
5741 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (scalar_dest)))
5742 {
5743 if (dump_enabled_p ())
5744 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5745 "not supported operation on bool value.\n");
5746 return false;
5747 }
5748 vectype = vectype_out;
5749 }
5750 else
5751 vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
5752 }
5753 if (vec_stmt)
5754 gcc_assert (vectype);
5755 if (!vectype)
5756 {
5757 if (dump_enabled_p ())
5758 {
5759 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5760 "no vectype for scalar type ");
5761 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
5762 TREE_TYPE (op0));
5763 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
5764 }
5765
5766 return false;
5767 }
5768
5769 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
5770 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
5771 if (maybe_ne (nunits_out, nunits_in))
5772 return false;
5773
5774 if (op_type == binary_op || op_type == ternary_op)
5775 {
5776 op1 = gimple_assign_rhs2 (stmt);
5777 if (!vect_is_simple_use (op1, vinfo, &def_stmt, &dt[1]))
5778 {
5779 if (dump_enabled_p ())
5780 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5781 "use not simple.\n");
5782 return false;
5783 }
5784 }
5785 if (op_type == ternary_op)
5786 {
5787 op2 = gimple_assign_rhs3 (stmt);
5788 if (!vect_is_simple_use (op2, vinfo, &def_stmt, &dt[2]))
5789 {
5790 if (dump_enabled_p ())
5791 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5792 "use not simple.\n");
5793 return false;
5794 }
5795 }
5796
5797 /* Multiple types in SLP are handled by creating the appropriate number of
5798 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5799 case of SLP. */
5800 if (slp_node)
5801 ncopies = 1;
5802 else
5803 ncopies = vect_get_num_copies (loop_vinfo, vectype);
5804
5805 gcc_assert (ncopies >= 1);
5806
5807 /* Shifts are handled in vectorizable_shift (). */
5808 if (code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
5809 || code == RROTATE_EXPR)
5810 return false;
5811
5812 /* Supportable by target? */
5813
5814 vec_mode = TYPE_MODE (vectype);
5815 if (code == MULT_HIGHPART_EXPR)
5816 target_support_p = can_mult_highpart_p (vec_mode, TYPE_UNSIGNED (vectype));
5817 else
5818 {
5819 optab = optab_for_tree_code (code, vectype, optab_default);
5820 if (!optab)
5821 {
5822 if (dump_enabled_p ())
5823 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5824 "no optab.\n");
5825 return false;
5826 }
5827 target_support_p = (optab_handler (optab, vec_mode)
5828 != CODE_FOR_nothing);
5829 }
5830
5831 if (!target_support_p)
5832 {
5833 if (dump_enabled_p ())
5834 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5835 "op not supported by target.\n");
5836 /* Check only during analysis. */
5837 if (maybe_ne (GET_MODE_SIZE (vec_mode), UNITS_PER_WORD)
5838 || (!vec_stmt && !vect_worthwhile_without_simd_p (vinfo, code)))
5839 return false;
5840 if (dump_enabled_p ())
5841 dump_printf_loc (MSG_NOTE, vect_location,
5842 "proceeding using word mode.\n");
5843 }
5844
5845 /* Worthwhile without SIMD support? Check only during analysis. */
5846 if (!VECTOR_MODE_P (vec_mode)
5847 && !vec_stmt
5848 && !vect_worthwhile_without_simd_p (vinfo, code))
5849 {
5850 if (dump_enabled_p ())
5851 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5852 "not worthwhile without SIMD support.\n");
5853 return false;
5854 }
5855
5856 if (!vec_stmt) /* transformation not required. */
5857 {
5858 STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
5859 if (dump_enabled_p ())
5860 dump_printf_loc (MSG_NOTE, vect_location,
5861 "=== vectorizable_operation ===\n");
5862 if (!slp_node)
5863 vect_model_simple_cost (stmt_info, ncopies, dt, ndts, NULL, NULL);
5864 return true;
5865 }
5866
5867 /* Transform. */
5868
5869 if (dump_enabled_p ())
5870 dump_printf_loc (MSG_NOTE, vect_location,
5871 "transform binary/unary operation.\n");
5872
5873 /* Handle def. */
5874 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5875
5876 /* POINTER_DIFF_EXPR has pointer arguments which are vectorized as
5877 vectors with unsigned elements, but the result is signed. So, we
5878 need to compute the MINUS_EXPR into vectype temporary and
5879 VIEW_CONVERT_EXPR it into the final vectype_out result. */
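  /* For example (illustrative only; names are made up): for
     "d_3 = p_1 - q_2" with pointer operands, the subtraction is carried out
     in the unsigned element vector type and the result is then punned to the
     signed result vector:

       vect_tmp = MINUS_EXPR <vect_p, vect_q>;
       vect_d   = VIEW_CONVERT_EXPR<vectype_out> (vect_tmp);  */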
5880 tree vec_cvt_dest = NULL_TREE;
5881 if (orig_code == POINTER_DIFF_EXPR)
5882 vec_cvt_dest = vect_create_destination_var (scalar_dest, vectype_out);
5883
5884 /* In case the vectorization factor (VF) is bigger than the number
5885 of elements that we can fit in a vectype (nunits), we have to generate
5886 more than one vector stmt - i.e., we need to "unroll" the
5887 vector stmt by a factor VF/nunits. In doing so, we record a pointer
5888 from one copy of the vector stmt to the next, in the field
5889 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
5890 stages to find the correct vector defs to be used when vectorizing
5891 stmts that use the defs of the current stmt. The example below
5892 illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
5893 we need to create 4 vectorized stmts):
5894
5895 before vectorization:
5896 RELATED_STMT VEC_STMT
5897 S1: x = memref - -
5898 S2: z = x + 1 - -
5899
5900 step 1: vectorize stmt S1 (done in vectorizable_load. See more details
5901 there):
5902 RELATED_STMT VEC_STMT
5903 VS1_0: vx0 = memref0 VS1_1 -
5904 VS1_1: vx1 = memref1 VS1_2 -
5905 VS1_2: vx2 = memref2 VS1_3 -
5906 VS1_3: vx3 = memref3 - -
5907 S1: x = load - VS1_0
5908 S2: z = x + 1 - -
5909
5910 step2: vectorize stmt S2 (done here):
5911 To vectorize stmt S2 we first need to find the relevant vector
5912 def for the first operand 'x'. This is, as usual, obtained from
5913 the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
5914 that defines 'x' (S1). This way we find the stmt VS1_0, and the
5915 relevant vector def 'vx0'. Having found 'vx0' we can generate
5916 the vector stmt VS2_0, and as usual, record it in the
5917 STMT_VINFO_VEC_STMT of stmt S2.
5918 When creating the second copy (VS2_1), we obtain the relevant vector
5919 def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
5920 stmt VS1_0. This way we find the stmt VS1_1 and the relevant
5921 vector def 'vx1'. Using 'vx1' we create stmt VS2_1 and record a
5922 pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
5923 Similarly when creating stmts VS2_2 and VS2_3. This is the resulting
5924 chain of stmts and pointers:
5925 RELATED_STMT VEC_STMT
5926 VS1_0: vx0 = memref0 VS1_1 -
5927 VS1_1: vx1 = memref1 VS1_2 -
5928 VS1_2: vx2 = memref2 VS1_3 -
5929 VS1_3: vx3 = memref3 - -
5930 S1: x = load - VS1_0
5931 VS2_0: vz0 = vx0 + v1 VS2_1 -
5932 VS2_1: vz1 = vx1 + v1 VS2_2 -
5933 VS2_2: vz2 = vx2 + v1 VS2_3 -
5934 VS2_3: vz3 = vx3 + v1 - -
5935 S2: z = x + 1 - VS2_0 */
5936
5937 prev_stmt_info = NULL;
5938 for (j = 0; j < ncopies; j++)
5939 {
5940 /* Handle uses. */
5941 if (j == 0)
5942 {
5943 if (op_type == binary_op)
5944 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
5945 slp_node);
5946 else if (op_type == ternary_op)
5947 {
5948 if (slp_node)
5949 {
5950 auto_vec<tree> ops(3);
5951 ops.quick_push (op0);
5952 ops.quick_push (op1);
5953 ops.quick_push (op2);
5954 auto_vec<vec<tree> > vec_defs(3);
5955 vect_get_slp_defs (ops, slp_node, &vec_defs);
5956 vec_oprnds0 = vec_defs[0];
5957 vec_oprnds1 = vec_defs[1];
5958 vec_oprnds2 = vec_defs[2];
5959 }
5960 else
5961 {
5962 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
5963 NULL);
5964 vect_get_vec_defs (op2, NULL_TREE, stmt, &vec_oprnds2, NULL,
5965 NULL);
5966 }
5967 }
5968 else
5969 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
5970 slp_node);
5971 }
5972 else
5973 {
5974 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
5975 if (op_type == ternary_op)
5976 {
5977 tree vec_oprnd = vec_oprnds2.pop ();
5978 vec_oprnds2.quick_push (vect_get_vec_def_for_stmt_copy (dt[2],
5979 vec_oprnd));
5980 }
5981 }
5982
5983 /* Arguments are ready. Create the new vector stmt. */
5984 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5985 {
5986 vop1 = ((op_type == binary_op || op_type == ternary_op)
5987 ? vec_oprnds1[i] : NULL_TREE);
5988 vop2 = ((op_type == ternary_op)
5989 ? vec_oprnds2[i] : NULL_TREE);
5990 new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1, vop2);
5991 new_temp = make_ssa_name (vec_dest, new_stmt);
5992 gimple_assign_set_lhs (new_stmt, new_temp);
5993 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5994 if (vec_cvt_dest)
5995 {
5996 new_temp = build1 (VIEW_CONVERT_EXPR, vectype_out, new_temp);
5997 new_stmt = gimple_build_assign (vec_cvt_dest, VIEW_CONVERT_EXPR,
5998 new_temp);
5999 new_temp = make_ssa_name (vec_cvt_dest, new_stmt);
6000 gimple_assign_set_lhs (new_stmt, new_temp);
6001 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6002 }
6003 if (slp_node)
6004 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
6005 }
6006
6007 if (slp_node)
6008 continue;
6009
6010 if (j == 0)
6011 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
6012 else
6013 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
6014 prev_stmt_info = vinfo_for_stmt (new_stmt);
6015 }
6016
6017 vec_oprnds0.release ();
6018 vec_oprnds1.release ();
6019 vec_oprnds2.release ();
6020
6021 return true;
6022 }
6023
6024 /* A helper function to ensure data reference DR's base alignment. */
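/* For example (illustrative only): if the vectorizer decided that the base
   decl of DR needs, say, 16-byte alignment but the decl is currently only
   8-byte aligned, the decl's alignment is raised, either through the symbol
   table for decls visible there or directly via SET_DECL_ALIGN.  */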
6025
6026 static void
6027 ensure_base_align (struct data_reference *dr)
6028 {
6029 if (!dr->aux)
6030 return;
6031
6032 if (DR_VECT_AUX (dr)->base_misaligned)
6033 {
6034 tree base_decl = DR_VECT_AUX (dr)->base_decl;
6035
6036 unsigned int align_base_to = DR_TARGET_ALIGNMENT (dr) * BITS_PER_UNIT;
6037
6038 if (decl_in_symtab_p (base_decl))
6039 symtab_node::get (base_decl)->increase_alignment (align_base_to);
6040 else
6041 {
6042 SET_DECL_ALIGN (base_decl, align_base_to);
6043 DECL_USER_ALIGN (base_decl) = 1;
6044 }
6045 DR_VECT_AUX (dr)->base_misaligned = false;
6046 }
6047 }
6048
6049
6050 /* Function get_group_alias_ptr_type.
6051
6052 Return the alias type for the group starting at FIRST_STMT. */
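/* For example (illustrative only): if one member of the group accesses an
   "int" field and another member a "float" field with a different alias
   set, the conservative ptr_type_node is returned; otherwise the alias
   pointer type of the first reference is used for the whole group.  */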
6053
6054 static tree
6055 get_group_alias_ptr_type (gimple *first_stmt)
6056 {
6057 struct data_reference *first_dr, *next_dr;
6058 gimple *next_stmt;
6059
6060 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
6061 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (first_stmt));
6062 while (next_stmt)
6063 {
6064 next_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (next_stmt));
6065 if (get_alias_set (DR_REF (first_dr))
6066 != get_alias_set (DR_REF (next_dr)))
6067 {
6068 if (dump_enabled_p ())
6069 dump_printf_loc (MSG_NOTE, vect_location,
6070 "conflicting alias set types.\n");
6071 return ptr_type_node;
6072 }
6073 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
6074 }
6075 return reference_alias_ptr_type (DR_REF (first_dr));
6076 }
6077
6078
6079 /* Function vectorizable_store.
6080
6081 Check if STMT defines a non-scalar data-ref (array/pointer/structure) that
6082 can be vectorized.
6083 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
6084 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
6085 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
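/* For example (an illustrative sketch; names are made up): a loop store

     S1: a[i] = x_1;

   with a 4-lane vector type becomes, per copy, a store of one full vector
   to the data reference through the maintained data-ref pointer:

     VS1: MEM[(int *)vectp_a] = vect_x;  */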
6086
6087 static bool
6088 vectorizable_store (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
6089 slp_tree slp_node)
6090 {
6091 tree data_ref;
6092 tree op;
6093 tree vec_oprnd = NULL_TREE;
6094 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
6095 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
6096 tree elem_type;
6097 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
6098 struct loop *loop = NULL;
6099 machine_mode vec_mode;
6100 tree dummy;
6101 enum dr_alignment_support alignment_support_scheme;
6102 gimple *def_stmt;
6103 enum vect_def_type rhs_dt = vect_unknown_def_type;
6104 enum vect_def_type mask_dt = vect_unknown_def_type;
6105 stmt_vec_info prev_stmt_info = NULL;
6106 tree dataref_ptr = NULL_TREE;
6107 tree dataref_offset = NULL_TREE;
6108 gimple *ptr_incr = NULL;
6109 int ncopies;
6110 int j;
6111 gimple *next_stmt, *first_stmt;
6112 bool grouped_store;
6113 unsigned int group_size, i;
6114 vec<tree> oprnds = vNULL;
6115 vec<tree> result_chain = vNULL;
6116 bool inv_p;
6117 tree offset = NULL_TREE;
6118 vec<tree> vec_oprnds = vNULL;
6119 bool slp = (slp_node != NULL);
6120 unsigned int vec_num;
6121 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
6122 vec_info *vinfo = stmt_info->vinfo;
6123 tree aggr_type;
6124 gather_scatter_info gs_info;
6125 gimple *new_stmt;
6126 poly_uint64 vf;
6127 vec_load_store_type vls_type;
6128 tree ref_type;
6129
6130 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
6131 return false;
6132
6133 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
6134 && ! vec_stmt)
6135 return false;
6136
6137 /* Is vectorizable store? */
6138
6139 tree mask = NULL_TREE, mask_vectype = NULL_TREE;
6140 if (is_gimple_assign (stmt))
6141 {
6142 tree scalar_dest = gimple_assign_lhs (stmt);
6143 if (TREE_CODE (scalar_dest) == VIEW_CONVERT_EXPR
6144 && is_pattern_stmt_p (stmt_info))
6145 scalar_dest = TREE_OPERAND (scalar_dest, 0);
6146 if (TREE_CODE (scalar_dest) != ARRAY_REF
6147 && TREE_CODE (scalar_dest) != BIT_FIELD_REF
6148 && TREE_CODE (scalar_dest) != INDIRECT_REF
6149 && TREE_CODE (scalar_dest) != COMPONENT_REF
6150 && TREE_CODE (scalar_dest) != IMAGPART_EXPR
6151 && TREE_CODE (scalar_dest) != REALPART_EXPR
6152 && TREE_CODE (scalar_dest) != MEM_REF)
6153 return false;
6154 }
6155 else
6156 {
6157 gcall *call = dyn_cast <gcall *> (stmt);
6158 if (!call || !gimple_call_internal_p (call))
6159 return false;
6160
6161 internal_fn ifn = gimple_call_internal_fn (call);
6162 if (!internal_store_fn_p (ifn))
6163 return false;
6164
6165 if (slp_node != NULL)
6166 {
6167 if (dump_enabled_p ())
6168 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6169 "SLP of masked stores not supported.\n");
6170 return false;
6171 }
6172
6173 int mask_index = internal_fn_mask_index (ifn);
6174 if (mask_index >= 0)
6175 {
6176 mask = gimple_call_arg (call, mask_index);
6177 if (!vect_check_load_store_mask (stmt, mask, &mask_dt,
6178 &mask_vectype))
6179 return false;
6180 }
6181 }
6182
6183 op = vect_get_store_rhs (stmt);
6184
6185 /* Cannot have hybrid store SLP -- that would mean storing to the
6186 same location twice. */
6187 gcc_assert (slp == PURE_SLP_STMT (stmt_info));
6188
6189 tree vectype = STMT_VINFO_VECTYPE (stmt_info), rhs_vectype = NULL_TREE;
6190 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
6191
6192 if (loop_vinfo)
6193 {
6194 loop = LOOP_VINFO_LOOP (loop_vinfo);
6195 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
6196 }
6197 else
6198 vf = 1;
6199
6200 /* Multiple types in SLP are handled by creating the appropriate number of
6201 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
6202 case of SLP. */
6203 if (slp)
6204 ncopies = 1;
6205 else
6206 ncopies = vect_get_num_copies (loop_vinfo, vectype);
6207
6208 gcc_assert (ncopies >= 1);
6209
6210 /* FORNOW. This restriction should be relaxed. */
6211 if (loop && nested_in_vect_loop_p (loop, stmt) && ncopies > 1)
6212 {
6213 if (dump_enabled_p ())
6214 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6215 "multiple types in nested loop.\n");
6216 return false;
6217 }
6218
6219 if (!vect_check_store_rhs (stmt, op, &rhs_dt, &rhs_vectype, &vls_type))
6220 return false;
6221
6222 elem_type = TREE_TYPE (vectype);
6223 vec_mode = TYPE_MODE (vectype);
6224
6225 if (!STMT_VINFO_DATA_REF (stmt_info))
6226 return false;
6227
6228 vect_memory_access_type memory_access_type;
6229 if (!get_load_store_type (stmt, vectype, slp, mask, vls_type, ncopies,
6230 &memory_access_type, &gs_info))
6231 return false;
6232
6233 if (mask)
6234 {
6235 if (memory_access_type == VMAT_CONTIGUOUS)
6236 {
6237 if (!VECTOR_MODE_P (vec_mode)
6238 || !can_vec_mask_load_store_p (vec_mode,
6239 TYPE_MODE (mask_vectype), false))
6240 return false;
6241 }
6242 else if (memory_access_type != VMAT_LOAD_STORE_LANES
6243 && (memory_access_type != VMAT_GATHER_SCATTER || gs_info.decl))
6244 {
6245 if (dump_enabled_p ())
6246 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6247 "unsupported access type for masked store.\n");
6248 return false;
6249 }
6250 }
6251 else
6252 {
6253 /* FORNOW. In some cases we can vectorize even if the data-type is not
6254 supported (e.g., array initialization with 0). */
6255 if (optab_handler (mov_optab, vec_mode) == CODE_FOR_nothing)
6256 return false;
6257 }
6258
6259 grouped_store = (STMT_VINFO_GROUPED_ACCESS (stmt_info)
6260 && memory_access_type != VMAT_GATHER_SCATTER
6261 && (slp || memory_access_type != VMAT_CONTIGUOUS));
6262 if (grouped_store)
6263 {
6264 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
6265 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
6266 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
6267 }
6268 else
6269 {
6270 first_stmt = stmt;
6271 first_dr = dr;
6272 group_size = vec_num = 1;
6273 }
6274
6275 if (!vec_stmt) /* transformation not required. */
6276 {
6277 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;
6278
6279 if (loop_vinfo
6280 && LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo))
6281 check_load_store_masking (loop_vinfo, vectype, vls_type, group_size,
6282 memory_access_type, &gs_info);
6283
6284 STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
6285 /* The SLP costs are calculated during SLP analysis. */
6286 if (!slp_node)
6287 vect_model_store_cost (stmt_info, ncopies, memory_access_type,
6288 vls_type, NULL, NULL, NULL);
6289 return true;
6290 }
6291 gcc_assert (memory_access_type == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info));
6292
6293 /* Transform. */
6294
6295 ensure_base_align (dr);
6296
6297 if (memory_access_type == VMAT_GATHER_SCATTER && gs_info.decl)
6298 {
6299 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE, src;
6300 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info.decl));
6301 tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
6302 tree ptr, mask, var, scale, perm_mask = NULL_TREE;
6303 edge pe = loop_preheader_edge (loop);
6304 gimple_seq seq;
6305 basic_block new_bb;
6306 enum { NARROW, NONE, WIDEN } modifier;
6307 poly_uint64 scatter_off_nunits
6308 = TYPE_VECTOR_SUBPARTS (gs_info.offset_vectype);
6309
6310 if (known_eq (nunits, scatter_off_nunits))
6311 modifier = NONE;
6312 else if (known_eq (nunits * 2, scatter_off_nunits))
6313 {
6314 modifier = WIDEN;
6315
6316 /* Currently gathers and scatters are only supported for
6317 fixed-length vectors. */
6318 unsigned int count = scatter_off_nunits.to_constant ();
6319 vec_perm_builder sel (count, count, 1);
6320 for (i = 0; i < (unsigned int) count; ++i)
6321 sel.quick_push (i | (count / 2));
6322
6323 vec_perm_indices indices (sel, 1, count);
6324 perm_mask = vect_gen_perm_mask_checked (gs_info.offset_vectype,
6325 indices);
6326 gcc_assert (perm_mask != NULL_TREE);
6327 }
6328 else if (known_eq (nunits, scatter_off_nunits * 2))
6329 {
6330 modifier = NARROW;
6331
6332 /* Currently gathers and scatters are only supported for
6333 fixed-length vectors. */
6334 unsigned int count = nunits.to_constant ();
6335 vec_perm_builder sel (count, count, 1);
6336 for (i = 0; i < (unsigned int) count; ++i)
6337 sel.quick_push (i | (count / 2));
6338
6339 vec_perm_indices indices (sel, 2, count);
6340 perm_mask = vect_gen_perm_mask_checked (vectype, indices);
6341 gcc_assert (perm_mask != NULL_TREE);
6342 ncopies *= 2;
6343 }
6344 else
6345 gcc_unreachable ();
6346
6347 rettype = TREE_TYPE (TREE_TYPE (gs_info.decl));
6348 ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6349 masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6350 idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6351 srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6352 scaletype = TREE_VALUE (arglist);
6353
6354 gcc_checking_assert (TREE_CODE (masktype) == INTEGER_TYPE
6355 && TREE_CODE (rettype) == VOID_TYPE);
6356
6357 ptr = fold_convert (ptrtype, gs_info.base);
6358 if (!is_gimple_min_invariant (ptr))
6359 {
6360 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
6361 new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
6362 gcc_assert (!new_bb);
6363 }
6364
6365 /* Currently we support only unconditional scatter stores,
6366 so mask should be all ones. */
6367 mask = build_int_cst (masktype, -1);
6368 mask = vect_init_vector (stmt, mask, masktype, NULL);
6369
6370 scale = build_int_cst (scaletype, gs_info.scale);
6371
6372 prev_stmt_info = NULL;
6373 for (j = 0; j < ncopies; ++j)
6374 {
6375 if (j == 0)
6376 {
6377 src = vec_oprnd1
6378 = vect_get_vec_def_for_operand (op, stmt);
6379 op = vec_oprnd0
6380 = vect_get_vec_def_for_operand (gs_info.offset, stmt);
6381 }
6382 else if (modifier != NONE && (j & 1))
6383 {
6384 if (modifier == WIDEN)
6385 {
6386 src = vec_oprnd1
6387 = vect_get_vec_def_for_stmt_copy (rhs_dt, vec_oprnd1);
6388 op = permute_vec_elements (vec_oprnd0, vec_oprnd0, perm_mask,
6389 stmt, gsi);
6390 }
6391 else if (modifier == NARROW)
6392 {
6393 src = permute_vec_elements (vec_oprnd1, vec_oprnd1, perm_mask,
6394 stmt, gsi);
6395 op = vec_oprnd0
6396 = vect_get_vec_def_for_stmt_copy (gs_info.offset_dt,
6397 vec_oprnd0);
6398 }
6399 else
6400 gcc_unreachable ();
6401 }
6402 else
6403 {
6404 src = vec_oprnd1
6405 = vect_get_vec_def_for_stmt_copy (rhs_dt, vec_oprnd1);
6406 op = vec_oprnd0
6407 = vect_get_vec_def_for_stmt_copy (gs_info.offset_dt,
6408 vec_oprnd0);
6409 }
6410
6411 if (!useless_type_conversion_p (srctype, TREE_TYPE (src)))
6412 {
6413 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (src)),
6414 TYPE_VECTOR_SUBPARTS (srctype)));
6415 var = vect_get_new_ssa_name (srctype, vect_simple_var);
6416 src = build1 (VIEW_CONVERT_EXPR, srctype, src);
6417 new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, src);
6418 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6419 src = var;
6420 }
6421
6422 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
6423 {
6424 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op)),
6425 TYPE_VECTOR_SUBPARTS (idxtype)));
6426 var = vect_get_new_ssa_name (idxtype, vect_simple_var);
6427 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
6428 new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
6429 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6430 op = var;
6431 }
6432
6433 new_stmt
6434 = gimple_build_call (gs_info.decl, 5, ptr, mask, op, src, scale);
6435
6436 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6437
6438 if (prev_stmt_info == NULL)
6439 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
6440 else
6441 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
6442 prev_stmt_info = vinfo_for_stmt (new_stmt);
6443 }
6444 return true;
6445 }
6446
6447 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
6448 {
6449 gimple *group_stmt = GROUP_FIRST_ELEMENT (stmt_info);
6450 GROUP_STORE_COUNT (vinfo_for_stmt (group_stmt))++;
6451 }
6452
6453 if (grouped_store)
6454 {
6455 /* FORNOW */
6456 gcc_assert (!loop || !nested_in_vect_loop_p (loop, stmt));
6457
6458 /* We vectorize all the stmts of the interleaving group when we
6459 reach the last stmt in the group. */
6460 if (GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))
6461 < GROUP_SIZE (vinfo_for_stmt (first_stmt))
6462 && !slp)
6463 {
6464 *vec_stmt = NULL;
6465 return true;
6466 }
6467
6468 if (slp)
6469 {
6470 grouped_store = false;
6471 /* VEC_NUM is the number of vect stmts to be created for this
6472 group. */
6473 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
6474 first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
6475 gcc_assert (GROUP_FIRST_ELEMENT (vinfo_for_stmt (first_stmt)) == first_stmt);
6476 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
6477 op = vect_get_store_rhs (first_stmt);
6478 }
6479 else
6480 /* VEC_NUM is the number of vect stmts to be created for this
6481 group. */
6482 vec_num = group_size;
6483
6484 ref_type = get_group_alias_ptr_type (first_stmt);
6485 }
6486 else
6487 ref_type = reference_alias_ptr_type (DR_REF (first_dr));
6488
6489 if (dump_enabled_p ())
6490 dump_printf_loc (MSG_NOTE, vect_location,
6491 "transform store. ncopies = %d\n", ncopies);
6492
6493 if (memory_access_type == VMAT_ELEMENTWISE
6494 || memory_access_type == VMAT_STRIDED_SLP)
6495 {
6496 gimple_stmt_iterator incr_gsi;
6497 bool insert_after;
6498 gimple *incr;
6499 tree offvar;
6500 tree ivstep;
6501 tree running_off;
6502 tree stride_base, stride_step, alias_off;
6503 tree vec_oprnd;
6504 unsigned int g;
6505 /* Checked by get_load_store_type. */
6506 unsigned int const_nunits = nunits.to_constant ();
6507
6508 gcc_assert (!LOOP_VINFO_FULLY_MASKED_P (loop_vinfo));
6509 gcc_assert (!nested_in_vect_loop_p (loop, stmt));
6510
6511 stride_base
6512 = fold_build_pointer_plus
6513 (DR_BASE_ADDRESS (first_dr),
6514 size_binop (PLUS_EXPR,
6515 convert_to_ptrofftype (DR_OFFSET (first_dr)),
6516 convert_to_ptrofftype (DR_INIT (first_dr))));
6517 stride_step = fold_convert (sizetype, DR_STEP (first_dr));
6518
6519 /* For a store with loop-invariant (but other than power-of-2)
6520 stride (i.e. not a grouped access) like so:
6521
6522 for (i = 0; i < n; i += stride)
6523 array[i] = ...;
6524
6525 we generate a new induction variable and new stores from
6526 the components of the (vectorized) rhs:
6527
6528 for (j = 0; ; j += VF*stride)
6529 vectemp = ...;
6530 tmp1 = vectemp[0];
6531 array[j] = tmp1;
6532 tmp2 = vectemp[1];
6533 array[j + stride] = tmp2;
6534 ...
6535 */
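      /* For example (illustrative only): with an SLP group of two "float"
	 stores and a 4-lane vector, the code below may extract and store the
	 vector as two 64-bit integer chunks (when the target supports such a
	 vector mode), so each pair of lanes needs one store instead of two
	 element stores.  */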
6536
6537 unsigned nstores = const_nunits;
6538 unsigned lnel = 1;
6539 tree ltype = elem_type;
6540 tree lvectype = vectype;
6541 if (slp)
6542 {
6543 if (group_size < const_nunits
6544 && const_nunits % group_size == 0)
6545 {
6546 nstores = const_nunits / group_size;
6547 lnel = group_size;
6548 ltype = build_vector_type (elem_type, group_size);
6549 lvectype = vectype;
6550
6551 /* First check whether the vec_extract optab supports extraction
6552 of the vector elts directly; if not, try an integer type of
the same size below. */
6553 scalar_mode elmode = SCALAR_TYPE_MODE (elem_type);
6554 machine_mode vmode;
6555 if (!mode_for_vector (elmode, group_size).exists (&vmode)
6556 || !VECTOR_MODE_P (vmode)
6557 || !targetm.vector_mode_supported_p (vmode)
6558 || (convert_optab_handler (vec_extract_optab,
6559 TYPE_MODE (vectype), vmode)
6560 == CODE_FOR_nothing))
6561 {
6562 /* Try to avoid emitting an extract of vector elements
6563 by performing the extracts using an integer type of the
6564 same size, extracting from a vector of those and then
6565 re-interpreting it as the original vector type if
6566 supported. */
6567 unsigned lsize
6568 = group_size * GET_MODE_BITSIZE (elmode);
6569 elmode = int_mode_for_size (lsize, 0).require ();
6570 unsigned int lnunits = const_nunits / group_size;
6571 /* If we can't construct such a vector fall back to
6572 element extracts from the original vector type and
6573 element size stores. */
6574 if (mode_for_vector (elmode, lnunits).exists (&vmode)
6575 && VECTOR_MODE_P (vmode)
6576 && targetm.vector_mode_supported_p (vmode)
6577 && (convert_optab_handler (vec_extract_optab,
6578 vmode, elmode)
6579 != CODE_FOR_nothing))
6580 {
6581 nstores = lnunits;
6582 lnel = group_size;
6583 ltype = build_nonstandard_integer_type (lsize, 1);
6584 lvectype = build_vector_type (ltype, nstores);
6585 }
6586 /* Else fall back to vector extraction anyway.
6587 Fewer stores are more important than avoiding spilling
6588 of the vector we extract from. Compared to the
6589 construction case in vectorizable_load no store-forwarding
6590 issue exists here for reasonable archs. */
6591 }
6592 }
6593 else if (group_size >= const_nunits
6594 && group_size % const_nunits == 0)
6595 {
6596 nstores = 1;
6597 lnel = const_nunits;
6598 ltype = vectype;
6599 lvectype = vectype;
6600 }
6601 ltype = build_aligned_type (ltype, TYPE_ALIGN (elem_type));
6602 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
6603 }
6604
6605 ivstep = stride_step;
6606 ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (ivstep), ivstep,
6607 build_int_cst (TREE_TYPE (ivstep), vf));
6608
6609 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
6610
6611 stride_base = cse_and_gimplify_to_preheader (loop_vinfo, stride_base);
6612 ivstep = cse_and_gimplify_to_preheader (loop_vinfo, ivstep);
6613 create_iv (stride_base, ivstep, NULL,
6614 loop, &incr_gsi, insert_after,
6615 &offvar, NULL);
6616 incr = gsi_stmt (incr_gsi);
6617 set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo));
6618
6619 stride_step = cse_and_gimplify_to_preheader (loop_vinfo, stride_step);
6620
6621 prev_stmt_info = NULL;
6622 alias_off = build_int_cst (ref_type, 0);
6623 next_stmt = first_stmt;
6624 for (g = 0; g < group_size; g++)
6625 {
6626 running_off = offvar;
6627 if (g)
6628 {
6629 tree size = TYPE_SIZE_UNIT (ltype);
6630 tree pos = fold_build2 (MULT_EXPR, sizetype, size_int (g),
6631 size);
6632 tree newoff = copy_ssa_name (running_off, NULL);
6633 incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
6634 running_off, pos);
6635 vect_finish_stmt_generation (stmt, incr, gsi);
6636 running_off = newoff;
6637 }
6638 unsigned int group_el = 0;
6639 unsigned HOST_WIDE_INT
6640 elsz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
6641 for (j = 0; j < ncopies; j++)
6642 {
6643 /* We've set op and dt above, from vect_get_store_rhs,
6644 and first_stmt == stmt. */
6645 if (j == 0)
6646 {
6647 if (slp)
6648 {
6649 vect_get_vec_defs (op, NULL_TREE, stmt, &vec_oprnds, NULL,
6650 slp_node);
6651 vec_oprnd = vec_oprnds[0];
6652 }
6653 else
6654 {
6655 op = vect_get_store_rhs (next_stmt);
6656 vec_oprnd = vect_get_vec_def_for_operand (op, next_stmt);
6657 }
6658 }
6659 else
6660 {
6661 if (slp)
6662 vec_oprnd = vec_oprnds[j];
6663 else
6664 {
6665 vect_is_simple_use (op, vinfo, &def_stmt, &rhs_dt);
6666 vec_oprnd = vect_get_vec_def_for_stmt_copy (rhs_dt,
6667 vec_oprnd);
6668 }
6669 }
6670 /* Pun the vector to extract from if necessary. */
6671 if (lvectype != vectype)
6672 {
6673 tree tem = make_ssa_name (lvectype);
6674 gimple *pun
6675 = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
6676 lvectype, vec_oprnd));
6677 vect_finish_stmt_generation (stmt, pun, gsi);
6678 vec_oprnd = tem;
6679 }
6680 for (i = 0; i < nstores; i++)
6681 {
6682 tree newref, newoff;
6683 gimple *incr, *assign;
6684 tree size = TYPE_SIZE (ltype);
6685 /* Extract the i'th component. */
6686 tree pos = fold_build2 (MULT_EXPR, bitsizetype,
6687 bitsize_int (i), size);
6688 tree elem = fold_build3 (BIT_FIELD_REF, ltype, vec_oprnd,
6689 size, pos);
6690
6691 elem = force_gimple_operand_gsi (gsi, elem, true,
6692 NULL_TREE, true,
6693 GSI_SAME_STMT);
6694
6695 tree this_off = build_int_cst (TREE_TYPE (alias_off),
6696 group_el * elsz);
6697 newref = build2 (MEM_REF, ltype,
6698 running_off, this_off);
6699 vect_copy_ref_info (newref, DR_REF (first_dr));
6700
6701 /* And store it to *running_off. */
6702 assign = gimple_build_assign (newref, elem);
6703 vect_finish_stmt_generation (stmt, assign, gsi);
6704
6705 group_el += lnel;
6706 if (! slp
6707 || group_el == group_size)
6708 {
6709 newoff = copy_ssa_name (running_off, NULL);
6710 incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
6711 running_off, stride_step);
6712 vect_finish_stmt_generation (stmt, incr, gsi);
6713
6714 running_off = newoff;
6715 group_el = 0;
6716 }
6717 if (g == group_size - 1
6718 && !slp)
6719 {
6720 if (j == 0 && i == 0)
6721 STMT_VINFO_VEC_STMT (stmt_info)
6722 = *vec_stmt = assign;
6723 else
6724 STMT_VINFO_RELATED_STMT (prev_stmt_info) = assign;
6725 prev_stmt_info = vinfo_for_stmt (assign);
6726 }
6727 }
6728 }
6729 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
6730 if (slp)
6731 break;
6732 }
6733
6734 vec_oprnds.release ();
6735 return true;
6736 }
6737
6738 auto_vec<tree> dr_chain (group_size);
6739 oprnds.create (group_size);
6740
6741 alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
6742 gcc_assert (alignment_support_scheme);
6743 vec_loop_masks *loop_masks
6744 = (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
6745 ? &LOOP_VINFO_MASKS (loop_vinfo)
6746 : NULL);
6747 /* Targets with store-lane instructions must not require explicit
6748 realignment. vect_supportable_dr_alignment always returns either
6749 dr_aligned or dr_unaligned_supported for masked operations. */
6750 gcc_assert ((memory_access_type != VMAT_LOAD_STORE_LANES
6751 && !mask
6752 && !loop_masks)
6753 || alignment_support_scheme == dr_aligned
6754 || alignment_support_scheme == dr_unaligned_supported);
6755
6756 if (memory_access_type == VMAT_CONTIGUOUS_DOWN
6757 || memory_access_type == VMAT_CONTIGUOUS_REVERSE)
6758 offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
6759
6760 tree bump;
6761 tree vec_offset = NULL_TREE;
6762 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
6763 {
6764 aggr_type = NULL_TREE;
6765 bump = NULL_TREE;
6766 }
6767 else if (memory_access_type == VMAT_GATHER_SCATTER)
6768 {
6769 aggr_type = elem_type;
6770 vect_get_strided_load_store_ops (stmt, loop_vinfo, &gs_info,
6771 &bump, &vec_offset);
6772 }
6773 else
6774 {
6775 if (memory_access_type == VMAT_LOAD_STORE_LANES)
6776 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
6777 else
6778 aggr_type = vectype;
6779 bump = vect_get_data_ptr_increment (dr, aggr_type, memory_access_type);
6780 }
6781
6782 if (mask)
6783 LOOP_VINFO_HAS_MASK_STORE (loop_vinfo) = true;
6784
6785 /* In case the vectorization factor (VF) is bigger than the number
6786 of elements that we can fit in a vectype (nunits), we have to generate
6787 more than one vector stmt, i.e. we need to "unroll" the
6788 vector stmt by a factor VF/nunits. For more details see documentation in
6789 vect_get_vec_def_for_copy_stmt. */
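
/* For instance (illustrative arithmetic, not a quote from the sources):
   with VF = 8 and a four-element vectype (nunits = 4), ncopies is
   VF/nunits = 2, so two copies of each vector store are generated.  */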
6790
6791 /* In case of interleaving (non-unit grouped access):
6792
6793 S1: &base + 2 = x2
6794 S2: &base = x0
6795 S3: &base + 1 = x1
6796 S4: &base + 3 = x3
6797
6798 We create vectorized stores starting from the base address (the access of
6799 the first stmt in the chain, S2 in the above example) when the last store
6800 stmt of the chain (S4) is reached:
6801
6802 VS1: &base = vx2
6803 VS2: &base + vec_size*1 = vx0
6804 VS3: &base + vec_size*2 = vx1
6805 VS4: &base + vec_size*3 = vx3
6806
6807 Then permutation statements are generated:
6808
6809 VS5: vx5 = VEC_PERM_EXPR < vx0, vx3, {0, 8, 1, 9, 2, 10, 3, 11} >
6810 VS6: vx6 = VEC_PERM_EXPR < vx0, vx3, {4, 12, 5, 13, 6, 14, 7, 15} >
6811 ...
6812
6813 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
6814 (the order of the data-refs in the output of vect_permute_store_chain
6815 corresponds to the order of scalar stmts in the interleaving chain - see
6816 the documentation of vect_permute_store_chain()).
6817
6818 In case of both multiple types and interleaving, above vector stores and
6819 permutation stmts are created for every copy. The result vector stmts are
6820 put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
6821 STMT_VINFO_RELATED_STMT for the next copies.
6822 */
6823
6824 prev_stmt_info = NULL;
6825 tree vec_mask = NULL_TREE;
6826 for (j = 0; j < ncopies; j++)
6827 {
6828
6829 if (j == 0)
6830 {
6831 if (slp)
6832 {
6833 /* Get vectorized arguments for SLP_NODE. */
6834 vect_get_vec_defs (op, NULL_TREE, stmt, &vec_oprnds,
6835 NULL, slp_node);
6836
6837 vec_oprnd = vec_oprnds[0];
6838 }
6839 else
6840 {
6841 /* For interleaved stores we collect vectorized defs for all the
6842 stores in the group in DR_CHAIN and OPRNDS. DR_CHAIN is then
6843 used as an input to vect_permute_store_chain(), and OPRNDS as
6844 an input to vect_get_vec_def_for_stmt_copy() for the next copy.
6845
6846 If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and
6847 OPRNDS are of size 1. */
6848 next_stmt = first_stmt;
6849 for (i = 0; i < group_size; i++)
6850 {
6851 /* Since gaps are not supported for interleaved stores,
6852 GROUP_SIZE is the exact number of stmts in the chain.
6853 Therefore, NEXT_STMT cannot be NULL. In case that
6854 there is no interleaving, GROUP_SIZE is 1, and only one
6855 iteration of the loop will be executed. */
6856 op = vect_get_store_rhs (next_stmt);
6857 vec_oprnd = vect_get_vec_def_for_operand (op, next_stmt);
6858 dr_chain.quick_push (vec_oprnd);
6859 oprnds.quick_push (vec_oprnd);
6860 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
6861 }
6862 if (mask)
6863 vec_mask = vect_get_vec_def_for_operand (mask, stmt,
6864 mask_vectype);
6865 }
6866
6867 /* We should have caught mismatched types earlier. */
6868 gcc_assert (useless_type_conversion_p (vectype,
6869 TREE_TYPE (vec_oprnd)));
6870 bool simd_lane_access_p
6871 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info);
6872 if (simd_lane_access_p
6873 && TREE_CODE (DR_BASE_ADDRESS (first_dr)) == ADDR_EXPR
6874 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr), 0))
6875 && integer_zerop (DR_OFFSET (first_dr))
6876 && integer_zerop (DR_INIT (first_dr))
6877 && alias_sets_conflict_p (get_alias_set (aggr_type),
6878 get_alias_set (TREE_TYPE (ref_type))))
6879 {
6880 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr));
6881 dataref_offset = build_int_cst (ref_type, 0);
6882 inv_p = false;
6883 }
6884 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
6885 {
6886 vect_get_gather_scatter_ops (loop, stmt, &gs_info,
6887 &dataref_ptr, &vec_offset);
6888 inv_p = false;
6889 }
6890 else
6891 dataref_ptr
6892 = vect_create_data_ref_ptr (first_stmt, aggr_type,
6893 simd_lane_access_p ? loop : NULL,
6894 offset, &dummy, gsi, &ptr_incr,
6895 simd_lane_access_p, &inv_p,
6896 NULL_TREE, bump);
6897 gcc_assert (bb_vinfo || !inv_p);
6898 }
6899 else
6900 {
6901 /* For interleaved stores we created vectorized defs for all the
6902 defs stored in OPRNDS in the previous iteration (previous copy).
6903 DR_CHAIN is then used as an input to vect_permute_store_chain(),
6904 and OPRNDS as an input to vect_get_vec_def_for_stmt_copy() for the
6905 next copy.
6906 If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and
6907 OPRNDS are of size 1. */
6908 for (i = 0; i < group_size; i++)
6909 {
6910 op = oprnds[i];
6911 vect_is_simple_use (op, vinfo, &def_stmt, &rhs_dt);
6912 vec_oprnd = vect_get_vec_def_for_stmt_copy (rhs_dt, op);
6913 dr_chain[i] = vec_oprnd;
6914 oprnds[i] = vec_oprnd;
6915 }
6916 if (mask)
6917 vec_mask = vect_get_vec_def_for_stmt_copy (mask_dt, vec_mask);
6918 if (dataref_offset)
6919 dataref_offset
6920 = int_const_binop (PLUS_EXPR, dataref_offset, bump);
6921 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
6922 vec_offset = vect_get_vec_def_for_stmt_copy (gs_info.offset_dt,
6923 vec_offset);
6924 else
6925 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
6926 bump);
6927 }
6928
6929 if (memory_access_type == VMAT_LOAD_STORE_LANES)
6930 {
6931 tree vec_array;
6932
6933 /* Combine all the vectors into an array. */
6934 vec_array = create_vector_array (vectype, vec_num);
6935 for (i = 0; i < vec_num; i++)
6936 {
6937 vec_oprnd = dr_chain[i];
6938 write_vector_array (stmt, gsi, vec_oprnd, vec_array, i);
6939 }
6940
6941 tree final_mask = NULL;
6942 if (loop_masks)
6943 final_mask = vect_get_loop_mask (gsi, loop_masks, ncopies,
6944 vectype, j);
6945 if (vec_mask)
6946 final_mask = prepare_load_store_mask (mask_vectype, final_mask,
6947 vec_mask, gsi);
6948
6949 gcall *call;
6950 if (final_mask)
6951 {
6952 /* Emit:
6953 MASK_STORE_LANES (DATAREF_PTR, ALIAS_PTR, VEC_MASK,
6954 VEC_ARRAY). */
6955 unsigned int align = TYPE_ALIGN_UNIT (TREE_TYPE (vectype));
6956 tree alias_ptr = build_int_cst (ref_type, align);
6957 call = gimple_build_call_internal (IFN_MASK_STORE_LANES, 4,
6958 dataref_ptr, alias_ptr,
6959 final_mask, vec_array);
6960 }
6961 else
6962 {
6963 /* Emit:
6964 MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY). */
6965 data_ref = create_array_ref (aggr_type, dataref_ptr, ref_type);
6966 call = gimple_build_call_internal (IFN_STORE_LANES, 1,
6967 vec_array);
6968 gimple_call_set_lhs (call, data_ref);
6969 }
6970 gimple_call_set_nothrow (call, true);
6971 new_stmt = call;
6972 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6973 }
6974 else
6975 {
6976 new_stmt = NULL;
6977 if (grouped_store)
6978 {
6979 if (j == 0)
6980 result_chain.create (group_size);
6981 /* Permute. */
6982 vect_permute_store_chain (dr_chain, group_size, stmt, gsi,
6983 &result_chain);
6984 }
6985
6986 next_stmt = first_stmt;
6987 for (i = 0; i < vec_num; i++)
6988 {
6989 unsigned align, misalign;
6990
6991 tree final_mask = NULL_TREE;
6992 if (loop_masks)
6993 final_mask = vect_get_loop_mask (gsi, loop_masks,
6994 vec_num * ncopies,
6995 vectype, vec_num * j + i);
6996 if (vec_mask)
6997 final_mask = prepare_load_store_mask (mask_vectype, final_mask,
6998 vec_mask, gsi);
6999
7000 if (memory_access_type == VMAT_GATHER_SCATTER)
7001 {
7002 tree scale = size_int (gs_info.scale);
7003 gcall *call;
7004 if (loop_masks)
7005 call = gimple_build_call_internal
7006 (IFN_MASK_SCATTER_STORE, 5, dataref_ptr, vec_offset,
7007 scale, vec_oprnd, final_mask);
7008 else
7009 call = gimple_build_call_internal
7010 (IFN_SCATTER_STORE, 4, dataref_ptr, vec_offset,
7011 scale, vec_oprnd);
7012 gimple_call_set_nothrow (call, true);
7013 new_stmt = call;
7014 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7015 break;
7016 }
7017
7018 if (i > 0)
7019 /* Bump the vector pointer. */
7020 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
7021 stmt, bump);
7022
7023 if (slp)
7024 vec_oprnd = vec_oprnds[i];
7025 else if (grouped_store)
7026 /* For grouped stores vectorized defs are interleaved in
7027 vect_permute_store_chain(). */
7028 vec_oprnd = result_chain[i];
7029
7030 align = DR_TARGET_ALIGNMENT (first_dr);
7031 if (aligned_access_p (first_dr))
7032 misalign = 0;
7033 else if (DR_MISALIGNMENT (first_dr) == -1)
7034 {
7035 align = dr_alignment (vect_dr_behavior (first_dr));
7036 misalign = 0;
7037 }
7038 else
7039 misalign = DR_MISALIGNMENT (first_dr);
7040 if (dataref_offset == NULL_TREE
7041 && TREE_CODE (dataref_ptr) == SSA_NAME)
7042 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
7043 misalign);
7044
7045 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
7046 {
7047 tree perm_mask = perm_mask_for_reverse (vectype);
7048 tree perm_dest
7049 = vect_create_destination_var (vect_get_store_rhs (stmt),
7050 vectype);
7051 tree new_temp = make_ssa_name (perm_dest);
7052
7053 /* Generate the permute statement. */
7054 gimple *perm_stmt
7055 = gimple_build_assign (new_temp, VEC_PERM_EXPR, vec_oprnd,
7056 vec_oprnd, perm_mask);
7057 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
7058
7059 perm_stmt = SSA_NAME_DEF_STMT (new_temp);
7060 vec_oprnd = new_temp;
7061 }
7062
7063 /* Arguments are ready. Create the new vector stmt. */
7064 if (final_mask)
7065 {
7066 align = least_bit_hwi (misalign | align);
7067 tree ptr = build_int_cst (ref_type, align);
7068 gcall *call
7069 = gimple_build_call_internal (IFN_MASK_STORE, 4,
7070 dataref_ptr, ptr,
7071 final_mask, vec_oprnd);
7072 gimple_call_set_nothrow (call, true);
7073 new_stmt = call;
7074 }
7075 else
7076 {
7077 data_ref = fold_build2 (MEM_REF, vectype,
7078 dataref_ptr,
7079 dataref_offset
7080 ? dataref_offset
7081 : build_int_cst (ref_type, 0));
7082 if (aligned_access_p (first_dr))
7083 ;
7084 else if (DR_MISALIGNMENT (first_dr) == -1)
7085 TREE_TYPE (data_ref)
7086 = build_aligned_type (TREE_TYPE (data_ref),
7087 align * BITS_PER_UNIT);
7088 else
7089 TREE_TYPE (data_ref)
7090 = build_aligned_type (TREE_TYPE (data_ref),
7091 TYPE_ALIGN (elem_type));
7092 vect_copy_ref_info (data_ref, DR_REF (first_dr));
7093 new_stmt = gimple_build_assign (data_ref, vec_oprnd);
7094 }
7095 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7096
7097 if (slp)
7098 continue;
7099
7100 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
7101 if (!next_stmt)
7102 break;
7103 }
7104 }
7105 if (!slp)
7106 {
7107 if (j == 0)
7108 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
7109 else
7110 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
7111 prev_stmt_info = vinfo_for_stmt (new_stmt);
7112 }
7113 }
7114
7115 oprnds.release ();
7116 result_chain.release ();
7117 vec_oprnds.release ();
7118
7119 return true;
7120 }
7121
7122 /* Given a vector type VECTYPE, turns permutation SEL into the equivalent
7123 VECTOR_CST mask. No checks are made that the target platform supports the
7124 mask, so callers may wish to test can_vec_perm_const_p separately, or use
7125 vect_gen_perm_mask_checked. */
7126
7127 tree
7128 vect_gen_perm_mask_any (tree vectype, const vec_perm_indices &sel)
7129 {
7130 tree mask_type;
7131
7132 poly_uint64 nunits = sel.length ();
7133 gcc_assert (known_eq (nunits, TYPE_VECTOR_SUBPARTS (vectype)));
7134
7135 mask_type = build_vector_type (ssizetype, nunits);
7136 return vec_perm_indices_to_tree (mask_type, sel);
7137 }
7138
7139 /* Checked version of vect_gen_perm_mask_any. Asserts can_vec_perm_const_p,
7140 i.e. that the target supports the pattern _for arbitrary input vectors_. */
7141
7142 tree
7143 vect_gen_perm_mask_checked (tree vectype, const vec_perm_indices &sel)
7144 {
7145 gcc_assert (can_vec_perm_const_p (TYPE_MODE (vectype), sel));
7146 return vect_gen_perm_mask_any (vectype, sel);
7147 }
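
/* As an illustrative sketch (modelled on perm_mask_for_reverse used elsewhere
   in the vectorizer, not copied from it; VECTYPE is assumed here to be a
   four-element vector type), a mask reversing a four-element vector could be
   built as follows:

     vec_perm_builder sel (4, 4, 1);
     for (unsigned int i = 0; i < 4; ++i)
       sel.quick_push (3 - i);
     vec_perm_indices indices (sel, 1, 4);
     tree mask = vect_gen_perm_mask_checked (vectype, indices);

   The checked variant asserts that the target supports the permutation;
   use vect_gen_perm_mask_any only when the caller verifies support
   itself.  */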
7148
7149 /* Given vector variables X and Y that were generated for the scalar
7150 STMT, generate instructions to permute the vector elements of X and Y
7151 using permutation mask MASK_VEC, insert them at *GSI and return the
7152 permuted vector variable. */
7153
7154 static tree
7155 permute_vec_elements (tree x, tree y, tree mask_vec, gimple *stmt,
7156 gimple_stmt_iterator *gsi)
7157 {
7158 tree vectype = TREE_TYPE (x);
7159 tree perm_dest, data_ref;
7160 gimple *perm_stmt;
7161
7162 tree scalar_dest = gimple_get_lhs (stmt);
7163 if (TREE_CODE (scalar_dest) == SSA_NAME)
7164 perm_dest = vect_create_destination_var (scalar_dest, vectype);
7165 else
7166 perm_dest = vect_get_new_vect_var (vectype, vect_simple_var, NULL);
7167 data_ref = make_ssa_name (perm_dest);
7168
7169 /* Generate the permute statement. */
7170 perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, x, y, mask_vec);
7171 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
7172
7173 return data_ref;
7174 }
7175
7176 /* Hoist the definitions of all SSA uses on STMT out of the loop LOOP,
7177 inserting them on the loop's preheader edge. Returns true if we
7178 were successful in doing so (and thus STMT can then be moved),
7179 otherwise returns false. */
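
/* For example (an illustrative sketch, not taken from any testcase): if STMT
   is the invariant load "_3 = *p_2" and "p_2 = &a + 16" is defined inside
   LOOP but uses only names defined outside of it, the definition of p_2 can
   be moved to the preheader, after which STMT itself may be hoisted.  If such
   a definition is a PHI or depends on another in-loop definition, we give up
   rather than recurse.  */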
7180
7181 static bool
7182 hoist_defs_of_uses (gimple *stmt, struct loop *loop)
7183 {
7184 ssa_op_iter i;
7185 tree op;
7186 bool any = false;
7187
7188 FOR_EACH_SSA_TREE_OPERAND (op, stmt, i, SSA_OP_USE)
7189 {
7190 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
7191 if (!gimple_nop_p (def_stmt)
7192 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
7193 {
7194 /* Make sure we don't need to recurse. While we could do
7195 so in simple cases, when there are more complex use webs
7196 we don't have an easy way to preserve stmt order to fulfil
7197 dependencies within them. */
7198 tree op2;
7199 ssa_op_iter i2;
7200 if (gimple_code (def_stmt) == GIMPLE_PHI)
7201 return false;
7202 FOR_EACH_SSA_TREE_OPERAND (op2, def_stmt, i2, SSA_OP_USE)
7203 {
7204 gimple *def_stmt2 = SSA_NAME_DEF_STMT (op2);
7205 if (!gimple_nop_p (def_stmt2)
7206 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt2)))
7207 return false;
7208 }
7209 any = true;
7210 }
7211 }
7212
7213 if (!any)
7214 return true;
7215
7216 FOR_EACH_SSA_TREE_OPERAND (op, stmt, i, SSA_OP_USE)
7217 {
7218 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
7219 if (!gimple_nop_p (def_stmt)
7220 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
7221 {
7222 gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
7223 gsi_remove (&gsi, false);
7224 gsi_insert_on_edge_immediate (loop_preheader_edge (loop), def_stmt);
7225 }
7226 }
7227
7228 return true;
7229 }
7230
7231 /* vectorizable_load.
7232
7233 Check if STMT reads a non-scalar data-ref (array/pointer/structure) that
7234 can be vectorized.
7235 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
7236 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
7237 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
7238
7239 static bool
7240 vectorizable_load (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
7241 slp_tree slp_node, slp_instance slp_node_instance)
7242 {
7243 tree scalar_dest;
7244 tree vec_dest = NULL;
7245 tree data_ref = NULL;
7246 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
7247 stmt_vec_info prev_stmt_info;
7248 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
7249 struct loop *loop = NULL;
7250 struct loop *containing_loop = (gimple_bb (stmt))->loop_father;
7251 bool nested_in_vect_loop = false;
7252 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
7253 tree elem_type;
7254 tree new_temp;
7255 machine_mode mode;
7256 gimple *new_stmt = NULL;
7257 tree dummy;
7258 enum dr_alignment_support alignment_support_scheme;
7259 tree dataref_ptr = NULL_TREE;
7260 tree dataref_offset = NULL_TREE;
7261 gimple *ptr_incr = NULL;
7262 int ncopies;
7263 int i, j;
7264 unsigned int group_size;
7265 poly_uint64 group_gap_adj;
7266 tree msq = NULL_TREE, lsq;
7267 tree offset = NULL_TREE;
7268 tree byte_offset = NULL_TREE;
7269 tree realignment_token = NULL_TREE;
7270 gphi *phi = NULL;
7271 vec<tree> dr_chain = vNULL;
7272 bool grouped_load = false;
7273 gimple *first_stmt;
7274 gimple *first_stmt_for_drptr = NULL;
7275 bool inv_p;
7276 bool compute_in_loop = false;
7277 struct loop *at_loop;
7278 int vec_num;
7279 bool slp = (slp_node != NULL);
7280 bool slp_perm = false;
7281 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
7282 poly_uint64 vf;
7283 tree aggr_type;
7284 gather_scatter_info gs_info;
7285 vec_info *vinfo = stmt_info->vinfo;
7286 tree ref_type;
7287 enum vect_def_type mask_dt = vect_unknown_def_type;
7288
7289 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
7290 return false;
7291
7292 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
7293 && ! vec_stmt)
7294 return false;
7295
7296 tree mask = NULL_TREE, mask_vectype = NULL_TREE;
7297 if (is_gimple_assign (stmt))
7298 {
7299 scalar_dest = gimple_assign_lhs (stmt);
7300 if (TREE_CODE (scalar_dest) != SSA_NAME)
7301 return false;
7302
7303 tree_code code = gimple_assign_rhs_code (stmt);
7304 if (code != ARRAY_REF
7305 && code != BIT_FIELD_REF
7306 && code != INDIRECT_REF
7307 && code != COMPONENT_REF
7308 && code != IMAGPART_EXPR
7309 && code != REALPART_EXPR
7310 && code != MEM_REF
7311 && TREE_CODE_CLASS (code) != tcc_declaration)
7312 return false;
7313 }
7314 else
7315 {
7316 gcall *call = dyn_cast <gcall *> (stmt);
7317 if (!call || !gimple_call_internal_p (call))
7318 return false;
7319
7320 internal_fn ifn = gimple_call_internal_fn (call);
7321 if (!internal_load_fn_p (ifn))
7322 return false;
7323
7324 scalar_dest = gimple_call_lhs (call);
7325 if (!scalar_dest)
7326 return false;
7327
7328 if (slp_node != NULL)
7329 {
7330 if (dump_enabled_p ())
7331 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7332 "SLP of masked loads not supported.\n");
7333 return false;
7334 }
7335
7336 int mask_index = internal_fn_mask_index (ifn);
7337 if (mask_index >= 0)
7338 {
7339 mask = gimple_call_arg (call, mask_index);
7340 if (!vect_check_load_store_mask (stmt, mask, &mask_dt,
7341 &mask_vectype))
7342 return false;
7343 }
7344 }
7345
7346 if (!STMT_VINFO_DATA_REF (stmt_info))
7347 return false;
7348
7349 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
7350 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
7351
7352 if (loop_vinfo)
7353 {
7354 loop = LOOP_VINFO_LOOP (loop_vinfo);
7355 nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
7356 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
7357 }
7358 else
7359 vf = 1;
7360
7361 /* Multiple types in SLP are handled by creating the appropriate number of
7362 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
7363 case of SLP. */
7364 if (slp)
7365 ncopies = 1;
7366 else
7367 ncopies = vect_get_num_copies (loop_vinfo, vectype);
7368
7369 gcc_assert (ncopies >= 1);
7370
7371 /* FORNOW. This restriction should be relaxed. */
7372 if (nested_in_vect_loop && ncopies > 1)
7373 {
7374 if (dump_enabled_p ())
7375 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7376 "multiple types in nested loop.\n");
7377 return false;
7378 }
7379
7380 /* Invalidate assumptions made by dependence analysis when vectorization
7381 on the unrolled body effectively re-orders stmts. */
7382 if (ncopies > 1
7383 && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
7384 && maybe_gt (LOOP_VINFO_VECT_FACTOR (loop_vinfo),
7385 STMT_VINFO_MIN_NEG_DIST (stmt_info)))
7386 {
7387 if (dump_enabled_p ())
7388 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7389 "cannot perform implicit CSE when unrolling "
7390 "with negative dependence distance\n");
7391 return false;
7392 }
7393
7394 elem_type = TREE_TYPE (vectype);
7395 mode = TYPE_MODE (vectype);
7396
7397 /* FORNOW. In some cases we can vectorize even if the data type is not
7398 supported (e.g. data copies). */
7399 if (optab_handler (mov_optab, mode) == CODE_FOR_nothing)
7400 {
7401 if (dump_enabled_p ())
7402 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7403 "Aligned load, but unsupported type.\n");
7404 return false;
7405 }
7406
7407 /* Check if the load is a part of an interleaving chain. */
7408 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
7409 {
7410 grouped_load = true;
7411 /* FORNOW */
7412 gcc_assert (!nested_in_vect_loop);
7413 gcc_assert (!STMT_VINFO_GATHER_SCATTER_P (stmt_info));
7414
7415 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
7416 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
7417
7418 if (slp && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
7419 slp_perm = true;
7420
7421 /* Invalidate assumptions made by dependence analysis when vectorization
7422 on the unrolled body effectively re-orders stmts. */
7423 if (!PURE_SLP_STMT (stmt_info)
7424 && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
7425 && maybe_gt (LOOP_VINFO_VECT_FACTOR (loop_vinfo),
7426 STMT_VINFO_MIN_NEG_DIST (stmt_info)))
7427 {
7428 if (dump_enabled_p ())
7429 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7430 "cannot perform implicit CSE when performing "
7431 "group loads with negative dependence distance\n");
7432 return false;
7433 }
7434
7435 /* Similarly when the stmt is a load that is both part of a SLP
7436 instance and a loop vectorized stmt via the same-dr mechanism
7437 we have to give up. */
7438 if (STMT_VINFO_GROUP_SAME_DR_STMT (stmt_info)
7439 && (STMT_SLP_TYPE (stmt_info)
7440 != STMT_SLP_TYPE (vinfo_for_stmt
7441 (STMT_VINFO_GROUP_SAME_DR_STMT (stmt_info)))))
7442 {
7443 if (dump_enabled_p ())
7444 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7445 "conflicting SLP types for CSEd load\n");
7446 return false;
7447 }
7448 }
7449 else
7450 group_size = 1;
7451
7452 vect_memory_access_type memory_access_type;
7453 if (!get_load_store_type (stmt, vectype, slp, mask, VLS_LOAD, ncopies,
7454 &memory_access_type, &gs_info))
7455 return false;
7456
7457 if (mask)
7458 {
7459 if (memory_access_type == VMAT_CONTIGUOUS)
7460 {
7461 machine_mode vec_mode = TYPE_MODE (vectype);
7462 if (!VECTOR_MODE_P (vec_mode)
7463 || !can_vec_mask_load_store_p (vec_mode,
7464 TYPE_MODE (mask_vectype), true))
7465 return false;
7466 }
7467 else if (memory_access_type == VMAT_GATHER_SCATTER && gs_info.decl)
7468 {
7469 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info.decl));
7470 tree masktype
7471 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (TREE_CHAIN (arglist))));
7472 if (TREE_CODE (masktype) == INTEGER_TYPE)
7473 {
7474 if (dump_enabled_p ())
7475 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7476 "masked gather with integer mask not"
7477 " supported.");
7478 return false;
7479 }
7480 }
7481 else if (memory_access_type != VMAT_LOAD_STORE_LANES
7482 && memory_access_type != VMAT_GATHER_SCATTER)
7483 {
7484 if (dump_enabled_p ())
7485 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7486 "unsupported access type for masked load.\n");
7487 return false;
7488 }
7489 }
7490
7491 if (!vec_stmt) /* transformation not required. */
7492 {
7493 if (!slp)
7494 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;
7495
7496 if (loop_vinfo
7497 && LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo))
7498 check_load_store_masking (loop_vinfo, vectype, VLS_LOAD, group_size,
7499 memory_access_type, &gs_info);
7500
7501 STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
7502 /* The SLP costs are calculated during SLP analysis. */
7503 if (! slp_node)
7504 vect_model_load_cost (stmt_info, ncopies, memory_access_type,
7505 NULL, NULL, NULL);
7506 return true;
7507 }
7508
7509 if (!slp)
7510 gcc_assert (memory_access_type
7511 == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info));
7512
7513 if (dump_enabled_p ())
7514 dump_printf_loc (MSG_NOTE, vect_location,
7515 "transform load. ncopies = %d\n", ncopies);
7516
7517 /* Transform. */
7518
7519 ensure_base_align (dr);
7520
7521 if (memory_access_type == VMAT_GATHER_SCATTER && gs_info.decl)
7522 {
7523 vect_build_gather_load_calls (stmt, gsi, vec_stmt, &gs_info, mask,
7524 mask_dt);
7525 return true;
7526 }
7527
7528 if (memory_access_type == VMAT_ELEMENTWISE
7529 || memory_access_type == VMAT_STRIDED_SLP)
7530 {
7531 gimple_stmt_iterator incr_gsi;
7532 bool insert_after;
7533 gimple *incr;
7534 tree offvar;
7535 tree ivstep;
7536 tree running_off;
7537 vec<constructor_elt, va_gc> *v = NULL;
7538 tree stride_base, stride_step, alias_off;
7539 /* Checked by get_load_store_type. */
7540 unsigned int const_nunits = nunits.to_constant ();
7541 unsigned HOST_WIDE_INT cst_offset = 0;
7542
7543 gcc_assert (!LOOP_VINFO_FULLY_MASKED_P (loop_vinfo));
7544 gcc_assert (!nested_in_vect_loop);
7545
7546 if (grouped_load)
7547 {
7548 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
7549 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
7550 }
7551 else
7552 {
7553 first_stmt = stmt;
7554 first_dr = dr;
7555 }
7556 if (slp && grouped_load)
7557 {
7558 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
7559 ref_type = get_group_alias_ptr_type (first_stmt);
7560 }
7561 else
7562 {
7563 if (grouped_load)
7564 cst_offset
7565 = (tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)))
7566 * vect_get_place_in_interleaving_chain (stmt, first_stmt));
7567 group_size = 1;
7568 ref_type = reference_alias_ptr_type (DR_REF (dr));
7569 }
7570
7571 stride_base
7572 = fold_build_pointer_plus
7573 (DR_BASE_ADDRESS (first_dr),
7574 size_binop (PLUS_EXPR,
7575 convert_to_ptrofftype (DR_OFFSET (first_dr)),
7576 convert_to_ptrofftype (DR_INIT (first_dr))));
7577 stride_step = fold_convert (sizetype, DR_STEP (first_dr));
7578
7579 /* For a load with loop-invariant (but other than power-of-2)
7580 stride (i.e. not a grouped access) like so:
7581
7582 for (i = 0; i < n; i += stride)
7583 ... = array[i];
7584
7585 we generate a new induction variable and new accesses to
7586 form a new vector (or vectors, depending on ncopies):
7587
7588 for (j = 0; ; j += VF*stride)
7589 tmp1 = array[j];
7590 tmp2 = array[j + stride];
7591 ...
7592 vectemp = {tmp1, tmp2, ...}
7593 */
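
/* As a concrete illustrative instance (numbers chosen for exposition only):
   with stride 3, a four-element vector type and VF = 4, each vector is
   assembled from array[j], array[j+3], array[j+6] and array[j+9], and the
   induction variable created below advances by VF*stride = 12 array
   elements per vector iteration.  */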
7594
7595 ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (stride_step), stride_step,
7596 build_int_cst (TREE_TYPE (stride_step), vf));
7597
7598 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
7599
7600 stride_base = cse_and_gimplify_to_preheader (loop_vinfo, stride_base);
7601 ivstep = cse_and_gimplify_to_preheader (loop_vinfo, ivstep);
7602 create_iv (stride_base, ivstep, NULL,
7603 loop, &incr_gsi, insert_after,
7604 &offvar, NULL);
7605 incr = gsi_stmt (incr_gsi);
7606 set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo));
7607
7608 stride_step = cse_and_gimplify_to_preheader (loop_vinfo, stride_step);
7609
7610 prev_stmt_info = NULL;
7611 running_off = offvar;
7612 alias_off = build_int_cst (ref_type, 0);
7613 int nloads = const_nunits;
7614 int lnel = 1;
7615 tree ltype = TREE_TYPE (vectype);
7616 tree lvectype = vectype;
7617 auto_vec<tree> dr_chain;
7618 if (memory_access_type == VMAT_STRIDED_SLP)
7619 {
7620 if (group_size < const_nunits)
7621 {
7622 /* First check if vec_init optab supports construction from
7623 vector elts directly. */
7624 scalar_mode elmode = SCALAR_TYPE_MODE (TREE_TYPE (vectype));
7625 machine_mode vmode;
7626 if (mode_for_vector (elmode, group_size).exists (&vmode)
7627 && VECTOR_MODE_P (vmode)
7628 && targetm.vector_mode_supported_p (vmode)
7629 && (convert_optab_handler (vec_init_optab,
7630 TYPE_MODE (vectype), vmode)
7631 != CODE_FOR_nothing))
7632 {
7633 nloads = const_nunits / group_size;
7634 lnel = group_size;
7635 ltype = build_vector_type (TREE_TYPE (vectype), group_size);
7636 }
7637 else
7638 {
7639 /* Otherwise avoid emitting a constructor of vector elements
7640 by performing the loads using an integer type of the same
7641 size, constructing a vector of those and then
7642 re-interpreting it as the original vector type.
7643 This avoids a huge runtime penalty due to the general
7644 inability to perform store forwarding from smaller stores
7645 to a larger load. */
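/* As an illustrative example (numbers chosen for exposition only): for a
   group_size of 2 and a V4SF vectype, lsize is 2 * 32 = 64 bits, so each
   two-element group is loaded as a single 64-bit integer, a two-element
   integer vector is built from those loads, and a VIEW_CONVERT_EXPR
   below reinterprets that vector as the original V4SF vectype.  */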
7646 unsigned lsize
7647 = group_size * TYPE_PRECISION (TREE_TYPE (vectype));
7648 elmode = int_mode_for_size (lsize, 0).require ();
7649 unsigned int lnunits = const_nunits / group_size;
7650 /* If we can't construct such a vector fall back to
7651 element loads of the original vector type. */
7652 if (mode_for_vector (elmode, lnunits).exists (&vmode)
7653 && VECTOR_MODE_P (vmode)
7654 && targetm.vector_mode_supported_p (vmode)
7655 && (convert_optab_handler (vec_init_optab, vmode, elmode)
7656 != CODE_FOR_nothing))
7657 {
7658 nloads = lnunits;
7659 lnel = group_size;
7660 ltype = build_nonstandard_integer_type (lsize, 1);
7661 lvectype = build_vector_type (ltype, nloads);
7662 }
7663 }
7664 }
7665 else
7666 {
7667 nloads = 1;
7668 lnel = const_nunits;
7669 ltype = vectype;
7670 }
7671 ltype = build_aligned_type (ltype, TYPE_ALIGN (TREE_TYPE (vectype)));
7672 }
7673 /* Load vector(1) scalar_type directly if the vectype has just one element. */
7674 else if (nloads == 1)
7675 ltype = vectype;
7676
7677 if (slp)
7678 {
7679 /* For SLP permutation support we need to load the whole group,
7680 not only the number of vector stmts the permutation result
7681 fits in. */
7682 if (slp_perm)
7683 {
7684 /* We don't yet generate SLP_TREE_LOAD_PERMUTATIONs for
7685 variable VF. */
7686 unsigned int const_vf = vf.to_constant ();
7687 ncopies = CEIL (group_size * const_vf, const_nunits);
7688 dr_chain.create (ncopies);
7689 }
7690 else
7691 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
7692 }
7693 unsigned int group_el = 0;
7694 unsigned HOST_WIDE_INT
7695 elsz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
7696 for (j = 0; j < ncopies; j++)
7697 {
7698 if (nloads > 1)
7699 vec_alloc (v, nloads);
7700 for (i = 0; i < nloads; i++)
7701 {
7702 tree this_off = build_int_cst (TREE_TYPE (alias_off),
7703 group_el * elsz + cst_offset);
7704 tree data_ref = build2 (MEM_REF, ltype, running_off, this_off);
7705 vect_copy_ref_info (data_ref, DR_REF (first_dr));
7706 new_stmt = gimple_build_assign (make_ssa_name (ltype), data_ref);
7707 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7708 if (nloads > 1)
7709 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
7710 gimple_assign_lhs (new_stmt));
7711
7712 group_el += lnel;
7713 if (! slp
7714 || group_el == group_size)
7715 {
7716 tree newoff = copy_ssa_name (running_off);
7717 gimple *incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
7718 running_off, stride_step);
7719 vect_finish_stmt_generation (stmt, incr, gsi);
7720
7721 running_off = newoff;
7722 group_el = 0;
7723 }
7724 }
7725 if (nloads > 1)
7726 {
7727 tree vec_inv = build_constructor (lvectype, v);
7728 new_temp = vect_init_vector (stmt, vec_inv, lvectype, gsi);
7729 new_stmt = SSA_NAME_DEF_STMT (new_temp);
7730 if (lvectype != vectype)
7731 {
7732 new_stmt = gimple_build_assign (make_ssa_name (vectype),
7733 VIEW_CONVERT_EXPR,
7734 build1 (VIEW_CONVERT_EXPR,
7735 vectype, new_temp));
7736 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7737 }
7738 }
7739
7740 if (slp)
7741 {
7742 if (slp_perm)
7743 dr_chain.quick_push (gimple_assign_lhs (new_stmt));
7744 else
7745 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
7746 }
7747 else
7748 {
7749 if (j == 0)
7750 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
7751 else
7752 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
7753 prev_stmt_info = vinfo_for_stmt (new_stmt);
7754 }
7755 }
7756 if (slp_perm)
7757 {
7758 unsigned n_perms;
7759 vect_transform_slp_perm_load (slp_node, dr_chain, gsi, vf,
7760 slp_node_instance, false, &n_perms);
7761 }
7762 return true;
7763 }
7764
7765 if (memory_access_type == VMAT_GATHER_SCATTER
7766 || (!slp && memory_access_type == VMAT_CONTIGUOUS))
7767 grouped_load = false;
7768
7769 if (grouped_load)
7770 {
7771 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
7772 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
7773 /* For SLP vectorization we directly vectorize a subchain
7774 without permutation. */
7775 if (slp && ! SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
7776 first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
7777 /* For BB vectorization always use the first stmt to base
7778 the data ref pointer on. */
7779 if (bb_vinfo)
7780 first_stmt_for_drptr = SLP_TREE_SCALAR_STMTS (slp_node)[0];
7781
7782 /* Check if the chain of loads is already vectorized. */
7783 if (STMT_VINFO_VEC_STMT (vinfo_for_stmt (first_stmt))
7784 /* For SLP we would need to copy over SLP_TREE_VEC_STMTS.
7785 ??? But we can only do so if there is exactly one
7786 as we have no way to get at the rest. Leave the CSE
7787 opportunity alone.
7788 ??? With the group load eventually participating
7789 in multiple different permutations (having multiple
7790 slp nodes which refer to the same group) the CSE
7791 is even wrong code. See PR56270. */
7792 && !slp)
7793 {
7794 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
7795 return true;
7796 }
7797 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
7798 group_gap_adj = 0;
7799
7800 /* VEC_NUM is the number of vect stmts to be created for this group. */
7801 if (slp)
7802 {
7803 grouped_load = false;
7804 /* For SLP permutation support we need to load the whole group,
7805 not only the number of vector stmts the permutation result
7806 fits in. */
7807 if (slp_perm)
7808 {
7809 /* We don't yet generate SLP_TREE_LOAD_PERMUTATIONs for
7810 variable VF. */
7811 unsigned int const_vf = vf.to_constant ();
7812 unsigned int const_nunits = nunits.to_constant ();
7813 vec_num = CEIL (group_size * const_vf, const_nunits);
7814 group_gap_adj = vf * group_size - nunits * vec_num;
7815 }
7816 else
7817 {
7818 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
7819 group_gap_adj
7820 = group_size - SLP_INSTANCE_GROUP_SIZE (slp_node_instance);
7821 }
7822 }
7823 else
7824 vec_num = group_size;
7825
7826 ref_type = get_group_alias_ptr_type (first_stmt);
7827 }
7828 else
7829 {
7830 first_stmt = stmt;
7831 first_dr = dr;
7832 group_size = vec_num = 1;
7833 group_gap_adj = 0;
7834 ref_type = reference_alias_ptr_type (DR_REF (first_dr));
7835 }
7836
7837 alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
7838 gcc_assert (alignment_support_scheme);
7839 vec_loop_masks *loop_masks
7840 = (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
7841 ? &LOOP_VINFO_MASKS (loop_vinfo)
7842 : NULL);
7843 /* Targets with load-lane instructions must not require explicit
7844 realignment. vect_supportable_dr_alignment always returns either
7845 dr_aligned or dr_unaligned_supported for masked operations. */
7846 gcc_assert ((memory_access_type != VMAT_LOAD_STORE_LANES
7847 && !mask
7848 && !loop_masks)
7849 || alignment_support_scheme == dr_aligned
7850 || alignment_support_scheme == dr_unaligned_supported);
7851
7852 /* In case the vectorization factor (VF) is bigger than the number
7853 of elements that we can fit in a vectype (nunits), we have to generate
7854 more than one vector stmt, i.e. we need to "unroll" the
7855 vector stmt by a factor VF/nunits. In doing so, we record a pointer
7856 from one copy of the vector stmt to the next, in the field
7857 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
7858 stages to find the correct vector defs to be used when vectorizing
7859 stmts that use the defs of the current stmt. The example below
7860 illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
7861 need to create 4 vectorized stmts):
7862
7863 before vectorization:
7864 RELATED_STMT VEC_STMT
7865 S1: x = memref - -
7866 S2: z = x + 1 - -
7867
7868 step 1: vectorize stmt S1:
7869 We first create the vector stmt VS1_0, and, as usual, record a
7870 pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
7871 Next, we create the vector stmt VS1_1, and record a pointer to
7872 it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
7873 Similarly, for VS1_2 and VS1_3. This is the resulting chain of
7874 stmts and pointers:
7875 RELATED_STMT VEC_STMT
7876 VS1_0: vx0 = memref0 VS1_1 -
7877 VS1_1: vx1 = memref1 VS1_2 -
7878 VS1_2: vx2 = memref2 VS1_3 -
7879 VS1_3: vx3 = memref3 - -
7880 S1: x = load - VS1_0
7881 S2: z = x + 1 - -
7882
7883 See in documentation in vect_get_vec_def_for_stmt_copy for how the
7884 information we recorded in RELATED_STMT field is used to vectorize
7885 stmt S2. */
7886
7887 /* In case of interleaving (non-unit grouped access):
7888
7889 S1: x2 = &base + 2
7890 S2: x0 = &base
7891 S3: x1 = &base + 1
7892 S4: x3 = &base + 3
7893
7894 Vectorized loads are created in the order of memory accesses
7895 starting from the access of the first stmt of the chain:
7896
7897 VS1: vx0 = &base
7898 VS2: vx1 = &base + vec_size*1
7899 VS3: vx3 = &base + vec_size*2
7900 VS4: vx4 = &base + vec_size*3
7901
7902 Then permutation statements are generated:
7903
7904 VS5: vx5 = VEC_PERM_EXPR < vx0, vx1, { 0, 2, ..., i*2 } >
7905 VS6: vx6 = VEC_PERM_EXPR < vx0, vx1, { 1, 3, ..., i*2+1 } >
7906 ...
7907
7908 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
7909 (the order of the data-refs in the output of vect_permute_load_chain
7910 corresponds to the order of scalar stmts in the interleaving chain - see
7911 the documentation of vect_permute_load_chain()).
7912 The generation of permutation stmts and recording them in
7913 STMT_VINFO_VEC_STMT is done in vect_transform_grouped_load().
7914
7915 In case of both multiple types and interleaving, the vector loads and
7916 permutation stmts above are created for every copy. The result vector
7917 stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
7918 corresponding STMT_VINFO_RELATED_STMT for the next copies. */
7919
7920 /* If the data reference is aligned (dr_aligned) or potentially unaligned
7921 on a target that supports unaligned accesses (dr_unaligned_supported)
7922 we generate the following code:
7923 p = initial_addr;
7924 indx = 0;
7925 loop {
7926 p = p + indx * vectype_size;
7927 vec_dest = *(p);
7928 indx = indx + 1;
7929 }
7930
7931 Otherwise, the data reference is potentially unaligned on a target that
7932 does not support unaligned accesses (dr_explicit_realign_optimized) -
7933 then generate the following code, in which the data in each iteration is
7934 obtained by two vector loads, one from the previous iteration, and one
7935 from the current iteration:
7936 p1 = initial_addr;
7937 msq_init = *(floor(p1))
7938 p2 = initial_addr + VS - 1;
7939 realignment_token = call target_builtin;
7940 indx = 0;
7941 loop {
7942 p2 = p2 + indx * vectype_size
7943 lsq = *(floor(p2))
7944 vec_dest = realign_load (msq, lsq, realignment_token)
7945 indx = indx + 1;
7946 msq = lsq;
7947 } */
7948
7949 /* If the misalignment remains the same throughout the execution of the
7950 loop, we can create the init_addr and permutation mask at the loop
7951 preheader. Otherwise, it needs to be created inside the loop.
7952 This can only occur when vectorizing memory accesses in the inner-loop
7953 nested within an outer-loop that is being vectorized. */
7954
7955 if (nested_in_vect_loop
7956 && !multiple_p (DR_STEP_ALIGNMENT (dr),
7957 GET_MODE_SIZE (TYPE_MODE (vectype))))
7958 {
7959 gcc_assert (alignment_support_scheme != dr_explicit_realign_optimized);
7960 compute_in_loop = true;
7961 }
7962
7963 if ((alignment_support_scheme == dr_explicit_realign_optimized
7964 || alignment_support_scheme == dr_explicit_realign)
7965 && !compute_in_loop)
7966 {
7967 msq = vect_setup_realignment (first_stmt, gsi, &realignment_token,
7968 alignment_support_scheme, NULL_TREE,
7969 &at_loop);
7970 if (alignment_support_scheme == dr_explicit_realign_optimized)
7971 {
7972 phi = as_a <gphi *> (SSA_NAME_DEF_STMT (msq));
7973 byte_offset = size_binop (MINUS_EXPR, TYPE_SIZE_UNIT (vectype),
7974 size_one_node);
7975 }
7976 }
7977 else
7978 at_loop = loop;
7979
7980 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
7981 offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
7982
7983 tree bump;
7984 tree vec_offset = NULL_TREE;
7985 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
7986 {
7987 aggr_type = NULL_TREE;
7988 bump = NULL_TREE;
7989 }
7990 else if (memory_access_type == VMAT_GATHER_SCATTER)
7991 {
7992 aggr_type = elem_type;
7993 vect_get_strided_load_store_ops (stmt, loop_vinfo, &gs_info,
7994 &bump, &vec_offset);
7995 }
7996 else
7997 {
7998 if (memory_access_type == VMAT_LOAD_STORE_LANES)
7999 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
8000 else
8001 aggr_type = vectype;
8002 bump = vect_get_data_ptr_increment (dr, aggr_type, memory_access_type);
8003 }
8004
8005 tree vec_mask = NULL_TREE;
8006 prev_stmt_info = NULL;
8007 poly_uint64 group_elt = 0;
8008 for (j = 0; j < ncopies; j++)
8009 {
8010 /* 1. Create the vector or array pointer update chain. */
8011 if (j == 0)
8012 {
8013 bool simd_lane_access_p
8014 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info);
8015 if (simd_lane_access_p
8016 && TREE_CODE (DR_BASE_ADDRESS (first_dr)) == ADDR_EXPR
8017 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr), 0))
8018 && integer_zerop (DR_OFFSET (first_dr))
8019 && integer_zerop (DR_INIT (first_dr))
8020 && alias_sets_conflict_p (get_alias_set (aggr_type),
8021 get_alias_set (TREE_TYPE (ref_type)))
8022 && (alignment_support_scheme == dr_aligned
8023 || alignment_support_scheme == dr_unaligned_supported))
8024 {
8025 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr));
8026 dataref_offset = build_int_cst (ref_type, 0);
8027 inv_p = false;
8028 }
8029 else if (first_stmt_for_drptr
8030 && first_stmt != first_stmt_for_drptr)
8031 {
8032 dataref_ptr
8033 = vect_create_data_ref_ptr (first_stmt_for_drptr, aggr_type,
8034 at_loop, offset, &dummy, gsi,
8035 &ptr_incr, simd_lane_access_p,
8036 &inv_p, byte_offset, bump);
8037 /* Adjust the pointer by the difference to first_stmt. */
8038 data_reference_p ptrdr
8039 = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt_for_drptr));
8040 tree diff = fold_convert (sizetype,
8041 size_binop (MINUS_EXPR,
8042 DR_INIT (first_dr),
8043 DR_INIT (ptrdr)));
8044 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
8045 stmt, diff);
8046 }
8047 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
8048 {
8049 vect_get_gather_scatter_ops (loop, stmt, &gs_info,
8050 &dataref_ptr, &vec_offset);
8051 inv_p = false;
8052 }
8053 else
8054 dataref_ptr
8055 = vect_create_data_ref_ptr (first_stmt, aggr_type, at_loop,
8056 offset, &dummy, gsi, &ptr_incr,
8057 simd_lane_access_p, &inv_p,
8058 byte_offset, bump);
8059 if (mask)
8060 vec_mask = vect_get_vec_def_for_operand (mask, stmt,
8061 mask_vectype);
8062 }
8063 else
8064 {
8065 if (dataref_offset)
8066 dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset,
8067 bump);
8068 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
8069 vec_offset = vect_get_vec_def_for_stmt_copy (gs_info.offset_dt,
8070 vec_offset);
8071 else
8072 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
8073 stmt, bump);
8074 if (mask)
8075 vec_mask = vect_get_vec_def_for_stmt_copy (mask_dt, vec_mask);
8076 }
8077
8078 if (grouped_load || slp_perm)
8079 dr_chain.create (vec_num);
8080
8081 if (memory_access_type == VMAT_LOAD_STORE_LANES)
8082 {
8083 tree vec_array;
8084
8085 vec_array = create_vector_array (vectype, vec_num);
8086
8087 tree final_mask = NULL_TREE;
8088 if (loop_masks)
8089 final_mask = vect_get_loop_mask (gsi, loop_masks, ncopies,
8090 vectype, j);
8091 if (vec_mask)
8092 final_mask = prepare_load_store_mask (mask_vectype, final_mask,
8093 vec_mask, gsi);
8094
8095 gcall *call;
8096 if (final_mask)
8097 {
8098 /* Emit:
8099 VEC_ARRAY = MASK_LOAD_LANES (DATAREF_PTR, ALIAS_PTR,
8100 VEC_MASK). */
8101 unsigned int align = TYPE_ALIGN_UNIT (TREE_TYPE (vectype));
8102 tree alias_ptr = build_int_cst (ref_type, align);
8103 call = gimple_build_call_internal (IFN_MASK_LOAD_LANES, 3,
8104 dataref_ptr, alias_ptr,
8105 final_mask);
8106 }
8107 else
8108 {
8109 /* Emit:
8110 VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]). */
8111 data_ref = create_array_ref (aggr_type, dataref_ptr, ref_type);
8112 call = gimple_build_call_internal (IFN_LOAD_LANES, 1, data_ref);
8113 }
8114 gimple_call_set_lhs (call, vec_array);
8115 gimple_call_set_nothrow (call, true);
8116 new_stmt = call;
8117 vect_finish_stmt_generation (stmt, new_stmt, gsi);
8118
8119 /* Extract each vector into an SSA_NAME. */
8120 for (i = 0; i < vec_num; i++)
8121 {
8122 new_temp = read_vector_array (stmt, gsi, scalar_dest,
8123 vec_array, i);
8124 dr_chain.quick_push (new_temp);
8125 }
8126
8127 /* Record the mapping between SSA_NAMEs and statements. */
8128 vect_record_grouped_load_vectors (stmt, dr_chain);
8129 }
8130 else
8131 {
8132 for (i = 0; i < vec_num; i++)
8133 {
8134 tree final_mask = NULL_TREE;
8135 if (loop_masks
8136 && memory_access_type != VMAT_INVARIANT)
8137 final_mask = vect_get_loop_mask (gsi, loop_masks,
8138 vec_num * ncopies,
8139 vectype, vec_num * j + i);
8140 if (vec_mask)
8141 final_mask = prepare_load_store_mask (mask_vectype, final_mask,
8142 vec_mask, gsi);
8143
8144 if (i > 0)
8145 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
8146 stmt, bump);
8147
8148 /* 2. Create the vector-load in the loop. */
8149 switch (alignment_support_scheme)
8150 {
8151 case dr_aligned:
8152 case dr_unaligned_supported:
8153 {
8154 unsigned int align, misalign;
8155
8156 if (memory_access_type == VMAT_GATHER_SCATTER)
8157 {
8158 tree scale = size_int (gs_info.scale);
8159 gcall *call;
8160 if (loop_masks)
8161 call = gimple_build_call_internal
8162 (IFN_MASK_GATHER_LOAD, 4, dataref_ptr,
8163 vec_offset, scale, final_mask);
8164 else
8165 call = gimple_build_call_internal
8166 (IFN_GATHER_LOAD, 3, dataref_ptr,
8167 vec_offset, scale);
8168 gimple_call_set_nothrow (call, true);
8169 new_stmt = call;
8170 data_ref = NULL_TREE;
8171 break;
8172 }
8173
8174 align = DR_TARGET_ALIGNMENT (dr);
8175 if (alignment_support_scheme == dr_aligned)
8176 {
8177 gcc_assert (aligned_access_p (first_dr));
8178 misalign = 0;
8179 }
8180 else if (DR_MISALIGNMENT (first_dr) == -1)
8181 {
8182 align = dr_alignment (vect_dr_behavior (first_dr));
8183 misalign = 0;
8184 }
8185 else
8186 misalign = DR_MISALIGNMENT (first_dr);
8187 if (dataref_offset == NULL_TREE
8188 && TREE_CODE (dataref_ptr) == SSA_NAME)
8189 set_ptr_info_alignment (get_ptr_info (dataref_ptr),
8190 align, misalign);
8191
8192 if (final_mask)
8193 {
8194 align = least_bit_hwi (misalign | align);
8195 tree ptr = build_int_cst (ref_type, align);
8196 gcall *call
8197 = gimple_build_call_internal (IFN_MASK_LOAD, 3,
8198 dataref_ptr, ptr,
8199 final_mask);
8200 gimple_call_set_nothrow (call, true);
8201 new_stmt = call;
8202 data_ref = NULL_TREE;
8203 }
8204 else
8205 {
8206 data_ref
8207 = fold_build2 (MEM_REF, vectype, dataref_ptr,
8208 dataref_offset
8209 ? dataref_offset
8210 : build_int_cst (ref_type, 0));
8211 if (alignment_support_scheme == dr_aligned)
8212 ;
8213 else if (DR_MISALIGNMENT (first_dr) == -1)
8214 TREE_TYPE (data_ref)
8215 = build_aligned_type (TREE_TYPE (data_ref),
8216 align * BITS_PER_UNIT);
8217 else
8218 TREE_TYPE (data_ref)
8219 = build_aligned_type (TREE_TYPE (data_ref),
8220 TYPE_ALIGN (elem_type));
8221 }
8222 break;
8223 }
8224 case dr_explicit_realign:
8225 {
8226 tree ptr, bump;
8227
8228 tree vs = size_int (TYPE_VECTOR_SUBPARTS (vectype));
8229
8230 if (compute_in_loop)
8231 msq = vect_setup_realignment (first_stmt, gsi,
8232 &realignment_token,
8233 dr_explicit_realign,
8234 dataref_ptr, NULL);
8235
8236 if (TREE_CODE (dataref_ptr) == SSA_NAME)
8237 ptr = copy_ssa_name (dataref_ptr);
8238 else
8239 ptr = make_ssa_name (TREE_TYPE (dataref_ptr));
8240 unsigned int align = DR_TARGET_ALIGNMENT (first_dr);
8241 new_stmt = gimple_build_assign
8242 (ptr, BIT_AND_EXPR, dataref_ptr,
8243 build_int_cst
8244 (TREE_TYPE (dataref_ptr),
8245 -(HOST_WIDE_INT) align));
8246 vect_finish_stmt_generation (stmt, new_stmt, gsi);
8247 data_ref
8248 = build2 (MEM_REF, vectype, ptr,
8249 build_int_cst (ref_type, 0));
8250 vect_copy_ref_info (data_ref, DR_REF (first_dr));
8251 vec_dest = vect_create_destination_var (scalar_dest,
8252 vectype);
8253 new_stmt = gimple_build_assign (vec_dest, data_ref);
8254 new_temp = make_ssa_name (vec_dest, new_stmt);
8255 gimple_assign_set_lhs (new_stmt, new_temp);
8256 gimple_set_vdef (new_stmt, gimple_vdef (stmt));
8257 gimple_set_vuse (new_stmt, gimple_vuse (stmt));
8258 vect_finish_stmt_generation (stmt, new_stmt, gsi);
8259 msq = new_temp;
8260
8261 bump = size_binop (MULT_EXPR, vs,
8262 TYPE_SIZE_UNIT (elem_type));
8263 bump = size_binop (MINUS_EXPR, bump, size_one_node);
8264 ptr = bump_vector_ptr (dataref_ptr, NULL, gsi, stmt, bump);
8265 new_stmt = gimple_build_assign
8266 (NULL_TREE, BIT_AND_EXPR, ptr,
8267 build_int_cst
8268 (TREE_TYPE (ptr), -(HOST_WIDE_INT) align));
8269 ptr = copy_ssa_name (ptr, new_stmt);
8270 gimple_assign_set_lhs (new_stmt, ptr);
8271 vect_finish_stmt_generation (stmt, new_stmt, gsi);
8272 data_ref
8273 = build2 (MEM_REF, vectype, ptr,
8274 build_int_cst (ref_type, 0));
8275 break;
8276 }
8277 case dr_explicit_realign_optimized:
8278 {
8279 if (TREE_CODE (dataref_ptr) == SSA_NAME)
8280 new_temp = copy_ssa_name (dataref_ptr);
8281 else
8282 new_temp = make_ssa_name (TREE_TYPE (dataref_ptr));
8283 unsigned int align = DR_TARGET_ALIGNMENT (first_dr);
8284 new_stmt = gimple_build_assign
8285 (new_temp, BIT_AND_EXPR, dataref_ptr,
8286 build_int_cst (TREE_TYPE (dataref_ptr),
8287 -(HOST_WIDE_INT) align));
8288 vect_finish_stmt_generation (stmt, new_stmt, gsi);
8289 data_ref
8290 = build2 (MEM_REF, vectype, new_temp,
8291 build_int_cst (ref_type, 0));
8292 break;
8293 }
8294 default:
8295 gcc_unreachable ();
8296 }
8297 vec_dest = vect_create_destination_var (scalar_dest, vectype);
8298 /* DATA_REF is null if we've already built the statement. */
8299 if (data_ref)
8300 {
8301 vect_copy_ref_info (data_ref, DR_REF (first_dr));
8302 new_stmt = gimple_build_assign (vec_dest, data_ref);
8303 }
8304 new_temp = make_ssa_name (vec_dest, new_stmt);
8305 gimple_set_lhs (new_stmt, new_temp);
8306 vect_finish_stmt_generation (stmt, new_stmt, gsi);
8307
8308 /* 3. Handle explicit realignment if necessary/supported.
8309 Create in loop:
8310 vec_dest = realign_load (msq, lsq, realignment_token) */
8311 if (alignment_support_scheme == dr_explicit_realign_optimized
8312 || alignment_support_scheme == dr_explicit_realign)
8313 {
8314 lsq = gimple_assign_lhs (new_stmt);
8315 if (!realignment_token)
8316 realignment_token = dataref_ptr;
8317 vec_dest = vect_create_destination_var (scalar_dest, vectype);
8318 new_stmt = gimple_build_assign (vec_dest, REALIGN_LOAD_EXPR,
8319 msq, lsq, realignment_token);
8320 new_temp = make_ssa_name (vec_dest, new_stmt);
8321 gimple_assign_set_lhs (new_stmt, new_temp);
8322 vect_finish_stmt_generation (stmt, new_stmt, gsi);
8323
8324 if (alignment_support_scheme == dr_explicit_realign_optimized)
8325 {
8326 gcc_assert (phi);
8327 if (i == vec_num - 1 && j == ncopies - 1)
8328 add_phi_arg (phi, lsq,
8329 loop_latch_edge (containing_loop),
8330 UNKNOWN_LOCATION);
8331 msq = lsq;
8332 }
8333 }
8334
8335 /* 4. Handle invariant-load. */
8336 if (inv_p && !bb_vinfo)
8337 {
8338 gcc_assert (!grouped_load);
8339 /* If we have versioned for aliasing or the loop doesn't
8340 have any data dependencies that would preclude this,
8341 then we are sure this is a loop invariant load and
8342 thus we can insert it on the preheader edge. */
8343 if (LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo)
8344 && !nested_in_vect_loop
8345 && hoist_defs_of_uses (stmt, loop))
8346 {
8347 if (dump_enabled_p ())
8348 {
8349 dump_printf_loc (MSG_NOTE, vect_location,
8350 "hoisting out of the vectorized "
8351 "loop: ");
8352 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
8353 }
8354 tree tem = copy_ssa_name (scalar_dest);
8355 gsi_insert_on_edge_immediate
8356 (loop_preheader_edge (loop),
8357 gimple_build_assign (tem,
8358 unshare_expr
8359 (gimple_assign_rhs1 (stmt))));
8360 new_temp = vect_init_vector (stmt, tem, vectype, NULL);
8361 new_stmt = SSA_NAME_DEF_STMT (new_temp);
8362 set_vinfo_for_stmt (new_stmt,
8363 new_stmt_vec_info (new_stmt, vinfo));
8364 }
8365 else
8366 {
8367 gimple_stmt_iterator gsi2 = *gsi;
8368 gsi_next (&gsi2);
8369 new_temp = vect_init_vector (stmt, scalar_dest,
8370 vectype, &gsi2);
8371 new_stmt = SSA_NAME_DEF_STMT (new_temp);
8372 }
8373 }
8374
8375 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
8376 {
8377 tree perm_mask = perm_mask_for_reverse (vectype);
8378 new_temp = permute_vec_elements (new_temp, new_temp,
8379 perm_mask, stmt, gsi);
8380 new_stmt = SSA_NAME_DEF_STMT (new_temp);
8381 }
8382
8383 /* Collect vector loads and later create their permutation in
8384 vect_transform_grouped_load (). */
8385 if (grouped_load || slp_perm)
8386 dr_chain.quick_push (new_temp);
8387
8388 /* Store vector loads in the corresponding SLP_NODE. */
8389 if (slp && !slp_perm)
8390 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
8391
8392 /* With an SLP permutation we load the gaps as well; without
8393 one we need to skip the gaps once we have fully loaded
8394 all elements. group_gap_adj is GROUP_SIZE here. */
8395 group_elt += nunits;
8396 if (maybe_ne (group_gap_adj, 0U)
8397 && !slp_perm
8398 && known_eq (group_elt, group_size - group_gap_adj))
8399 {
8400 poly_wide_int bump_val
8401 = (wi::to_wide (TYPE_SIZE_UNIT (elem_type))
8402 * group_gap_adj);
8403 tree bump = wide_int_to_tree (sizetype, bump_val);
8404 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
8405 stmt, bump);
8406 group_elt = 0;
8407 }
8408 }
8409 /* Bump the vector pointer to account for a gap or for excess
8410 elements loaded for a permuted SLP load. */
8411 if (maybe_ne (group_gap_adj, 0U) && slp_perm)
8412 {
8413 poly_wide_int bump_val
8414 = (wi::to_wide (TYPE_SIZE_UNIT (elem_type))
8415 * group_gap_adj);
8416 tree bump = wide_int_to_tree (sizetype, bump_val);
8417 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
8418 stmt, bump);
8419 }
8420 }
8421
8422 if (slp && !slp_perm)
8423 continue;
8424
8425 if (slp_perm)
8426 {
8427 unsigned n_perms;
8428 if (!vect_transform_slp_perm_load (slp_node, dr_chain, gsi, vf,
8429 slp_node_instance, false,
8430 &n_perms))
8431 {
8432 dr_chain.release ();
8433 return false;
8434 }
8435 }
8436 else
8437 {
8438 if (grouped_load)
8439 {
8440 if (memory_access_type != VMAT_LOAD_STORE_LANES)
8441 vect_transform_grouped_load (stmt, dr_chain, group_size, gsi);
8442 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
8443 }
8444 else
8445 {
8446 if (j == 0)
8447 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
8448 else
8449 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
8450 prev_stmt_info = vinfo_for_stmt (new_stmt);
8451 }
8452 }
8453 dr_chain.release ();
8454 }
8455
8456 return true;
8457 }
8458
8459 /* Function vect_is_simple_cond.
8460
8461 Input:
8462 LOOP - the loop that is being vectorized.
8463 COND - Condition that is checked for simple use.
8464
8465 Output:
8466 *COMP_VECTYPE - the vector type for the comparison.
8467 *DTS - The def types for the arguments of the comparison
8468
8469 Returns whether a COND can be vectorized. Checks whether
8470 condition operands are supportable using vect_is_simple_use. */
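
/* For illustration (a sketch, not an exhaustive list): COND may be a
   comparison between SSA names such as a_1 < b_2, a comparison against an
   invariant such as a_1 != 0, or a boolean SSA name produced by another
   vectorizable mask-generating statement.  */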
8471
8472 static bool
8473 vect_is_simple_cond (tree cond, vec_info *vinfo,
8474 tree *comp_vectype, enum vect_def_type *dts,
8475 tree vectype)
8476 {
8477 tree lhs, rhs;
8478 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
8479
8480 /* Mask case. */
8481 if (TREE_CODE (cond) == SSA_NAME
8482 && VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (cond)))
8483 {
8484 gimple *lhs_def_stmt = SSA_NAME_DEF_STMT (cond);
8485 if (!vect_is_simple_use (cond, vinfo, &lhs_def_stmt,
8486 &dts[0], comp_vectype)
8487 || !*comp_vectype
8488 || !VECTOR_BOOLEAN_TYPE_P (*comp_vectype))
8489 return false;
8490 return true;
8491 }
8492
8493 if (!COMPARISON_CLASS_P (cond))
8494 return false;
8495
8496 lhs = TREE_OPERAND (cond, 0);
8497 rhs = TREE_OPERAND (cond, 1);
8498
8499 if (TREE_CODE (lhs) == SSA_NAME)
8500 {
8501 gimple *lhs_def_stmt = SSA_NAME_DEF_STMT (lhs);
8502 if (!vect_is_simple_use (lhs, vinfo, &lhs_def_stmt, &dts[0], &vectype1))
8503 return false;
8504 }
8505 else if (TREE_CODE (lhs) == INTEGER_CST || TREE_CODE (lhs) == REAL_CST
8506 || TREE_CODE (lhs) == FIXED_CST)
8507 dts[0] = vect_constant_def;
8508 else
8509 return false;
8510
8511 if (TREE_CODE (rhs) == SSA_NAME)
8512 {
8513 gimple *rhs_def_stmt = SSA_NAME_DEF_STMT (rhs);
8514 if (!vect_is_simple_use (rhs, vinfo, &rhs_def_stmt, &dts[1], &vectype2))
8515 return false;
8516 }
8517 else if (TREE_CODE (rhs) == INTEGER_CST || TREE_CODE (rhs) == REAL_CST
8518 || TREE_CODE (rhs) == FIXED_CST)
8519 dts[1] = vect_constant_def;
8520 else
8521 return false;
8522
8523 if (vectype1 && vectype2
8524 && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1),
8525 TYPE_VECTOR_SUBPARTS (vectype2)))
8526 return false;
8527
8528 *comp_vectype = vectype1 ? vectype1 : vectype2;
8529 /* Invariant comparison. */
8530 if (! *comp_vectype && vectype)
8531 {
8532 tree scalar_type = TREE_TYPE (lhs);
8533 /* If we can widen the comparison to match vectype do so. */
8534 if (INTEGRAL_TYPE_P (scalar_type)
8535 && tree_int_cst_lt (TYPE_SIZE (scalar_type),
8536 TYPE_SIZE (TREE_TYPE (vectype))))
8537 scalar_type = build_nonstandard_integer_type
8538 (tree_to_uhwi (TYPE_SIZE (TREE_TYPE (vectype))),
8539 TYPE_UNSIGNED (scalar_type));
8540 *comp_vectype = get_vectype_for_scalar_type (scalar_type);
8541 }
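  /* Illustration of the widening above (hypothetical types): if COND is
     "a < b" with both operands loop-invariant 32-bit ints while VECTYPE
     is a vector of 64-bit elements, the scalar type is rebuilt as a
     64-bit integer type first, so that the comparison vector type chosen
     here has the same number of elements as VECTYPE.  */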
8542
8543 return true;
8544 }
8545
8546 /* vectorizable_condition.
8547
8548 Check if STMT is a conditional modify expression that can be vectorized.
8549 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
8550 stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
8551 at GSI.
8552
8553 When STMT is vectorized as a nested cycle, REDUC_DEF is the vector variable
8554 to be used at REDUC_INDEX (in the then clause if REDUC_INDEX is 1, and in
8555 the else clause if it is 2).
8556
8557 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
8558
8559 bool
8560 vectorizable_condition (gimple *stmt, gimple_stmt_iterator *gsi,
8561 gimple **vec_stmt, tree reduc_def, int reduc_index,
8562 slp_tree slp_node)
8563 {
8564 tree scalar_dest = NULL_TREE;
8565 tree vec_dest = NULL_TREE;
8566 tree cond_expr, cond_expr0 = NULL_TREE, cond_expr1 = NULL_TREE;
8567 tree then_clause, else_clause;
8568 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
8569 tree comp_vectype = NULL_TREE;
8570 tree vec_cond_lhs = NULL_TREE, vec_cond_rhs = NULL_TREE;
8571 tree vec_then_clause = NULL_TREE, vec_else_clause = NULL_TREE;
8572 tree vec_compare;
8573 tree new_temp;
8574 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
8575 enum vect_def_type dts[4]
8576 = {vect_unknown_def_type, vect_unknown_def_type,
8577 vect_unknown_def_type, vect_unknown_def_type};
8578 int ndts = 4;
8579 int ncopies;
8580 enum tree_code code, cond_code, bitop1 = NOP_EXPR, bitop2 = NOP_EXPR;
8581 stmt_vec_info prev_stmt_info = NULL;
8582 int i, j;
8583 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
8584 vec<tree> vec_oprnds0 = vNULL;
8585 vec<tree> vec_oprnds1 = vNULL;
8586 vec<tree> vec_oprnds2 = vNULL;
8587 vec<tree> vec_oprnds3 = vNULL;
8588 tree vec_cmp_type;
8589 bool masked = false;
8590
8591 if (reduc_index && STMT_SLP_TYPE (stmt_info))
8592 return false;
8593
8594 vect_reduction_type reduction_type
8595 = STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info);
8596 if (reduction_type == TREE_CODE_REDUCTION)
8597 {
8598 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
8599 return false;
8600
8601 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
8602 && !(STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle
8603 && reduc_def))
8604 return false;
8605
8606 /* FORNOW: not yet supported. */
8607 if (STMT_VINFO_LIVE_P (stmt_info))
8608 {
8609 if (dump_enabled_p ())
8610 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8611 "value used after loop.\n");
8612 return false;
8613 }
8614 }
8615
8616 /* Is vectorizable conditional operation? */
8617 if (!is_gimple_assign (stmt))
8618 return false;
8619
8620 code = gimple_assign_rhs_code (stmt);
8621
8622 if (code != COND_EXPR)
8623 return false;
8624
8625 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
8626 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
8627
8628 if (slp_node)
8629 ncopies = 1;
8630 else
8631 ncopies = vect_get_num_copies (loop_vinfo, vectype);
8632
8633 gcc_assert (ncopies >= 1);
8634 if (reduc_index && ncopies > 1)
8635 return false; /* FORNOW */
8636
8637 cond_expr = gimple_assign_rhs1 (stmt);
8638 then_clause = gimple_assign_rhs2 (stmt);
8639 else_clause = gimple_assign_rhs3 (stmt);
8640
8641 if (!vect_is_simple_cond (cond_expr, stmt_info->vinfo,
8642 &comp_vectype, &dts[0], slp_node ? NULL : vectype)
8643 || !comp_vectype)
8644 return false;
8645
8646 gimple *def_stmt;
8647 if (!vect_is_simple_use (then_clause, stmt_info->vinfo, &def_stmt, &dts[2],
8648 &vectype1))
8649 return false;
8650 if (!vect_is_simple_use (else_clause, stmt_info->vinfo, &def_stmt, &dts[3],
8651 &vectype2))
8652 return false;
8653
8654 if (vectype1 && !useless_type_conversion_p (vectype, vectype1))
8655 return false;
8656
8657 if (vectype2 && !useless_type_conversion_p (vectype, vectype2))
8658 return false;
8659
8660 masked = !COMPARISON_CLASS_P (cond_expr);
8661 vec_cmp_type = build_same_sized_truth_vector_type (comp_vectype);
8662
8663 if (vec_cmp_type == NULL_TREE)
8664 return false;
8665
8666 cond_code = TREE_CODE (cond_expr);
8667 if (!masked)
8668 {
8669 cond_expr0 = TREE_OPERAND (cond_expr, 0);
8670 cond_expr1 = TREE_OPERAND (cond_expr, 1);
8671 }
8672
8673 if (!masked && VECTOR_BOOLEAN_TYPE_P (comp_vectype))
8674 {
8675 /* Boolean values may have another representation in vectors
8676 and therefore we prefer bit operations over comparison for
8677 them (which also works for scalar masks). We store opcodes
8678 to use in bitop1 and bitop2. Statement is vectorized as
8679 BITOP2 (rhs1 BITOP1 rhs2) or rhs1 BITOP2 (BITOP1 rhs2)
8680 depending on bitop1 and bitop2 arity. */
8681 switch (cond_code)
8682 {
8683 case GT_EXPR:
8684 bitop1 = BIT_NOT_EXPR;
8685 bitop2 = BIT_AND_EXPR;
8686 break;
8687 case GE_EXPR:
8688 bitop1 = BIT_NOT_EXPR;
8689 bitop2 = BIT_IOR_EXPR;
8690 break;
8691 case LT_EXPR:
8692 bitop1 = BIT_NOT_EXPR;
8693 bitop2 = BIT_AND_EXPR;
8694 std::swap (cond_expr0, cond_expr1);
8695 break;
8696 case LE_EXPR:
8697 bitop1 = BIT_NOT_EXPR;
8698 bitop2 = BIT_IOR_EXPR;
8699 std::swap (cond_expr0, cond_expr1);
8700 break;
8701 case NE_EXPR:
8702 bitop1 = BIT_XOR_EXPR;
8703 break;
8704 case EQ_EXPR:
8705 bitop1 = BIT_XOR_EXPR;
8706 bitop2 = BIT_NOT_EXPR;
8707 break;
8708 default:
8709 return false;
8710 }
8711 cond_code = SSA_NAME;
8712 }
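  /* A scalar sketch of the mapping just set up, for operands a, b in
     {0, 1} (assuming the usual two-valued mask semantics):
       a >  b  ==  a & ~b      (bitop1 = BIT_NOT_EXPR, bitop2 = BIT_AND_EXPR)
       a >= b  ==  a | ~b      (bitop1 = BIT_NOT_EXPR, bitop2 = BIT_IOR_EXPR)
       a <  b  ==  b & ~a      (as GT with the operands swapped)
       a <= b  ==  b | ~a      (as GE with the operands swapped)
       a != b  ==  a ^ b       (bitop1 = BIT_XOR_EXPR only)
       a == b  ==  ~(a ^ b)    (bitop1 = BIT_XOR_EXPR, bitop2 = BIT_NOT_EXPR)
     With cond_code now SSA_NAME, the VEC_COND_EXPR built during the
     transform consumes the resulting mask directly.  */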
8713
8714 if (!vec_stmt)
8715 {
8716 STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type;
8717 if (bitop1 != NOP_EXPR)
8718 {
8719 machine_mode mode = TYPE_MODE (comp_vectype);
8720 optab optab;
8721
8722 optab = optab_for_tree_code (bitop1, comp_vectype, optab_default);
8723 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
8724 return false;
8725
8726 if (bitop2 != NOP_EXPR)
8727 {
8728 optab = optab_for_tree_code (bitop2, comp_vectype,
8729 optab_default);
8730 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
8731 return false;
8732 }
8733 }
8734 if (expand_vec_cond_expr_p (vectype, comp_vectype,
8735 cond_code))
8736 {
8737 if (!slp_node)
8738 vect_model_simple_cost (stmt_info, ncopies, dts, ndts, NULL, NULL);
8739 return true;
8740 }
8741 return false;
8742 }
8743
8744 /* Transform. */
8745
8746 if (!slp_node)
8747 {
8748 vec_oprnds0.create (1);
8749 vec_oprnds1.create (1);
8750 vec_oprnds2.create (1);
8751 vec_oprnds3.create (1);
8752 }
8753
8754 /* Handle def. */
8755 scalar_dest = gimple_assign_lhs (stmt);
8756 if (reduction_type != EXTRACT_LAST_REDUCTION)
8757 vec_dest = vect_create_destination_var (scalar_dest, vectype);
8758
8759 /* Handle cond expr. */
8760 for (j = 0; j < ncopies; j++)
8761 {
8762 gimple *new_stmt = NULL;
8763 if (j == 0)
8764 {
8765 if (slp_node)
8766 {
8767 auto_vec<tree, 4> ops;
8768 auto_vec<vec<tree>, 4> vec_defs;
8769
8770 if (masked)
8771 ops.safe_push (cond_expr);
8772 else
8773 {
8774 ops.safe_push (cond_expr0);
8775 ops.safe_push (cond_expr1);
8776 }
8777 ops.safe_push (then_clause);
8778 ops.safe_push (else_clause);
8779 vect_get_slp_defs (ops, slp_node, &vec_defs);
8780 vec_oprnds3 = vec_defs.pop ();
8781 vec_oprnds2 = vec_defs.pop ();
8782 if (!masked)
8783 vec_oprnds1 = vec_defs.pop ();
8784 vec_oprnds0 = vec_defs.pop ();
8785 }
8786 else
8787 {
8788 gimple *gtemp;
8789 if (masked)
8790 {
8791 vec_cond_lhs
8792 = vect_get_vec_def_for_operand (cond_expr, stmt,
8793 comp_vectype);
8794 vect_is_simple_use (cond_expr, stmt_info->vinfo,
8795 &gtemp, &dts[0]);
8796 }
8797 else
8798 {
8799 vec_cond_lhs
8800 = vect_get_vec_def_for_operand (cond_expr0,
8801 stmt, comp_vectype);
8802 vect_is_simple_use (cond_expr0, loop_vinfo, &gtemp, &dts[0]);
8803
8804 vec_cond_rhs
8805 = vect_get_vec_def_for_operand (cond_expr1,
8806 stmt, comp_vectype);
8807 vect_is_simple_use (cond_expr1, loop_vinfo, &gtemp, &dts[1]);
8808 }
8809 if (reduc_index == 1)
8810 vec_then_clause = reduc_def;
8811 else
8812 {
8813 vec_then_clause = vect_get_vec_def_for_operand (then_clause,
8814 stmt);
8815 vect_is_simple_use (then_clause, loop_vinfo,
8816 &gtemp, &dts[2]);
8817 }
8818 if (reduc_index == 2)
8819 vec_else_clause = reduc_def;
8820 else
8821 {
8822 vec_else_clause = vect_get_vec_def_for_operand (else_clause,
8823 stmt);
8824 vect_is_simple_use (else_clause, loop_vinfo, &gtemp, &dts[3]);
8825 }
8826 }
8827 }
8828 else
8829 {
8830 vec_cond_lhs
8831 = vect_get_vec_def_for_stmt_copy (dts[0],
8832 vec_oprnds0.pop ());
8833 if (!masked)
8834 vec_cond_rhs
8835 = vect_get_vec_def_for_stmt_copy (dts[1],
8836 vec_oprnds1.pop ());
8837
8838 vec_then_clause = vect_get_vec_def_for_stmt_copy (dts[2],
8839 vec_oprnds2.pop ());
8840 vec_else_clause = vect_get_vec_def_for_stmt_copy (dts[3],
8841 vec_oprnds3.pop ());
8842 }
8843
8844 if (!slp_node)
8845 {
8846 vec_oprnds0.quick_push (vec_cond_lhs);
8847 if (!masked)
8848 vec_oprnds1.quick_push (vec_cond_rhs);
8849 vec_oprnds2.quick_push (vec_then_clause);
8850 vec_oprnds3.quick_push (vec_else_clause);
8851 }
8852
8853 /* Arguments are ready. Create the new vector stmt. */
8854 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_cond_lhs)
8855 {
8856 vec_then_clause = vec_oprnds2[i];
8857 vec_else_clause = vec_oprnds3[i];
8858
8859 if (masked)
8860 vec_compare = vec_cond_lhs;
8861 else
8862 {
8863 vec_cond_rhs = vec_oprnds1[i];
8864 if (bitop1 == NOP_EXPR)
8865 vec_compare = build2 (cond_code, vec_cmp_type,
8866 vec_cond_lhs, vec_cond_rhs);
8867 else
8868 {
8869 new_temp = make_ssa_name (vec_cmp_type);
8870 if (bitop1 == BIT_NOT_EXPR)
8871 new_stmt = gimple_build_assign (new_temp, bitop1,
8872 vec_cond_rhs);
8873 else
8874 new_stmt
8875 = gimple_build_assign (new_temp, bitop1, vec_cond_lhs,
8876 vec_cond_rhs);
8877 vect_finish_stmt_generation (stmt, new_stmt, gsi);
8878 if (bitop2 == NOP_EXPR)
8879 vec_compare = new_temp;
8880 else if (bitop2 == BIT_NOT_EXPR)
8881 {
8882 /* Instead of doing ~x ? y : z do x ? z : y. */
8883 vec_compare = new_temp;
8884 std::swap (vec_then_clause, vec_else_clause);
8885 }
8886 else
8887 {
8888 vec_compare = make_ssa_name (vec_cmp_type);
8889 new_stmt
8890 = gimple_build_assign (vec_compare, bitop2,
8891 vec_cond_lhs, new_temp);
8892 vect_finish_stmt_generation (stmt, new_stmt, gsi);
8893 }
8894 }
8895 }
8896 if (reduction_type == EXTRACT_LAST_REDUCTION)
8897 {
8898 if (!is_gimple_val (vec_compare))
8899 {
8900 tree vec_compare_name = make_ssa_name (vec_cmp_type);
8901 new_stmt = gimple_build_assign (vec_compare_name,
8902 vec_compare);
8903 vect_finish_stmt_generation (stmt, new_stmt, gsi);
8904 vec_compare = vec_compare_name;
8905 }
8906 gcc_assert (reduc_index == 2);
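	  /* Per the fold_extract_last optab documentation,
	     IFN_FOLD_EXTRACT_LAST (ELSE, MASK, VEC) extracts the element
	     of VEC that corresponds to the last set bit of MASK, or yields
	     ELSE when no bit is set; this is the scalar result of a
	     conditional reduction of the form x = cond ? val : x.  */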
8907 new_stmt = gimple_build_call_internal
8908 (IFN_FOLD_EXTRACT_LAST, 3, else_clause, vec_compare,
8909 vec_then_clause);
8910 gimple_call_set_lhs (new_stmt, scalar_dest);
8911 SSA_NAME_DEF_STMT (scalar_dest) = new_stmt;
8912 if (stmt == gsi_stmt (*gsi))
8913 vect_finish_replace_stmt (stmt, new_stmt);
8914 else
8915 {
8916 /* In this case we're moving the definition to later in the
8917 block. That doesn't matter because the only uses of the
8918 lhs are in phi statements. */
8919 gimple_stmt_iterator old_gsi = gsi_for_stmt (stmt);
8920 gsi_remove (&old_gsi, true);
8921 vect_finish_stmt_generation (stmt, new_stmt, gsi);
8922 }
8923 }
8924 else
8925 {
8926 new_temp = make_ssa_name (vec_dest);
8927 new_stmt = gimple_build_assign (new_temp, VEC_COND_EXPR,
8928 vec_compare, vec_then_clause,
8929 vec_else_clause);
8930 vect_finish_stmt_generation (stmt, new_stmt, gsi);
8931 }
8932 if (slp_node)
8933 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
8934 }
8935
8936 if (slp_node)
8937 continue;
8938
8939 if (j == 0)
8940 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
8941 else
8942 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
8943
8944 prev_stmt_info = vinfo_for_stmt (new_stmt);
8945 }
8946
8947 vec_oprnds0.release ();
8948 vec_oprnds1.release ();
8949 vec_oprnds2.release ();
8950 vec_oprnds3.release ();
8951
8952 return true;
8953 }
8954
8955 /* vectorizable_comparison.
8956
8957 Check if STMT is a comparison expression that can be vectorized.
8958 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
8959 comparison, put it in VEC_STMT, and insert it at GSI.
8960
8961 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
8962
8963 static bool
8964 vectorizable_comparison (gimple *stmt, gimple_stmt_iterator *gsi,
8965 gimple **vec_stmt, tree reduc_def,
8966 slp_tree slp_node)
8967 {
8968 tree lhs, rhs1, rhs2;
8969 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
8970 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
8971 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
8972 tree vec_rhs1 = NULL_TREE, vec_rhs2 = NULL_TREE;
8973 tree new_temp;
8974 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
8975 enum vect_def_type dts[2] = {vect_unknown_def_type, vect_unknown_def_type};
8976 int ndts = 2;
8977 poly_uint64 nunits;
8978 int ncopies;
8979 enum tree_code code, bitop1 = NOP_EXPR, bitop2 = NOP_EXPR;
8980 stmt_vec_info prev_stmt_info = NULL;
8981 int i, j;
8982 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
8983 vec<tree> vec_oprnds0 = vNULL;
8984 vec<tree> vec_oprnds1 = vNULL;
8985 gimple *def_stmt;
8986 tree mask_type;
8987 tree mask;
8988
8989 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
8990 return false;
8991
8992 if (!vectype || !VECTOR_BOOLEAN_TYPE_P (vectype))
8993 return false;
8994
8995 mask_type = vectype;
8996 nunits = TYPE_VECTOR_SUBPARTS (vectype);
8997
8998 if (slp_node)
8999 ncopies = 1;
9000 else
9001 ncopies = vect_get_num_copies (loop_vinfo, vectype);
9002
9003 gcc_assert (ncopies >= 1);
9004 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
9005 && !(STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle
9006 && reduc_def))
9007 return false;
9008
9009 if (STMT_VINFO_LIVE_P (stmt_info))
9010 {
9011 if (dump_enabled_p ())
9012 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9013 "value used after loop.\n");
9014 return false;
9015 }
9016
9017 if (!is_gimple_assign (stmt))
9018 return false;
9019
9020 code = gimple_assign_rhs_code (stmt);
9021
9022 if (TREE_CODE_CLASS (code) != tcc_comparison)
9023 return false;
9024
9025 rhs1 = gimple_assign_rhs1 (stmt);
9026 rhs2 = gimple_assign_rhs2 (stmt);
9027
9028 if (!vect_is_simple_use (rhs1, stmt_info->vinfo, &def_stmt,
9029 &dts[0], &vectype1))
9030 return false;
9031
9032 if (!vect_is_simple_use (rhs2, stmt_info->vinfo, &def_stmt,
9033 &dts[1], &vectype2))
9034 return false;
9035
9036 if (vectype1 && vectype2
9037 && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1),
9038 TYPE_VECTOR_SUBPARTS (vectype2)))
9039 return false;
9040
9041 vectype = vectype1 ? vectype1 : vectype2;
9042
9043 /* Invariant comparison. */
9044 if (!vectype)
9045 {
9046 vectype = get_vectype_for_scalar_type (TREE_TYPE (rhs1));
9047 if (maybe_ne (TYPE_VECTOR_SUBPARTS (vectype), nunits))
9048 return false;
9049 }
9050 else if (maybe_ne (nunits, TYPE_VECTOR_SUBPARTS (vectype)))
9051 return false;
9052
9053 /* Can't compare mask and non-mask types. */
9054 if (vectype1 && vectype2
9055 && (VECTOR_BOOLEAN_TYPE_P (vectype1) ^ VECTOR_BOOLEAN_TYPE_P (vectype2)))
9056 return false;
9057
9058 /* Boolean values may have another representation in vectors
9059 and therefore we prefer bit operations over comparison for
9060 them (which also works for scalar masks). We store opcodes
9061 to use in bitop1 and bitop2. Statement is vectorized as
9062 BITOP2 (rhs1 BITOP1 rhs2) or
9063 rhs1 BITOP2 (BITOP1 rhs2)
9064 depending on bitop1 and bitop2 arity. */
9065 if (VECTOR_BOOLEAN_TYPE_P (vectype))
9066 {
9067 if (code == GT_EXPR)
9068 {
9069 bitop1 = BIT_NOT_EXPR;
9070 bitop2 = BIT_AND_EXPR;
9071 }
9072 else if (code == GE_EXPR)
9073 {
9074 bitop1 = BIT_NOT_EXPR;
9075 bitop2 = BIT_IOR_EXPR;
9076 }
9077 else if (code == LT_EXPR)
9078 {
9079 bitop1 = BIT_NOT_EXPR;
9080 bitop2 = BIT_AND_EXPR;
9081 std::swap (rhs1, rhs2);
9082 std::swap (dts[0], dts[1]);
9083 }
9084 else if (code == LE_EXPR)
9085 {
9086 bitop1 = BIT_NOT_EXPR;
9087 bitop2 = BIT_IOR_EXPR;
9088 std::swap (rhs1, rhs2);
9089 std::swap (dts[0], dts[1]);
9090 }
9091 else
9092 {
9093 bitop1 = BIT_XOR_EXPR;
9094 if (code == EQ_EXPR)
9095 bitop2 = BIT_NOT_EXPR;
9096 }
9097 }
9098
9099 if (!vec_stmt)
9100 {
9101 STMT_VINFO_TYPE (stmt_info) = comparison_vec_info_type;
9102 if (!slp_node)
9103 vect_model_simple_cost (stmt_info, ncopies * (1 + (bitop2 != NOP_EXPR)),
9104 dts, ndts, NULL, NULL);
9105 if (bitop1 == NOP_EXPR)
9106 return expand_vec_cmp_expr_p (vectype, mask_type, code);
9107 else
9108 {
9109 machine_mode mode = TYPE_MODE (vectype);
9110 optab optab;
9111
9112 optab = optab_for_tree_code (bitop1, vectype, optab_default);
9113 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
9114 return false;
9115
9116 if (bitop2 != NOP_EXPR)
9117 {
9118 optab = optab_for_tree_code (bitop2, vectype, optab_default);
9119 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
9120 return false;
9121 }
9122 return true;
9123 }
9124 }
9125
9126 /* Transform. */
9127 if (!slp_node)
9128 {
9129 vec_oprnds0.create (1);
9130 vec_oprnds1.create (1);
9131 }
9132
9133 /* Handle def. */
9134 lhs = gimple_assign_lhs (stmt);
9135 mask = vect_create_destination_var (lhs, mask_type);
9136
9137 /* Handle cmp expr. */
9138 for (j = 0; j < ncopies; j++)
9139 {
9140 gassign *new_stmt = NULL;
9141 if (j == 0)
9142 {
9143 if (slp_node)
9144 {
9145 auto_vec<tree, 2> ops;
9146 auto_vec<vec<tree>, 2> vec_defs;
9147
9148 ops.safe_push (rhs1);
9149 ops.safe_push (rhs2);
9150 vect_get_slp_defs (ops, slp_node, &vec_defs);
9151 vec_oprnds1 = vec_defs.pop ();
9152 vec_oprnds0 = vec_defs.pop ();
9153 }
9154 else
9155 {
9156 vec_rhs1 = vect_get_vec_def_for_operand (rhs1, stmt, vectype);
9157 vec_rhs2 = vect_get_vec_def_for_operand (rhs2, stmt, vectype);
9158 }
9159 }
9160 else
9161 {
9162 vec_rhs1 = vect_get_vec_def_for_stmt_copy (dts[0],
9163 vec_oprnds0.pop ());
9164 vec_rhs2 = vect_get_vec_def_for_stmt_copy (dts[1],
9165 vec_oprnds1.pop ());
9166 }
9167
9168 if (!slp_node)
9169 {
9170 vec_oprnds0.quick_push (vec_rhs1);
9171 vec_oprnds1.quick_push (vec_rhs2);
9172 }
9173
9174 /* Arguments are ready. Create the new vector stmt. */
9175 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_rhs1)
9176 {
9177 vec_rhs2 = vec_oprnds1[i];
9178
9179 new_temp = make_ssa_name (mask);
9180 if (bitop1 == NOP_EXPR)
9181 {
9182 new_stmt = gimple_build_assign (new_temp, code,
9183 vec_rhs1, vec_rhs2);
9184 vect_finish_stmt_generation (stmt, new_stmt, gsi);
9185 }
9186 else
9187 {
9188 if (bitop1 == BIT_NOT_EXPR)
9189 new_stmt = gimple_build_assign (new_temp, bitop1, vec_rhs2);
9190 else
9191 new_stmt = gimple_build_assign (new_temp, bitop1, vec_rhs1,
9192 vec_rhs2);
9193 vect_finish_stmt_generation (stmt, new_stmt, gsi);
9194 if (bitop2 != NOP_EXPR)
9195 {
9196 tree res = make_ssa_name (mask);
9197 if (bitop2 == BIT_NOT_EXPR)
9198 new_stmt = gimple_build_assign (res, bitop2, new_temp);
9199 else
9200 new_stmt = gimple_build_assign (res, bitop2, vec_rhs1,
9201 new_temp);
9202 vect_finish_stmt_generation (stmt, new_stmt, gsi);
9203 }
9204 }
9205 if (slp_node)
9206 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
9207 }
9208
9209 if (slp_node)
9210 continue;
9211
9212 if (j == 0)
9213 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
9214 else
9215 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
9216
9217 prev_stmt_info = vinfo_for_stmt (new_stmt);
9218 }
9219
9220 vec_oprnds0.release ();
9221 vec_oprnds1.release ();
9222
9223 return true;
9224 }
9225
9226 /* If SLP_NODE is nonnull, return true if vectorizable_live_operation
9227 can handle all live statements in the node. Otherwise return true
9228 if STMT is not live or if vectorizable_live_operation can handle it.
9229 GSI and VEC_STMT are as for vectorizable_live_operation. */
9230
9231 static bool
9232 can_vectorize_live_stmts (gimple *stmt, gimple_stmt_iterator *gsi,
9233 slp_tree slp_node, gimple **vec_stmt)
9234 {
9235 if (slp_node)
9236 {
9237 gimple *slp_stmt;
9238 unsigned int i;
9239 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (slp_node), i, slp_stmt)
9240 {
9241 stmt_vec_info slp_stmt_info = vinfo_for_stmt (slp_stmt);
9242 if (STMT_VINFO_LIVE_P (slp_stmt_info)
9243 && !vectorizable_live_operation (slp_stmt, gsi, slp_node, i,
9244 vec_stmt))
9245 return false;
9246 }
9247 }
9248 else if (STMT_VINFO_LIVE_P (vinfo_for_stmt (stmt))
9249 && !vectorizable_live_operation (stmt, gsi, slp_node, -1, vec_stmt))
9250 return false;
9251
9252 return true;
9253 }
9254
9255 /* Make sure the statement is vectorizable. */
9256
9257 bool
9258 vect_analyze_stmt (gimple *stmt, bool *need_to_vectorize, slp_tree node,
9259 slp_instance node_instance)
9260 {
9261 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
9262 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
9263 enum vect_relevant relevance = STMT_VINFO_RELEVANT (stmt_info);
9264 bool ok;
9265 gimple *pattern_stmt;
9266 gimple_seq pattern_def_seq;
9267
9268 if (dump_enabled_p ())
9269 {
9270 dump_printf_loc (MSG_NOTE, vect_location, "==> examining statement: ");
9271 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
9272 }
9273
9274 if (gimple_has_volatile_ops (stmt))
9275 {
9276 if (dump_enabled_p ())
9277 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9278 "not vectorized: stmt has volatile operands\n");
9279
9280 return false;
9281 }
9282
9283 /* Skip stmts that do not need to be vectorized. In loops this is expected
9284 to include:
9285 - the COND_EXPR which is the loop exit condition
9286 - any LABEL_EXPRs in the loop
9287 - computations that are used only for array indexing or loop control.
9288 In basic blocks we only analyze statements that are a part of some SLP
9289 instance; therefore, all the statements are relevant.
9290
9291 Pattern statement needs to be analyzed instead of the original statement
9292 if the original statement is not relevant. Otherwise, we analyze both
9293 statements. In basic blocks we are called from some SLP instance
9294 traversal; in that case don't analyze pattern stmts, since the pattern
9295 stmts will already be part of an SLP instance. */
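/* For instance, the increment of a loop counter that is used only by the
   exit test is typically neither relevant nor live, so it takes the
   "irrelevant" early return below.  */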
9296
9297 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
9298 if (!STMT_VINFO_RELEVANT_P (stmt_info)
9299 && !STMT_VINFO_LIVE_P (stmt_info))
9300 {
9301 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
9302 && pattern_stmt
9303 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
9304 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
9305 {
9306 /* Analyze PATTERN_STMT instead of the original stmt. */
9307 stmt = pattern_stmt;
9308 stmt_info = vinfo_for_stmt (pattern_stmt);
9309 if (dump_enabled_p ())
9310 {
9311 dump_printf_loc (MSG_NOTE, vect_location,
9312 "==> examining pattern statement: ");
9313 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
9314 }
9315 }
9316 else
9317 {
9318 if (dump_enabled_p ())
9319 dump_printf_loc (MSG_NOTE, vect_location, "irrelevant.\n");
9320
9321 return true;
9322 }
9323 }
9324 else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
9325 && node == NULL
9326 && pattern_stmt
9327 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
9328 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
9329 {
9330 /* Analyze PATTERN_STMT too. */
9331 if (dump_enabled_p ())
9332 {
9333 dump_printf_loc (MSG_NOTE, vect_location,
9334 "==> examining pattern statement: ");
9335 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
9336 }
9337
9338 if (!vect_analyze_stmt (pattern_stmt, need_to_vectorize, node,
9339 node_instance))
9340 return false;
9341 }
9342
9343 if (is_pattern_stmt_p (stmt_info)
9344 && node == NULL
9345 && (pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info)))
9346 {
9347 gimple_stmt_iterator si;
9348
9349 for (si = gsi_start (pattern_def_seq); !gsi_end_p (si); gsi_next (&si))
9350 {
9351 gimple *pattern_def_stmt = gsi_stmt (si);
9352 if (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_def_stmt))
9353 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_def_stmt)))
9354 {
9355 /* Analyze def stmt of STMT if it's a pattern stmt. */
9356 if (dump_enabled_p ())
9357 {
9358 dump_printf_loc (MSG_NOTE, vect_location,
9359 "==> examining pattern def statement: ");
9360 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_def_stmt, 0);
9361 }
9362
9363 if (!vect_analyze_stmt (pattern_def_stmt,
9364 need_to_vectorize, node, node_instance))
9365 return false;
9366 }
9367 }
9368 }
9369
9370 switch (STMT_VINFO_DEF_TYPE (stmt_info))
9371 {
9372 case vect_internal_def:
9373 break;
9374
9375 case vect_reduction_def:
9376 case vect_nested_cycle:
9377 gcc_assert (!bb_vinfo
9378 && (relevance == vect_used_in_outer
9379 || relevance == vect_used_in_outer_by_reduction
9380 || relevance == vect_used_by_reduction
9381 || relevance == vect_unused_in_scope
9382 || relevance == vect_used_only_live));
9383 break;
9384
9385 case vect_induction_def:
9386 gcc_assert (!bb_vinfo);
9387 break;
9388
9389 case vect_constant_def:
9390 case vect_external_def:
9391 case vect_unknown_def_type:
9392 default:
9393 gcc_unreachable ();
9394 }
9395
9396 if (STMT_VINFO_RELEVANT_P (stmt_info))
9397 {
9398 gcc_assert (!VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt))));
9399 gcc_assert (STMT_VINFO_VECTYPE (stmt_info)
9400 || (is_gimple_call (stmt)
9401 && gimple_call_lhs (stmt) == NULL_TREE));
9402 *need_to_vectorize = true;
9403 }
9404
9405 if (PURE_SLP_STMT (stmt_info) && !node)
9406 {
9407 dump_printf_loc (MSG_NOTE, vect_location,
9408 "handled only by SLP analysis\n");
9409 return true;
9410 }
9411
9412 ok = true;
9413 if (!bb_vinfo
9414 && (STMT_VINFO_RELEVANT_P (stmt_info)
9415 || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def))
9416 ok = (vectorizable_simd_clone_call (stmt, NULL, NULL, node)
9417 || vectorizable_conversion (stmt, NULL, NULL, node)
9418 || vectorizable_shift (stmt, NULL, NULL, node)
9419 || vectorizable_operation (stmt, NULL, NULL, node)
9420 || vectorizable_assignment (stmt, NULL, NULL, node)
9421 || vectorizable_load (stmt, NULL, NULL, node, NULL)
9422 || vectorizable_call (stmt, NULL, NULL, node)
9423 || vectorizable_store (stmt, NULL, NULL, node)
9424 || vectorizable_reduction (stmt, NULL, NULL, node, node_instance)
9425 || vectorizable_induction (stmt, NULL, NULL, node)
9426 || vectorizable_condition (stmt, NULL, NULL, NULL, 0, node)
9427 || vectorizable_comparison (stmt, NULL, NULL, NULL, node));
9428 else
9429 {
9430 if (bb_vinfo)
9431 ok = (vectorizable_simd_clone_call (stmt, NULL, NULL, node)
9432 || vectorizable_conversion (stmt, NULL, NULL, node)
9433 || vectorizable_shift (stmt, NULL, NULL, node)
9434 || vectorizable_operation (stmt, NULL, NULL, node)
9435 || vectorizable_assignment (stmt, NULL, NULL, node)
9436 || vectorizable_load (stmt, NULL, NULL, node, NULL)
9437 || vectorizable_call (stmt, NULL, NULL, node)
9438 || vectorizable_store (stmt, NULL, NULL, node)
9439 || vectorizable_condition (stmt, NULL, NULL, NULL, 0, node)
9440 || vectorizable_comparison (stmt, NULL, NULL, NULL, node));
9441 }
9442
9443 if (!ok)
9444 {
9445 if (dump_enabled_p ())
9446 {
9447 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9448 "not vectorized: relevant stmt not ");
9449 dump_printf (MSG_MISSED_OPTIMIZATION, "supported: ");
9450 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
9451 }
9452
9453 return false;
9454 }
9455
9456 if (bb_vinfo)
9457 return true;
9458
9459 /* Stmts that are (also) "live" (i.e. - that are used out of the loop)
9460 need extra handling, except for vectorizable reductions. */
9461 if (STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
9462 && !can_vectorize_live_stmts (stmt, NULL, node, NULL))
9463 {
9464 if (dump_enabled_p ())
9465 {
9466 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9467 "not vectorized: live stmt not supported: ");
9468 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
9469 }
9470
9471 return false;
9472 }
9473
9474 return true;
9475 }
9476
9477
9478 /* Function vect_transform_stmt.
9479
9480 Create a vectorized stmt to replace STMT, and insert it at BSI. */
9481
9482 bool
9483 vect_transform_stmt (gimple *stmt, gimple_stmt_iterator *gsi,
9484 bool *grouped_store, slp_tree slp_node,
9485 slp_instance slp_node_instance)
9486 {
9487 bool is_store = false;
9488 gimple *vec_stmt = NULL;
9489 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
9490 bool done;
9491
9492 gcc_assert (slp_node || !PURE_SLP_STMT (stmt_info));
9493 gimple *old_vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
9494
9495 bool nested_p = (STMT_VINFO_LOOP_VINFO (stmt_info)
9496 && nested_in_vect_loop_p
9497 (LOOP_VINFO_LOOP (STMT_VINFO_LOOP_VINFO (stmt_info)),
9498 stmt));
9499
9500 switch (STMT_VINFO_TYPE (stmt_info))
9501 {
9502 case type_demotion_vec_info_type:
9503 case type_promotion_vec_info_type:
9504 case type_conversion_vec_info_type:
9505 done = vectorizable_conversion (stmt, gsi, &vec_stmt, slp_node);
9506 gcc_assert (done);
9507 break;
9508
9509 case induc_vec_info_type:
9510 done = vectorizable_induction (stmt, gsi, &vec_stmt, slp_node);
9511 gcc_assert (done);
9512 break;
9513
9514 case shift_vec_info_type:
9515 done = vectorizable_shift (stmt, gsi, &vec_stmt, slp_node);
9516 gcc_assert (done);
9517 break;
9518
9519 case op_vec_info_type:
9520 done = vectorizable_operation (stmt, gsi, &vec_stmt, slp_node);
9521 gcc_assert (done);
9522 break;
9523
9524 case assignment_vec_info_type:
9525 done = vectorizable_assignment (stmt, gsi, &vec_stmt, slp_node);
9526 gcc_assert (done);
9527 break;
9528
9529 case load_vec_info_type:
9530 done = vectorizable_load (stmt, gsi, &vec_stmt, slp_node,
9531 slp_node_instance);
9532 gcc_assert (done);
9533 break;
9534
9535 case store_vec_info_type:
9536 done = vectorizable_store (stmt, gsi, &vec_stmt, slp_node);
9537 gcc_assert (done);
9538 if (STMT_VINFO_GROUPED_ACCESS (stmt_info) && !slp_node)
9539 {
9540 /* In case of interleaving, the whole chain is vectorized when the
9541 last store in the chain is reached. Store stmts before the last
9542 one are skipped, and their stmt_vec_info shouldn't be freed
9543 meanwhile. */
9544 *grouped_store = true;
9545 stmt_vec_info group_info
9546 = vinfo_for_stmt (GROUP_FIRST_ELEMENT (stmt_info));
9547 if (GROUP_STORE_COUNT (group_info) == GROUP_SIZE (group_info))
9548 is_store = true;
9549 }
9550 else
9551 is_store = true;
9552 break;
9553
9554 case condition_vec_info_type:
9555 done = vectorizable_condition (stmt, gsi, &vec_stmt, NULL, 0, slp_node);
9556 gcc_assert (done);
9557 break;
9558
9559 case comparison_vec_info_type:
9560 done = vectorizable_comparison (stmt, gsi, &vec_stmt, NULL, slp_node);
9561 gcc_assert (done);
9562 break;
9563
9564 case call_vec_info_type:
9565 done = vectorizable_call (stmt, gsi, &vec_stmt, slp_node);
9566 stmt = gsi_stmt (*gsi);
9567 break;
9568
9569 case call_simd_clone_vec_info_type:
9570 done = vectorizable_simd_clone_call (stmt, gsi, &vec_stmt, slp_node);
9571 stmt = gsi_stmt (*gsi);
9572 break;
9573
9574 case reduc_vec_info_type:
9575 done = vectorizable_reduction (stmt, gsi, &vec_stmt, slp_node,
9576 slp_node_instance);
9577 gcc_assert (done);
9578 break;
9579
9580 default:
9581 if (!STMT_VINFO_LIVE_P (stmt_info))
9582 {
9583 if (dump_enabled_p ())
9584 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9585 "stmt not supported.\n");
9586 gcc_unreachable ();
9587 }
9588 }
9589
9590 /* Verify SLP vectorization doesn't mess with STMT_VINFO_VEC_STMT.
9591 This would break hybrid SLP vectorization. */
9592 if (slp_node)
9593 gcc_assert (!vec_stmt
9594 && STMT_VINFO_VEC_STMT (stmt_info) == old_vec_stmt);
9595
9596 /* Handle inner-loop stmts whose DEF is used in the loop-nest that
9597 is being vectorized, but outside the immediately enclosing loop. */
9598 if (vec_stmt
9599 && nested_p
9600 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
9601 && (STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_outer
9602 || STMT_VINFO_RELEVANT (stmt_info) ==
9603 vect_used_in_outer_by_reduction))
9604 {
9605 struct loop *innerloop = LOOP_VINFO_LOOP (
9606 STMT_VINFO_LOOP_VINFO (stmt_info))->inner;
9607 imm_use_iterator imm_iter;
9608 use_operand_p use_p;
9609 tree scalar_dest;
9610 gimple *exit_phi;
9611
9612 if (dump_enabled_p ())
9613 dump_printf_loc (MSG_NOTE, vect_location,
9614 "Record the vdef for outer-loop vectorization.\n");
9615
9616 /* Find the relevant loop-exit phi-node, and record the vec_stmt there
9617 (to be used when vectorizing outer-loop stmts that use the DEF of
9618 STMT). */
9619 if (gimple_code (stmt) == GIMPLE_PHI)
9620 scalar_dest = PHI_RESULT (stmt);
9621 else
9622 scalar_dest = gimple_get_lhs (stmt);
9623
9624 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
9625 {
9626 if (!flow_bb_inside_loop_p (innerloop, gimple_bb (USE_STMT (use_p))))
9627 {
9628 exit_phi = USE_STMT (use_p);
9629 STMT_VINFO_VEC_STMT (vinfo_for_stmt (exit_phi)) = vec_stmt;
9630 }
9631 }
9632 }
9633
9634 /* Handle stmts whose DEF is used outside the loop-nest that is
9635 being vectorized. */
9636 if (STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
9637 {
9638 done = can_vectorize_live_stmts (stmt, gsi, slp_node, &vec_stmt);
9639 gcc_assert (done);
9640 }
9641
9642 if (vec_stmt)
9643 STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;
9644
9645 return is_store;
9646 }
9647
9648
9649 /* Remove a group of stores (for SLP or interleaving), free their
9650 stmt_vec_info. */
9651
9652 void
9653 vect_remove_stores (gimple *first_stmt)
9654 {
9655 gimple *next = first_stmt;
9656 gimple *tmp;
9657 gimple_stmt_iterator next_si;
9658
9659 while (next)
9660 {
9661 stmt_vec_info stmt_info = vinfo_for_stmt (next);
9662
9663 tmp = GROUP_NEXT_ELEMENT (stmt_info);
9664 if (is_pattern_stmt_p (stmt_info))
9665 next = STMT_VINFO_RELATED_STMT (stmt_info);
9666 /* Free the attached stmt_vec_info and remove the stmt. */
9667 next_si = gsi_for_stmt (next);
9668 unlink_stmt_vdef (next);
9669 gsi_remove (&next_si, true);
9670 release_defs (next);
9671 free_stmt_vec_info (next);
9672 next = tmp;
9673 }
9674 }
9675
9676
9677 /* Function new_stmt_vec_info.
9678
9679 Create and initialize a new stmt_vec_info struct for STMT. */
9680
9681 stmt_vec_info
9682 new_stmt_vec_info (gimple *stmt, vec_info *vinfo)
9683 {
9684 stmt_vec_info res;
9685 res = (stmt_vec_info) xcalloc (1, sizeof (struct _stmt_vec_info));
9686
9687 STMT_VINFO_TYPE (res) = undef_vec_info_type;
9688 STMT_VINFO_STMT (res) = stmt;
9689 res->vinfo = vinfo;
9690 STMT_VINFO_RELEVANT (res) = vect_unused_in_scope;
9691 STMT_VINFO_LIVE_P (res) = false;
9692 STMT_VINFO_VECTYPE (res) = NULL;
9693 STMT_VINFO_VEC_STMT (res) = NULL;
9694 STMT_VINFO_VECTORIZABLE (res) = true;
9695 STMT_VINFO_IN_PATTERN_P (res) = false;
9696 STMT_VINFO_RELATED_STMT (res) = NULL;
9697 STMT_VINFO_PATTERN_DEF_SEQ (res) = NULL;
9698 STMT_VINFO_DATA_REF (res) = NULL;
9699 STMT_VINFO_VEC_REDUCTION_TYPE (res) = TREE_CODE_REDUCTION;
9700 STMT_VINFO_VEC_CONST_COND_REDUC_CODE (res) = ERROR_MARK;
9701
9702 if (gimple_code (stmt) == GIMPLE_PHI
9703 && is_loop_header_bb_p (gimple_bb (stmt)))
9704 STMT_VINFO_DEF_TYPE (res) = vect_unknown_def_type;
9705 else
9706 STMT_VINFO_DEF_TYPE (res) = vect_internal_def;
9707
9708 STMT_VINFO_SAME_ALIGN_REFS (res).create (0);
9709 STMT_SLP_TYPE (res) = loop_vect;
9710 STMT_VINFO_NUM_SLP_USES (res) = 0;
9711
9712 GROUP_FIRST_ELEMENT (res) = NULL;
9713 GROUP_NEXT_ELEMENT (res) = NULL;
9714 GROUP_SIZE (res) = 0;
9715 GROUP_STORE_COUNT (res) = 0;
9716 GROUP_GAP (res) = 0;
9717 GROUP_SAME_DR_STMT (res) = NULL;
9718
9719 return res;
9720 }
9721
9722
9723 /* Create the vector used to map statements to their stmt_vec_info. */
9724
9725 void
9726 init_stmt_vec_info_vec (void)
9727 {
9728 gcc_assert (!stmt_vec_info_vec.exists ());
9729 stmt_vec_info_vec.create (50);
9730 }
9731
9732
9733 /* Free the vector used to map statements to their stmt_vec_info. */
9734
9735 void
9736 free_stmt_vec_info_vec (void)
9737 {
9738 unsigned int i;
9739 stmt_vec_info info;
9740 FOR_EACH_VEC_ELT (stmt_vec_info_vec, i, info)
9741 if (info != NULL)
9742 free_stmt_vec_info (STMT_VINFO_STMT (info));
9743 gcc_assert (stmt_vec_info_vec.exists ());
9744 stmt_vec_info_vec.release ();
9745 }
9746
9747
9748 /* Free stmt vectorization related info. */
9749
9750 void
9751 free_stmt_vec_info (gimple *stmt)
9752 {
9753 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
9754
9755 if (!stmt_info)
9756 return;
9757
9758 /* Check if this statement has a related "pattern stmt"
9759 (introduced by the vectorizer during the pattern recognition
9760 pass). Free pattern's stmt_vec_info and def stmt's stmt_vec_info
9761 too. */
9762 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
9763 {
9764 stmt_vec_info patt_info
9765 = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
9766 if (patt_info)
9767 {
9768 gimple_seq seq = STMT_VINFO_PATTERN_DEF_SEQ (patt_info);
9769 gimple *patt_stmt = STMT_VINFO_STMT (patt_info);
9770 gimple_set_bb (patt_stmt, NULL);
9771 tree lhs = gimple_get_lhs (patt_stmt);
9772 if (lhs && TREE_CODE (lhs) == SSA_NAME)
9773 release_ssa_name (lhs);
9774 if (seq)
9775 {
9776 gimple_stmt_iterator si;
9777 for (si = gsi_start (seq); !gsi_end_p (si); gsi_next (&si))
9778 {
9779 gimple *seq_stmt = gsi_stmt (si);
9780 gimple_set_bb (seq_stmt, NULL);
9781 lhs = gimple_get_lhs (seq_stmt);
9782 if (lhs && TREE_CODE (lhs) == SSA_NAME)
9783 release_ssa_name (lhs);
9784 free_stmt_vec_info (seq_stmt);
9785 }
9786 }
9787 free_stmt_vec_info (patt_stmt);
9788 }
9789 }
9790
9791 STMT_VINFO_SAME_ALIGN_REFS (stmt_info).release ();
9792 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).release ();
9793 set_vinfo_for_stmt (stmt, NULL);
9794 free (stmt_info);
9795 }
9796
9797
9798 /* Function get_vectype_for_scalar_type_and_size.
9799
9800 Returns the vector type corresponding to SCALAR_TYPE and SIZE as supported
9801 by the target. */
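/* For example (illustrative, target-dependent): SCALAR_TYPE == int
   (SImode, 4 bytes) and SIZE == 16 yield a 4-element vector type such as
   V4SI, while SIZE == 0 defers to
   targetm.vectorize.preferred_simd_mode (SImode).  A 1-bit _Bool element
   is first rebuilt as an 8-bit nonstandard integer type so that the
   element mode and precision agree.  */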
9802
9803 tree
9804 get_vectype_for_scalar_type_and_size (tree scalar_type, poly_uint64 size)
9805 {
9806 tree orig_scalar_type = scalar_type;
9807 scalar_mode inner_mode;
9808 machine_mode simd_mode;
9809 poly_uint64 nunits;
9810 tree vectype;
9811
9812 if (!is_int_mode (TYPE_MODE (scalar_type), &inner_mode)
9813 && !is_float_mode (TYPE_MODE (scalar_type), &inner_mode))
9814 return NULL_TREE;
9815
9816 unsigned int nbytes = GET_MODE_SIZE (inner_mode);
9817
9818 /* For vector types of elements whose mode precision doesn't
9819 match their type's precision we use an element type of mode
9820 precision. The vectorization routines will have to make sure
9821 they support the proper result truncation/extension.
9822 We also make sure to build vector types with INTEGER_TYPE
9823 component type only. */
9824 if (INTEGRAL_TYPE_P (scalar_type)
9825 && (GET_MODE_BITSIZE (inner_mode) != TYPE_PRECISION (scalar_type)
9826 || TREE_CODE (scalar_type) != INTEGER_TYPE))
9827 scalar_type = build_nonstandard_integer_type (GET_MODE_BITSIZE (inner_mode),
9828 TYPE_UNSIGNED (scalar_type));
9829
9830 /* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
9831 When the component mode passes the above test simply use a type
9832 corresponding to that mode. The theory is that any use that
9833 would cause problems with this will disable vectorization anyway. */
9834 else if (!SCALAR_FLOAT_TYPE_P (scalar_type)
9835 && !INTEGRAL_TYPE_P (scalar_type))
9836 scalar_type = lang_hooks.types.type_for_mode (inner_mode, 1);
9837
9838 /* We can't build a vector type of elements with alignment bigger than
9839 their size. */
9840 else if (nbytes < TYPE_ALIGN_UNIT (scalar_type))
9841 scalar_type = lang_hooks.types.type_for_mode (inner_mode,
9842 TYPE_UNSIGNED (scalar_type));
9843
9844 /* If we fell back to using the mode, fail if there was
9845 no scalar type for it. */
9846 if (scalar_type == NULL_TREE)
9847 return NULL_TREE;
9848
9849 /* If no size was supplied use the mode the target prefers. Otherwise
9850 look up a vector mode of the specified size. */
9851 if (known_eq (size, 0U))
9852 simd_mode = targetm.vectorize.preferred_simd_mode (inner_mode);
9853 else if (!multiple_p (size, nbytes, &nunits)
9854 || !mode_for_vector (inner_mode, nunits).exists (&simd_mode))
9855 return NULL_TREE;
9856 /* NOTE: nunits == 1 is allowed to support single element vector types. */
9857 if (!multiple_p (GET_MODE_SIZE (simd_mode), nbytes, &nunits))
9858 return NULL_TREE;
9859
9860 vectype = build_vector_type (scalar_type, nunits);
9861
9862 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
9863 && !INTEGRAL_MODE_P (TYPE_MODE (vectype)))
9864 return NULL_TREE;
9865
9866 /* Re-attach the address-space qualifier if we canonicalized the scalar
9867 type. */
9868 if (TYPE_ADDR_SPACE (orig_scalar_type) != TYPE_ADDR_SPACE (vectype))
9869 return build_qualified_type
9870 (vectype, KEEP_QUAL_ADDR_SPACE (TYPE_QUALS (orig_scalar_type)));
9871
9872 return vectype;
9873 }
9874
9875 poly_uint64 current_vector_size;
9876
9877 /* Function get_vectype_for_scalar_type.
9878
9879 Returns the vector type corresponding to SCALAR_TYPE as supported
9880 by the target. */
9881
9882 tree
9883 get_vectype_for_scalar_type (tree scalar_type)
9884 {
9885 tree vectype;
9886 vectype = get_vectype_for_scalar_type_and_size (scalar_type,
9887 current_vector_size);
9888 if (vectype
9889 && known_eq (current_vector_size, 0U))
9890 current_vector_size = GET_MODE_SIZE (TYPE_MODE (vectype));
9891 return vectype;
9892 }
9893
9894 /* Function get_mask_type_for_scalar_type.
9895
9896 Returns the mask type corresponding to a result of comparison
9897 of vectors of specified SCALAR_TYPE as supported by target. */
9898
9899 tree
9900 get_mask_type_for_scalar_type (tree scalar_type)
9901 {
9902 tree vectype = get_vectype_for_scalar_type (scalar_type);
9903
9904 if (!vectype)
9905 return NULL;
9906
9907 return build_truth_vector_type (TYPE_VECTOR_SUBPARTS (vectype),
9908 current_vector_size);
9909 }
9910
9911 /* Function get_same_sized_vectype
9912
9913 Returns a vector type corresponding to SCALAR_TYPE with the same size
9914 as VECTOR_TYPE, if supported by the target. */
9915
9916 tree
9917 get_same_sized_vectype (tree scalar_type, tree vector_type)
9918 {
9919 if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type))
9920 return build_same_sized_truth_vector_type (vector_type);
9921
9922 return get_vectype_for_scalar_type_and_size
9923 (scalar_type, GET_MODE_SIZE (TYPE_MODE (vector_type)));
9924 }
9925
9926 /* Function vect_is_simple_use.
9927
9928 Input:
9929 VINFO - the vect info of the loop or basic block that is being vectorized.
9930 OPERAND - operand in the loop or bb.
9931 Output:
9932 DEF_STMT - the defining stmt in case OPERAND is an SSA_NAME.
9933 DT - the type of definition
9934
9935 Returns whether a stmt with OPERAND can be vectorized.
9936 For loops, supportable operands are constants, loop invariants, and operands
9937 that are defined by the current iteration of the loop. Unsupportable
9938 operands are those that are defined by a previous iteration of the loop (as
9939 is the case in reduction/induction computations).
9940 For basic blocks, supportable operands are constants and bb invariants.
9941 For now, operands defined outside the basic block are not supported. */
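/* A hypothetical illustration: for a loop body "a[i] = b[i] + c" with c
   defined before the loop, the use of c classifies as vect_external_def,
   a literal constant as vect_constant_def, and the SSA name holding the
   loaded value of b[i] as vect_internal_def.  */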
9942
9943 bool
9944 vect_is_simple_use (tree operand, vec_info *vinfo,
9945 gimple **def_stmt, enum vect_def_type *dt)
9946 {
9947 *def_stmt = NULL;
9948 *dt = vect_unknown_def_type;
9949
9950 if (dump_enabled_p ())
9951 {
9952 dump_printf_loc (MSG_NOTE, vect_location,
9953 "vect_is_simple_use: operand ");
9954 dump_generic_expr (MSG_NOTE, TDF_SLIM, operand);
9955 dump_printf (MSG_NOTE, "\n");
9956 }
9957
9958 if (CONSTANT_CLASS_P (operand))
9959 {
9960 *dt = vect_constant_def;
9961 return true;
9962 }
9963
9964 if (is_gimple_min_invariant (operand))
9965 {
9966 *dt = vect_external_def;
9967 return true;
9968 }
9969
9970 if (TREE_CODE (operand) != SSA_NAME)
9971 {
9972 if (dump_enabled_p ())
9973 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9974 "not ssa-name.\n");
9975 return false;
9976 }
9977
9978 if (SSA_NAME_IS_DEFAULT_DEF (operand))
9979 {
9980 *dt = vect_external_def;
9981 return true;
9982 }
9983
9984 *def_stmt = SSA_NAME_DEF_STMT (operand);
9985 if (dump_enabled_p ())
9986 {
9987 dump_printf_loc (MSG_NOTE, vect_location, "def_stmt: ");
9988 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, *def_stmt, 0);
9989 }
9990
9991 if (! vect_stmt_in_region_p (vinfo, *def_stmt))
9992 *dt = vect_external_def;
9993 else
9994 {
9995 stmt_vec_info stmt_vinfo = vinfo_for_stmt (*def_stmt);
9996 *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
9997 }
9998
9999 if (dump_enabled_p ())
10000 {
10001 dump_printf_loc (MSG_NOTE, vect_location, "type of def: ");
10002 switch (*dt)
10003 {
10004 case vect_uninitialized_def:
10005 dump_printf (MSG_NOTE, "uninitialized\n");
10006 break;
10007 case vect_constant_def:
10008 dump_printf (MSG_NOTE, "constant\n");
10009 break;
10010 case vect_external_def:
10011 dump_printf (MSG_NOTE, "external\n");
10012 break;
10013 case vect_internal_def:
10014 dump_printf (MSG_NOTE, "internal\n");
10015 break;
10016 case vect_induction_def:
10017 dump_printf (MSG_NOTE, "induction\n");
10018 break;
10019 case vect_reduction_def:
10020 dump_printf (MSG_NOTE, "reduction\n");
10021 break;
10022 case vect_double_reduction_def:
10023 dump_printf (MSG_NOTE, "double reduction\n");
10024 break;
10025 case vect_nested_cycle:
10026 dump_printf (MSG_NOTE, "nested cycle\n");
10027 break;
10028 case vect_unknown_def_type:
10029 dump_printf (MSG_NOTE, "unknown\n");
10030 break;
10031 }
10032 }
10033
10034 if (*dt == vect_unknown_def_type)
10035 {
10036 if (dump_enabled_p ())
10037 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10038 "Unsupported pattern.\n");
10039 return false;
10040 }
10041
10042 switch (gimple_code (*def_stmt))
10043 {
10044 case GIMPLE_PHI:
10045 case GIMPLE_ASSIGN:
10046 case GIMPLE_CALL:
10047 break;
10048 default:
10049 if (dump_enabled_p ())
10050 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10051 "unsupported defining stmt:\n");
10052 return false;
10053 }
10054
10055 return true;
10056 }
10057
10058 /* Function vect_is_simple_use.
10059
10060 Same as vect_is_simple_use but also determines the vector operand
10061 type of OPERAND and stores it to *VECTYPE. If the definition of
10062 OPERAND is vect_uninitialized_def, vect_constant_def or
10063 vect_external_def *VECTYPE will be set to NULL_TREE and the caller
10064 is responsible for computing the best suited vector type for the
10065 scalar operand. */
10066
10067 bool
10068 vect_is_simple_use (tree operand, vec_info *vinfo,
10069 gimple **def_stmt, enum vect_def_type *dt, tree *vectype)
10070 {
10071 if (!vect_is_simple_use (operand, vinfo, def_stmt, dt))
10072 return false;
10073
10074 /* Now get a vector type if the def is internal, otherwise supply
10075 NULL_TREE and leave it up to the caller to figure out a proper
10076 type for the use stmt. */
10077 if (*dt == vect_internal_def
10078 || *dt == vect_induction_def
10079 || *dt == vect_reduction_def
10080 || *dt == vect_double_reduction_def
10081 || *dt == vect_nested_cycle)
10082 {
10083 stmt_vec_info stmt_info = vinfo_for_stmt (*def_stmt);
10084
10085 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
10086 && !STMT_VINFO_RELEVANT (stmt_info)
10087 && !STMT_VINFO_LIVE_P (stmt_info))
10088 stmt_info = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
10089
10090 *vectype = STMT_VINFO_VECTYPE (stmt_info);
10091 gcc_assert (*vectype != NULL_TREE);
10092 }
10093 else if (*dt == vect_uninitialized_def
10094 || *dt == vect_constant_def
10095 || *dt == vect_external_def)
10096 *vectype = NULL_TREE;
10097 else
10098 gcc_unreachable ();
10099
10100 return true;
10101 }
10102
10103
10104 /* Function supportable_widening_operation
10105
10106 Check whether an operation represented by the code CODE is a
10107 widening operation that is supported by the target platform in
10108 vector form (i.e., when operating on arguments of type VECTYPE_IN
10109 producing a result of type VECTYPE_OUT).
10110
10111 Widening operations we currently support are NOP (CONVERT), FLOAT
10112 and WIDEN_MULT. This function checks if these operations are supported
10113 by the target platform either directly (via vector tree-codes), or via
10114 target builtins.
10115
10116 Output:
10117 - CODE1 and CODE2 are codes of vector operations to be used when
10118 vectorizing the operation, if available.
10119 - MULTI_STEP_CVT determines the number of required intermediate steps in
10120 case of multi-step conversion (like char->short->int - in that case
10121 MULTI_STEP_CVT will be 1).
10122 - INTERM_TYPES contains the intermediate type required to perform the
10123 widening operation (short in the above example). */
10124
10125 bool
10126 supportable_widening_operation (enum tree_code code, gimple *stmt,
10127 tree vectype_out, tree vectype_in,
10128 enum tree_code *code1, enum tree_code *code2,
10129 int *multi_step_cvt,
10130 vec<tree> *interm_types)
10131 {
10132 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
10133 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
10134 struct loop *vect_loop = NULL;
10135 machine_mode vec_mode;
10136 enum insn_code icode1, icode2;
10137 optab optab1, optab2;
10138 tree vectype = vectype_in;
10139 tree wide_vectype = vectype_out;
10140 enum tree_code c1, c2;
10141 int i;
10142 tree prev_type, intermediate_type;
10143 machine_mode intermediate_mode, prev_mode;
10144 optab optab3, optab4;
10145
10146 *multi_step_cvt = 0;
10147 if (loop_info)
10148 vect_loop = LOOP_VINFO_LOOP (loop_info);
10149
10150 switch (code)
10151 {
10152 case WIDEN_MULT_EXPR:
10153 /* The result of a vectorized widening operation usually requires
10154 two vectors (because the widened results do not fit into one vector).
10155 The generated vector results would normally be expected to be
10156 generated in the same order as in the original scalar computation,
10157 i.e. if 8 results are generated in each vector iteration, they are
10158 to be organized as follows:
10159 vect1: [res1,res2,res3,res4],
10160 vect2: [res5,res6,res7,res8].
10161
10162 However, in the special case that the result of the widening
10163 operation is used in a reduction computation only, the order doesn't
10164 matter (because when vectorizing a reduction we change the order of
10165 the computation). Some targets can take advantage of this and
10166 generate more efficient code. For example, targets like Altivec,
10167 that support widen_mult using a sequence of {mult_even,mult_odd}
10168 generate the following vectors:
10169 vect1: [res1,res3,res5,res7],
10170 vect2: [res2,res4,res6,res8].
10171
10172 When vectorizing outer-loops, we execute the inner-loop sequentially
10173 (each vectorized inner-loop iteration contributes to VF outer-loop
10174 iterations in parallel). We therefore don't allow changing the
10175 order of the computation in the inner-loop during outer-loop
10176 vectorization. */
10177 /* TODO: Another case in which order doesn't *really* matter is when we
10178 widen and then contract again, e.g. (short)((int)x * y >> 8).
10179 Normally, pack_trunc performs an even/odd permute, whereas the
10180 repack from an even/odd expansion would be an interleave, which
10181 would be significantly simpler for e.g. AVX2. */
10182 /* In any case, in order to avoid duplicating the code below, recurse
10183 on VEC_WIDEN_MULT_EVEN_EXPR. If it succeeds, all the return values
10184 are properly set up for the caller. If we fail, we'll continue with
10185 a VEC_WIDEN_MULT_LO/HI_EXPR check. */
10186 if (vect_loop
10187 && STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction
10188 && !nested_in_vect_loop_p (vect_loop, stmt)
10189 && supportable_widening_operation (VEC_WIDEN_MULT_EVEN_EXPR,
10190 stmt, vectype_out, vectype_in,
10191 code1, code2, multi_step_cvt,
10192 interm_types))
10193 {
10194 /* Elements in a vector with vect_used_by_reduction property cannot
10195 be reordered if the use chain with this property does not have the
10196 same operation. One such example is s += a * b, where elements
10197 in a and b cannot be reordered. Here we check if the vector defined
10198 by STMT is only directly used in the reduction statement. */
10199 tree lhs = gimple_assign_lhs (stmt);
10200 use_operand_p dummy;
10201 gimple *use_stmt;
10202 stmt_vec_info use_stmt_info = NULL;
10203 if (single_imm_use (lhs, &dummy, &use_stmt)
10204 && (use_stmt_info = vinfo_for_stmt (use_stmt))
10205 && STMT_VINFO_DEF_TYPE (use_stmt_info) == vect_reduction_def)
10206 return true;
10207 }
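      /* If we get here the even/odd form was either not supported or not
	 safe to use (the widened result has uses other than the
	 reduction), so fall back to the in-order LO/HI pair.  The
	 even/odd interleaving is acceptable for a plain sum reduction
	 because the final sum does not depend on which lane each partial
	 product ends up in.  */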
10208 c1 = VEC_WIDEN_MULT_LO_EXPR;
10209 c2 = VEC_WIDEN_MULT_HI_EXPR;
10210 break;
10211
10212 case DOT_PROD_EXPR:
10213 c1 = DOT_PROD_EXPR;
10214 c2 = DOT_PROD_EXPR;
10215 break;
10216
10217 case SAD_EXPR:
10218 c1 = SAD_EXPR;
10219 c2 = SAD_EXPR;
10220 break;
10221
10222 case VEC_WIDEN_MULT_EVEN_EXPR:
10223 /* Support the recursion induced just above. */
10224 c1 = VEC_WIDEN_MULT_EVEN_EXPR;
10225 c2 = VEC_WIDEN_MULT_ODD_EXPR;
10226 break;
10227
10228 case WIDEN_LSHIFT_EXPR:
10229 c1 = VEC_WIDEN_LSHIFT_LO_EXPR;
10230 c2 = VEC_WIDEN_LSHIFT_HI_EXPR;
10231 break;
10232
10233 CASE_CONVERT:
10234 c1 = VEC_UNPACK_LO_EXPR;
10235 c2 = VEC_UNPACK_HI_EXPR;
10236 break;
10237
10238 case FLOAT_EXPR:
10239 c1 = VEC_UNPACK_FLOAT_LO_EXPR;
10240 c2 = VEC_UNPACK_FLOAT_HI_EXPR;
10241 break;
10242
10243 case FIX_TRUNC_EXPR:
10244 /* ??? Not yet implemented due to missing VEC_UNPACK_FIX_TRUNC_HI_EXPR/
10245 VEC_UNPACK_FIX_TRUNC_LO_EXPR tree codes and optabs used for
10246 computing the operation. */
10247 return false;
10248
10249 default:
10250 gcc_unreachable ();
10251 }
10252
10253 if (BYTES_BIG_ENDIAN && c1 != VEC_WIDEN_MULT_EVEN_EXPR)
10254 std::swap (c1, c2);
10255
10256 if (code == FIX_TRUNC_EXPR)
10257 {
10258 /* The signedness is determined from the output operand. */
10259 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
10260 optab2 = optab_for_tree_code (c2, vectype_out, optab_default);
10261 }
10262 else
10263 {
10264 optab1 = optab_for_tree_code (c1, vectype, optab_default);
10265 optab2 = optab_for_tree_code (c2, vectype, optab_default);
10266 }
10267
10268 if (!optab1 || !optab2)
10269 return false;
10270
10271 vec_mode = TYPE_MODE (vectype);
10272 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing
10273 || (icode2 = optab_handler (optab2, vec_mode)) == CODE_FOR_nothing)
10274 return false;
10275
10276 *code1 = c1;
10277 *code2 = c2;
10278
10279 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
10280 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
10281 /* For scalar masks we may have different boolean
10282 vector types having the same QImode. Thus we
10283 add an additional check for the number of elements. */
10284 return (!VECTOR_BOOLEAN_TYPE_P (vectype)
10285 || known_eq (TYPE_VECTOR_SUBPARTS (vectype),
10286 TYPE_VECTOR_SUBPARTS (wide_vectype) * 2));
10287
10288 /* Check if it's a multi-step conversion that can be done using intermediate
10289 types. */
10290
10291 prev_type = vectype;
10292 prev_mode = vec_mode;
10293
10294 if (!CONVERT_EXPR_CODE_P (code))
10295 return false;
10296
10297 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
10298 intermediate steps in the promotion sequence. We try
10299 MAX_INTERM_CVT_STEPS to get to WIDE_VECTYPE, and fail if we do
10300 not. */
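  /* For example, on a hypothetical 128-bit target a promotion from a vector
     of 16 chars to vectors of 4 ints cannot be done in one step: the chars
     are first unpacked into two vectors of 8 shorts, each of which is then
     unpacked into two vectors of 4 ints.  In that case the loop below
     records the short vector type in INTERM_TYPES and sets *MULTI_STEP_CVT
     to 1.  */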
10301 interm_types->create (MAX_INTERM_CVT_STEPS);
10302 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
10303 {
10304 intermediate_mode = insn_data[icode1].operand[0].mode;
10305 if (VECTOR_BOOLEAN_TYPE_P (prev_type))
10306 {
10307 intermediate_type = vect_halve_mask_nunits (prev_type);
10308 if (intermediate_mode != TYPE_MODE (intermediate_type))
10309 return false;
10310 }
10311 else
10312 intermediate_type
10313 = lang_hooks.types.type_for_mode (intermediate_mode,
10314 TYPE_UNSIGNED (prev_type));
10315
10316 optab3 = optab_for_tree_code (c1, intermediate_type, optab_default);
10317 optab4 = optab_for_tree_code (c2, intermediate_type, optab_default);
10318
10319 if (!optab3 || !optab4
10320 || (icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing
10321 || insn_data[icode1].operand[0].mode != intermediate_mode
10322 || (icode2 = optab_handler (optab2, prev_mode)) == CODE_FOR_nothing
10323 || insn_data[icode2].operand[0].mode != intermediate_mode
10324 || ((icode1 = optab_handler (optab3, intermediate_mode))
10325 == CODE_FOR_nothing)
10326 || ((icode2 = optab_handler (optab4, intermediate_mode))
10327 == CODE_FOR_nothing))
10328 break;
10329
10330 interm_types->quick_push (intermediate_type);
10331 (*multi_step_cvt)++;
10332
10333 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
10334 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
10335 return (!VECTOR_BOOLEAN_TYPE_P (vectype)
10336 || known_eq (TYPE_VECTOR_SUBPARTS (intermediate_type),
10337 TYPE_VECTOR_SUBPARTS (wide_vectype) * 2));
10338
10339 prev_type = intermediate_type;
10340 prev_mode = intermediate_mode;
10341 }
10342
10343 interm_types->release ();
10344 return false;
10345 }
10346
10347
10348 /* Function supportable_narrowing_operation
10349
10350 Check whether an operation represented by the code CODE is a
10351 narrowing operation that is supported by the target platform in
10352 vector form (i.e., when operating on arguments of type VECTYPE_IN
10353 and producing a result of type VECTYPE_OUT).
10354
10355 Narrowing operations we currently support are NOP (CONVERT) and
10356 FIX_TRUNC. This function checks if these operations are supported by
10357 the target platform directly via vector tree-codes.
10358
10359 Output:
10360 - CODE1 is the code of a vector operation to be used when
10361 vectorizing the operation, if available.
10362 - MULTI_STEP_CVT determines the number of required intermediate steps in
10363 case of multi-step conversion (like int->short->char - in that case
10364 MULTI_STEP_CVT will be 1).
10365 - INTERM_TYPES contains the intermediate type(s) required to perform the
10366 narrowing operation (short in the above example). */
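/* A sketch of the int -> char example above on a hypothetical 128-bit
   target: four vectors of 4 ints are packed pairwise into two vectors of
   8 shorts using VEC_PACK_TRUNC_EXPR on the intermediate (short) vector
   type, and those two are packed again into one vector of 16 chars:

     tmp0 = VEC_PACK_TRUNC_EXPR <in0, in1>;    two int vectors   -> shorts
     tmp1 = VEC_PACK_TRUNC_EXPR <in2, in3>;    two int vectors   -> shorts
     out  = VEC_PACK_TRUNC_EXPR <tmp0, tmp1>;  two short vectors -> chars

   Here MULTI_STEP_CVT ends up as 1 and INTERM_TYPES holds the short vector
   type.  */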
10367
10368 bool
10369 supportable_narrowing_operation (enum tree_code code,
10370 tree vectype_out, tree vectype_in,
10371 enum tree_code *code1, int *multi_step_cvt,
10372 vec<tree> *interm_types)
10373 {
10374 machine_mode vec_mode;
10375 enum insn_code icode1;
10376 optab optab1, interm_optab;
10377 tree vectype = vectype_in;
10378 tree narrow_vectype = vectype_out;
10379 enum tree_code c1;
10380 tree intermediate_type, prev_type;
10381 machine_mode intermediate_mode, prev_mode;
10382 int i;
10383 bool uns;
10384
10385 *multi_step_cvt = 0;
10386 switch (code)
10387 {
10388 CASE_CONVERT:
10389 c1 = VEC_PACK_TRUNC_EXPR;
10390 break;
10391
10392 case FIX_TRUNC_EXPR:
10393 c1 = VEC_PACK_FIX_TRUNC_EXPR;
10394 break;
10395
10396 case FLOAT_EXPR:
10397 /* ??? Not yet implemented due to missing VEC_PACK_FLOAT_EXPR
10398 tree code and optabs used for computing the operation. */
10399 return false;
10400
10401 default:
10402 gcc_unreachable ();
10403 }
10404
10405 if (code == FIX_TRUNC_EXPR)
10406 /* The signedness is determined from the output operand. */
10407 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
10408 else
10409 optab1 = optab_for_tree_code (c1, vectype, optab_default);
10410
10411 if (!optab1)
10412 return false;
10413
10414 vec_mode = TYPE_MODE (vectype);
10415 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing)
10416 return false;
10417
10418 *code1 = c1;
10419
10420 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
10421 /* For scalar masks we may have different boolean
10422 vector types having the same QImode. Thus we
10423 add an additional check for the number of elements. */
10424 return (!VECTOR_BOOLEAN_TYPE_P (vectype)
10425 || known_eq (TYPE_VECTOR_SUBPARTS (vectype) * 2,
10426 TYPE_VECTOR_SUBPARTS (narrow_vectype)));
10427
10428 /* Check if it's a multi-step conversion that can be done using intermediate
10429 types. */
10430 prev_mode = vec_mode;
10431 prev_type = vectype;
10432 if (code == FIX_TRUNC_EXPR)
10433 uns = TYPE_UNSIGNED (vectype_out);
10434 else
10435 uns = TYPE_UNSIGNED (vectype);
10436
10437 /* For a multi-step FIX_TRUNC_EXPR, prefer a signed float-to-integer
10438 conversion over an unsigned one, as an unsigned FIX_TRUNC_EXPR is often
10439 more costly than a signed one. */
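  /* A hypothetical illustration: when narrowing a vector of doubles to
     unsigned shorts, the float-to-integer stage may use the signed
     conversion, because the later truncation to short discards the bits in
     which the signed and unsigned intermediate results could differ, while
     the unsigned conversion would usually be the more expensive one.  */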
10440 if (code == FIX_TRUNC_EXPR && uns)
10441 {
10442 enum insn_code icode2;
10443
10444 intermediate_type
10445 = lang_hooks.types.type_for_mode (TYPE_MODE (vectype_out), 0);
10446 interm_optab
10447 = optab_for_tree_code (c1, intermediate_type, optab_default);
10448 if (interm_optab != unknown_optab
10449 && (icode2 = optab_handler (optab1, vec_mode)) != CODE_FOR_nothing
10450 && insn_data[icode1].operand[0].mode
10451 == insn_data[icode2].operand[0].mode)
10452 {
10453 uns = false;
10454 optab1 = interm_optab;
10455 icode1 = icode2;
10456 }
10457 }
10458
10459 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
10460 intermediate steps in the narrowing sequence. We try
10461 MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do not. */
10462 interm_types->create (MAX_INTERM_CVT_STEPS);
10463 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
10464 {
10465 intermediate_mode = insn_data[icode1].operand[0].mode;
10466 if (VECTOR_BOOLEAN_TYPE_P (prev_type))
10467 {
10468 intermediate_type = vect_double_mask_nunits (prev_type);
10469 if (intermediate_mode != TYPE_MODE (intermediate_type))
10470 return false;
10471 }
10472 else
10473 intermediate_type
10474 = lang_hooks.types.type_for_mode (intermediate_mode, uns);
10475 interm_optab
10476 = optab_for_tree_code (VEC_PACK_TRUNC_EXPR, intermediate_type,
10477 optab_default);
10478 if (!interm_optab
10479 || ((icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing)
10480 || insn_data[icode1].operand[0].mode != intermediate_mode
10481 || ((icode1 = optab_handler (interm_optab, intermediate_mode))
10482 == CODE_FOR_nothing))
10483 break;
10484
10485 interm_types->quick_push (intermediate_type);
10486 (*multi_step_cvt)++;
10487
10488 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
10489 return (!VECTOR_BOOLEAN_TYPE_P (vectype)
10490 || known_eq (TYPE_VECTOR_SUBPARTS (intermediate_type) * 2,
10491 TYPE_VECTOR_SUBPARTS (narrow_vectype)));
10492
10493 prev_mode = intermediate_mode;
10494 prev_type = intermediate_type;
10495 optab1 = interm_optab;
10496 }
10497
10498 interm_types->release ();
10499 return false;
10500 }
10501
10502 /* Generate and return a statement that sets vector mask MASK such that
10503 MASK[I] is true iff J + START_INDEX < END_INDEX for all J <= I. */
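/* For example, assuming an 8-element boolean MASK type, the generated call
   appears in GIMPLE dumps roughly as

     MASK = .WHILE_ULT (START_INDEX, END_INDEX, { 0, ... });

   and with START_INDEX == 13 and END_INDEX == 16 the mask computed at run
   time would be { 1, 1, 1, 0, 0, 0, 0, 0 }.  */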
10504
10505 gcall *
10506 vect_gen_while (tree mask, tree start_index, tree end_index)
10507 {
10508 tree cmp_type = TREE_TYPE (start_index);
10509 tree mask_type = TREE_TYPE (mask);
10510 gcc_checking_assert (direct_internal_fn_supported_p (IFN_WHILE_ULT,
10511 cmp_type, mask_type,
10512 OPTIMIZE_FOR_SPEED));
10513 gcall *call = gimple_build_call_internal (IFN_WHILE_ULT, 3,
10514 start_index, end_index,
10515 build_zero_cst (mask_type));
10516 gimple_call_set_lhs (call, mask);
10517 return call;
10518 }
10519
10520 /* Generate a vector mask of type MASK_TYPE for which index I is false iff
10521 J + START_INDEX < END_INDEX for all J <= I. Add the statements to SEQ. */
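/* A sketch of the statements added to SEQ (TMP is a fresh SSA name of type
   MASK_TYPE):

     TMP    = .WHILE_ULT (START_INDEX, END_INDEX, { 0, ... });
     RESULT = ~TMP;

   i.e. the bitwise inverse of the mask produced by vect_gen_while.  */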
10522
10523 tree
10524 vect_gen_while_not (gimple_seq *seq, tree mask_type, tree start_index,
10525 tree end_index)
10526 {
10527 tree tmp = make_ssa_name (mask_type);
10528 gcall *call = vect_gen_while (tmp, start_index, end_index);
10529 gimple_seq_add_stmt (seq, call);
10530 return gimple_build (seq, BIT_NOT_EXPR, mask_type, tmp);
10531 }
10532