1 /* SLP - Basic Block Vectorization
2 Copyright (C) 2007, 2008, 2009, 2010, 2011, 2012
3 Free Software Foundation, Inc.
4 Contributed by Dorit Naishlos <dorit@il.ibm.com>
5 and Ira Rosen <irar@il.ibm.com>
6
7 This file is part of GCC.
8
9 GCC is free software; you can redistribute it and/or modify it under
10 the terms of the GNU General Public License as published by the Free
11 Software Foundation; either version 3, or (at your option) any later
12 version.
13
14 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
15 WARRANTY; without even the implied warranty of MERCHANTABILITY or
16 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 for more details.
18
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
22
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "tm.h"
27 #include "ggc.h"
28 #include "tree.h"
29 #include "target.h"
30 #include "basic-block.h"
31 #include "tree-pretty-print.h"
32 #include "gimple-pretty-print.h"
33 #include "tree-flow.h"
34 #include "tree-dump.h"
35 #include "cfgloop.h"
36 #include "cfglayout.h"
37 #include "expr.h"
38 #include "recog.h"
39 #include "optabs.h"
40 #include "tree-vectorizer.h"
41 #include "langhooks.h"
42
43 /* Extract the location of the basic block in the source code.
44 Return the basic block location if succeed and NULL if not. */
45
46 LOC
find_bb_location(basic_block bb)47 find_bb_location (basic_block bb)
48 {
49 gimple stmt = NULL;
50 gimple_stmt_iterator si;
51
52 if (!bb)
53 return UNKNOWN_LOC;
54
55 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
56 {
57 stmt = gsi_stmt (si);
58 if (gimple_location (stmt) != UNKNOWN_LOC)
59 return gimple_location (stmt);
60 }
61
62 return UNKNOWN_LOC;
63 }
64
65
66 /* Recursively free the memory allocated for the SLP tree rooted at NODE. */
67
68 static void
vect_free_slp_tree(slp_tree node)69 vect_free_slp_tree (slp_tree node)
70 {
71 int i;
72 slp_void_p child;
73
74 if (!node)
75 return;
76
77 FOR_EACH_VEC_ELT (slp_void_p, SLP_TREE_CHILDREN (node), i, child)
78 vect_free_slp_tree ((slp_tree) child);
79
80 VEC_free (slp_void_p, heap, SLP_TREE_CHILDREN (node));
81 VEC_free (gimple, heap, SLP_TREE_SCALAR_STMTS (node));
82
83 if (SLP_TREE_VEC_STMTS (node))
84 VEC_free (gimple, heap, SLP_TREE_VEC_STMTS (node));
85
86 free (node);
87 }
88
89
90 /* Free the memory allocated for the SLP instance. */
91
92 void
vect_free_slp_instance(slp_instance instance)93 vect_free_slp_instance (slp_instance instance)
94 {
95 vect_free_slp_tree (SLP_INSTANCE_TREE (instance));
96 VEC_free (int, heap, SLP_INSTANCE_LOAD_PERMUTATION (instance));
97 VEC_free (slp_tree, heap, SLP_INSTANCE_LOADS (instance));
98 free (instance);
99 }
100
101
102 /* Create an SLP node for SCALAR_STMTS. */
103
104 static slp_tree
vect_create_new_slp_node(VEC (gimple,heap)* scalar_stmts)105 vect_create_new_slp_node (VEC (gimple, heap) *scalar_stmts)
106 {
107 slp_tree node;
108 gimple stmt = VEC_index (gimple, scalar_stmts, 0);
109 unsigned int nops;
110
111 if (is_gimple_call (stmt))
112 nops = gimple_call_num_args (stmt);
113 else if (is_gimple_assign (stmt))
114 {
115 nops = gimple_num_ops (stmt) - 1;
116 if (gimple_assign_rhs_code (stmt) == COND_EXPR)
117 nops++;
118 }
119 else
120 return NULL;
121
122 node = XNEW (struct _slp_tree);
123 SLP_TREE_SCALAR_STMTS (node) = scalar_stmts;
124 SLP_TREE_VEC_STMTS (node) = NULL;
125 SLP_TREE_CHILDREN (node) = VEC_alloc (slp_void_p, heap, nops);
126 SLP_TREE_OUTSIDE_OF_LOOP_COST (node) = 0;
127 SLP_TREE_INSIDE_OF_LOOP_COST (node) = 0;
128
129 return node;
130 }
131
132
133 /* Allocate operands info for NOPS operands, and GROUP_SIZE def-stmts for each
134 operand. */
VEC(slp_oprnd_info,heap)135 static VEC (slp_oprnd_info, heap) *
136 vect_create_oprnd_info (int nops, int group_size)
137 {
138 int i;
139 slp_oprnd_info oprnd_info;
140 VEC (slp_oprnd_info, heap) *oprnds_info;
141
142 oprnds_info = VEC_alloc (slp_oprnd_info, heap, nops);
143 for (i = 0; i < nops; i++)
144 {
145 oprnd_info = XNEW (struct _slp_oprnd_info);
146 oprnd_info->def_stmts = VEC_alloc (gimple, heap, group_size);
147 oprnd_info->first_dt = vect_uninitialized_def;
148 oprnd_info->first_def_type = NULL_TREE;
149 oprnd_info->first_const_oprnd = NULL_TREE;
150 oprnd_info->first_pattern = false;
151 VEC_quick_push (slp_oprnd_info, oprnds_info, oprnd_info);
152 }
153
154 return oprnds_info;
155 }
156
157
158 /* Free operands info. */
159
160 static void
vect_free_oprnd_info(VEC (slp_oprnd_info,heap)** oprnds_info)161 vect_free_oprnd_info (VEC (slp_oprnd_info, heap) **oprnds_info)
162 {
163 int i;
164 slp_oprnd_info oprnd_info;
165
166 FOR_EACH_VEC_ELT (slp_oprnd_info, *oprnds_info, i, oprnd_info)
167 {
168 VEC_free (gimple, heap, oprnd_info->def_stmts);
169 XDELETE (oprnd_info);
170 }
171
172 VEC_free (slp_oprnd_info, heap, *oprnds_info);
173 }
174
175
176 /* Get the defs for the rhs of STMT (collect them in OPRNDS_INFO), check that
177 they are of a valid type and that they match the defs of the first stmt of
178 the SLP group (stored in OPRNDS_INFO). */
179
180 static bool
vect_get_and_check_slp_defs(loop_vec_info loop_vinfo,bb_vec_info bb_vinfo,slp_tree slp_node,gimple stmt,int ncopies_for_cost,bool first,VEC (slp_oprnd_info,heap)** oprnds_info)181 vect_get_and_check_slp_defs (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
182 slp_tree slp_node, gimple stmt,
183 int ncopies_for_cost, bool first,
184 VEC (slp_oprnd_info, heap) **oprnds_info)
185 {
186 tree oprnd;
187 unsigned int i, number_of_oprnds;
188 tree def, def_op0 = NULL_TREE;
189 gimple def_stmt;
190 enum vect_def_type dt = vect_uninitialized_def;
191 enum vect_def_type dt_op0 = vect_uninitialized_def;
192 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
193 tree lhs = gimple_get_lhs (stmt);
194 struct loop *loop = NULL;
195 enum tree_code rhs_code;
196 bool different_types = false;
197 bool pattern = false;
198 slp_oprnd_info oprnd_info, oprnd0_info, oprnd1_info;
199 int op_idx = 1;
200 tree compare_rhs = NULL_TREE;
201
202 if (loop_vinfo)
203 loop = LOOP_VINFO_LOOP (loop_vinfo);
204
205 if (is_gimple_call (stmt))
206 {
207 number_of_oprnds = gimple_call_num_args (stmt);
208 op_idx = 3;
209 }
210 else if (is_gimple_assign (stmt))
211 {
212 number_of_oprnds = gimple_num_ops (stmt) - 1;
213 if (gimple_assign_rhs_code (stmt) == COND_EXPR)
214 number_of_oprnds++;
215 }
216 else
217 return false;
218
219 for (i = 0; i < number_of_oprnds; i++)
220 {
221 if (compare_rhs)
222 {
223 oprnd = compare_rhs;
224 compare_rhs = NULL_TREE;
225 }
226 else
227 oprnd = gimple_op (stmt, op_idx++);
228
229 oprnd_info = VEC_index (slp_oprnd_info, *oprnds_info, i);
230
231 if (COMPARISON_CLASS_P (oprnd))
232 {
233 compare_rhs = TREE_OPERAND (oprnd, 1);
234 oprnd = TREE_OPERAND (oprnd, 0);
235 }
236
237 if (!vect_is_simple_use (oprnd, NULL, loop_vinfo, bb_vinfo, &def_stmt,
238 &def, &dt)
239 || (!def_stmt && dt != vect_constant_def))
240 {
241 if (vect_print_dump_info (REPORT_SLP))
242 {
243 fprintf (vect_dump, "Build SLP failed: can't find def for ");
244 print_generic_expr (vect_dump, oprnd, TDF_SLIM);
245 }
246
247 return false;
248 }
249
250 /* Check if DEF_STMT is a part of a pattern in LOOP and get the def stmt
251 from the pattern. Check that all the stmts of the node are in the
252 pattern. */
253 if (loop && def_stmt && gimple_bb (def_stmt)
254 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt))
255 && vinfo_for_stmt (def_stmt)
256 && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (def_stmt))
257 && !STMT_VINFO_RELEVANT (vinfo_for_stmt (def_stmt))
258 && !STMT_VINFO_LIVE_P (vinfo_for_stmt (def_stmt)))
259 {
260 pattern = true;
261 if (!first && !oprnd_info->first_pattern)
262 {
263 if (vect_print_dump_info (REPORT_DETAILS))
264 {
265 fprintf (vect_dump, "Build SLP failed: some of the stmts"
266 " are in a pattern, and others are not ");
267 print_generic_expr (vect_dump, oprnd, TDF_SLIM);
268 }
269
270 return false;
271 }
272
273 def_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt));
274 dt = STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt));
275
276 if (dt == vect_unknown_def_type)
277 {
278 if (vect_print_dump_info (REPORT_DETAILS))
279 fprintf (vect_dump, "Unsupported pattern.");
280 return false;
281 }
282
283 switch (gimple_code (def_stmt))
284 {
285 case GIMPLE_PHI:
286 def = gimple_phi_result (def_stmt);
287 break;
288
289 case GIMPLE_ASSIGN:
290 def = gimple_assign_lhs (def_stmt);
291 break;
292
293 default:
294 if (vect_print_dump_info (REPORT_DETAILS))
295 fprintf (vect_dump, "unsupported defining stmt: ");
296 return false;
297 }
298 }
299
300 if (first)
301 {
302 oprnd_info->first_dt = dt;
303 oprnd_info->first_pattern = pattern;
304 if (def)
305 {
306 oprnd_info->first_def_type = TREE_TYPE (def);
307 oprnd_info->first_const_oprnd = NULL_TREE;
308 }
309 else
310 {
311 oprnd_info->first_def_type = NULL_TREE;
312 oprnd_info->first_const_oprnd = oprnd;
313 }
314
315 if (i == 0)
316 {
317 def_op0 = def;
318 dt_op0 = dt;
319 /* Analyze costs (for the first stmt of the group only). */
320 if (REFERENCE_CLASS_P (lhs))
321 /* Store. */
322 vect_model_store_cost (stmt_info, ncopies_for_cost, false,
323 dt, slp_node);
324 else
325 {
326 enum vect_def_type dts[2];
327 dts[0] = dt;
328 dts[1] = vect_uninitialized_def;
329 /* Not memory operation (we don't call this function for
330 loads). */
331 vect_model_simple_cost (stmt_info, ncopies_for_cost, dts,
332 slp_node);
333 }
334 }
335 }
336 else
337 {
338 /* Not first stmt of the group, check that the def-stmt/s match
339 the def-stmt/s of the first stmt. Allow different definition
340 types for reduction chains: the first stmt must be a
341 vect_reduction_def (a phi node), and the rest
342 vect_internal_def. */
343 if (((oprnd_info->first_dt != dt
344 && !(oprnd_info->first_dt == vect_reduction_def
345 && dt == vect_internal_def))
346 || (oprnd_info->first_def_type != NULL_TREE
347 && def
348 && !types_compatible_p (oprnd_info->first_def_type,
349 TREE_TYPE (def))))
350 || (!def
351 && !types_compatible_p (TREE_TYPE (oprnd_info->first_const_oprnd),
352 TREE_TYPE (oprnd)))
353 || different_types)
354 {
355 if (number_of_oprnds != 2)
356 {
357 if (vect_print_dump_info (REPORT_SLP))
358 fprintf (vect_dump, "Build SLP failed: different types ");
359
360 return false;
361 }
362
363 /* Try to swap operands in case of binary operation. */
364 if (i == 0)
365 different_types = true;
366 else
367 {
368 oprnd0_info = VEC_index (slp_oprnd_info, *oprnds_info, 0);
369 if (is_gimple_assign (stmt)
370 && (rhs_code = gimple_assign_rhs_code (stmt))
371 && TREE_CODE_CLASS (rhs_code) == tcc_binary
372 && commutative_tree_code (rhs_code)
373 && oprnd0_info->first_dt == dt
374 && oprnd_info->first_dt == dt_op0
375 && def_op0 && def
376 && !(oprnd0_info->first_def_type
377 && !types_compatible_p (oprnd0_info->first_def_type,
378 TREE_TYPE (def)))
379 && !(oprnd_info->first_def_type
380 && !types_compatible_p (oprnd_info->first_def_type,
381 TREE_TYPE (def_op0))))
382 {
383 if (vect_print_dump_info (REPORT_SLP))
384 {
385 fprintf (vect_dump, "Swapping operands of ");
386 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
387 }
388
389 swap_tree_operands (stmt, gimple_assign_rhs1_ptr (stmt),
390 gimple_assign_rhs2_ptr (stmt));
391 }
392 else
393 {
394 if (vect_print_dump_info (REPORT_SLP))
395 fprintf (vect_dump, "Build SLP failed: different types ");
396
397 return false;
398 }
399 }
400 }
401 }
402
403 /* Check the types of the definitions. */
404 switch (dt)
405 {
406 case vect_constant_def:
407 case vect_external_def:
408 case vect_reduction_def:
409 break;
410
411 case vect_internal_def:
412 if (different_types)
413 {
414 oprnd0_info = VEC_index (slp_oprnd_info, *oprnds_info, 0);
415 oprnd1_info = VEC_index (slp_oprnd_info, *oprnds_info, 0);
416 if (i == 0)
417 VEC_quick_push (gimple, oprnd1_info->def_stmts, def_stmt);
418 else
419 VEC_quick_push (gimple, oprnd0_info->def_stmts, def_stmt);
420 }
421 else
422 VEC_quick_push (gimple, oprnd_info->def_stmts, def_stmt);
423
424 break;
425
426 default:
427 /* FORNOW: Not supported. */
428 if (vect_print_dump_info (REPORT_SLP))
429 {
430 fprintf (vect_dump, "Build SLP failed: illegal type of def ");
431 print_generic_expr (vect_dump, def, TDF_SLIM);
432 }
433
434 return false;
435 }
436 }
437
438 return true;
439 }
440
441
442 /* Recursively build an SLP tree starting from NODE.
443 Fail (and return FALSE) if def-stmts are not isomorphic, require data
444 permutation or are of unsupported types of operation. Otherwise, return
445 TRUE. */
446
447 static bool
vect_build_slp_tree(loop_vec_info loop_vinfo,bb_vec_info bb_vinfo,slp_tree * node,unsigned int group_size,int * inside_cost,int * outside_cost,int ncopies_for_cost,unsigned int * max_nunits,VEC (int,heap)** load_permutation,VEC (slp_tree,heap)** loads,unsigned int vectorization_factor,bool * loads_permuted)448 vect_build_slp_tree (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
449 slp_tree *node, unsigned int group_size,
450 int *inside_cost, int *outside_cost,
451 int ncopies_for_cost, unsigned int *max_nunits,
452 VEC (int, heap) **load_permutation,
453 VEC (slp_tree, heap) **loads,
454 unsigned int vectorization_factor, bool *loads_permuted)
455 {
456 unsigned int i;
457 VEC (gimple, heap) *stmts = SLP_TREE_SCALAR_STMTS (*node);
458 gimple stmt = VEC_index (gimple, stmts, 0);
459 enum tree_code first_stmt_code = ERROR_MARK, rhs_code = ERROR_MARK;
460 enum tree_code first_cond_code = ERROR_MARK;
461 tree lhs;
462 bool stop_recursion = false, need_same_oprnds = false;
463 tree vectype, scalar_type, first_op1 = NULL_TREE;
464 unsigned int ncopies;
465 optab optab;
466 int icode;
467 enum machine_mode optab_op2_mode;
468 enum machine_mode vec_mode;
469 struct data_reference *first_dr;
470 HOST_WIDE_INT dummy;
471 bool permutation = false;
472 unsigned int load_place;
473 gimple first_load, prev_first_load = NULL;
474 VEC (slp_oprnd_info, heap) *oprnds_info;
475 unsigned int nops;
476 slp_oprnd_info oprnd_info;
477 tree cond;
478
479 if (is_gimple_call (stmt))
480 nops = gimple_call_num_args (stmt);
481 else if (is_gimple_assign (stmt))
482 {
483 nops = gimple_num_ops (stmt) - 1;
484 if (gimple_assign_rhs_code (stmt) == COND_EXPR)
485 nops++;
486 }
487 else
488 return false;
489
490 oprnds_info = vect_create_oprnd_info (nops, group_size);
491
492 /* For every stmt in NODE find its def stmt/s. */
493 FOR_EACH_VEC_ELT (gimple, stmts, i, stmt)
494 {
495 if (vect_print_dump_info (REPORT_SLP))
496 {
497 fprintf (vect_dump, "Build SLP for ");
498 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
499 }
500
501 /* Fail to vectorize statements marked as unvectorizable. */
502 if (!STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (stmt)))
503 {
504 if (vect_print_dump_info (REPORT_SLP))
505 {
506 fprintf (vect_dump,
507 "Build SLP failed: unvectorizable statement ");
508 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
509 }
510
511 vect_free_oprnd_info (&oprnds_info);
512 return false;
513 }
514
515 lhs = gimple_get_lhs (stmt);
516 if (lhs == NULL_TREE)
517 {
518 if (vect_print_dump_info (REPORT_SLP))
519 {
520 fprintf (vect_dump,
521 "Build SLP failed: not GIMPLE_ASSIGN nor GIMPLE_CALL ");
522 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
523 }
524
525 vect_free_oprnd_info (&oprnds_info);
526 return false;
527 }
528
529 if (is_gimple_assign (stmt)
530 && gimple_assign_rhs_code (stmt) == COND_EXPR
531 && (cond = gimple_assign_rhs1 (stmt))
532 && !COMPARISON_CLASS_P (cond))
533 {
534 if (vect_print_dump_info (REPORT_SLP))
535 {
536 fprintf (vect_dump,
537 "Build SLP failed: condition is not comparison ");
538 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
539 }
540
541 vect_free_oprnd_info (&oprnds_info);
542 return false;
543 }
544
545 scalar_type = vect_get_smallest_scalar_type (stmt, &dummy, &dummy);
546 vectype = get_vectype_for_scalar_type (scalar_type);
547 if (!vectype)
548 {
549 if (vect_print_dump_info (REPORT_SLP))
550 {
551 fprintf (vect_dump, "Build SLP failed: unsupported data-type ");
552 print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
553 }
554
555 vect_free_oprnd_info (&oprnds_info);
556 return false;
557 }
558
559 /* In case of multiple types we need to detect the smallest type. */
560 if (*max_nunits < TYPE_VECTOR_SUBPARTS (vectype))
561 {
562 *max_nunits = TYPE_VECTOR_SUBPARTS (vectype);
563 if (bb_vinfo)
564 vectorization_factor = *max_nunits;
565 }
566
567 ncopies = vectorization_factor / TYPE_VECTOR_SUBPARTS (vectype);
568
569 if (is_gimple_call (stmt))
570 {
571 rhs_code = CALL_EXPR;
572 if (gimple_call_internal_p (stmt)
573 || gimple_call_tail_p (stmt)
574 || gimple_call_noreturn_p (stmt)
575 || !gimple_call_nothrow_p (stmt)
576 || gimple_call_chain (stmt))
577 {
578 if (vect_print_dump_info (REPORT_SLP))
579 {
580 fprintf (vect_dump,
581 "Build SLP failed: unsupported call type ");
582 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
583 }
584
585 vect_free_oprnd_info (&oprnds_info);
586 return false;
587 }
588 }
589 else
590 rhs_code = gimple_assign_rhs_code (stmt);
591
592 /* Check the operation. */
593 if (i == 0)
594 {
595 first_stmt_code = rhs_code;
596
597 /* Shift arguments should be equal in all the packed stmts for a
598 vector shift with scalar shift operand. */
599 if (rhs_code == LSHIFT_EXPR || rhs_code == RSHIFT_EXPR
600 || rhs_code == LROTATE_EXPR
601 || rhs_code == RROTATE_EXPR)
602 {
603 vec_mode = TYPE_MODE (vectype);
604
605 /* First see if we have a vector/vector shift. */
606 optab = optab_for_tree_code (rhs_code, vectype,
607 optab_vector);
608
609 if (!optab
610 || optab_handler (optab, vec_mode) == CODE_FOR_nothing)
611 {
612 /* No vector/vector shift, try for a vector/scalar shift. */
613 optab = optab_for_tree_code (rhs_code, vectype,
614 optab_scalar);
615
616 if (!optab)
617 {
618 if (vect_print_dump_info (REPORT_SLP))
619 fprintf (vect_dump, "Build SLP failed: no optab.");
620 vect_free_oprnd_info (&oprnds_info);
621 return false;
622 }
623 icode = (int) optab_handler (optab, vec_mode);
624 if (icode == CODE_FOR_nothing)
625 {
626 if (vect_print_dump_info (REPORT_SLP))
627 fprintf (vect_dump, "Build SLP failed: "
628 "op not supported by target.");
629 vect_free_oprnd_info (&oprnds_info);
630 return false;
631 }
632 optab_op2_mode = insn_data[icode].operand[2].mode;
633 if (!VECTOR_MODE_P (optab_op2_mode))
634 {
635 need_same_oprnds = true;
636 first_op1 = gimple_assign_rhs2 (stmt);
637 }
638 }
639 }
640 else if (rhs_code == WIDEN_LSHIFT_EXPR)
641 {
642 need_same_oprnds = true;
643 first_op1 = gimple_assign_rhs2 (stmt);
644 }
645 }
646 else
647 {
648 if (first_stmt_code != rhs_code
649 && (first_stmt_code != IMAGPART_EXPR
650 || rhs_code != REALPART_EXPR)
651 && (first_stmt_code != REALPART_EXPR
652 || rhs_code != IMAGPART_EXPR)
653 && !(STMT_VINFO_STRIDED_ACCESS (vinfo_for_stmt (stmt))
654 && (first_stmt_code == ARRAY_REF
655 || first_stmt_code == INDIRECT_REF
656 || first_stmt_code == COMPONENT_REF
657 || first_stmt_code == MEM_REF)))
658 {
659 if (vect_print_dump_info (REPORT_SLP))
660 {
661 fprintf (vect_dump,
662 "Build SLP failed: different operation in stmt ");
663 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
664 }
665
666 vect_free_oprnd_info (&oprnds_info);
667 return false;
668 }
669
670 if (need_same_oprnds
671 && !operand_equal_p (first_op1, gimple_assign_rhs2 (stmt), 0))
672 {
673 if (vect_print_dump_info (REPORT_SLP))
674 {
675 fprintf (vect_dump,
676 "Build SLP failed: different shift arguments in ");
677 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
678 }
679
680 vect_free_oprnd_info (&oprnds_info);
681 return false;
682 }
683
684 if (rhs_code == CALL_EXPR)
685 {
686 gimple first_stmt = VEC_index (gimple, stmts, 0);
687 if (gimple_call_num_args (stmt) != nops
688 || !operand_equal_p (gimple_call_fn (first_stmt),
689 gimple_call_fn (stmt), 0)
690 || gimple_call_fntype (first_stmt)
691 != gimple_call_fntype (stmt))
692 {
693 if (vect_print_dump_info (REPORT_SLP))
694 {
695 fprintf (vect_dump,
696 "Build SLP failed: different calls in ");
697 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
698 }
699
700 vect_free_oprnd_info (&oprnds_info);
701 return false;
702 }
703 }
704 }
705
706 /* Strided store or load. */
707 if (STMT_VINFO_STRIDED_ACCESS (vinfo_for_stmt (stmt)))
708 {
709 if (REFERENCE_CLASS_P (lhs))
710 {
711 /* Store. */
712 if (!vect_get_and_check_slp_defs (loop_vinfo, bb_vinfo, *node,
713 stmt, ncopies_for_cost,
714 (i == 0), &oprnds_info))
715 {
716 vect_free_oprnd_info (&oprnds_info);
717 return false;
718 }
719 }
720 else
721 {
722 /* Load. */
723 /* FORNOW: Check that there is no gap between the loads. */
724 if ((GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) == stmt
725 && GROUP_GAP (vinfo_for_stmt (stmt)) != 0)
726 || (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) != stmt
727 && GROUP_GAP (vinfo_for_stmt (stmt)) != 1))
728 {
729 if (vect_print_dump_info (REPORT_SLP))
730 {
731 fprintf (vect_dump, "Build SLP failed: strided "
732 "loads have gaps ");
733 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
734 }
735
736 vect_free_oprnd_info (&oprnds_info);
737 return false;
738 }
739
740 /* Check that the size of interleaved loads group is not
741 greater than the SLP group size. */
742 if (loop_vinfo
743 && GROUP_SIZE (vinfo_for_stmt (stmt)) > ncopies * group_size)
744 {
745 if (vect_print_dump_info (REPORT_SLP))
746 {
747 fprintf (vect_dump, "Build SLP failed: the number of "
748 "interleaved loads is greater than"
749 " the SLP group size ");
750 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
751 }
752
753 vect_free_oprnd_info (&oprnds_info);
754 return false;
755 }
756
757 first_load = GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt));
758 if (prev_first_load)
759 {
760 /* Check that there are no loads from different interleaving
761 chains in the same node. The only exception is complex
762 numbers. */
763 if (prev_first_load != first_load
764 && rhs_code != REALPART_EXPR
765 && rhs_code != IMAGPART_EXPR)
766 {
767 if (vect_print_dump_info (REPORT_SLP))
768 {
769 fprintf (vect_dump, "Build SLP failed: different "
770 "interleaving chains in one node ");
771 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
772 }
773
774 vect_free_oprnd_info (&oprnds_info);
775 return false;
776 }
777 }
778 else
779 prev_first_load = first_load;
780
781 if (first_load == stmt)
782 {
783 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt));
784 if (vect_supportable_dr_alignment (first_dr, false)
785 == dr_unaligned_unsupported)
786 {
787 if (vect_print_dump_info (REPORT_SLP))
788 {
789 fprintf (vect_dump, "Build SLP failed: unsupported "
790 "unaligned load ");
791 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
792 }
793
794 vect_free_oprnd_info (&oprnds_info);
795 return false;
796 }
797
798 /* Analyze costs (for the first stmt in the group). */
799 vect_model_load_cost (vinfo_for_stmt (stmt),
800 ncopies_for_cost, false, *node);
801 }
802
803 /* Store the place of this load in the interleaving chain. In
804 case that permutation is needed we later decide if a specific
805 permutation is supported. */
806 load_place = vect_get_place_in_interleaving_chain (stmt,
807 first_load);
808 if (load_place != i)
809 permutation = true;
810
811 VEC_safe_push (int, heap, *load_permutation, load_place);
812
813 /* We stop the tree when we reach a group of loads. */
814 stop_recursion = true;
815 continue;
816 }
817 } /* Strided access. */
818 else
819 {
820 if (TREE_CODE_CLASS (rhs_code) == tcc_reference)
821 {
822 /* Not strided load. */
823 if (vect_print_dump_info (REPORT_SLP))
824 {
825 fprintf (vect_dump, "Build SLP failed: not strided load ");
826 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
827 }
828
829 /* FORNOW: Not strided loads are not supported. */
830 vect_free_oprnd_info (&oprnds_info);
831 return false;
832 }
833
834 /* Not memory operation. */
835 if (TREE_CODE_CLASS (rhs_code) != tcc_binary
836 && TREE_CODE_CLASS (rhs_code) != tcc_unary
837 && rhs_code != COND_EXPR
838 && rhs_code != CALL_EXPR)
839 {
840 if (vect_print_dump_info (REPORT_SLP))
841 {
842 fprintf (vect_dump, "Build SLP failed: operation");
843 fprintf (vect_dump, " unsupported ");
844 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
845 }
846
847 vect_free_oprnd_info (&oprnds_info);
848 return false;
849 }
850
851 if (rhs_code == COND_EXPR)
852 {
853 tree cond_expr = gimple_assign_rhs1 (stmt);
854
855 if (i == 0)
856 first_cond_code = TREE_CODE (cond_expr);
857 else if (first_cond_code != TREE_CODE (cond_expr))
858 {
859 if (vect_print_dump_info (REPORT_SLP))
860 {
861 fprintf (vect_dump, "Build SLP failed: different"
862 " operation");
863 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
864 }
865
866 vect_free_oprnd_info (&oprnds_info);
867 return false;
868 }
869 }
870
871 /* Find the def-stmts. */
872 if (!vect_get_and_check_slp_defs (loop_vinfo, bb_vinfo, *node, stmt,
873 ncopies_for_cost, (i == 0),
874 &oprnds_info))
875 {
876 vect_free_oprnd_info (&oprnds_info);
877 return false;
878 }
879 }
880 }
881
882 /* Add the costs of the node to the overall instance costs. */
883 *inside_cost += SLP_TREE_INSIDE_OF_LOOP_COST (*node);
884 *outside_cost += SLP_TREE_OUTSIDE_OF_LOOP_COST (*node);
885
886 /* Strided loads were reached - stop the recursion. */
887 if (stop_recursion)
888 {
889 VEC_safe_push (slp_tree, heap, *loads, *node);
890 if (permutation)
891 {
892
893 *loads_permuted = true;
894 *inside_cost
895 += targetm.vectorize.builtin_vectorization_cost (vec_perm, NULL, 0)
896 * group_size;
897 }
898 else
899 {
900 /* We don't check here complex numbers chains, so we set
901 LOADS_PERMUTED for further check in
902 vect_supported_load_permutation_p. */
903 if (rhs_code == REALPART_EXPR || rhs_code == IMAGPART_EXPR)
904 *loads_permuted = true;
905 }
906
907 vect_free_oprnd_info (&oprnds_info);
908 return true;
909 }
910
911 /* Create SLP_TREE nodes for the definition node/s. */
912 FOR_EACH_VEC_ELT (slp_oprnd_info, oprnds_info, i, oprnd_info)
913 {
914 slp_tree child;
915
916 if (oprnd_info->first_dt != vect_internal_def)
917 continue;
918
919 child = vect_create_new_slp_node (oprnd_info->def_stmts);
920 if (!child
921 || !vect_build_slp_tree (loop_vinfo, bb_vinfo, &child, group_size,
922 inside_cost, outside_cost, ncopies_for_cost,
923 max_nunits, load_permutation, loads,
924 vectorization_factor, loads_permuted))
925 {
926 if (child)
927 oprnd_info->def_stmts = NULL;
928 vect_free_slp_tree (child);
929 vect_free_oprnd_info (&oprnds_info);
930 return false;
931 }
932
933 oprnd_info->def_stmts = NULL;
934 VEC_quick_push (slp_void_p, SLP_TREE_CHILDREN (*node), child);
935 }
936
937 vect_free_oprnd_info (&oprnds_info);
938 return true;
939 }
940
941
942 static void
vect_print_slp_tree(slp_tree node)943 vect_print_slp_tree (slp_tree node)
944 {
945 int i;
946 gimple stmt;
947 slp_void_p child;
948
949 if (!node)
950 return;
951
952 fprintf (vect_dump, "node ");
953 FOR_EACH_VEC_ELT (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt)
954 {
955 fprintf (vect_dump, "\n\tstmt %d ", i);
956 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
957 }
958 fprintf (vect_dump, "\n");
959
960 FOR_EACH_VEC_ELT (slp_void_p, SLP_TREE_CHILDREN (node), i, child)
961 vect_print_slp_tree ((slp_tree) child);
962 }
963
964
965 /* Mark the tree rooted at NODE with MARK (PURE_SLP or HYBRID).
966 If MARK is HYBRID, it refers to a specific stmt in NODE (the stmt at index
967 J). Otherwise, MARK is PURE_SLP and J is -1, which indicates that all the
968 stmts in NODE are to be marked. */
969
970 static void
vect_mark_slp_stmts(slp_tree node,enum slp_vect_type mark,int j)971 vect_mark_slp_stmts (slp_tree node, enum slp_vect_type mark, int j)
972 {
973 int i;
974 gimple stmt;
975 slp_void_p child;
976
977 if (!node)
978 return;
979
980 FOR_EACH_VEC_ELT (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt)
981 if (j < 0 || i == j)
982 STMT_SLP_TYPE (vinfo_for_stmt (stmt)) = mark;
983
984 FOR_EACH_VEC_ELT (slp_void_p, SLP_TREE_CHILDREN (node), i, child)
985 vect_mark_slp_stmts ((slp_tree) child, mark, j);
986 }
987
988
989 /* Mark the statements of the tree rooted at NODE as relevant (vect_used). */
990
991 static void
vect_mark_slp_stmts_relevant(slp_tree node)992 vect_mark_slp_stmts_relevant (slp_tree node)
993 {
994 int i;
995 gimple stmt;
996 stmt_vec_info stmt_info;
997 slp_void_p child;
998
999 if (!node)
1000 return;
1001
1002 FOR_EACH_VEC_ELT (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt)
1003 {
1004 stmt_info = vinfo_for_stmt (stmt);
1005 gcc_assert (!STMT_VINFO_RELEVANT (stmt_info)
1006 || STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_scope);
1007 STMT_VINFO_RELEVANT (stmt_info) = vect_used_in_scope;
1008 }
1009
1010 FOR_EACH_VEC_ELT (slp_void_p, SLP_TREE_CHILDREN (node), i, child)
1011 vect_mark_slp_stmts_relevant ((slp_tree) child);
1012 }
1013
1014
1015 /* Check if the permutation required by the SLP INSTANCE is supported.
1016 Reorganize the SLP nodes stored in SLP_INSTANCE_LOADS if needed. */
1017
1018 static bool
vect_supported_slp_permutation_p(slp_instance instance)1019 vect_supported_slp_permutation_p (slp_instance instance)
1020 {
1021 slp_tree node = VEC_index (slp_tree, SLP_INSTANCE_LOADS (instance), 0);
1022 gimple stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (node), 0);
1023 gimple first_load = GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt));
1024 VEC (slp_tree, heap) *sorted_loads = NULL;
1025 int index;
1026 slp_tree *tmp_loads = NULL;
1027 int group_size = SLP_INSTANCE_GROUP_SIZE (instance), i, j;
1028 slp_tree load;
1029
1030 /* FORNOW: The only supported loads permutation is loads from the same
1031 location in all the loads in the node, when the data-refs in
1032 nodes of LOADS constitute an interleaving chain.
1033 Sort the nodes according to the order of accesses in the chain. */
1034 tmp_loads = (slp_tree *) xmalloc (sizeof (slp_tree) * group_size);
1035 for (i = 0, j = 0;
1036 VEC_iterate (int, SLP_INSTANCE_LOAD_PERMUTATION (instance), i, index)
1037 && VEC_iterate (slp_tree, SLP_INSTANCE_LOADS (instance), j, load);
1038 i += group_size, j++)
1039 {
1040 gimple scalar_stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (load), 0);
1041 /* Check that the loads are all in the same interleaving chain. */
1042 if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (scalar_stmt)) != first_load)
1043 {
1044 if (vect_print_dump_info (REPORT_DETAILS))
1045 {
1046 fprintf (vect_dump, "Build SLP failed: unsupported data "
1047 "permutation ");
1048 print_gimple_stmt (vect_dump, scalar_stmt, 0, TDF_SLIM);
1049 }
1050
1051 free (tmp_loads);
1052 return false;
1053 }
1054
1055 tmp_loads[index] = load;
1056 }
1057
1058 sorted_loads = VEC_alloc (slp_tree, heap, group_size);
1059 for (i = 0; i < group_size; i++)
1060 VEC_safe_push (slp_tree, heap, sorted_loads, tmp_loads[i]);
1061
1062 VEC_free (slp_tree, heap, SLP_INSTANCE_LOADS (instance));
1063 SLP_INSTANCE_LOADS (instance) = sorted_loads;
1064 free (tmp_loads);
1065
1066 if (!vect_transform_slp_perm_load (stmt, NULL, NULL,
1067 SLP_INSTANCE_UNROLLING_FACTOR (instance),
1068 instance, true))
1069 return false;
1070
1071 return true;
1072 }
1073
1074
1075 /* Rearrange the statements of NODE according to PERMUTATION. */
1076
1077 static void
vect_slp_rearrange_stmts(slp_tree node,unsigned int group_size,VEC (int,heap)* permutation)1078 vect_slp_rearrange_stmts (slp_tree node, unsigned int group_size,
1079 VEC (int, heap) *permutation)
1080 {
1081 gimple stmt;
1082 VEC (gimple, heap) *tmp_stmts;
1083 unsigned int index, i;
1084 slp_void_p child;
1085
1086 if (!node)
1087 return;
1088
1089 FOR_EACH_VEC_ELT (slp_void_p, SLP_TREE_CHILDREN (node), i, child)
1090 vect_slp_rearrange_stmts ((slp_tree) child, group_size, permutation);
1091
1092 gcc_assert (group_size == VEC_length (gimple, SLP_TREE_SCALAR_STMTS (node)));
1093 tmp_stmts = VEC_alloc (gimple, heap, group_size);
1094
1095 for (i = 0; i < group_size; i++)
1096 VEC_safe_push (gimple, heap, tmp_stmts, NULL);
1097
1098 FOR_EACH_VEC_ELT (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt)
1099 {
1100 index = VEC_index (int, permutation, i);
1101 VEC_replace (gimple, tmp_stmts, index, stmt);
1102 }
1103
1104 VEC_free (gimple, heap, SLP_TREE_SCALAR_STMTS (node));
1105 SLP_TREE_SCALAR_STMTS (node) = tmp_stmts;
1106 }
1107
1108
1109 /* Check if the required load permutation is supported.
1110 LOAD_PERMUTATION contains a list of indices of the loads.
1111 In SLP this permutation is relative to the order of strided stores that are
1112 the base of the SLP instance. */
1113
1114 static bool
vect_supported_load_permutation_p(slp_instance slp_instn,int group_size,VEC (int,heap)* load_permutation)1115 vect_supported_load_permutation_p (slp_instance slp_instn, int group_size,
1116 VEC (int, heap) *load_permutation)
1117 {
1118 int i = 0, j, prev = -1, next, k, number_of_groups;
1119 bool supported, bad_permutation = false;
1120 sbitmap load_index;
1121 slp_tree node, other_complex_node;
1122 gimple stmt, first = NULL, other_node_first, load, next_load, first_load;
1123 unsigned complex_numbers = 0;
1124 struct data_reference *dr;
1125 bb_vec_info bb_vinfo;
1126
1127 /* FORNOW: permutations are only supported in SLP. */
1128 if (!slp_instn)
1129 return false;
1130
1131 if (vect_print_dump_info (REPORT_SLP))
1132 {
1133 fprintf (vect_dump, "Load permutation ");
1134 FOR_EACH_VEC_ELT (int, load_permutation, i, next)
1135 fprintf (vect_dump, "%d ", next);
1136 }
1137
1138 /* In case of reduction every load permutation is allowed, since the order
1139 of the reduction statements is not important (as opposed to the case of
1140 strided stores). The only condition we need to check is that all the
1141 load nodes are of the same size and have the same permutation (and then
1142 rearrange all the nodes of the SLP instance according to this
1143 permutation). */
1144
1145 /* Check that all the load nodes are of the same size. */
1146 FOR_EACH_VEC_ELT (slp_tree, SLP_INSTANCE_LOADS (slp_instn), i, node)
1147 {
1148 if (VEC_length (gimple, SLP_TREE_SCALAR_STMTS (node))
1149 != (unsigned) group_size)
1150 return false;
1151
1152 stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (node), 0);
1153 if (is_gimple_assign (stmt)
1154 && (gimple_assign_rhs_code (stmt) == REALPART_EXPR
1155 || gimple_assign_rhs_code (stmt) == IMAGPART_EXPR))
1156 complex_numbers++;
1157 }
1158
1159 /* Complex operands can be swapped as following:
1160 real_c = real_b + real_a;
1161 imag_c = imag_a + imag_b;
1162 i.e., we have {real_b, imag_a} and {real_a, imag_b} instead of
1163 {real_a, imag_a} and {real_b, imag_b}. We check here that if interleaving
1164 chains are mixed, they match the above pattern. */
1165 if (complex_numbers)
1166 {
1167 FOR_EACH_VEC_ELT (slp_tree, SLP_INSTANCE_LOADS (slp_instn), i, node)
1168 {
1169 FOR_EACH_VEC_ELT (gimple, SLP_TREE_SCALAR_STMTS (node), j, stmt)
1170 {
1171 if (j == 0)
1172 first = stmt;
1173 else
1174 {
1175 if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) != first)
1176 {
1177 if (complex_numbers != 2)
1178 return false;
1179
1180 if (i == 0)
1181 k = 1;
1182 else
1183 k = 0;
1184
1185 other_complex_node = VEC_index (slp_tree,
1186 SLP_INSTANCE_LOADS (slp_instn), k);
1187 other_node_first = VEC_index (gimple,
1188 SLP_TREE_SCALAR_STMTS (other_complex_node), 0);
1189
1190 if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt))
1191 != other_node_first)
1192 return false;
1193 }
1194 }
1195 }
1196 }
1197 }
1198
1199 /* We checked that this case ok, so there is no need to proceed with
1200 permutation tests. */
1201 if (complex_numbers == 2
1202 && VEC_length (slp_tree, SLP_INSTANCE_LOADS (slp_instn)) == 2)
1203 {
1204 VEC_free (slp_tree, heap, SLP_INSTANCE_LOADS (slp_instn));
1205 VEC_free (int, heap, SLP_INSTANCE_LOAD_PERMUTATION (slp_instn));
1206 return true;
1207 }
1208
1209 node = SLP_INSTANCE_TREE (slp_instn);
1210 stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (node), 0);
1211 /* LOAD_PERMUTATION is a list of indices of all the loads of the SLP
1212 instance, not all the loads belong to the same node or interleaving
1213 group. Hence, we need to divide them into groups according to
1214 GROUP_SIZE. */
1215 number_of_groups = VEC_length (int, load_permutation) / group_size;
1216
1217 /* Reduction (there are no data-refs in the root).
1218 In reduction chain the order of the loads is important. */
1219 if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt))
1220 && !GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
1221 {
1222 int first_group_load_index;
1223
1224 /* Compare all the permutation sequences to the first one. */
1225 for (i = 1; i < number_of_groups; i++)
1226 {
1227 k = 0;
1228 for (j = i * group_size; j < i * group_size + group_size; j++)
1229 {
1230 next = VEC_index (int, load_permutation, j);
1231 first_group_load_index = VEC_index (int, load_permutation, k);
1232
1233 if (next != first_group_load_index)
1234 {
1235 bad_permutation = true;
1236 break;
1237 }
1238
1239 k++;
1240 }
1241
1242 if (bad_permutation)
1243 break;
1244 }
1245
1246 if (!bad_permutation)
1247 {
1248 /* Check that the loads in the first sequence are different and there
1249 are no gaps between them. */
1250 load_index = sbitmap_alloc (group_size);
1251 sbitmap_zero (load_index);
1252 for (k = 0; k < group_size; k++)
1253 {
1254 first_group_load_index = VEC_index (int, load_permutation, k);
1255 if (TEST_BIT (load_index, first_group_load_index))
1256 {
1257 bad_permutation = true;
1258 break;
1259 }
1260
1261 SET_BIT (load_index, first_group_load_index);
1262 }
1263
1264 if (!bad_permutation)
1265 for (k = 0; k < group_size; k++)
1266 if (!TEST_BIT (load_index, k))
1267 {
1268 bad_permutation = true;
1269 break;
1270 }
1271
1272 sbitmap_free (load_index);
1273 }
1274
1275 if (!bad_permutation)
1276 {
1277 /* This permutation is valid for reduction. Since the order of the
1278 statements in the nodes is not important unless they are memory
1279 accesses, we can rearrange the statements in all the nodes
1280 according to the order of the loads. */
1281 vect_slp_rearrange_stmts (SLP_INSTANCE_TREE (slp_instn), group_size,
1282 load_permutation);
1283 VEC_free (int, heap, SLP_INSTANCE_LOAD_PERMUTATION (slp_instn));
1284 return true;
1285 }
1286 }
1287
1288 /* In basic block vectorization we allow any subchain of an interleaving
1289 chain.
1290 FORNOW: not supported in loop SLP because of realignment compications. */
1291 bb_vinfo = STMT_VINFO_BB_VINFO (vinfo_for_stmt (stmt));
1292 bad_permutation = false;
1293 /* Check that for every node in the instance teh loads form a subchain. */
1294 if (bb_vinfo)
1295 {
1296 FOR_EACH_VEC_ELT (slp_tree, SLP_INSTANCE_LOADS (slp_instn), i, node)
1297 {
1298 next_load = NULL;
1299 first_load = NULL;
1300 FOR_EACH_VEC_ELT (gimple, SLP_TREE_SCALAR_STMTS (node), j, load)
1301 {
1302 if (!first_load)
1303 first_load = GROUP_FIRST_ELEMENT (vinfo_for_stmt (load));
1304 else if (first_load
1305 != GROUP_FIRST_ELEMENT (vinfo_for_stmt (load)))
1306 {
1307 bad_permutation = true;
1308 break;
1309 }
1310
1311 if (j != 0 && next_load != load)
1312 {
1313 bad_permutation = true;
1314 break;
1315 }
1316
1317 next_load = GROUP_NEXT_ELEMENT (vinfo_for_stmt (load));
1318 }
1319
1320 if (bad_permutation)
1321 break;
1322 }
1323
1324 /* Check that the alignment of the first load in every subchain, i.e.,
1325 the first statement in every load node, is supported. */
1326 if (!bad_permutation)
1327 {
1328 FOR_EACH_VEC_ELT (slp_tree, SLP_INSTANCE_LOADS (slp_instn), i, node)
1329 {
1330 first_load = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (node), 0);
1331 if (first_load
1332 != GROUP_FIRST_ELEMENT (vinfo_for_stmt (first_load)))
1333 {
1334 dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_load));
1335 if (vect_supportable_dr_alignment (dr, false)
1336 == dr_unaligned_unsupported)
1337 {
1338 if (vect_print_dump_info (REPORT_SLP))
1339 {
1340 fprintf (vect_dump, "unsupported unaligned load ");
1341 print_gimple_stmt (vect_dump, first_load, 0,
1342 TDF_SLIM);
1343 }
1344 bad_permutation = true;
1345 break;
1346 }
1347 }
1348 }
1349
1350 if (!bad_permutation)
1351 {
1352 VEC_free (int, heap, SLP_INSTANCE_LOAD_PERMUTATION (slp_instn));
1353 return true;
1354 }
1355 }
1356 }
1357
1358 /* FORNOW: the only supported permutation is 0..01..1.. of length equal to
1359 GROUP_SIZE and where each sequence of same drs is of GROUP_SIZE length as
1360 well (unless it's reduction). */
1361 if (VEC_length (int, load_permutation)
1362 != (unsigned int) (group_size * group_size))
1363 return false;
1364
1365 supported = true;
1366 load_index = sbitmap_alloc (group_size);
1367 sbitmap_zero (load_index);
1368 for (j = 0; j < group_size; j++)
1369 {
1370 for (i = j * group_size, k = 0;
1371 VEC_iterate (int, load_permutation, i, next) && k < group_size;
1372 i++, k++)
1373 {
1374 if (i != j * group_size && next != prev)
1375 {
1376 supported = false;
1377 break;
1378 }
1379
1380 prev = next;
1381 }
1382
1383 if (TEST_BIT (load_index, prev))
1384 {
1385 supported = false;
1386 break;
1387 }
1388
1389 SET_BIT (load_index, prev);
1390 }
1391
1392 for (j = 0; j < group_size; j++)
1393 if (!TEST_BIT (load_index, j))
1394 return false;
1395
1396 sbitmap_free (load_index);
1397
1398 if (supported && i == group_size * group_size
1399 && vect_supported_slp_permutation_p (slp_instn))
1400 return true;
1401
1402 return false;
1403 }
1404
1405
1406 /* Find the first load in the loop that belongs to INSTANCE.
1407 When loads are in several SLP nodes, there can be a case in which the first
1408 load does not appear in the first SLP node to be transformed, causing
1409 incorrect order of statements. Since we generate all the loads together,
1410 they must be inserted before the first load of the SLP instance and not
1411 before the first load of the first node of the instance. */
1412
1413 static gimple
vect_find_first_load_in_slp_instance(slp_instance instance)1414 vect_find_first_load_in_slp_instance (slp_instance instance)
1415 {
1416 int i, j;
1417 slp_tree load_node;
1418 gimple first_load = NULL, load;
1419
1420 FOR_EACH_VEC_ELT (slp_tree, SLP_INSTANCE_LOADS (instance), i, load_node)
1421 FOR_EACH_VEC_ELT (gimple, SLP_TREE_SCALAR_STMTS (load_node), j, load)
1422 first_load = get_earlier_stmt (load, first_load);
1423
1424 return first_load;
1425 }
1426
1427
1428 /* Find the last store in SLP INSTANCE. */
1429
1430 static gimple
vect_find_last_store_in_slp_instance(slp_instance instance)1431 vect_find_last_store_in_slp_instance (slp_instance instance)
1432 {
1433 int i;
1434 slp_tree node;
1435 gimple last_store = NULL, store;
1436
1437 node = SLP_INSTANCE_TREE (instance);
1438 for (i = 0;
1439 VEC_iterate (gimple, SLP_TREE_SCALAR_STMTS (node), i, store);
1440 i++)
1441 last_store = get_later_stmt (store, last_store);
1442
1443 return last_store;
1444 }
1445
1446
1447 /* Analyze an SLP instance starting from a group of strided stores. Call
1448 vect_build_slp_tree to build a tree of packed stmts if possible.
1449 Return FALSE if it's impossible to SLP any stmt in the loop. */
1450
1451 static bool
vect_analyze_slp_instance(loop_vec_info loop_vinfo,bb_vec_info bb_vinfo,gimple stmt)1452 vect_analyze_slp_instance (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
1453 gimple stmt)
1454 {
1455 slp_instance new_instance;
1456 slp_tree node;
1457 unsigned int group_size = GROUP_SIZE (vinfo_for_stmt (stmt));
1458 unsigned int unrolling_factor = 1, nunits;
1459 tree vectype, scalar_type = NULL_TREE;
1460 gimple next;
1461 unsigned int vectorization_factor = 0;
1462 int inside_cost = 0, outside_cost = 0, ncopies_for_cost, i;
1463 unsigned int max_nunits = 0;
1464 VEC (int, heap) *load_permutation;
1465 VEC (slp_tree, heap) *loads;
1466 struct data_reference *dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt));
1467 bool loads_permuted = false;
1468 VEC (gimple, heap) *scalar_stmts;
1469
1470 if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
1471 {
1472 if (dr)
1473 {
1474 scalar_type = TREE_TYPE (DR_REF (dr));
1475 vectype = get_vectype_for_scalar_type (scalar_type);
1476 }
1477 else
1478 {
1479 gcc_assert (loop_vinfo);
1480 vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
1481 }
1482
1483 group_size = GROUP_SIZE (vinfo_for_stmt (stmt));
1484 }
1485 else
1486 {
1487 gcc_assert (loop_vinfo);
1488 vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
1489 group_size = VEC_length (gimple, LOOP_VINFO_REDUCTIONS (loop_vinfo));
1490 }
1491
1492 if (!vectype)
1493 {
1494 if (vect_print_dump_info (REPORT_SLP))
1495 {
1496 fprintf (vect_dump, "Build SLP failed: unsupported data-type ");
1497 print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
1498 }
1499
1500 return false;
1501 }
1502
1503 nunits = TYPE_VECTOR_SUBPARTS (vectype);
1504 if (loop_vinfo)
1505 vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1506 else
1507 vectorization_factor = nunits;
1508
1509 /* Calculate the unrolling factor. */
1510 unrolling_factor = least_common_multiple (nunits, group_size) / group_size;
1511 if (unrolling_factor != 1 && !loop_vinfo)
1512 {
1513 if (vect_print_dump_info (REPORT_SLP))
1514 fprintf (vect_dump, "Build SLP failed: unrolling required in basic"
1515 " block SLP");
1516
1517 return false;
1518 }
1519
1520 /* Create a node (a root of the SLP tree) for the packed strided stores. */
1521 scalar_stmts = VEC_alloc (gimple, heap, group_size);
1522 next = stmt;
1523 if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
1524 {
1525 /* Collect the stores and store them in SLP_TREE_SCALAR_STMTS. */
1526 while (next)
1527 {
1528 if (STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (next))
1529 && STMT_VINFO_RELATED_STMT (vinfo_for_stmt (next)))
1530 VEC_safe_push (gimple, heap, scalar_stmts,
1531 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (next)));
1532 else
1533 VEC_safe_push (gimple, heap, scalar_stmts, next);
1534 next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next));
1535 }
1536 }
1537 else
1538 {
1539 /* Collect reduction statements. */
1540 VEC (gimple, heap) *reductions = LOOP_VINFO_REDUCTIONS (loop_vinfo);
1541 for (i = 0; VEC_iterate (gimple, reductions, i, next); i++)
1542 VEC_safe_push (gimple, heap, scalar_stmts, next);
1543 }
1544
1545 node = vect_create_new_slp_node (scalar_stmts);
1546
1547 /* Calculate the number of vector stmts to create based on the unrolling
1548 factor (number of vectors is 1 if NUNITS >= GROUP_SIZE, and is
1549 GROUP_SIZE / NUNITS otherwise. */
1550 ncopies_for_cost = unrolling_factor * group_size / nunits;
1551
1552 load_permutation = VEC_alloc (int, heap, group_size * group_size);
1553 loads = VEC_alloc (slp_tree, heap, group_size);
1554
1555 /* Build the tree for the SLP instance. */
1556 if (vect_build_slp_tree (loop_vinfo, bb_vinfo, &node, group_size,
1557 &inside_cost, &outside_cost, ncopies_for_cost,
1558 &max_nunits, &load_permutation, &loads,
1559 vectorization_factor, &loads_permuted))
1560 {
1561 /* Calculate the unrolling factor based on the smallest type. */
1562 if (max_nunits > nunits)
1563 unrolling_factor = least_common_multiple (max_nunits, group_size)
1564 / group_size;
1565
1566 if (unrolling_factor != 1 && !loop_vinfo)
1567 {
1568 if (vect_print_dump_info (REPORT_SLP))
1569 fprintf (vect_dump, "Build SLP failed: unrolling required in basic"
1570 " block SLP");
1571 vect_free_slp_tree (node);
1572 VEC_free (int, heap, load_permutation);
1573 VEC_free (slp_tree, heap, loads);
1574 return false;
1575 }
1576
1577 /* Create a new SLP instance. */
1578 new_instance = XNEW (struct _slp_instance);
1579 SLP_INSTANCE_TREE (new_instance) = node;
1580 SLP_INSTANCE_GROUP_SIZE (new_instance) = group_size;
1581 SLP_INSTANCE_UNROLLING_FACTOR (new_instance) = unrolling_factor;
1582 SLP_INSTANCE_OUTSIDE_OF_LOOP_COST (new_instance) = outside_cost;
1583 SLP_INSTANCE_INSIDE_OF_LOOP_COST (new_instance) = inside_cost;
1584 SLP_INSTANCE_LOADS (new_instance) = loads;
1585 SLP_INSTANCE_FIRST_LOAD_STMT (new_instance) = NULL;
1586 SLP_INSTANCE_LOAD_PERMUTATION (new_instance) = load_permutation;
1587
1588 if (loads_permuted)
1589 {
1590 if (!vect_supported_load_permutation_p (new_instance, group_size,
1591 load_permutation))
1592 {
1593 if (vect_print_dump_info (REPORT_SLP))
1594 {
1595 fprintf (vect_dump, "Build SLP failed: unsupported load "
1596 "permutation ");
1597 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
1598 }
1599
1600 vect_free_slp_instance (new_instance);
1601 return false;
1602 }
1603
1604 SLP_INSTANCE_FIRST_LOAD_STMT (new_instance)
1605 = vect_find_first_load_in_slp_instance (new_instance);
1606 }
1607 else
1608 VEC_free (int, heap, SLP_INSTANCE_LOAD_PERMUTATION (new_instance));
1609
1610 if (loop_vinfo)
1611 VEC_safe_push (slp_instance, heap,
1612 LOOP_VINFO_SLP_INSTANCES (loop_vinfo),
1613 new_instance);
1614 else
1615 VEC_safe_push (slp_instance, heap, BB_VINFO_SLP_INSTANCES (bb_vinfo),
1616 new_instance);
1617
1618 if (vect_print_dump_info (REPORT_SLP))
1619 vect_print_slp_tree (node);
1620
1621 return true;
1622 }
1623
1624 /* Failed to SLP. */
1625 /* Free the allocated memory. */
1626 vect_free_slp_tree (node);
1627 VEC_free (int, heap, load_permutation);
1628 VEC_free (slp_tree, heap, loads);
1629
1630 return false;
1631 }
1632
1633
1634 /* Check if there are stmts in the loop can be vectorized using SLP. Build SLP
1635 trees of packed scalar stmts if SLP is possible. */
1636
1637 bool
vect_analyze_slp(loop_vec_info loop_vinfo,bb_vec_info bb_vinfo)1638 vect_analyze_slp (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
1639 {
1640 unsigned int i;
1641 VEC (gimple, heap) *strided_stores, *reductions = NULL, *reduc_chains = NULL;
1642 gimple first_element;
1643 bool ok = false;
1644
1645 if (vect_print_dump_info (REPORT_SLP))
1646 fprintf (vect_dump, "=== vect_analyze_slp ===");
1647
1648 if (loop_vinfo)
1649 {
1650 strided_stores = LOOP_VINFO_STRIDED_STORES (loop_vinfo);
1651 reduc_chains = LOOP_VINFO_REDUCTION_CHAINS (loop_vinfo);
1652 reductions = LOOP_VINFO_REDUCTIONS (loop_vinfo);
1653 }
1654 else
1655 strided_stores = BB_VINFO_STRIDED_STORES (bb_vinfo);
1656
1657 /* Find SLP sequences starting from groups of strided stores. */
1658 FOR_EACH_VEC_ELT (gimple, strided_stores, i, first_element)
1659 if (vect_analyze_slp_instance (loop_vinfo, bb_vinfo, first_element))
1660 ok = true;
1661
1662 if (bb_vinfo && !ok)
1663 {
1664 if (vect_print_dump_info (REPORT_SLP))
1665 fprintf (vect_dump, "Failed to SLP the basic block.");
1666
1667 return false;
1668 }
1669
1670 if (loop_vinfo
1671 && VEC_length (gimple, LOOP_VINFO_REDUCTION_CHAINS (loop_vinfo)) > 0)
1672 {
1673 /* Find SLP sequences starting from reduction chains. */
1674 FOR_EACH_VEC_ELT (gimple, reduc_chains, i, first_element)
1675 if (vect_analyze_slp_instance (loop_vinfo, bb_vinfo, first_element))
1676 ok = true;
1677 else
1678 return false;
1679
1680 /* Don't try to vectorize SLP reductions if reduction chain was
1681 detected. */
1682 return ok;
1683 }
1684
1685 /* Find SLP sequences starting from groups of reductions. */
1686 if (loop_vinfo && VEC_length (gimple, LOOP_VINFO_REDUCTIONS (loop_vinfo)) > 1
1687 && vect_analyze_slp_instance (loop_vinfo, bb_vinfo,
1688 VEC_index (gimple, reductions, 0)))
1689 ok = true;
1690
1691 return true;
1692 }
1693
1694
1695 /* For each possible SLP instance decide whether to SLP it and calculate overall
1696 unrolling factor needed to SLP the loop. Return TRUE if decided to SLP at
1697 least one instance. */
1698
1699 bool
vect_make_slp_decision(loop_vec_info loop_vinfo)1700 vect_make_slp_decision (loop_vec_info loop_vinfo)
1701 {
1702 unsigned int i, unrolling_factor = 1;
1703 VEC (slp_instance, heap) *slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
1704 slp_instance instance;
1705 int decided_to_slp = 0;
1706
1707 if (vect_print_dump_info (REPORT_SLP))
1708 fprintf (vect_dump, "=== vect_make_slp_decision ===");
1709
1710 FOR_EACH_VEC_ELT (slp_instance, slp_instances, i, instance)
1711 {
1712 /* FORNOW: SLP if you can. */
1713 if (unrolling_factor < SLP_INSTANCE_UNROLLING_FACTOR (instance))
1714 unrolling_factor = SLP_INSTANCE_UNROLLING_FACTOR (instance);
1715
1716 /* Mark all the stmts that belong to INSTANCE as PURE_SLP stmts. Later we
1717 call vect_detect_hybrid_slp () to find stmts that need hybrid SLP and
1718 loop-based vectorization. Such stmts will be marked as HYBRID. */
1719 vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance), pure_slp, -1);
1720 decided_to_slp++;
1721 }
1722
1723 LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo) = unrolling_factor;
1724
1725 if (decided_to_slp && vect_print_dump_info (REPORT_SLP))
1726 fprintf (vect_dump, "Decided to SLP %d instances. Unrolling factor %d",
1727 decided_to_slp, unrolling_factor);
1728
1729 return (decided_to_slp > 0);
1730 }
1731
1732
1733 /* Find stmts that must be both vectorized and SLPed (since they feed stmts that
1734 can't be SLPed) in the tree rooted at NODE. Mark such stmts as HYBRID. */
1735
1736 static void
vect_detect_hybrid_slp_stmts(slp_tree node)1737 vect_detect_hybrid_slp_stmts (slp_tree node)
1738 {
1739 int i;
1740 VEC (gimple, heap) *stmts = SLP_TREE_SCALAR_STMTS (node);
1741 gimple stmt = VEC_index (gimple, stmts, 0);
1742 imm_use_iterator imm_iter;
1743 gimple use_stmt;
1744 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1745 slp_void_p child;
1746 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1747 struct loop *loop = NULL;
1748 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
1749 basic_block bb = NULL;
1750
1751 if (!node)
1752 return;
1753
1754 if (loop_vinfo)
1755 loop = LOOP_VINFO_LOOP (loop_vinfo);
1756 else
1757 bb = BB_VINFO_BB (bb_vinfo);
1758
1759 FOR_EACH_VEC_ELT (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt)
1760 if (PURE_SLP_STMT (vinfo_for_stmt (stmt))
1761 && TREE_CODE (gimple_op (stmt, 0)) == SSA_NAME)
1762 FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, gimple_op (stmt, 0))
1763 if (gimple_bb (use_stmt)
1764 && ((loop && flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
1765 || bb == gimple_bb (use_stmt))
1766 && (stmt_vinfo = vinfo_for_stmt (use_stmt))
1767 && !STMT_SLP_TYPE (stmt_vinfo)
1768 && (STMT_VINFO_RELEVANT (stmt_vinfo)
1769 || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (stmt_vinfo)))
1770 && !(gimple_code (use_stmt) == GIMPLE_PHI
1771 && STMT_VINFO_DEF_TYPE (stmt_vinfo)
1772 == vect_reduction_def))
1773 vect_mark_slp_stmts (node, hybrid, i);
1774
1775 FOR_EACH_VEC_ELT (slp_void_p, SLP_TREE_CHILDREN (node), i, child)
1776 vect_detect_hybrid_slp_stmts ((slp_tree) child);
1777 }
1778
1779
1780 /* Find stmts that must be both vectorized and SLPed. */
1781
1782 void
vect_detect_hybrid_slp(loop_vec_info loop_vinfo)1783 vect_detect_hybrid_slp (loop_vec_info loop_vinfo)
1784 {
1785 unsigned int i;
1786 VEC (slp_instance, heap) *slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
1787 slp_instance instance;
1788
1789 if (vect_print_dump_info (REPORT_SLP))
1790 fprintf (vect_dump, "=== vect_detect_hybrid_slp ===");
1791
1792 FOR_EACH_VEC_ELT (slp_instance, slp_instances, i, instance)
1793 vect_detect_hybrid_slp_stmts (SLP_INSTANCE_TREE (instance));
1794 }
1795
1796
1797 /* Create and initialize a new bb_vec_info struct for BB, as well as
1798 stmt_vec_info structs for all the stmts in it. */
1799
1800 static bb_vec_info
new_bb_vec_info(basic_block bb)1801 new_bb_vec_info (basic_block bb)
1802 {
1803 bb_vec_info res = NULL;
1804 gimple_stmt_iterator gsi;
1805
1806 res = (bb_vec_info) xcalloc (1, sizeof (struct _bb_vec_info));
1807 BB_VINFO_BB (res) = bb;
1808
1809 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1810 {
1811 gimple stmt = gsi_stmt (gsi);
1812 gimple_set_uid (stmt, 0);
1813 set_vinfo_for_stmt (stmt, new_stmt_vec_info (stmt, NULL, res));
1814 }
1815
1816 BB_VINFO_STRIDED_STORES (res) = VEC_alloc (gimple, heap, 10);
1817 BB_VINFO_SLP_INSTANCES (res) = VEC_alloc (slp_instance, heap, 2);
1818
1819 bb->aux = res;
1820 return res;
1821 }
1822
1823
1824 /* Free BB_VINFO struct, as well as all the stmt_vec_info structs of all the
1825 stmts in the basic block. */
1826
1827 static void
destroy_bb_vec_info(bb_vec_info bb_vinfo)1828 destroy_bb_vec_info (bb_vec_info bb_vinfo)
1829 {
1830 VEC (slp_instance, heap) *slp_instances;
1831 slp_instance instance;
1832 basic_block bb;
1833 gimple_stmt_iterator si;
1834 unsigned i;
1835
1836 if (!bb_vinfo)
1837 return;
1838
1839 bb = BB_VINFO_BB (bb_vinfo);
1840
1841 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
1842 {
1843 gimple stmt = gsi_stmt (si);
1844 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1845
1846 if (stmt_info)
1847 /* Free stmt_vec_info. */
1848 free_stmt_vec_info (stmt);
1849 }
1850
1851 free_data_refs (BB_VINFO_DATAREFS (bb_vinfo));
1852 free_dependence_relations (BB_VINFO_DDRS (bb_vinfo));
1853 VEC_free (gimple, heap, BB_VINFO_STRIDED_STORES (bb_vinfo));
1854 slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
1855 FOR_EACH_VEC_ELT (slp_instance, slp_instances, i, instance)
1856 vect_free_slp_instance (instance);
1857 VEC_free (slp_instance, heap, BB_VINFO_SLP_INSTANCES (bb_vinfo));
1858 free (bb_vinfo);
1859 bb->aux = NULL;
1860 }
1861
1862
1863 /* Analyze statements contained in SLP tree node after recursively analyzing
1864 the subtree. Return TRUE if the operations are supported. */
1865
1866 static bool
vect_slp_analyze_node_operations(bb_vec_info bb_vinfo,slp_tree node)1867 vect_slp_analyze_node_operations (bb_vec_info bb_vinfo, slp_tree node)
1868 {
1869 bool dummy;
1870 int i;
1871 gimple stmt;
1872 slp_void_p child;
1873
1874 if (!node)
1875 return true;
1876
1877 FOR_EACH_VEC_ELT (slp_void_p, SLP_TREE_CHILDREN (node), i, child)
1878 if (!vect_slp_analyze_node_operations (bb_vinfo, (slp_tree) child))
1879 return false;
1880
1881 FOR_EACH_VEC_ELT (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt)
1882 {
1883 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1884 gcc_assert (stmt_info);
1885 gcc_assert (PURE_SLP_STMT (stmt_info));
1886
1887 if (!vect_analyze_stmt (stmt, &dummy, node))
1888 return false;
1889 }
1890
1891 return true;
1892 }
1893
1894
1895 /* Analyze statements in SLP instances of the basic block. Return TRUE if the
1896 operations are supported. */
1897
1898 static bool
vect_slp_analyze_operations(bb_vec_info bb_vinfo)1899 vect_slp_analyze_operations (bb_vec_info bb_vinfo)
1900 {
1901 VEC (slp_instance, heap) *slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
1902 slp_instance instance;
1903 int i;
1904
1905 for (i = 0; VEC_iterate (slp_instance, slp_instances, i, instance); )
1906 {
1907 if (!vect_slp_analyze_node_operations (bb_vinfo,
1908 SLP_INSTANCE_TREE (instance)))
1909 {
1910 vect_free_slp_instance (instance);
1911 VEC_ordered_remove (slp_instance, slp_instances, i);
1912 }
1913 else
1914 i++;
1915 }
1916
1917 if (!VEC_length (slp_instance, slp_instances))
1918 return false;
1919
1920 return true;
1921 }
1922
1923 /* Check if vectorization of the basic block is profitable. */
1924
1925 static bool
vect_bb_vectorization_profitable_p(bb_vec_info bb_vinfo)1926 vect_bb_vectorization_profitable_p (bb_vec_info bb_vinfo)
1927 {
1928 VEC (slp_instance, heap) *slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
1929 slp_instance instance;
1930 int i;
1931 unsigned int vec_outside_cost = 0, vec_inside_cost = 0, scalar_cost = 0;
1932 unsigned int stmt_cost;
1933 gimple stmt;
1934 gimple_stmt_iterator si;
1935 basic_block bb = BB_VINFO_BB (bb_vinfo);
1936 stmt_vec_info stmt_info = NULL;
1937 tree dummy_type = NULL;
1938 int dummy = 0;
1939
1940 /* Calculate vector costs. */
1941 FOR_EACH_VEC_ELT (slp_instance, slp_instances, i, instance)
1942 {
1943 vec_outside_cost += SLP_INSTANCE_OUTSIDE_OF_LOOP_COST (instance);
1944 vec_inside_cost += SLP_INSTANCE_INSIDE_OF_LOOP_COST (instance);
1945 }
1946
1947 /* Calculate scalar cost. */
1948 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
1949 {
1950 stmt = gsi_stmt (si);
1951 stmt_info = vinfo_for_stmt (stmt);
1952
1953 if (!stmt_info || !STMT_VINFO_VECTORIZABLE (stmt_info)
1954 || !PURE_SLP_STMT (stmt_info))
1955 continue;
1956
1957 if (STMT_VINFO_DATA_REF (stmt_info))
1958 {
1959 if (DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)))
1960 stmt_cost = targetm.vectorize.builtin_vectorization_cost
1961 (scalar_load, dummy_type, dummy);
1962 else
1963 stmt_cost = targetm.vectorize.builtin_vectorization_cost
1964 (scalar_store, dummy_type, dummy);
1965 }
1966 else
1967 stmt_cost = targetm.vectorize.builtin_vectorization_cost
1968 (scalar_stmt, dummy_type, dummy);
1969
1970 scalar_cost += stmt_cost;
1971 }
1972
1973 if (vect_print_dump_info (REPORT_COST))
1974 {
1975 fprintf (vect_dump, "Cost model analysis: \n");
1976 fprintf (vect_dump, " Vector inside of basic block cost: %d\n",
1977 vec_inside_cost);
1978 fprintf (vect_dump, " Vector outside of basic block cost: %d\n",
1979 vec_outside_cost);
1980 fprintf (vect_dump, " Scalar cost of basic block: %d", scalar_cost);
1981 }
1982
1983 /* Vectorization is profitable if its cost is less than the cost of scalar
1984 version. */
1985 if (vec_outside_cost + vec_inside_cost >= scalar_cost)
1986 return false;
1987
1988 return true;
1989 }
1990
1991 /* Check if the basic block can be vectorized. */
1992
1993 static bb_vec_info
vect_slp_analyze_bb_1(basic_block bb)1994 vect_slp_analyze_bb_1 (basic_block bb)
1995 {
1996 bb_vec_info bb_vinfo;
1997 VEC (ddr_p, heap) *ddrs;
1998 VEC (slp_instance, heap) *slp_instances;
1999 slp_instance instance;
2000 int i;
2001 int min_vf = 2;
2002 int max_vf = MAX_VECTORIZATION_FACTOR;
2003
2004 bb_vinfo = new_bb_vec_info (bb);
2005 if (!bb_vinfo)
2006 return NULL;
2007
2008 if (!vect_analyze_data_refs (NULL, bb_vinfo, &min_vf))
2009 {
2010 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2011 fprintf (vect_dump, "not vectorized: unhandled data-ref in basic "
2012 "block.\n");
2013
2014 destroy_bb_vec_info (bb_vinfo);
2015 return NULL;
2016 }
2017
2018 ddrs = BB_VINFO_DDRS (bb_vinfo);
2019 if (!VEC_length (ddr_p, ddrs))
2020 {
2021 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2022 fprintf (vect_dump, "not vectorized: not enough data-refs in basic "
2023 "block.\n");
2024
2025 destroy_bb_vec_info (bb_vinfo);
2026 return NULL;
2027 }
2028
2029 if (!vect_analyze_data_ref_dependences (NULL, bb_vinfo, &max_vf)
2030 || min_vf > max_vf)
2031 {
2032 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2033 fprintf (vect_dump, "not vectorized: unhandled data dependence "
2034 "in basic block.\n");
2035
2036 destroy_bb_vec_info (bb_vinfo);
2037 return NULL;
2038 }
2039
2040 if (!vect_analyze_data_refs_alignment (NULL, bb_vinfo))
2041 {
2042 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2043 fprintf (vect_dump, "not vectorized: bad data alignment in basic "
2044 "block.\n");
2045
2046 destroy_bb_vec_info (bb_vinfo);
2047 return NULL;
2048 }
2049
2050 if (!vect_analyze_data_ref_accesses (NULL, bb_vinfo))
2051 {
2052 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2053 fprintf (vect_dump, "not vectorized: unhandled data access in basic "
2054 "block.\n");
2055
2056 destroy_bb_vec_info (bb_vinfo);
2057 return NULL;
2058 }
2059
2060 if (!vect_verify_datarefs_alignment (NULL, bb_vinfo))
2061 {
2062 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2063 fprintf (vect_dump, "not vectorized: unsupported alignment in basic "
2064 "block.\n");
2065
2066 destroy_bb_vec_info (bb_vinfo);
2067 return NULL;
2068 }
2069
2070 /* Check the SLP opportunities in the basic block, analyze and build SLP
2071 trees. */
2072 if (!vect_analyze_slp (NULL, bb_vinfo))
2073 {
2074 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2075 fprintf (vect_dump, "not vectorized: failed to find SLP opportunities "
2076 "in basic block.\n");
2077
2078 destroy_bb_vec_info (bb_vinfo);
2079 return NULL;
2080 }
2081
2082 slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
2083
2084 /* Mark all the statements that we want to vectorize as pure SLP and
2085 relevant. */
2086 FOR_EACH_VEC_ELT (slp_instance, slp_instances, i, instance)
2087 {
2088 vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance), pure_slp, -1);
2089 vect_mark_slp_stmts_relevant (SLP_INSTANCE_TREE (instance));
2090 }
2091
2092 if (!vect_slp_analyze_operations (bb_vinfo))
2093 {
2094 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2095 fprintf (vect_dump, "not vectorized: bad operation in basic block.\n");
2096
2097 destroy_bb_vec_info (bb_vinfo);
2098 return NULL;
2099 }
2100
2101 /* Cost model: check if the vectorization is worthwhile. */
2102 if (flag_vect_cost_model
2103 && !vect_bb_vectorization_profitable_p (bb_vinfo))
2104 {
2105 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2106 fprintf (vect_dump, "not vectorized: vectorization is not "
2107 "profitable.\n");
2108
2109 destroy_bb_vec_info (bb_vinfo);
2110 return NULL;
2111 }
2112
2113 if (vect_print_dump_info (REPORT_DETAILS))
2114 fprintf (vect_dump, "Basic block will be vectorized using SLP\n");
2115
2116 return bb_vinfo;
2117 }
2118
2119
2120 bb_vec_info
vect_slp_analyze_bb(basic_block bb)2121 vect_slp_analyze_bb (basic_block bb)
2122 {
2123 bb_vec_info bb_vinfo;
2124 int insns = 0;
2125 gimple_stmt_iterator gsi;
2126 unsigned int vector_sizes;
2127
2128 if (vect_print_dump_info (REPORT_DETAILS))
2129 fprintf (vect_dump, "===vect_slp_analyze_bb===\n");
2130
2131 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2132 {
2133 gimple stmt = gsi_stmt (gsi);
2134 if (!is_gimple_debug (stmt)
2135 && !gimple_nop_p (stmt)
2136 && gimple_code (stmt) != GIMPLE_LABEL)
2137 insns++;
2138 }
2139
2140 if (insns > PARAM_VALUE (PARAM_SLP_MAX_INSNS_IN_BB))
2141 {
2142 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2143 fprintf (vect_dump, "not vectorized: too many instructions in basic "
2144 "block.\n");
2145
2146 return NULL;
2147 }
2148
2149 /* Autodetect first vector size we try. */
2150 current_vector_size = 0;
2151 vector_sizes = targetm.vectorize.autovectorize_vector_sizes ();
2152
2153 while (1)
2154 {
2155 bb_vinfo = vect_slp_analyze_bb_1 (bb);
2156 if (bb_vinfo)
2157 return bb_vinfo;
2158
2159 destroy_bb_vec_info (bb_vinfo);
2160
2161 vector_sizes &= ~current_vector_size;
2162 if (vector_sizes == 0
2163 || current_vector_size == 0)
2164 return NULL;
2165
2166 /* Try the next biggest vector size. */
2167 current_vector_size = 1 << floor_log2 (vector_sizes);
2168 if (vect_print_dump_info (REPORT_DETAILS))
2169 fprintf (vect_dump, "***** Re-trying analysis with "
2170 "vector size %d\n", current_vector_size);
2171 }
2172 }
2173
2174
2175 /* SLP costs are calculated according to SLP instance unrolling factor (i.e.,
2176 the number of created vector stmts depends on the unrolling factor).
2177 However, the actual number of vector stmts for every SLP node depends on
2178 VF which is set later in vect_analyze_operations (). Hence, SLP costs
2179 should be updated. In this function we assume that the inside costs
2180 calculated in vect_model_xxx_cost are linear in ncopies. */
2181
2182 void
vect_update_slp_costs_according_to_vf(loop_vec_info loop_vinfo)2183 vect_update_slp_costs_according_to_vf (loop_vec_info loop_vinfo)
2184 {
2185 unsigned int i, vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
2186 VEC (slp_instance, heap) *slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
2187 slp_instance instance;
2188
2189 if (vect_print_dump_info (REPORT_SLP))
2190 fprintf (vect_dump, "=== vect_update_slp_costs_according_to_vf ===");
2191
2192 FOR_EACH_VEC_ELT (slp_instance, slp_instances, i, instance)
2193 /* We assume that costs are linear in ncopies. */
2194 SLP_INSTANCE_INSIDE_OF_LOOP_COST (instance) *= vf
2195 / SLP_INSTANCE_UNROLLING_FACTOR (instance);
2196 }
2197
2198
2199 /* For constant and loop invariant defs of SLP_NODE this function returns
2200 (vector) defs (VEC_OPRNDS) that will be used in the vectorized stmts.
2201 OP_NUM determines if we gather defs for operand 0 or operand 1 of the RHS of
2202 scalar stmts. NUMBER_OF_VECTORS is the number of vector defs to create.
2203 REDUC_INDEX is the index of the reduction operand in the statements, unless
2204 it is -1. */
2205
2206 static void
vect_get_constant_vectors(tree op,slp_tree slp_node,VEC (tree,heap)** vec_oprnds,unsigned int op_num,unsigned int number_of_vectors,int reduc_index)2207 vect_get_constant_vectors (tree op, slp_tree slp_node,
2208 VEC (tree, heap) **vec_oprnds,
2209 unsigned int op_num, unsigned int number_of_vectors,
2210 int reduc_index)
2211 {
2212 VEC (gimple, heap) *stmts = SLP_TREE_SCALAR_STMTS (slp_node);
2213 gimple stmt = VEC_index (gimple, stmts, 0);
2214 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
2215 int nunits;
2216 tree vec_cst;
2217 tree t = NULL_TREE;
2218 int j, number_of_places_left_in_vector;
2219 tree vector_type;
2220 tree vop;
2221 int group_size = VEC_length (gimple, stmts);
2222 unsigned int vec_num, i;
2223 int number_of_copies = 1;
2224 VEC (tree, heap) *voprnds = VEC_alloc (tree, heap, number_of_vectors);
2225 bool constant_p, is_store;
2226 tree neutral_op = NULL;
2227 enum tree_code code = gimple_expr_code (stmt);
2228 gimple def_stmt;
2229 struct loop *loop;
2230
2231 if (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
2232 && reduc_index != -1)
2233 {
2234 op_num = reduc_index - 1;
2235 op = gimple_op (stmt, reduc_index);
2236 /* For additional copies (see the explanation of NUMBER_OF_COPIES below)
2237 we need either neutral operands or the original operands. See
2238 get_initial_def_for_reduction() for details. */
2239 switch (code)
2240 {
2241 case WIDEN_SUM_EXPR:
2242 case DOT_PROD_EXPR:
2243 case PLUS_EXPR:
2244 case MINUS_EXPR:
2245 case BIT_IOR_EXPR:
2246 case BIT_XOR_EXPR:
2247 if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (op)))
2248 neutral_op = build_real (TREE_TYPE (op), dconst0);
2249 else
2250 neutral_op = build_int_cst (TREE_TYPE (op), 0);
2251
2252 break;
2253
2254 case MULT_EXPR:
2255 if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (op)))
2256 neutral_op = build_real (TREE_TYPE (op), dconst1);
2257 else
2258 neutral_op = build_int_cst (TREE_TYPE (op), 1);
2259
2260 break;
2261
2262 case BIT_AND_EXPR:
2263 neutral_op = build_int_cst (TREE_TYPE (op), -1);
2264 break;
2265
2266 case MAX_EXPR:
2267 case MIN_EXPR:
2268 def_stmt = SSA_NAME_DEF_STMT (op);
2269 loop = (gimple_bb (stmt))->loop_father;
2270 neutral_op = PHI_ARG_DEF_FROM_EDGE (def_stmt,
2271 loop_preheader_edge (loop));
2272 break;
2273
2274 default:
2275 neutral_op = NULL;
2276 }
2277 }
2278
2279 if (STMT_VINFO_DATA_REF (stmt_vinfo))
2280 {
2281 is_store = true;
2282 op = gimple_assign_rhs1 (stmt);
2283 }
2284 else
2285 is_store = false;
2286
2287 gcc_assert (op);
2288
2289 if (CONSTANT_CLASS_P (op))
2290 constant_p = true;
2291 else
2292 constant_p = false;
2293
2294 vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
2295 gcc_assert (vector_type);
2296 nunits = TYPE_VECTOR_SUBPARTS (vector_type);
2297
2298 /* NUMBER_OF_COPIES is the number of times we need to use the same values in
2299 created vectors. It is greater than 1 if unrolling is performed.
2300
2301 For example, we have two scalar operands, s1 and s2 (e.g., group of
2302 strided accesses of size two), while NUNITS is four (i.e., four scalars
2303 of this type can be packed in a vector). The output vector will contain
2304 two copies of each scalar operand: {s1, s2, s1, s2}. (NUMBER_OF_COPIES
2305 will be 2).
2306
2307 If GROUP_SIZE > NUNITS, the scalars will be split into several vectors
2308 containing the operands.
2309
2310 For example, NUNITS is four as before, and the group size is 8
2311 (s1, s2, ..., s8). We will create two vectors {s1, s2, s3, s4} and
2312 {s5, s6, s7, s8}. */
2313
2314 number_of_copies = least_common_multiple (nunits, group_size) / group_size;
2315
2316 number_of_places_left_in_vector = nunits;
2317 for (j = 0; j < number_of_copies; j++)
2318 {
2319 for (i = group_size - 1; VEC_iterate (gimple, stmts, i, stmt); i--)
2320 {
2321 if (is_store)
2322 op = gimple_assign_rhs1 (stmt);
2323 else
2324 {
2325 switch (code)
2326 {
2327 case COND_EXPR:
2328 if (op_num == 0 || op_num == 1)
2329 {
2330 tree cond = gimple_assign_rhs1 (stmt);
2331 op = TREE_OPERAND (cond, op_num);
2332 }
2333 else
2334 {
2335 if (op_num == 2)
2336 op = gimple_assign_rhs2 (stmt);
2337 else
2338 op = gimple_assign_rhs3 (stmt);
2339 }
2340 break;
2341
2342 case CALL_EXPR:
2343 op = gimple_call_arg (stmt, op_num);
2344 break;
2345
2346 default:
2347 op = gimple_op (stmt, op_num + 1);
2348 }
2349 }
2350
2351 if (reduc_index != -1)
2352 {
2353 loop = (gimple_bb (stmt))->loop_father;
2354 def_stmt = SSA_NAME_DEF_STMT (op);
2355
2356 gcc_assert (loop);
2357
2358 /* Get the def before the loop. In reduction chain we have only
2359 one initial value. */
2360 if ((j != (number_of_copies - 1)
2361 || (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt))
2362 && i != 0))
2363 && neutral_op)
2364 op = neutral_op;
2365 else
2366 op = PHI_ARG_DEF_FROM_EDGE (def_stmt,
2367 loop_preheader_edge (loop));
2368 }
2369
2370 /* Create 'vect_ = {op0,op1,...,opn}'. */
2371 t = tree_cons (NULL_TREE, op, t);
2372
2373 number_of_places_left_in_vector--;
2374
2375 if (number_of_places_left_in_vector == 0)
2376 {
2377 number_of_places_left_in_vector = nunits;
2378
2379 if (constant_p)
2380 vec_cst = build_vector (vector_type, t);
2381 else
2382 vec_cst = build_constructor_from_list (vector_type, t);
2383 VEC_quick_push (tree, voprnds,
2384 vect_init_vector (stmt, vec_cst, vector_type, NULL));
2385 t = NULL_TREE;
2386 }
2387 }
2388 }
2389
2390 /* Since the vectors are created in the reverse order, we should invert
2391 them. */
2392 vec_num = VEC_length (tree, voprnds);
2393 for (j = vec_num - 1; j >= 0; j--)
2394 {
2395 vop = VEC_index (tree, voprnds, j);
2396 VEC_quick_push (tree, *vec_oprnds, vop);
2397 }
2398
2399 VEC_free (tree, heap, voprnds);
2400
2401 /* In case that VF is greater than the unrolling factor needed for the SLP
2402 group of stmts, NUMBER_OF_VECTORS to be created is greater than
2403 NUMBER_OF_SCALARS/NUNITS or NUNITS/NUMBER_OF_SCALARS, and hence we have
2404 to replicate the vectors. */
2405 while (number_of_vectors > VEC_length (tree, *vec_oprnds))
2406 {
2407 tree neutral_vec = NULL;
2408
2409 if (neutral_op)
2410 {
2411 if (!neutral_vec)
2412 neutral_vec = build_vector_from_val (vector_type, neutral_op);
2413
2414 VEC_quick_push (tree, *vec_oprnds, neutral_vec);
2415 }
2416 else
2417 {
2418 for (i = 0; VEC_iterate (tree, *vec_oprnds, i, vop) && i < vec_num; i++)
2419 VEC_quick_push (tree, *vec_oprnds, vop);
2420 }
2421 }
2422 }
2423
2424
2425 /* Get vectorized definitions from SLP_NODE that contains corresponding
2426 vectorized def-stmts. */
2427
2428 static void
vect_get_slp_vect_defs(slp_tree slp_node,VEC (tree,heap)** vec_oprnds)2429 vect_get_slp_vect_defs (slp_tree slp_node, VEC (tree,heap) **vec_oprnds)
2430 {
2431 tree vec_oprnd;
2432 gimple vec_def_stmt;
2433 unsigned int i;
2434
2435 gcc_assert (SLP_TREE_VEC_STMTS (slp_node));
2436
2437 FOR_EACH_VEC_ELT (gimple, SLP_TREE_VEC_STMTS (slp_node), i, vec_def_stmt)
2438 {
2439 gcc_assert (vec_def_stmt);
2440 vec_oprnd = gimple_get_lhs (vec_def_stmt);
2441 VEC_quick_push (tree, *vec_oprnds, vec_oprnd);
2442 }
2443 }
2444
2445
2446 /* Get vectorized definitions for SLP_NODE.
2447 If the scalar definitions are loop invariants or constants, collect them and
2448 call vect_get_constant_vectors() to create vector stmts.
2449 Otherwise, the def-stmts must be already vectorized and the vectorized stmts
2450 must be stored in the corresponding child of SLP_NODE, and we call
2451 vect_get_slp_vect_defs () to retrieve them. */
2452
2453 void
vect_get_slp_defs(VEC (tree,heap)* ops,slp_tree slp_node,VEC (slp_void_p,heap)** vec_oprnds,int reduc_index)2454 vect_get_slp_defs (VEC (tree, heap) *ops, slp_tree slp_node,
2455 VEC (slp_void_p, heap) **vec_oprnds, int reduc_index)
2456 {
2457 gimple first_stmt, first_def;
2458 int number_of_vects = 0, i;
2459 unsigned int child_index = 0;
2460 HOST_WIDE_INT lhs_size_unit, rhs_size_unit;
2461 slp_tree child = NULL;
2462 VEC (tree, heap) *vec_defs;
2463 tree oprnd, def_lhs;
2464 bool vectorized_defs;
2465
2466 first_stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (slp_node), 0);
2467 FOR_EACH_VEC_ELT (tree, ops, i, oprnd)
2468 {
2469 /* For each operand we check if it has vectorized definitions in a child
2470 node or we need to create them (for invariants and constants). We
2471 check if the LHS of the first stmt of the next child matches OPRND.
2472 If it does, we found the correct child. Otherwise, we call
2473 vect_get_constant_vectors (), and not advance CHILD_INDEX in order
2474 to check this child node for the next operand. */
2475 vectorized_defs = false;
2476 if (VEC_length (slp_void_p, SLP_TREE_CHILDREN (slp_node)) > child_index)
2477 {
2478 child = (slp_tree) VEC_index (slp_void_p,
2479 SLP_TREE_CHILDREN (slp_node),
2480 child_index);
2481 first_def = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (child), 0);
2482
2483 /* In the end of a pattern sequence we have a use of the original stmt,
2484 so we need to compare OPRND with the original def. */
2485 if (is_pattern_stmt_p (vinfo_for_stmt (first_def))
2486 && !STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (first_stmt))
2487 && !is_pattern_stmt_p (vinfo_for_stmt (first_stmt)))
2488 first_def = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (first_def));
2489
2490 if (is_gimple_call (first_def))
2491 def_lhs = gimple_call_lhs (first_def);
2492 else
2493 def_lhs = gimple_assign_lhs (first_def);
2494
2495 if (operand_equal_p (oprnd, def_lhs, 0))
2496 {
2497 /* The number of vector defs is determined by the number of
2498 vector statements in the node from which we get those
2499 statements. */
2500 number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (child);
2501 vectorized_defs = true;
2502 child_index++;
2503 }
2504 }
2505
2506 if (!vectorized_defs)
2507 {
2508 if (i == 0)
2509 {
2510 number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
2511 /* Number of vector stmts was calculated according to LHS in
2512 vect_schedule_slp_instance (), fix it by replacing LHS with
2513 RHS, if necessary. See vect_get_smallest_scalar_type () for
2514 details. */
2515 vect_get_smallest_scalar_type (first_stmt, &lhs_size_unit,
2516 &rhs_size_unit);
2517 if (rhs_size_unit != lhs_size_unit)
2518 {
2519 number_of_vects *= rhs_size_unit;
2520 number_of_vects /= lhs_size_unit;
2521 }
2522 }
2523 }
2524
2525 /* Allocate memory for vectorized defs. */
2526 vec_defs = VEC_alloc (tree, heap, number_of_vects);
2527
2528 /* For reduction defs we call vect_get_constant_vectors (), since we are
2529 looking for initial loop invariant values. */
2530 if (vectorized_defs && reduc_index == -1)
2531 /* The defs are already vectorized. */
2532 vect_get_slp_vect_defs (child, &vec_defs);
2533 else
2534 /* Build vectors from scalar defs. */
2535 vect_get_constant_vectors (oprnd, slp_node, &vec_defs, i,
2536 number_of_vects, reduc_index);
2537
2538 VEC_quick_push (slp_void_p, *vec_oprnds, (slp_void_p) vec_defs);
2539
2540 /* For reductions, we only need initial values. */
2541 if (reduc_index != -1)
2542 return;
2543 }
2544 }
2545
2546
2547 /* Create NCOPIES permutation statements using the mask MASK_BYTES (by
2548 building a vector of type MASK_TYPE from it) and two input vectors placed in
2549 DR_CHAIN at FIRST_VEC_INDX and SECOND_VEC_INDX for the first copy and
2550 shifting by STRIDE elements of DR_CHAIN for every copy.
2551 (STRIDE is the number of vectorized stmts for NODE divided by the number of
2552 copies).
2553 VECT_STMTS_COUNTER specifies the index in the vectorized stmts of NODE, where
2554 the created stmts must be inserted. */
2555
2556 static inline void
vect_create_mask_and_perm(gimple stmt,gimple next_scalar_stmt,tree mask,int first_vec_indx,int second_vec_indx,gimple_stmt_iterator * gsi,slp_tree node,tree vectype,VEC (tree,heap)* dr_chain,int ncopies,int vect_stmts_counter)2557 vect_create_mask_and_perm (gimple stmt, gimple next_scalar_stmt,
2558 tree mask, int first_vec_indx, int second_vec_indx,
2559 gimple_stmt_iterator *gsi, slp_tree node,
2560 tree vectype, VEC(tree,heap) *dr_chain,
2561 int ncopies, int vect_stmts_counter)
2562 {
2563 tree perm_dest;
2564 gimple perm_stmt = NULL;
2565 stmt_vec_info next_stmt_info;
2566 int i, stride;
2567 tree first_vec, second_vec, data_ref;
2568
2569 stride = SLP_TREE_NUMBER_OF_VEC_STMTS (node) / ncopies;
2570
2571 /* Initialize the vect stmts of NODE to properly insert the generated
2572 stmts later. */
2573 for (i = VEC_length (gimple, SLP_TREE_VEC_STMTS (node));
2574 i < (int) SLP_TREE_NUMBER_OF_VEC_STMTS (node); i++)
2575 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (node), NULL);
2576
2577 perm_dest = vect_create_destination_var (gimple_assign_lhs (stmt), vectype);
2578 for (i = 0; i < ncopies; i++)
2579 {
2580 first_vec = VEC_index (tree, dr_chain, first_vec_indx);
2581 second_vec = VEC_index (tree, dr_chain, second_vec_indx);
2582
2583 /* Generate the permute statement. */
2584 perm_stmt = gimple_build_assign_with_ops3 (VEC_PERM_EXPR, perm_dest,
2585 first_vec, second_vec, mask);
2586 data_ref = make_ssa_name (perm_dest, perm_stmt);
2587 gimple_set_lhs (perm_stmt, data_ref);
2588 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
2589
2590 /* Store the vector statement in NODE. */
2591 VEC_replace (gimple, SLP_TREE_VEC_STMTS (node),
2592 stride * i + vect_stmts_counter, perm_stmt);
2593
2594 first_vec_indx += stride;
2595 second_vec_indx += stride;
2596 }
2597
2598 /* Mark the scalar stmt as vectorized. */
2599 next_stmt_info = vinfo_for_stmt (next_scalar_stmt);
2600 STMT_VINFO_VEC_STMT (next_stmt_info) = perm_stmt;
2601 }
2602
2603
2604 /* Given FIRST_MASK_ELEMENT - the mask element in element representation,
2605 return in CURRENT_MASK_ELEMENT its equivalent in target specific
2606 representation. Check that the mask is valid and return FALSE if not.
2607 Return TRUE in NEED_NEXT_VECTOR if the permutation requires to move to
2608 the next vector, i.e., the current first vector is not needed. */
2609
2610 static bool
vect_get_mask_element(gimple stmt,int first_mask_element,int m,int mask_nunits,bool only_one_vec,int index,unsigned char * mask,int * current_mask_element,bool * need_next_vector,int * number_of_mask_fixes,bool * mask_fixed,bool * needs_first_vector)2611 vect_get_mask_element (gimple stmt, int first_mask_element, int m,
2612 int mask_nunits, bool only_one_vec, int index,
2613 unsigned char *mask, int *current_mask_element,
2614 bool *need_next_vector, int *number_of_mask_fixes,
2615 bool *mask_fixed, bool *needs_first_vector)
2616 {
2617 int i;
2618
2619 /* Convert to target specific representation. */
2620 *current_mask_element = first_mask_element + m;
2621 /* Adjust the value in case it's a mask for second and third vectors. */
2622 *current_mask_element -= mask_nunits * (*number_of_mask_fixes - 1);
2623
2624 if (*current_mask_element < mask_nunits)
2625 *needs_first_vector = true;
2626
2627 /* We have only one input vector to permute but the mask accesses values in
2628 the next vector as well. */
2629 if (only_one_vec && *current_mask_element >= mask_nunits)
2630 {
2631 if (vect_print_dump_info (REPORT_DETAILS))
2632 {
2633 fprintf (vect_dump, "permutation requires at least two vectors ");
2634 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2635 }
2636
2637 return false;
2638 }
2639
2640 /* The mask requires the next vector. */
2641 if (*current_mask_element >= mask_nunits * 2)
2642 {
2643 if (*needs_first_vector || *mask_fixed)
2644 {
2645 /* We either need the first vector too or have already moved to the
2646 next vector. In both cases, this permutation needs three
2647 vectors. */
2648 if (vect_print_dump_info (REPORT_DETAILS))
2649 {
2650 fprintf (vect_dump, "permutation requires at "
2651 "least three vectors ");
2652 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2653 }
2654
2655 return false;
2656 }
2657
2658 /* We move to the next vector, dropping the first one and working with
2659 the second and the third - we need to adjust the values of the mask
2660 accordingly. */
2661 *current_mask_element -= mask_nunits * *number_of_mask_fixes;
2662
2663 for (i = 0; i < index; i++)
2664 mask[i] -= mask_nunits * *number_of_mask_fixes;
2665
2666 (*number_of_mask_fixes)++;
2667 *mask_fixed = true;
2668 }
2669
2670 *need_next_vector = *mask_fixed;
2671
2672 /* This was the last element of this mask. Start a new one. */
2673 if (index == mask_nunits - 1)
2674 {
2675 *number_of_mask_fixes = 1;
2676 *mask_fixed = false;
2677 *needs_first_vector = false;
2678 }
2679
2680 return true;
2681 }
2682
2683
2684 /* Generate vector permute statements from a list of loads in DR_CHAIN.
2685 If ANALYZE_ONLY is TRUE, only check that it is possible to create valid
2686 permute statements for SLP_NODE_INSTANCE. */
2687 bool
vect_transform_slp_perm_load(gimple stmt,VEC (tree,heap)* dr_chain,gimple_stmt_iterator * gsi,int vf,slp_instance slp_node_instance,bool analyze_only)2688 vect_transform_slp_perm_load (gimple stmt, VEC (tree, heap) *dr_chain,
2689 gimple_stmt_iterator *gsi, int vf,
2690 slp_instance slp_node_instance, bool analyze_only)
2691 {
2692 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2693 tree mask_element_type = NULL_TREE, mask_type;
2694 int i, j, k, nunits, vec_index = 0, scalar_index;
2695 slp_tree node;
2696 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2697 gimple next_scalar_stmt;
2698 int group_size = SLP_INSTANCE_GROUP_SIZE (slp_node_instance);
2699 int first_mask_element;
2700 int index, unroll_factor, current_mask_element, ncopies;
2701 unsigned char *mask;
2702 bool only_one_vec = false, need_next_vector = false;
2703 int first_vec_index, second_vec_index, orig_vec_stmts_num, vect_stmts_counter;
2704 int number_of_mask_fixes = 1;
2705 bool mask_fixed = false;
2706 bool needs_first_vector = false;
2707 enum machine_mode mode;
2708
2709 mode = TYPE_MODE (vectype);
2710
2711 if (!can_vec_perm_p (mode, false, NULL))
2712 {
2713 if (vect_print_dump_info (REPORT_DETAILS))
2714 {
2715 fprintf (vect_dump, "no vect permute for ");
2716 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2717 }
2718 return false;
2719 }
2720
2721 /* The generic VEC_PERM_EXPR code always uses an integral type of the
2722 same size as the vector element being permuted. */
2723 mask_element_type
2724 = lang_hooks.types.type_for_size
2725 (TREE_INT_CST_LOW (TYPE_SIZE (TREE_TYPE (vectype))), 1);
2726 mask_type = get_vectype_for_scalar_type (mask_element_type);
2727 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2728 mask = XALLOCAVEC (unsigned char, nunits);
2729 unroll_factor = SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance);
2730
2731 /* The number of vector stmts to generate based only on SLP_NODE_INSTANCE
2732 unrolling factor. */
2733 orig_vec_stmts_num = group_size *
2734 SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance) / nunits;
2735 if (orig_vec_stmts_num == 1)
2736 only_one_vec = true;
2737
2738 /* Number of copies is determined by the final vectorization factor
2739 relatively to SLP_NODE_INSTANCE unrolling factor. */
2740 ncopies = vf / SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance);
2741
2742 /* Generate permutation masks for every NODE. Number of masks for each NODE
2743 is equal to GROUP_SIZE.
2744 E.g., we have a group of three nodes with three loads from the same
2745 location in each node, and the vector size is 4. I.e., we have a
2746 a0b0c0a1b1c1... sequence and we need to create the following vectors:
2747 for a's: a0a0a0a1 a1a1a2a2 a2a3a3a3
2748 for b's: b0b0b0b1 b1b1b2b2 b2b3b3b3
2749 ...
2750
2751 The masks for a's should be: {0,0,0,3} {3,3,6,6} {6,9,9,9}.
2752 The last mask is illegal since we assume two operands for permute
2753 operation, and the mask element values can't be outside that range.
2754 Hence, the last mask must be converted into {2,5,5,5}.
2755 For the first two permutations we need the first and the second input
2756 vectors: {a0,b0,c0,a1} and {b1,c1,a2,b2}, and for the last permutation
2757 we need the second and the third vectors: {b1,c1,a2,b2} and
2758 {c2,a3,b3,c3}. */
2759
2760 FOR_EACH_VEC_ELT (slp_tree, SLP_INSTANCE_LOADS (slp_node_instance), i, node)
2761 {
2762 scalar_index = 0;
2763 index = 0;
2764 vect_stmts_counter = 0;
2765 vec_index = 0;
2766 first_vec_index = vec_index++;
2767 if (only_one_vec)
2768 second_vec_index = first_vec_index;
2769 else
2770 second_vec_index = vec_index++;
2771
2772 for (j = 0; j < unroll_factor; j++)
2773 {
2774 for (k = 0; k < group_size; k++)
2775 {
2776 first_mask_element = i + j * group_size;
2777 if (!vect_get_mask_element (stmt, first_mask_element, 0,
2778 nunits, only_one_vec, index,
2779 mask, ¤t_mask_element,
2780 &need_next_vector,
2781 &number_of_mask_fixes, &mask_fixed,
2782 &needs_first_vector))
2783 return false;
2784 mask[index++] = current_mask_element;
2785
2786 if (index == nunits)
2787 {
2788 tree mask_vec = NULL;
2789
2790 if (!can_vec_perm_p (mode, false, mask))
2791 {
2792 if (vect_print_dump_info (REPORT_DETAILS))
2793 {
2794 fprintf (vect_dump, "unsupported vect permute { ");
2795 for (i = 0; i < nunits; ++i)
2796 fprintf (vect_dump, "%d ", mask[i]);
2797 fprintf (vect_dump, "}\n");
2798 }
2799 return false;
2800 }
2801
2802 while (--index >= 0)
2803 {
2804 tree t = build_int_cst (mask_element_type, mask[index]);
2805 mask_vec = tree_cons (NULL, t, mask_vec);
2806 }
2807 mask_vec = build_vector (mask_type, mask_vec);
2808 index = 0;
2809
2810 if (!analyze_only)
2811 {
2812 if (need_next_vector)
2813 {
2814 first_vec_index = second_vec_index;
2815 second_vec_index = vec_index;
2816 }
2817
2818 next_scalar_stmt = VEC_index (gimple,
2819 SLP_TREE_SCALAR_STMTS (node), scalar_index++);
2820
2821 vect_create_mask_and_perm (stmt, next_scalar_stmt,
2822 mask_vec, first_vec_index, second_vec_index,
2823 gsi, node, vectype, dr_chain,
2824 ncopies, vect_stmts_counter++);
2825 }
2826 }
2827 }
2828 }
2829 }
2830
2831 return true;
2832 }
2833
2834
2835
2836 /* Vectorize SLP instance tree in postorder. */
2837
2838 static bool
vect_schedule_slp_instance(slp_tree node,slp_instance instance,unsigned int vectorization_factor)2839 vect_schedule_slp_instance (slp_tree node, slp_instance instance,
2840 unsigned int vectorization_factor)
2841 {
2842 gimple stmt;
2843 bool strided_store, is_store;
2844 gimple_stmt_iterator si;
2845 stmt_vec_info stmt_info;
2846 unsigned int vec_stmts_size, nunits, group_size;
2847 tree vectype;
2848 int i;
2849 slp_tree loads_node;
2850 slp_void_p child;
2851
2852 if (!node)
2853 return false;
2854
2855 FOR_EACH_VEC_ELT (slp_void_p, SLP_TREE_CHILDREN (node), i, child)
2856 vect_schedule_slp_instance ((slp_tree) child, instance,
2857 vectorization_factor);
2858
2859 stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (node), 0);
2860 stmt_info = vinfo_for_stmt (stmt);
2861
2862 /* VECTYPE is the type of the destination. */
2863 vectype = STMT_VINFO_VECTYPE (stmt_info);
2864 nunits = (unsigned int) TYPE_VECTOR_SUBPARTS (vectype);
2865 group_size = SLP_INSTANCE_GROUP_SIZE (instance);
2866
2867 /* For each SLP instance calculate number of vector stmts to be created
2868 for the scalar stmts in each node of the SLP tree. Number of vector
2869 elements in one vector iteration is the number of scalar elements in
2870 one scalar iteration (GROUP_SIZE) multiplied by VF divided by vector
2871 size. */
2872 vec_stmts_size = (vectorization_factor * group_size) / nunits;
2873
2874 /* In case of load permutation we have to allocate vectorized statements for
2875 all the nodes that participate in that permutation. */
2876 if (SLP_INSTANCE_LOAD_PERMUTATION (instance))
2877 {
2878 FOR_EACH_VEC_ELT (slp_tree, SLP_INSTANCE_LOADS (instance), i, loads_node)
2879 {
2880 if (!SLP_TREE_VEC_STMTS (loads_node))
2881 {
2882 SLP_TREE_VEC_STMTS (loads_node) = VEC_alloc (gimple, heap,
2883 vec_stmts_size);
2884 SLP_TREE_NUMBER_OF_VEC_STMTS (loads_node) = vec_stmts_size;
2885 }
2886 }
2887 }
2888
2889 if (!SLP_TREE_VEC_STMTS (node))
2890 {
2891 SLP_TREE_VEC_STMTS (node) = VEC_alloc (gimple, heap, vec_stmts_size);
2892 SLP_TREE_NUMBER_OF_VEC_STMTS (node) = vec_stmts_size;
2893 }
2894
2895 if (vect_print_dump_info (REPORT_DETAILS))
2896 {
2897 fprintf (vect_dump, "------>vectorizing SLP node starting from: ");
2898 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2899 }
2900
2901 /* Loads should be inserted before the first load. */
2902 if (SLP_INSTANCE_FIRST_LOAD_STMT (instance)
2903 && STMT_VINFO_STRIDED_ACCESS (stmt_info)
2904 && !REFERENCE_CLASS_P (gimple_get_lhs (stmt))
2905 && SLP_INSTANCE_LOAD_PERMUTATION (instance))
2906 si = gsi_for_stmt (SLP_INSTANCE_FIRST_LOAD_STMT (instance));
2907 else if (is_pattern_stmt_p (stmt_info))
2908 si = gsi_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
2909 else
2910 si = gsi_for_stmt (stmt);
2911
2912 /* Stores should be inserted just before the last store. */
2913 if (STMT_VINFO_STRIDED_ACCESS (stmt_info)
2914 && REFERENCE_CLASS_P (gimple_get_lhs (stmt)))
2915 {
2916 gimple last_store = vect_find_last_store_in_slp_instance (instance);
2917 if (is_pattern_stmt_p (vinfo_for_stmt (last_store)))
2918 last_store = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (last_store));
2919 si = gsi_for_stmt (last_store);
2920 }
2921
2922 /* Mark the first element of the reduction chain as reduction to properly
2923 transform the node. In the analysis phase only the last element of the
2924 chain is marked as reduction. */
2925 if (GROUP_FIRST_ELEMENT (stmt_info) && !STMT_VINFO_STRIDED_ACCESS (stmt_info)
2926 && GROUP_FIRST_ELEMENT (stmt_info) == stmt)
2927 {
2928 STMT_VINFO_DEF_TYPE (stmt_info) = vect_reduction_def;
2929 STMT_VINFO_TYPE (stmt_info) = reduc_vec_info_type;
2930 }
2931
2932 is_store = vect_transform_stmt (stmt, &si, &strided_store, node, instance);
2933 return is_store;
2934 }
2935
2936 /* Replace scalar calls from SLP node NODE with setting of their lhs to zero.
2937 For loop vectorization this is done in vectorizable_call, but for SLP
2938 it needs to be deferred until end of vect_schedule_slp, because multiple
2939 SLP instances may refer to the same scalar stmt. */
2940
2941 static void
vect_remove_slp_scalar_calls(slp_tree node)2942 vect_remove_slp_scalar_calls (slp_tree node)
2943 {
2944 gimple stmt, new_stmt;
2945 gimple_stmt_iterator gsi;
2946 int i;
2947 slp_void_p child;
2948 tree lhs;
2949 stmt_vec_info stmt_info;
2950
2951 if (!node)
2952 return;
2953
2954 FOR_EACH_VEC_ELT (slp_void_p, SLP_TREE_CHILDREN (node), i, child)
2955 vect_remove_slp_scalar_calls ((slp_tree) child);
2956
2957 FOR_EACH_VEC_ELT (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt)
2958 {
2959 if (!is_gimple_call (stmt) || gimple_bb (stmt) == NULL)
2960 continue;
2961 stmt_info = vinfo_for_stmt (stmt);
2962 if (stmt_info == NULL
2963 || is_pattern_stmt_p (stmt_info)
2964 || !PURE_SLP_STMT (stmt_info))
2965 continue;
2966 lhs = gimple_call_lhs (stmt);
2967 new_stmt = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
2968 set_vinfo_for_stmt (new_stmt, stmt_info);
2969 set_vinfo_for_stmt (stmt, NULL);
2970 STMT_VINFO_STMT (stmt_info) = new_stmt;
2971 gsi = gsi_for_stmt (stmt);
2972 gsi_replace (&gsi, new_stmt, false);
2973 SSA_NAME_DEF_STMT (gimple_assign_lhs (new_stmt)) = new_stmt;
2974 }
2975 }
2976
2977 /* Generate vector code for all SLP instances in the loop/basic block. */
2978
2979 bool
vect_schedule_slp(loop_vec_info loop_vinfo,bb_vec_info bb_vinfo)2980 vect_schedule_slp (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
2981 {
2982 VEC (slp_instance, heap) *slp_instances;
2983 slp_instance instance;
2984 slp_tree loads_node;
2985 unsigned int i, j, vf;
2986 bool is_store = false;
2987
2988 if (loop_vinfo)
2989 {
2990 slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
2991 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
2992 }
2993 else
2994 {
2995 slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
2996 vf = 1;
2997 }
2998
2999 FOR_EACH_VEC_ELT (slp_instance, slp_instances, i, instance)
3000 {
3001 /* Schedule the tree of INSTANCE. */
3002 is_store = vect_schedule_slp_instance (SLP_INSTANCE_TREE (instance),
3003 instance, vf);
3004
3005 /* Clear STMT_VINFO_VEC_STMT of all loads. With shared loads
3006 between SLP instances we fail to properly initialize the
3007 vectorized SLP stmts and confuse different load permutations. */
3008 FOR_EACH_VEC_ELT (slp_tree, SLP_INSTANCE_LOADS (instance), j, loads_node)
3009 STMT_VINFO_VEC_STMT
3010 (vinfo_for_stmt
3011 (VEC_index (gimple, SLP_TREE_SCALAR_STMTS (loads_node), 0))) = NULL;
3012
3013 if (vect_print_dump_info (REPORT_VECTORIZED_LOCATIONS)
3014 || vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
3015 fprintf (vect_dump, "vectorizing stmts using SLP.");
3016 }
3017
3018 FOR_EACH_VEC_ELT (slp_instance, slp_instances, i, instance)
3019 {
3020 slp_tree root = SLP_INSTANCE_TREE (instance);
3021 gimple store;
3022 unsigned int j;
3023 gimple_stmt_iterator gsi;
3024
3025 /* Remove scalar call stmts. Do not do this for basic-block
3026 vectorization as not all uses may be vectorized.
3027 ??? Why should this be necessary? DCE should be able to
3028 remove the stmts itself.
3029 ??? For BB vectorization we can as well remove scalar
3030 stmts starting from the SLP tree root if they have no
3031 uses. */
3032 if (loop_vinfo)
3033 vect_remove_slp_scalar_calls (root);
3034
3035 for (j = 0; VEC_iterate (gimple, SLP_TREE_SCALAR_STMTS (root), j, store)
3036 && j < SLP_INSTANCE_GROUP_SIZE (instance); j++)
3037 {
3038 if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (store)))
3039 break;
3040
3041 if (is_pattern_stmt_p (vinfo_for_stmt (store)))
3042 store = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (store));
3043 /* Free the attached stmt_vec_info and remove the stmt. */
3044 gsi = gsi_for_stmt (store);
3045 gsi_remove (&gsi, true);
3046 free_stmt_vec_info (store);
3047 }
3048 }
3049
3050 return is_store;
3051 }
3052
3053
3054 /* Vectorize the basic block. */
3055
3056 void
vect_slp_transform_bb(basic_block bb)3057 vect_slp_transform_bb (basic_block bb)
3058 {
3059 bb_vec_info bb_vinfo = vec_info_for_bb (bb);
3060 gimple_stmt_iterator si;
3061
3062 gcc_assert (bb_vinfo);
3063
3064 if (vect_print_dump_info (REPORT_DETAILS))
3065 fprintf (vect_dump, "SLPing BB\n");
3066
3067 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
3068 {
3069 gimple stmt = gsi_stmt (si);
3070 stmt_vec_info stmt_info;
3071
3072 if (vect_print_dump_info (REPORT_DETAILS))
3073 {
3074 fprintf (vect_dump, "------>SLPing statement: ");
3075 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
3076 }
3077
3078 stmt_info = vinfo_for_stmt (stmt);
3079 gcc_assert (stmt_info);
3080
3081 /* Schedule all the SLP instances when the first SLP stmt is reached. */
3082 if (STMT_SLP_TYPE (stmt_info))
3083 {
3084 vect_schedule_slp (NULL, bb_vinfo);
3085 break;
3086 }
3087 }
3088
3089 mark_sym_for_renaming (gimple_vop (cfun));
3090 /* The memory tags and pointers in vectorized statements need to
3091 have their SSA forms updated. FIXME, why can't this be delayed
3092 until all the loops have been transformed? */
3093 update_ssa (TODO_update_ssa);
3094
3095 if (vect_print_dump_info (REPORT_DETAILS))
3096 fprintf (vect_dump, "BASIC BLOCK VECTORIZED\n");
3097
3098 destroy_bb_vec_info (bb_vinfo);
3099 }
3100
3101