/* Analysis Utilities for Loop Vectorization.
   Copyright (C) 2003, 2004, 2005 Free Software Foundation, Inc.
   Contributed by Dorit Naishlos <dorit@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 2, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING.  If not, write to the Free
Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
02110-1301, USA.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "ggc.h"
#include "tree.h"
#include "basic-block.h"
#include "diagnostic.h"
#include "tree-flow.h"
#include "tree-dump.h"
#include "timevar.h"
#include "cfgloop.h"
#include "expr.h"
#include "optabs.h"
#include "params.h"
#include "tree-chrec.h"
#include "tree-data-ref.h"
#include "tree-scalar-evolution.h"
#include "tree-vectorizer.h"

/* Main analysis functions.  */
static loop_vec_info vect_analyze_loop_form (struct loop *);
static bool vect_analyze_data_refs (loop_vec_info);
static bool vect_mark_stmts_to_be_vectorized (loop_vec_info);
static void vect_analyze_scalar_cycles (loop_vec_info);
static bool vect_analyze_data_ref_accesses (loop_vec_info);
static bool vect_analyze_data_ref_dependences (loop_vec_info);
static bool vect_analyze_data_refs_alignment (loop_vec_info);
static bool vect_compute_data_refs_alignment (loop_vec_info);
static bool vect_enhance_data_refs_alignment (loop_vec_info);
static bool vect_analyze_operations (loop_vec_info);
static bool vect_determine_vectorization_factor (loop_vec_info);

/* Utility functions for the analyses.  */
static bool exist_non_indexing_operands_for_use_p (tree, tree);
static void vect_mark_relevant (VEC(tree,heap) **, tree, bool, bool);
static bool vect_stmt_relevant_p (tree, loop_vec_info, bool *, bool *);
static tree vect_get_loop_niters (struct loop *, tree *);
static bool vect_analyze_data_ref_dependence
  (struct data_dependence_relation *, loop_vec_info);
static bool vect_compute_data_ref_alignment (struct data_reference *);
static bool vect_analyze_data_ref_access (struct data_reference *);
static bool vect_can_advance_ivs_p (loop_vec_info);
static void vect_update_misalignment_for_peel
  (struct data_reference *, struct data_reference *, int npeel);


/* Function vect_determine_vectorization_factor

   Determine the vectorization factor (VF).  VF is the number of data
   elements that are operated upon in parallel in a single iteration of the
   vectorized loop.  For example, when vectorizing a loop that operates on
   4-byte elements, on a target with a vector size (VS) of 16 bytes, the VF
   is set to 4, since 4 elements can fit in a single vector register.

   We currently support vectorization of loops in which all types operated
   upon are of the same size.  Therefore this function currently sets VF
   according to the size of the types operated upon, and fails if there are
   multiple sizes in the loop.

   VF is also the factor by which the loop iterations are strip-mined, e.g.:
   original loop:
        for (i=0; i<N; i++){
          a[i] = b[i] + c[i];
        }

   vectorized loop:
        for (i=0; i<N; i+=VF){
          a[i:VF] = b[i:VF] + c[i:VF];
        }
*/
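
/* Illustration (assuming a target with a 16-byte SIMD word, i.e.
   UNITS_PER_SIMD_WORD == 16), following the assertion below that
   element size * VF == UNITS_PER_SIMD_WORD:

        element type        nunits (VF)
        char   (1 byte)         16
        short  (2 bytes)         8
        int    (4 bytes)         4
        double (8 bytes)         2

   A loop that operates on both ints and shorts would therefore be
   rejected below with "not vectorized: mixed data-types".  */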

static bool
vect_determine_vectorization_factor (loop_vec_info loop_vinfo)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
  int nbbs = loop->num_nodes;
  block_stmt_iterator si;
  unsigned int vectorization_factor = 0;
  int i;
  tree scalar_type;

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "=== vect_determine_vectorization_factor ===");

  for (i = 0; i < nbbs; i++)
    {
      basic_block bb = bbs[i];

      for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
        {
          tree stmt = bsi_stmt (si);
          unsigned int nunits;
          stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
          tree vectype;

          if (vect_print_dump_info (REPORT_DETAILS))
            {
              fprintf (vect_dump, "==> examining statement: ");
              print_generic_expr (vect_dump, stmt, TDF_SLIM);
            }

          gcc_assert (stmt_info);
          /* Skip stmts which do not need to be vectorized.  */
          if (!STMT_VINFO_RELEVANT_P (stmt_info)
              && !STMT_VINFO_LIVE_P (stmt_info))
            {
              if (vect_print_dump_info (REPORT_DETAILS))
                fprintf (vect_dump, "skip.");
              continue;
            }

          if (VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (stmt))))
            {
              if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
                {
                  fprintf (vect_dump, "not vectorized: vector stmt in loop:");
                  print_generic_expr (vect_dump, stmt, TDF_SLIM);
                }
              return false;
            }

          if (STMT_VINFO_DATA_REF (stmt_info))
            scalar_type
              = TREE_TYPE (DR_REF (STMT_VINFO_DATA_REF (stmt_info)));
          else if (TREE_CODE (stmt) == MODIFY_EXPR)
            scalar_type = TREE_TYPE (TREE_OPERAND (stmt, 0));
          else
            scalar_type = TREE_TYPE (stmt);

          if (vect_print_dump_info (REPORT_DETAILS))
            {
              fprintf (vect_dump, "get vectype for scalar type: ");
              print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
            }

          vectype = get_vectype_for_scalar_type (scalar_type);
          if (!vectype)
            {
              if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
                {
                  fprintf (vect_dump,
                           "not vectorized: unsupported data-type ");
                  print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
                }
              return false;
            }
          if (vect_print_dump_info (REPORT_DETAILS))
            {
              fprintf (vect_dump, "vectype: ");
              print_generic_expr (vect_dump, vectype, TDF_SLIM);
            }
          STMT_VINFO_VECTYPE (stmt_info) = vectype;

          nunits = TYPE_VECTOR_SUBPARTS (vectype);
          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "nunits = %d", nunits);

          if (vectorization_factor)
            {
              /* FORNOW: don't allow mixed units.
                 This restriction will be relaxed in the future.  */
              if (nunits != vectorization_factor)
                {
                  if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
                    fprintf (vect_dump, "not vectorized: mixed data-types");
                  return false;
                }
            }
          else
            vectorization_factor = nunits;

          gcc_assert (GET_MODE_SIZE (TYPE_MODE (scalar_type))
                      * vectorization_factor == UNITS_PER_SIMD_WORD);
        }
    }

  /* TODO: Analyze cost.  Decide if worthwhile to vectorize.  */

  if (vectorization_factor <= 1)
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
        fprintf (vect_dump, "not vectorized: unsupported data-type");
      return false;
    }
  LOOP_VINFO_VECT_FACTOR (loop_vinfo) = vectorization_factor;

  return true;
}


/* Function vect_analyze_operations.

   Scan the loop stmts and make sure they are all vectorizable.  */

static bool
vect_analyze_operations (loop_vec_info loop_vinfo)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
  int nbbs = loop->num_nodes;
  block_stmt_iterator si;
  unsigned int vectorization_factor = 0;
  int i;
  bool ok;
  tree phi;
  stmt_vec_info stmt_info;
  bool need_to_vectorize = false;

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "=== vect_analyze_operations ===");

  gcc_assert (LOOP_VINFO_VECT_FACTOR (loop_vinfo));
  vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);

  for (i = 0; i < nbbs; i++)
    {
      basic_block bb = bbs[i];

      for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
        {
          stmt_info = vinfo_for_stmt (phi);
          if (vect_print_dump_info (REPORT_DETAILS))
            {
              fprintf (vect_dump, "examining phi: ");
              print_generic_expr (vect_dump, phi, TDF_SLIM);
            }

          gcc_assert (stmt_info);

          if (STMT_VINFO_LIVE_P (stmt_info))
            {
              /* FORNOW: not yet supported.  */
              if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
                fprintf (vect_dump,
                         "not vectorized: value used after loop.");
              return false;
            }

          if (STMT_VINFO_RELEVANT_P (stmt_info))
            {
              /* Most likely a reduction-like computation that is used
                 in the loop.  */
              if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
                fprintf (vect_dump, "not vectorized: unsupported pattern.");
              return false;
            }
        }

      for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
        {
          tree stmt = bsi_stmt (si);
          stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

          if (vect_print_dump_info (REPORT_DETAILS))
            {
              fprintf (vect_dump, "==> examining statement: ");
              print_generic_expr (vect_dump, stmt, TDF_SLIM);
            }

          gcc_assert (stmt_info);

          /* Skip stmts which do not need to be vectorized.
             This is expected to include:
             - the COND_EXPR which is the loop exit condition
             - any LABEL_EXPRs in the loop
             - computations that are used only for array indexing or loop
               control.  */

          if (!STMT_VINFO_RELEVANT_P (stmt_info)
              && !STMT_VINFO_LIVE_P (stmt_info))
            {
              if (vect_print_dump_info (REPORT_DETAILS))
                fprintf (vect_dump, "irrelevant.");
              continue;
            }

          if (STMT_VINFO_RELEVANT_P (stmt_info))
            {
              gcc_assert (!VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (stmt))));
              gcc_assert (STMT_VINFO_VECTYPE (stmt_info));

              ok = (vectorizable_operation (stmt, NULL, NULL)
                    || vectorizable_assignment (stmt, NULL, NULL)
                    || vectorizable_load (stmt, NULL, NULL)
                    || vectorizable_store (stmt, NULL, NULL)
                    || vectorizable_condition (stmt, NULL, NULL));

              if (!ok)
                {
                  if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
                    {
                      fprintf (vect_dump,
                               "not vectorized: relevant stmt not supported: ");
                      print_generic_expr (vect_dump, stmt, TDF_SLIM);
                    }
                  return false;
                }
              need_to_vectorize = true;
            }

          if (STMT_VINFO_LIVE_P (stmt_info))
            {
              ok = vectorizable_reduction (stmt, NULL, NULL);

              if (ok)
                need_to_vectorize = true;
              else
                ok = vectorizable_live_operation (stmt, NULL, NULL);

              if (!ok)
                {
                  if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
                    {
                      fprintf (vect_dump,
                               "not vectorized: live stmt not supported: ");
                      print_generic_expr (vect_dump, stmt, TDF_SLIM);
                    }
                  return false;
                }
            }
        } /* stmts in bb */
    } /* bbs */

  /* TODO: Analyze cost.  Decide if worthwhile to vectorize.  */

  /* All operations in the loop are either irrelevant (deal with loop
     control, or dead), or only used outside the loop and can be moved
     out of the loop (e.g. invariants, inductions).  The loop can be
     optimized away by scalar optimizations.  We're better off not
     touching this loop.  */
  if (!need_to_vectorize)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump,
                 "All the computation can be taken out of the loop.");
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
        fprintf (vect_dump,
                 "not vectorized: redundant loop. no profit to vectorize.");
      return false;
    }

  if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
      && vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump,
             "vectorization_factor = %d, niters = " HOST_WIDE_INT_PRINT_DEC,
             vectorization_factor, LOOP_VINFO_INT_NITERS (loop_vinfo));

  if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
      && LOOP_VINFO_INT_NITERS (loop_vinfo) < vectorization_factor)
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
        fprintf (vect_dump, "not vectorized: iteration count too small.");
      return false;
    }

  if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
      || LOOP_VINFO_INT_NITERS (loop_vinfo) % vectorization_factor != 0
      || LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo))
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "epilog loop required.");
      if (!vect_can_advance_ivs_p (loop_vinfo))
        {
          if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
            fprintf (vect_dump,
                     "not vectorized: can't create epilog loop 1.");
          return false;
        }
      if (!slpeel_can_duplicate_loop_p (loop, loop->single_exit))
        {
          if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
            fprintf (vect_dump,
                     "not vectorized: can't create epilog loop 2.");
          return false;
        }
    }

  return true;
}


/* Function exist_non_indexing_operands_for_use_p

   USE is one of the uses attached to STMT.  Check if USE is
   used in STMT for anything other than indexing an array.  */
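
/* Illustration: in "a[i] = x", the use of 'x' feeds the stored value, so
   a non-indexing operand exists for 'x' and this predicate is true; the
   use of 'i' only computes the array index, so for 'i' this predicate is
   false and the stmt defining 'i' need not be marked relevant on its
   account.  */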

static bool
exist_non_indexing_operands_for_use_p (tree use, tree stmt)
{
  tree operand;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

  /* USE corresponds to some operand in STMT.  If there is no data
     reference in STMT, then any operand that corresponds to USE
     is not indexing an array.  */
  if (!STMT_VINFO_DATA_REF (stmt_info))
    return true;

  /* STMT has a data_ref.  FORNOW this means that it is in one of
     the following forms:
     -1- ARRAY_REF = var
     -2- var = ARRAY_REF
     (This should have been verified in analyze_data_refs).

     'var' in the second case corresponds to a def, not a use,
     so USE cannot correspond to any operands that are not used
     for array indexing.

     Therefore, all we need to check is if STMT falls into the
     first case, and whether var corresponds to USE.  */

  if (TREE_CODE (TREE_OPERAND (stmt, 0)) == SSA_NAME)
    return false;

  operand = TREE_OPERAND (stmt, 1);

  if (TREE_CODE (operand) != SSA_NAME)
    return false;

  if (operand == use)
    return true;

  return false;
}


/* Function vect_analyze_scalar_cycles.

   Examine the cross iteration def-use cycles of scalar variables, by
   analyzing the loop (scalar) PHIs; classify each cycle as one of the
   following: invariant, induction, reduction, unknown.

   Some forms of scalar cycles are not yet supported.

   Example1: reduction (not yet supported):

              loop1:
              for (i=0; i<N; i++)
                 sum += a[i];

   Example2: induction (not yet supported):

              loop2:
              for (i=0; i<N; i++)
                 a[i] = i;

   Note: the following loop *is* vectorizable:

              loop3:
              for (i=0; i<N; i++)
                 a[i] = b[i];

   even though it has a def-use cycle caused by the induction variable i:

              loop: i_2 = PHI (i_0, i_1)
                    a[i_2] = ...;
                    i_1 = i_2 + 1;
                    GOTO loop;

   because the def-use cycle in loop3 is considered "not relevant" - i.e.,
   it does not need to be vectorized because it is only used for array
   indexing (see 'mark_stmts_to_be_vectorized').  The def-use cycle in
   loop2, on the other hand, is relevant (it is being written to
   memory).  */

static void
vect_analyze_scalar_cycles (loop_vec_info loop_vinfo)
{
  tree phi;
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block bb = loop->header;
  tree dummy;

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "=== vect_analyze_scalar_cycles ===");

  for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
    {
      tree access_fn = NULL;
      tree def = PHI_RESULT (phi);
      stmt_vec_info stmt_vinfo = vinfo_for_stmt (phi);
      tree reduc_stmt;

      if (vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "Analyze phi: ");
          print_generic_expr (vect_dump, phi, TDF_SLIM);
        }

      /* Skip virtual phi's.  The data dependences that are associated
         with virtual defs/uses (i.e., memory accesses) are analyzed
         elsewhere.  */

      if (!is_gimple_reg (SSA_NAME_VAR (def)))
        {
          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "virtual phi. skip.");
          continue;
        }

      STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_unknown_def_type;

      /* Analyze the evolution function.  */

      access_fn = analyze_scalar_evolution (loop, def);

      if (!access_fn)
        continue;

      if (vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "Access function of PHI: ");
          print_generic_expr (vect_dump, access_fn, TDF_SLIM);
        }

      if (vect_is_simple_iv_evolution (loop->num, access_fn, &dummy, &dummy))
        {
          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "Detected induction.");
          STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_induction_def;
          continue;
        }

      /* TODO: handle invariant phis.  */

      reduc_stmt = vect_is_simple_reduction (loop, phi);
      if (reduc_stmt)
        {
          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "Detected reduction.");
          STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_reduction_def;
          STMT_VINFO_DEF_TYPE (vinfo_for_stmt (reduc_stmt)) =
            vect_reduction_def;
        }
      else
        if (vect_print_dump_info (REPORT_DETAILS))
          fprintf (vect_dump, "Unknown def-use cycle pattern.");
    }

  return;
}


/* Function vect_analyze_data_ref_dependence.

   Return TRUE if there (might) exist a dependence between a
   memory-reference DRA and a memory-reference DRB.  */
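
/* Illustration, with a vectorization factor (VF) of 4:

        for (i=0; i<N; i++)
          a[i+1] = a[i] + 1;

   has dependence distance 1 (0 < 1 < VF), so the loop is rejected: a
   single vector iteration would read a[i..i+3] before writing
   a[i+1..i+4].  A distance of 4 (a[i+4] = a[i] + 1) is a multiple of VF
   and is preserved by the sequential execution of the vector iterations,
   and a distance >= VF (e.g. 5) is also safe.  */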

static bool
vect_analyze_data_ref_dependence (struct data_dependence_relation *ddr,
                                  loop_vec_info loop_vinfo)
{
  unsigned int i;
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  int vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
  unsigned int loop_depth = 0;
  struct loop *loop_nest = loop;
  struct data_reference *dra = DDR_A (ddr);
  struct data_reference *drb = DDR_B (ddr);
  stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
  stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));

  if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
    return false;

  if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
        {
          fprintf (vect_dump,
                   "not vectorized: can't determine dependence between ");
          print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
          fprintf (vect_dump, " and ");
          print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
        }
      return true;
    }

  if (DDR_NUM_DIST_VECTS (ddr) == 0)
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
        {
          fprintf (vect_dump, "not vectorized: bad dist vector for ");
          print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
          fprintf (vect_dump, " and ");
          print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
        }
      return true;
    }

  /* Find loop depth.  */
  while (loop_nest && loop_nest->outer && loop_nest->outer->outer)
    {
      loop_nest = loop_nest->outer;
      loop_depth++;
    }

  for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
    {
      int dist = DDR_DIST_VECT (ddr, i)[loop_depth];

      if (vect_print_dump_info (REPORT_DR_DETAILS))
        fprintf (vect_dump, "dependence distance = %d.", dist);

      /* The distance is a multiple of the vectorization factor, so the
         dependence is preserved by the vector loop.  Two such references
         also have the same alignment.  */
      if (dist % vectorization_factor == 0)
        {
          VEC_safe_push (dr_p, heap,
                         STMT_VINFO_SAME_ALIGN_REFS (stmtinfo_a), drb);
          VEC_safe_push (dr_p, heap,
                         STMT_VINFO_SAME_ALIGN_REFS (stmtinfo_b), dra);
          if (vect_print_dump_info (REPORT_ALIGNMENT))
            fprintf (vect_dump, "accesses have the same alignment.");
          if (vect_print_dump_info (REPORT_DR_DETAILS))
            {
              fprintf (vect_dump,
                       "dependence distance modulo vf == 0 between ");
              print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
              fprintf (vect_dump, " and ");
              print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
            }
          continue;
        }

      if (abs (dist) >= vectorization_factor)
        {
          /* In this case the dependence distance does not create a
             dependence, as far as vectorization is concerned.  */
          if (vect_print_dump_info (REPORT_DR_DETAILS))
            fprintf (vect_dump, "dependence distance >= VF.");
          continue;
        }

      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
        {
          fprintf (vect_dump,
                   "not vectorized: possible dependence between data-refs ");
          print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
          fprintf (vect_dump, " and ");
          print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
        }

      return true;
    }

  return false;
}


/* Function vect_analyze_data_ref_dependences.

   Examine all the data references in the loop, and make sure there do not
   exist any data dependences between them.  */

static bool
vect_analyze_data_ref_dependences (loop_vec_info loop_vinfo)
{
  unsigned int i;
  varray_type ddrs = LOOP_VINFO_DDRS (loop_vinfo);

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "=== vect_analyze_dependences ===");

  for (i = 0; i < VARRAY_ACTIVE_SIZE (ddrs); i++)
    {
      struct data_dependence_relation *ddr = VARRAY_GENERIC_PTR (ddrs, i);

      if (vect_analyze_data_ref_dependence (ddr, loop_vinfo))
        return false;
    }

  return true;
}


/* Function vect_compute_data_ref_alignment

   Compute the misalignment of the data reference DR.

   Output:
   1. If during the misalignment computation it is found that the data
      reference cannot be vectorized then false is returned.
   2. DR_MISALIGNMENT (DR) is defined.

   FOR NOW: No analysis is actually performed.  Misalignment is calculated
   only for trivial cases.  TODO.  */
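
/* Illustration: with a 16-byte vectype (so ALIGNMENT below is 16 bytes),
   a data reference whose offset from an aligned base is 20 bytes gets
   DR_MISALIGNMENT == 20 % 16 == 4.  A DR_MISALIGNMENT of -1 means the
   misalignment is unknown.  */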

static bool
vect_compute_data_ref_alignment (struct data_reference *dr)
{
  tree stmt = DR_STMT (dr);
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  tree ref = DR_REF (dr);
  tree vectype;
  tree base, base_addr;
  bool base_aligned;
  tree misalign;
  tree aligned_to, alignment;

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "vect_compute_data_ref_alignment:");

  /* Initialize misalignment to unknown.  */
  DR_MISALIGNMENT (dr) = -1;

  misalign = DR_OFFSET_MISALIGNMENT (dr);
  aligned_to = DR_ALIGNED_TO (dr);
  base_addr = DR_BASE_ADDRESS (dr);
  base = build_fold_indirect_ref (base_addr);
  vectype = STMT_VINFO_VECTYPE (stmt_info);
  alignment = ssize_int (TYPE_ALIGN (vectype) / BITS_PER_UNIT);

  if ((aligned_to && tree_int_cst_compare (aligned_to, alignment) < 0)
      || !misalign)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "Unknown alignment for access: ");
          print_generic_expr (vect_dump, base, TDF_SLIM);
        }
      return true;
    }

  if ((DECL_P (base)
       && tree_int_cst_compare (ssize_int (DECL_ALIGN_UNIT (base)),
                                alignment) >= 0)
      || (TREE_CODE (base_addr) == SSA_NAME
          && tree_int_cst_compare (ssize_int (TYPE_ALIGN_UNIT (TREE_TYPE (
                                                TREE_TYPE (base_addr)))),
                                   alignment) >= 0))
    base_aligned = true;
  else
    base_aligned = false;

  if (!base_aligned)
    {
      if (!vect_can_force_dr_alignment_p (base, TYPE_ALIGN (vectype)))
        {
          if (vect_print_dump_info (REPORT_DETAILS))
            {
              fprintf (vect_dump, "can't force alignment of ref: ");
              print_generic_expr (vect_dump, ref, TDF_SLIM);
            }
          return true;
        }

      /* Force the alignment of the decl.
         NOTE: This is the only change to the code we make during
         the analysis phase, before deciding to vectorize the loop.  */
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "force alignment");
      DECL_ALIGN (base) = TYPE_ALIGN (vectype);
      DECL_USER_ALIGN (base) = 1;
    }

  /* At this point we assume that the base is aligned.  */
  gcc_assert (base_aligned
              || (TREE_CODE (base) == VAR_DECL
                  && DECL_ALIGN (base) >= TYPE_ALIGN (vectype)));

  /* Modulo alignment.  */
  misalign = size_binop (TRUNC_MOD_EXPR, misalign, alignment);

  if (!host_integerp (misalign, 1))
    {
      /* Negative or overflowed misalignment value.  */
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "unexpected misalign value");
      return false;
    }

  DR_MISALIGNMENT (dr) = TREE_INT_CST_LOW (misalign);

  if (vect_print_dump_info (REPORT_DETAILS))
    {
      fprintf (vect_dump, "misalign = %d bytes of ref ",
               DR_MISALIGNMENT (dr));
      print_generic_expr (vect_dump, ref, TDF_SLIM);
    }

  return true;
}


/* Function vect_compute_data_refs_alignment

   Compute the misalignment of data references in the loop.
   Return FALSE if a data reference is found that cannot be vectorized.  */

static bool
vect_compute_data_refs_alignment (loop_vec_info loop_vinfo)
{
  varray_type datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
  unsigned int i;

  for (i = 0; i < VARRAY_ACTIVE_SIZE (datarefs); i++)
    {
      struct data_reference *dr = VARRAY_GENERIC_PTR (datarefs, i);
      if (!vect_compute_data_ref_alignment (dr))
        return false;
    }

  return true;
}


/* Function vect_update_misalignment_for_peel

   DR - the data reference whose misalignment is to be adjusted.
   DR_PEEL - the data reference whose misalignment is being made
             zero in the vector loop by the peel.
   NPEEL - the number of iterations in the peel loop if the misalignment
           of DR_PEEL is known at compile time.  */
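
/* Illustration: assume 16-byte vectors (UNITS_PER_SIMD_WORD == 16) and a
   DR over 4-byte elements with DR_MISALIGNMENT (DR) == 8.  Peeling
   NPEEL == 3 iterations advances the access by 3 * 4 == 12 bytes, so the
   new misalignment is (8 + 12) % 16 == 4.  If either misalignment is
   unknown at compile time, the result is conservatively set to unknown
   (-1).  */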

static void
vect_update_misalignment_for_peel (struct data_reference *dr,
                                   struct data_reference *dr_peel, int npeel)
{
  unsigned int i;
  int drsize;
  VEC(dr_p,heap) *same_align_drs;
  struct data_reference *current_dr;

  if (known_alignment_for_access_p (dr)
      && DR_MISALIGNMENT (dr) == DR_MISALIGNMENT (dr_peel))
    {
      DR_MISALIGNMENT (dr) = 0;
      return;
    }

  /* It can be assumed that the data refs with the same alignment as
     dr_peel are aligned in the vector loop.  */
  same_align_drs
    = STMT_VINFO_SAME_ALIGN_REFS (vinfo_for_stmt (DR_STMT (dr_peel)));
  for (i = 0; VEC_iterate (dr_p, same_align_drs, i, current_dr); i++)
    {
      if (current_dr != dr)
        continue;
      gcc_assert (DR_MISALIGNMENT (dr) == DR_MISALIGNMENT (dr_peel));
      DR_MISALIGNMENT (dr) = 0;
      return;
    }

  if (known_alignment_for_access_p (dr)
      && known_alignment_for_access_p (dr_peel))
    {
      drsize = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr))));
      DR_MISALIGNMENT (dr) += npeel * drsize;
      DR_MISALIGNMENT (dr) %= UNITS_PER_SIMD_WORD;
      return;
    }

  DR_MISALIGNMENT (dr) = -1;
}


/* Function vect_verify_datarefs_alignment

   Return TRUE if all data references in the loop can be
   handled with respect to alignment.  */

static bool
vect_verify_datarefs_alignment (loop_vec_info loop_vinfo)
{
  varray_type datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
  enum dr_alignment_support supportable_dr_alignment;
  unsigned int i;

  for (i = 0; i < VARRAY_ACTIVE_SIZE (datarefs); i++)
    {
      struct data_reference *dr = VARRAY_GENERIC_PTR (datarefs, i);
      supportable_dr_alignment = vect_supportable_dr_alignment (dr);
      if (!supportable_dr_alignment)
        {
          if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
            {
              if (DR_IS_READ (dr))
                fprintf (vect_dump,
                         "not vectorized: unsupported unaligned load.");
              else
                fprintf (vect_dump,
                         "not vectorized: unsupported unaligned store.");
            }
          return false;
        }
      if (supportable_dr_alignment != dr_aligned
          && vect_print_dump_info (REPORT_ALIGNMENT))
        fprintf (vect_dump, "Vectorizing an unaligned access.");
    }
  return true;
}


/* Function vect_enhance_data_refs_alignment

   This pass will use loop versioning and loop peeling in order to enhance
   the alignment of data references in the loop.

   FOR NOW: we assume that whatever versioning/peeling takes place, only
   the original loop is to be vectorized; any other loops that are created
   by the transformations performed in this pass are not supposed to be
   vectorized.  This restriction will be relaxed.

   This pass will require a cost model to guide it whether to apply peeling
   or versioning or a combination of the two.  For example, the scheme that
   Intel uses, when given a loop with several memory accesses, is as
   follows: choose one memory access ('p') whose alignment you want to
   force by peeling.  Then, either (1) generate a loop in which 'p' is
   aligned and all other accesses are not necessarily aligned, or (2) use
   loop versioning to generate one loop in which all accesses are aligned,
   and another loop in which only 'p' is necessarily aligned.

   ("Automatic Intra-Register Vectorization for the Intel Architecture",
   Aart J.C. Bik, Milind Girkar, Paul M. Grey and Xinmin Tian,
   International Journal of Parallel Programming, Vol. 30, No. 2, April
   2002.)

   Devising a cost model is the most critical aspect of this work.  It
   will guide us on which access to peel for, whether to use loop
   versioning, how many versions to create, etc.  The cost model will
   probably consist of generic considerations as well as target specific
   considerations (on powerpc for example, misaligned stores are more
   painful than misaligned loads).

   Here are the general steps involved in alignment enhancements:

     -- original loop, before alignment analysis:
        for (i=0; i<N; i++){
          x = q[i];                     # DR_MISALIGNMENT(q) = unknown
          p[i] = y;                     # DR_MISALIGNMENT(p) = unknown
        }

     -- After vect_compute_data_refs_alignment:
        for (i=0; i<N; i++){
          x = q[i];                     # DR_MISALIGNMENT(q) = 3
          p[i] = y;                     # DR_MISALIGNMENT(p) = unknown
        }

     -- Possibility 1: we do loop versioning:
     if (p is aligned) {
        for (i=0; i<N; i++){            # loop 1A
          x = q[i];                     # DR_MISALIGNMENT(q) = 3
          p[i] = y;                     # DR_MISALIGNMENT(p) = 0
        }
     }
     else {
        for (i=0; i<N; i++){            # loop 1B
          x = q[i];                     # DR_MISALIGNMENT(q) = 3
          p[i] = y;                     # DR_MISALIGNMENT(p) = unaligned
        }
     }

     -- Possibility 2: we do loop peeling:
     for (i = 0; i < 3; i++){           # (scalar loop, not to be vectorized).
        x = q[i];
        p[i] = y;
     }
     for (i = 3; i < N; i++){           # loop 2A
        x = q[i];                       # DR_MISALIGNMENT(q) = 0
        p[i] = y;                       # DR_MISALIGNMENT(p) = unknown
     }

     -- Possibility 3: combination of loop peeling and versioning:
     for (i = 0; i < 3; i++){           # (scalar loop, not to be vectorized).
        x = q[i];
        p[i] = y;
     }
     if (p is aligned) {
        for (i = 3; i<N; i++){          # loop 3A
          x = q[i];                     # DR_MISALIGNMENT(q) = 0
          p[i] = y;                     # DR_MISALIGNMENT(p) = 0
        }
     }
     else {
        for (i = 3; i<N; i++){          # loop 3B
          x = q[i];                     # DR_MISALIGNMENT(q) = 0
          p[i] = y;                     # DR_MISALIGNMENT(p) = unaligned
        }
     }

   These loops are later passed to loop_transform to be vectorized.  The
   vectorizer will use the alignment information to guide the
   transformation (whether to generate regular loads/stores, or with
   special handling for misalignment).  */

static bool
vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
{
  varray_type datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
  enum dr_alignment_support supportable_dr_alignment;
  struct data_reference *dr0 = NULL;
  struct data_reference *dr;
  unsigned int i;
  bool do_peeling = false;
  bool do_versioning = false;
  bool stat;

  /* While cost model enhancements are expected in the future, the high
     level view of the code at this time is as follows:

     A) If there is a misaligned write then see if peeling to align this
        write can make all data references satisfy
        vect_supportable_dr_alignment.  If so, update data structures as
        needed and return true.  Note that at this time
        vect_supportable_dr_alignment is known to return false for a
        misaligned write.

     B) If peeling wasn't possible and there is a data reference with an
        unknown misalignment that does not satisfy
        vect_supportable_dr_alignment then see if loop versioning checks
        can be used to make all data references satisfy
        vect_supportable_dr_alignment.  If so, update data structures as
        needed and return true.

     C) If neither peeling nor versioning were successful then return
        false if any data reference does not satisfy
        vect_supportable_dr_alignment.

     D) Return true (all data references satisfy
        vect_supportable_dr_alignment).

     Note, Possibility 3 above (which is peeling and versioning together)
     is not being done at this time.  */

  /* (1) Peeling to force alignment.  */

  /* (1.1) Decide whether to perform peeling, and how many iterations to
     peel:
     Considerations:
     + How many accesses will become aligned due to the peeling
     - How many accesses will become unaligned due to the peeling,
       and the cost of misaligned accesses.
     - The cost of peeling (the extra runtime checks, the increase
       in code size).

     The scheme we use FORNOW: peel to force the alignment of the first
     misaligned store in the loop.
     Rationale: misaligned stores are not yet supported.

     TODO: Use a cost model.  */

  for (i = 0; i < VARRAY_ACTIVE_SIZE (datarefs); i++)
    {
      dr = VARRAY_GENERIC_PTR (datarefs, i);
      if (!DR_IS_READ (dr) && !aligned_access_p (dr))
        {
          dr0 = dr;
          do_peeling = true;
          break;
        }
    }

  /* Often peeling for alignment will require peeling for loop-bound,
     which in turn requires that we know how to adjust the loop ivs after
     the loop.  */
  if (!vect_can_advance_ivs_p (loop_vinfo))
    do_peeling = false;

  if (do_peeling)
    {
      int mis;
      int npeel = 0;

      if (known_alignment_for_access_p (dr0))
        {
          /* Since it's known at compile time, compute the number of
             iterations in the peeled loop (the peeling factor) for use in
             updating DR_MISALIGNMENT values.  The peeling factor is the
             vectorization factor minus the misalignment as an element
             count.  */
          mis = DR_MISALIGNMENT (dr0);
          mis /= GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr0))));
          npeel = LOOP_VINFO_VECT_FACTOR (loop_vinfo) - mis;
        }
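
      /* Illustration: with 16-byte vectors over 4-byte elements (VF == 4)
         and DR_MISALIGNMENT (dr0) == 8 bytes, mis is 8/4 == 2 elements
         and npeel is 4 - 2 == 2.  Peeling 2 iterations advances dr0 by
         8 bytes, making it 16-byte aligned.  */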

      /* Ensure that all data refs can be vectorized after the peel.  */
      for (i = 0; i < VARRAY_ACTIVE_SIZE (datarefs); i++)
        {
          int save_misalignment;

          dr = VARRAY_GENERIC_PTR (datarefs, i);
          if (dr == dr0)
            continue;
          save_misalignment = DR_MISALIGNMENT (dr);
          vect_update_misalignment_for_peel (dr, dr0, npeel);
          supportable_dr_alignment = vect_supportable_dr_alignment (dr);
          DR_MISALIGNMENT (dr) = save_misalignment;

          if (!supportable_dr_alignment)
            {
              do_peeling = false;
              break;
            }
        }

      if (do_peeling)
        {
          /* (1.2) Update the DR_MISALIGNMENT of each data reference DR_i.
             If the misalignment of DR_i is identical to that of dr0 then
             set DR_MISALIGNMENT (DR_i) to zero.  If the misalignment of
             DR_i and dr0 are known at compile time then increment
             DR_MISALIGNMENT (DR_i) by the peeling factor times the
             element size of DR_i (MOD the vectorization factor times the
             size).  Otherwise, the misalignment of DR_i must be set to
             unknown.  */
          for (i = 0; i < VARRAY_ACTIVE_SIZE (datarefs); i++)
            {
              dr = VARRAY_GENERIC_PTR (datarefs, i);
              if (dr == dr0)
                continue;
              vect_update_misalignment_for_peel (dr, dr0, npeel);
            }

          LOOP_VINFO_UNALIGNED_DR (loop_vinfo) = dr0;
          LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo) = DR_MISALIGNMENT (dr0);
          DR_MISALIGNMENT (dr0) = 0;
          if (vect_print_dump_info (REPORT_ALIGNMENT))
            fprintf (vect_dump, "Alignment of access forced using peeling.");

          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "Peeling for alignment will be applied.");

          stat = vect_verify_datarefs_alignment (loop_vinfo);
          gcc_assert (stat);
          return stat;
        }
    }


  /* (2) Versioning to force alignment.  */

  /* Try versioning if:
     1) flag_tree_vect_loop_version is TRUE
     2) optimize_size is FALSE
     3) there is at least one unsupported misaligned data ref with an
        unknown misalignment, and
     4) all misaligned data refs with a known misalignment are supported,
        and
     5) the number of runtime alignment checks is within reason.  */

  do_versioning = flag_tree_vect_loop_version && (!optimize_size);

  if (do_versioning)
    {
      for (i = 0; i < VARRAY_ACTIVE_SIZE (datarefs); i++)
        {
          dr = VARRAY_GENERIC_PTR (datarefs, i);

          if (aligned_access_p (dr))
            continue;

          supportable_dr_alignment = vect_supportable_dr_alignment (dr);

          if (!supportable_dr_alignment)
            {
              tree stmt;
              int mask;
              tree vectype;

              if (known_alignment_for_access_p (dr)
                  || VEC_length (tree,
                                 LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo))
                     >= (unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_CHECKS))
                {
                  do_versioning = false;
                  break;
                }

              stmt = DR_STMT (dr);
              vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
              gcc_assert (vectype);

              /* The rightmost bits of an aligned address must be zeros.
                 Construct the mask needed for this test.  For example,
                 GET_MODE_SIZE for the vector mode V4SI is 16 bytes so the
                 mask must be 15 = 0xf.  */
              mask = GET_MODE_SIZE (TYPE_MODE (vectype)) - 1;

              /* FORNOW: use the same mask to test all potentially
                 unaligned references in the loop.  The vectorizer
                 currently supports a single vector size, see the
                 reference to GET_MODE_NUNITS (TYPE_MODE (vectype)) where
                 the vectorization factor is computed.  */
              gcc_assert (!LOOP_VINFO_PTR_MASK (loop_vinfo)
                          || LOOP_VINFO_PTR_MASK (loop_vinfo) == mask);
              LOOP_VINFO_PTR_MASK (loop_vinfo) = mask;
              VEC_safe_push (tree, heap,
                             LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo),
                             DR_STMT (dr));
            }
        }

      /* Versioning requires at least one misaligned data reference.  */
      if (VEC_length (tree, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo)) == 0)
        do_versioning = false;
      else if (!do_versioning)
        VEC_truncate (tree, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo), 0);
    }

  if (do_versioning)
    {
      VEC(tree,heap) *may_misalign_stmts
        = LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo);
      tree stmt;

      /* It can now be assumed that the data references in the statements
         in LOOP_VINFO_MAY_MISALIGN_STMTS will be aligned in the version
         of the loop being vectorized.  */
      for (i = 0; VEC_iterate (tree, may_misalign_stmts, i, stmt); i++)
        {
          stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
          dr = STMT_VINFO_DATA_REF (stmt_info);
          DR_MISALIGNMENT (dr) = 0;
          if (vect_print_dump_info (REPORT_ALIGNMENT))
            fprintf (vect_dump,
                     "Alignment of access forced using versioning.");
        }

      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "Versioning for alignment will be applied.");

      /* Peeling and versioning can't be done together at this time.  */
      gcc_assert (! (do_peeling && do_versioning));

      stat = vect_verify_datarefs_alignment (loop_vinfo);
      gcc_assert (stat);
      return stat;
    }

  /* This point is reached if neither peeling nor versioning is being
     done.  */
  gcc_assert (! (do_peeling || do_versioning));

  stat = vect_verify_datarefs_alignment (loop_vinfo);
  return stat;
}


/* Function vect_analyze_data_refs_alignment

   Analyze the alignment of the data-references in the loop.
   Return FALSE if a data reference is found that cannot be vectorized.  */

static bool
vect_analyze_data_refs_alignment (loop_vec_info loop_vinfo)
{
  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "=== vect_analyze_data_refs_alignment ===");

  if (!vect_compute_data_refs_alignment (loop_vinfo))
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
        fprintf (vect_dump,
                 "not vectorized: can't calculate alignment for data ref.");
      return false;
    }

  return true;
}


/* Function vect_analyze_data_ref_access.

   Analyze the access pattern of the data-reference DR.  For now, a data
   access has to be consecutive to be considered vectorizable.  */
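
/* Illustration: for "a[i]" with 4-byte elements the step is 4, which
   equals TYPE_SIZE_UNIT of the element type, so the access is
   consecutive.  For "a[2*i]" the step is 8, and for a reversed access
   "a[N-i]" the step is -4; both are rejected here as "not consecutive
   access".  */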

static bool
vect_analyze_data_ref_access (struct data_reference *dr)
{
  tree step = DR_STEP (dr);
  tree scalar_type = TREE_TYPE (DR_REF (dr));

  if (!step || tree_int_cst_compare (step, TYPE_SIZE_UNIT (scalar_type)))
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "not consecutive access");
      return false;
    }
  return true;
}


/* Function vect_analyze_data_ref_accesses.

   Analyze the access pattern of all the data references in the loop.

   FORNOW: the only access pattern that is considered vectorizable is a
   simple step-1 (consecutive) access.

   FORNOW: handle only arrays and pointer accesses.  */

static bool
vect_analyze_data_ref_accesses (loop_vec_info loop_vinfo)
{
  unsigned int i;
  varray_type datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "=== vect_analyze_data_ref_accesses ===");

  for (i = 0; i < VARRAY_ACTIVE_SIZE (datarefs); i++)
    {
      struct data_reference *dr = VARRAY_GENERIC_PTR (datarefs, i);
      if (!vect_analyze_data_ref_access (dr))
        {
          if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
            fprintf (vect_dump,
                     "not vectorized: complicated access pattern.");
          return false;
        }
    }

  return true;
}


/* Function vect_analyze_data_refs.

   Find all the data references in the loop.

   The general structure of the analysis of data refs in the vectorizer is
   as follows:
   1- vect_analyze_data_refs(loop): call compute_data_dependences_for_loop
      to find and analyze all data-refs in the loop and their dependences.
   2- vect_analyze_dependences(): apply dependence testing using ddrs.
   3- vect_analyze_drs_alignment(): check that ref_stmt.alignment is ok.
   4- vect_analyze_drs_access(): check that ref_stmt.step is ok.  */

static bool
vect_analyze_data_refs (loop_vec_info loop_vinfo)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  unsigned int i;
  varray_type datarefs;
  tree scalar_type;

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "=== vect_analyze_data_refs ===");

  compute_data_dependences_for_loop (loop, false,
                                     &(LOOP_VINFO_DATAREFS (loop_vinfo)),
                                     &(LOOP_VINFO_DDRS (loop_vinfo)));

  /* Go through the data-refs, check that the analysis succeeded.  Update
     the pointer from the stmt_vec_info struct to the DR and vectype.  */
  datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
  for (i = 0; i < VARRAY_ACTIVE_SIZE (datarefs); i++)
    {
      struct data_reference *dr = VARRAY_GENERIC_PTR (datarefs, i);
      tree stmt;
      stmt_vec_info stmt_info;

      if (!dr || !DR_REF (dr))
        {
          if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
            fprintf (vect_dump, "not vectorized: unhandled data-ref ");
          return false;
        }

      /* Update DR field in stmt_vec_info struct.  */
      stmt = DR_STMT (dr);
      stmt_info = vinfo_for_stmt (stmt);

      if (STMT_VINFO_DATA_REF (stmt_info))
        {
          if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
            {
              fprintf (vect_dump,
                       "not vectorized: more than one data ref in stmt: ");
              print_generic_expr (vect_dump, stmt, TDF_SLIM);
            }
          return false;
        }
      STMT_VINFO_DATA_REF (stmt_info) = dr;

      /* Check that analysis of the data-ref succeeded.  */
      if (!DR_BASE_ADDRESS (dr) || !DR_OFFSET (dr) || !DR_INIT (dr)
          || !DR_STEP (dr))
        {
          if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
            {
              fprintf (vect_dump,
                       "not vectorized: data ref analysis failed ");
              print_generic_expr (vect_dump, stmt, TDF_SLIM);
            }
          return false;
        }
      if (!DR_MEMTAG (dr))
        {
          if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
            {
              fprintf (vect_dump, "not vectorized: no memory tag for ");
              print_generic_expr (vect_dump, DR_REF (dr), TDF_SLIM);
            }
          return false;
        }

      /* Set vectype for STMT.  */
      scalar_type = TREE_TYPE (DR_REF (dr));
      STMT_VINFO_VECTYPE (stmt_info) =
        get_vectype_for_scalar_type (scalar_type);
      if (!STMT_VINFO_VECTYPE (stmt_info))
        {
          if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
            {
              fprintf (vect_dump,
                       "not vectorized: no vectype for stmt: ");
              print_generic_expr (vect_dump, stmt, TDF_SLIM);
              fprintf (vect_dump, " scalar_type: ");
              print_generic_expr (vect_dump, scalar_type, TDF_DETAILS);
            }
          return false;
        }
    }

  return true;
}


/* Utility functions used by vect_mark_stmts_to_be_vectorized.  */

/* Function vect_mark_relevant.

   Mark STMT as "relevant for vectorization" and add it to WORKLIST.  */

static void
vect_mark_relevant (VEC(tree,heap) **worklist, tree stmt,
                    bool relevant_p, bool live_p)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  bool save_relevant_p = STMT_VINFO_RELEVANT_P (stmt_info);
  bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "mark relevant %d, live %d.", relevant_p, live_p);

  STMT_VINFO_LIVE_P (stmt_info) |= live_p;
  STMT_VINFO_RELEVANT_P (stmt_info) |= relevant_p;

  if (TREE_CODE (stmt) == PHI_NODE)
    /* Don't put phi-nodes in the worklist.  Phis that are marked relevant
       or live will fail vectorization later on.  */
    return;

  if (STMT_VINFO_RELEVANT_P (stmt_info) == save_relevant_p
      && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "already marked relevant/live.");
      return;
    }

  VEC_safe_push (tree, heap, *worklist, stmt);
}


/* Function vect_stmt_relevant_p.

   Return true if STMT in the loop that is represented by LOOP_VINFO is
   "relevant for vectorization".

   A stmt is considered "relevant for vectorization" if:
   - it has uses outside the loop,
   - it has vdefs (it alters memory), or
   - it is a control stmt in the loop (other than the exit condition).

   CHECKME: what other side effects would the vectorizer allow?  */
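
/* Illustration: in

        for (i=0; i<N; i++)
          {
            t = b[i];
            a[i] = t;
            sum = sum + t;
          }

   the store "a[i] = t" alters memory (has a vdef) and is therefore
   relevant; if "sum" is read after the loop (through a loop-closed exit
   phi), the stmt defining it is live.  */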

static bool
vect_stmt_relevant_p (tree stmt, loop_vec_info loop_vinfo,
                      bool *relevant_p, bool *live_p)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  ssa_op_iter op_iter;
  imm_use_iterator imm_iter;
  use_operand_p use_p;
  def_operand_p def_p;

  *relevant_p = false;
  *live_p = false;

  /* Cond stmt other than loop exit cond.  */
  if (is_ctrl_stmt (stmt) && (stmt != LOOP_VINFO_EXIT_COND (loop_vinfo)))
    *relevant_p = true;

  /* Changing memory.  */
  if (TREE_CODE (stmt) != PHI_NODE)
    if (!ZERO_SSA_OPERANDS (stmt, SSA_OP_VIRTUAL_DEFS))
      {
        if (vect_print_dump_info (REPORT_DETAILS))
          fprintf (vect_dump, "vec_stmt_relevant_p: stmt has vdefs.");
        *relevant_p = true;
      }

  /* Uses outside the loop.  */
  FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, op_iter, SSA_OP_DEF)
    {
      FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
        {
          basic_block bb = bb_for_stmt (USE_STMT (use_p));
          if (!flow_bb_inside_loop_p (loop, bb))
            {
              if (vect_print_dump_info (REPORT_DETAILS))
                fprintf (vect_dump,
                         "vec_stmt_relevant_p: used out of loop.");

              /* We expect all such uses to be in the loop exit phis
                 (because of loop closed form).  */
              gcc_assert (TREE_CODE (USE_STMT (use_p)) == PHI_NODE);
              gcc_assert (bb == loop->single_exit->dest);

              *live_p = true;
            }
        }
    }

  return (*live_p || *relevant_p);
}


/* Function vect_mark_stmts_to_be_vectorized.

   Not all stmts in the loop need to be vectorized.  For example:

     for i...
       for j...
   1.    T0 = i + j
   2.    T1 = a[T0]

   3.    j = j + 1

   Stmts 1 and 3 do not need to be vectorized, because loop control and
   addressing of vectorized data-refs are handled differently.

   This pass detects such stmts.  */

static bool
vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
{
  VEC(tree,heap) *worklist;
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
  unsigned int nbbs = loop->num_nodes;
  block_stmt_iterator si;
  tree stmt, use;
  stmt_ann_t ann;
  ssa_op_iter iter;
  unsigned int i;
  stmt_vec_info stmt_vinfo;
  basic_block bb;
  tree phi;
  bool relevant_p, live_p;
  tree def, def_stmt;
  enum vect_def_type dt;

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "=== vect_mark_stmts_to_be_vectorized ===");

  worklist = VEC_alloc (tree, heap, 64);

  /* 1. Init worklist.  */

  bb = loop->header;
  for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "init: phi relevant? ");
          print_generic_expr (vect_dump, phi, TDF_SLIM);
        }

      if (vect_stmt_relevant_p (phi, loop_vinfo, &relevant_p, &live_p))
        vect_mark_relevant (&worklist, phi, relevant_p, live_p);
    }

  for (i = 0; i < nbbs; i++)
    {
      bb = bbs[i];
      for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
        {
          stmt = bsi_stmt (si);

          if (vect_print_dump_info (REPORT_DETAILS))
            {
              fprintf (vect_dump, "init: stmt relevant? ");
              print_generic_expr (vect_dump, stmt, TDF_SLIM);
            }

          if (vect_stmt_relevant_p (stmt, loop_vinfo, &relevant_p, &live_p))
            vect_mark_relevant (&worklist, stmt, relevant_p, live_p);
        }
    }

  /* 2. Process worklist.  */

  while (VEC_length (tree, worklist) > 0)
    {
      stmt = VEC_pop (tree, worklist);

      if (vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "worklist: examine stmt: ");
          print_generic_expr (vect_dump, stmt, TDF_SLIM);
        }

      /* Examine the USEs of STMT.  For each ssa-name USE that is defined
         in the loop, mark the stmt that defines it (DEF_STMT) as
         relevant/irrelevant and live/dead according to the liveness and
         relevance properties of STMT.  */

      gcc_assert (TREE_CODE (stmt) != PHI_NODE);

      ann = stmt_ann (stmt);
      stmt_vinfo = vinfo_for_stmt (stmt);

      relevant_p = STMT_VINFO_RELEVANT_P (stmt_vinfo);
      live_p = STMT_VINFO_LIVE_P (stmt_vinfo);

      /* Generally, the liveness and relevance properties of STMT are
         propagated to the DEF_STMTs of its USEs:
             STMT_VINFO_LIVE_P (DEF_STMT_info) <-- live_p
             STMT_VINFO_RELEVANT_P (DEF_STMT_info) <-- relevant_p

         Exceptions:

         (case 1)
           If USE is used only for address computations (e.g. array
           indexing), which does not need to be directly vectorized, then
           the liveness/relevance of the respective DEF_STMT is left
           unchanged.

         (case 2)
           If STMT has been identified as defining a reduction variable,
           then we have two cases:
           (case 2.1)
             The last use of STMT is the reduction-variable, which is
             defined by a loop-header-phi.  We don't want to mark the phi
             as live or relevant (because it does not need to be
             vectorized, it is handled as part of the vectorization of the
             reduction), so in this case we skip the call to
             vect_mark_relevant.
           (case 2.2)
             The rest of the uses of STMT are defined in the loop body.
             For the def_stmt of these uses we want to set
             liveness/relevance as follows:
                 STMT_VINFO_LIVE_P (DEF_STMT_info) <-- false
                 STMT_VINFO_RELEVANT_P (DEF_STMT_info) <-- true
             because even though STMT is classified as live (since it
             defines a value that is used across loop iterations) and
             irrelevant (since it is not used inside the loop), it will be
             vectorized, and therefore the corresponding DEF_STMTs need to
             be marked as relevant.  */

      /* case 2.2:  */
      if (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def)
        {
          gcc_assert (!relevant_p && live_p);
          relevant_p = true;
          live_p = false;
        }

      FOR_EACH_SSA_TREE_OPERAND (use, stmt, iter, SSA_OP_USE)
        {
          /* case 1: we are only interested in uses that need to be
             vectorized.  Uses that are used for address computation are
             not considered relevant.  */
          if (!exist_non_indexing_operands_for_use_p (use, stmt))
            continue;

          if (!vect_is_simple_use (use, loop_vinfo, &def_stmt, &def, &dt))
            {
              if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
                fprintf (vect_dump,
                         "not vectorized: unsupported use in stmt.");
              VEC_free (tree, heap, worklist);
              return false;
            }

          if (!def_stmt || IS_EMPTY_STMT (def_stmt))
            continue;

          if (vect_print_dump_info (REPORT_DETAILS))
            {
              fprintf (vect_dump, "worklist: examine use %d: ", i);
              print_generic_expr (vect_dump, use, TDF_SLIM);
            }

          bb = bb_for_stmt (def_stmt);
          if (!flow_bb_inside_loop_p (loop, bb))
            continue;

          /* case 2.1: the reduction-use does not mark the defining-phi
             as relevant.  */
          if (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
              && TREE_CODE (def_stmt) == PHI_NODE)
            continue;

          vect_mark_relevant (&worklist, def_stmt, relevant_p, live_p);
        }
    } /* while worklist */

  VEC_free (tree, heap, worklist);
  return true;
}


/* Function vect_can_advance_ivs_p

   In case the number of iterations that LOOP iterates is unknown at
   compile time, an epilog loop will be generated, and the loop induction
   variables (IVs) will be "advanced" to the value they are supposed to
   take just before the epilog loop.  Here we check that the access
   function of the loop IVs and the expression that represents the loop
   bound are simple enough.  These restrictions will be relaxed in the
   future.  */
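
/* Illustration: an IV updated as "i = i + 1" has the affine evolution
   {init, +, 1}; its evolution part (the step) is the constant 1, so the
   IV can be advanced to its value after NITERS iterations,
   init + niters * 1.  An IV whose step is itself an IV (a polynomial
   evolution of degree >= 2, i.e. the evolution part is again a chrec) is
   rejected below.  */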

static bool
vect_can_advance_ivs_p (loop_vec_info loop_vinfo)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block bb = loop->header;
  tree phi;

  /* Analyze phi functions of the loop header.  */

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "=== vect_can_advance_ivs_p ===");

  for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
    {
      tree access_fn = NULL;
      tree evolution_part;

      if (vect_print_dump_info (REPORT_DETAILS))
	{
	  fprintf (vect_dump, "Analyze phi: ");
	  print_generic_expr (vect_dump, phi, TDF_SLIM);
	}

      /* Skip virtual phis.  The data dependences that are associated with
	 virtual defs/uses (i.e., memory accesses) are analyzed elsewhere.  */

      if (!is_gimple_reg (SSA_NAME_VAR (PHI_RESULT (phi))))
	{
	  if (vect_print_dump_info (REPORT_DETAILS))
	    fprintf (vect_dump, "virtual phi. skip.");
	  continue;
	}

      /* Skip reduction phis.  */

      if (STMT_VINFO_DEF_TYPE (vinfo_for_stmt (phi)) == vect_reduction_def)
	{
	  if (vect_print_dump_info (REPORT_DETAILS))
	    fprintf (vect_dump, "reduc phi. skip.");
	  continue;
	}

      /* Analyze the evolution function.  */

      access_fn = instantiate_parameters
	(loop, analyze_scalar_evolution (loop, PHI_RESULT (phi)));

      if (!access_fn)
	{
	  if (vect_print_dump_info (REPORT_DETAILS))
	    fprintf (vect_dump, "No Access function.");
	  return false;
	}

      if (vect_print_dump_info (REPORT_DETAILS))
	{
	  fprintf (vect_dump, "Access function of PHI: ");
	  print_generic_expr (vect_dump, access_fn, TDF_SLIM);
	}

      evolution_part = evolution_part_in_loop_num (access_fn, loop->num);

      if (evolution_part == NULL_TREE)
	{
	  if (vect_print_dump_info (REPORT_DETAILS))
	    fprintf (vect_dump, "No evolution.");
	  return false;
	}

      /* FORNOW: We do not transform initial conditions of IVs
	 whose evolution functions are a polynomial of degree >= 2.  */

      if (tree_is_chrec (evolution_part))
	return false;
    }

  return true;
}


/* Function vect_get_loop_niters.

   Determine how many iterations the loop executes.
   If an expression that represents the number of iterations
   can be constructed, place it in NUMBER_OF_ITERATIONS.
   Return the loop exit condition.  */
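/* For example (an illustrative sketch): for a loop of the form

       for (i = 0; i < n; i++)
	 a[i] = 0;

   the number of iterations is expected to be the expression 'n', and the
   returned exit condition is the COND_EXPR that tests 'i < n'.  */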

static tree
vect_get_loop_niters (struct loop *loop, tree *number_of_iterations)
{
  tree niters;

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "=== get_loop_niters ===");

  niters = number_of_iterations_in_loop (loop);

  if (niters != NULL_TREE
      && niters != chrec_dont_know)
    {
      *number_of_iterations = niters;

      if (vect_print_dump_info (REPORT_DETAILS))
	{
	  fprintf (vect_dump, "==> get_loop_niters:");
	  print_generic_expr (vect_dump, *number_of_iterations, TDF_SLIM);
	}
    }

  return get_loop_exit_condition (loop);
}


/* Function vect_analyze_loop_form.

   Verify the following restrictions (some may be relaxed in the future):
   - it's an inner-most loop
   - number of BBs = 2 (which are the loop header and the latch)
   - the loop has a pre-header
   - the loop has a single entry and exit
   - the loop exit condition is simple enough, and the number of iterations
     can be analyzed (a countable loop).  */
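/* As an illustrative sketch (not part of the original comments), a loop
   that satisfies these restrictions looks roughly like:

       i = 0;
       do
	 {
	   a[i] = b[i];	     <-- header: all the executable stmts
	   i = i + 1;
	 }
       while (i < n);	     <-- single countable exit; the latch is empty.  */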

static loop_vec_info
vect_analyze_loop_form (struct loop *loop)
{
  loop_vec_info loop_vinfo;
  tree loop_cond;
  tree number_of_iterations = NULL;

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "=== vect_analyze_loop_form ===");

  if (loop->inner)
    {
      if (vect_print_dump_info (REPORT_OUTER_LOOPS))
	fprintf (vect_dump, "not vectorized: nested loop.");
      return NULL;
    }

  if (!loop->single_exit
      || loop->num_nodes != 2
      || EDGE_COUNT (loop->header->preds) != 2)
    {
      if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
	{
	  if (!loop->single_exit)
	    fprintf (vect_dump, "not vectorized: multiple exits.");
	  else if (loop->num_nodes != 2)
	    fprintf (vect_dump, "not vectorized: too many BBs in loop.");
	  else if (EDGE_COUNT (loop->header->preds) != 2)
	    fprintf (vect_dump, "not vectorized: too many incoming edges.");
	}

      return NULL;
    }

  /* We assume that the loop exit condition is at the end of the loop, i.e.,
     that the loop is represented as a do-while (with a proper if-guard
     before the loop if needed), where the loop header contains all the
     executable statements, and the latch is empty.  */
  if (!empty_block_p (loop->latch))
    {
      if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
	fprintf (vect_dump, "not vectorized: unexpected loop form.");
      return NULL;
    }

  /* Make sure there exists a single-predecessor exit bb:  */
  if (!single_pred_p (loop->single_exit->dest))
    {
      edge e = loop->single_exit;
      if (!(e->flags & EDGE_ABNORMAL))
	{
	  split_loop_exit_edge (e);
	  if (vect_print_dump_info (REPORT_DETAILS))
	    fprintf (vect_dump, "split exit edge.");
	}
      else
	{
	  if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
	    fprintf (vect_dump, "not vectorized: abnormal loop exit edge.");
	  return NULL;
	}
    }

  if (empty_block_p (loop->header))
    {
      if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
	fprintf (vect_dump, "not vectorized: empty loop.");
      return NULL;
    }

  loop_cond = vect_get_loop_niters (loop, &number_of_iterations);
  if (!loop_cond)
    {
      if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
	fprintf (vect_dump, "not vectorized: complicated exit condition.");
      return NULL;
    }

  if (!number_of_iterations)
    {
      if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
	fprintf (vect_dump,
		 "not vectorized: number of iterations cannot be computed.");
      return NULL;
    }

  if (chrec_contains_undetermined (number_of_iterations))
    {
      if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
	fprintf (vect_dump, "Infinite number of iterations.");
      return NULL;
    }

  loop_vinfo = new_loop_vec_info (loop);
  LOOP_VINFO_NITERS (loop_vinfo) = number_of_iterations;

  if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
    {
      if (vect_print_dump_info (REPORT_DETAILS))
	{
	  fprintf (vect_dump, "Symbolic number of iterations is ");
	  print_generic_expr (vect_dump, number_of_iterations, TDF_DETAILS);
	}
    }
  else if (LOOP_VINFO_INT_NITERS (loop_vinfo) == 0)
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
	fprintf (vect_dump, "not vectorized: number of iterations = 0.");
      return NULL;
    }

  LOOP_VINFO_EXIT_COND (loop_vinfo) = loop_cond;

  return loop_vinfo;
}


/* Function vect_analyze_loop.

   Apply a set of analyses on LOOP, and create a loop_vec_info struct
   for it.  The different analyses will record information in the
   loop_vec_info struct.  */
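/* A hedged usage sketch (not part of the original sources): the vectorizer
   driver is expected to call this once per loop, along the lines of:

       loop_vec_info loop_vinfo = vect_analyze_loop (loop);
       if (loop_vinfo && LOOP_VINFO_VECTORIZABLE_P (loop_vinfo))
	 ... transform the loop ...;

   i.e., a NULL result (or an unset vectorizable flag) means the loop is
   left in its scalar form.  */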
loop_vec_info
vect_analyze_loop (struct loop *loop)
{
  bool ok;
  loop_vec_info loop_vinfo;

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "===== analyze_loop_nest =====");

  /* Check the CFG characteristics of the loop (nesting, entry/exit, etc.).  */

  loop_vinfo = vect_analyze_loop_form (loop);
  if (!loop_vinfo)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
	fprintf (vect_dump, "bad loop form.");
      return NULL;
    }

  /* Find all data references in the loop (which correspond to vdefs/vuses)
     and analyze their evolution in the loop.

     FORNOW: Handle only simple array references, whose
     alignment can be forced, and aligned pointer-references.  */
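  /* For illustration (an assumption-labeled sketch): references such as
     'a[i]' (an array whose alignment the compiler can force) or '*p' with
     'p' known to be aligned are the kind handled here; arbitrary unaligned
     pointer accesses are not handled at this stage.  */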

  ok = vect_analyze_data_refs (loop_vinfo);
  if (!ok)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
	fprintf (vect_dump, "bad data references.");
      destroy_loop_vec_info (loop_vinfo);
      return NULL;
    }

  /* Classify all cross-iteration scalar data-flow cycles.
     Cross-iteration cycles caused by virtual phis are analyzed separately.  */
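  /* E.g. (a sketch, not from the original comments): 'i = i + 1' forms an
     induction cycle, while 's = s + a[i]' forms a reduction cycle; both
     are cross-iteration scalar cycles that get classified here.  */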

  vect_analyze_scalar_cycles (loop_vinfo);

  /* Data-flow analysis to detect stmts that do not need to be vectorized.  */

  ok = vect_mark_stmts_to_be_vectorized (loop_vinfo);
  if (!ok)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
	fprintf (vect_dump, "unexpected pattern.");
      destroy_loop_vec_info (loop_vinfo);
      return NULL;
    }

  /* Analyze the alignment of the data-refs in the loop.
     Fail if a data reference is found that cannot be vectorized.  */

  ok = vect_analyze_data_refs_alignment (loop_vinfo);
  if (!ok)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
	fprintf (vect_dump, "bad data alignment.");
      destroy_loop_vec_info (loop_vinfo);
      return NULL;
    }

  ok = vect_determine_vectorization_factor (loop_vinfo);
  if (!ok)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
	fprintf (vect_dump, "can't determine vectorization factor.");
      destroy_loop_vec_info (loop_vinfo);
      return NULL;
    }

  /* Analyze data dependences between the data-refs in the loop.
     FORNOW: fail at the first data dependence that we encounter.  */
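  /* For example (illustrative, not from the original comments): in
     'for (i = 0; i < n; i++) a[i+1] = a[i] + 1;' there is a
     cross-iteration flow dependence of distance 1 between the read of
     a[i] and the write of a[i+1], which makes this analysis fail.  */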

  ok = vect_analyze_data_ref_dependences (loop_vinfo);
  if (!ok)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
	fprintf (vect_dump, "bad data dependence.");
      destroy_loop_vec_info (loop_vinfo);
      return NULL;
    }

  /* Analyze the access patterns of the data-refs in the loop (consecutive,
     complex, etc.).  FORNOW: Only handle consecutive access pattern.  */
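  /* E.g. (a sketch): 'a[i]' is a consecutive (unit-stride) access and is
     accepted; a strided access such as 'a[2*i]' is not handled at this
     stage.  */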

  ok = vect_analyze_data_ref_accesses (loop_vinfo);
  if (!ok)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
	fprintf (vect_dump, "bad data access.");
      destroy_loop_vec_info (loop_vinfo);
      return NULL;
    }

  /* This pass will decide on using loop versioning and/or loop peeling in
     order to enhance the alignment of data references in the loop.  */
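  /* A worked example (illustrative): with 16-byte vectors and 4-byte
     elements (VF = 4), an access that starts at a[1] is misaligned by 4
     bytes; peeling 3 scalar iterations makes the vectorized accesses
     start at a[4], which is 16-byte aligned (assuming a[0] itself is
     aligned).  */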

  ok = vect_enhance_data_refs_alignment (loop_vinfo);
  if (!ok)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
	fprintf (vect_dump, "bad data alignment.");
      destroy_loop_vec_info (loop_vinfo);
      return NULL;
    }

  /* Scan all the operations in the loop and make sure they are
     vectorizable.  */

  ok = vect_analyze_operations (loop_vinfo);
  if (!ok)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
	fprintf (vect_dump, "bad operation or unsupported loop bound.");
      destroy_loop_vec_info (loop_vinfo);
      return NULL;
    }

  LOOP_VINFO_VECTORIZABLE_P (loop_vinfo) = 1;

  return loop_vinfo;
}