1 /* Transformation Utilities for Loop Vectorization.
2    Copyright (C) 2003,2004,2005 Free Software Foundation, Inc.
3    Contributed by Dorit Naishlos <dorit@il.ibm.com>
4 
5 This file is part of GCC.
6 
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
10 version.
11 
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
15 for more details.
16 
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING.  If not, write to the Free
19 Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
20 02110-1301, USA.  */
21 
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "tm.h"
26 #include "ggc.h"
27 #include "tree.h"
28 #include "target.h"
29 #include "rtl.h"
30 #include "basic-block.h"
31 #include "diagnostic.h"
32 #include "tree-flow.h"
33 #include "tree-dump.h"
34 #include "timevar.h"
35 #include "cfgloop.h"
36 #include "expr.h"
37 #include "optabs.h"
38 #include "recog.h"
39 #include "tree-data-ref.h"
40 #include "tree-chrec.h"
41 #include "tree-scalar-evolution.h"
42 #include "tree-vectorizer.h"
43 #include "langhooks.h"
44 #include "tree-pass.h"
45 #include "toplev.h"
46 #include "real.h"
47 
48 /* Utility functions for the code transformation.  */
49 static bool vect_transform_stmt (tree, block_stmt_iterator *);
50 static void vect_align_data_ref (tree);
51 static tree vect_create_destination_var (tree, tree);
52 static tree vect_create_data_ref_ptr
53   (tree, block_stmt_iterator *, tree, tree *, bool);
54 static tree vect_create_addr_base_for_vector_ref (tree, tree *, tree);
55 static tree vect_get_new_vect_var (tree, enum vect_var_kind, const char *);
56 static tree vect_get_vec_def_for_operand (tree, tree, tree *);
57 static tree vect_init_vector (tree, tree);
58 static void vect_finish_stmt_generation
59   (tree stmt, tree vec_stmt, block_stmt_iterator *bsi);
60 static bool vect_is_simple_cond (tree, loop_vec_info);
61 static void update_vuses_to_preheader (tree, struct loop*);
62 static tree get_initial_def_for_reduction (tree, tree, tree *);
63 
64 /* Utility function dealing with loop peeling (not peeling itself).  */
65 static void vect_generate_tmps_on_preheader
66   (loop_vec_info, tree *, tree *, tree *);
67 static tree vect_build_loop_niters (loop_vec_info);
68 static void vect_update_ivs_after_vectorizer (loop_vec_info, tree, edge);
69 static tree vect_gen_niters_for_prolog_loop (loop_vec_info, tree);
70 static void vect_update_init_of_dr (struct data_reference *, tree niters);
71 static void vect_update_inits_of_drs (loop_vec_info, tree);
72 static void vect_do_peeling_for_alignment (loop_vec_info, struct loops *);
73 static void vect_do_peeling_for_loop_bound
74   (loop_vec_info, tree *, struct loops *);
75 static int vect_min_worthwhile_factor (enum tree_code);
76 
77 
78 /* Function vect_get_new_vect_var.
79 
80    Returns a name for a new variable. The current naming scheme appends the
81    prefix "vect_" or "vect_p" (depending on the value of VAR_KIND) to
82    the name of vectorizer generated variables, and appends that to NAME if
83    provided.  */
84 
85 static tree
vect_get_new_vect_var(tree type,enum vect_var_kind var_kind,const char * name)86 vect_get_new_vect_var (tree type, enum vect_var_kind var_kind, const char *name)
87 {
88   const char *prefix;
89   tree new_vect_var;
90 
91   switch (var_kind)
92   {
93   case vect_simple_var:
94     prefix = "vect_";
95     break;
96   case vect_scalar_var:
97     prefix = "stmp_";
98     break;
99   case vect_pointer_var:
100     prefix = "vect_p";
101     break;
102   default:
103     gcc_unreachable ();
104   }
105 
106   if (name)
107     new_vect_var = create_tmp_var (type, concat (prefix, name, NULL));
108   else
109     new_vect_var = create_tmp_var (type, prefix);
110 
111   return new_vect_var;
112 }
113 
114 
115 /* Function vect_create_addr_base_for_vector_ref.
116 
117    Create an expression that computes the address of the first memory location
118    that will be accessed for a data reference.
119 
120    Input:
121    STMT: The statement containing the data reference.
122    NEW_STMT_LIST: Must be initialized to NULL_TREE or a statement list.
123    OFFSET: Optional. If supplied, it is be added to the initial address.
124 
125    Output:
126    1. Return an SSA_NAME whose value is the address of the memory location of
127       the first vector of the data reference.
128    2. If new_stmt_list is not NULL_TREE after return then the caller must insert
129       these statement(s) which define the returned SSA_NAME.
130 
131    FORNOW: We are only handling array accesses with step 1.  */
132 
static tree
vect_create_addr_base_for_vector_ref (tree stmt,
                                      tree *new_stmt_list,
				      tree offset)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
  tree data_ref_base = unshare_expr (DR_BASE_ADDRESS (dr));
  /* BASE_NAME is only used to derive a readable name for the new
     pointer variable (via get_name below).  */
  tree base_name = build_fold_indirect_ref (data_ref_base);
  tree ref = DR_REF (dr);
  tree scalar_type = TREE_TYPE (ref);
  tree scalar_ptr_type = build_pointer_type (scalar_type);
  tree vec_stmt;
  tree new_temp;
  tree addr_base, addr_expr;
  tree dest, new_stmt;
  tree base_offset = unshare_expr (DR_OFFSET (dr));
  tree init = unshare_expr (DR_INIT (dr));

  /* Create base_offset: the constant initial offset (DR_INIT) folded
     into the variable offset part (DR_OFFSET) of the data reference.  */
  base_offset = size_binop (PLUS_EXPR, base_offset, init);
  dest = create_tmp_var (TREE_TYPE (base_offset), "base_off");
  add_referenced_tmp_var (dest);
  /* Gimplify the combined offset; any statements this generates are
     accumulated in NEW_STMT_LIST for the caller to insert.  */
  base_offset = force_gimple_operand (base_offset, &new_stmt, false, dest);
  append_to_statement_list_force (new_stmt, new_stmt_list);

  if (offset)
    {
      tree tmp = create_tmp_var (TREE_TYPE (base_offset), "offset");
      add_referenced_tmp_var (tmp);
      /* OFFSET is in units of the access step, so scale it by DR_STEP
	 before adding it to the byte offset.  */
      offset = fold_build2 (MULT_EXPR, TREE_TYPE (offset), offset,
			    DR_STEP (dr));
      base_offset = fold_build2 (PLUS_EXPR, TREE_TYPE (base_offset),
				 base_offset, offset);
      base_offset = force_gimple_operand (base_offset, &new_stmt, false, tmp);
      append_to_statement_list_force (new_stmt, new_stmt_list);
    }

  /* base + base_offset */
  addr_base = fold_build2 (PLUS_EXPR, TREE_TYPE (data_ref_base), data_ref_base,
			   base_offset);

  /* addr_expr = addr_base: copy the computed address into a fresh
     SSA name of pointer-to-scalar type.  */
  addr_expr = vect_get_new_vect_var (scalar_ptr_type, vect_pointer_var,
                                     get_name (base_name));
  add_referenced_tmp_var (addr_expr);
  vec_stmt = build2 (MODIFY_EXPR, void_type_node, addr_expr, addr_base);
  new_temp = make_ssa_name (addr_expr, vec_stmt);
  TREE_OPERAND (vec_stmt, 0) = new_temp;
  append_to_statement_list_force (vec_stmt, new_stmt_list);

  if (vect_print_dump_info (REPORT_DETAILS))
    {
      fprintf (vect_dump, "created ");
      print_generic_expr (vect_dump, vec_stmt, TDF_SLIM);
    }
  /* Return the SSA name holding the address; its defining statements
     are in NEW_STMT_LIST and must be inserted by the caller.  */
  return new_temp;
}
191 
192 
193 /* Function vect_align_data_ref.
194 
195    Handle misalignment of a memory accesses.
196 
197    FORNOW: Can't handle misaligned accesses.
198    Make sure that the dataref is aligned.  */
199 
200 static void
vect_align_data_ref(tree stmt)201 vect_align_data_ref (tree stmt)
202 {
203   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
204   struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
205 
206   /* FORNOW: can't handle misaligned accesses;
207              all accesses expected to be aligned.  */
208   gcc_assert (aligned_access_p (dr));
209 }
210 
211 
212 /* Function vect_create_data_ref_ptr.
213 
214    Create a memory reference expression for vector access, to be used in a
215    vector load/store stmt. The reference is based on a new pointer to vector
216    type (vp).
217 
218    Input:
219    1. STMT: a stmt that references memory. Expected to be of the form
220          MODIFY_EXPR <name, data-ref> or MODIFY_EXPR <data-ref, name>.
221    2. BSI: block_stmt_iterator where new stmts can be added.
222    3. OFFSET (optional): an offset to be added to the initial address accessed
223         by the data-ref in STMT.
224    4. ONLY_INIT: indicate if vp is to be updated in the loop, or remain
225         pointing to the initial address.
226 
227    Output:
228    1. Declare a new ptr to vector_type, and have it point to the base of the
229       data reference (initial addressed accessed by the data reference).
230       For example, for vector of type V8HI, the following code is generated:
231 
232       v8hi *vp;
233       vp = (v8hi *)initial_address;
234 
235       if OFFSET is not supplied:
236          initial_address = &a[init];
237       if OFFSET is supplied:
238          initial_address = &a[init + OFFSET];
239 
240       Return the initial_address in INITIAL_ADDRESS.
241 
242    2. If ONLY_INIT is true, return the initial pointer.  Otherwise, create
243       a data-reference in the loop based on the new vector pointer vp.  This
244       new data reference will by some means be updated each iteration of
245       the loop.  Return the pointer vp'.
246 
247    FORNOW: handle only aligned and consecutive accesses.  */
248 
static tree
vect_create_data_ref_ptr (tree stmt,
			  block_stmt_iterator *bsi ATTRIBUTE_UNUSED,
			  tree offset, tree *initial_address, bool only_init)
{
  tree base_name;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  tree vect_ptr_type;
  tree vect_ptr;
  tree tag;
  tree new_temp;
  tree vec_stmt;
  tree new_stmt_list = NULL_TREE;
  edge pe = loop_preheader_edge (loop);
  basic_block new_bb;
  tree vect_ptr_init;
  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);

  /* Derive a readable base name from the data reference, used only for
     naming the new pointer and for dump output.  */
  base_name =  build_fold_indirect_ref (unshare_expr (DR_BASE_ADDRESS (dr)));

  if (vect_print_dump_info (REPORT_DETAILS))
    {
      tree data_ref_base = base_name;
      fprintf (vect_dump, "create vector-pointer variable to type: ");
      print_generic_expr (vect_dump, vectype, TDF_SLIM);
      if (TREE_CODE (data_ref_base) == VAR_DECL)
        fprintf (vect_dump, "  vectorizing a one dimensional array ref: ");
      else if (TREE_CODE (data_ref_base) == ARRAY_REF)
        fprintf (vect_dump, "  vectorizing a multidimensional array ref: ");
      else if (TREE_CODE (data_ref_base) == COMPONENT_REF)
        fprintf (vect_dump, "  vectorizing a record based array ref: ");
      else if (TREE_CODE (data_ref_base) == SSA_NAME)
        fprintf (vect_dump, "  vectorizing a pointer ref: ");
      print_generic_expr (vect_dump, base_name, TDF_SLIM);
    }

  /** (1) Create the new vector-pointer variable:  **/

  vect_ptr_type = build_pointer_type (vectype);
  vect_ptr = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var,
                                    get_name (base_name));
  add_referenced_tmp_var (vect_ptr);


  /** (2) Add aliasing information to the new vector-pointer:
          (The points-to info (DR_PTR_INFO) may be defined later.)  **/

  tag = DR_MEMTAG (dr);
  gcc_assert (tag);

  /* If tag is a variable (and NOT_A_TAG) than a new type alias
     tag must be created with tag added to its may alias list.  */
  if (var_ann (tag)->mem_tag_kind == NOT_A_TAG)
    new_type_alias (vect_ptr, tag);
  else
    var_ann (vect_ptr)->type_mem_tag = tag;

  var_ann (vect_ptr)->subvars = DR_SUBVARS (dr);

  /** (3) Calculate the initial address the vector-pointer, and set
          the vector-pointer to point to it before the loop:  **/

  /* Create: (&(base[init_val+offset]) in the loop preheader.  */
  new_temp = vect_create_addr_base_for_vector_ref (stmt, &new_stmt_list,
                                                   offset);
  pe = loop_preheader_edge (loop);
  /* The preheader edge must not require splitting (the assert below
     checks that no new basic block was created).  */
  new_bb = bsi_insert_on_edge_immediate (pe, new_stmt_list);
  gcc_assert (!new_bb);
  *initial_address = new_temp;

  /* Create: p = (vectype *) initial_base  */
  vec_stmt = fold_convert (vect_ptr_type, new_temp);
  vec_stmt = build2 (MODIFY_EXPR, void_type_node, vect_ptr, vec_stmt);
  vect_ptr_init = make_ssa_name (vect_ptr, vec_stmt);
  TREE_OPERAND (vec_stmt, 0) = vect_ptr_init;
  new_bb = bsi_insert_on_edge_immediate (pe, vec_stmt);
  gcc_assert (!new_bb);


  /** (4) Handle the updating of the vector-pointer inside the loop: **/

  if (only_init) /* No update in loop is required.  */
    {
      /* Copy the points-to information if it exists. */
      if (DR_PTR_INFO (dr))
        duplicate_ssa_name_ptr_info (vect_ptr_init, DR_PTR_INFO (dr));
      return vect_ptr_init;
    }
  else
    {
      block_stmt_iterator incr_bsi;
      bool insert_after;
      tree indx_before_incr, indx_after_incr;
      tree incr;

      /* Create an IV that advances the pointer by the vector size each
	 iteration, inserted at the standard IV increment position.  */
      standard_iv_increment_position (loop, &incr_bsi, &insert_after);
      create_iv (vect_ptr_init,
		 fold_convert (vect_ptr_type, TYPE_SIZE_UNIT (vectype)),
		 NULL_TREE, loop, &incr_bsi, insert_after,
		 &indx_before_incr, &indx_after_incr);
      incr = bsi_stmt (incr_bsi);
      /* Attach vectorizer bookkeeping to the new increment stmt.  */
      set_stmt_info ((tree_ann_t)stmt_ann (incr),
		     new_stmt_vec_info (incr, loop_vinfo));

      /* Copy the points-to information if it exists. */
      if (DR_PTR_INFO (dr))
	{
	  duplicate_ssa_name_ptr_info (indx_before_incr, DR_PTR_INFO (dr));
	  duplicate_ssa_name_ptr_info (indx_after_incr, DR_PTR_INFO (dr));
	}
      merge_alias_info (vect_ptr_init, indx_before_incr);
      merge_alias_info (vect_ptr_init, indx_after_incr);

      return indx_before_incr;
    }
}
368 
369 
370 /* Function vect_create_destination_var.
371 
372    Create a new temporary of type VECTYPE.  */
373 
374 static tree
vect_create_destination_var(tree scalar_dest,tree vectype)375 vect_create_destination_var (tree scalar_dest, tree vectype)
376 {
377   tree vec_dest;
378   const char *new_name;
379   tree type;
380   enum vect_var_kind kind;
381 
382   kind = vectype ? vect_simple_var : vect_scalar_var;
383   type = vectype ? vectype : TREE_TYPE (scalar_dest);
384 
385   gcc_assert (TREE_CODE (scalar_dest) == SSA_NAME);
386 
387   new_name = get_name (scalar_dest);
388   if (!new_name)
389     new_name = "var_";
390   vec_dest = vect_get_new_vect_var (type, vect_simple_var, new_name);
391   add_referenced_tmp_var (vec_dest);
392 
393   return vec_dest;
394 }
395 
396 
397 /* Function vect_init_vector.
398 
399    Insert a new stmt (INIT_STMT) that initializes a new vector variable with
400    the vector elements of VECTOR_VAR. Return the DEF of INIT_STMT. It will be
401    used in the vectorization of STMT.  */
402 
403 static tree
vect_init_vector(tree stmt,tree vector_var)404 vect_init_vector (tree stmt, tree vector_var)
405 {
406   stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
407   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
408   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
409   tree new_var;
410   tree init_stmt;
411   tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
412   tree vec_oprnd;
413   edge pe;
414   tree new_temp;
415   basic_block new_bb;
416 
417   new_var = vect_get_new_vect_var (vectype, vect_simple_var, "cst_");
418   add_referenced_tmp_var (new_var);
419 
420   init_stmt = build2 (MODIFY_EXPR, vectype, new_var, vector_var);
421   new_temp = make_ssa_name (new_var, init_stmt);
422   TREE_OPERAND (init_stmt, 0) = new_temp;
423 
424   pe = loop_preheader_edge (loop);
425   new_bb = bsi_insert_on_edge_immediate (pe, init_stmt);
426   gcc_assert (!new_bb);
427 
428   if (vect_print_dump_info (REPORT_DETAILS))
429     {
430       fprintf (vect_dump, "created new init_stmt: ");
431       print_generic_expr (vect_dump, init_stmt, TDF_SLIM);
432     }
433 
434   vec_oprnd = TREE_OPERAND (init_stmt, 0);
435   return vec_oprnd;
436 }
437 
438 
439 /* Function vect_get_vec_def_for_operand.
440 
441    OP is an operand in STMT. This function returns a (vector) def that will be
442    used in the vectorized stmt for STMT.
443 
444    In the case that OP is an SSA_NAME which is defined in the loop, then
445    STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.
446 
447    In case OP is an invariant or constant, a new stmt that creates a vector def
448    needs to be introduced.  */
449 
static tree
vect_get_vec_def_for_operand (tree op, tree stmt, tree *scalar_def)
{
  tree vec_oprnd;
  tree vec_stmt;
  tree def_stmt;
  stmt_vec_info def_stmt_info = NULL;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
  tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
  int nunits = TYPE_VECTOR_SUBPARTS (vectype);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  tree vec_inv;
  tree vec_cst;
  tree t = NULL_TREE;
  tree def;
  int i;
  enum vect_def_type dt;
  bool is_simple_use;

  if (vect_print_dump_info (REPORT_DETAILS))
    {
      fprintf (vect_dump, "vect_get_vec_def_for_operand: ");
      print_generic_expr (vect_dump, op, TDF_SLIM);
    }

  /* Classify OP (constant / invariant / loop def / reduction / ...);
     the analysis phase already verified this succeeds.  */
  is_simple_use = vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt);
  gcc_assert (is_simple_use);
  if (vect_print_dump_info (REPORT_DETAILS))
    {
      if (def)
        {
          fprintf (vect_dump, "def =  ");
          print_generic_expr (vect_dump, def, TDF_SLIM);
        }
      if (def_stmt)
        {
          fprintf (vect_dump, "  def_stmt =  ");
          print_generic_expr (vect_dump, def_stmt, TDF_SLIM);
        }
    }

  switch (dt)
    {
    /* Case 1: operand is a constant.  */
    case vect_constant_def:
      {
	if (scalar_def)
	  *scalar_def = op;

        /* Create 'vect_cst_ = {cst,cst,...,cst}'  */
        if (vect_print_dump_info (REPORT_DETAILS))
          fprintf (vect_dump, "Create vector_cst. nunits = %d", nunits);

        /* Build the element list back-to-front so it ends up in order.  */
        for (i = nunits - 1; i >= 0; --i)
          {
            t = tree_cons (NULL_TREE, op, t);
          }
        vec_cst = build_vector (vectype, t);
        /* Insert the vector initialization in the loop preheader.  */
        return vect_init_vector (stmt, vec_cst);
      }

    /* Case 2: operand is defined outside the loop - loop invariant.  */
    case vect_invariant_def:
      {
	if (scalar_def)
	  *scalar_def = def;

        /* Create 'vec_inv = {inv,inv,..,inv}'  */
        if (vect_print_dump_info (REPORT_DETAILS))
          fprintf (vect_dump, "Create vector_inv.");

        for (i = nunits - 1; i >= 0; --i)
          {
            t = tree_cons (NULL_TREE, def, t);
          }

	/* FIXME: use build_constructor directly.  */
        vec_inv = build_constructor_from_list (vectype, t);
        return vect_init_vector (stmt, vec_inv);
      }

    /* Case 3: operand is defined inside the loop.  */
    case vect_loop_def:
      {
	if (scalar_def)
	  *scalar_def = def_stmt;

        /* Get the def from the vectorized stmt.  */
        def_stmt_info = vinfo_for_stmt (def_stmt);
        vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
        /* The defining stmt must already have been vectorized (defs are
           processed before uses).  */
        gcc_assert (vec_stmt);
        vec_oprnd = TREE_OPERAND (vec_stmt, 0);
        return vec_oprnd;
      }

    /* Case 4: operand is defined by a loop header phi - reduction  */
    case vect_reduction_def:
      {
        gcc_assert (TREE_CODE (def_stmt) == PHI_NODE);

        /* Get the def before the loop  */
        op = PHI_ARG_DEF_FROM_EDGE (def_stmt, loop_preheader_edge (loop));
        return get_initial_def_for_reduction (stmt, op, scalar_def);
     }

    /* Case 5: operand is defined by loop-header phi - induction.  */
    case vect_induction_def:
      {
        if (vect_print_dump_info (REPORT_DETAILS))
          fprintf (vect_dump, "induction - unsupported.");
        internal_error ("no support for induction"); /* FORNOW */
      }

    default:
      gcc_unreachable ();
    }
}
568 
569 
570 /* Function vect_finish_stmt_generation.
571 
572    Insert a new stmt.  */
573 
574 static void
vect_finish_stmt_generation(tree stmt,tree vec_stmt,block_stmt_iterator * bsi)575 vect_finish_stmt_generation (tree stmt, tree vec_stmt, block_stmt_iterator *bsi)
576 {
577   bsi_insert_before (bsi, vec_stmt, BSI_SAME_STMT);
578 
579   if (vect_print_dump_info (REPORT_DETAILS))
580     {
581       fprintf (vect_dump, "add new stmt: ");
582       print_generic_expr (vect_dump, vec_stmt, TDF_SLIM);
583     }
584 
585   /* Make sure bsi points to the stmt that is being vectorized.  */
586   gcc_assert (stmt == bsi_stmt (*bsi));
587 
588 #ifdef USE_MAPPED_LOCATION
589   SET_EXPR_LOCATION (vec_stmt, EXPR_LOCATION (stmt));
590 #else
591   SET_EXPR_LOCUS (vec_stmt, EXPR_LOCUS (stmt));
592 #endif
593 }
594 
595 
596 #define ADJUST_IN_EPILOG 1
597 
598 /* Function get_initial_def_for_reduction
599 
600    Input:
601    STMT - a stmt that performs a reduction operation in the loop.
602    INIT_VAL - the initial value of the reduction variable
603 
604    Output:
605    SCALAR_DEF - a tree that holds a value to be added to the final result
606 	of the reduction (used for "ADJUST_IN_EPILOG" - see below).
607    Return a vector variable, initialized according to the operation that STMT
608 	performs. This vector will be used as the initial value of the
609 	vector of partial results.
610 
611    Option1 ("ADJUST_IN_EPILOG"): Initialize the vector as follows:
612      add:         [0,0,...,0,0]
613      mult:        [1,1,...,1,1]
614      min/max:     [init_val,init_val,..,init_val,init_val]
615      bit and/or:  [init_val,init_val,..,init_val,init_val]
616    and when necessary (e.g. add/mult case) let the caller know
617    that it needs to adjust the result by init_val.
618 
619    Option2: Initialize the vector as follows:
620      add:         [0,0,...,0,init_val]
621      mult:        [1,1,...,1,init_val]
622      min/max:     [init_val,init_val,...,init_val]
623      bit and/or:  [init_val,init_val,...,init_val]
624    and no adjustments are needed.
625 
626    For example, for the following code:
627 
628    s = init_val;
629    for (i=0;i<n;i++)
630      s = s + a[i];
631 
632    STMT is 's = s + a[i]', and the reduction variable is 's'.
633    For a vector of 4 units, we want to return either [0,0,0,init_val],
634    or [0,0,0,0] and let the caller know that it needs to adjust
635    the result at the end by 'init_val'.
636 
637    FORNOW: We use the "ADJUST_IN_EPILOG" scheme.
638    TODO: Use some cost-model to estimate which scheme is more profitable.
639 */
640 
static tree
get_initial_def_for_reduction (tree stmt, tree init_val, tree *scalar_def)
{
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
  tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
  int nunits = GET_MODE_NUNITS (TYPE_MODE (vectype));
  int nelements;
  enum tree_code code = TREE_CODE (TREE_OPERAND (stmt, 1));
  tree type = TREE_TYPE (init_val);
  tree def;
  tree vec, t = NULL_TREE;
  bool need_epilog_adjust;
  int i;

  gcc_assert (INTEGRAL_TYPE_P (type) || SCALAR_FLOAT_TYPE_P (type));

  switch (code)
  {
  case PLUS_EXPR:
    /* The neutral element of addition: integer or float zero.  */
    if (INTEGRAL_TYPE_P (type))
      def = build_int_cst (type, 0);
    else
      def = build_real (type, dconst0);

#ifdef ADJUST_IN_EPILOG
    /* All the 'nunits' elements are set to 0. The final result will be
       adjusted by 'init_val' at the loop epilog.  */
    nelements = nunits;
    need_epilog_adjust = true;
#else
    /* 'nunits - 1' elements are set to 0; The last element is set to
        'init_val'.  No further adjustments at the epilog are needed.  */
    nelements = nunits - 1;
    need_epilog_adjust = false;
#endif
    break;

  case MIN_EXPR:
  case MAX_EXPR:
    /* min/max are idempotent, so every element can be INIT_VAL and no
       epilog adjustment is ever needed.  */
    def = init_val;
    nelements = nunits;
    need_epilog_adjust = false;
    break;

  default:
    gcc_unreachable ();
  }

  /* Build the element list back-to-front so it ends up in order.  */
  for (i = nelements - 1; i >= 0; --i)
    t = tree_cons (NULL_TREE, def, t);

  if (nelements == nunits - 1)
    {
      /* Set the last element of the vector.  */
      t = tree_cons (NULL_TREE, init_val, t);
      nelements += 1;
    }
  gcc_assert (nelements == nunits);

  /* A VECTOR_CST requires constant elements; otherwise fall back to a
     CONSTRUCTOR (INIT_VAL may be a variable for min/max).  */
  if (TREE_CODE (init_val) == INTEGER_CST || TREE_CODE (init_val) == REAL_CST)
    vec = build_vector (vectype, t);
  else
    vec = build_constructor_from_list (vectype, t);

  /* Tell the caller whether it must add INIT_VAL to the final result.  */
  if (!need_epilog_adjust)
    *scalar_def = NULL_TREE;
  else
    *scalar_def = init_val;

  /* Insert the vector initialization in the loop preheader and return
     its SSA def.  */
  return vect_init_vector (stmt, vec);
}
712 
713 
714 /* Function vect_create_epilog_for_reduction:
715 
716    Create code at the loop-epilog to finalize the result of a reduction
717    computation.
718 
719    LOOP_EXIT_VECT_DEF is a vector of partial results. We need to "reduce" it
720    into a single result, by applying the operation REDUC_CODE on the
721    partial-results-vector. For this, we need to create a new phi node at the
722    loop exit to preserve loop-closed form, as illustrated below.
723 
724    STMT is the original scalar reduction stmt that is being vectorized.
725    REDUCTION_OP is the scalar reduction-variable.
726    REDUCTION_PHI is the phi-node that carries the reduction computation.
727    This function also sets the arguments for the REDUCTION_PHI:
728    The loop-entry argument is the (vectorized) initial-value of REDUCTION_OP.
729    The loop-latch argument is VECT_DEF - the vector of partial sums.
730 
731      This function transforms this:
732 
733         loop:
734           vec_def = phi <null, null>    # REDUCTION_PHI
735           ....
736           VECT_DEF = ...
737 
738         loop_exit:
739           s_out0 = phi <s_loop>         # EXIT_PHI
740 
741           use <s_out0>
742           use <s_out0>
743 
744      Into:
745 
746         loop:
747           vec_def = phi <vec_init, VECT_DEF> # REDUCTION_PHI
748           ....
749           VECT_DEF = ...
750 
751         loop_exit:
752           s_out0 = phi <s_loop>         # EXIT_PHI
753           v_out1 = phi <VECT_DEF>       # NEW_EXIT_PHI
754 
755           v_out2 = reduc_expr <v_out1>
756           s_out3 = extract_field <v_out2, 0>
757 
758           use <s_out3>
759           use <s_out3>
760 */
761 
762 static void
vect_create_epilog_for_reduction(tree vect_def,tree stmt,tree reduction_op,enum tree_code reduc_code,tree reduction_phi)763 vect_create_epilog_for_reduction (tree vect_def, tree stmt, tree reduction_op,
764                                   enum tree_code reduc_code, tree reduction_phi)
765 {
766   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
767   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
768   enum machine_mode mode = TYPE_MODE (vectype);
769   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
770   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
771   basic_block exit_bb;
772   tree scalar_dest = TREE_OPERAND (stmt, 0);
773   tree scalar_type = TREE_TYPE (scalar_dest);
774   tree new_phi;
775   block_stmt_iterator exit_bsi;
776   tree vec_dest;
777   tree new_temp;
778   tree new_name;
779   tree epilog_stmt;
780   tree new_scalar_dest, exit_phi;
781   tree bitsize, bitpos, bytesize;
782   enum tree_code code = TREE_CODE (TREE_OPERAND (stmt, 1));
783   tree scalar_initial_def;
784   tree vec_initial_def;
785   tree orig_name;
786   imm_use_iterator imm_iter;
787   use_operand_p use_p;
788   bool extract_scalar_result;
789 
790   /*** 1. Create the reduction def-use cycle  ***/
791 
792   /* 1.1 set the loop-entry arg of the reduction-phi:  */
793   /* For the case of reduction, vect_get_vec_def_for_operand returns
794      the scalar def before the loop, that defines the initial value
795      of the reduction variable.  */
796   vec_initial_def = vect_get_vec_def_for_operand (reduction_op, stmt,
797 						  &scalar_initial_def);
798   add_phi_arg (reduction_phi, vec_initial_def, loop_preheader_edge (loop));
799 
800 
801   /* 1.2 set the loop-latch arg for the reduction-phi:  */
802   add_phi_arg (reduction_phi, vect_def, loop_latch_edge (loop));
803 
804   if (vect_print_dump_info (REPORT_DETAILS))
805     {
806       fprintf (vect_dump, "transform reduction: created def-use cycle:");
807       print_generic_expr (vect_dump, reduction_phi, TDF_SLIM);
808       fprintf (vect_dump, "\n");
809       print_generic_expr (vect_dump, SSA_NAME_DEF_STMT (vect_def), TDF_SLIM);
810     }
811 
812 
813   /*** 2. Create epilog code ***/
814 
815   /* 2.1 Create new loop-exit-phi to preserve loop-closed form:
816         v_out1 = phi <v_loop>  */
817 
818   exit_bb = loop->single_exit->dest;
819   new_phi = create_phi_node (SSA_NAME_VAR (vect_def), exit_bb);
820   SET_PHI_ARG_DEF (new_phi, loop->single_exit->dest_idx, vect_def);
821 
822   exit_bsi = bsi_start (exit_bb);
823 
824 
825   new_scalar_dest = vect_create_destination_var (scalar_dest, NULL);
826   bitsize = TYPE_SIZE (scalar_type);
827   bytesize = TYPE_SIZE_UNIT (scalar_type);
828 
829   /* 2.2 Create the reduction code.  */
830 
831   if (reduc_code < NUM_TREE_CODES)
832     {
833       /*** Case 1:  Create:
834 	   v_out2 = reduc_expr <v_out1>  */
835 
836       if (vect_print_dump_info (REPORT_DETAILS))
837 	fprintf (vect_dump, "Reduce using direct vector reduction.");
838 
839       vec_dest = vect_create_destination_var (scalar_dest, vectype);
840       epilog_stmt = build2 (MODIFY_EXPR, vectype, vec_dest,
841 			build1 (reduc_code, vectype,  PHI_RESULT (new_phi)));
842       new_temp = make_ssa_name (vec_dest, epilog_stmt);
843       TREE_OPERAND (epilog_stmt, 0) = new_temp;
844       bsi_insert_after (&exit_bsi, epilog_stmt, BSI_NEW_STMT);
845 
846       extract_scalar_result = true;
847     }
848   else
849     {
850       enum tree_code shift_code = 0;
851       bool have_whole_vector_shift = true;
852       enum tree_code code = TREE_CODE (TREE_OPERAND (stmt, 1)); /* CHECKME */
853       int bit_offset;
854       int element_bitsize = tree_low_cst (bitsize, 1);
855       int vec_size_in_bits = tree_low_cst (TYPE_SIZE (vectype), 1);
856       tree vec_temp;
857 
858       /* The result of the reduction is expected to be at the least
859 	 significant bits of the vector.  This is merely convention,
860 	 as it's the extraction later that really matters, and that
861 	 is also under our control.  */
862       if (vec_shr_optab->handlers[mode].insn_code != CODE_FOR_nothing)
863 	shift_code = VEC_RSHIFT_EXPR;
864       else
865 	have_whole_vector_shift = false;
866 
867       /* Regardless of whether we have a whole vector shift, if we're
868 	 emulating the operation via tree-vect-generic, we don't want
869 	 to use it.  Only the first round of the reduction is likely
870 	 to still be profitable via emulation.  */
871       /* ??? It might be better to emit a reduction tree code here, so that
872 	 tree-vect-generic can expand the first round via bit tricks.  */
873       if (!VECTOR_MODE_P (mode))
874 	have_whole_vector_shift = false;
875       else
876 	{
877 	  optab optab = optab_for_tree_code (code, vectype);
878 	  if (optab->handlers[mode].insn_code == CODE_FOR_nothing)
879 	    have_whole_vector_shift = false;
880 	}
881 
882       if (have_whole_vector_shift)
883         {
884 	  /*** Case 2:
885 	     for (offset = VS/2; offset >= element_size; offset/=2)
886 	        {
887 	          Create:  va' = vec_shift <va, offset>
888 	          Create:  va = vop <va, va'>
889 	        }  */
890 
891 	  if (vect_print_dump_info (REPORT_DETAILS))
892 	    fprintf (vect_dump, "Reduce using vector shifts");
893 
894 	  vec_dest = vect_create_destination_var (scalar_dest, vectype);
895 	  new_temp = PHI_RESULT (new_phi);
896 
897 	  for (bit_offset = vec_size_in_bits/2;
898 	       bit_offset >= element_bitsize;
899 	       bit_offset /= 2)
900 	    {
901 	      tree bitpos = size_int (bit_offset);
902 
903 	      epilog_stmt = build2 (MODIFY_EXPR, vectype, vec_dest,
904 	      build2 (shift_code, vectype, new_temp, bitpos));
905 	      new_name = make_ssa_name (vec_dest, epilog_stmt);
906 	      TREE_OPERAND (epilog_stmt, 0) = new_name;
907 	      bsi_insert_after (&exit_bsi, epilog_stmt, BSI_NEW_STMT);
908 	      if (vect_print_dump_info (REPORT_DETAILS))
909 		print_generic_expr (vect_dump, epilog_stmt, TDF_SLIM);
910 
911 
912 	      epilog_stmt = build2 (MODIFY_EXPR, vectype, vec_dest,
913 	      build2 (code, vectype, new_name, new_temp));
914 	      new_temp = make_ssa_name (vec_dest, epilog_stmt);
915 	      TREE_OPERAND (epilog_stmt, 0) = new_temp;
916 	      bsi_insert_after (&exit_bsi, epilog_stmt, BSI_NEW_STMT);
917 	      if (vect_print_dump_info (REPORT_DETAILS))
918 		print_generic_expr (vect_dump, epilog_stmt, TDF_SLIM);
919 	    }
920 
921 	  extract_scalar_result = true;
922 	}
923       else
924         {
925 	  tree rhs;
926 
927 	  /*** Case 3:
928 	     Create:
929 	     s = extract_field <v_out2, 0>
930 	     for (offset=element_size; offset<vector_size; offset+=element_size;)
931 	       {
932 	         Create:  s' = extract_field <v_out2, offset>
933 	         Create:  s = op <s, s'>
934 	       }  */
935 
936 	  if (vect_print_dump_info (REPORT_DETAILS))
937 	    fprintf (vect_dump, "Reduce using scalar code. ");
938 
939 	  vec_temp = PHI_RESULT (new_phi);
940 	  vec_size_in_bits = tree_low_cst (TYPE_SIZE (vectype), 1);
941 
942 	  rhs = build3 (BIT_FIELD_REF, scalar_type, vec_temp, bitsize,
943 			 bitsize_zero_node);
944 
945 	  BIT_FIELD_REF_UNSIGNED (rhs) = TYPE_UNSIGNED (scalar_type);
946 	  epilog_stmt = build2 (MODIFY_EXPR, scalar_type, new_scalar_dest,
947 			        rhs);
948 	  new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
949 	  TREE_OPERAND (epilog_stmt, 0) = new_temp;
950 	  bsi_insert_after (&exit_bsi, epilog_stmt, BSI_NEW_STMT);
951 	  if (vect_print_dump_info (REPORT_DETAILS))
952 	    print_generic_expr (vect_dump, epilog_stmt, TDF_SLIM);
953 
954 	  for (bit_offset = element_bitsize;
955 	       bit_offset < vec_size_in_bits;
956 	       bit_offset += element_bitsize)
957 	    {
958 	      tree bitpos = bitsize_int (bit_offset);
959 	      tree rhs = build3 (BIT_FIELD_REF, scalar_type, vec_temp, bitsize,
960 				 bitpos);
961 
962 	      BIT_FIELD_REF_UNSIGNED (rhs) = TYPE_UNSIGNED (scalar_type);
963 	      epilog_stmt = build2 (MODIFY_EXPR, scalar_type, new_scalar_dest,
964 				    rhs);
965 	      new_name = make_ssa_name (new_scalar_dest, epilog_stmt);
966 	      TREE_OPERAND (epilog_stmt, 0) = new_name;
967 	      bsi_insert_after (&exit_bsi, epilog_stmt, BSI_NEW_STMT);
968 	      if (vect_print_dump_info (REPORT_DETAILS))
969 		print_generic_expr (vect_dump, epilog_stmt, TDF_SLIM);
970 
971 
972 	      epilog_stmt = build2 (MODIFY_EXPR, scalar_type, new_scalar_dest,
973 				build2 (code, scalar_type, new_name, new_temp));
974 	      new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
975 	      TREE_OPERAND (epilog_stmt, 0) = new_temp;
976 	      bsi_insert_after (&exit_bsi, epilog_stmt, BSI_NEW_STMT);
977 	      if (vect_print_dump_info (REPORT_DETAILS))
978 		print_generic_expr (vect_dump, epilog_stmt, TDF_SLIM);
979 	    }
980 
981 	  extract_scalar_result = false;
982 	}
983     }
984 
985 
986   /* 2.3  Extract the final scalar result.  Create:
987          s_out3 = extract_field <v_out2, bitpos>  */
988 
989   if (extract_scalar_result)
990     {
991       tree rhs;
992 
993       if (vect_print_dump_info (REPORT_DETAILS))
994 	fprintf (vect_dump, "extract scalar result");
995 
996       /* The result is in the low order bits.  */
997       if (BYTES_BIG_ENDIAN)
998 	bitpos = size_binop (MULT_EXPR,
999 		       bitsize_int (TYPE_VECTOR_SUBPARTS (vectype) - 1),
1000 		       TYPE_SIZE (scalar_type));
1001       else
1002 	bitpos = bitsize_zero_node;
1003 
1004       rhs = build3 (BIT_FIELD_REF, scalar_type, new_temp, bitsize, bitpos);
1005       BIT_FIELD_REF_UNSIGNED (rhs) = TYPE_UNSIGNED (scalar_type);
1006       epilog_stmt = build2 (MODIFY_EXPR, scalar_type, new_scalar_dest, rhs);
1007       new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
1008       TREE_OPERAND (epilog_stmt, 0) = new_temp;
1009       bsi_insert_after (&exit_bsi, epilog_stmt, BSI_NEW_STMT);
1010       if (vect_print_dump_info (REPORT_DETAILS))
1011 	print_generic_expr (vect_dump, epilog_stmt, TDF_SLIM);
1012     }
1013 
1014 
1015   /* 2.4 Adjust the final result by the initial value of the reduction
1016 	 variable. (when such adjustment is not needed, then
1017 	 'scalar_initial_def' is zero).
1018 
1019 	 Create:
1020 	 s_out = scalar_expr <s_out, scalar_initial_def>  */
1021 
1022   if (scalar_initial_def)
1023     {
1024       epilog_stmt = build2 (MODIFY_EXPR, scalar_type, new_scalar_dest,
1025                       build2 (code, scalar_type, new_temp, scalar_initial_def));
1026       new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
1027       TREE_OPERAND (epilog_stmt, 0) = new_temp;
1028       bsi_insert_after (&exit_bsi, epilog_stmt, BSI_NEW_STMT);
1029 
1030       if (vect_print_dump_info (REPORT_DETAILS))
1031         print_generic_expr (vect_dump, epilog_stmt, TDF_SLIM);
1032     }
1033 
1034 
1035   /* 2.5 Replace uses of s_out0 with uses of s_out3  */
1036 
1037   /* Find the loop-closed-use at the loop exit of the original
1038      scalar result.  (The reduction result is expected to have
1039      two immediate uses - one at the latch block, and one at the
1040      loop exit).  */
1041   exit_phi = NULL;
1042   FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
1043     {
1044       if (!flow_bb_inside_loop_p (loop, bb_for_stmt (USE_STMT (use_p))))
1045 	{
1046 	  exit_phi = USE_STMT (use_p);
1047 	  break;
1048 	}
1049     }
1050 
1051   orig_name = PHI_RESULT (exit_phi);
1052 
1053   FOR_EACH_IMM_USE_SAFE (use_p, imm_iter, orig_name)
1054     SET_USE (use_p, new_temp);
1055 }
1056 
1057 
1058 /* Function vectorizable_reduction.
1059 
1060    Check if STMT performs a reduction operation that can be vectorized.
1061    If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1062    stmt to replace it, put it in VEC_STMT, and insert it at BSI.
1063    Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
1064 
1065 bool
vectorizable_reduction(tree stmt,block_stmt_iterator * bsi,tree * vec_stmt)1066 vectorizable_reduction (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
1067 {
1068   tree vec_dest;
1069   tree scalar_dest;
1070   tree op0, op1;
1071   tree loop_vec_def;
1072   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1073   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1074   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1075   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1076   tree operation;
1077   enum tree_code code, reduc_code = 0;
1078   enum machine_mode vec_mode;
1079   int op_type;
1080   optab optab, reduc_optab;
1081   tree new_temp;
1082   tree def0, def1, def_stmt0, def_stmt1;
1083   enum vect_def_type dt0, dt1;
1084   tree new_phi;
1085   tree scalar_type;
1086   bool is_simple_use0;
1087   bool is_simple_use1;
1088 
1089   /* Is vectorizable reduction?  */
1090 
1091   /* Not supportable if the reduction variable is used in the loop.  */
1092   if (STMT_VINFO_RELEVANT_P (stmt_info))
1093     return false;
1094 
1095   if (!STMT_VINFO_LIVE_P (stmt_info))
1096     return false;
1097 
1098   /* Make sure it was already recognized as a reduction pattern.  */
1099   if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_reduction_def)
1100     return false;
1101 
1102   gcc_assert (TREE_CODE (stmt) == MODIFY_EXPR);
1103 
1104   operation = TREE_OPERAND (stmt, 1);
1105   code = TREE_CODE (operation);
1106   op_type = TREE_CODE_LENGTH (code);
1107 
1108   if (op_type != binary_op)
1109     return false;
1110 
1111   op0 = TREE_OPERAND (operation, 0);
1112   op1 = TREE_OPERAND (operation, 1);
1113   scalar_dest = TREE_OPERAND (stmt, 0);
1114   scalar_type = TREE_TYPE (scalar_dest);
1115 
1116   /* Check the first operand. It is expected to be defined inside the loop.  */
1117   is_simple_use0 =
1118         vect_is_simple_use (op0, loop_vinfo, &def_stmt0, &def0, &dt0);
1119   is_simple_use1 =
1120         vect_is_simple_use (op1, loop_vinfo, &def_stmt1, &def1, &dt1);
1121 
1122   gcc_assert (is_simple_use0);
1123   gcc_assert (is_simple_use1);
1124   gcc_assert (dt0 == vect_loop_def);
1125   gcc_assert (dt1 == vect_reduction_def);
1126   gcc_assert (TREE_CODE (def_stmt1) == PHI_NODE);
1127   gcc_assert (stmt == vect_is_simple_reduction (loop, def_stmt1));
1128 
1129   if (STMT_VINFO_LIVE_P (vinfo_for_stmt (def_stmt1)))
1130    return false;
1131 
1132   /* Supportable by target?  */
1133 
1134   /* check support for the operation in the loop  */
1135   optab = optab_for_tree_code (code, vectype);
1136   if (!optab)
1137     {
1138       if (vect_print_dump_info (REPORT_DETAILS))
1139         fprintf (vect_dump, "no optab.");
1140       return false;
1141     }
1142   vec_mode = TYPE_MODE (vectype);
1143   if (optab->handlers[(int) vec_mode].insn_code == CODE_FOR_nothing)
1144     {
1145       if (vect_print_dump_info (REPORT_DETAILS))
1146         fprintf (vect_dump, "op not supported by target.");
1147       if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
1148           || LOOP_VINFO_VECT_FACTOR (loop_vinfo)
1149 	     < vect_min_worthwhile_factor (code))
1150         return false;
1151       if (vect_print_dump_info (REPORT_DETAILS))
1152 	fprintf (vect_dump, "proceeding using word mode.");
1153     }
1154 
1155   /* Worthwhile without SIMD support?  */
1156   if (!VECTOR_MODE_P (TYPE_MODE (vectype))
1157       && LOOP_VINFO_VECT_FACTOR (loop_vinfo)
1158 	 < vect_min_worthwhile_factor (code))
1159     {
1160       if (vect_print_dump_info (REPORT_DETAILS))
1161 	fprintf (vect_dump, "not worthwhile without SIMD support.");
1162       return false;
1163     }
1164 
1165   /* check support for the epilog operation  */
1166   if (!reduction_code_for_scalar_code (code, &reduc_code))
1167     return false;
1168   reduc_optab = optab_for_tree_code (reduc_code, vectype);
1169   if (!reduc_optab)
1170     {
1171       if (vect_print_dump_info (REPORT_DETAILS))
1172         fprintf (vect_dump, "no optab for reduction.");
1173       reduc_code = NUM_TREE_CODES;
1174     }
1175   if (reduc_optab->handlers[(int) vec_mode].insn_code == CODE_FOR_nothing)
1176     {
1177       if (vect_print_dump_info (REPORT_DETAILS))
1178         fprintf (vect_dump, "reduc op not supported by target.");
1179       reduc_code = NUM_TREE_CODES;
1180     }
1181 
1182   if (!vec_stmt) /* transformation not required.  */
1183     {
1184       STMT_VINFO_TYPE (stmt_info) = reduc_vec_info_type;
1185       return true;
1186     }
1187 
1188   /** Transform.  **/
1189 
1190   if (vect_print_dump_info (REPORT_DETAILS))
1191     fprintf (vect_dump, "transform reduction.");
1192 
1193   /* Create the destination vector  */
1194   vec_dest = vect_create_destination_var (scalar_dest, vectype);
1195 
1196 
1197   /* Create the reduction-phi that defines the reduction-operand.  */
1198   new_phi = create_phi_node (vec_dest, loop->header);
1199 
1200 
1201   /* Prepare the operand that is defined inside the loop body  */
1202   loop_vec_def = vect_get_vec_def_for_operand (op0, stmt, NULL);
1203 
1204   /* Create the vectorized operation that computes the partial results  */
1205   *vec_stmt = build2 (MODIFY_EXPR, vectype, vec_dest,
1206                 build2 (code, vectype, loop_vec_def, PHI_RESULT (new_phi)));
1207   new_temp = make_ssa_name (vec_dest, *vec_stmt);
1208   TREE_OPERAND (*vec_stmt, 0) = new_temp;
1209   vect_finish_stmt_generation (stmt, *vec_stmt, bsi);
1210 
1211 
1212   /* Finalize the reduction-phi (set it's arguments) and create the
1213      epilog reduction code.  */
1214   vect_create_epilog_for_reduction (new_temp, stmt, op1, reduc_code, new_phi);
1215   return true;
1216 }
1217 
1218 
1219 /* Function vectorizable_assignment.
1220 
1221    Check if STMT performs an assignment (copy) that can be vectorized.
1222    If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1223    stmt to replace it, put it in VEC_STMT, and insert it at BSI.
1224    Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
1225 
1226 bool
vectorizable_assignment(tree stmt,block_stmt_iterator * bsi,tree * vec_stmt)1227 vectorizable_assignment (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
1228 {
1229   tree vec_dest;
1230   tree scalar_dest;
1231   tree op;
1232   tree vec_oprnd;
1233   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1234   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1235   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1236   tree new_temp;
1237   tree def, def_stmt;
1238   enum vect_def_type dt;
1239 
1240   /* Is vectorizable assignment?  */
1241   if (!STMT_VINFO_RELEVANT_P (stmt_info))
1242     return false;
1243 
1244   gcc_assert (STMT_VINFO_DEF_TYPE (stmt_info) == vect_loop_def);
1245 
1246   if (TREE_CODE (stmt) != MODIFY_EXPR)
1247     return false;
1248 
1249   scalar_dest = TREE_OPERAND (stmt, 0);
1250   if (TREE_CODE (scalar_dest) != SSA_NAME)
1251     return false;
1252 
1253   op = TREE_OPERAND (stmt, 1);
1254   if (!vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt))
1255     {
1256       if (vect_print_dump_info (REPORT_DETAILS))
1257         fprintf (vect_dump, "use not simple.");
1258       return false;
1259     }
1260 
1261   if (!vec_stmt) /* transformation not required.  */
1262     {
1263       STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
1264       return true;
1265     }
1266 
1267   /** Transform.  **/
1268   if (vect_print_dump_info (REPORT_DETAILS))
1269     fprintf (vect_dump, "transform assignment.");
1270 
1271   /* Handle def.  */
1272   vec_dest = vect_create_destination_var (scalar_dest, vectype);
1273 
1274   /* Handle use.  */
1275   op = TREE_OPERAND (stmt, 1);
1276   vec_oprnd = vect_get_vec_def_for_operand (op, stmt, NULL);
1277 
1278   /* Arguments are ready. create the new vector stmt.  */
1279   *vec_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, vec_oprnd);
1280   new_temp = make_ssa_name (vec_dest, *vec_stmt);
1281   TREE_OPERAND (*vec_stmt, 0) = new_temp;
1282   vect_finish_stmt_generation (stmt, *vec_stmt, bsi);
1283 
1284   return true;
1285 }
1286 
1287 
1288 /* Function vect_min_worthwhile_factor.
1289 
1290    For a loop where we could vectorize the operation indicated by CODE,
1291    return the minimum vectorization factor that makes it worthwhile
1292    to use generic vectors.  */
1293 static int
vect_min_worthwhile_factor(enum tree_code code)1294 vect_min_worthwhile_factor (enum tree_code code)
1295 {
1296   switch (code)
1297     {
1298     case PLUS_EXPR:
1299     case MINUS_EXPR:
1300     case NEGATE_EXPR:
1301       return 4;
1302 
1303     case BIT_AND_EXPR:
1304     case BIT_IOR_EXPR:
1305     case BIT_XOR_EXPR:
1306     case BIT_NOT_EXPR:
1307       return 2;
1308 
1309     default:
1310       return INT_MAX;
1311     }
1312 }
1313 
1314 
/* Function vectorizable_operation.

   Check if STMT performs a binary or unary operation that can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at BSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */

bool
vectorizable_operation (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
{
  tree vec_dest;
  tree scalar_dest;
  tree operation;
  tree op0, op1 = NULL;
  tree vec_oprnd0, vec_oprnd1=NULL;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  int i;
  enum tree_code code;
  enum machine_mode vec_mode;
  tree new_temp;
  int op_type;
  tree op;
  optab optab;
  int icode;
  enum machine_mode optab_op2_mode;
  tree def, def_stmt;
  enum vect_def_type dt;

  /* Is STMT a vectorizable binary/unary operation?   */
  if (!STMT_VINFO_RELEVANT_P (stmt_info))
    return false;

  gcc_assert (STMT_VINFO_DEF_TYPE (stmt_info) == vect_loop_def);

  if (STMT_VINFO_LIVE_P (stmt_info))
    {
      /* FORNOW: not yet supported.  */
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "value used after loop.");
      return false;
    }

  if (TREE_CODE (stmt) != MODIFY_EXPR)
    return false;

  if (TREE_CODE (TREE_OPERAND (stmt, 0)) != SSA_NAME)
    return false;

  operation = TREE_OPERAND (stmt, 1);
  code = TREE_CODE (operation);
  optab = optab_for_tree_code (code, vectype);

  /* Support only unary or binary operations.  */
  op_type = TREE_CODE_LENGTH (code);
  if (op_type != unary_op && op_type != binary_op)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
	fprintf (vect_dump, "num. args = %d (not unary/binary op).", op_type);
      return false;
    }

  /* All operands must have a vectorizable definition.  NOTE: after this
     loop DT holds the classification of the LAST operand checked; the
     shift check below relies on that, since for a binary shift the
     shift amount is operand 1 (the last one).  */
  for (i = 0; i < op_type; i++)
    {
      op = TREE_OPERAND (operation, i);
      if (!vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt))
	{
	  if (vect_print_dump_info (REPORT_DETAILS))
	    fprintf (vect_dump, "use not simple.");
	  return false;
	}
    }

  /* Supportable by target?  */
  if (!optab)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
	fprintf (vect_dump, "no optab.");
      return false;
    }
  vec_mode = TYPE_MODE (vectype);
  icode = (int) optab->handlers[(int) vec_mode].insn_code;
  if (icode == CODE_FOR_nothing)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
	fprintf (vect_dump, "op not supported by target.");
      /* Fall back to word-mode emulation only when the vector fits in a
	 word and the vectorization factor makes emulation worthwhile.  */
      if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
          || LOOP_VINFO_VECT_FACTOR (loop_vinfo)
	     < vect_min_worthwhile_factor (code))
        return false;
      if (vect_print_dump_info (REPORT_DETAILS))
	fprintf (vect_dump, "proceeding using word mode.");
    }

  /* Worthwhile without SIMD support?  */
  if (!VECTOR_MODE_P (TYPE_MODE (vectype))
      && LOOP_VINFO_VECT_FACTOR (loop_vinfo)
	 < vect_min_worthwhile_factor (code))
    {
      if (vect_print_dump_info (REPORT_DETAILS))
	fprintf (vect_dump, "not worthwhile without SIMD support.");
      return false;
    }

  if (code == LSHIFT_EXPR || code == RSHIFT_EXPR)
    {
      /* FORNOW: not yet supported.  */
      if (!VECTOR_MODE_P (vec_mode))
	return false;

      /* Invariant argument is needed for a vector shift
	 by a scalar shift operand.  */
      optab_op2_mode = insn_data[icode].operand[2].mode;
      if (! (VECTOR_MODE_P (optab_op2_mode)
	     || dt == vect_constant_def
	     || dt == vect_invariant_def))
	{
	  if (vect_print_dump_info (REPORT_DETAILS))
	    fprintf (vect_dump, "operand mode requires invariant argument.");
	  return false;
	}
    }

  if (!vec_stmt) /* transformation not required.  */
    {
      STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
      return true;
    }

  /** Transform.  **/

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "transform binary/unary operation.");

  /* Handle def.  */
  scalar_dest = TREE_OPERAND (stmt, 0);
  vec_dest = vect_create_destination_var (scalar_dest, vectype);

  /* Handle uses.  */
  op0 = TREE_OPERAND (operation, 0);
  vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL);

  if (op_type == binary_op)
    {
      op1 = TREE_OPERAND (operation, 1);

      if (code == LSHIFT_EXPR || code == RSHIFT_EXPR)
	{
	  /* Vector shl and shr insn patterns can be defined with
	     scalar operand 2 (shift operand).  In this case, use
	     constant or loop invariant op1 directly, without
	     extending it to vector mode first.  */

	  optab_op2_mode = insn_data[icode].operand[2].mode;
	  if (!VECTOR_MODE_P (optab_op2_mode))
	    {
	      if (vect_print_dump_info (REPORT_DETAILS))
		fprintf (vect_dump, "operand 1 using scalar mode.");
	      vec_oprnd1 = op1;
	    }
	}

      /* Otherwise widen the second operand to a vector def as well.  */
      if (!vec_oprnd1)
	vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt, NULL);
    }

  /* Arguments are ready. create the new vector stmt.  */

  if (op_type == binary_op)
    *vec_stmt = build2 (MODIFY_EXPR, vectype, vec_dest,
		build2 (code, vectype, vec_oprnd0, vec_oprnd1));
  else
    *vec_stmt = build2 (MODIFY_EXPR, vectype, vec_dest,
		build1 (code, vectype, vec_oprnd0));
  new_temp = make_ssa_name (vec_dest, *vec_stmt);
  TREE_OPERAND (*vec_stmt, 0) = new_temp;
  vect_finish_stmt_generation (stmt, *vec_stmt, bsi);

  return true;
}
1496 
1497 
/* Function vectorizable_store.

   Check if STMT defines a non scalar data-ref (array/pointer/structure) that
   can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at BSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.

   FORNOW only aligned stores are supported (asserted below).  */

bool
vectorizable_store (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
{
  tree scalar_dest;
  tree data_ref;
  tree op;
  tree vec_oprnd1;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  enum machine_mode vec_mode;
  tree dummy;
  enum dr_alignment_support alignment_support_cheme;
  ssa_op_iter iter;
  tree def, def_stmt;
  enum vect_def_type dt;

  /* Is vectorizable store? */

  if (TREE_CODE (stmt) != MODIFY_EXPR)
    return false;

  /* The lhs must be a memory reference (array or pointer access).  */
  scalar_dest = TREE_OPERAND (stmt, 0);
  if (TREE_CODE (scalar_dest) != ARRAY_REF
      && TREE_CODE (scalar_dest) != INDIRECT_REF)
    return false;

  /* The stored value must have a vectorizable definition.  */
  op = TREE_OPERAND (stmt, 1);
  if (!vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt))
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "use not simple.");
      return false;
    }

  vec_mode = TYPE_MODE (vectype);
  /* FORNOW. In some cases can vectorize even if data-type not supported
     (e.g. - array initialization with 0).  */
  if (mov_optab->handlers[(int)vec_mode].insn_code == CODE_FOR_nothing)
    return false;

  /* A store must have been recognized as a data reference earlier.  */
  if (!STMT_VINFO_DATA_REF (stmt_info))
    return false;


  if (!vec_stmt) /* transformation not required.  */
    {
      STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
      return true;
    }

  /** Transform.  **/

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "transform store");

  alignment_support_cheme = vect_supportable_dr_alignment (dr);
  gcc_assert (alignment_support_cheme);
  gcc_assert (alignment_support_cheme == dr_aligned);  /* FORNOW */

  /* Handle use - get the vectorized def from the defining stmt.  */
  vec_oprnd1 = vect_get_vec_def_for_operand (op, stmt, NULL);

  /* Handle def.  */
  /* FORNOW: make sure the data reference is aligned.  */
  vect_align_data_ref (stmt);
  data_ref = vect_create_data_ref_ptr (stmt, bsi, NULL_TREE, &dummy, false);
  data_ref = build_fold_indirect_ref (data_ref);

  /* Arguments are ready. create the new vector stmt.  */
  *vec_stmt = build2 (MODIFY_EXPR, vectype, data_ref, vec_oprnd1);
  vect_finish_stmt_generation (stmt, *vec_stmt, bsi);

  /* Copy the V_MAY_DEFS representing the aliasing of the original array
     element's definition to the vector's definition then update the
     defining statement.  The original is being deleted so the same
     SSA_NAMEs can be used.  */
  copy_virtual_operands (*vec_stmt, stmt);

  FOR_EACH_SSA_TREE_OPERAND (def, stmt, iter, SSA_OP_VMAYDEF)
    {
      /* Re-point each virtual def at the new vector store.  */
      SSA_NAME_DEF_STMT (def) = *vec_stmt;

      /* If this virtual def has a use outside the loop and a loop peel is
	 performed then the def may be renamed by the peel.  Mark it for
	 renaming so the later use will also be renamed.  */
      mark_sym_for_renaming (SSA_NAME_VAR (def));
    }

  return true;
}
1598 
1599 
1600 /* vectorizable_load.
1601 
1602    Check if STMT reads a non scalar data-ref (array/pointer/structure) that
1603    can be vectorized.
1604    If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1605    stmt to replace it, put it in VEC_STMT, and insert it at BSI.
1606    Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
1607 
1608 bool
vectorizable_load(tree stmt,block_stmt_iterator * bsi,tree * vec_stmt)1609 vectorizable_load (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
1610 {
1611   tree scalar_dest;
1612   tree vec_dest = NULL;
1613   tree data_ref = NULL;
1614   tree op;
1615   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1616   struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
1617   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1618   tree new_temp;
1619   int mode;
1620   tree init_addr;
1621   tree new_stmt;
1622   tree dummy;
1623   basic_block new_bb;
1624   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1625   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1626   edge pe = loop_preheader_edge (loop);
1627   enum dr_alignment_support alignment_support_cheme;
1628 
1629   /* Is vectorizable load? */
1630   if (!STMT_VINFO_RELEVANT_P (stmt_info))
1631     return false;
1632 
1633   gcc_assert (STMT_VINFO_DEF_TYPE (stmt_info) == vect_loop_def);
1634 
1635   if (STMT_VINFO_LIVE_P (stmt_info))
1636     {
1637       /* FORNOW: not yet supported.  */
1638       if (vect_print_dump_info (REPORT_DETAILS))
1639         fprintf (vect_dump, "value used after loop.");
1640       return false;
1641     }
1642 
1643   if (TREE_CODE (stmt) != MODIFY_EXPR)
1644     return false;
1645 
1646   scalar_dest = TREE_OPERAND (stmt, 0);
1647   if (TREE_CODE (scalar_dest) != SSA_NAME)
1648     return false;
1649 
1650   op = TREE_OPERAND (stmt, 1);
1651   if (TREE_CODE (op) != ARRAY_REF && TREE_CODE (op) != INDIRECT_REF)
1652     return false;
1653 
1654   if (!STMT_VINFO_DATA_REF (stmt_info))
1655     return false;
1656 
1657   mode = (int) TYPE_MODE (vectype);
1658 
1659   /* FORNOW. In some cases can vectorize even if data-type not supported
1660     (e.g. - data copies).  */
1661   if (mov_optab->handlers[mode].insn_code == CODE_FOR_nothing)
1662     {
1663       if (vect_print_dump_info (REPORT_DETAILS))
1664 	fprintf (vect_dump, "Aligned load, but unsupported type.");
1665       return false;
1666     }
1667 
1668   if (!vec_stmt) /* transformation not required.  */
1669     {
1670       STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
1671       return true;
1672     }
1673 
1674   /** Transform.  **/
1675 
1676   if (vect_print_dump_info (REPORT_DETAILS))
1677     fprintf (vect_dump, "transform load.");
1678 
1679   alignment_support_cheme = vect_supportable_dr_alignment (dr);
1680   gcc_assert (alignment_support_cheme);
1681 
1682   if (alignment_support_cheme == dr_aligned
1683       || alignment_support_cheme == dr_unaligned_supported)
1684     {
1685       /* Create:
1686          p = initial_addr;
1687          indx = 0;
1688          loop {
1689            vec_dest = *(p);
1690            indx = indx + 1;
1691          }
1692       */
1693 
1694       vec_dest = vect_create_destination_var (scalar_dest, vectype);
1695       data_ref = vect_create_data_ref_ptr (stmt, bsi, NULL_TREE, &dummy, false);
1696       if (aligned_access_p (dr))
1697         data_ref = build_fold_indirect_ref (data_ref);
1698       else
1699 	{
1700 	  int mis = DR_MISALIGNMENT (dr);
1701 	  tree tmis = (mis == -1 ? size_zero_node : size_int (mis));
1702 	  tmis = size_binop (MULT_EXPR, tmis, size_int(BITS_PER_UNIT));
1703 	  data_ref = build2 (MISALIGNED_INDIRECT_REF, vectype, data_ref, tmis);
1704 	}
1705       new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, data_ref);
1706       new_temp = make_ssa_name (vec_dest, new_stmt);
1707       TREE_OPERAND (new_stmt, 0) = new_temp;
1708       vect_finish_stmt_generation (stmt, new_stmt, bsi);
1709       copy_virtual_operands (new_stmt, stmt);
1710     }
1711   else if (alignment_support_cheme == dr_unaligned_software_pipeline)
1712     {
1713       /* Create:
1714 	 p1 = initial_addr;
1715 	 msq_init = *(floor(p1))
1716 	 p2 = initial_addr + VS - 1;
1717 	 magic = have_builtin ? builtin_result : initial_address;
1718 	 indx = 0;
1719 	 loop {
1720 	   p2' = p2 + indx * vectype_size
1721 	   lsq = *(floor(p2'))
1722 	   vec_dest = realign_load (msq, lsq, magic)
1723 	   indx = indx + 1;
1724 	   msq = lsq;
1725 	 }
1726       */
1727 
1728       tree offset;
1729       tree magic;
1730       tree phi_stmt;
1731       tree msq_init;
1732       tree msq, lsq;
1733       tree dataref_ptr;
1734       tree params;
1735 
1736       /* <1> Create msq_init = *(floor(p1)) in the loop preheader  */
1737       vec_dest = vect_create_destination_var (scalar_dest, vectype);
1738       data_ref = vect_create_data_ref_ptr (stmt, bsi, NULL_TREE,
1739 					   &init_addr, true);
1740       data_ref = build1 (ALIGN_INDIRECT_REF, vectype, data_ref);
1741       new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, data_ref);
1742       new_temp = make_ssa_name (vec_dest, new_stmt);
1743       TREE_OPERAND (new_stmt, 0) = new_temp;
1744       new_bb = bsi_insert_on_edge_immediate (pe, new_stmt);
1745       gcc_assert (!new_bb);
1746       msq_init = TREE_OPERAND (new_stmt, 0);
1747       copy_virtual_operands (new_stmt, stmt);
1748       update_vuses_to_preheader (new_stmt, loop);
1749 
1750 
1751       /* <2> Create lsq = *(floor(p2')) in the loop  */
1752       offset = size_int (TYPE_VECTOR_SUBPARTS (vectype) - 1);
1753       vec_dest = vect_create_destination_var (scalar_dest, vectype);
1754       dataref_ptr = vect_create_data_ref_ptr (stmt, bsi, offset, &dummy, false);
1755       data_ref = build1 (ALIGN_INDIRECT_REF, vectype, dataref_ptr);
1756       new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, data_ref);
1757       new_temp = make_ssa_name (vec_dest, new_stmt);
1758       TREE_OPERAND (new_stmt, 0) = new_temp;
1759       vect_finish_stmt_generation (stmt, new_stmt, bsi);
1760       lsq = TREE_OPERAND (new_stmt, 0);
1761       copy_virtual_operands (new_stmt, stmt);
1762 
1763 
1764       /* <3> */
1765       if (targetm.vectorize.builtin_mask_for_load)
1766 	{
1767 	  /* Create permutation mask, if required, in loop preheader.  */
1768 	  tree builtin_decl;
1769 	  params = build_tree_list (NULL_TREE, init_addr);
1770 	  vec_dest = vect_create_destination_var (scalar_dest, vectype);
1771 	  builtin_decl = targetm.vectorize.builtin_mask_for_load ();
1772 	  new_stmt = build_function_call_expr (builtin_decl, params);
1773 	  new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, new_stmt);
1774 	  new_temp = make_ssa_name (vec_dest, new_stmt);
1775 	  TREE_OPERAND (new_stmt, 0) = new_temp;
1776 	  new_bb = bsi_insert_on_edge_immediate (pe, new_stmt);
1777 	  gcc_assert (!new_bb);
1778 	  magic = TREE_OPERAND (new_stmt, 0);
1779 
1780 	  /* The result of the CALL_EXPR to this builtin is determined from
1781 	     the value of the parameter and no global variables are touched
1782 	     which makes the builtin a "const" function.  Requiring the
1783 	     builtin to have the "const" attribute makes it unnecessary
1784 	     to call mark_call_clobbered.  */
1785 	  gcc_assert (TREE_READONLY (builtin_decl));
1786 	}
1787       else
1788 	{
1789 	  /* Use current address instead of init_addr for reduced reg pressure.
1790 	   */
1791 	  magic = dataref_ptr;
1792 	}
1793 
1794 
1795       /* <4> Create msq = phi <msq_init, lsq> in loop  */
1796       vec_dest = vect_create_destination_var (scalar_dest, vectype);
1797       msq = make_ssa_name (vec_dest, NULL_TREE);
1798       phi_stmt = create_phi_node (msq, loop->header); /* CHECKME */
1799       SSA_NAME_DEF_STMT (msq) = phi_stmt;
1800       add_phi_arg (phi_stmt, msq_init, loop_preheader_edge (loop));
1801       add_phi_arg (phi_stmt, lsq, loop_latch_edge (loop));
1802 
1803 
1804       /* <5> Create <vec_dest = realign_load (msq, lsq, magic)> in loop  */
1805       vec_dest = vect_create_destination_var (scalar_dest, vectype);
1806       new_stmt = build3 (REALIGN_LOAD_EXPR, vectype, msq, lsq, magic);
1807       new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, new_stmt);
1808       new_temp = make_ssa_name (vec_dest, new_stmt);
1809       TREE_OPERAND (new_stmt, 0) = new_temp;
1810       vect_finish_stmt_generation (stmt, new_stmt, bsi);
1811     }
1812   else
1813     gcc_unreachable ();
1814 
1815   *vec_stmt = new_stmt;
1816   return true;
1817 }
1818 
1819 
1820 /* Function vectorizable_live_operation.
1821 
1822    STMT computes a value that is used outside the loop. Check if
1823    it can be supported.  */
1824 
1825 bool
vectorizable_live_operation(tree stmt,block_stmt_iterator * bsi ATTRIBUTE_UNUSED,tree * vec_stmt ATTRIBUTE_UNUSED)1826 vectorizable_live_operation (tree stmt,
1827                              block_stmt_iterator *bsi ATTRIBUTE_UNUSED,
1828                              tree *vec_stmt ATTRIBUTE_UNUSED)
1829 {
1830   tree operation;
1831   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1832   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1833   int i;
1834   enum tree_code code;
1835   int op_type;
1836   tree op;
1837   tree def, def_stmt;
1838   enum vect_def_type dt;
1839 
1840   if (!STMT_VINFO_LIVE_P (stmt_info))
1841     return false;
1842 
1843   if (TREE_CODE (stmt) != MODIFY_EXPR)
1844     return false;
1845 
1846   if (TREE_CODE (TREE_OPERAND (stmt, 0)) != SSA_NAME)
1847     return false;
1848 
1849   operation = TREE_OPERAND (stmt, 1);
1850   code = TREE_CODE (operation);
1851 
1852   op_type = TREE_CODE_LENGTH (code);
1853 
1854   /* FORNOW: support only if all uses are invariant. This means
1855      that the scalar operations can remain in place, unvectorized.
1856      The original last scalar value that they compute will be used.  */
1857 
1858   for (i = 0; i < op_type; i++)
1859     {
1860       op = TREE_OPERAND (operation, i);
1861       if (!vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt))
1862         {
1863           if (vect_print_dump_info (REPORT_DETAILS))
1864             fprintf (vect_dump, "use not simple.");
1865           return false;
1866         }
1867 
1868       if (dt != vect_invariant_def && dt != vect_constant_def)
1869         return false;
1870     }
1871 
1872   /* No transformation is required for the cases we currently support.  */
1873   return true;
1874 }
1875 
1876 
1877 /* Function vect_is_simple_cond.
1878 
1879    Input:
1880    LOOP - the loop that is being vectorized.
1881    COND - Condition that is checked for simple use.
1882 
1883    Returns whether a COND can be vectorized.  Checks whether
   condition operands are supportable using vect_is_simple_use.  */
1885 
1886 static bool
vect_is_simple_cond(tree cond,loop_vec_info loop_vinfo)1887 vect_is_simple_cond (tree cond, loop_vec_info loop_vinfo)
1888 {
1889   tree lhs, rhs;
1890   tree def;
1891   enum vect_def_type dt;
1892 
1893   if (!COMPARISON_CLASS_P (cond))
1894     return false;
1895 
1896   lhs = TREE_OPERAND (cond, 0);
1897   rhs = TREE_OPERAND (cond, 1);
1898 
1899   if (TREE_CODE (lhs) == SSA_NAME)
1900     {
1901       tree lhs_def_stmt = SSA_NAME_DEF_STMT (lhs);
1902       if (!vect_is_simple_use (lhs, loop_vinfo, &lhs_def_stmt, &def, &dt))
1903 	return false;
1904     }
1905   else if (TREE_CODE (lhs) != INTEGER_CST && TREE_CODE (lhs) != REAL_CST)
1906     return false;
1907 
1908   if (TREE_CODE (rhs) == SSA_NAME)
1909     {
1910       tree rhs_def_stmt = SSA_NAME_DEF_STMT (rhs);
1911       if (!vect_is_simple_use (rhs, loop_vinfo, &rhs_def_stmt, &def, &dt))
1912 	return false;
1913     }
1914   else if (TREE_CODE (rhs) != INTEGER_CST  && TREE_CODE (rhs) != REAL_CST)
1915     return false;
1916 
1917   return true;
1918 }
1919 
1920 /* vectorizable_condition.
1921 
1922    Check if STMT is conditional modify expression that can be vectorized.
1923    If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1924    stmt using VEC_COND_EXPR  to replace it, put it in VEC_STMT, and insert it
1925    at BSI.
1926 
1927    Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
1928 
1929 bool
vectorizable_condition(tree stmt,block_stmt_iterator * bsi,tree * vec_stmt)1930 vectorizable_condition (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
1931 {
1932   tree scalar_dest = NULL_TREE;
1933   tree vec_dest = NULL_TREE;
1934   tree op = NULL_TREE;
1935   tree cond_expr, then_clause, else_clause;
1936   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1937   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1938   tree vec_cond_lhs, vec_cond_rhs, vec_then_clause, vec_else_clause;
1939   tree vec_compare, vec_cond_expr;
1940   tree new_temp;
1941   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1942   enum machine_mode vec_mode;
1943   tree def;
1944   enum vect_def_type dt;
1945 
1946   if (!STMT_VINFO_RELEVANT_P (stmt_info))
1947     return false;
1948 
1949   gcc_assert (STMT_VINFO_DEF_TYPE (stmt_info) == vect_loop_def);
1950 
1951   if (STMT_VINFO_LIVE_P (stmt_info))
1952     {
1953       /* FORNOW: not yet supported.  */
1954       if (vect_print_dump_info (REPORT_DETAILS))
1955         fprintf (vect_dump, "value used after loop.");
1956       return false;
1957     }
1958 
1959   if (TREE_CODE (stmt) != MODIFY_EXPR)
1960     return false;
1961 
1962   op = TREE_OPERAND (stmt, 1);
1963 
1964   if (TREE_CODE (op) != COND_EXPR)
1965     return false;
1966 
1967   cond_expr = TREE_OPERAND (op, 0);
1968   then_clause = TREE_OPERAND (op, 1);
1969   else_clause = TREE_OPERAND (op, 2);
1970 
1971   /* We do not handle two different vector types for the condition
1972      and the values.  */
1973   if (TREE_TYPE (TREE_OPERAND (cond_expr, 0)) != TREE_TYPE (vectype))
1974     return false;
1975 
1976   if (!vect_is_simple_cond (cond_expr, loop_vinfo))
1977     return false;
1978 
1979   if (TREE_CODE (then_clause) == SSA_NAME)
1980     {
1981       tree then_def_stmt = SSA_NAME_DEF_STMT (then_clause);
1982       if (!vect_is_simple_use (then_clause, loop_vinfo,
1983 			       &then_def_stmt, &def, &dt))
1984 	return false;
1985     }
1986   else if (TREE_CODE (then_clause) != INTEGER_CST
1987 	   && TREE_CODE (then_clause) != REAL_CST)
1988     return false;
1989 
1990   if (TREE_CODE (else_clause) == SSA_NAME)
1991     {
1992       tree else_def_stmt = SSA_NAME_DEF_STMT (else_clause);
1993       if (!vect_is_simple_use (else_clause, loop_vinfo,
1994 			       &else_def_stmt, &def, &dt))
1995 	return false;
1996     }
1997   else if (TREE_CODE (else_clause) != INTEGER_CST
1998 	   && TREE_CODE (else_clause) != REAL_CST)
1999     return false;
2000 
2001 
2002   vec_mode = TYPE_MODE (vectype);
2003 
2004   if (!vec_stmt)
2005     {
2006       STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type;
2007       return expand_vec_cond_expr_p (op, vec_mode);
2008     }
2009 
2010   /* Transform */
2011 
2012   /* Handle def.  */
2013   scalar_dest = TREE_OPERAND (stmt, 0);
2014   vec_dest = vect_create_destination_var (scalar_dest, vectype);
2015 
2016   /* Handle cond expr.  */
2017   vec_cond_lhs =
2018     vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 0), stmt, NULL);
2019   vec_cond_rhs =
2020     vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 1), stmt, NULL);
2021   vec_then_clause = vect_get_vec_def_for_operand (then_clause, stmt, NULL);
2022   vec_else_clause = vect_get_vec_def_for_operand (else_clause, stmt, NULL);
2023 
2024   /* Arguments are ready. create the new vector stmt.  */
2025   vec_compare = build2 (TREE_CODE (cond_expr), vectype,
2026 			vec_cond_lhs, vec_cond_rhs);
2027   vec_cond_expr = build (VEC_COND_EXPR, vectype,
2028 			 vec_compare, vec_then_clause, vec_else_clause);
2029 
2030   *vec_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, vec_cond_expr);
2031   new_temp = make_ssa_name (vec_dest, *vec_stmt);
2032   TREE_OPERAND (*vec_stmt, 0) = new_temp;
2033   vect_finish_stmt_generation (stmt, *vec_stmt, bsi);
2034 
2035   return true;
2036 }
2037 
2038 /* Function vect_transform_stmt.
2039 
2040    Create a vectorized stmt to replace STMT, and insert it at BSI.  */
2041 
2042 bool
vect_transform_stmt(tree stmt,block_stmt_iterator * bsi)2043 vect_transform_stmt (tree stmt, block_stmt_iterator *bsi)
2044 {
2045   bool is_store = false;
2046   tree vec_stmt = NULL_TREE;
2047   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2048   bool done;
2049 
2050   if (STMT_VINFO_RELEVANT_P (stmt_info))
2051     {
2052       switch (STMT_VINFO_TYPE (stmt_info))
2053       {
2054       case op_vec_info_type:
2055 	done = vectorizable_operation (stmt, bsi, &vec_stmt);
2056 	gcc_assert (done);
2057 	break;
2058 
2059       case assignment_vec_info_type:
2060 	done = vectorizable_assignment (stmt, bsi, &vec_stmt);
2061 	gcc_assert (done);
2062 	break;
2063 
2064       case load_vec_info_type:
2065 	done = vectorizable_load (stmt, bsi, &vec_stmt);
2066 	gcc_assert (done);
2067 	break;
2068 
2069       case store_vec_info_type:
2070 	done = vectorizable_store (stmt, bsi, &vec_stmt);
2071 	gcc_assert (done);
2072 	is_store = true;
2073 	break;
2074 
2075       case condition_vec_info_type:
2076 	done = vectorizable_condition (stmt, bsi, &vec_stmt);
2077 	gcc_assert (done);
2078 	break;
2079 
2080       default:
2081 	if (vect_print_dump_info (REPORT_DETAILS))
2082 	  fprintf (vect_dump, "stmt not supported.");
2083 	gcc_unreachable ();
2084       }
2085 
2086       STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;
2087     }
2088 
2089   if (STMT_VINFO_LIVE_P (stmt_info))
2090     {
2091       switch (STMT_VINFO_TYPE (stmt_info))
2092       {
2093       case reduc_vec_info_type:
2094         done = vectorizable_reduction (stmt, bsi, &vec_stmt);
2095         gcc_assert (done);
2096         break;
2097 
2098       default:
2099         done = vectorizable_live_operation (stmt, bsi, &vec_stmt);
2100         gcc_assert (done);
2101       }
2102 
2103       if (vec_stmt)
2104         {
2105           gcc_assert (!STMT_VINFO_VEC_STMT (stmt_info));
2106           STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;
2107         }
2108     }
2109 
2110   return is_store;
2111 }
2112 
2113 
2114 /* This function builds ni_name = number of iterations loop executes
2115    on the loop preheader.  */
2116 
2117 static tree
vect_build_loop_niters(loop_vec_info loop_vinfo)2118 vect_build_loop_niters (loop_vec_info loop_vinfo)
2119 {
2120   tree ni_name, stmt, var;
2121   edge pe;
2122   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2123   tree ni = unshare_expr (LOOP_VINFO_NITERS (loop_vinfo));
2124 
2125   var = create_tmp_var (TREE_TYPE (ni), "niters");
2126   add_referenced_tmp_var (var);
2127   ni_name = force_gimple_operand (ni, &stmt, false, var);
2128 
2129   pe = loop_preheader_edge (loop);
2130   if (stmt)
2131     {
2132       basic_block new_bb = bsi_insert_on_edge_immediate (pe, stmt);
2133       gcc_assert (!new_bb);
2134     }
2135 
2136   return ni_name;
2137 }
2138 
2139 
2140 /* This function generates the following statements:
2141 
2142  ni_name = number of iterations loop executes
2143  ratio = ni_name / vf
2144  ratio_mult_vf_name = ratio * vf
2145 
2146  and places them at the loop preheader edge.  */
2147 
2148 static void
vect_generate_tmps_on_preheader(loop_vec_info loop_vinfo,tree * ni_name_ptr,tree * ratio_mult_vf_name_ptr,tree * ratio_name_ptr)2149 vect_generate_tmps_on_preheader (loop_vec_info loop_vinfo,
2150 				 tree *ni_name_ptr,
2151 				 tree *ratio_mult_vf_name_ptr,
2152 				 tree *ratio_name_ptr)
2153 {
2154 
2155   edge pe;
2156   basic_block new_bb;
2157   tree stmt, ni_name;
2158   tree var;
2159   tree ratio_name;
2160   tree ratio_mult_vf_name;
2161   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2162   tree ni = LOOP_VINFO_NITERS (loop_vinfo);
2163   int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
2164   tree log_vf;
2165 
2166   pe = loop_preheader_edge (loop);
2167 
2168   /* Generate temporary variable that contains
2169      number of iterations loop executes.  */
2170 
2171   ni_name = vect_build_loop_niters (loop_vinfo);
2172   log_vf = build_int_cst (TREE_TYPE (ni), exact_log2 (vf));
2173 
2174   /* Create: ratio = ni >> log2(vf) */
2175 
2176   var = create_tmp_var (TREE_TYPE (ni), "bnd");
2177   add_referenced_tmp_var (var);
2178   ratio_name = make_ssa_name (var, NULL_TREE);
2179   stmt = build2 (MODIFY_EXPR, void_type_node, ratio_name,
2180 	   build2 (RSHIFT_EXPR, TREE_TYPE (ni_name), ni_name, log_vf));
2181   SSA_NAME_DEF_STMT (ratio_name) = stmt;
2182 
2183   pe = loop_preheader_edge (loop);
2184   new_bb = bsi_insert_on_edge_immediate (pe, stmt);
2185   gcc_assert (!new_bb);
2186 
2187   /* Create: ratio_mult_vf = ratio << log2 (vf).  */
2188 
2189   var = create_tmp_var (TREE_TYPE (ni), "ratio_mult_vf");
2190   add_referenced_tmp_var (var);
2191   ratio_mult_vf_name = make_ssa_name (var, NULL_TREE);
2192   stmt = build2 (MODIFY_EXPR, void_type_node, ratio_mult_vf_name,
2193 	   build2 (LSHIFT_EXPR, TREE_TYPE (ratio_name), ratio_name, log_vf));
2194   SSA_NAME_DEF_STMT (ratio_mult_vf_name) = stmt;
2195 
2196   pe = loop_preheader_edge (loop);
2197   new_bb = bsi_insert_on_edge_immediate (pe, stmt);
2198   gcc_assert (!new_bb);
2199 
2200   *ni_name_ptr = ni_name;
2201   *ratio_mult_vf_name_ptr = ratio_mult_vf_name;
2202   *ratio_name_ptr = ratio_name;
2203 
2204   return;
2205 }
2206 
2207 
2208 /* Function update_vuses_to_preheader.
2209 
2210    Input:
2211    STMT - a statement with potential VUSEs.
2212    LOOP - the loop whose preheader will contain STMT.
2213 
2214    It's possible to vectorize a loop even though an SSA_NAME from a VUSE
2215    appears to be defined in a V_MAY_DEF in another statement in a loop.
2216    One such case is when the VUSE is at the dereference of a __restricted__
2217    pointer in a load and the V_MAY_DEF is at the dereference of a different
2218    __restricted__ pointer in a store.  Vectorization may result in
   copy_virtual_operands being called to copy the problematic VUSE to a new
2220    statement that is being inserted in the loop preheader.  This procedure
2221    is called to change the SSA_NAME in the new statement's VUSE from the
2222    SSA_NAME updated in the loop to the related SSA_NAME available on the
2223    path entering the loop.
2224 
2225    When this function is called, we have the following situation:
2226 
2227         # vuse <name1>
2228         S1: vload
2229     do {
2230         # name1 = phi < name0 , name2>
2231 
2232         # vuse <name1>
2233         S2: vload
2234 
2235         # name2 = vdef <name1>
2236         S3: vstore
2237 
2238     }while...
2239 
2240    Stmt S1 was created in the loop preheader block as part of misaligned-load
2241    handling. This function fixes the name of the vuse of S1 from 'name1' to
2242    'name0'.  */
2243 
2244 static void
update_vuses_to_preheader(tree stmt,struct loop * loop)2245 update_vuses_to_preheader (tree stmt, struct loop *loop)
2246 {
2247   basic_block header_bb = loop->header;
2248   edge preheader_e = loop_preheader_edge (loop);
2249   ssa_op_iter iter;
2250   use_operand_p use_p;
2251 
2252   FOR_EACH_SSA_USE_OPERAND (use_p, stmt, iter, SSA_OP_VUSE)
2253     {
2254       tree ssa_name = USE_FROM_PTR (use_p);
2255       tree def_stmt = SSA_NAME_DEF_STMT (ssa_name);
2256       tree name_var = SSA_NAME_VAR (ssa_name);
2257       basic_block bb = bb_for_stmt (def_stmt);
2258 
2259       /* For a use before any definitions, def_stmt is a NOP_EXPR.  */
2260       if (!IS_EMPTY_STMT (def_stmt)
2261 	  && flow_bb_inside_loop_p (loop, bb))
2262         {
2263           /* If the block containing the statement defining the SSA_NAME
2264              is in the loop then it's necessary to find the definition
2265              outside the loop using the PHI nodes of the header.  */
2266 	  tree phi;
2267 	  bool updated = false;
2268 
2269 	  for (phi = phi_nodes (header_bb); phi; phi = TREE_CHAIN (phi))
2270 	    {
2271 	      if (SSA_NAME_VAR (PHI_RESULT (phi)) == name_var)
2272 		{
2273 		  SET_USE (use_p, PHI_ARG_DEF (phi, preheader_e->dest_idx));
2274 		  updated = true;
2275 		  break;
2276 		}
2277 	    }
2278 	  gcc_assert (updated);
2279 	}
2280     }
2281 }
2282 
2283 
2284 /*   Function vect_update_ivs_after_vectorizer.
2285 
2286      "Advance" the induction variables of LOOP to the value they should take
2287      after the execution of LOOP.  This is currently necessary because the
2288      vectorizer does not handle induction variables that are used after the
2289      loop.  Such a situation occurs when the last iterations of LOOP are
2290      peeled, because:
2291      1. We introduced new uses after LOOP for IVs that were not originally used
2292         after LOOP: the IVs of LOOP are now used by an epilog loop.
2293      2. LOOP is going to be vectorized; this means that it will iterate N/VF
2294         times, whereas the loop IVs should be bumped N times.
2295 
2296      Input:
2297      - LOOP - a loop that is going to be vectorized. The last few iterations
2298               of LOOP were peeled.
2299      - NITERS - the number of iterations that LOOP executes (before it is
2300                 vectorized). i.e, the number of times the ivs should be bumped.
2301      - UPDATE_E - a successor edge of LOOP->exit that is on the (only) path
2302                   coming out from LOOP on which there are uses of the LOOP ivs
2303 		  (this is the path from LOOP->exit to epilog_loop->preheader).
2304 
2305                   The new definitions of the ivs are placed in LOOP->exit.
2306                   The phi args associated with the edge UPDATE_E in the bb
2307                   UPDATE_E->dest are updated accordingly.
2308 
2309      Assumption 1: Like the rest of the vectorizer, this function assumes
2310      a single loop exit that has a single predecessor.
2311 
2312      Assumption 2: The phi nodes in the LOOP header and in update_bb are
2313      organized in the same order.
2314 
2315      Assumption 3: The access function of the ivs is simple enough (see
2316      vect_can_advance_ivs_p).  This assumption will be relaxed in the future.
2317 
2318      Assumption 4: Exactly one of the successors of LOOP exit-bb is on a path
2319      coming out of LOOP on which the ivs of LOOP are used (this is the path
2320      that leads to the epilog loop; other paths skip the epilog loop).  This
2321      path starts with the edge UPDATE_E, and its destination (denoted update_bb)
2322      needs to have its phis updated.
2323  */
2324 
static void
vect_update_ivs_after_vectorizer (loop_vec_info loop_vinfo, tree niters,
				  edge update_e)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block exit_bb = loop->single_exit->dest;
  tree phi, phi1;
  basic_block update_bb = update_e->dest;

  /* gcc_assert (vect_can_advance_ivs_p (loop_vinfo)); */

  /* Make sure there exists a single-predecessor exit bb:  */
  gcc_assert (single_pred_p (exit_bb));

  /* Walk the header PHIs and the update_bb PHIs in lockstep; this relies
     on Assumption 2 above (both lists are organized in the same order).  */
  for (phi = phi_nodes (loop->header), phi1 = phi_nodes (update_bb);
       phi && phi1;
       phi = PHI_CHAIN (phi), phi1 = PHI_CHAIN (phi1))
    {
      tree access_fn = NULL;
      tree evolution_part;
      tree init_expr;
      tree step_expr;
      tree var, stmt, ni, ni_name;
      block_stmt_iterator last_bsi;

      if (vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "vect_update_ivs_after_vectorizer: phi: ");
          print_generic_expr (vect_dump, phi, TDF_SLIM);
        }

      /* Skip virtual phi's.  */
      if (!is_gimple_reg (SSA_NAME_VAR (PHI_RESULT (phi))))
	{
	  if (vect_print_dump_info (REPORT_DETAILS))
	    fprintf (vect_dump, "virtual phi. skip.");
	  continue;
	}

      /* Skip reduction phis.  */
      if (STMT_VINFO_DEF_TYPE (vinfo_for_stmt (phi)) == vect_reduction_def)
        {
          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "reduc phi. skip.");
          continue;
        }

      /* Recover the IV's access function init + i*step from scev.  */
      access_fn = analyze_scalar_evolution (loop, PHI_RESULT (phi));
      gcc_assert (access_fn);
      evolution_part =
	 unshare_expr (evolution_part_in_loop_num (access_fn, loop->num));
      gcc_assert (evolution_part != NULL_TREE);

      /* FORNOW: We do not support IVs whose evolution function is a polynomial
         of degree >= 2 or exponential.  */
      gcc_assert (!tree_is_chrec (evolution_part));

      step_expr = evolution_part;
      init_expr = unshare_expr (initial_condition_in_loop_num (access_fn,
							       loop->num));

      /* The value of the IV after NITERS iterations:
	 ni = init + niters * step.  */
      ni = build2 (PLUS_EXPR, TREE_TYPE (init_expr),
		  build2 (MULT_EXPR, TREE_TYPE (niters),
		       niters, step_expr), init_expr);

      var = create_tmp_var (TREE_TYPE (init_expr), "tmp");
      add_referenced_tmp_var (var);

      /* Gimplify NI; any helper statements end up in STMT.  */
      ni_name = force_gimple_operand (ni, &stmt, false, var);

      /* Insert stmt into exit_bb.  */
      last_bsi = bsi_last (exit_bb);
      if (stmt)
        bsi_insert_before (&last_bsi, stmt, BSI_SAME_STMT);

      /* Fix phi expressions in the successor bb.  */
      SET_PHI_ARG_DEF (phi1, update_e->dest_idx, ni_name);
    }
}
2404 
2405 
2406 /* Function vect_do_peeling_for_loop_bound
2407 
2408    Peel the last iterations of the loop represented by LOOP_VINFO.
2409    The peeled iterations form a new epilog loop.  Given that the loop now
2410    iterates NITERS times, the new epilog loop iterates
2411    NITERS % VECTORIZATION_FACTOR times.
2412 
2413    The original loop will later be made to iterate
2414    NITERS / VECTORIZATION_FACTOR times (this value is placed into RATIO).  */
2415 
static void
vect_do_peeling_for_loop_bound (loop_vec_info loop_vinfo, tree *ratio,
				struct loops *loops)
{
  tree ni_name, ratio_mult_vf_name;
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  struct loop *new_loop;
  edge update_e;
  basic_block preheader;
  int loop_num;

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "=== vect_do_peeling_for_loop_bound ===");

  /* Needed by slpeel_tree_peel_loop_to_edge to track loop duplicates.  */
  initialize_original_copy_tables ();

  /* Generate the following variables on the preheader of original loop:

     ni_name = number of iteration the original loop executes
     ratio = ni_name / vf
     ratio_mult_vf_name = ratio * vf  */
  vect_generate_tmps_on_preheader (loop_vinfo, &ni_name,
				   &ratio_mult_vf_name, ratio);

  /* Peel NITERS % VF iterations off the end, into a new epilog loop; the
     original loop will then run ratio_mult_vf_name iterations.  Peeling
     must leave the original loop's number unchanged.  */
  loop_num  = loop->num;
  new_loop = slpeel_tree_peel_loop_to_edge (loop, loops, loop->single_exit,
					    ratio_mult_vf_name, ni_name, false);
  gcc_assert (new_loop);
  gcc_assert (loop_num == loop->num);
#ifdef ENABLE_CHECKING
  slpeel_verify_cfg_after_peeling (loop, new_loop);
#endif

  /* A guard that controls whether the new_loop is to be executed or skipped
     is placed in LOOP->exit.  LOOP->exit therefore has two successors - one
     is the preheader of NEW_LOOP, where the IVs from LOOP are used.  The other
     is a bb after NEW_LOOP, where these IVs are not used.  Find the edge that
     is on the path where the LOOP IVs are used and need to be updated.  */

  preheader = loop_preheader_edge (new_loop)->src;
  if (EDGE_PRED (preheader, 0)->src == loop->single_exit->dest)
    update_e = EDGE_PRED (preheader, 0);
  else
    update_e = EDGE_PRED (preheader, 1);

  /* Update IVs of original loop as if they were advanced
     by ratio_mult_vf_name steps.  */
  vect_update_ivs_after_vectorizer (loop_vinfo, ratio_mult_vf_name, update_e);

  /* After peeling we have to reset scalar evolution analyzer.  */
  scev_reset ();

  free_original_copy_tables ();
}
2470 
2471 
2472 /* Function vect_gen_niters_for_prolog_loop
2473 
2474    Set the number of iterations for the loop represented by LOOP_VINFO
2475    to the minimum between LOOP_NITERS (the original iteration count of the loop)
2476    and the misalignment of DR - the data reference recorded in
2477    LOOP_VINFO_UNALIGNED_DR (LOOP_VINFO).  As a result, after the execution of
2478    this loop, the data reference DR will refer to an aligned location.
2479 
2480    The following computation is generated:
2481 
2482    If the misalignment of DR is known at compile time:
2483      addr_mis = int mis = DR_MISALIGNMENT (dr);
2484    Else, compute address misalignment in bytes:
2485      addr_mis = addr & (vectype_size - 1)
2486 
2487    prolog_niters = min ( LOOP_NITERS , (VF - addr_mis/elem_size)&(VF-1) )
2488 
2489    (elem_size = element type size; an element is the scalar element
2490 	whose type is the inner type of the vectype)  */
2491 
static tree
vect_gen_niters_for_prolog_loop (loop_vec_info loop_vinfo, tree loop_niters)
{
  struct data_reference *dr = LOOP_VINFO_UNALIGNED_DR (loop_vinfo);
  int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  tree var, stmt;
  tree iters, iters_name;
  edge pe;
  basic_block new_bb;
  tree dr_stmt = DR_STMT (dr);
  stmt_vec_info stmt_info = vinfo_for_stmt (dr_stmt);
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  /* Vector alignment in bytes.  */
  int vectype_align = TYPE_ALIGN (vectype) / BITS_PER_UNIT;
  tree niters_type = TREE_TYPE (loop_niters);

  pe = loop_preheader_edge (loop);

  if (LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo) > 0)
    {
      /* Misalignment of DR is known at compile time; compute the prolog
	 iteration count as a constant.  */
      int byte_misalign = LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo);
      /* Size in bytes of one scalar element of the vector type.  */
      int element_size = vectype_align/vf;
      int elem_misalign = byte_misalign / element_size;

      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "known alignment = %d.", byte_misalign);
      /* (VF - elem_misalign) & (VF - 1): number of elements to peel so
	 that DR becomes aligned (VF is a power of two).  */
      iters = build_int_cst (niters_type, (vf - elem_misalign)&(vf-1));
    }
  else
    {
      /* Misalignment unknown; emit code that computes it at runtime from
	 the low bits of DR's start address.  */
      tree new_stmts = NULL_TREE;
      tree start_addr =
        vect_create_addr_base_for_vector_ref (dr_stmt, &new_stmts, NULL_TREE);
      tree ptr_type = TREE_TYPE (start_addr);
      tree size = TYPE_SIZE (ptr_type);
      /* Unsigned integer type as wide as a pointer, for the bit twiddling.  */
      tree type = lang_hooks.types.type_for_size (tree_low_cst (size, 1), 1);
      tree vectype_size_minus_1 = build_int_cst (type, vectype_align - 1);
      tree elem_size_log =
        build_int_cst (type, exact_log2 (vectype_align/vf));
      tree vf_minus_1 = build_int_cst (type, vf - 1);
      tree vf_tree = build_int_cst (type, vf);
      tree byte_misalign;
      tree elem_misalign;

      /* Insert the address computation on the preheader edge.  */
      new_bb = bsi_insert_on_edge_immediate (pe, new_stmts);
      gcc_assert (!new_bb);

      /* Create:  byte_misalign = addr & (vectype_size - 1)  */
      byte_misalign =
        build2 (BIT_AND_EXPR, type, start_addr, vectype_size_minus_1);

      /* Create:  elem_misalign = byte_misalign / element_size  */
      elem_misalign =
        build2 (RSHIFT_EXPR, type, byte_misalign, elem_size_log);

      /* Create:  (niters_type) (VF - elem_misalign)&(VF - 1)  */
      iters = build2 (MINUS_EXPR, type, vf_tree, elem_misalign);
      iters = build2 (BIT_AND_EXPR, type, iters, vf_minus_1);
      iters = fold_convert (niters_type, iters);
    }

  /* Create:  prolog_loop_niters = min (iters, loop_niters) */
  /* If the loop bound is known at compile time we already verified that it is
     greater than vf; since the misalignment ('iters') is at most vf, there's
     no need to generate the MIN_EXPR in this case.  */
  if (TREE_CODE (loop_niters) != INTEGER_CST)
    iters = build2 (MIN_EXPR, niters_type, iters, loop_niters);

  if (vect_print_dump_info (REPORT_DETAILS))
    {
      fprintf (vect_dump, "niters for prolog loop: ");
      print_generic_expr (vect_dump, iters, TDF_SLIM);
    }

  var = create_tmp_var (niters_type, "prolog_loop_niters");
  add_referenced_tmp_var (var);
  /* Gimplify the count; helper statements end up in STMT.  */
  iters_name = force_gimple_operand (iters, &stmt, false, var);

  /* Insert stmt on loop preheader edge.  */
  if (stmt)
    {
      basic_block new_bb = bsi_insert_on_edge_immediate (pe, stmt);
      gcc_assert (!new_bb);
    }

  return iters_name;
}
2579 
2580 
2581 /* Function vect_update_init_of_dr
2582 
2583    NITERS iterations were peeled from LOOP.  DR represents a data reference
2584    in LOOP.  This function updates the information recorded in DR to
2585    account for the fact that the first NITERS iterations had already been
2586    executed.  Specifically, it updates the OFFSET field of DR.  */
2587 
2588 static void
vect_update_init_of_dr(struct data_reference * dr,tree niters)2589 vect_update_init_of_dr (struct data_reference *dr, tree niters)
2590 {
2591   tree offset = DR_OFFSET (dr);
2592 
2593   niters = fold_build2 (MULT_EXPR, TREE_TYPE (niters), niters, DR_STEP (dr));
2594   offset = fold_build2 (PLUS_EXPR, TREE_TYPE (offset), offset, niters);
2595   DR_OFFSET (dr) = offset;
2596 }
2597 
2598 
2599 /* Function vect_update_inits_of_drs
2600 
2601    NITERS iterations were peeled from the loop represented by LOOP_VINFO.
2602    This function updates the information recorded for the data references in
2603    the loop to account for the fact that the first NITERS iterations had
2604    already been executed.  Specifically, it updates the initial_condition of the
2605    access_function of all the data_references in the loop.  */
2606 
2607 static void
vect_update_inits_of_drs(loop_vec_info loop_vinfo,tree niters)2608 vect_update_inits_of_drs (loop_vec_info loop_vinfo, tree niters)
2609 {
2610   unsigned int i;
2611   varray_type datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
2612 
2613   if (vect_dump && (dump_flags & TDF_DETAILS))
2614     fprintf (vect_dump, "=== vect_update_inits_of_dr ===");
2615 
2616   for (i = 0; i < VARRAY_ACTIVE_SIZE (datarefs); i++)
2617     {
2618       struct data_reference *dr = VARRAY_GENERIC_PTR (datarefs, i);
2619       vect_update_init_of_dr (dr, niters);
2620     }
2621 }
2622 
2623 
2624 /* Function vect_do_peeling_for_alignment
2625 
2626    Peel the first 'niters' iterations of the loop represented by LOOP_VINFO.
2627    'niters' is set to the misalignment of one of the data references in the
2628    loop, thereby forcing it to refer to an aligned location at the beginning
2629    of the execution of this loop.  The data reference for which we are
2630    peeling is recorded in LOOP_VINFO_UNALIGNED_DR.  */
2631 
static void
vect_do_peeling_for_alignment (loop_vec_info loop_vinfo, struct loops *loops)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  tree niters_of_prolog_loop, ni_name;
  tree n_iters;
  struct loop *new_loop;

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "=== vect_do_peeling_for_alignment ===");

  /* The original/copy tables must bracket slpeel_tree_peel_loop_to_edge,
     which records bb/edge correspondences in them while duplicating the
     loop.  */
  initialize_original_copy_tables ();

  /* Build an SSA name for the loop's iteration count, then compute how
     many iterations the prolog must execute so that the unaligned data
     reference (LOOP_VINFO_UNALIGNED_DR) becomes aligned.  */
  ni_name = vect_build_loop_niters (loop_vinfo);
  niters_of_prolog_loop = vect_gen_niters_for_prolog_loop (loop_vinfo, ni_name);

  /* Peel the prolog loop and iterate it niters_of_prolog_loop.  */
  new_loop =
	slpeel_tree_peel_loop_to_edge (loop, loops, loop_preheader_edge (loop),
				       niters_of_prolog_loop, ni_name, true);
  /* Peeling on the preheader edge of a vectorizable loop must succeed.  */
  gcc_assert (new_loop);
#ifdef ENABLE_CHECKING
  slpeel_verify_cfg_after_peeling (new_loop, loop);
#endif

  /* Update number of times loop executes: the vectorized loop now runs
     only the remaining (n_iters - prolog iterations) iterations.  */
  n_iters = LOOP_VINFO_NITERS (loop_vinfo);
  LOOP_VINFO_NITERS (loop_vinfo) = fold_build2 (MINUS_EXPR,
		TREE_TYPE (n_iters), n_iters, niters_of_prolog_loop);

  /* Update the init conditions of the access functions of all data refs.  */
  vect_update_inits_of_drs (loop_vinfo, niters_of_prolog_loop);

  /* After peeling we have to reset scalar evolution analyzer.  */
  scev_reset ();

  free_original_copy_tables ();
}
2670 
2671 
2672 /* Function vect_create_cond_for_align_checks.
2673 
   Create a conditional expression that represents the alignment checks for
   all data references (array element references) whose alignment must be
   checked at runtime.
2677 
2678    Input:
2679    LOOP_VINFO - two fields of the loop information are used.
2680                 LOOP_VINFO_PTR_MASK is the mask used to check the alignment.
2681                 LOOP_VINFO_MAY_MISALIGN_STMTS contains the refs to be checked.
2682 
2683    Output:
2684    COND_EXPR_STMT_LIST - statements needed to construct the conditional
2685                          expression.
2686    The returned value is the conditional expression to be used in the if
2687    statement that controls which version of the loop gets executed at runtime.
2688 
   The algorithm makes two assumptions:
     1) The number of bytes "n" in a vector is a power of 2.
     2) An address "a" is aligned if a%n is zero, and this
        test can be done as a&(n-1) == 0.  For example, for 16
        byte vectors the test is a&0xf == 0.  */
2694 
static tree
vect_create_cond_for_align_checks (loop_vec_info loop_vinfo,
                                   tree *cond_expr_stmt_list)
{
  VEC(tree,heap) *may_misalign_stmts
    = LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo);
  tree ref_stmt;
  int mask = LOOP_VINFO_PTR_MASK (loop_vinfo);
  tree mask_cst;
  unsigned int i;
  tree psize;
  tree int_ptrsize_type;
  char tmp_name[20];
  /* Accumulates the bitwise-OR of all the cast addresses; NULL_TREE until
     the first reference has been processed.  */
  tree or_tmp_name = NULL_TREE;
  tree and_tmp, and_tmp_name, and_stmt;
  tree ptrsize_zero;

  /* Check that mask is one less than a power of 2, i.e., mask is
     all zeros followed by all ones.  */
  gcc_assert ((mask != 0) && ((mask & (mask+1)) == 0));

  /* CHECKME: what is the best integer or unsigned type to use to hold a
     cast from a pointer value?  */
  psize = TYPE_SIZE (ptr_type_node);
  int_ptrsize_type
    = lang_hooks.types.type_for_size (tree_low_cst (psize, 1), 0);

  /* Create expression (mask & (dr_1 || ... || dr_n)) where dr_i is the address
     of the first vector of the i'th data reference. */

  for (i = 0; VEC_iterate (tree, may_misalign_stmts, i, ref_stmt); i++)
    {
      tree new_stmt_list = NULL_TREE;
      tree addr_base;
      tree addr_tmp, addr_tmp_name, addr_stmt;
      tree or_tmp, new_or_tmp_name, or_stmt;

      /* create: addr_tmp = (int)(address_of_first_vector) */
      addr_base = vect_create_addr_base_for_vector_ref (ref_stmt,
							&new_stmt_list,
							NULL_TREE);

      /* Any statements needed to compute the address go into the
	 conditional's statement list as well.  */
      if (new_stmt_list != NULL_TREE)
        append_to_statement_list_force (new_stmt_list, cond_expr_stmt_list);

      sprintf (tmp_name, "%s%d", "addr2int", i);
      addr_tmp = create_tmp_var (int_ptrsize_type, tmp_name);
      add_referenced_tmp_var (addr_tmp);
      addr_tmp_name = make_ssa_name (addr_tmp, NULL_TREE);
      addr_stmt = fold_convert (int_ptrsize_type, addr_base);
      addr_stmt = build2 (MODIFY_EXPR, void_type_node,
                          addr_tmp_name, addr_stmt);
      /* The SSA name's defining statement must be wired up by hand since
	 the statement is built outside a bsi insertion.  */
      SSA_NAME_DEF_STMT (addr_tmp_name) = addr_stmt;
      append_to_statement_list_force (addr_stmt, cond_expr_stmt_list);

      /* The addresses are ORed together.  */

      if (or_tmp_name != NULL_TREE)
        {
          /* create: or_tmp = or_tmp | addr_tmp */
          sprintf (tmp_name, "%s%d", "orptrs", i);
          or_tmp = create_tmp_var (int_ptrsize_type, tmp_name);
          add_referenced_tmp_var (or_tmp);
          new_or_tmp_name = make_ssa_name (or_tmp, NULL_TREE);
          or_stmt = build2 (MODIFY_EXPR, void_type_node, new_or_tmp_name,
                            build2 (BIT_IOR_EXPR, int_ptrsize_type,
	                            or_tmp_name,
                                    addr_tmp_name));
          SSA_NAME_DEF_STMT (new_or_tmp_name) = or_stmt;
          append_to_statement_list_force (or_stmt, cond_expr_stmt_list);
          or_tmp_name = new_or_tmp_name;
        }
      else
	/* First reference: seed the accumulator with its address.  */
        or_tmp_name = addr_tmp_name;

    } /* end for i */

  mask_cst = build_int_cst (int_ptrsize_type, mask);

  /* create: and_tmp = or_tmp & mask  */
  and_tmp = create_tmp_var (int_ptrsize_type, "andmask" );
  add_referenced_tmp_var (and_tmp);
  and_tmp_name = make_ssa_name (and_tmp, NULL_TREE);

  and_stmt = build2 (MODIFY_EXPR, void_type_node,
                     and_tmp_name,
                     build2 (BIT_AND_EXPR, int_ptrsize_type,
                             or_tmp_name, mask_cst));
  SSA_NAME_DEF_STMT (and_tmp_name) = and_stmt;
  append_to_statement_list_force (and_stmt, cond_expr_stmt_list);

  /* Make and_tmp the left operand of the conditional test against zero.
     if and_tmp has a non-zero bit then some address is unaligned.  */
  ptrsize_zero = build_int_cst (int_ptrsize_type, 0);
  return build2 (EQ_EXPR, boolean_type_node,
                 and_tmp_name, ptrsize_zero);
}
2792 
2793 
2794 /* Function vect_transform_loop.
2795 
   The analysis phase has determined that the loop is vectorizable.
   Vectorize the loop - create vectorized stmts to replace the scalar
   stmts in the loop, and update the loop exit condition.  */
2799 
void
vect_transform_loop (loop_vec_info loop_vinfo,
		     struct loops *loops ATTRIBUTE_UNUSED)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
  int nbbs = loop->num_nodes;
  block_stmt_iterator si;
  int i;
  tree ratio = NULL;
  int vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
  bitmap_iterator bi;
  unsigned int j;

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "=== vec_transform_loop ===");

  /* If the loop has data references that may or may not be aligned then
     two versions of the loop need to be generated, one which is vectorized
     and one which isn't.  A test is then generated to control which of the
     loops is executed.  The test checks for the alignment of all of the
     data references that may or may not be aligned. */

  if (VEC_length (tree, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo)))
    {
      struct loop *nloop;
      tree cond_expr;
      tree cond_expr_stmt_list = NULL_TREE;
      basic_block condition_bb;
      block_stmt_iterator cond_exp_bsi;
      basic_block merge_bb;
      basic_block new_exit_bb;
      edge new_exit_e, e;
      tree orig_phi, new_phi, arg;

      /* Build the runtime alignment test, then version the loop on it.  */
      cond_expr = vect_create_cond_for_align_checks (loop_vinfo,
                                                     &cond_expr_stmt_list);
      initialize_original_copy_tables ();
      nloop = loop_version (loops, loop, cond_expr, &condition_bb, true);
      free_original_copy_tables();

      /** Loop versioning violates an assumption we try to maintain during
	 vectorization - that the loop exit block has a single predecessor.
	 After versioning, the exit block of both loop versions is the same
	 basic block (i.e. it has two predecessors). Just in order to simplify
	 following transformations in the vectorizer, we fix this situation
	 here by adding a new (empty) block on the exit-edge of the loop,
	 with the proper loop-exit phis to maintain loop-closed-form.  **/

      merge_bb = loop->single_exit->dest;
      gcc_assert (EDGE_COUNT (merge_bb->preds) == 2);
      new_exit_bb = split_edge (loop->single_exit);
      add_bb_to_loop (new_exit_bb, loop->outer);
      new_exit_e = loop->single_exit;
      e = EDGE_SUCC (new_exit_bb, 0);

      /* For each phi in the merge block, create a corresponding phi in
	 the new exit block and make the merge phi use its result, so the
	 vectorized loop's exit value flows through the new block.  */
      for (orig_phi = phi_nodes (merge_bb); orig_phi;
	   orig_phi = PHI_CHAIN (orig_phi))
	{
          new_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (orig_phi)),
				     new_exit_bb);
          arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, e);
          add_phi_arg (new_phi, arg, new_exit_e);
	  SET_PHI_ARG_DEF (orig_phi, e->dest_idx, PHI_RESULT (new_phi));
	}

      /** end loop-exit-fixes after versioning  **/

      update_ssa (TODO_update_ssa);
      /* Emit the statements that compute the alignment condition at the
	 end of the block that holds the versioning test.  */
      cond_exp_bsi = bsi_last (condition_bb);
      bsi_insert_before (&cond_exp_bsi, cond_expr_stmt_list, BSI_SAME_STMT);
    }

  /* CHECKME: we wouldn't need this if we called update_ssa once
     for all loops.  */
  bitmap_zero (vect_vnames_to_rename);

  /* Peel the loop if there are data refs with unknown alignment.
     Only one data ref with unknown store is allowed.  */

  if (LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo))
    vect_do_peeling_for_alignment (loop_vinfo, loops);

  /* If the loop has a symbolic number of iterations 'n' (i.e. it's not a
     compile time constant), or it is a constant that doesn't divide by the
     vectorization factor, then an epilog loop needs to be created.
     We therefore duplicate the loop: the original loop will be vectorized,
     and will compute the first (n/VF) iterations. The second copy of the loop
     will remain scalar and will compute the remaining (n%VF) iterations.
     (VF is the vectorization factor).  */

  if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
      || (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
          && LOOP_VINFO_INT_NITERS (loop_vinfo) % vectorization_factor != 0))
    vect_do_peeling_for_loop_bound (loop_vinfo, &ratio, loops);
  else
    /* Known, evenly-divisible iteration count: the vector loop simply
       runs niters/VF times; no epilog needed.  */
    ratio = build_int_cst (TREE_TYPE (LOOP_VINFO_NITERS (loop_vinfo)),
		LOOP_VINFO_INT_NITERS (loop_vinfo) / vectorization_factor);

  /* 1) Make sure the loop header has exactly two entries
     2) Make sure we have a preheader basic block.  */

  gcc_assert (EDGE_COUNT (loop->header->preds) == 2);

  loop_split_edge_with (loop_preheader_edge (loop), NULL);


  /* FORNOW: the vectorizer supports only loops which body consist
     of one basic block (header + empty latch). When the vectorizer will
     support more involved loop forms, the order by which the BBs are
     traversed need to be reconsidered.  */

  for (i = 0; i < nbbs; i++)
    {
      basic_block bb = bbs[i];

      for (si = bsi_start (bb); !bsi_end_p (si);)
	{
	  tree stmt = bsi_stmt (si);
	  stmt_vec_info stmt_info;
	  bool is_store;

	  if (vect_print_dump_info (REPORT_DETAILS))
	    {
	      fprintf (vect_dump, "------>vectorizing statement: ");
	      print_generic_expr (vect_dump, stmt, TDF_SLIM);
	    }
	  stmt_info = vinfo_for_stmt (stmt);
	  gcc_assert (stmt_info);
	  /* Skip statements the analysis phase marked as neither relevant
	     nor live; they are left in place unchanged.  */
	  if (!STMT_VINFO_RELEVANT_P (stmt_info)
	      && !STMT_VINFO_LIVE_P (stmt_info))
	    {
	      bsi_next (&si);
	      continue;
	    }
	  /* FORNOW: Verify that all stmts operate on the same number of
	             units and no inner unrolling is necessary.  */
	  gcc_assert
		(TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info))
		 == (unsigned HOST_WIDE_INT) vectorization_factor);

	  /* -------- vectorize statement ------------ */
	  if (vect_print_dump_info (REPORT_DETAILS))
	    fprintf (vect_dump, "transform statement.");

	  is_store = vect_transform_stmt (stmt, &si);
	  if (is_store)
	    {
	      /* Free the attached stmt_vec_info and remove the stmt.  */
	      stmt_ann_t ann = stmt_ann (stmt);
	      free (stmt_info);
	      set_stmt_info ((tree_ann_t)ann, NULL);
	      /* bsi_remove already advances the iterator; don't bsi_next.  */
	      bsi_remove (&si);
	      continue;
	    }

	  bsi_next (&si);
	}		        /* stmts in BB */
    }				/* BBs in loop */

  /* Rewrite the loop exit test so the (vectorized) loop runs RATIO times.  */
  slpeel_make_loop_iterate_ntimes (loop, ratio);

  EXECUTE_IF_SET_IN_BITMAP (vect_vnames_to_rename, 0, j, bi)
    mark_sym_for_renaming (SSA_NAME_VAR (ssa_name (j)));

  /* The memory tags and pointers in vectorized statements need to
     have their SSA forms updated.  FIXME, why can't this be delayed
     until all the loops have been transformed?  */
  update_ssa (TODO_update_ssa);

  if (vect_print_dump_info (REPORT_VECTORIZED_LOOPS))
    fprintf (vect_dump, "LOOP VECTORIZED.");
}
2973