xref: /openbsd/gnu/gcc/gcc/tree-vect-transform.c (revision 404b540a)
1 /* Transformation Utilities for Loop Vectorization.
2    Copyright (C) 2003,2004,2005,2006 Free Software Foundation, Inc.
3    Contributed by Dorit Naishlos <dorit@il.ibm.com>
4 
5 This file is part of GCC.
6 
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
10 version.
11 
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
15 for more details.
16 
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING.  If not, write to the Free
19 Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
20 02110-1301, USA.  */
21 
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "tm.h"
26 #include "ggc.h"
27 #include "tree.h"
28 #include "target.h"
29 #include "rtl.h"
30 #include "basic-block.h"
31 #include "diagnostic.h"
32 #include "tree-flow.h"
33 #include "tree-dump.h"
34 #include "timevar.h"
35 #include "cfgloop.h"
36 #include "expr.h"
37 #include "optabs.h"
38 #include "recog.h"
39 #include "tree-data-ref.h"
40 #include "tree-chrec.h"
41 #include "tree-scalar-evolution.h"
42 #include "tree-vectorizer.h"
43 #include "langhooks.h"
44 #include "tree-pass.h"
45 #include "toplev.h"
46 #include "real.h"
47 
48 /* Utility functions for the code transformation.  */
49 static bool vect_transform_stmt (tree, block_stmt_iterator *);
50 static void vect_align_data_ref (tree);
51 static tree vect_create_destination_var (tree, tree);
52 static tree vect_create_data_ref_ptr
53   (tree, block_stmt_iterator *, tree, tree *, bool);
54 static tree vect_create_addr_base_for_vector_ref (tree, tree *, tree);
55 static tree vect_get_new_vect_var (tree, enum vect_var_kind, const char *);
56 static tree vect_get_vec_def_for_operand (tree, tree, tree *);
57 static tree vect_init_vector (tree, tree);
58 static void vect_finish_stmt_generation
59   (tree stmt, tree vec_stmt, block_stmt_iterator *bsi);
60 static bool vect_is_simple_cond (tree, loop_vec_info);
61 static void update_vuses_to_preheader (tree, struct loop*);
62 static void vect_create_epilog_for_reduction (tree, tree, enum tree_code, tree);
63 static tree get_initial_def_for_reduction (tree, tree, tree *);
64 
65 /* Utility function dealing with loop peeling (not peeling itself).  */
66 static void vect_generate_tmps_on_preheader
67   (loop_vec_info, tree *, tree *, tree *);
68 static tree vect_build_loop_niters (loop_vec_info);
69 static void vect_update_ivs_after_vectorizer (loop_vec_info, tree, edge);
70 static tree vect_gen_niters_for_prolog_loop (loop_vec_info, tree);
71 static void vect_update_init_of_dr (struct data_reference *, tree niters);
72 static void vect_update_inits_of_drs (loop_vec_info, tree);
73 static void vect_do_peeling_for_alignment (loop_vec_info, struct loops *);
74 static void vect_do_peeling_for_loop_bound
75   (loop_vec_info, tree *, struct loops *);
76 static int vect_min_worthwhile_factor (enum tree_code);
77 
78 
79 /* Function vect_get_new_vect_var.
80 
81    Returns a name for a new variable. The current naming scheme appends the
82    prefix "vect_" or "vect_p" (depending on the value of VAR_KIND) to
83    the name of vectorizer generated variables, and appends that to NAME if
84    provided.  */
85 
86 static tree
vect_get_new_vect_var(tree type,enum vect_var_kind var_kind,const char * name)87 vect_get_new_vect_var (tree type, enum vect_var_kind var_kind, const char *name)
88 {
89   const char *prefix;
90   tree new_vect_var;
91 
92   switch (var_kind)
93   {
94   case vect_simple_var:
95     prefix = "vect_";
96     break;
97   case vect_scalar_var:
98     prefix = "stmp_";
99     break;
100   case vect_pointer_var:
101     prefix = "vect_p";
102     break;
103   default:
104     gcc_unreachable ();
105   }
106 
107   if (name)
108     new_vect_var = create_tmp_var (type, concat (prefix, name, NULL));
109   else
110     new_vect_var = create_tmp_var (type, prefix);
111 
112   return new_vect_var;
113 }
114 
115 
116 /* Function vect_create_addr_base_for_vector_ref.
117 
118    Create an expression that computes the address of the first memory location
119    that will be accessed for a data reference.
120 
121    Input:
122    STMT: The statement containing the data reference.
123    NEW_STMT_LIST: Must be initialized to NULL_TREE or a statement list.
124    OFFSET: Optional. If supplied, it is added to the initial address.
125 
126    Output:
127    1. Return an SSA_NAME whose value is the address of the memory location of
128       the first vector of the data reference.
129    2. If new_stmt_list is not NULL_TREE after return then the caller must insert
130       these statement(s) which define the returned SSA_NAME.
131 
132    FORNOW: We are only handling array accesses with step 1.  */
133 
134 static tree
vect_create_addr_base_for_vector_ref(tree stmt,tree * new_stmt_list,tree offset)135 vect_create_addr_base_for_vector_ref (tree stmt,
136                                       tree *new_stmt_list,
137 				      tree offset)
138 {
139   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
140   struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
141   tree data_ref_base = unshare_expr (DR_BASE_ADDRESS (dr));
142   tree base_name = build_fold_indirect_ref (data_ref_base);
143   tree ref = DR_REF (dr);
144   tree scalar_type = TREE_TYPE (ref);
145   tree scalar_ptr_type = build_pointer_type (scalar_type);
146   tree vec_stmt;
147   tree new_temp;
148   tree addr_base, addr_expr;
149   tree dest, new_stmt;
150   tree base_offset = unshare_expr (DR_OFFSET (dr));
151   tree init = unshare_expr (DR_INIT (dr));
152 
  /* base_offset = DR_OFFSET + DR_INIT, gimplified into NEW_STMT_LIST so the
     caller can emit the computation before the address is used.  */
153   /* Create base_offset */
154   base_offset = size_binop (PLUS_EXPR, base_offset, init);
155   dest = create_tmp_var (TREE_TYPE (base_offset), "base_off");
156   add_referenced_var (dest);
157   base_offset = force_gimple_operand (base_offset, &new_stmt, false, dest);
158   append_to_statement_list_force (new_stmt, new_stmt_list);
159 
160   if (offset)
161     {
162       tree tmp = create_tmp_var (TREE_TYPE (base_offset), "offset");
163       add_referenced_var (tmp);
      /* OFFSET is in elements; scale by DR_STEP to get bytes.  With the
         FORNOW step-1 restriction (see the function header), DR_STEP is
         presumably the element size — confirm if steps > 1 are added.  */
164       offset = fold_build2 (MULT_EXPR, TREE_TYPE (offset), offset,
165 			    DR_STEP (dr));
166       base_offset = fold_build2 (PLUS_EXPR, TREE_TYPE (base_offset),
167 				 base_offset, offset);
168       base_offset = force_gimple_operand (base_offset, &new_stmt, false, tmp);
169       append_to_statement_list_force (new_stmt, new_stmt_list);
170     }
171 
172   /* base + base_offset */
173   addr_base = fold_build2 (PLUS_EXPR, TREE_TYPE (data_ref_base), data_ref_base,
174 			   base_offset);
175 
  /* Materialize the computed address in a fresh SSA name of the scalar
     pointer type; this SSA name is what the function returns.  */
176   /* addr_expr = addr_base */
177   addr_expr = vect_get_new_vect_var (scalar_ptr_type, vect_pointer_var,
178                                      get_name (base_name));
179   add_referenced_var (addr_expr);
180   vec_stmt = build2 (MODIFY_EXPR, void_type_node, addr_expr, addr_base);
181   new_temp = make_ssa_name (addr_expr, vec_stmt);
182   TREE_OPERAND (vec_stmt, 0) = new_temp;
183   append_to_statement_list_force (vec_stmt, new_stmt_list);
184 
185   if (vect_print_dump_info (REPORT_DETAILS))
186     {
187       fprintf (vect_dump, "created ");
188       print_generic_expr (vect_dump, vec_stmt, TDF_SLIM);
189     }
190   return new_temp;
191 }
192 
193 
194 /* Function vect_align_data_ref.
195 
196    Handle misalignment of a memory accesses.
197 
198    FORNOW: Can't handle misaligned accesses.
199    Make sure that the dataref is aligned.  */
200 
201 static void
vect_align_data_ref(tree stmt)202 vect_align_data_ref (tree stmt)
203 {
204   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
205   struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
206 
207   /* FORNOW: can't handle misaligned accesses;
208              all accesses expected to be aligned.  */
209   gcc_assert (aligned_access_p (dr));
210 }
211 
212 
213 /* Function vect_create_data_ref_ptr.
214 
215    Create a memory reference expression for vector access, to be used in a
216    vector load/store stmt. The reference is based on a new pointer to vector
217    type (vp).
218 
219    Input:
220    1. STMT: a stmt that references memory. Expected to be of the form
221          MODIFY_EXPR <name, data-ref> or MODIFY_EXPR <data-ref, name>.
222    2. BSI: block_stmt_iterator where new stmts can be added.
223    3. OFFSET (optional): an offset to be added to the initial address accessed
224         by the data-ref in STMT.
225    4. ONLY_INIT: indicate if vp is to be updated in the loop, or remain
226         pointing to the initial address.
227 
228    Output:
229    1. Declare a new ptr to vector_type, and have it point to the base of the
230       data reference (initial addressed accessed by the data reference).
231       For example, for vector of type V8HI, the following code is generated:
232 
233       v8hi *vp;
234       vp = (v8hi *)initial_address;
235 
236       if OFFSET is not supplied:
237          initial_address = &a[init];
238       if OFFSET is supplied:
239          initial_address = &a[init + OFFSET];
240 
241       Return the initial_address in INITIAL_ADDRESS.
242 
243    2. If ONLY_INIT is true, return the initial pointer.  Otherwise, create
244       a data-reference in the loop based on the new vector pointer vp.  This
245       new data reference will by some means be updated each iteration of
246       the loop.  Return the pointer vp'.
247 
248    FORNOW: handle only aligned and consecutive accesses.  */
249 
250 static tree
vect_create_data_ref_ptr(tree stmt,block_stmt_iterator * bsi ATTRIBUTE_UNUSED,tree offset,tree * initial_address,bool only_init)251 vect_create_data_ref_ptr (tree stmt,
252 			  block_stmt_iterator *bsi ATTRIBUTE_UNUSED,
253 			  tree offset, tree *initial_address, bool only_init)
254 {
255   tree base_name;
256   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
257   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
258   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
259   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
260   tree vect_ptr_type;
261   tree vect_ptr;
262   tree tag;
263   tree new_temp;
264   tree vec_stmt;
265   tree new_stmt_list = NULL_TREE;
266   edge pe = loop_preheader_edge (loop);
267   basic_block new_bb;
268   tree vect_ptr_init;
269   struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
270 
271   base_name =  build_fold_indirect_ref (unshare_expr (DR_BASE_ADDRESS (dr)));
272 
273   if (vect_print_dump_info (REPORT_DETAILS))
274     {
275       tree data_ref_base = base_name;
276       fprintf (vect_dump, "create vector-pointer variable to type: ");
277       print_generic_expr (vect_dump, vectype, TDF_SLIM);
278       if (TREE_CODE (data_ref_base) == VAR_DECL)
279         fprintf (vect_dump, "  vectorizing a one dimensional array ref: ");
280       else if (TREE_CODE (data_ref_base) == ARRAY_REF)
281         fprintf (vect_dump, "  vectorizing a multidimensional array ref: ");
282       else if (TREE_CODE (data_ref_base) == COMPONENT_REF)
283         fprintf (vect_dump, "  vectorizing a record based array ref: ");
284       else if (TREE_CODE (data_ref_base) == SSA_NAME)
285         fprintf (vect_dump, "  vectorizing a pointer ref: ");
286       print_generic_expr (vect_dump, base_name, TDF_SLIM);
287     }
288 
289   /** (1) Create the new vector-pointer variable:  **/
290 
291   vect_ptr_type = build_pointer_type (vectype);
292   vect_ptr = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var,
293                                     get_name (base_name));
294   add_referenced_var (vect_ptr);
295 
296 
297   /** (2) Add aliasing information to the new vector-pointer:
298           (The points-to info (DR_PTR_INFO) may be defined later.)  **/
299 
300   tag = DR_MEMTAG (dr);
301   gcc_assert (tag);
302 
303   /* If tag is a variable (and NOT_A_TAG) than a new symbol memory
304      tag must be created with tag added to its may alias list.  */
305   if (!MTAG_P (tag))
306     new_type_alias (vect_ptr, tag, DR_REF (dr));
307   else
308     var_ann (vect_ptr)->symbol_mem_tag = tag;
309 
310   var_ann (vect_ptr)->subvars = DR_SUBVARS (dr);
311 
312   /** (3) Calculate the initial address the vector-pointer, and set
313           the vector-pointer to point to it before the loop:  **/
314 
315   /* Create: (&(base[init_val+offset]) in the loop preheader.  */
316   new_temp = vect_create_addr_base_for_vector_ref (stmt, &new_stmt_list,
317                                                    offset);
317   pe = loop_preheader_edge (loop);
  /* The address computation is loop-invariant, so it is inserted on the
     preheader edge; the insertion must not split the edge (no new bb).  */
319   new_bb = bsi_insert_on_edge_immediate (pe, new_stmt_list);
320   gcc_assert (!new_bb);
321   *initial_address = new_temp;
322 
323   /* Create: p = (vectype *) initial_base  */
324   vec_stmt = fold_convert (vect_ptr_type, new_temp);
325   vec_stmt = build2 (MODIFY_EXPR, void_type_node, vect_ptr, vec_stmt);
326   vect_ptr_init = make_ssa_name (vect_ptr, vec_stmt);
327   TREE_OPERAND (vec_stmt, 0) = vect_ptr_init;
328   new_bb = bsi_insert_on_edge_immediate (pe, vec_stmt);
329   gcc_assert (!new_bb);
330 
331 
332   /** (4) Handle the updating of the vector-pointer inside the loop: **/
333 
334   if (only_init) /* No update in loop is required.  */
335     {
336       /* Copy the points-to information if it exists. */
337       if (DR_PTR_INFO (dr))
338         duplicate_ssa_name_ptr_info (vect_ptr_init, DR_PTR_INFO (dr));
339       return vect_ptr_init;
340     }
341   else
342     {
343       block_stmt_iterator incr_bsi;
344       bool insert_after;
345       tree indx_before_incr, indx_after_incr;
346       tree incr;
347 
      /* Create an induction variable that bumps the vector-pointer by
         one vector size (TYPE_SIZE_UNIT (vectype)) each iteration.  */
348       standard_iv_increment_position (loop, &incr_bsi, &insert_after);
349       create_iv (vect_ptr_init,
350 		 fold_convert (vect_ptr_type, TYPE_SIZE_UNIT (vectype)),
351 		 NULL_TREE, loop, &incr_bsi, insert_after,
352 		 &indx_before_incr, &indx_after_incr);
353       incr = bsi_stmt (incr_bsi);
354       set_stmt_info (stmt_ann (incr),
355 		     new_stmt_vec_info (incr, loop_vinfo));
356 
357       /* Copy the points-to information if it exists. */
358       if (DR_PTR_INFO (dr))
359 	{
360 	  duplicate_ssa_name_ptr_info (indx_before_incr, DR_PTR_INFO (dr));
361 	  duplicate_ssa_name_ptr_info (indx_after_incr, DR_PTR_INFO (dr));
362 	}
363       merge_alias_info (vect_ptr_init, indx_before_incr);
364       merge_alias_info (vect_ptr_init, indx_after_incr);
365 
      /* Return the value of the pointer at the start of each iteration
         (i.e. before the increment).  */
366       return indx_before_incr;
367     }
368 }
369 
370 
371 /* Function vect_create_destination_var.
372 
373    Create a new temporary of type VECTYPE.  */
374 
375 static tree
vect_create_destination_var(tree scalar_dest,tree vectype)376 vect_create_destination_var (tree scalar_dest, tree vectype)
377 {
378   tree vec_dest;
379   const char *new_name;
380   tree type;
381   enum vect_var_kind kind;
382 
383   kind = vectype ? vect_simple_var : vect_scalar_var;
384   type = vectype ? vectype : TREE_TYPE (scalar_dest);
385 
386   gcc_assert (TREE_CODE (scalar_dest) == SSA_NAME);
387 
388   new_name = get_name (scalar_dest);
389   if (!new_name)
390     new_name = "var_";
391   vec_dest = vect_get_new_vect_var (type, vect_simple_var, new_name);
392   add_referenced_var (vec_dest);
393 
394   return vec_dest;
395 }
396 
397 
398 /* Function vect_init_vector.
399 
400    Insert a new stmt (INIT_STMT) that initializes a new vector variable with
401    the vector elements of VECTOR_VAR. Return the DEF of INIT_STMT. It will be
402    used in the vectorization of STMT.  */
403 
404 static tree
vect_init_vector(tree stmt,tree vector_var)405 vect_init_vector (tree stmt, tree vector_var)
406 {
407   stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
408   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
409   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
410   tree new_var;
411   tree init_stmt;
412   tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
413   tree vec_oprnd;
414   edge pe;
415   tree new_temp;
416   basic_block new_bb;
417 
418   new_var = vect_get_new_vect_var (vectype, vect_simple_var, "cst_");
419   add_referenced_var (new_var);
420 
421   init_stmt = build2 (MODIFY_EXPR, vectype, new_var, vector_var);
422   new_temp = make_ssa_name (new_var, init_stmt);
423   TREE_OPERAND (init_stmt, 0) = new_temp;
424 
425   pe = loop_preheader_edge (loop);
426   new_bb = bsi_insert_on_edge_immediate (pe, init_stmt);
427   gcc_assert (!new_bb);
428 
429   if (vect_print_dump_info (REPORT_DETAILS))
430     {
431       fprintf (vect_dump, "created new init_stmt: ");
432       print_generic_expr (vect_dump, init_stmt, TDF_SLIM);
433     }
434 
435   vec_oprnd = TREE_OPERAND (init_stmt, 0);
436   return vec_oprnd;
437 }
438 
439 
440 /* Function vect_get_vec_def_for_operand.
441 
442    OP is an operand in STMT. This function returns a (vector) def that will be
443    used in the vectorized stmt for STMT.
444 
445    In the case that OP is an SSA_NAME which is defined in the loop, then
446    STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.
447 
448    In case OP is an invariant or constant, a new stmt that creates a vector def
449    needs to be introduced.  */
450 
451 static tree
vect_get_vec_def_for_operand(tree op,tree stmt,tree * scalar_def)452 vect_get_vec_def_for_operand (tree op, tree stmt, tree *scalar_def)
453 {
454   tree vec_oprnd;
455   tree vec_stmt;
456   tree def_stmt;
457   stmt_vec_info def_stmt_info = NULL;
458   stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
459   tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
460   int nunits = TYPE_VECTOR_SUBPARTS (vectype);
461   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
462   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
463   tree vec_inv;
464   tree vec_cst;
465   tree t = NULL_TREE;
466   tree def;
467   int i;
468   enum vect_def_type dt;
469   bool is_simple_use;
470 
471   if (vect_print_dump_info (REPORT_DETAILS))
472     {
473       fprintf (vect_dump, "vect_get_vec_def_for_operand: ");
474       print_generic_expr (vect_dump, op, TDF_SLIM);
475     }
476 
  /* Classify OP (constant / invariant / loop def / reduction / induction).
     Operands were already validated during analysis, hence the assert.  */
477   is_simple_use = vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt);
478   gcc_assert (is_simple_use);
479   if (vect_print_dump_info (REPORT_DETAILS))
480     {
481       if (def)
482         {
483           fprintf (vect_dump, "def =  ");
484           print_generic_expr (vect_dump, def, TDF_SLIM);
485         }
486       if (def_stmt)
487         {
488           fprintf (vect_dump, "  def_stmt =  ");
489           print_generic_expr (vect_dump, def_stmt, TDF_SLIM);
490         }
491     }
492 
493   switch (dt)
494     {
495     /* Case 1: operand is a constant.  */
496     case vect_constant_def:
497       {
498 	if (scalar_def)
499 	  *scalar_def = op;
500 
501         /* Create 'vect_cst_ = {cst,cst,...,cst}'  */
502         if (vect_print_dump_info (REPORT_DETAILS))
503           fprintf (vect_dump, "Create vector_cst. nunits = %d", nunits);
504 
        /* tree_cons prepends, so iterate from the last element down to
           build the element list in order.  */
505         for (i = nunits - 1; i >= 0; --i)
506           {
507             t = tree_cons (NULL_TREE, op, t);
508           }
509         vec_cst = build_vector (vectype, t);
510         return vect_init_vector (stmt, vec_cst);
511       }
512 
513     /* Case 2: operand is defined outside the loop - loop invariant.  */
514     case vect_invariant_def:
515       {
516 	if (scalar_def)
517 	  *scalar_def = def;
518 
519         /* Create 'vec_inv = {inv,inv,..,inv}'  */
520         if (vect_print_dump_info (REPORT_DETAILS))
521           fprintf (vect_dump, "Create vector_inv.");
522 
523         for (i = nunits - 1; i >= 0; --i)
524           {
525             t = tree_cons (NULL_TREE, def, t);
526           }
527 
528 	/* FIXME: use build_constructor directly.  */
529         vec_inv = build_constructor_from_list (vectype, t);
530         return vect_init_vector (stmt, vec_inv);
531       }
532 
533     /* Case 3: operand is defined inside the loop.  */
534     case vect_loop_def:
535       {
536 	if (scalar_def)
537 	  *scalar_def = def_stmt;
538 
        /* The defining stmt must already have been vectorized; its vector
           def is recorded in STMT_VINFO_VEC_STMT (hence the assert).  */
539         /* Get the def from the vectorized stmt.  */
540         def_stmt_info = vinfo_for_stmt (def_stmt);
541         vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
542         gcc_assert (vec_stmt);
543         vec_oprnd = TREE_OPERAND (vec_stmt, 0);
544         return vec_oprnd;
545       }
546 
547     /* Case 4: operand is defined by a loop header phi - reduction  */
548     case vect_reduction_def:
549       {
550         gcc_assert (TREE_CODE (def_stmt) == PHI_NODE);
551 
552         /* Get the def before the loop  */
553         op = PHI_ARG_DEF_FROM_EDGE (def_stmt, loop_preheader_edge (loop));
554         return get_initial_def_for_reduction (stmt, op, scalar_def);
555      }
556 
557     /* Case 5: operand is defined by loop-header phi - induction.  */
558     case vect_induction_def:
559       {
560         if (vect_print_dump_info (REPORT_DETAILS))
561           fprintf (vect_dump, "induction - unsupported.");
562         internal_error ("no support for induction"); /* FORNOW */
563       }
564 
565     default:
566       gcc_unreachable ();
567     }
568 }
569 
570 
571 /* Function vect_finish_stmt_generation.
572 
573    Insert a new stmt.  */
574 
575 static void
vect_finish_stmt_generation(tree stmt,tree vec_stmt,block_stmt_iterator * bsi)576 vect_finish_stmt_generation (tree stmt, tree vec_stmt, block_stmt_iterator *bsi)
577 {
  /* Insert VEC_STMT immediately before the scalar stmt being vectorized;
     BSI_SAME_STMT leaves BSI pointing at that scalar stmt.  */
578   bsi_insert_before (bsi, vec_stmt, BSI_SAME_STMT);
579 
580   if (vect_print_dump_info (REPORT_DETAILS))
581     {
582       fprintf (vect_dump, "add new stmt: ");
583       print_generic_expr (vect_dump, vec_stmt, TDF_SLIM);
584     }
585 
586   /* Make sure bsi points to the stmt that is being vectorized.  */
587   gcc_assert (stmt == bsi_stmt (*bsi));
588 
  /* Give the generated stmt the source location of the scalar stmt it
     replaces, so debug information stays meaningful.  */
589 #ifdef USE_MAPPED_LOCATION
590   SET_EXPR_LOCATION (vec_stmt, EXPR_LOCATION (stmt));
591 #else
592   SET_EXPR_LOCUS (vec_stmt, EXPR_LOCUS (stmt));
593 #endif
594 }
595 
596 
597 #define ADJUST_IN_EPILOG 1
598 
599 /* Function get_initial_def_for_reduction
600 
601    Input:
602    STMT - a stmt that performs a reduction operation in the loop.
603    INIT_VAL - the initial value of the reduction variable
604 
605    Output:
606    SCALAR_DEF - a tree that holds a value to be added to the final result
607 	of the reduction (used for "ADJUST_IN_EPILOG" - see below).
608    Return a vector variable, initialized according to the operation that STMT
609 	performs. This vector will be used as the initial value of the
610 	vector of partial results.
611 
612    Option1 ("ADJUST_IN_EPILOG"): Initialize the vector as follows:
613      add:         [0,0,...,0,0]
614      mult:        [1,1,...,1,1]
615      min/max:     [init_val,init_val,..,init_val,init_val]
616      bit and/or:  [init_val,init_val,..,init_val,init_val]
617    and when necessary (e.g. add/mult case) let the caller know
618    that it needs to adjust the result by init_val.
619 
620    Option2: Initialize the vector as follows:
621      add:         [0,0,...,0,init_val]
622      mult:        [1,1,...,1,init_val]
623      min/max:     [init_val,init_val,...,init_val]
624      bit and/or:  [init_val,init_val,...,init_val]
625    and no adjustments are needed.
626 
627    For example, for the following code:
628 
629    s = init_val;
630    for (i=0;i<n;i++)
631      s = s + a[i];
632 
633    STMT is 's = s + a[i]', and the reduction variable is 's'.
634    For a vector of 4 units, we want to return either [0,0,0,init_val],
635    or [0,0,0,0] and let the caller know that it needs to adjust
636    the result at the end by 'init_val'.
637 
638    FORNOW: We use the "ADJUST_IN_EPILOG" scheme.
639    TODO: Use some cost-model to estimate which scheme is more profitable.
640 */
641 
642 static tree
get_initial_def_for_reduction(tree stmt,tree init_val,tree * scalar_def)643 get_initial_def_for_reduction (tree stmt, tree init_val, tree *scalar_def)
644 {
645   stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
646   tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
647   int nunits = GET_MODE_NUNITS (TYPE_MODE (vectype));
648   int nelements;
649   enum tree_code code = TREE_CODE (TREE_OPERAND (stmt, 1));
650   tree type = TREE_TYPE (init_val);
651   tree def;
652   tree vec, t = NULL_TREE;
653   bool need_epilog_adjust;
654   int i;
655 
656   gcc_assert (INTEGRAL_TYPE_P (type) || SCALAR_FLOAT_TYPE_P (type));
657 
  /* Choose the per-element value DEF and how many elements get it,
     depending on the reduction operation (see the function header for
     the ADJUST_IN_EPILOG vs. last-element schemes).  */
658   switch (code)
659   {
660   case WIDEN_SUM_EXPR:
661   case DOT_PROD_EXPR:
662   case PLUS_EXPR:
663     if (INTEGRAL_TYPE_P (type))
664       def = build_int_cst (type, 0);
665     else
666       def = build_real (type, dconst0);
667 
668 #ifdef ADJUST_IN_EPILOG
669     /* All the 'nunits' elements are set to 0. The final result will be
670        adjusted by 'init_val' at the loop epilog.  */
671     nelements = nunits;
672     need_epilog_adjust = true;
673 #else
674     /* 'nunits - 1' elements are set to 0; The last element is set to
675         'init_val'.  No further adjustments at the epilog are needed.  */
676     nelements = nunits - 1;
677     need_epilog_adjust = false;
678 #endif
679     break;
680 
681   case MIN_EXPR:
682   case MAX_EXPR:
    /* For min/max every element starts at INIT_VAL; no epilog fixup.  */
683     def = init_val;
684     nelements = nunits;
685     need_epilog_adjust = false;
686     break;
687 
688   default:
689     gcc_unreachable ();
690   }
691 
  /* tree_cons prepends, so counting down builds the list in order.  */
692   for (i = nelements - 1; i >= 0; --i)
693     t = tree_cons (NULL_TREE, def, t);
694 
695   if (nelements == nunits - 1)
696     {
697       /* Set the last element of the vector.  */
698       t = tree_cons (NULL_TREE, init_val, t);
699       nelements += 1;
700     }
701   gcc_assert (nelements == nunits);
702 
  /* A VECTOR_CST requires constant elements; otherwise fall back to a
     CONSTRUCTOR that will be expanded at run time.  */
703   if (TREE_CODE (init_val) == INTEGER_CST || TREE_CODE (init_val) == REAL_CST)
704     vec = build_vector (vectype, t);
705   else
706     vec = build_constructor_from_list (vectype, t);
707 
708   if (!need_epilog_adjust)
709     *scalar_def = NULL_TREE;
710   else
711     *scalar_def = init_val;
712 
713   return vect_init_vector (stmt, vec);
714 }
715 
716 
717 /* Function vect_create_epilog_for_reduction
718 
719    Create code at the loop-epilog to finalize the result of a reduction
720    computation.
721 
722    VECT_DEF is a vector of partial results.
723    REDUC_CODE is the tree-code for the epilog reduction.
724    STMT is the scalar reduction stmt that is being vectorized.
725    REDUCTION_PHI is the phi-node that carries the reduction computation.
726 
727    This function:
728    1. Creates the reduction def-use cycle: sets the arguments for
729       REDUCTION_PHI:
730       The loop-entry argument is the vectorized initial-value of the reduction.
731       The loop-latch argument is VECT_DEF - the vector of partial sums.
732    2. "Reduces" the vector of partial results VECT_DEF into a single result,
733       by applying the operation specified by REDUC_CODE if available, or by
734       other means (whole-vector shifts or a scalar loop).
735       The function also creates a new phi node at the loop exit to preserve
736       loop-closed form, as illustrated below.
737 
738      The flow at the entry to this function:
739 
740         loop:
741           vec_def = phi <null, null>            # REDUCTION_PHI
742           VECT_DEF = vector_stmt                # vectorized form of STMT
743           s_loop = scalar_stmt                  # (scalar) STMT
744         loop_exit:
745           s_out0 = phi <s_loop>                 # (scalar) EXIT_PHI
746           use <s_out0>
747           use <s_out0>
748 
749      The above is transformed by this function into:
750 
751         loop:
752           vec_def = phi <vec_init, VECT_DEF>    # REDUCTION_PHI
753           VECT_DEF = vector_stmt                # vectorized form of STMT
754           s_loop = scalar_stmt                  # (scalar) STMT
755         loop_exit:
756           s_out0 = phi <s_loop>                 # (scalar) EXIT_PHI
757           v_out1 = phi <VECT_DEF>               # NEW_EXIT_PHI
758           v_out2 = reduce <v_out1>
759           s_out3 = extract_field <v_out2, 0>
760           s_out4 = adjust_result <s_out3>
761           use <s_out4>
762           use <s_out4>
763 */
764 
765 static void
vect_create_epilog_for_reduction(tree vect_def,tree stmt,enum tree_code reduc_code,tree reduction_phi)766 vect_create_epilog_for_reduction (tree vect_def, tree stmt,
767                                   enum tree_code reduc_code, tree reduction_phi)
768 {
769   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
770   tree vectype;
771   enum machine_mode mode;
772   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
773   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
774   basic_block exit_bb;
775   tree scalar_dest;
776   tree scalar_type;
777   tree new_phi;
778   block_stmt_iterator exit_bsi;
779   tree vec_dest;
780   tree new_temp;
781   tree new_name;
782   tree epilog_stmt;
783   tree new_scalar_dest, exit_phi;
784   tree bitsize, bitpos, bytesize;
785   enum tree_code code = TREE_CODE (TREE_OPERAND (stmt, 1));
786   tree scalar_initial_def;
787   tree vec_initial_def;
788   tree orig_name;
789   imm_use_iterator imm_iter;
790   use_operand_p use_p;
791   bool extract_scalar_result;
792   tree reduction_op;
793   tree orig_stmt;
794   tree use_stmt;
795   tree operation = TREE_OPERAND (stmt, 1);
796   int op_type;
797 
798   op_type = TREE_CODE_LENGTH (TREE_CODE (operation));
799   reduction_op = TREE_OPERAND (operation, op_type-1);
800   vectype = get_vectype_for_scalar_type (TREE_TYPE (reduction_op));
801   mode = TYPE_MODE (vectype);
802 
803   /*** 1. Create the reduction def-use cycle  ***/
804 
805   /* 1.1 set the loop-entry arg of the reduction-phi:  */
806   /* For the case of reduction, vect_get_vec_def_for_operand returns
807      the scalar def before the loop, that defines the initial value
808      of the reduction variable.  */
809   vec_initial_def = vect_get_vec_def_for_operand (reduction_op, stmt,
810 						  &scalar_initial_def);
811   add_phi_arg (reduction_phi, vec_initial_def, loop_preheader_edge (loop));
812 
813   /* 1.2 set the loop-latch arg for the reduction-phi:  */
814   add_phi_arg (reduction_phi, vect_def, loop_latch_edge (loop));
815 
816   if (vect_print_dump_info (REPORT_DETAILS))
817     {
818       fprintf (vect_dump, "transform reduction: created def-use cycle:");
819       print_generic_expr (vect_dump, reduction_phi, TDF_SLIM);
820       fprintf (vect_dump, "\n");
821       print_generic_expr (vect_dump, SSA_NAME_DEF_STMT (vect_def), TDF_SLIM);
822     }
823 
824 
825   /*** 2. Create epilog code
826 	  The reduction epilog code operates across the elements of the vector
827           of partial results computed by the vectorized loop.
828           The reduction epilog code consists of:
829           step 1: compute the scalar result in a vector (v_out2)
830           step 2: extract the scalar result (s_out3) from the vector (v_out2)
831           step 3: adjust the scalar result (s_out3) if needed.
832 
833           Step 1 can be accomplished using one the following three schemes:
834           (scheme 1) using reduc_code, if available.
835           (scheme 2) using whole-vector shifts, if available.
836           (scheme 3) using a scalar loop. In this case steps 1+2 above are
837                      combined.
838 
839           The overall epilog code looks like this:
840 
841           s_out0 = phi <s_loop>         # original EXIT_PHI
842           v_out1 = phi <VECT_DEF>       # NEW_EXIT_PHI
843           v_out2 = reduce <v_out1>              # step 1
844           s_out3 = extract_field <v_out2, 0>    # step 2
845           s_out4 = adjust_result <s_out3>       # step 3
846 
847           (step 3 is optional, and steps 1 and 2 may be combined).
848           Lastly, the uses of s_out0 are replaced by s_out4.
849 
850 	  ***/
851 
852   /* 2.1 Create new loop-exit-phi to preserve loop-closed form:
853         v_out1 = phi <v_loop>  */
854 
855   exit_bb = loop->single_exit->dest;
856   new_phi = create_phi_node (SSA_NAME_VAR (vect_def), exit_bb);
857   SET_PHI_ARG_DEF (new_phi, loop->single_exit->dest_idx, vect_def);
858   exit_bsi = bsi_start (exit_bb);
859 
860   /* 2.2 Get the relevant tree-code to use in the epilog for schemes 2,3
861          (i.e. when reduc_code is not available) and in the final adjustment code
862          (if needed).  Also get the original scalar reduction variable as
863          defined in the loop.  In case STMT is a "pattern-stmt" (i.e. - it
864          represents a reduction pattern), the tree-code and scalar-def are
865          taken from the original stmt that the pattern-stmt (STMT) replaces.
866          Otherwise (it is a regular reduction) - the tree-code and scalar-def
867          are taken from STMT.  */
868 
869   orig_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
870   if (!orig_stmt)
871     {
872       /* Regular reduction  */
873       orig_stmt = stmt;
874     }
875   else
876     {
877       /* Reduction pattern  */
878       stmt_vec_info stmt_vinfo = vinfo_for_stmt (orig_stmt);
879       gcc_assert (STMT_VINFO_IN_PATTERN_P (stmt_vinfo));
880       gcc_assert (STMT_VINFO_RELATED_STMT (stmt_vinfo) == stmt);
881     }
882   code = TREE_CODE (TREE_OPERAND (orig_stmt, 1));
883   scalar_dest = TREE_OPERAND (orig_stmt, 0);
884   scalar_type = TREE_TYPE (scalar_dest);
885   new_scalar_dest = vect_create_destination_var (scalar_dest, NULL);
886   bitsize = TYPE_SIZE (scalar_type);
887   bytesize = TYPE_SIZE_UNIT (scalar_type);
888 
889   /* 2.3 Create the reduction code, using one of the three schemes described
890          above.  */
891 
892   if (reduc_code < NUM_TREE_CODES)
893     {
894       /*** Case 1:  Create:
895 	   v_out2 = reduc_expr <v_out1>  */
896 
897       if (vect_print_dump_info (REPORT_DETAILS))
898 	fprintf (vect_dump, "Reduce using direct vector reduction.");
899 
900       vec_dest = vect_create_destination_var (scalar_dest, vectype);
901       epilog_stmt = build2 (MODIFY_EXPR, vectype, vec_dest,
902 			build1 (reduc_code, vectype,  PHI_RESULT (new_phi)));
903       new_temp = make_ssa_name (vec_dest, epilog_stmt);
904       TREE_OPERAND (epilog_stmt, 0) = new_temp;
905       bsi_insert_after (&exit_bsi, epilog_stmt, BSI_NEW_STMT);
906 
907       extract_scalar_result = true;
908     }
909   else
910     {
911       enum tree_code shift_code = 0;
912       bool have_whole_vector_shift = true;
913       int bit_offset;
914       int element_bitsize = tree_low_cst (bitsize, 1);
915       int vec_size_in_bits = tree_low_cst (TYPE_SIZE (vectype), 1);
916       tree vec_temp;
917 
918       if (vec_shr_optab->handlers[mode].insn_code != CODE_FOR_nothing)
919 	shift_code = VEC_RSHIFT_EXPR;
920       else
921 	have_whole_vector_shift = false;
922 
923       /* Regardless of whether we have a whole vector shift, if we're
924 	 emulating the operation via tree-vect-generic, we don't want
925 	 to use it.  Only the first round of the reduction is likely
926 	 to still be profitable via emulation.  */
927       /* ??? It might be better to emit a reduction tree code here, so that
928 	 tree-vect-generic can expand the first round via bit tricks.  */
929       if (!VECTOR_MODE_P (mode))
930 	have_whole_vector_shift = false;
931       else
932 	{
933 	  optab optab = optab_for_tree_code (code, vectype);
934 	  if (optab->handlers[mode].insn_code == CODE_FOR_nothing)
935 	    have_whole_vector_shift = false;
936 	}
937 
938       if (have_whole_vector_shift)
939         {
940 	  /*** Case 2: Create:
941 	     for (offset = VS/2; offset >= element_size; offset/=2)
942 	        {
943 	          Create:  va' = vec_shift <va, offset>
944 	          Create:  va = vop <va, va'>
945 	        }  */
946 
947 	  if (vect_print_dump_info (REPORT_DETAILS))
948 	    fprintf (vect_dump, "Reduce using vector shifts");
949 
950 	  vec_dest = vect_create_destination_var (scalar_dest, vectype);
951 	  new_temp = PHI_RESULT (new_phi);
952 
953 	  for (bit_offset = vec_size_in_bits/2;
954 	       bit_offset >= element_bitsize;
955 	       bit_offset /= 2)
956 	    {
957 	      tree bitpos = size_int (bit_offset);
958 
959 	      epilog_stmt = build2 (MODIFY_EXPR, vectype, vec_dest,
960 	      build2 (shift_code, vectype, new_temp, bitpos));
961 	      new_name = make_ssa_name (vec_dest, epilog_stmt);
962 	      TREE_OPERAND (epilog_stmt, 0) = new_name;
963 	      bsi_insert_after (&exit_bsi, epilog_stmt, BSI_NEW_STMT);
964 
965 	      epilog_stmt = build2 (MODIFY_EXPR, vectype, vec_dest,
966 	      build2 (code, vectype, new_name, new_temp));
967 	      new_temp = make_ssa_name (vec_dest, epilog_stmt);
968 	      TREE_OPERAND (epilog_stmt, 0) = new_temp;
969 	      bsi_insert_after (&exit_bsi, epilog_stmt, BSI_NEW_STMT);
970 	    }
971 
972 	  extract_scalar_result = true;
973 	}
974       else
975         {
976 	  tree rhs;
977 
978 	  /*** Case 3: Create:
979 	     s = extract_field <v_out2, 0>
980 	     for (offset = element_size;
981 		  offset < vector_size;
982 		  offset += element_size;)
983 	       {
984 	         Create:  s' = extract_field <v_out2, offset>
985 	         Create:  s = op <s, s'>
986 	       }  */
987 
988 	  if (vect_print_dump_info (REPORT_DETAILS))
989 	    fprintf (vect_dump, "Reduce using scalar code. ");
990 
991 	  vec_temp = PHI_RESULT (new_phi);
992 	  vec_size_in_bits = tree_low_cst (TYPE_SIZE (vectype), 1);
993 	  rhs = build3 (BIT_FIELD_REF, scalar_type, vec_temp, bitsize,
994 			 bitsize_zero_node);
995 	  BIT_FIELD_REF_UNSIGNED (rhs) = TYPE_UNSIGNED (scalar_type);
996 	  epilog_stmt = build2 (MODIFY_EXPR, scalar_type, new_scalar_dest, rhs);
997 	  new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
998 	  TREE_OPERAND (epilog_stmt, 0) = new_temp;
999 	  bsi_insert_after (&exit_bsi, epilog_stmt, BSI_NEW_STMT);
1000 
1001 	  for (bit_offset = element_bitsize;
1002 	       bit_offset < vec_size_in_bits;
1003 	       bit_offset += element_bitsize)
1004 	    {
1005 	      tree bitpos = bitsize_int (bit_offset);
1006 	      tree rhs = build3 (BIT_FIELD_REF, scalar_type, vec_temp, bitsize,
1007 				 bitpos);
1008 
1009 	      BIT_FIELD_REF_UNSIGNED (rhs) = TYPE_UNSIGNED (scalar_type);
1010 	      epilog_stmt = build2 (MODIFY_EXPR, scalar_type, new_scalar_dest,
1011 				    rhs);
1012 	      new_name = make_ssa_name (new_scalar_dest, epilog_stmt);
1013 	      TREE_OPERAND (epilog_stmt, 0) = new_name;
1014 	      bsi_insert_after (&exit_bsi, epilog_stmt, BSI_NEW_STMT);
1015 
1016 	      epilog_stmt = build2 (MODIFY_EXPR, scalar_type, new_scalar_dest,
1017 				build2 (code, scalar_type, new_name, new_temp));
1018 	      new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
1019 	      TREE_OPERAND (epilog_stmt, 0) = new_temp;
1020 	      bsi_insert_after (&exit_bsi, epilog_stmt, BSI_NEW_STMT);
1021 	    }
1022 
1023 	  extract_scalar_result = false;
1024 	}
1025     }
1026 
1027   /* 2.4  Extract the final scalar result.  Create:
1028          s_out3 = extract_field <v_out2, bitpos>  */
1029 
1030   if (extract_scalar_result)
1031     {
1032       tree rhs;
1033 
1034       if (vect_print_dump_info (REPORT_DETAILS))
1035 	fprintf (vect_dump, "extract scalar result");
1036 
1037       if (BYTES_BIG_ENDIAN)
1038 	bitpos = size_binop (MULT_EXPR,
1039 		       bitsize_int (TYPE_VECTOR_SUBPARTS (vectype) - 1),
1040 		       TYPE_SIZE (scalar_type));
1041       else
1042 	bitpos = bitsize_zero_node;
1043 
1044       rhs = build3 (BIT_FIELD_REF, scalar_type, new_temp, bitsize, bitpos);
1045       BIT_FIELD_REF_UNSIGNED (rhs) = TYPE_UNSIGNED (scalar_type);
1046       epilog_stmt = build2 (MODIFY_EXPR, scalar_type, new_scalar_dest, rhs);
1047       new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
1048       TREE_OPERAND (epilog_stmt, 0) = new_temp;
1049       bsi_insert_after (&exit_bsi, epilog_stmt, BSI_NEW_STMT);
1050     }
1051 
1052   /* 2.4 Adjust the final result by the initial value of the reduction
1053 	 variable. (When such adjustment is not needed, then
1054 	 'scalar_initial_def' is zero).
1055 
1056 	 Create:
1057 	 s_out4 = scalar_expr <s_out3, scalar_initial_def>  */
1058 
1059   if (scalar_initial_def)
1060     {
1061       epilog_stmt = build2 (MODIFY_EXPR, scalar_type, new_scalar_dest,
1062                       build2 (code, scalar_type, new_temp, scalar_initial_def));
1063       new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
1064       TREE_OPERAND (epilog_stmt, 0) = new_temp;
1065       bsi_insert_after (&exit_bsi, epilog_stmt, BSI_NEW_STMT);
1066     }
1067 
1068   /* 2.6 Replace uses of s_out0 with uses of s_out3  */
1069 
1070   /* Find the loop-closed-use at the loop exit of the original scalar result.
1071      (The reduction result is expected to have two immediate uses - one at the
1072      latch block, and one at the loop exit).  */
1073   exit_phi = NULL;
1074   FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
1075     {
1076       if (!flow_bb_inside_loop_p (loop, bb_for_stmt (USE_STMT (use_p))))
1077 	{
1078 	  exit_phi = USE_STMT (use_p);
1079 	  break;
1080 	}
1081     }
1082   /* We expect to have found an exit_phi because of loop-closed-ssa form.  */
1083   gcc_assert (exit_phi);
1084   /* Replace the uses:  */
1085   orig_name = PHI_RESULT (exit_phi);
1086   FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, orig_name)
1087     FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
1088       SET_USE (use_p, new_temp);
1089 }
1090 
1091 
1092 /* Function vectorizable_reduction.
1093 
1094    Check if STMT performs a reduction operation that can be vectorized.
1095    If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1096    stmt to replace it, put it in VEC_STMT, and insert it at BSI.
1097    Return FALSE if not a vectorizable STMT, TRUE otherwise.
1098 
1099    This function also handles reduction idioms (patterns) that have been
1100    recognized in advance during vect_pattern_recog. In this case, STMT may be
1101    of this form:
1102      X = pattern_expr (arg0, arg1, ..., X)
1103    and its STMT_VINFO_RELATED_STMT points to the last stmt in the original
1104    sequence that had been detected and replaced by the pattern-stmt (STMT).
1105 
1106    In some cases of reduction patterns, the type of the reduction variable X is
1107    different than the type of the other arguments of STMT.
1108    In such cases, the vectype that is used when transforming STMT into a vector
1109    stmt is different than the vectype that is used to determine the
1110    vectorization factor, because it consists of a different number of elements
1111    than the actual number of elements that are being operated upon in parallel.
1112 
1113    For example, consider an accumulation of shorts into an int accumulator.
1114    On some targets it's possible to vectorize this pattern operating on 8
1115    shorts at a time (hence, the vectype for purposes of determining the
1116    vectorization factor should be V8HI); on the other hand, the vectype that
1117    is used to create the vector form is actually V4SI (the type of the result).
1118 
1119    Upon entry to this function, STMT_VINFO_VECTYPE records the vectype that
1120    indicates what is the actual level of parallelism (V8HI in the example), so
1121    that the right vectorization factor would be derived. This vectype
1122    corresponds to the type of arguments to the reduction stmt, and should *NOT*
1123    be used to create the vectorized stmt. The right vectype for the vectorized
1124    stmt is obtained from the type of the result X:
1125         get_vectype_for_scalar_type (TREE_TYPE (X))
1126 
1127    This means that, contrary to "regular" reductions (or "regular" stmts in
1128    general), the following equation:
1129       STMT_VINFO_VECTYPE == get_vectype_for_scalar_type (TREE_TYPE (X))
1130    does *NOT* necessarily hold for reduction patterns.  */
1131 
1132 bool
vectorizable_reduction(tree stmt,block_stmt_iterator * bsi,tree * vec_stmt)1133 vectorizable_reduction (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
1134 {
1135   tree vec_dest;
1136   tree scalar_dest;
1137   tree op;
1138   tree loop_vec_def0, loop_vec_def1;
1139   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1140   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1141   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1142   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1143   tree operation;
1144   enum tree_code code, orig_code, epilog_reduc_code = 0;
1145   enum machine_mode vec_mode;
1146   int op_type;
1147   optab optab, reduc_optab;
1148   tree new_temp;
1149   tree def, def_stmt;
1150   enum vect_def_type dt;
1151   tree new_phi;
1152   tree scalar_type;
1153   bool is_simple_use;
1154   tree orig_stmt;
1155   stmt_vec_info orig_stmt_info;
1156   tree expr = NULL_TREE;
1157   int i;
1158 
1159   /* 1. Is vectorizable reduction?  */
1160 
1161   /* Not supportable if the reduction variable is used in the loop.  */
1162   if (STMT_VINFO_RELEVANT_P (stmt_info))
1163     return false;
1164 
1165   if (!STMT_VINFO_LIVE_P (stmt_info))
1166     return false;
1167 
1168   /* Make sure it was already recognized as a reduction computation.  */
1169   if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_reduction_def)
1170     return false;
1171 
1172   /* 2. Has this been recognized as a reduction pattern?
1173 
1174      Check if STMT represents a pattern that has been recognized
1175      in earlier analysis stages.  For stmts that represent a pattern,
1176      the STMT_VINFO_RELATED_STMT field records the last stmt in
1177      the original sequence that constitutes the pattern.  */
1178 
1179   orig_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
1180   if (orig_stmt)
1181     {
1182       orig_stmt_info = vinfo_for_stmt (orig_stmt);
1183       gcc_assert (STMT_VINFO_RELATED_STMT (orig_stmt_info) == stmt);
1184       gcc_assert (STMT_VINFO_IN_PATTERN_P (orig_stmt_info));
1185       gcc_assert (!STMT_VINFO_IN_PATTERN_P (stmt_info));
1186     }
1187 
1188   /* 3. Check the operands of the operation. The first operands are defined
1189         inside the loop body. The last operand is the reduction variable,
1190         which is defined by the loop-header-phi.  */
1191 
1192   gcc_assert (TREE_CODE (stmt) == MODIFY_EXPR);
1193 
1194   operation = TREE_OPERAND (stmt, 1);
1195   code = TREE_CODE (operation);
1196   op_type = TREE_CODE_LENGTH (code);
1197 
1198   if (op_type != binary_op && op_type != ternary_op)
1199     return false;
1200   scalar_dest = TREE_OPERAND (stmt, 0);
1201   scalar_type = TREE_TYPE (scalar_dest);
1202 
1203   /* All uses but the last are expected to be defined in the loop.
1204      The last use is the reduction variable.  */
1205   for (i = 0; i < op_type-1; i++)
1206     {
1207       op = TREE_OPERAND (operation, i);
1208       is_simple_use = vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt);
1209       gcc_assert (is_simple_use);
1210       gcc_assert (dt == vect_loop_def || dt == vect_invariant_def ||
1211                   dt == vect_constant_def);
1212     }
1213 
1214   op = TREE_OPERAND (operation, i);
1215   is_simple_use = vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt);
1216   gcc_assert (is_simple_use);
1217   gcc_assert (dt == vect_reduction_def);
1218   gcc_assert (TREE_CODE (def_stmt) == PHI_NODE);
1219   if (orig_stmt)
1220     gcc_assert (orig_stmt == vect_is_simple_reduction (loop, def_stmt));
1221   else
1222     gcc_assert (stmt == vect_is_simple_reduction (loop, def_stmt));
1223 
1224   if (STMT_VINFO_LIVE_P (vinfo_for_stmt (def_stmt)))
1225     return false;
1226 
1227   /* 4. Supportable by target?  */
1228 
1229   /* 4.1. check support for the operation in the loop  */
1230   optab = optab_for_tree_code (code, vectype);
1231   if (!optab)
1232     {
1233       if (vect_print_dump_info (REPORT_DETAILS))
1234         fprintf (vect_dump, "no optab.");
1235       return false;
1236     }
1237   vec_mode = TYPE_MODE (vectype);
1238   if (optab->handlers[(int) vec_mode].insn_code == CODE_FOR_nothing)
1239     {
1240       if (vect_print_dump_info (REPORT_DETAILS))
1241         fprintf (vect_dump, "op not supported by target.");
1242       if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
1243           || LOOP_VINFO_VECT_FACTOR (loop_vinfo)
1244 	     < vect_min_worthwhile_factor (code))
1245         return false;
1246       if (vect_print_dump_info (REPORT_DETAILS))
1247 	fprintf (vect_dump, "proceeding using word mode.");
1248     }
1249 
1250   /* Worthwhile without SIMD support?  */
1251   if (!VECTOR_MODE_P (TYPE_MODE (vectype))
1252       && LOOP_VINFO_VECT_FACTOR (loop_vinfo)
1253 	 < vect_min_worthwhile_factor (code))
1254     {
1255       if (vect_print_dump_info (REPORT_DETAILS))
1256 	fprintf (vect_dump, "not worthwhile without SIMD support.");
1257       return false;
1258     }
1259 
1260   /* 4.2. Check support for the epilog operation.
1261 
1262           If STMT represents a reduction pattern, then the type of the
1263           reduction variable may be different than the type of the rest
1264           of the arguments.  For example, consider the case of accumulation
1265           of shorts into an int accumulator; The original code:
1266                         S1: int_a = (int) short_a;
1267           orig_stmt->   S2: int_acc = plus <int_a ,int_acc>;
1268 
1269           was replaced with:
1270                         STMT: int_acc = widen_sum <short_a, int_acc>
1271 
1272           This means that:
1273           1. The tree-code that is used to create the vector operation in the
1274              epilog code (that reduces the partial results) is not the
1275              tree-code of STMT, but is rather the tree-code of the original
1276              stmt from the pattern that STMT is replacing. I.e, in the example
1277              above we want to use 'widen_sum' in the loop, but 'plus' in the
1278              epilog.
1279           2. The type (mode) we use to check available target support
1280              for the vector operation to be created in the *epilog*, is
1281              determined by the type of the reduction variable (in the example
1282              above we'd check this: plus_optab[vect_int_mode]).
1283              However the type (mode) we use to check available target support
1284              for the vector operation to be created *inside the loop*, is
1285              determined by the type of the other arguments to STMT (in the
1286              example we'd check this: widen_sum_optab[vect_short_mode]).
1287 
1288           This is contrary to "regular" reductions, in which the types of all
1289           the arguments are the same as the type of the reduction variable.
1290           For "regular" reductions we can therefore use the same vector type
1291           (and also the same tree-code) when generating the epilog code and
1292           when generating the code inside the loop.  */
1293 
1294   if (orig_stmt)
1295     {
1296       /* This is a reduction pattern: get the vectype from the type of the
1297          reduction variable, and get the tree-code from orig_stmt.  */
1298       orig_code = TREE_CODE (TREE_OPERAND (orig_stmt, 1));
1299       vectype = get_vectype_for_scalar_type (TREE_TYPE (def));
1300       vec_mode = TYPE_MODE (vectype);
1301     }
1302   else
1303     {
1304       /* Regular reduction: use the same vectype and tree-code as used for
1305          the vector code inside the loop can be used for the epilog code. */
1306       orig_code = code;
1307     }
1308 
1309   if (!reduction_code_for_scalar_code (orig_code, &epilog_reduc_code))
1310     return false;
1311   reduc_optab = optab_for_tree_code (epilog_reduc_code, vectype);
1312   if (!reduc_optab)
1313     {
1314       if (vect_print_dump_info (REPORT_DETAILS))
1315         fprintf (vect_dump, "no optab for reduction.");
1316       epilog_reduc_code = NUM_TREE_CODES;
1317     }
1318   if (reduc_optab->handlers[(int) vec_mode].insn_code == CODE_FOR_nothing)
1319     {
1320       if (vect_print_dump_info (REPORT_DETAILS))
1321         fprintf (vect_dump, "reduc op not supported by target.");
1322       epilog_reduc_code = NUM_TREE_CODES;
1323     }
1324 
1325   if (!vec_stmt) /* transformation not required.  */
1326     {
1327       STMT_VINFO_TYPE (stmt_info) = reduc_vec_info_type;
1328       return true;
1329     }
1330 
1331   /** Transform.  **/
1332 
1333   if (vect_print_dump_info (REPORT_DETAILS))
1334     fprintf (vect_dump, "transform reduction.");
1335 
1336   /* Create the destination vector  */
1337   vec_dest = vect_create_destination_var (scalar_dest, vectype);
1338 
1339   /* Create the reduction-phi that defines the reduction-operand.  */
1340   new_phi = create_phi_node (vec_dest, loop->header);
1341 
1342   /* Prepare the operand that is defined inside the loop body  */
1343   op = TREE_OPERAND (operation, 0);
1344   loop_vec_def0 = vect_get_vec_def_for_operand (op, stmt, NULL);
1345   if (op_type == binary_op)
1346     expr = build2 (code, vectype, loop_vec_def0, PHI_RESULT (new_phi));
1347   else if (op_type == ternary_op)
1348     {
1349       op = TREE_OPERAND (operation, 1);
1350       loop_vec_def1 = vect_get_vec_def_for_operand (op, stmt, NULL);
1351       expr = build3 (code, vectype, loop_vec_def0, loop_vec_def1,
1352 		     PHI_RESULT (new_phi));
1353     }
1354 
1355   /* Create the vectorized operation that computes the partial results  */
1356   *vec_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, expr);
1357   new_temp = make_ssa_name (vec_dest, *vec_stmt);
1358   TREE_OPERAND (*vec_stmt, 0) = new_temp;
1359   vect_finish_stmt_generation (stmt, *vec_stmt, bsi);
1360 
1361   /* Finalize the reduction-phi (set it's arguments) and create the
1362      epilog reduction code.  */
1363   vect_create_epilog_for_reduction (new_temp, stmt, epilog_reduc_code, new_phi);
1364   return true;
1365 }
1366 
1367 
1368 /* Function vectorizable_assignment.
1369 
1370    Check if STMT performs an assignment (copy) that can be vectorized.
1371    If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1372    stmt to replace it, put it in VEC_STMT, and insert it at BSI.
1373    Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
1374 
1375 bool
vectorizable_assignment(tree stmt,block_stmt_iterator * bsi,tree * vec_stmt)1376 vectorizable_assignment (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
1377 {
1378   tree vec_dest;
1379   tree scalar_dest;
1380   tree op;
1381   tree vec_oprnd;
1382   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1383   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1384   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1385   tree new_temp;
1386   tree def, def_stmt;
1387   enum vect_def_type dt;
1388 
1389   /* Is vectorizable assignment?  */
1390   if (!STMT_VINFO_RELEVANT_P (stmt_info))
1391     return false;
1392 
1393   gcc_assert (STMT_VINFO_DEF_TYPE (stmt_info) == vect_loop_def);
1394 
1395   if (TREE_CODE (stmt) != MODIFY_EXPR)
1396     return false;
1397 
1398   scalar_dest = TREE_OPERAND (stmt, 0);
1399   if (TREE_CODE (scalar_dest) != SSA_NAME)
1400     return false;
1401 
1402   op = TREE_OPERAND (stmt, 1);
1403   if (!vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt))
1404     {
1405       if (vect_print_dump_info (REPORT_DETAILS))
1406         fprintf (vect_dump, "use not simple.");
1407       return false;
1408     }
1409 
1410   if (!vec_stmt) /* transformation not required.  */
1411     {
1412       STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
1413       return true;
1414     }
1415 
1416   /** Transform.  **/
1417   if (vect_print_dump_info (REPORT_DETAILS))
1418     fprintf (vect_dump, "transform assignment.");
1419 
1420   /* Handle def.  */
1421   vec_dest = vect_create_destination_var (scalar_dest, vectype);
1422 
1423   /* Handle use.  */
1424   op = TREE_OPERAND (stmt, 1);
1425   vec_oprnd = vect_get_vec_def_for_operand (op, stmt, NULL);
1426 
1427   /* Arguments are ready. create the new vector stmt.  */
1428   *vec_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, vec_oprnd);
1429   new_temp = make_ssa_name (vec_dest, *vec_stmt);
1430   TREE_OPERAND (*vec_stmt, 0) = new_temp;
1431   vect_finish_stmt_generation (stmt, *vec_stmt, bsi);
1432 
1433   return true;
1434 }
1435 
1436 
1437 /* Function vect_min_worthwhile_factor.
1438 
1439    For a loop where we could vectorize the operation indicated by CODE,
1440    return the minimum vectorization factor that makes it worthwhile
1441    to use generic vectors.  */
1442 static int
vect_min_worthwhile_factor(enum tree_code code)1443 vect_min_worthwhile_factor (enum tree_code code)
1444 {
1445   switch (code)
1446     {
1447     case PLUS_EXPR:
1448     case MINUS_EXPR:
1449     case NEGATE_EXPR:
1450       return 4;
1451 
1452     case BIT_AND_EXPR:
1453     case BIT_IOR_EXPR:
1454     case BIT_XOR_EXPR:
1455     case BIT_NOT_EXPR:
1456       return 2;
1457 
1458     default:
1459       return INT_MAX;
1460     }
1461 }
1462 
1463 
1464 /* Function vectorizable_operation.
1465 
1466    Check if STMT performs a binary or unary operation that can be vectorized.
1467    If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1468    stmt to replace it, put it in VEC_STMT, and insert it at BSI.
1469    Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
1470 
bool
vectorizable_operation (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
{
  tree vec_dest;
  tree scalar_dest;
  tree operation;
  tree op0, op1 = NULL;
  tree vec_oprnd0, vec_oprnd1=NULL;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  int i;
  enum tree_code code;
  enum machine_mode vec_mode;
  tree new_temp;
  int op_type;
  tree op;
  optab optab;
  int icode;
  enum machine_mode optab_op2_mode;
  tree def, def_stmt;
  enum vect_def_type dt;

  /* Is STMT a vectorizable binary/unary operation?   */
  if (!STMT_VINFO_RELEVANT_P (stmt_info))
    return false;

  gcc_assert (STMT_VINFO_DEF_TYPE (stmt_info) == vect_loop_def);

  /* A def that is live after the loop would need special epilog handling
     that is not implemented here.  */
  if (STMT_VINFO_LIVE_P (stmt_info))
    {
      /* FORNOW: not yet supported.  */
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "value used after loop.");
      return false;
    }

  if (TREE_CODE (stmt) != MODIFY_EXPR)
    return false;

  if (TREE_CODE (TREE_OPERAND (stmt, 0)) != SSA_NAME)
    return false;

  operation = TREE_OPERAND (stmt, 1);
  code = TREE_CODE (operation);
  optab = optab_for_tree_code (code, vectype);

  /* Support only unary or binary operations.  */
  op_type = TREE_CODE_LENGTH (code);
  if (op_type != unary_op && op_type != binary_op)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
	fprintf (vect_dump, "num. args = %d (not unary/binary op).", op_type);
      return false;
    }

  /* Check that every operand has a def the vectorizer can handle.
     NOTE: after this loop DT holds the def-type of the LAST operand
     examined; the shift-operand check further down relies on that.  */
  for (i = 0; i < op_type; i++)
    {
      op = TREE_OPERAND (operation, i);
      if (!vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt))
	{
	  if (vect_print_dump_info (REPORT_DETAILS))
	    fprintf (vect_dump, "use not simple.");
	  return false;
	}
    }

  /* Supportable by target?  */
  if (!optab)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
	fprintf (vect_dump, "no optab.");
      return false;
    }
  vec_mode = TYPE_MODE (vectype);
  icode = (int) optab->handlers[(int) vec_mode].insn_code;
  if (icode == CODE_FOR_nothing)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
	fprintf (vect_dump, "op not supported by target.");
      /* No vector insn for this op; proceed with word_mode emulation only
	 if the vector size equals the word size and the vectorization
	 factor makes emulation worthwhile.  */
      if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
          || LOOP_VINFO_VECT_FACTOR (loop_vinfo)
	     < vect_min_worthwhile_factor (code))
        return false;
      if (vect_print_dump_info (REPORT_DETAILS))
	fprintf (vect_dump, "proceeding using word mode.");
    }

  /* Worthwhile without SIMD support?  */
  if (!VECTOR_MODE_P (TYPE_MODE (vectype))
      && LOOP_VINFO_VECT_FACTOR (loop_vinfo)
	 < vect_min_worthwhile_factor (code))
    {
      if (vect_print_dump_info (REPORT_DETAILS))
	fprintf (vect_dump, "not worthwhile without SIMD support.");
      return false;
    }

  if (code == LSHIFT_EXPR || code == RSHIFT_EXPR)
    {
      /* FORNOW: not yet supported.  */
      if (!VECTOR_MODE_P (vec_mode))
	return false;

      /* Invariant argument is needed for a vector shift
	 by a scalar shift operand.  (DT here is the def-type of the
	 shift amount - the last operand checked in the loop above.)  */
      optab_op2_mode = insn_data[icode].operand[2].mode;
      if (! (VECTOR_MODE_P (optab_op2_mode)
	     || dt == vect_constant_def
	     || dt == vect_invariant_def))
	{
	  if (vect_print_dump_info (REPORT_DETAILS))
	    fprintf (vect_dump, "operand mode requires invariant argument.");
	  return false;
	}
    }

  if (!vec_stmt) /* transformation not required.  */
    {
      STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
      return true;
    }

  /** Transform.  **/

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "transform binary/unary operation.");

  /* Handle def.  */
  scalar_dest = TREE_OPERAND (stmt, 0);
  vec_dest = vect_create_destination_var (scalar_dest, vectype);

  /* Handle uses.  */
  op0 = TREE_OPERAND (operation, 0);
  vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL);

  if (op_type == binary_op)
    {
      op1 = TREE_OPERAND (operation, 1);

      if (code == LSHIFT_EXPR || code == RSHIFT_EXPR)
	{
	  /* Vector shl and shr insn patterns can be defined with
	     scalar operand 2 (shift operand).  In this case, use
	     constant or loop invariant op1 directly, without
	     extending it to vector mode first.  */

	  optab_op2_mode = insn_data[icode].operand[2].mode;
	  if (!VECTOR_MODE_P (optab_op2_mode))
	    {
	      if (vect_print_dump_info (REPORT_DETAILS))
		fprintf (vect_dump, "operand 1 using scalar mode.");
	      vec_oprnd1 = op1;
	    }
	}

      /* If the scalar-shift shortcut did not apply, widen op1 to a
	 vector def like any other operand.  */
      if (!vec_oprnd1)
	vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt, NULL);
    }

  /* Arguments are ready. create the new vector stmt.  */

  if (op_type == binary_op)
    *vec_stmt = build2 (MODIFY_EXPR, vectype, vec_dest,
		build2 (code, vectype, vec_oprnd0, vec_oprnd1));
  else
    *vec_stmt = build2 (MODIFY_EXPR, vectype, vec_dest,
		build1 (code, vectype, vec_oprnd0));
  new_temp = make_ssa_name (vec_dest, *vec_stmt);
  TREE_OPERAND (*vec_stmt, 0) = new_temp;
  vect_finish_stmt_generation (stmt, *vec_stmt, bsi);

  return true;
}
1645 
1646 
1647 /* Function vectorizable_store.
1648 
1649    Check if STMT defines a non scalar data-ref (array/pointer/structure) that
1650    can be vectorized.
1651    If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1652    stmt to replace it, put it in VEC_STMT, and insert it at BSI.
1653    Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
1654 
1655 bool
vectorizable_store(tree stmt,block_stmt_iterator * bsi,tree * vec_stmt)1656 vectorizable_store (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
1657 {
1658   tree scalar_dest;
1659   tree data_ref;
1660   tree op;
1661   tree vec_oprnd1;
1662   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1663   struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
1664   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1665   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1666   enum machine_mode vec_mode;
1667   tree dummy;
1668   enum dr_alignment_support alignment_support_cheme;
1669   ssa_op_iter iter;
1670   tree def, def_stmt;
1671   enum vect_def_type dt;
1672 
1673   /* Is vectorizable store? */
1674 
1675   if (TREE_CODE (stmt) != MODIFY_EXPR)
1676     return false;
1677 
1678   scalar_dest = TREE_OPERAND (stmt, 0);
1679   if (TREE_CODE (scalar_dest) != ARRAY_REF
1680       && TREE_CODE (scalar_dest) != INDIRECT_REF)
1681     return false;
1682 
1683   op = TREE_OPERAND (stmt, 1);
1684   if (!vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt))
1685     {
1686       if (vect_print_dump_info (REPORT_DETAILS))
1687         fprintf (vect_dump, "use not simple.");
1688       return false;
1689     }
1690 
1691   vec_mode = TYPE_MODE (vectype);
1692   /* FORNOW. In some cases can vectorize even if data-type not supported
1693      (e.g. - array initialization with 0).  */
1694   if (mov_optab->handlers[(int)vec_mode].insn_code == CODE_FOR_nothing)
1695     return false;
1696 
1697   if (!STMT_VINFO_DATA_REF (stmt_info))
1698     return false;
1699 
1700 
1701   if (!vec_stmt) /* transformation not required.  */
1702     {
1703       STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
1704       return true;
1705     }
1706 
1707   /** Transform.  **/
1708 
1709   if (vect_print_dump_info (REPORT_DETAILS))
1710     fprintf (vect_dump, "transform store");
1711 
1712   alignment_support_cheme = vect_supportable_dr_alignment (dr);
1713   gcc_assert (alignment_support_cheme);
1714   gcc_assert (alignment_support_cheme == dr_aligned);  /* FORNOW */
1715 
1716   /* Handle use - get the vectorized def from the defining stmt.  */
1717   vec_oprnd1 = vect_get_vec_def_for_operand (op, stmt, NULL);
1718 
1719   /* Handle def.  */
1720   /* FORNOW: make sure the data reference is aligned.  */
1721   vect_align_data_ref (stmt);
1722   data_ref = vect_create_data_ref_ptr (stmt, bsi, NULL_TREE, &dummy, false);
1723   data_ref = build_fold_indirect_ref (data_ref);
1724 
1725   /* Arguments are ready. create the new vector stmt.  */
1726   *vec_stmt = build2 (MODIFY_EXPR, vectype, data_ref, vec_oprnd1);
1727   vect_finish_stmt_generation (stmt, *vec_stmt, bsi);
1728 
1729   /* Copy the V_MAY_DEFS representing the aliasing of the original array
1730      element's definition to the vector's definition then update the
1731      defining statement.  The original is being deleted so the same
1732      SSA_NAMEs can be used.  */
1733   copy_virtual_operands (*vec_stmt, stmt);
1734 
1735   FOR_EACH_SSA_TREE_OPERAND (def, stmt, iter, SSA_OP_VMAYDEF)
1736     {
1737       SSA_NAME_DEF_STMT (def) = *vec_stmt;
1738 
1739       /* If this virtual def has a use outside the loop and a loop peel is
1740 	 performed then the def may be renamed by the peel.  Mark it for
1741 	 renaming so the later use will also be renamed.  */
1742       mark_sym_for_renaming (SSA_NAME_VAR (def));
1743     }
1744 
1745   return true;
1746 }
1747 
1748 
/* Function vectorizable_load.

   Check if STMT reads a non scalar data-ref (array/pointer/structure) that
   can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at BSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */

bool
vectorizable_load (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
{
  tree scalar_dest;
  tree vec_dest = NULL;
  tree data_ref = NULL;
  tree op;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  tree new_temp;
  int mode;
  tree init_addr;
  tree new_stmt;
  tree dummy;
  basic_block new_bb;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  edge pe = loop_preheader_edge (loop);
  enum dr_alignment_support alignment_support_cheme;

  /* Is vectorizable load? */
  if (!STMT_VINFO_RELEVANT_P (stmt_info))
    return false;

  gcc_assert (STMT_VINFO_DEF_TYPE (stmt_info) == vect_loop_def);

  if (STMT_VINFO_LIVE_P (stmt_info))
    {
      /* FORNOW: not yet supported.  */
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "value used after loop.");
      return false;
    }

  /* The scalar form must be "SSA_NAME = ARRAY_REF/INDIRECT_REF", and the
     analysis phase must have recorded a data reference for it.  */
  if (TREE_CODE (stmt) != MODIFY_EXPR)
    return false;

  scalar_dest = TREE_OPERAND (stmt, 0);
  if (TREE_CODE (scalar_dest) != SSA_NAME)
    return false;

  op = TREE_OPERAND (stmt, 1);
  if (TREE_CODE (op) != ARRAY_REF && TREE_CODE (op) != INDIRECT_REF)
    return false;

  if (!STMT_VINFO_DATA_REF (stmt_info))
    return false;

  mode = (int) TYPE_MODE (vectype);

  /* FORNOW. In some cases can vectorize even if data-type not supported
    (e.g. - data copies).  */
  if (mov_optab->handlers[mode].insn_code == CODE_FOR_nothing)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
	fprintf (vect_dump, "Aligned load, but unsupported type.");
      return false;
    }

  if (!vec_stmt) /* transformation not required.  */
    {
      STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
      return true;
    }

  /** Transform.  **/

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "transform load.");

  alignment_support_cheme = vect_supportable_dr_alignment (dr);
  gcc_assert (alignment_support_cheme);

  if (alignment_support_cheme == dr_aligned
      || alignment_support_cheme == dr_unaligned_supported)
    {
      /* The simple case: the target can load this reference directly
	 (either aligned, or with native misaligned-load support).
	 Create:
         p = initial_addr;
         indx = 0;
         loop {
           vec_dest = *(p);
           indx = indx + 1;
         }
      */

      vec_dest = vect_create_destination_var (scalar_dest, vectype);
      data_ref = vect_create_data_ref_ptr (stmt, bsi, NULL_TREE, &dummy, false);
      if (aligned_access_p (dr))
        data_ref = build_fold_indirect_ref (data_ref);
      else
	{
	  /* Misaligned but supported: wrap the access in a
	     MISALIGNED_INDIRECT_REF carrying the misalignment in bits
	     (zero when the misalignment is unknown, i.e. -1).  */
	  int mis = DR_MISALIGNMENT (dr);
	  tree tmis = (mis == -1 ? size_zero_node : size_int (mis));
	  tmis = size_binop (MULT_EXPR, tmis, size_int(BITS_PER_UNIT));
	  data_ref = build2 (MISALIGNED_INDIRECT_REF, vectype, data_ref, tmis);
	}
      new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, data_ref);
      new_temp = make_ssa_name (vec_dest, new_stmt);
      TREE_OPERAND (new_stmt, 0) = new_temp;
      vect_finish_stmt_generation (stmt, new_stmt, bsi);
      /* The scalar load is replaced, so its VUSEs move to the new stmt.  */
      copy_virtual_operands (new_stmt, stmt);
    }
  else if (alignment_support_cheme == dr_unaligned_software_pipeline)
    {
      /* Software-pipelined realignment scheme.  Create:
	 p1 = initial_addr;
	 msq_init = *(floor(p1))
	 p2 = initial_addr + VS - 1;
	 magic = have_builtin ? builtin_result : initial_address;
	 indx = 0;
	 loop {
	   p2' = p2 + indx * vectype_size
	   lsq = *(floor(p2'))
	   vec_dest = realign_load (msq, lsq, magic)
	   indx = indx + 1;
	   msq = lsq;
	 }
      */

      tree offset;
      tree magic;
      tree phi_stmt;
      tree msq_init;
      tree msq, lsq;
      tree dataref_ptr;
      tree params;

      /* <1> Create msq_init = *(floor(p1)) in the loop preheader  */
      vec_dest = vect_create_destination_var (scalar_dest, vectype);
      data_ref = vect_create_data_ref_ptr (stmt, bsi, NULL_TREE,
					   &init_addr, true);
      data_ref = build1 (ALIGN_INDIRECT_REF, vectype, data_ref);
      new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, data_ref);
      new_temp = make_ssa_name (vec_dest, new_stmt);
      TREE_OPERAND (new_stmt, 0) = new_temp;
      /* Inserting on the preheader edge must not split it into a new
	 block (the preheader already exists).  */
      new_bb = bsi_insert_on_edge_immediate (pe, new_stmt);
      gcc_assert (!new_bb);
      msq_init = TREE_OPERAND (new_stmt, 0);
      copy_virtual_operands (new_stmt, stmt);
      /* The copied VUSEs name values defined inside the loop; rewrite
	 them to the values live on loop entry.  */
      update_vuses_to_preheader (new_stmt, loop);


      /* <2> Create lsq = *(floor(p2')) in the loop  */
      offset = size_int (TYPE_VECTOR_SUBPARTS (vectype) - 1);
      vec_dest = vect_create_destination_var (scalar_dest, vectype);
      dataref_ptr = vect_create_data_ref_ptr (stmt, bsi, offset, &dummy, false);
      data_ref = build1 (ALIGN_INDIRECT_REF, vectype, dataref_ptr);
      new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, data_ref);
      new_temp = make_ssa_name (vec_dest, new_stmt);
      TREE_OPERAND (new_stmt, 0) = new_temp;
      vect_finish_stmt_generation (stmt, new_stmt, bsi);
      lsq = TREE_OPERAND (new_stmt, 0);
      copy_virtual_operands (new_stmt, stmt);


      /* <3> Compute the realignment "magic" operand.  */
      if (targetm.vectorize.builtin_mask_for_load)
	{
	  /* Create permutation mask, if required, in loop preheader.  */
	  tree builtin_decl;
	  params = build_tree_list (NULL_TREE, init_addr);
	  vec_dest = vect_create_destination_var (scalar_dest, vectype);
	  builtin_decl = targetm.vectorize.builtin_mask_for_load ();
	  new_stmt = build_function_call_expr (builtin_decl, params);
	  new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, new_stmt);
	  new_temp = make_ssa_name (vec_dest, new_stmt);
	  TREE_OPERAND (new_stmt, 0) = new_temp;
	  new_bb = bsi_insert_on_edge_immediate (pe, new_stmt);
	  gcc_assert (!new_bb);
	  magic = TREE_OPERAND (new_stmt, 0);

	  /* The result of the CALL_EXPR to this builtin is determined from
	     the value of the parameter and no global variables are touched
	     which makes the builtin a "const" function.  Requiring the
	     builtin to have the "const" attribute makes it unnecessary
	     to call mark_call_clobbered.  */
	  gcc_assert (TREE_READONLY (builtin_decl));
	}
      else
	{
	  /* Use current address instead of init_addr for reduced reg pressure.
	   */
	  magic = dataref_ptr;
	}


      /* <4> Create msq = phi <msq_init, lsq> in loop  */
      vec_dest = vect_create_destination_var (scalar_dest, vectype);
      msq = make_ssa_name (vec_dest, NULL_TREE);
      phi_stmt = create_phi_node (msq, loop->header); /* CHECKME */
      SSA_NAME_DEF_STMT (msq) = phi_stmt;
      add_phi_arg (phi_stmt, msq_init, loop_preheader_edge (loop));
      add_phi_arg (phi_stmt, lsq, loop_latch_edge (loop));


      /* <5> Create <vec_dest = realign_load (msq, lsq, magic)> in loop  */
      vec_dest = vect_create_destination_var (scalar_dest, vectype);
      new_stmt = build3 (REALIGN_LOAD_EXPR, vectype, msq, lsq, magic);
      new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, new_stmt);
      new_temp = make_ssa_name (vec_dest, new_stmt);
      TREE_OPERAND (new_stmt, 0) = new_temp;
      vect_finish_stmt_generation (stmt, new_stmt, bsi);
    }
  else
    gcc_unreachable ();

  *vec_stmt = new_stmt;
  return true;
}
1967 
1968 
1969 /* Function vectorizable_live_operation.
1970 
1971    STMT computes a value that is used outside the loop. Check if
1972    it can be supported.  */
1973 
1974 bool
vectorizable_live_operation(tree stmt,block_stmt_iterator * bsi ATTRIBUTE_UNUSED,tree * vec_stmt ATTRIBUTE_UNUSED)1975 vectorizable_live_operation (tree stmt,
1976                              block_stmt_iterator *bsi ATTRIBUTE_UNUSED,
1977                              tree *vec_stmt ATTRIBUTE_UNUSED)
1978 {
1979   tree operation;
1980   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1981   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1982   int i;
1983   enum tree_code code;
1984   int op_type;
1985   tree op;
1986   tree def, def_stmt;
1987   enum vect_def_type dt;
1988 
1989   if (!STMT_VINFO_LIVE_P (stmt_info))
1990     return false;
1991 
1992   if (TREE_CODE (stmt) != MODIFY_EXPR)
1993     return false;
1994 
1995   if (TREE_CODE (TREE_OPERAND (stmt, 0)) != SSA_NAME)
1996     return false;
1997 
1998   operation = TREE_OPERAND (stmt, 1);
1999   code = TREE_CODE (operation);
2000 
2001   op_type = TREE_CODE_LENGTH (code);
2002 
2003   /* FORNOW: support only if all uses are invariant. This means
2004      that the scalar operations can remain in place, unvectorized.
2005      The original last scalar value that they compute will be used.  */
2006 
2007   for (i = 0; i < op_type; i++)
2008     {
2009       op = TREE_OPERAND (operation, i);
2010       if (!vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt))
2011         {
2012           if (vect_print_dump_info (REPORT_DETAILS))
2013             fprintf (vect_dump, "use not simple.");
2014           return false;
2015         }
2016 
2017       if (dt != vect_invariant_def && dt != vect_constant_def)
2018         return false;
2019     }
2020 
2021   /* No transformation is required for the cases we currently support.  */
2022   return true;
2023 }
2024 
2025 
2026 /* Function vect_is_simple_cond.
2027 
2028    Input:
2029    LOOP - the loop that is being vectorized.
2030    COND - Condition that is checked for simple use.
2031 
2032    Returns whether a COND can be vectorized.  Checks whether
2033    condition operands are supportable using vec_is_simple_use.  */
2034 
2035 static bool
vect_is_simple_cond(tree cond,loop_vec_info loop_vinfo)2036 vect_is_simple_cond (tree cond, loop_vec_info loop_vinfo)
2037 {
2038   tree lhs, rhs;
2039   tree def;
2040   enum vect_def_type dt;
2041 
2042   if (!COMPARISON_CLASS_P (cond))
2043     return false;
2044 
2045   lhs = TREE_OPERAND (cond, 0);
2046   rhs = TREE_OPERAND (cond, 1);
2047 
2048   if (TREE_CODE (lhs) == SSA_NAME)
2049     {
2050       tree lhs_def_stmt = SSA_NAME_DEF_STMT (lhs);
2051       if (!vect_is_simple_use (lhs, loop_vinfo, &lhs_def_stmt, &def, &dt))
2052 	return false;
2053     }
2054   else if (TREE_CODE (lhs) != INTEGER_CST && TREE_CODE (lhs) != REAL_CST)
2055     return false;
2056 
2057   if (TREE_CODE (rhs) == SSA_NAME)
2058     {
2059       tree rhs_def_stmt = SSA_NAME_DEF_STMT (rhs);
2060       if (!vect_is_simple_use (rhs, loop_vinfo, &rhs_def_stmt, &def, &dt))
2061 	return false;
2062     }
2063   else if (TREE_CODE (rhs) != INTEGER_CST  && TREE_CODE (rhs) != REAL_CST)
2064     return false;
2065 
2066   return true;
2067 }
2068 
2069 /* vectorizable_condition.
2070 
2071    Check if STMT is conditional modify expression that can be vectorized.
2072    If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2073    stmt using VEC_COND_EXPR  to replace it, put it in VEC_STMT, and insert it
2074    at BSI.
2075 
2076    Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
2077 
2078 bool
vectorizable_condition(tree stmt,block_stmt_iterator * bsi,tree * vec_stmt)2079 vectorizable_condition (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
2080 {
2081   tree scalar_dest = NULL_TREE;
2082   tree vec_dest = NULL_TREE;
2083   tree op = NULL_TREE;
2084   tree cond_expr, then_clause, else_clause;
2085   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2086   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2087   tree vec_cond_lhs, vec_cond_rhs, vec_then_clause, vec_else_clause;
2088   tree vec_compare, vec_cond_expr;
2089   tree new_temp;
2090   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2091   enum machine_mode vec_mode;
2092   tree def;
2093   enum vect_def_type dt;
2094 
2095   if (!STMT_VINFO_RELEVANT_P (stmt_info))
2096     return false;
2097 
2098   gcc_assert (STMT_VINFO_DEF_TYPE (stmt_info) == vect_loop_def);
2099 
2100   if (STMT_VINFO_LIVE_P (stmt_info))
2101     {
2102       /* FORNOW: not yet supported.  */
2103       if (vect_print_dump_info (REPORT_DETAILS))
2104         fprintf (vect_dump, "value used after loop.");
2105       return false;
2106     }
2107 
2108   if (TREE_CODE (stmt) != MODIFY_EXPR)
2109     return false;
2110 
2111   op = TREE_OPERAND (stmt, 1);
2112 
2113   if (TREE_CODE (op) != COND_EXPR)
2114     return false;
2115 
2116   cond_expr = TREE_OPERAND (op, 0);
2117   then_clause = TREE_OPERAND (op, 1);
2118   else_clause = TREE_OPERAND (op, 2);
2119 
2120   if (!vect_is_simple_cond (cond_expr, loop_vinfo))
2121     return false;
2122 
2123   /* We do not handle two different vector types for the condition
2124      and the values.  */
2125   if (TREE_TYPE (TREE_OPERAND (cond_expr, 0)) != TREE_TYPE (vectype))
2126     return false;
2127 
2128   if (TREE_CODE (then_clause) == SSA_NAME)
2129     {
2130       tree then_def_stmt = SSA_NAME_DEF_STMT (then_clause);
2131       if (!vect_is_simple_use (then_clause, loop_vinfo,
2132 			       &then_def_stmt, &def, &dt))
2133 	return false;
2134     }
2135   else if (TREE_CODE (then_clause) != INTEGER_CST
2136 	   && TREE_CODE (then_clause) != REAL_CST)
2137     return false;
2138 
2139   if (TREE_CODE (else_clause) == SSA_NAME)
2140     {
2141       tree else_def_stmt = SSA_NAME_DEF_STMT (else_clause);
2142       if (!vect_is_simple_use (else_clause, loop_vinfo,
2143 			       &else_def_stmt, &def, &dt))
2144 	return false;
2145     }
2146   else if (TREE_CODE (else_clause) != INTEGER_CST
2147 	   && TREE_CODE (else_clause) != REAL_CST)
2148     return false;
2149 
2150 
2151   vec_mode = TYPE_MODE (vectype);
2152 
2153   if (!vec_stmt)
2154     {
2155       STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type;
2156       return expand_vec_cond_expr_p (op, vec_mode);
2157     }
2158 
2159   /* Transform */
2160 
2161   /* Handle def.  */
2162   scalar_dest = TREE_OPERAND (stmt, 0);
2163   vec_dest = vect_create_destination_var (scalar_dest, vectype);
2164 
2165   /* Handle cond expr.  */
2166   vec_cond_lhs =
2167     vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 0), stmt, NULL);
2168   vec_cond_rhs =
2169     vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 1), stmt, NULL);
2170   vec_then_clause = vect_get_vec_def_for_operand (then_clause, stmt, NULL);
2171   vec_else_clause = vect_get_vec_def_for_operand (else_clause, stmt, NULL);
2172 
2173   /* Arguments are ready. create the new vector stmt.  */
2174   vec_compare = build2 (TREE_CODE (cond_expr), vectype,
2175 			vec_cond_lhs, vec_cond_rhs);
2176   vec_cond_expr = build3 (VEC_COND_EXPR, vectype,
2177 			  vec_compare, vec_then_clause, vec_else_clause);
2178 
2179   *vec_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, vec_cond_expr);
2180   new_temp = make_ssa_name (vec_dest, *vec_stmt);
2181   TREE_OPERAND (*vec_stmt, 0) = new_temp;
2182   vect_finish_stmt_generation (stmt, *vec_stmt, bsi);
2183 
2184   return true;
2185 }
2186 
2187 /* Function vect_transform_stmt.
2188 
2189    Create a vectorized stmt to replace STMT, and insert it at BSI.  */
2190 
2191 bool
vect_transform_stmt(tree stmt,block_stmt_iterator * bsi)2192 vect_transform_stmt (tree stmt, block_stmt_iterator *bsi)
2193 {
2194   bool is_store = false;
2195   tree vec_stmt = NULL_TREE;
2196   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2197   tree orig_stmt_in_pattern;
2198   bool done;
2199 
2200   if (STMT_VINFO_RELEVANT_P (stmt_info))
2201     {
2202       switch (STMT_VINFO_TYPE (stmt_info))
2203       {
2204       case op_vec_info_type:
2205 	done = vectorizable_operation (stmt, bsi, &vec_stmt);
2206 	gcc_assert (done);
2207 	break;
2208 
2209       case assignment_vec_info_type:
2210 	done = vectorizable_assignment (stmt, bsi, &vec_stmt);
2211 	gcc_assert (done);
2212 	break;
2213 
2214       case load_vec_info_type:
2215 	done = vectorizable_load (stmt, bsi, &vec_stmt);
2216 	gcc_assert (done);
2217 	break;
2218 
2219       case store_vec_info_type:
2220 	done = vectorizable_store (stmt, bsi, &vec_stmt);
2221 	gcc_assert (done);
2222 	is_store = true;
2223 	break;
2224 
2225       case condition_vec_info_type:
2226 	done = vectorizable_condition (stmt, bsi, &vec_stmt);
2227 	gcc_assert (done);
2228 	break;
2229 
2230       default:
2231 	if (vect_print_dump_info (REPORT_DETAILS))
2232 	  fprintf (vect_dump, "stmt not supported.");
2233 	gcc_unreachable ();
2234       }
2235 
2236       gcc_assert (vec_stmt);
2237       STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;
2238       orig_stmt_in_pattern = STMT_VINFO_RELATED_STMT (stmt_info);
2239       if (orig_stmt_in_pattern)
2240         {
2241           stmt_vec_info stmt_vinfo = vinfo_for_stmt (orig_stmt_in_pattern);
2242           if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
2243             {
2244               gcc_assert (STMT_VINFO_RELATED_STMT (stmt_vinfo) == stmt);
2245 
2246               /* STMT was inserted by the vectorizer to replace a computation
2247                  idiom.  ORIG_STMT_IN_PATTERN is a stmt in the original
2248                  sequence that computed this idiom.  We need to record a pointer
2249                  to VEC_STMT in the stmt_info of ORIG_STMT_IN_PATTERN.  See more
2250                  detail in the documentation of vect_pattern_recog.  */
2251 
2252               STMT_VINFO_VEC_STMT (stmt_vinfo) = vec_stmt;
2253             }
2254         }
2255     }
2256 
2257   if (STMT_VINFO_LIVE_P (stmt_info))
2258     {
2259       switch (STMT_VINFO_TYPE (stmt_info))
2260       {
2261       case reduc_vec_info_type:
2262         done = vectorizable_reduction (stmt, bsi, &vec_stmt);
2263         gcc_assert (done);
2264         break;
2265 
2266       default:
2267         done = vectorizable_live_operation (stmt, bsi, &vec_stmt);
2268         gcc_assert (done);
2269       }
2270 
2271       if (vec_stmt)
2272         {
2273           gcc_assert (!STMT_VINFO_VEC_STMT (stmt_info));
2274           STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;
2275         }
2276     }
2277 
2278   return is_store;
2279 }
2280 
2281 
2282 /* This function builds ni_name = number of iterations loop executes
2283    on the loop preheader.  */
2284 
2285 static tree
vect_build_loop_niters(loop_vec_info loop_vinfo)2286 vect_build_loop_niters (loop_vec_info loop_vinfo)
2287 {
2288   tree ni_name, stmt, var;
2289   edge pe;
2290   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2291   tree ni = unshare_expr (LOOP_VINFO_NITERS (loop_vinfo));
2292 
2293   var = create_tmp_var (TREE_TYPE (ni), "niters");
2294   add_referenced_var (var);
2295   ni_name = force_gimple_operand (ni, &stmt, false, var);
2296 
2297   pe = loop_preheader_edge (loop);
2298   if (stmt)
2299     {
2300       basic_block new_bb = bsi_insert_on_edge_immediate (pe, stmt);
2301       gcc_assert (!new_bb);
2302     }
2303 
2304   return ni_name;
2305 }
2306 
2307 
2308 /* This function generates the following statements:
2309 
2310  ni_name = number of iterations loop executes
2311  ratio = ni_name / vf
2312  ratio_mult_vf_name = ratio * vf
2313 
2314  and places them at the loop preheader edge.  */
2315 
2316 static void
vect_generate_tmps_on_preheader(loop_vec_info loop_vinfo,tree * ni_name_ptr,tree * ratio_mult_vf_name_ptr,tree * ratio_name_ptr)2317 vect_generate_tmps_on_preheader (loop_vec_info loop_vinfo,
2318 				 tree *ni_name_ptr,
2319 				 tree *ratio_mult_vf_name_ptr,
2320 				 tree *ratio_name_ptr)
2321 {
2322 
2323   edge pe;
2324   basic_block new_bb;
2325   tree stmt, ni_name;
2326   tree var;
2327   tree ratio_name;
2328   tree ratio_mult_vf_name;
2329   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2330   tree ni = LOOP_VINFO_NITERS (loop_vinfo);
2331   int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
2332   tree log_vf;
2333 
2334   pe = loop_preheader_edge (loop);
2335 
2336   /* Generate temporary variable that contains
2337      number of iterations loop executes.  */
2338 
2339   ni_name = vect_build_loop_niters (loop_vinfo);
2340   log_vf = build_int_cst (TREE_TYPE (ni), exact_log2 (vf));
2341 
2342   /* Create: ratio = ni >> log2(vf) */
2343 
2344   var = create_tmp_var (TREE_TYPE (ni), "bnd");
2345   add_referenced_var (var);
2346   ratio_name = make_ssa_name (var, NULL_TREE);
2347   stmt = build2 (MODIFY_EXPR, void_type_node, ratio_name,
2348 	   build2 (RSHIFT_EXPR, TREE_TYPE (ni_name), ni_name, log_vf));
2349   SSA_NAME_DEF_STMT (ratio_name) = stmt;
2350 
2351   pe = loop_preheader_edge (loop);
2352   new_bb = bsi_insert_on_edge_immediate (pe, stmt);
2353   gcc_assert (!new_bb);
2354 
2355   /* Create: ratio_mult_vf = ratio << log2 (vf).  */
2356 
2357   var = create_tmp_var (TREE_TYPE (ni), "ratio_mult_vf");
2358   add_referenced_var (var);
2359   ratio_mult_vf_name = make_ssa_name (var, NULL_TREE);
2360   stmt = build2 (MODIFY_EXPR, void_type_node, ratio_mult_vf_name,
2361 	   build2 (LSHIFT_EXPR, TREE_TYPE (ratio_name), ratio_name, log_vf));
2362   SSA_NAME_DEF_STMT (ratio_mult_vf_name) = stmt;
2363 
2364   pe = loop_preheader_edge (loop);
2365   new_bb = bsi_insert_on_edge_immediate (pe, stmt);
2366   gcc_assert (!new_bb);
2367 
2368   *ni_name_ptr = ni_name;
2369   *ratio_mult_vf_name_ptr = ratio_mult_vf_name;
2370   *ratio_name_ptr = ratio_name;
2371 
2372   return;
2373 }
2374 
2375 
/* Function update_vuses_to_preheader.

   Input:
   STMT - a statement with potential VUSEs.
   LOOP - the loop whose preheader will contain STMT.

   It's possible to vectorize a loop even though an SSA_NAME from a VUSE
   appears to be defined in a V_MAY_DEF in another statement in a loop.
   One such case is when the VUSE is at the dereference of a __restricted__
   pointer in a load and the V_MAY_DEF is at the dereference of a different
   __restricted__ pointer in a store.  Vectorization may result in
   copy_virtual_uses being called to copy the problematic VUSE to a new
   statement that is being inserted in the loop preheader.  This procedure
   is called to change the SSA_NAME in the new statement's VUSE from the
   SSA_NAME updated in the loop to the related SSA_NAME available on the
   path entering the loop.

   When this function is called, we have the following situation:

        # vuse <name1>
        S1: vload
    do {
        # name1 = phi < name0 , name2>

        # vuse <name1>
        S2: vload

        # name2 = vdef <name1>
        S3: vstore

    }while...

   Stmt S1 was created in the loop preheader block as part of misaligned-load
   handling. This function fixes the name of the vuse of S1 from 'name1' to
   'name0'.  */

static void
update_vuses_to_preheader (tree stmt, struct loop *loop)
{
  basic_block header_bb = loop->header;
  edge preheader_e = loop_preheader_edge (loop);
  ssa_op_iter iter;
  use_operand_p use_p;

  /* Walk every virtual use of STMT and, if the used name is defined
     inside LOOP, replace it with the value live on loop entry.  */
  FOR_EACH_SSA_USE_OPERAND (use_p, stmt, iter, SSA_OP_VUSE)
    {
      tree ssa_name = USE_FROM_PTR (use_p);
      tree def_stmt = SSA_NAME_DEF_STMT (ssa_name);
      tree name_var = SSA_NAME_VAR (ssa_name);
      basic_block bb = bb_for_stmt (def_stmt);

      /* For a use before any definitions, def_stmt is a NOP_EXPR.  */
      if (!IS_EMPTY_STMT (def_stmt)
	  && flow_bb_inside_loop_p (loop, bb))
        {
          /* If the block containing the statement defining the SSA_NAME
             is in the loop then it's necessary to find the definition
             outside the loop using the PHI nodes of the header.  */
	  tree phi;
	  bool updated = false;

	  /* Find the header PHI for the same underlying variable; its
	     preheader-edge argument is the value live on loop entry.  */
	  for (phi = phi_nodes (header_bb); phi; phi = TREE_CHAIN (phi))
	    {
	      if (SSA_NAME_VAR (PHI_RESULT (phi)) == name_var)
		{
		  SET_USE (use_p, PHI_ARG_DEF (phi, preheader_e->dest_idx));
		  updated = true;
		  break;
		}
	    }
	  /* A name defined inside the loop must have a header PHI, so the
	     search above cannot fail.  */
	  gcc_assert (updated);
	}
    }
}
2450 
2451 
/*   Function vect_update_ivs_after_vectorizer.

     "Advance" the induction variables of LOOP to the value they should take
     after the execution of LOOP.  This is currently necessary because the
     vectorizer does not handle induction variables that are used after the
     loop.  Such a situation occurs when the last iterations of LOOP are
     peeled, because:
     1. We introduced new uses after LOOP for IVs that were not originally used
        after LOOP: the IVs of LOOP are now used by an epilog loop.
     2. LOOP is going to be vectorized; this means that it will iterate N/VF
        times, whereas the loop IVs should be bumped N times.

     Input:
     - LOOP - a loop that is going to be vectorized. The last few iterations
              of LOOP were peeled.
     - NITERS - the number of iterations that LOOP executes (before it is
                vectorized). i.e, the number of times the ivs should be bumped.
     - UPDATE_E - a successor edge of LOOP->exit that is on the (only) path
                  coming out from LOOP on which there are uses of the LOOP ivs
		  (this is the path from LOOP->exit to epilog_loop->preheader).

                  The new definitions of the ivs are placed in LOOP->exit.
                  The phi args associated with the edge UPDATE_E in the bb
                  UPDATE_E->dest are updated accordingly.

     Assumption 1: Like the rest of the vectorizer, this function assumes
     a single loop exit that has a single predecessor.

     Assumption 2: The phi nodes in the LOOP header and in update_bb are
     organized in the same order.

     Assumption 3: The access function of the ivs is simple enough (see
     vect_can_advance_ivs_p).  This assumption will be relaxed in the future.

     Assumption 4: Exactly one of the successors of LOOP exit-bb is on a path
     coming out of LOOP on which the ivs of LOOP are used (this is the path
     that leads to the epilog loop; other paths skip the epilog loop).  This
     path starts with the edge UPDATE_E, and its destination (denoted update_bb)
     needs to have its phis updated.
 */

static void
vect_update_ivs_after_vectorizer (loop_vec_info loop_vinfo, tree niters,
				  edge update_e)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block exit_bb = loop->single_exit->dest;
  tree phi, phi1;
  basic_block update_bb = update_e->dest;

  /* gcc_assert (vect_can_advance_ivs_p (loop_vinfo)); */

  /* Make sure there exists a single-predecessor exit bb:  */
  gcc_assert (single_pred_p (exit_bb));

  /* Walk the header PHIs and the update_bb PHIs in lockstep (they are
     assumed to be in the same order -- see Assumption 2 above).  */
  for (phi = phi_nodes (loop->header), phi1 = phi_nodes (update_bb);
       phi && phi1;
       phi = PHI_CHAIN (phi), phi1 = PHI_CHAIN (phi1))
    {
      tree access_fn = NULL;
      tree evolution_part;
      tree init_expr;
      tree step_expr;
      tree var, stmt, ni, ni_name;
      block_stmt_iterator last_bsi;

      if (vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "vect_update_ivs_after_vectorizer: phi: ");
          print_generic_expr (vect_dump, phi, TDF_SLIM);
        }

      /* Skip virtual phi's.  */
      if (!is_gimple_reg (SSA_NAME_VAR (PHI_RESULT (phi))))
	{
	  if (vect_print_dump_info (REPORT_DETAILS))
	    fprintf (vect_dump, "virtual phi. skip.");
	  continue;
	}

      /* Skip reduction phis.  */
      if (STMT_VINFO_DEF_TYPE (vinfo_for_stmt (phi)) == vect_reduction_def)
        {
          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "reduc phi. skip.");
          continue;
        }

      access_fn = analyze_scalar_evolution (loop, PHI_RESULT (phi));
      gcc_assert (access_fn);
      evolution_part =
	 unshare_expr (evolution_part_in_loop_num (access_fn, loop->num));
      gcc_assert (evolution_part != NULL_TREE);

      /* FORNOW: We do not support IVs whose evolution function is a polynomial
         of degree >= 2 or exponential.  */
      gcc_assert (!tree_is_chrec (evolution_part));

      step_expr = evolution_part;
      init_expr = unshare_expr (initial_condition_in_loop_num (access_fn,
							       loop->num));

      /* The advanced IV value is: init + niters * step.  */
      ni = build2 (PLUS_EXPR, TREE_TYPE (init_expr),
		  build2 (MULT_EXPR, TREE_TYPE (niters),
		       niters, step_expr), init_expr);

      var = create_tmp_var (TREE_TYPE (init_expr), "tmp");
      add_referenced_var (var);

      ni_name = force_gimple_operand (ni, &stmt, false, var);

      /* Insert stmt into exit_bb.  */
      last_bsi = bsi_last (exit_bb);
      if (stmt)
        bsi_insert_before (&last_bsi, stmt, BSI_SAME_STMT);

      /* Fix phi expressions in the successor bb.  */
      SET_PHI_ARG_DEF (phi1, update_e->dest_idx, ni_name);
    }
}
2572 
2573 
2574 /* Function vect_do_peeling_for_loop_bound
2575 
2576    Peel the last iterations of the loop represented by LOOP_VINFO.
2577    The peeled iterations form a new epilog loop.  Given that the loop now
2578    iterates NITERS times, the new epilog loop iterates
2579    NITERS % VECTORIZATION_FACTOR times.
2580 
2581    The original loop will later be made to iterate
2582    NITERS / VECTORIZATION_FACTOR times (this value is placed into RATIO).  */
2583 
static void
vect_do_peeling_for_loop_bound (loop_vec_info loop_vinfo, tree *ratio,
				struct loops *loops)
{
  tree ni_name, ratio_mult_vf_name;
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  struct loop *new_loop;
  edge update_e;
  basic_block preheader;
  int loop_num;

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "=== vect_do_peeling_for_loop_bound ===");

  initialize_original_copy_tables ();

  /* Generate the following variables on the preheader of original loop:

     ni_name = number of iteration the original loop executes
     ratio = ni_name / vf
     ratio_mult_vf_name = ratio * vf  */
  vect_generate_tmps_on_preheader (loop_vinfo, &ni_name,
				   &ratio_mult_vf_name, ratio);

  /* Peel the last ni_name % vf iterations into a scalar epilog loop placed
     after LOOP's exit.  LOOP itself keeps its number (checked below).  */
  loop_num  = loop->num;
  new_loop = slpeel_tree_peel_loop_to_edge (loop, loops, loop->single_exit,
					    ratio_mult_vf_name, ni_name, false);
  gcc_assert (new_loop);
  gcc_assert (loop_num == loop->num);
#ifdef ENABLE_CHECKING
  slpeel_verify_cfg_after_peeling (loop, new_loop);
#endif

  /* A guard that controls whether the new_loop is to be executed or skipped
     is placed in LOOP->exit.  LOOP->exit therefore has two successors - one
     is the preheader of NEW_LOOP, where the IVs from LOOP are used.  The other
     is a bb after NEW_LOOP, where these IVs are not used.  Find the edge that
     is on the path where the LOOP IVs are used and need to be updated.  */

  preheader = loop_preheader_edge (new_loop)->src;
  if (EDGE_PRED (preheader, 0)->src == loop->single_exit->dest)
    update_e = EDGE_PRED (preheader, 0);
  else
    update_e = EDGE_PRED (preheader, 1);

  /* Update IVs of original loop as if they were advanced
     by ratio_mult_vf_name steps.  */
  vect_update_ivs_after_vectorizer (loop_vinfo, ratio_mult_vf_name, update_e);

  /* After peeling we have to reset scalar evolution analyzer.  */
  scev_reset ();

  free_original_copy_tables ();
}
2638 
2639 
2640 /* Function vect_gen_niters_for_prolog_loop
2641 
2642    Set the number of iterations for the loop represented by LOOP_VINFO
2643    to the minimum between LOOP_NITERS (the original iteration count of the loop)
2644    and the misalignment of DR - the data reference recorded in
2645    LOOP_VINFO_UNALIGNED_DR (LOOP_VINFO).  As a result, after the execution of
2646    this loop, the data reference DR will refer to an aligned location.
2647 
2648    The following computation is generated:
2649 
2650    If the misalignment of DR is known at compile time:
2651      addr_mis = int mis = DR_MISALIGNMENT (dr);
2652    Else, compute address misalignment in bytes:
2653      addr_mis = addr & (vectype_size - 1)
2654 
2655    prolog_niters = min ( LOOP_NITERS , (VF - addr_mis/elem_size)&(VF-1) )
2656 
2657    (elem_size = element type size; an element is the scalar element
2658 	whose type is the inner type of the vectype)  */
2659 
static tree
vect_gen_niters_for_prolog_loop (loop_vec_info loop_vinfo, tree loop_niters)
{
  struct data_reference *dr = LOOP_VINFO_UNALIGNED_DR (loop_vinfo);
  int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  tree var, stmt;
  tree iters, iters_name;
  edge pe;
  basic_block new_bb;
  tree dr_stmt = DR_STMT (dr);
  stmt_vec_info stmt_info = vinfo_for_stmt (dr_stmt);
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  /* Vector alignment in bytes; one vector holds VF scalar elements.  */
  int vectype_align = TYPE_ALIGN (vectype) / BITS_PER_UNIT;
  tree niters_type = TREE_TYPE (loop_niters);

  pe = loop_preheader_edge (loop);

  if (LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo) > 0)
    {
      /* Misalignment is known at compile time: the prolog iteration count
	 is the constant (VF - elem_misalign) & (VF - 1).  */
      int byte_misalign = LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo);
      int element_size = vectype_align/vf;
      int elem_misalign = byte_misalign / element_size;

      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "known alignment = %d.", byte_misalign);
      iters = build_int_cst (niters_type, (vf - elem_misalign)&(vf-1));
    }
  else
    {
      /* Misalignment is unknown: emit code on the preheader edge that
	 computes it from the data reference's address at runtime.  */
      tree new_stmts = NULL_TREE;
      tree start_addr =
        vect_create_addr_base_for_vector_ref (dr_stmt, &new_stmts, NULL_TREE);
      tree ptr_type = TREE_TYPE (start_addr);
      tree size = TYPE_SIZE (ptr_type);
      tree type = lang_hooks.types.type_for_size (tree_low_cst (size, 1), 1);
      tree vectype_size_minus_1 = build_int_cst (type, vectype_align - 1);
      tree elem_size_log =
        build_int_cst (type, exact_log2 (vectype_align/vf));
      tree vf_minus_1 = build_int_cst (type, vf - 1);
      tree vf_tree = build_int_cst (type, vf);
      tree byte_misalign;
      tree elem_misalign;

      /* Address-computation stmts go on the preheader edge; the edge must
	 not need splitting (no new bb may be created).  */
      new_bb = bsi_insert_on_edge_immediate (pe, new_stmts);
      gcc_assert (!new_bb);

      /* Create:  byte_misalign = addr & (vectype_size - 1)  */
      byte_misalign =
        build2 (BIT_AND_EXPR, type, start_addr, vectype_size_minus_1);

      /* Create:  elem_misalign = byte_misalign / element_size  */
      elem_misalign =
        build2 (RSHIFT_EXPR, type, byte_misalign, elem_size_log);

      /* Create:  (niters_type) (VF - elem_misalign)&(VF - 1)  */
      iters = build2 (MINUS_EXPR, type, vf_tree, elem_misalign);
      iters = build2 (BIT_AND_EXPR, type, iters, vf_minus_1);
      iters = fold_convert (niters_type, iters);
    }

  /* Create:  prolog_loop_niters = min (iters, loop_niters) */
  /* If the loop bound is known at compile time we already verified that it is
     greater than vf; since the misalignment ('iters') is at most vf, there's
     no need to generate the MIN_EXPR in this case.  */
  if (TREE_CODE (loop_niters) != INTEGER_CST)
    iters = build2 (MIN_EXPR, niters_type, iters, loop_niters);

  if (vect_print_dump_info (REPORT_DETAILS))
    {
      fprintf (vect_dump, "niters for prolog loop: ");
      print_generic_expr (vect_dump, iters, TDF_SLIM);
    }

  var = create_tmp_var (niters_type, "prolog_loop_niters");
  add_referenced_var (var);
  iters_name = force_gimple_operand (iters, &stmt, false, var);

  /* Insert stmt on loop preheader edge.  */
  if (stmt)
    {
      basic_block new_bb = bsi_insert_on_edge_immediate (pe, stmt);
      gcc_assert (!new_bb);
    }

  return iters_name;
}
2747 
2748 
2749 /* Function vect_update_init_of_dr
2750 
2751    NITERS iterations were peeled from LOOP.  DR represents a data reference
2752    in LOOP.  This function updates the information recorded in DR to
2753    account for the fact that the first NITERS iterations had already been
2754    executed.  Specifically, it updates the OFFSET field of DR.  */
2755 
2756 static void
vect_update_init_of_dr(struct data_reference * dr,tree niters)2757 vect_update_init_of_dr (struct data_reference *dr, tree niters)
2758 {
2759   tree offset = DR_OFFSET (dr);
2760 
2761   niters = fold_build2 (MULT_EXPR, TREE_TYPE (niters), niters, DR_STEP (dr));
2762   offset = fold_build2 (PLUS_EXPR, TREE_TYPE (offset), offset, niters);
2763   DR_OFFSET (dr) = offset;
2764 }
2765 
2766 
2767 /* Function vect_update_inits_of_drs
2768 
2769    NITERS iterations were peeled from the loop represented by LOOP_VINFO.
2770    This function updates the information recorded for the data references in
2771    the loop to account for the fact that the first NITERS iterations had
2772    already been executed.  Specifically, it updates the initial_condition of the
2773    access_function of all the data_references in the loop.  */
2774 
2775 static void
vect_update_inits_of_drs(loop_vec_info loop_vinfo,tree niters)2776 vect_update_inits_of_drs (loop_vec_info loop_vinfo, tree niters)
2777 {
2778   unsigned int i;
2779   VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
2780   struct data_reference *dr;
2781 
2782   if (vect_dump && (dump_flags & TDF_DETAILS))
2783     fprintf (vect_dump, "=== vect_update_inits_of_dr ===");
2784 
2785   for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
2786     vect_update_init_of_dr (dr, niters);
2787 }
2788 
2789 
2790 /* Function vect_do_peeling_for_alignment
2791 
2792    Peel the first 'niters' iterations of the loop represented by LOOP_VINFO.
2793    'niters' is set to the misalignment of one of the data references in the
2794    loop, thereby forcing it to refer to an aligned location at the beginning
2795    of the execution of this loop.  The data reference for which we are
2796    peeling is recorded in LOOP_VINFO_UNALIGNED_DR.  */
2797 
static void
vect_do_peeling_for_alignment (loop_vec_info loop_vinfo, struct loops *loops)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  tree niters_of_prolog_loop, ni_name;
  tree n_iters;
  struct loop *new_loop;

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "=== vect_do_peeling_for_alignment ===");

  initialize_original_copy_tables ();

  /* ni_name = total iteration count; niters_of_prolog_loop = min (niters,
     misalignment of the unaligned data ref, in elements).  */
  ni_name = vect_build_loop_niters (loop_vinfo);
  niters_of_prolog_loop = vect_gen_niters_for_prolog_loop (loop_vinfo, ni_name);

  /* Peel the prolog loop and iterate it niters_of_prolog_loop.  */
  new_loop =
	slpeel_tree_peel_loop_to_edge (loop, loops, loop_preheader_edge (loop),
				       niters_of_prolog_loop, ni_name, true);
  gcc_assert (new_loop);
#ifdef ENABLE_CHECKING
  slpeel_verify_cfg_after_peeling (new_loop, loop);
#endif

  /* Update number of times loop executes: the prolog now performs the
     first niters_of_prolog_loop iterations.  */
  n_iters = LOOP_VINFO_NITERS (loop_vinfo);
  LOOP_VINFO_NITERS (loop_vinfo) = fold_build2 (MINUS_EXPR,
		TREE_TYPE (n_iters), n_iters, niters_of_prolog_loop);

  /* Update the init conditions of the access functions of all data refs.  */
  vect_update_inits_of_drs (loop_vinfo, niters_of_prolog_loop);

  /* After peeling we have to reset scalar evolution analyzer.  */
  scev_reset ();

  free_original_copy_tables ();
}
2836 
2837 
2838 /* Function vect_create_cond_for_align_checks.
2839 
2840    Create a conditional expression that represents the alignment checks for
2841    all of data references (array element references) whose alignment must be
2842    checked at runtime.
2843 
2844    Input:
2845    LOOP_VINFO - two fields of the loop information are used.
2846                 LOOP_VINFO_PTR_MASK is the mask used to check the alignment.
2847                 LOOP_VINFO_MAY_MISALIGN_STMTS contains the refs to be checked.
2848 
2849    Output:
2850    COND_EXPR_STMT_LIST - statements needed to construct the conditional
2851                          expression.
2852    The returned value is the conditional expression to be used in the if
2853    statement that controls which version of the loop gets executed at runtime.
2854 
2855    The algorithm makes two assumptions:
2856      1) The number of bytes "n" in a vector is a power of 2.
2857      2) An address "a" is aligned if a%n is zero and that this
2858         test can be done as a&(n-1) == 0.  For example, for 16
2859         byte vectors the test is a&0xf == 0.  */
2860 
2861 static tree
vect_create_cond_for_align_checks(loop_vec_info loop_vinfo,tree * cond_expr_stmt_list)2862 vect_create_cond_for_align_checks (loop_vec_info loop_vinfo,
2863                                    tree *cond_expr_stmt_list)
2864 {
2865   VEC(tree,heap) *may_misalign_stmts
2866     = LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo);
2867   tree ref_stmt;
2868   int mask = LOOP_VINFO_PTR_MASK (loop_vinfo);
2869   tree mask_cst;
2870   unsigned int i;
2871   tree psize;
2872   tree int_ptrsize_type;
2873   char tmp_name[20];
2874   tree or_tmp_name = NULL_TREE;
2875   tree and_tmp, and_tmp_name, and_stmt;
2876   tree ptrsize_zero;
2877 
2878   /* Check that mask is one less than a power of 2, i.e., mask is
2879      all zeros followed by all ones.  */
2880   gcc_assert ((mask != 0) && ((mask & (mask+1)) == 0));
2881 
2882   /* CHECKME: what is the best integer or unsigned type to use to hold a
2883      cast from a pointer value?  */
2884   psize = TYPE_SIZE (ptr_type_node);
2885   int_ptrsize_type
2886     = lang_hooks.types.type_for_size (tree_low_cst (psize, 1), 0);
2887 
2888   /* Create expression (mask & (dr_1 || ... || dr_n)) where dr_i is the address
2889      of the first vector of the i'th data reference. */
2890 
2891   for (i = 0; VEC_iterate (tree, may_misalign_stmts, i, ref_stmt); i++)
2892     {
2893       tree new_stmt_list = NULL_TREE;
2894       tree addr_base;
2895       tree addr_tmp, addr_tmp_name, addr_stmt;
2896       tree or_tmp, new_or_tmp_name, or_stmt;
2897 
2898       /* create: addr_tmp = (int)(address_of_first_vector) */
2899       addr_base = vect_create_addr_base_for_vector_ref (ref_stmt,
2900 							&new_stmt_list,
2901 							NULL_TREE);
2902 
2903       if (new_stmt_list != NULL_TREE)
2904         append_to_statement_list_force (new_stmt_list, cond_expr_stmt_list);
2905 
2906       sprintf (tmp_name, "%s%d", "addr2int", i);
2907       addr_tmp = create_tmp_var (int_ptrsize_type, tmp_name);
2908       add_referenced_var (addr_tmp);
2909       addr_tmp_name = make_ssa_name (addr_tmp, NULL_TREE);
2910       addr_stmt = fold_convert (int_ptrsize_type, addr_base);
2911       addr_stmt = build2 (MODIFY_EXPR, void_type_node,
2912                           addr_tmp_name, addr_stmt);
2913       SSA_NAME_DEF_STMT (addr_tmp_name) = addr_stmt;
2914       append_to_statement_list_force (addr_stmt, cond_expr_stmt_list);
2915 
2916       /* The addresses are OR together.  */
2917 
2918       if (or_tmp_name != NULL_TREE)
2919         {
2920           /* create: or_tmp = or_tmp | addr_tmp */
2921           sprintf (tmp_name, "%s%d", "orptrs", i);
2922           or_tmp = create_tmp_var (int_ptrsize_type, tmp_name);
2923           add_referenced_var (or_tmp);
2924           new_or_tmp_name = make_ssa_name (or_tmp, NULL_TREE);
2925           or_stmt = build2 (MODIFY_EXPR, void_type_node, new_or_tmp_name,
2926                             build2 (BIT_IOR_EXPR, int_ptrsize_type,
2927 	                            or_tmp_name,
2928                                     addr_tmp_name));
2929           SSA_NAME_DEF_STMT (new_or_tmp_name) = or_stmt;
2930           append_to_statement_list_force (or_stmt, cond_expr_stmt_list);
2931           or_tmp_name = new_or_tmp_name;
2932         }
2933       else
2934         or_tmp_name = addr_tmp_name;
2935 
2936     } /* end for i */
2937 
2938   mask_cst = build_int_cst (int_ptrsize_type, mask);
2939 
2940   /* create: and_tmp = or_tmp & mask  */
2941   and_tmp = create_tmp_var (int_ptrsize_type, "andmask" );
2942   add_referenced_var (and_tmp);
2943   and_tmp_name = make_ssa_name (and_tmp, NULL_TREE);
2944 
2945   and_stmt = build2 (MODIFY_EXPR, void_type_node,
2946                      and_tmp_name,
2947                      build2 (BIT_AND_EXPR, int_ptrsize_type,
2948                              or_tmp_name, mask_cst));
2949   SSA_NAME_DEF_STMT (and_tmp_name) = and_stmt;
2950   append_to_statement_list_force (and_stmt, cond_expr_stmt_list);
2951 
2952   /* Make and_tmp the left operand of the conditional test against zero.
2953      if and_tmp has a nonzero bit then some address is unaligned.  */
2954   ptrsize_zero = build_int_cst (int_ptrsize_type, 0);
2955   return build2 (EQ_EXPR, boolean_type_node,
2956                  and_tmp_name, ptrsize_zero);
2957 }
2958 
2959 
2960 /* Function vect_transform_loop.
2961 
2962    The analysis phase has determined that the loop is vectorizable.
2963    Vectorize the loop - created vectorized stmts to replace the scalar
2964    stmts in the loop, and update the loop exit condition.  */
2965 
void
vect_transform_loop (loop_vec_info loop_vinfo,
		     struct loops *loops ATTRIBUTE_UNUSED)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
  int nbbs = loop->num_nodes;
  block_stmt_iterator si;
  int i;
  tree ratio = NULL;
  int vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
  bitmap_iterator bi;
  unsigned int j;

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "=== vec_transform_loop ===");

  /* If the loop has data references that may or may not be aligned then
     two versions of the loop need to be generated, one which is vectorized
     and one which isn't.  A test is then generated to control which of the
     loops is executed.  The test checks for the alignment of all of the
     data references that may or may not be aligned. */

  if (VEC_length (tree, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo)))
    {
      struct loop *nloop;
      tree cond_expr;
      tree cond_expr_stmt_list = NULL_TREE;
      basic_block condition_bb;
      block_stmt_iterator cond_exp_bsi;
      basic_block merge_bb;
      basic_block new_exit_bb;
      edge new_exit_e, e;
      tree orig_phi, new_phi, arg;

      /* Build the runtime alignment test and the scalar fallback copy of
	 the loop guarded by it.  */
      cond_expr = vect_create_cond_for_align_checks (loop_vinfo,
                                                     &cond_expr_stmt_list);
      initialize_original_copy_tables ();
      nloop = loop_version (loops, loop, cond_expr, &condition_bb, true);
      free_original_copy_tables();

      /** Loop versioning violates an assumption we try to maintain during
	 vectorization - that the loop exit block has a single predecessor.
	 After versioning, the exit block of both loop versions is the same
	 basic block (i.e. it has two predecessors). Just in order to simplify
	 following transformations in the vectorizer, we fix this situation
	 here by adding a new (empty) block on the exit-edge of the loop,
	 with the proper loop-exit phis to maintain loop-closed-form.  **/

      merge_bb = loop->single_exit->dest;
      gcc_assert (EDGE_COUNT (merge_bb->preds) == 2);
      new_exit_bb = split_edge (loop->single_exit);
      add_bb_to_loop (new_exit_bb, loop->outer);
      new_exit_e = loop->single_exit;
      e = EDGE_SUCC (new_exit_bb, 0);

      /* For each phi in the merge block, create a matching phi in the new
	 exit block and route the loop-exit value through it, preserving
	 loop-closed SSA form.  */
      for (orig_phi = phi_nodes (merge_bb); orig_phi;
	   orig_phi = PHI_CHAIN (orig_phi))
	{
          new_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (orig_phi)),
				     new_exit_bb);
          arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, e);
          add_phi_arg (new_phi, arg, new_exit_e);
	  SET_PHI_ARG_DEF (orig_phi, e->dest_idx, PHI_RESULT (new_phi));
	}

      /** end loop-exit-fixes after versioning  **/

      update_ssa (TODO_update_ssa);
      /* Emit the alignment-test computation just before the branch that
	 chooses between the two loop versions.  */
      cond_exp_bsi = bsi_last (condition_bb);
      bsi_insert_before (&cond_exp_bsi, cond_expr_stmt_list, BSI_SAME_STMT);
    }

  /* CHECKME: we wouldn't need this if we called update_ssa once
     for all loops.  */
  bitmap_zero (vect_vnames_to_rename);

  /* Peel the loop if there are data refs with unknown alignment.
     Only one data ref with unknown store is allowed.  */

  if (LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo))
    vect_do_peeling_for_alignment (loop_vinfo, loops);

  /* If the loop has a symbolic number of iterations 'n' (i.e. it's not a
     compile time constant), or it is a constant that doesn't divide by the
     vectorization factor, then an epilog loop needs to be created.
     We therefore duplicate the loop: the original loop will be vectorized,
     and will compute the first (n/VF) iterations. The second copy of the loop
     will remain scalar and will compute the remaining (n%VF) iterations.
     (VF is the vectorization factor).  */

  if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
      || (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
          && LOOP_VINFO_INT_NITERS (loop_vinfo) % vectorization_factor != 0))
    vect_do_peeling_for_loop_bound (loop_vinfo, &ratio, loops);
  else
    /* Known iteration count that divides evenly by VF: the vector loop
       runs exactly NITERS / VF times; no epilog needed.  */
    ratio = build_int_cst (TREE_TYPE (LOOP_VINFO_NITERS (loop_vinfo)),
		LOOP_VINFO_INT_NITERS (loop_vinfo) / vectorization_factor);

  /* 1) Make sure the loop header has exactly two entries
     2) Make sure we have a preheader basic block.  */

  gcc_assert (EDGE_COUNT (loop->header->preds) == 2);

  loop_split_edge_with (loop_preheader_edge (loop), NULL);


  /* FORNOW: the vectorizer supports only loops which body consist
     of one basic block (header + empty latch). When the vectorizer will
     support more involved loop forms, the order by which the BBs are
     traversed need to be reconsidered.  */

  for (i = 0; i < nbbs; i++)
    {
      basic_block bb = bbs[i];

      for (si = bsi_start (bb); !bsi_end_p (si);)
	{
	  tree stmt = bsi_stmt (si);
	  stmt_vec_info stmt_info;
	  bool is_store;

	  if (vect_print_dump_info (REPORT_DETAILS))
	    {
	      fprintf (vect_dump, "------>vectorizing statement: ");
	      print_generic_expr (vect_dump, stmt, TDF_SLIM);
	    }
	  stmt_info = vinfo_for_stmt (stmt);
	  gcc_assert (stmt_info);
	  /* Statements that are neither relevant nor live are left as-is.  */
	  if (!STMT_VINFO_RELEVANT_P (stmt_info)
	      && !STMT_VINFO_LIVE_P (stmt_info))
	    {
	      bsi_next (&si);
	      continue;
	    }
	  /* FORNOW: Verify that all stmts operate on the same number of
	             units and no inner unrolling is necessary.  */
	  gcc_assert
		(TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info))
		 == (unsigned HOST_WIDE_INT) vectorization_factor);

	  /* -------- vectorize statement ------------ */
	  if (vect_print_dump_info (REPORT_DETAILS))
	    fprintf (vect_dump, "transform statement.");

	  is_store = vect_transform_stmt (stmt, &si);
	  if (is_store)
	    {
	      /* Free the attached stmt_vec_info and remove the stmt.
		 (A vectorized store fully replaces the scalar store; other
		 stmts are kept — they may still be needed as scalars.)  */
	      stmt_ann_t ann = stmt_ann (stmt);
	      free (stmt_info);
	      set_stmt_info (ann, NULL);
	      bsi_remove (&si, true);
	      continue;
	    }

	  bsi_next (&si);
	}		        /* stmts in BB */
    }				/* BBs in loop */

  /* Make the vector loop iterate RATIO (= NITERS / VF) times.  */
  slpeel_make_loop_iterate_ntimes (loop, ratio);

  EXECUTE_IF_SET_IN_BITMAP (vect_vnames_to_rename, 0, j, bi)
    mark_sym_for_renaming (SSA_NAME_VAR (ssa_name (j)));

  /* The memory tags and pointers in vectorized statements need to
     have their SSA forms updated.  FIXME, why can't this be delayed
     until all the loops have been transformed?  */
  update_ssa (TODO_update_ssa);

  if (vect_print_dump_info (REPORT_VECTORIZED_LOOPS))
    fprintf (vect_dump, "LOOP VECTORIZED.");
}
3139