xref: /openbsd/gnu/gcc/gcc/tree-vectorizer.c (revision 404b540a)
1 /* Loop Vectorization
2    Copyright (C) 2003, 2004, 2005, 2006, 2007 Free Software Foundation, Inc.
3    Contributed by Dorit Naishlos <dorit@il.ibm.com>
4 
5 This file is part of GCC.
6 
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
10 version.
11 
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
15 for more details.
16 
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING.  If not, write to the Free
19 Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
20 02110-1301, USA.  */
21 
22 /* Loop Vectorization Pass.
23 
24    This pass tries to vectorize loops. This first implementation focuses on
25    simple inner-most loops, with no conditional control flow, and a set of
26    simple operations which vector form can be expressed using existing
27    tree codes (PLUS, MULT etc).
28 
29    For example, the vectorizer transforms the following simple loop:
30 
31 	short a[N]; short b[N]; short c[N]; int i;
32 
33 	for (i=0; i<N; i++){
34 	  a[i] = b[i] + c[i];
35 	}
36 
37    as if it was manually vectorized by rewriting the source code into:
38 
39 	typedef int __attribute__((mode(V8HI))) v8hi;
40 	short a[N];  short b[N]; short c[N];   int i;
41 	v8hi *pa = (v8hi*)a, *pb = (v8hi*)b, *pc = (v8hi*)c;
42 	v8hi va, vb, vc;
43 
44 	for (i=0; i<N/8; i++){
45 	  vb = pb[i];
46 	  vc = pc[i];
47 	  va = vb + vc;
48 	  pa[i] = va;
49 	}
50 
51 	The main entry to this pass is vectorize_loops(), in which
52    the vectorizer applies a set of analyses on a given set of loops,
53    followed by the actual vectorization transformation for the loops that
54    had successfully passed the analysis phase.
55 
56 	Throughout this pass we make a distinction between two types of
57    data: scalars (which are represented by SSA_NAMES), and memory references
58    ("data-refs"). These two types of data require different handling both
59    during analysis and transformation. The types of data-refs that the
60    vectorizer currently supports are ARRAY_REFS which base is an array DECL
61    (not a pointer), and INDIRECT_REFS through pointers; both array and pointer
62    accesses are required to have a  simple (consecutive) access pattern.
63 
64    Analysis phase:
65    ===============
66 	The driver for the analysis phase is vect_analyze_loop_nest().
67    It applies a set of analyses, some of which rely on the scalar evolution
68    analyzer (scev) developed by Sebastian Pop.
69 
70 	During the analysis phase the vectorizer records some information
71    per stmt in a "stmt_vec_info" struct which is attached to each stmt in the
72    loop, as well as general information about the loop as a whole, which is
73    recorded in a "loop_vec_info" struct attached to each loop.
74 
75    Transformation phase:
76    =====================
77 	The loop transformation phase scans all the stmts in the loop, and
78    creates a vector stmt (or a sequence of stmts) for each scalar stmt S in
79    the loop that needs to be vectorized. It insert the vector code sequence
80    just before the scalar stmt S, and records a pointer to the vector code
81    in STMT_VINFO_VEC_STMT (stmt_info) (stmt_info is the stmt_vec_info struct
82    attached to S). This pointer will be used for the vectorization of following
83    stmts which use the def of stmt S. Stmt S is removed if it writes to memory;
84    otherwise, we rely on dead code elimination for removing it.
85 
86 	For example, say stmt S1 was vectorized into stmt VS1:
87 
88    VS1: vb = px[i];
89    S1:	b = x[i];    STMT_VINFO_VEC_STMT (stmt_info (S1)) = VS1
90    S2:  a = b;
91 
92    To vectorize stmt S2, the vectorizer first finds the stmt that defines
93    the operand 'b' (S1), and gets the relevant vector def 'vb' from the
94    vector stmt VS1 pointed to by STMT_VINFO_VEC_STMT (stmt_info (S1)). The
95    resulting sequence would be:
96 
97    VS1: vb = px[i];
98    S1:	b = x[i];	STMT_VINFO_VEC_STMT (stmt_info (S1)) = VS1
99    VS2: va = vb;
100    S2:  a = b;          STMT_VINFO_VEC_STMT (stmt_info (S2)) = VS2
101 
102 	Operands that are not SSA_NAMEs, are data-refs that appear in
103    load/store operations (like 'x[i]' in S1), and are handled differently.
104 
105    Target modeling:
106    =================
107 	Currently the only target specific information that is used is the
108    size of the vector (in bytes) - "UNITS_PER_SIMD_WORD". Targets that can
109    support different sizes of vectors, for now will need to specify one value
110    for "UNITS_PER_SIMD_WORD". More flexibility will be added in the future.
111 
112 	Since we only vectorize operations which vector form can be
113    expressed using existing tree codes, to verify that an operation is
114    supported, the vectorizer checks the relevant optab at the relevant
115    machine_mode (e.g, add_optab->handlers[(int) V8HImode].insn_code). If
116    the value found is CODE_FOR_nothing, then there's no target support, and
117    we can't vectorize the stmt.
118 
119    For additional information on this project see:
120    http://gcc.gnu.org/projects/tree-ssa/vectorization.html
121 */
122 
123 #include "config.h"
124 #include "system.h"
125 #include "coretypes.h"
126 #include "tm.h"
127 #include "ggc.h"
128 #include "tree.h"
129 #include "target.h"
130 #include "rtl.h"
131 #include "basic-block.h"
132 #include "diagnostic.h"
133 #include "tree-flow.h"
134 #include "tree-dump.h"
135 #include "timevar.h"
136 #include "cfgloop.h"
137 #include "cfglayout.h"
138 #include "expr.h"
139 #include "optabs.h"
140 #include "params.h"
141 #include "toplev.h"
142 #include "tree-chrec.h"
143 #include "tree-data-ref.h"
144 #include "tree-scalar-evolution.h"
145 #include "input.h"
146 #include "tree-vectorizer.h"
147 #include "tree-pass.h"
148 
149 /*************************************************************************
150   Simple Loop Peeling Utilities
151  *************************************************************************/
152 static struct loop *slpeel_tree_duplicate_loop_to_edge_cfg
153   (struct loop *, struct loops *, edge);
154 static void slpeel_update_phis_for_duplicate_loop
155   (struct loop *, struct loop *, bool after);
156 static void slpeel_update_phi_nodes_for_guard1
157   (edge, struct loop *, bool, basic_block *, bitmap *);
158 static void slpeel_update_phi_nodes_for_guard2
159   (edge, struct loop *, bool, basic_block *);
160 static edge slpeel_add_loop_guard (basic_block, tree, basic_block, basic_block);
161 
162 static void rename_use_op (use_operand_p);
163 static void rename_variables_in_bb (basic_block);
164 static void rename_variables_in_loop (struct loop *);
165 
166 /*************************************************************************
167   General Vectorization Utilities
168  *************************************************************************/
169 static void vect_set_dump_settings (void);
170 
171 /* vect_dump will be set to stderr or dump_file if exist.  */
172 FILE *vect_dump;
173 
174 /* vect_verbosity_level set to an invalid value
175    to mark that it's uninitialized.  */
176 enum verbosity_levels vect_verbosity_level = MAX_VERBOSITY_LEVEL;
177 
178 /* Number of loops, at the beginning of vectorization.  */
179 unsigned int vect_loops_num;
180 
181 /* Loop location.  */
182 static LOC vect_loop_location;
183 
184 /* Bitmap of virtual variables to be renamed.  */
185 bitmap vect_vnames_to_rename;
186 
187 /*************************************************************************
188   Simple Loop Peeling Utilities
189 
190   Utilities to support loop peeling for vectorization purposes.
191  *************************************************************************/
192 
193 
194 /* Renames the use *OP_P.  */
195 
196 static void
rename_use_op(use_operand_p op_p)197 rename_use_op (use_operand_p op_p)
198 {
199   tree new_name;
200 
201   if (TREE_CODE (USE_FROM_PTR (op_p)) != SSA_NAME)
202     return;
203 
204   new_name = get_current_def (USE_FROM_PTR (op_p));
205 
206   /* Something defined outside of the loop.  */
207   if (!new_name)
208     return;
209 
210   /* An ordinary ssa name defined in the loop.  */
211 
212   SET_USE (op_p, new_name);
213 }
214 
215 
216 /* Renames the variables in basic block BB.  */
217 
218 static void
rename_variables_in_bb(basic_block bb)219 rename_variables_in_bb (basic_block bb)
220 {
221   tree phi;
222   block_stmt_iterator bsi;
223   tree stmt;
224   use_operand_p use_p;
225   ssa_op_iter iter;
226   edge e;
227   edge_iterator ei;
228   struct loop *loop = bb->loop_father;
229 
230   for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
231     {
232       stmt = bsi_stmt (bsi);
233       FOR_EACH_SSA_USE_OPERAND (use_p, stmt, iter,
234 				 (SSA_OP_ALL_USES | SSA_OP_ALL_KILLS))
235 	rename_use_op (use_p);
236     }
237 
238   FOR_EACH_EDGE (e, ei, bb->succs)
239     {
240       if (!flow_bb_inside_loop_p (loop, e->dest))
241 	continue;
242       for (phi = phi_nodes (e->dest); phi; phi = PHI_CHAIN (phi))
243         rename_use_op (PHI_ARG_DEF_PTR_FROM_EDGE (phi, e));
244     }
245 }
246 
247 
248 /* Renames variables in new generated LOOP.  */
249 
250 static void
rename_variables_in_loop(struct loop * loop)251 rename_variables_in_loop (struct loop *loop)
252 {
253   unsigned i;
254   basic_block *bbs;
255 
256   bbs = get_loop_body (loop);
257 
258   for (i = 0; i < loop->num_nodes; i++)
259     rename_variables_in_bb (bbs[i]);
260 
261   free (bbs);
262 }
263 
264 
265 /* Update the PHI nodes of NEW_LOOP.
266 
267    NEW_LOOP is a duplicate of ORIG_LOOP.
268    AFTER indicates whether NEW_LOOP executes before or after ORIG_LOOP:
269    AFTER is true if NEW_LOOP executes after ORIG_LOOP, and false if it
270    executes before it.  */
271 
272 static void
slpeel_update_phis_for_duplicate_loop(struct loop * orig_loop,struct loop * new_loop,bool after)273 slpeel_update_phis_for_duplicate_loop (struct loop *orig_loop,
274 				       struct loop *new_loop, bool after)
275 {
276   tree new_ssa_name;
277   tree phi_new, phi_orig;
278   tree def;
279   edge orig_loop_latch = loop_latch_edge (orig_loop);
280   edge orig_entry_e = loop_preheader_edge (orig_loop);
281   edge new_loop_exit_e = new_loop->single_exit;
282   edge new_loop_entry_e = loop_preheader_edge (new_loop);
283   edge entry_arg_e = (after ? orig_loop_latch : orig_entry_e);
284 
285   /*
286      step 1. For each loop-header-phi:
287              Add the first phi argument for the phi in NEW_LOOP
288             (the one associated with the entry of NEW_LOOP)
289 
290      step 2. For each loop-header-phi:
291              Add the second phi argument for the phi in NEW_LOOP
292             (the one associated with the latch of NEW_LOOP)
293 
294      step 3. Update the phis in the successor block of NEW_LOOP.
295 
296         case 1: NEW_LOOP was placed before ORIG_LOOP:
297                 The successor block of NEW_LOOP is the header of ORIG_LOOP.
298                 Updating the phis in the successor block can therefore be done
299                 along with the scanning of the loop header phis, because the
300                 header blocks of ORIG_LOOP and NEW_LOOP have exactly the same
301                 phi nodes, organized in the same order.
302 
303         case 2: NEW_LOOP was placed after ORIG_LOOP:
304                 The successor block of NEW_LOOP is the original exit block of
305                 ORIG_LOOP - the phis to be updated are the loop-closed-ssa phis.
306                 We postpone updating these phis to a later stage (when
307                 loop guards are added).
308    */
309 
310 
311   /* Scan the phis in the headers of the old and new loops
312      (they are organized in exactly the same order).  */
313 
314   for (phi_new = phi_nodes (new_loop->header),
315        phi_orig = phi_nodes (orig_loop->header);
316        phi_new && phi_orig;
317        phi_new = PHI_CHAIN (phi_new), phi_orig = PHI_CHAIN (phi_orig))
318     {
319       /* step 1.  */
320       def = PHI_ARG_DEF_FROM_EDGE (phi_orig, entry_arg_e);
321       add_phi_arg (phi_new, def, new_loop_entry_e);
322 
323       /* step 2.  */
324       def = PHI_ARG_DEF_FROM_EDGE (phi_orig, orig_loop_latch);
325       if (TREE_CODE (def) != SSA_NAME)
326         continue;
327 
328       new_ssa_name = get_current_def (def);
329       if (!new_ssa_name)
330 	{
331 	  /* This only happens if there are no definitions
332 	     inside the loop. use the phi_result in this case.  */
333 	  new_ssa_name = PHI_RESULT (phi_new);
334 	}
335 
336       /* An ordinary ssa name defined in the loop.  */
337       add_phi_arg (phi_new, new_ssa_name, loop_latch_edge (new_loop));
338 
339       /* step 3 (case 1).  */
340       if (!after)
341         {
342           gcc_assert (new_loop_exit_e == orig_entry_e);
343           SET_PHI_ARG_DEF (phi_orig,
344                            new_loop_exit_e->dest_idx,
345                            new_ssa_name);
346         }
347     }
348 }
349 
350 
351 /* Update PHI nodes for a guard of the LOOP.
352 
353    Input:
354    - LOOP, GUARD_EDGE: LOOP is a loop for which we added guard code that
355         controls whether LOOP is to be executed.  GUARD_EDGE is the edge that
356         originates from the guard-bb, skips LOOP and reaches the (unique) exit
357         bb of LOOP.  This loop-exit-bb is an empty bb with one successor.
358         We denote this bb NEW_MERGE_BB because before the guard code was added
359         it had a single predecessor (the LOOP header), and now it became a merge
360         point of two paths - the path that ends with the LOOP exit-edge, and
361         the path that ends with GUARD_EDGE.
362    - NEW_EXIT_BB: New basic block that is added by this function between LOOP
363         and NEW_MERGE_BB. It is used to place loop-closed-ssa-form exit-phis.
364 
365    ===> The CFG before the guard-code was added:
366         LOOP_header_bb:
367           loop_body
368           if (exit_loop) goto update_bb
369           else           goto LOOP_header_bb
370         update_bb:
371 
372    ==> The CFG after the guard-code was added:
373         guard_bb:
374           if (LOOP_guard_condition) goto new_merge_bb
375           else                      goto LOOP_header_bb
376         LOOP_header_bb:
377           loop_body
378           if (exit_loop_condition) goto new_merge_bb
379           else                     goto LOOP_header_bb
380         new_merge_bb:
381           goto update_bb
382         update_bb:
383 
384    ==> The CFG after this function:
385         guard_bb:
386           if (LOOP_guard_condition) goto new_merge_bb
387           else                      goto LOOP_header_bb
388         LOOP_header_bb:
389           loop_body
390           if (exit_loop_condition) goto new_exit_bb
391           else                     goto LOOP_header_bb
392         new_exit_bb:
393         new_merge_bb:
394           goto update_bb
395         update_bb:
396 
397    This function:
398    1. creates and updates the relevant phi nodes to account for the new
399       incoming edge (GUARD_EDGE) into NEW_MERGE_BB. This involves:
400       1.1. Create phi nodes at NEW_MERGE_BB.
401       1.2. Update the phi nodes at the successor of NEW_MERGE_BB (denoted
402            UPDATE_BB).  UPDATE_BB was the exit-bb of LOOP before NEW_MERGE_BB
403    2. preserves loop-closed-ssa-form by creating the required phi nodes
404       at the exit of LOOP (i.e, in NEW_EXIT_BB).
405 
406    There are two flavors to this function:
407 
408    slpeel_update_phi_nodes_for_guard1:
409      Here the guard controls whether we enter or skip LOOP, where LOOP is a
410      prolog_loop (loop1 below), and the new phis created in NEW_MERGE_BB are
411      for variables that have phis in the loop header.
412 
413    slpeel_update_phi_nodes_for_guard2:
414      Here the guard controls whether we enter or skip LOOP, where LOOP is an
415      epilog_loop (loop2 below), and the new phis created in NEW_MERGE_BB are
416      for variables that have phis in the loop exit.
417 
418    I.E., the overall structure is:
419 
420         loop1_preheader_bb:
421                 guard1 (goto loop1/merg1_bb)
422         loop1
423         loop1_exit_bb:
424                 guard2 (goto merge1_bb/merge2_bb)
425         merge1_bb
426         loop2
427         loop2_exit_bb
428         merge2_bb
429         next_bb
430 
431    slpeel_update_phi_nodes_for_guard1 takes care of creating phis in
432    loop1_exit_bb and merge1_bb. These are entry phis (phis for the vars
433    that have phis in loop1->header).
434 
435    slpeel_update_phi_nodes_for_guard2 takes care of creating phis in
436    loop2_exit_bb and merge2_bb. These are exit phis (phis for the vars
437    that have phis in next_bb). It also adds some of these phis to
438    loop1_exit_bb.
439 
440    slpeel_update_phi_nodes_for_guard1 is always called before
441    slpeel_update_phi_nodes_for_guard2. They are both needed in order
442    to create correct data-flow and loop-closed-ssa-form.
443 
444    Generally slpeel_update_phi_nodes_for_guard1 creates phis for variables
445    that change between iterations of a loop (and therefore have a phi-node
446    at the loop entry), whereas slpeel_update_phi_nodes_for_guard2 creates
447    phis for variables that are used out of the loop (and therefore have
448    loop-closed exit phis). Some variables may be both updated between
449    iterations and used after the loop. This is why in loop1_exit_bb we
450    may need both entry_phis (created by slpeel_update_phi_nodes_for_guard1)
451    and exit phis (created by slpeel_update_phi_nodes_for_guard2).
452 
453    - IS_NEW_LOOP: if IS_NEW_LOOP is true, then LOOP is a newly created copy of
454      an original loop. i.e., we have:
455 
456            orig_loop
457            guard_bb (goto LOOP/new_merge)
458            new_loop <-- LOOP
459            new_exit
460            new_merge
461            next_bb
462 
463      If IS_NEW_LOOP is false, then LOOP is an original loop, in which case we
464      have:
465 
466            new_loop
467            guard_bb (goto LOOP/new_merge)
468            orig_loop <-- LOOP
469            new_exit
470            new_merge
471            next_bb
472 
473      The SSA names defined in the original loop have a current
474      reaching definition that that records the corresponding new
475      ssa-name used in the new duplicated loop copy.
476   */
477 
478 /* Function slpeel_update_phi_nodes_for_guard1
479 
480    Input:
481    - GUARD_EDGE, LOOP, IS_NEW_LOOP, NEW_EXIT_BB - as explained above.
482    - DEFS - a bitmap of ssa names to mark new names for which we recorded
483             information.
484 
485    In the context of the overall structure, we have:
486 
487         loop1_preheader_bb:
488                 guard1 (goto loop1/merg1_bb)
489 LOOP->  loop1
490         loop1_exit_bb:
491                 guard2 (goto merge1_bb/merge2_bb)
492         merge1_bb
493         loop2
494         loop2_exit_bb
495         merge2_bb
496         next_bb
497 
498    For each name updated between loop iterations (i.e - for each name that has
499    an entry (loop-header) phi in LOOP) we create a new phi in:
500    1. merge1_bb (to account for the edge from guard1)
501    2. loop1_exit_bb (an exit-phi to keep LOOP in loop-closed form)
502 */
503 
504 static void
slpeel_update_phi_nodes_for_guard1(edge guard_edge,struct loop * loop,bool is_new_loop,basic_block * new_exit_bb,bitmap * defs)505 slpeel_update_phi_nodes_for_guard1 (edge guard_edge, struct loop *loop,
506                                     bool is_new_loop, basic_block *new_exit_bb,
507                                     bitmap *defs)
508 {
509   tree orig_phi, new_phi;
510   tree update_phi, update_phi2;
511   tree guard_arg, loop_arg;
512   basic_block new_merge_bb = guard_edge->dest;
513   edge e = EDGE_SUCC (new_merge_bb, 0);
514   basic_block update_bb = e->dest;
515   basic_block orig_bb = loop->header;
516   edge new_exit_e;
517   tree current_new_name;
518   tree name;
519 
520   /* Create new bb between loop and new_merge_bb.  */
521   *new_exit_bb = split_edge (loop->single_exit);
522   add_bb_to_loop (*new_exit_bb, loop->outer);
523 
524   new_exit_e = EDGE_SUCC (*new_exit_bb, 0);
525 
526   for (orig_phi = phi_nodes (orig_bb), update_phi = phi_nodes (update_bb);
527        orig_phi && update_phi;
528        orig_phi = PHI_CHAIN (orig_phi), update_phi = PHI_CHAIN (update_phi))
529     {
530       /* Virtual phi; Mark it for renaming. We actually want to call
531 	 mar_sym_for_renaming, but since all ssa renaming datastructures
532 	 are going to be freed before we get to call ssa_upate, we just
533 	 record this name for now in a bitmap, and will mark it for
534 	 renaming later.  */
535       name = PHI_RESULT (orig_phi);
536       if (!is_gimple_reg (SSA_NAME_VAR (name)))
537         bitmap_set_bit (vect_vnames_to_rename, SSA_NAME_VERSION (name));
538 
539       /** 1. Handle new-merge-point phis  **/
540 
541       /* 1.1. Generate new phi node in NEW_MERGE_BB:  */
542       new_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (orig_phi)),
543                                  new_merge_bb);
544 
545       /* 1.2. NEW_MERGE_BB has two incoming edges: GUARD_EDGE and the exit-edge
546             of LOOP. Set the two phi args in NEW_PHI for these edges:  */
547       loop_arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, EDGE_SUCC (loop->latch, 0));
548       guard_arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, loop_preheader_edge (loop));
549 
550       add_phi_arg (new_phi, loop_arg, new_exit_e);
551       add_phi_arg (new_phi, guard_arg, guard_edge);
552 
553       /* 1.3. Update phi in successor block.  */
554       gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi, e) == loop_arg
555                   || PHI_ARG_DEF_FROM_EDGE (update_phi, e) == guard_arg);
556       SET_PHI_ARG_DEF (update_phi, e->dest_idx, PHI_RESULT (new_phi));
557       update_phi2 = new_phi;
558 
559 
560       /** 2. Handle loop-closed-ssa-form phis  **/
561 
562       /* 2.1. Generate new phi node in NEW_EXIT_BB:  */
563       new_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (orig_phi)),
564                                  *new_exit_bb);
565 
566       /* 2.2. NEW_EXIT_BB has one incoming edge: the exit-edge of the loop.  */
567       add_phi_arg (new_phi, loop_arg, loop->single_exit);
568 
569       /* 2.3. Update phi in successor of NEW_EXIT_BB:  */
570       gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi2, new_exit_e) == loop_arg);
571       SET_PHI_ARG_DEF (update_phi2, new_exit_e->dest_idx, PHI_RESULT (new_phi));
572 
573       /* 2.4. Record the newly created name with set_current_def.
574          We want to find a name such that
575                 name = get_current_def (orig_loop_name)
576          and to set its current definition as follows:
577                 set_current_def (name, new_phi_name)
578 
579          If LOOP is a new loop then loop_arg is already the name we're
580          looking for. If LOOP is the original loop, then loop_arg is
581          the orig_loop_name and the relevant name is recorded in its
582          current reaching definition.  */
583       if (is_new_loop)
584         current_new_name = loop_arg;
585       else
586         {
587           current_new_name = get_current_def (loop_arg);
588 	  /* current_def is not available only if the variable does not
589 	     change inside the loop, in which case we also don't care
590 	     about recording a current_def for it because we won't be
591 	     trying to create loop-exit-phis for it.  */
592 	  if (!current_new_name)
593 	    continue;
594         }
595       gcc_assert (get_current_def (current_new_name) == NULL_TREE);
596 
597       set_current_def (current_new_name, PHI_RESULT (new_phi));
598       bitmap_set_bit (*defs, SSA_NAME_VERSION (current_new_name));
599     }
600 
601   set_phi_nodes (new_merge_bb, phi_reverse (phi_nodes (new_merge_bb)));
602 }
603 
604 
605 /* Function slpeel_update_phi_nodes_for_guard2
606 
607    Input:
608    - GUARD_EDGE, LOOP, IS_NEW_LOOP, NEW_EXIT_BB - as explained above.
609 
610    In the context of the overall structure, we have:
611 
612         loop1_preheader_bb:
613                 guard1 (goto loop1/merg1_bb)
614         loop1
615         loop1_exit_bb:
616                 guard2 (goto merge1_bb/merge2_bb)
617         merge1_bb
618 LOOP->  loop2
619         loop2_exit_bb
620         merge2_bb
621         next_bb
622 
623    For each name used out side the loop (i.e - for each name that has an exit
624    phi in next_bb) we create a new phi in:
625    1. merge2_bb (to account for the edge from guard_bb)
626    2. loop2_exit_bb (an exit-phi to keep LOOP in loop-closed form)
627    3. guard2 bb (an exit phi to keep the preceding loop in loop-closed form),
628       if needed (if it wasn't handled by slpeel_update_phis_nodes_for_phi1).
629 */
630 
631 static void
slpeel_update_phi_nodes_for_guard2(edge guard_edge,struct loop * loop,bool is_new_loop,basic_block * new_exit_bb)632 slpeel_update_phi_nodes_for_guard2 (edge guard_edge, struct loop *loop,
633                                     bool is_new_loop, basic_block *new_exit_bb)
634 {
635   tree orig_phi, new_phi;
636   tree update_phi, update_phi2;
637   tree guard_arg, loop_arg;
638   basic_block new_merge_bb = guard_edge->dest;
639   edge e = EDGE_SUCC (new_merge_bb, 0);
640   basic_block update_bb = e->dest;
641   edge new_exit_e;
642   tree orig_def, orig_def_new_name;
643   tree new_name, new_name2;
644   tree arg;
645 
646   /* Create new bb between loop and new_merge_bb.  */
647   *new_exit_bb = split_edge (loop->single_exit);
648   add_bb_to_loop (*new_exit_bb, loop->outer);
649 
650   new_exit_e = EDGE_SUCC (*new_exit_bb, 0);
651 
652   for (update_phi = phi_nodes (update_bb); update_phi;
653        update_phi = PHI_CHAIN (update_phi))
654     {
655       orig_phi = update_phi;
656       orig_def = PHI_ARG_DEF_FROM_EDGE (orig_phi, e);
657       /* This loop-closed-phi actually doesn't represent a use
658          out of the loop - the phi arg is a constant.  */
659       if (TREE_CODE (orig_def) != SSA_NAME)
660         continue;
661       orig_def_new_name = get_current_def (orig_def);
662       arg = NULL_TREE;
663 
664       /** 1. Handle new-merge-point phis  **/
665 
666       /* 1.1. Generate new phi node in NEW_MERGE_BB:  */
667       new_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (orig_phi)),
668                                  new_merge_bb);
669 
670       /* 1.2. NEW_MERGE_BB has two incoming edges: GUARD_EDGE and the exit-edge
671             of LOOP. Set the two PHI args in NEW_PHI for these edges:  */
672       new_name = orig_def;
673       new_name2 = NULL_TREE;
674       if (orig_def_new_name)
675         {
676           new_name = orig_def_new_name;
677 	  /* Some variables have both loop-entry-phis and loop-exit-phis.
678 	     Such variables were given yet newer names by phis placed in
679 	     guard_bb by slpeel_update_phi_nodes_for_guard1. I.e:
680 	     new_name2 = get_current_def (get_current_def (orig_name)).  */
681           new_name2 = get_current_def (new_name);
682         }
683 
684       if (is_new_loop)
685         {
686           guard_arg = orig_def;
687           loop_arg = new_name;
688         }
689       else
690         {
691           guard_arg = new_name;
692           loop_arg = orig_def;
693         }
694       if (new_name2)
695         guard_arg = new_name2;
696 
697       add_phi_arg (new_phi, loop_arg, new_exit_e);
698       add_phi_arg (new_phi, guard_arg, guard_edge);
699 
700       /* 1.3. Update phi in successor block.  */
701       gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi, e) == orig_def);
702       SET_PHI_ARG_DEF (update_phi, e->dest_idx, PHI_RESULT (new_phi));
703       update_phi2 = new_phi;
704 
705 
706       /** 2. Handle loop-closed-ssa-form phis  **/
707 
708       /* 2.1. Generate new phi node in NEW_EXIT_BB:  */
709       new_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (orig_phi)),
710                                  *new_exit_bb);
711 
712       /* 2.2. NEW_EXIT_BB has one incoming edge: the exit-edge of the loop.  */
713       add_phi_arg (new_phi, loop_arg, loop->single_exit);
714 
715       /* 2.3. Update phi in successor of NEW_EXIT_BB:  */
716       gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi2, new_exit_e) == loop_arg);
717       SET_PHI_ARG_DEF (update_phi2, new_exit_e->dest_idx, PHI_RESULT (new_phi));
718 
719 
720       /** 3. Handle loop-closed-ssa-form phis for first loop  **/
721 
722       /* 3.1. Find the relevant names that need an exit-phi in
723 	 GUARD_BB, i.e. names for which
724 	 slpeel_update_phi_nodes_for_guard1 had not already created a
725 	 phi node. This is the case for names that are used outside
726 	 the loop (and therefore need an exit phi) but are not updated
727 	 across loop iterations (and therefore don't have a
728 	 loop-header-phi).
729 
730 	 slpeel_update_phi_nodes_for_guard1 is responsible for
731 	 creating loop-exit phis in GUARD_BB for names that have a
732 	 loop-header-phi.  When such a phi is created we also record
733 	 the new name in its current definition.  If this new name
734 	 exists, then guard_arg was set to this new name (see 1.2
735 	 above).  Therefore, if guard_arg is not this new name, this
736 	 is an indication that an exit-phi in GUARD_BB was not yet
737 	 created, so we take care of it here.  */
738       if (guard_arg == new_name2)
739 	continue;
740       arg = guard_arg;
741 
742       /* 3.2. Generate new phi node in GUARD_BB:  */
743       new_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (orig_phi)),
744                                  guard_edge->src);
745 
746       /* 3.3. GUARD_BB has one incoming edge:  */
747       gcc_assert (EDGE_COUNT (guard_edge->src->preds) == 1);
748       add_phi_arg (new_phi, arg, EDGE_PRED (guard_edge->src, 0));
749 
750       /* 3.4. Update phi in successor of GUARD_BB:  */
751       gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi2, guard_edge)
752                                                                 == guard_arg);
753       SET_PHI_ARG_DEF (update_phi2, guard_edge->dest_idx, PHI_RESULT (new_phi));
754     }
755 
756   set_phi_nodes (new_merge_bb, phi_reverse (phi_nodes (new_merge_bb)));
757 }
758 
759 
760 /* Make the LOOP iterate NITERS times. This is done by adding a new IV
761    that starts at zero, increases by one and its limit is NITERS.
762 
763    Assumption: the exit-condition of LOOP is the last stmt in the loop.  */
764 
765 void
slpeel_make_loop_iterate_ntimes(struct loop * loop,tree niters)766 slpeel_make_loop_iterate_ntimes (struct loop *loop, tree niters)
767 {
768   tree indx_before_incr, indx_after_incr, cond_stmt, cond;
769   tree orig_cond;
770   edge exit_edge = loop->single_exit;
771   block_stmt_iterator loop_cond_bsi;
772   block_stmt_iterator incr_bsi;
773   bool insert_after;
774   tree begin_label = tree_block_label (loop->latch);
775   tree exit_label = tree_block_label (loop->single_exit->dest);
776   tree init = build_int_cst (TREE_TYPE (niters), 0);
777   tree step = build_int_cst (TREE_TYPE (niters), 1);
778   tree then_label;
779   tree else_label;
780   LOC loop_loc;
781 
782   orig_cond = get_loop_exit_condition (loop);
783   gcc_assert (orig_cond);
784   loop_cond_bsi = bsi_for_stmt (orig_cond);
785 
786   standard_iv_increment_position (loop, &incr_bsi, &insert_after);
787   create_iv (init, step, NULL_TREE, loop,
788              &incr_bsi, insert_after, &indx_before_incr, &indx_after_incr);
789 
790   if (exit_edge->flags & EDGE_TRUE_VALUE) /* 'then' edge exits the loop.  */
791     {
792       cond = build2 (GE_EXPR, boolean_type_node, indx_after_incr, niters);
793       then_label = build1 (GOTO_EXPR, void_type_node, exit_label);
794       else_label = build1 (GOTO_EXPR, void_type_node, begin_label);
795     }
796   else /* 'then' edge loops back.  */
797     {
798       cond = build2 (LT_EXPR, boolean_type_node, indx_after_incr, niters);
799       then_label = build1 (GOTO_EXPR, void_type_node, begin_label);
800       else_label = build1 (GOTO_EXPR, void_type_node, exit_label);
801     }
802 
803   cond_stmt = build3 (COND_EXPR, TREE_TYPE (orig_cond), cond,
804 		     then_label, else_label);
805   bsi_insert_before (&loop_cond_bsi, cond_stmt, BSI_SAME_STMT);
806 
807   /* Remove old loop exit test:  */
808   bsi_remove (&loop_cond_bsi, true);
809 
810   loop_loc = find_loop_location (loop);
811   if (dump_file && (dump_flags & TDF_DETAILS))
812     {
813       if (loop_loc != UNKNOWN_LOC)
814         fprintf (dump_file, "\nloop at %s:%d: ",
815                  LOC_FILE (loop_loc), LOC_LINE (loop_loc));
816       print_generic_expr (dump_file, cond_stmt, TDF_SLIM);
817     }
818 
819   loop->nb_iterations = niters;
820 }
821 
822 
823 /* Given LOOP this function generates a new copy of it and puts it
824    on E which is either the entry or exit of LOOP.  */
825 
826 static struct loop *
slpeel_tree_duplicate_loop_to_edge_cfg(struct loop * loop,struct loops * loops,edge e)827 slpeel_tree_duplicate_loop_to_edge_cfg (struct loop *loop, struct loops *loops,
828 					edge e)
829 {
830   struct loop *new_loop;
831   basic_block *new_bbs, *bbs;
832   bool at_exit;
833   bool was_imm_dom;
834   basic_block exit_dest;
835   tree phi, phi_arg;
836 
837   at_exit = (e == loop->single_exit);
838   if (!at_exit && e != loop_preheader_edge (loop))
839     return NULL;
840 
841   bbs = get_loop_body (loop);
842 
843   /* Check whether duplication is possible.  */
844   if (!can_copy_bbs_p (bbs, loop->num_nodes))
845     {
846       free (bbs);
847       return NULL;
848     }
849 
850   /* Generate new loop structure.  */
851   new_loop = duplicate_loop (loops, loop, loop->outer);
852   if (!new_loop)
853     {
854       free (bbs);
855       return NULL;
856     }
857 
858   exit_dest = loop->single_exit->dest;
859   was_imm_dom = (get_immediate_dominator (CDI_DOMINATORS,
860 					  exit_dest) == loop->header ?
861 		 true : false);
862 
863   new_bbs = XNEWVEC (basic_block, loop->num_nodes);
864 
865   copy_bbs (bbs, loop->num_nodes, new_bbs,
866 	    &loop->single_exit, 1, &new_loop->single_exit, NULL,
867 	    e->src);
868 
869   /* Duplicating phi args at exit bbs as coming
870      also from exit of duplicated loop.  */
871   for (phi = phi_nodes (exit_dest); phi; phi = PHI_CHAIN (phi))
872     {
873       phi_arg = PHI_ARG_DEF_FROM_EDGE (phi, loop->single_exit);
874       if (phi_arg)
875 	{
876 	  edge new_loop_exit_edge;
877 
878 	  if (EDGE_SUCC (new_loop->header, 0)->dest == new_loop->latch)
879 	    new_loop_exit_edge = EDGE_SUCC (new_loop->header, 1);
880 	  else
881 	    new_loop_exit_edge = EDGE_SUCC (new_loop->header, 0);
882 
883 	  add_phi_arg (phi, phi_arg, new_loop_exit_edge);
884 	}
885     }
886 
887   if (at_exit) /* Add the loop copy at exit.  */
888     {
889       redirect_edge_and_branch_force (e, new_loop->header);
890       set_immediate_dominator (CDI_DOMINATORS, new_loop->header, e->src);
891       if (was_imm_dom)
892 	set_immediate_dominator (CDI_DOMINATORS, exit_dest, new_loop->header);
893     }
894   else /* Add the copy at entry.  */
895     {
896       edge new_exit_e;
897       edge entry_e = loop_preheader_edge (loop);
898       basic_block preheader = entry_e->src;
899 
900       if (!flow_bb_inside_loop_p (new_loop,
901 				  EDGE_SUCC (new_loop->header, 0)->dest))
902         new_exit_e = EDGE_SUCC (new_loop->header, 0);
903       else
904 	new_exit_e = EDGE_SUCC (new_loop->header, 1);
905 
906       redirect_edge_and_branch_force (new_exit_e, loop->header);
907       set_immediate_dominator (CDI_DOMINATORS, loop->header,
908 			       new_exit_e->src);
909 
910       /* We have to add phi args to the loop->header here as coming
911 	 from new_exit_e edge.  */
912       for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
913 	{
914 	  phi_arg = PHI_ARG_DEF_FROM_EDGE (phi, entry_e);
915 	  if (phi_arg)
916 	    add_phi_arg (phi, phi_arg, new_exit_e);
917 	}
918 
919       redirect_edge_and_branch_force (entry_e, new_loop->header);
920       set_immediate_dominator (CDI_DOMINATORS, new_loop->header, preheader);
921     }
922 
923   free (new_bbs);
924   free (bbs);
925 
926   return new_loop;
927 }
928 
929 
930 /* Given the condition statement COND, put it as the last statement
931    of GUARD_BB; EXIT_BB is the basic block to skip the loop;
932    Assumes that this is the single exit of the guarded loop.
933    Returns the skip edge.  */
934 
935 static edge
slpeel_add_loop_guard(basic_block guard_bb,tree cond,basic_block exit_bb,basic_block dom_bb)936 slpeel_add_loop_guard (basic_block guard_bb, tree cond, basic_block exit_bb,
937 		        basic_block dom_bb)
938 {
939   block_stmt_iterator bsi;
940   edge new_e, enter_e;
941   tree cond_stmt, then_label, else_label;
942 
943   enter_e = EDGE_SUCC (guard_bb, 0);
944   enter_e->flags &= ~EDGE_FALLTHRU;
945   enter_e->flags |= EDGE_FALSE_VALUE;
946   bsi = bsi_last (guard_bb);
947 
948   then_label = build1 (GOTO_EXPR, void_type_node,
949                        tree_block_label (exit_bb));
950   else_label = build1 (GOTO_EXPR, void_type_node,
951                        tree_block_label (enter_e->dest));
952   cond_stmt = build3 (COND_EXPR, void_type_node, cond,
953    		     then_label, else_label);
954   bsi_insert_after (&bsi, cond_stmt, BSI_NEW_STMT);
955   /* Add new edge to connect guard block to the merge/loop-exit block.  */
956   new_e = make_edge (guard_bb, exit_bb, EDGE_TRUE_VALUE);
957   set_immediate_dominator (CDI_DOMINATORS, exit_bb, dom_bb);
958   return new_e;
959 }
960 
961 
962 /* This function verifies that the following restrictions apply to LOOP:
963    (1) it is innermost
964    (2) it consists of exactly 2 basic blocks - header, and an empty latch.
965    (3) it is single entry, single exit
966    (4) its exit condition is the last stmt in the header
967    (5) E is the entry/exit edge of LOOP.
968  */
969 
970 bool
slpeel_can_duplicate_loop_p(struct loop * loop,edge e)971 slpeel_can_duplicate_loop_p (struct loop *loop, edge e)
972 {
973   edge exit_e = loop->single_exit;
974   edge entry_e = loop_preheader_edge (loop);
975   tree orig_cond = get_loop_exit_condition (loop);
976   block_stmt_iterator loop_exit_bsi = bsi_last (exit_e->src);
977 
978   if (need_ssa_update_p ())
979     return false;
980 
981   if (loop->inner
982       /* All loops have an outer scope; the only case loop->outer is NULL is for
983          the function itself.  */
984       || !loop->outer
985       || loop->num_nodes != 2
986       || !empty_block_p (loop->latch)
987       || !loop->single_exit
988       /* Verify that new loop exit condition can be trivially modified.  */
989       || (!orig_cond || orig_cond != bsi_stmt (loop_exit_bsi))
990       || (e != exit_e && e != entry_e))
991     return false;
992 
993   return true;
994 }
995 
996 #ifdef ENABLE_CHECKING
997 void
slpeel_verify_cfg_after_peeling(struct loop * first_loop,struct loop * second_loop)998 slpeel_verify_cfg_after_peeling (struct loop *first_loop,
999                                  struct loop *second_loop)
1000 {
1001   basic_block loop1_exit_bb = first_loop->single_exit->dest;
1002   basic_block loop2_entry_bb = loop_preheader_edge (second_loop)->src;
1003   basic_block loop1_entry_bb = loop_preheader_edge (first_loop)->src;
1004 
1005   /* A guard that controls whether the second_loop is to be executed or skipped
1006      is placed in first_loop->exit.  first_loopt->exit therefore has two
1007      successors - one is the preheader of second_loop, and the other is a bb
1008      after second_loop.
1009    */
1010   gcc_assert (EDGE_COUNT (loop1_exit_bb->succs) == 2);
1011 
1012   /* 1. Verify that one of the successors of first_loopt->exit is the preheader
1013         of second_loop.  */
1014 
1015   /* The preheader of new_loop is expected to have two predecessors:
1016      first_loop->exit and the block that precedes first_loop.  */
1017 
1018   gcc_assert (EDGE_COUNT (loop2_entry_bb->preds) == 2
1019               && ((EDGE_PRED (loop2_entry_bb, 0)->src == loop1_exit_bb
1020                    && EDGE_PRED (loop2_entry_bb, 1)->src == loop1_entry_bb)
1021                || (EDGE_PRED (loop2_entry_bb, 1)->src ==  loop1_exit_bb
1022                    && EDGE_PRED (loop2_entry_bb, 0)->src == loop1_entry_bb)));
1023 
1024   /* Verify that the other successor of first_loopt->exit is after the
1025      second_loop.  */
1026   /* TODO */
1027 }
1028 #endif
1029 
1030 /* Function slpeel_tree_peel_loop_to_edge.
1031 
1032    Peel the first (last) iterations of LOOP into a new prolog (epilog) loop
1033    that is placed on the entry (exit) edge E of LOOP. After this transformation
1034    we have two loops one after the other - first-loop iterates FIRST_NITERS
1035    times, and second-loop iterates the remainder NITERS - FIRST_NITERS times.
1036 
1037    Input:
1038    - LOOP: the loop to be peeled.
1039    - E: the exit or entry edge of LOOP.
1040         If it is the entry edge, we peel the first iterations of LOOP. In this
1041         case first-loop is LOOP, and second-loop is the newly created loop.
1042         If it is the exit edge, we peel the last iterations of LOOP. In this
1043         case, first-loop is the newly created loop, and second-loop is LOOP.
1044    - NITERS: the number of iterations that LOOP iterates.
1045    - FIRST_NITERS: the number of iterations that the first-loop should iterate.
1046    - UPDATE_FIRST_LOOP_COUNT:  specified whether this function is responsible
1047         for updating the loop bound of the first-loop to FIRST_NITERS.  If it
1048         is false, the caller of this function may want to take care of this
1049         (this can be useful if we don't want new stmts added to first-loop).
1050 
1051    Output:
1052    The function returns a pointer to the new loop-copy, or NULL if it failed
1053    to perform the transformation.
1054 
1055    The function generates two if-then-else guards: one before the first loop,
1056    and the other before the second loop:
1057    The first guard is:
1058      if (FIRST_NITERS == 0) then skip the first loop,
1059      and go directly to the second loop.
1060    The second guard is:
1061      if (FIRST_NITERS == NITERS) then skip the second loop.
1062 
1063    FORNOW only simple loops are supported (see slpeel_can_duplicate_loop_p).
1064    FORNOW the resulting code will not be in loop-closed-ssa form.
1065 */
1066 
1067 struct loop*
slpeel_tree_peel_loop_to_edge(struct loop * loop,struct loops * loops,edge e,tree first_niters,tree niters,bool update_first_loop_count)1068 slpeel_tree_peel_loop_to_edge (struct loop *loop, struct loops *loops,
1069 			       edge e, tree first_niters,
1070 			       tree niters, bool update_first_loop_count)
1071 {
1072   struct loop *new_loop = NULL, *first_loop, *second_loop;
1073   edge skip_e;
1074   tree pre_condition;
1075   bitmap definitions;
1076   basic_block bb_before_second_loop, bb_after_second_loop;
1077   basic_block bb_before_first_loop;
1078   basic_block bb_between_loops;
1079   basic_block new_exit_bb;
1080   edge exit_e = loop->single_exit;
1081   LOC loop_loc;
1082 
1083   if (!slpeel_can_duplicate_loop_p (loop, e))
1084     return NULL;
1085 
1086   /* We have to initialize cfg_hooks. Then, when calling
1087    cfg_hooks->split_edge, the function tree_split_edge
1088    is actually called and, when calling cfg_hooks->duplicate_block,
1089    the function tree_duplicate_bb is called.  */
1090   tree_register_cfg_hooks ();
1091 
1092 
1093   /* 1. Generate a copy of LOOP and put it on E (E is the entry/exit of LOOP).
1094         Resulting CFG would be:
1095 
1096         first_loop:
1097         do {
1098         } while ...
1099 
1100         second_loop:
1101         do {
1102         } while ...
1103 
1104         orig_exit_bb:
1105    */
1106 
1107   if (!(new_loop = slpeel_tree_duplicate_loop_to_edge_cfg (loop, loops, e)))
1108     {
1109       loop_loc = find_loop_location (loop);
1110       if (dump_file && (dump_flags & TDF_DETAILS))
1111         {
1112           if (loop_loc != UNKNOWN_LOC)
1113             fprintf (dump_file, "\n%s:%d: note: ",
1114                      LOC_FILE (loop_loc), LOC_LINE (loop_loc));
1115           fprintf (dump_file, "tree_duplicate_loop_to_edge_cfg failed.\n");
1116         }
1117       return NULL;
1118     }
1119 
1120   if (e == exit_e)
1121     {
1122       /* NEW_LOOP was placed after LOOP.  */
1123       first_loop = loop;
1124       second_loop = new_loop;
1125     }
1126   else
1127     {
1128       /* NEW_LOOP was placed before LOOP.  */
1129       first_loop = new_loop;
1130       second_loop = loop;
1131     }
1132 
1133   definitions = ssa_names_to_replace ();
1134   slpeel_update_phis_for_duplicate_loop (loop, new_loop, e == exit_e);
1135   rename_variables_in_loop (new_loop);
1136 
1137 
1138   /* 2. Add the guard that controls whether the first loop is executed.
1139         Resulting CFG would be:
1140 
1141         bb_before_first_loop:
1142         if (FIRST_NITERS == 0) GOTO bb_before_second_loop
1143                                GOTO first-loop
1144 
1145         first_loop:
1146         do {
1147         } while ...
1148 
1149         bb_before_second_loop:
1150 
1151         second_loop:
1152         do {
1153         } while ...
1154 
1155         orig_exit_bb:
1156    */
1157 
1158   bb_before_first_loop = split_edge (loop_preheader_edge (first_loop));
1159   add_bb_to_loop (bb_before_first_loop, first_loop->outer);
1160   bb_before_second_loop = split_edge (first_loop->single_exit);
1161   add_bb_to_loop (bb_before_second_loop, first_loop->outer);
1162 
1163   pre_condition =
1164     fold_build2 (LE_EXPR, boolean_type_node, first_niters,
1165                  build_int_cst (TREE_TYPE (first_niters), 0));
1166   skip_e = slpeel_add_loop_guard (bb_before_first_loop, pre_condition,
1167                                   bb_before_second_loop, bb_before_first_loop);
1168   slpeel_update_phi_nodes_for_guard1 (skip_e, first_loop,
1169 				      first_loop == new_loop,
1170 				      &new_exit_bb, &definitions);
1171 
1172 
1173   /* 3. Add the guard that controls whether the second loop is executed.
1174         Resulting CFG would be:
1175 
1176         bb_before_first_loop:
1177         if (FIRST_NITERS == 0) GOTO bb_before_second_loop (skip first loop)
1178                                GOTO first-loop
1179 
1180         first_loop:
1181         do {
1182         } while ...
1183 
1184         bb_between_loops:
1185         if (FIRST_NITERS == NITERS) GOTO bb_after_second_loop (skip second loop)
1186                                     GOTO bb_before_second_loop
1187 
1188         bb_before_second_loop:
1189 
1190         second_loop:
1191         do {
1192         } while ...
1193 
1194         bb_after_second_loop:
1195 
1196         orig_exit_bb:
1197    */
1198 
1199   bb_between_loops = new_exit_bb;
1200   bb_after_second_loop = split_edge (second_loop->single_exit);
1201   add_bb_to_loop (bb_after_second_loop, second_loop->outer);
1202 
1203   pre_condition =
1204 	fold_build2 (EQ_EXPR, boolean_type_node, first_niters, niters);
1205   skip_e = slpeel_add_loop_guard (bb_between_loops, pre_condition,
1206                                   bb_after_second_loop, bb_before_first_loop);
1207   slpeel_update_phi_nodes_for_guard2 (skip_e, second_loop,
1208                                      second_loop == new_loop, &new_exit_bb);
1209 
1210   /* 4. Make first-loop iterate FIRST_NITERS times, if requested.
1211    */
1212   if (update_first_loop_count)
1213     slpeel_make_loop_iterate_ntimes (first_loop, first_niters);
1214 
1215   BITMAP_FREE (definitions);
1216   delete_update_ssa ();
1217 
1218   return new_loop;
1219 }
1220 
1221 /* Function vect_get_loop_location.
1222 
1223    Extract the location of the loop in the source code.
1224    If the loop is not well formed for vectorization, an estimated
1225    location is calculated.
1226    Return the loop location if succeed and NULL if not.  */
1227 
1228 LOC
find_loop_location(struct loop * loop)1229 find_loop_location (struct loop *loop)
1230 {
1231   tree node = NULL_TREE;
1232   basic_block bb;
1233   block_stmt_iterator si;
1234 
1235   if (!loop)
1236     return UNKNOWN_LOC;
1237 
1238   node = get_loop_exit_condition (loop);
1239 
1240   if (node && EXPR_P (node) && EXPR_HAS_LOCATION (node)
1241       && EXPR_FILENAME (node) && EXPR_LINENO (node))
1242     return EXPR_LOC (node);
1243 
1244   /* If we got here the loop is probably not "well formed",
1245      try to estimate the loop location */
1246 
1247   if (!loop->header)
1248     return UNKNOWN_LOC;
1249 
1250   bb = loop->header;
1251 
1252   for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
1253     {
1254       node = bsi_stmt (si);
1255       if (node && EXPR_P (node) && EXPR_HAS_LOCATION (node))
1256         return EXPR_LOC (node);
1257     }
1258 
1259   return UNKNOWN_LOC;
1260 }
1261 
1262 
1263 /*************************************************************************
1264   Vectorization Debug Information.
1265  *************************************************************************/
1266 
1267 /* Function vect_set_verbosity_level.
1268 
1269    Called from toplev.c upon detection of the
1270    -ftree-vectorizer-verbose=N option.  */
1271 
1272 void
vect_set_verbosity_level(const char * val)1273 vect_set_verbosity_level (const char *val)
1274 {
1275    unsigned int vl;
1276 
1277    vl = atoi (val);
1278    if (vl < MAX_VERBOSITY_LEVEL)
1279      vect_verbosity_level = vl;
1280    else
1281      vect_verbosity_level = MAX_VERBOSITY_LEVEL - 1;
1282 }
1283 
1284 
1285 /* Function vect_set_dump_settings.
1286 
1287    Fix the verbosity level of the vectorizer if the
1288    requested level was not set explicitly using the flag
1289    -ftree-vectorizer-verbose=N.
1290    Decide where to print the debugging information (dump_file/stderr).
1291    If the user defined the verbosity level, but there is no dump file,
1292    print to stderr, otherwise print to the dump file.  */
1293 
1294 static void
vect_set_dump_settings(void)1295 vect_set_dump_settings (void)
1296 {
1297   vect_dump = dump_file;
1298 
1299   /* Check if the verbosity level was defined by the user:  */
1300   if (vect_verbosity_level != MAX_VERBOSITY_LEVEL)
1301     {
1302       /* If there is no dump file, print to stderr.  */
1303       if (!dump_file)
1304         vect_dump = stderr;
1305       return;
1306     }
1307 
1308   /* User didn't specify verbosity level:  */
1309   if (dump_file && (dump_flags & TDF_DETAILS))
1310     vect_verbosity_level = REPORT_DETAILS;
1311   else if (dump_file && (dump_flags & TDF_STATS))
1312     vect_verbosity_level = REPORT_UNVECTORIZED_LOOPS;
1313   else
1314     vect_verbosity_level = REPORT_NONE;
1315 
1316   gcc_assert (dump_file || vect_verbosity_level == REPORT_NONE);
1317 }
1318 
1319 
1320 /* Function debug_loop_details.
1321 
1322    For vectorization debug dumps.  */
1323 
1324 bool
vect_print_dump_info(enum verbosity_levels vl)1325 vect_print_dump_info (enum verbosity_levels vl)
1326 {
1327   if (vl > vect_verbosity_level)
1328     return false;
1329 
1330   if (!current_function_decl || !vect_dump)
1331     return false;
1332 
1333   if (vect_loop_location == UNKNOWN_LOC)
1334     fprintf (vect_dump, "\n%s:%d: note: ",
1335 		 DECL_SOURCE_FILE (current_function_decl),
1336 		 DECL_SOURCE_LINE (current_function_decl));
1337   else
1338     fprintf (vect_dump, "\n%s:%d: note: ",
1339 	     LOC_FILE (vect_loop_location), LOC_LINE (vect_loop_location));
1340 
1341   return true;
1342 }
1343 
1344 
1345 /*************************************************************************
1346   Vectorization Utilities.
1347  *************************************************************************/
1348 
1349 /* Function new_stmt_vec_info.
1350 
1351    Create and initialize a new stmt_vec_info struct for STMT.  */
1352 
1353 stmt_vec_info
new_stmt_vec_info(tree stmt,loop_vec_info loop_vinfo)1354 new_stmt_vec_info (tree stmt, loop_vec_info loop_vinfo)
1355 {
1356   stmt_vec_info res;
1357   res = (stmt_vec_info) xcalloc (1, sizeof (struct _stmt_vec_info));
1358 
1359   STMT_VINFO_TYPE (res) = undef_vec_info_type;
1360   STMT_VINFO_STMT (res) = stmt;
1361   STMT_VINFO_LOOP_VINFO (res) = loop_vinfo;
1362   STMT_VINFO_RELEVANT_P (res) = 0;
1363   STMT_VINFO_LIVE_P (res) = 0;
1364   STMT_VINFO_VECTYPE (res) = NULL;
1365   STMT_VINFO_VEC_STMT (res) = NULL;
1366   STMT_VINFO_IN_PATTERN_P (res) = false;
1367   STMT_VINFO_RELATED_STMT (res) = NULL;
1368   STMT_VINFO_DATA_REF (res) = NULL;
1369   if (TREE_CODE (stmt) == PHI_NODE)
1370     STMT_VINFO_DEF_TYPE (res) = vect_unknown_def_type;
1371   else
1372     STMT_VINFO_DEF_TYPE (res) = vect_loop_def;
1373   STMT_VINFO_SAME_ALIGN_REFS (res) = VEC_alloc (dr_p, heap, 5);
1374 
1375   return res;
1376 }
1377 
1378 
1379 /* Function new_loop_vec_info.
1380 
1381    Create and initialize a new loop_vec_info struct for LOOP, as well as
1382    stmt_vec_info structs for all the stmts in LOOP.  */
1383 
1384 loop_vec_info
new_loop_vec_info(struct loop * loop)1385 new_loop_vec_info (struct loop *loop)
1386 {
1387   loop_vec_info res;
1388   basic_block *bbs;
1389   block_stmt_iterator si;
1390   unsigned int i;
1391 
1392   res = (loop_vec_info) xcalloc (1, sizeof (struct _loop_vec_info));
1393 
1394   bbs = get_loop_body (loop);
1395 
1396   /* Create stmt_info for all stmts in the loop.  */
1397   for (i = 0; i < loop->num_nodes; i++)
1398     {
1399       basic_block bb = bbs[i];
1400       tree phi;
1401 
1402       for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
1403         {
1404           stmt_ann_t ann = get_stmt_ann (phi);
1405           set_stmt_info (ann, new_stmt_vec_info (phi, res));
1406         }
1407 
1408       for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
1409 	{
1410 	  tree stmt = bsi_stmt (si);
1411 	  stmt_ann_t ann;
1412 
1413 	  ann = stmt_ann (stmt);
1414 	  set_stmt_info (ann, new_stmt_vec_info (stmt, res));
1415 	}
1416     }
1417 
1418   LOOP_VINFO_LOOP (res) = loop;
1419   LOOP_VINFO_BBS (res) = bbs;
1420   LOOP_VINFO_EXIT_COND (res) = NULL;
1421   LOOP_VINFO_NITERS (res) = NULL;
1422   LOOP_VINFO_VECTORIZABLE_P (res) = 0;
1423   LOOP_PEELING_FOR_ALIGNMENT (res) = 0;
1424   LOOP_VINFO_VECT_FACTOR (res) = 0;
1425   LOOP_VINFO_DATAREFS (res) = VEC_alloc (data_reference_p, heap, 10);
1426   LOOP_VINFO_DDRS (res) = VEC_alloc (ddr_p, heap, 10 * 10);
1427   LOOP_VINFO_UNALIGNED_DR (res) = NULL;
1428   LOOP_VINFO_MAY_MISALIGN_STMTS (res)
1429     = VEC_alloc (tree, heap, PARAM_VALUE (PARAM_VECT_MAX_VERSION_CHECKS));
1430 
1431   return res;
1432 }
1433 
1434 
1435 /* Function destroy_loop_vec_info.
1436 
1437    Free LOOP_VINFO struct, as well as all the stmt_vec_info structs of all the
1438    stmts in the loop.  */
1439 
1440 void
destroy_loop_vec_info(loop_vec_info loop_vinfo)1441 destroy_loop_vec_info (loop_vec_info loop_vinfo)
1442 {
1443   struct loop *loop;
1444   basic_block *bbs;
1445   int nbbs;
1446   block_stmt_iterator si;
1447   int j;
1448 
1449   if (!loop_vinfo)
1450     return;
1451 
1452   loop = LOOP_VINFO_LOOP (loop_vinfo);
1453 
1454   bbs = LOOP_VINFO_BBS (loop_vinfo);
1455   nbbs = loop->num_nodes;
1456 
1457   for (j = 0; j < nbbs; j++)
1458     {
1459       basic_block bb = bbs[j];
1460       tree phi;
1461       stmt_vec_info stmt_info;
1462 
1463       for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
1464         {
1465           stmt_ann_t ann = stmt_ann (phi);
1466 
1467           stmt_info = vinfo_for_stmt (phi);
1468           free (stmt_info);
1469           set_stmt_info (ann, NULL);
1470         }
1471 
1472       for (si = bsi_start (bb); !bsi_end_p (si); )
1473 	{
1474 	  tree stmt = bsi_stmt (si);
1475 	  stmt_ann_t ann = stmt_ann (stmt);
1476 	  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1477 
1478 	  if (stmt_info)
1479 	    {
1480 	      /* Check if this is a "pattern stmt" (introduced by the
1481 		 vectorizer during the pattern recognition pass).  */
1482 	      bool remove_stmt_p = false;
1483 	      tree orig_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
1484 	      if (orig_stmt)
1485 		{
1486 		  stmt_vec_info orig_stmt_info = vinfo_for_stmt (orig_stmt);
1487 		  if (orig_stmt_info
1488 		      && STMT_VINFO_IN_PATTERN_P (orig_stmt_info))
1489 		    remove_stmt_p = true;
1490 		}
1491 
1492 	      /* Free stmt_vec_info.  */
1493 	      VEC_free (dr_p, heap, STMT_VINFO_SAME_ALIGN_REFS (stmt_info));
1494 	      free (stmt_info);
1495 	      set_stmt_info (ann, NULL);
1496 
1497 	      /* Remove dead "pattern stmts".  */
1498 	      if (remove_stmt_p)
1499 	        bsi_remove (&si, true);
1500 	    }
1501 	  bsi_next (&si);
1502 	}
1503     }
1504 
1505   free (LOOP_VINFO_BBS (loop_vinfo));
1506   free_data_refs (LOOP_VINFO_DATAREFS (loop_vinfo));
1507   free_dependence_relations (LOOP_VINFO_DDRS (loop_vinfo));
1508   VEC_free (tree, heap, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo));
1509 
1510   free (loop_vinfo);
1511 }
1512 
1513 
1514 /* Function vect_force_dr_alignment_p.
1515 
1516    Returns whether the alignment of a DECL can be forced to be aligned
1517    on ALIGNMENT bit boundary.  */
1518 
1519 bool
vect_can_force_dr_alignment_p(tree decl,unsigned int alignment)1520 vect_can_force_dr_alignment_p (tree decl, unsigned int alignment)
1521 {
1522   if (TREE_CODE (decl) != VAR_DECL)
1523     return false;
1524 
1525   if (DECL_EXTERNAL (decl))
1526     return false;
1527 
1528   if (TREE_ASM_WRITTEN (decl))
1529     return false;
1530 
1531   if (TREE_STATIC (decl))
1532     return (alignment <= MAX_OFILE_ALIGNMENT);
1533   else
1534     /* This is not 100% correct.  The absolute correct stack alignment
1535        is STACK_BOUNDARY.  We're supposed to hope, but not assume, that
1536        PREFERRED_STACK_BOUNDARY is honored by all translation units.
1537        However, until someone implements forced stack alignment, SSE
1538        isn't really usable without this.  */
1539     return (alignment <= PREFERRED_STACK_BOUNDARY);
1540 }
1541 
1542 
1543 /* Function get_vectype_for_scalar_type.
1544 
1545    Returns the vector type corresponding to SCALAR_TYPE as supported
1546    by the target.  */
1547 
1548 tree
get_vectype_for_scalar_type(tree scalar_type)1549 get_vectype_for_scalar_type (tree scalar_type)
1550 {
1551   enum machine_mode inner_mode = TYPE_MODE (scalar_type);
1552   int nbytes = GET_MODE_SIZE (inner_mode);
1553   int nunits;
1554   tree vectype;
1555 
1556   if (nbytes == 0 || nbytes >= UNITS_PER_SIMD_WORD)
1557     return NULL_TREE;
1558 
1559   /* FORNOW: Only a single vector size per target (UNITS_PER_SIMD_WORD)
1560      is expected.  */
1561   nunits = UNITS_PER_SIMD_WORD / nbytes;
1562 
1563   vectype = build_vector_type (scalar_type, nunits);
1564   if (vect_print_dump_info (REPORT_DETAILS))
1565     {
1566       fprintf (vect_dump, "get vectype with %d units of type ", nunits);
1567       print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
1568     }
1569 
1570   if (!vectype)
1571     return NULL_TREE;
1572 
1573   if (vect_print_dump_info (REPORT_DETAILS))
1574     {
1575       fprintf (vect_dump, "vectype: ");
1576       print_generic_expr (vect_dump, vectype, TDF_SLIM);
1577     }
1578 
1579   if (!VECTOR_MODE_P (TYPE_MODE (vectype))
1580       && !INTEGRAL_MODE_P (TYPE_MODE (vectype)))
1581     {
1582       if (vect_print_dump_info (REPORT_DETAILS))
1583         fprintf (vect_dump, "mode not supported by target.");
1584       return NULL_TREE;
1585     }
1586 
1587   return vectype;
1588 }
1589 
1590 
1591 /* Function vect_supportable_dr_alignment
1592 
1593    Return whether the data reference DR is supported with respect to its
1594    alignment.  */
1595 
1596 enum dr_alignment_support
vect_supportable_dr_alignment(struct data_reference * dr)1597 vect_supportable_dr_alignment (struct data_reference *dr)
1598 {
1599   tree vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (DR_STMT (dr)));
1600   enum machine_mode mode = (int) TYPE_MODE (vectype);
1601 
1602   if (aligned_access_p (dr))
1603     return dr_aligned;
1604 
1605   /* Possibly unaligned access.  */
1606 
1607   if (DR_IS_READ (dr))
1608     {
1609       if (vec_realign_load_optab->handlers[mode].insn_code != CODE_FOR_nothing
1610 	  && (!targetm.vectorize.builtin_mask_for_load
1611 	      || targetm.vectorize.builtin_mask_for_load ()))
1612 	return dr_unaligned_software_pipeline;
1613 
1614       if (movmisalign_optab->handlers[mode].insn_code != CODE_FOR_nothing)
1615 	/* Can't software pipeline the loads, but can at least do them.  */
1616 	return dr_unaligned_supported;
1617     }
1618 
1619   /* Unsupported.  */
1620   return dr_unaligned_unsupported;
1621 }
1622 
1623 
1624 /* Function vect_is_simple_use.
1625 
1626    Input:
1627    LOOP - the loop that is being vectorized.
1628    OPERAND - operand of a stmt in LOOP.
1629    DEF - the defining stmt in case OPERAND is an SSA_NAME.
1630 
1631    Returns whether a stmt with OPERAND can be vectorized.
1632    Supportable operands are constants, loop invariants, and operands that are
1633    defined by the current iteration of the loop. Unsupportable operands are
1634    those that are defined by a previous iteration of the loop (as is the case
1635    in reduction/induction computations).  */
1636 
1637 bool
vect_is_simple_use(tree operand,loop_vec_info loop_vinfo,tree * def_stmt,tree * def,enum vect_def_type * dt)1638 vect_is_simple_use (tree operand, loop_vec_info loop_vinfo, tree *def_stmt,
1639 		    tree *def, enum vect_def_type *dt)
1640 {
1641   basic_block bb;
1642   stmt_vec_info stmt_vinfo;
1643   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1644 
1645   *def_stmt = NULL_TREE;
1646   *def = NULL_TREE;
1647 
1648   if (vect_print_dump_info (REPORT_DETAILS))
1649     {
1650       fprintf (vect_dump, "vect_is_simple_use: operand ");
1651       print_generic_expr (vect_dump, operand, TDF_SLIM);
1652     }
1653 
1654   if (TREE_CODE (operand) == INTEGER_CST || TREE_CODE (operand) == REAL_CST)
1655     {
1656       *dt = vect_constant_def;
1657       return true;
1658     }
1659 
1660   if (TREE_CODE (operand) != SSA_NAME)
1661     {
1662       if (vect_print_dump_info (REPORT_DETAILS))
1663         fprintf (vect_dump, "not ssa-name.");
1664       return false;
1665     }
1666 
1667   *def_stmt = SSA_NAME_DEF_STMT (operand);
1668   if (*def_stmt == NULL_TREE )
1669     {
1670       if (vect_print_dump_info (REPORT_DETAILS))
1671         fprintf (vect_dump, "no def_stmt.");
1672       return false;
1673     }
1674 
1675   if (vect_print_dump_info (REPORT_DETAILS))
1676     {
1677       fprintf (vect_dump, "def_stmt: ");
1678       print_generic_expr (vect_dump, *def_stmt, TDF_SLIM);
1679     }
1680 
1681   /* empty stmt is expected only in case of a function argument.
1682      (Otherwise - we expect a phi_node or a modify_expr).  */
1683   if (IS_EMPTY_STMT (*def_stmt))
1684     {
1685       tree arg = TREE_OPERAND (*def_stmt, 0);
1686       if (TREE_CODE (arg) == INTEGER_CST || TREE_CODE (arg) == REAL_CST)
1687         {
1688           *def = operand;
1689           *dt = vect_invariant_def;
1690           return true;
1691         }
1692 
1693       if (vect_print_dump_info (REPORT_DETAILS))
1694         fprintf (vect_dump, "Unexpected empty stmt.");
1695       return false;
1696     }
1697 
1698   bb = bb_for_stmt (*def_stmt);
1699   if (!flow_bb_inside_loop_p (loop, bb))
1700     *dt = vect_invariant_def;
1701   else
1702     {
1703       stmt_vinfo = vinfo_for_stmt (*def_stmt);
1704       *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
1705     }
1706 
1707   if (*dt == vect_unknown_def_type)
1708     {
1709       if (vect_print_dump_info (REPORT_DETAILS))
1710         fprintf (vect_dump, "Unsupported pattern.");
1711       return false;
1712     }
1713 
1714   /* stmts inside the loop that have been identified as performing
1715      a reduction operation cannot have uses in the loop.  */
1716   if (*dt == vect_reduction_def && TREE_CODE (*def_stmt) != PHI_NODE)
1717     {
1718       if (vect_print_dump_info (REPORT_DETAILS))
1719         fprintf (vect_dump, "reduction used in loop.");
1720       return false;
1721     }
1722 
1723   if (vect_print_dump_info (REPORT_DETAILS))
1724     fprintf (vect_dump, "type of def: %d.",*dt);
1725 
1726   switch (TREE_CODE (*def_stmt))
1727     {
1728     case PHI_NODE:
1729       *def = PHI_RESULT (*def_stmt);
1730       gcc_assert (*dt == vect_induction_def || *dt == vect_reduction_def
1731                   || *dt == vect_invariant_def);
1732       break;
1733 
1734     case MODIFY_EXPR:
1735       *def = TREE_OPERAND (*def_stmt, 0);
1736       gcc_assert (*dt == vect_loop_def || *dt == vect_invariant_def);
1737       break;
1738 
1739     default:
1740       if (vect_print_dump_info (REPORT_DETAILS))
1741         fprintf (vect_dump, "unsupported defining stmt: ");
1742       return false;
1743     }
1744 
1745   if (*dt == vect_induction_def)
1746     {
1747       if (vect_print_dump_info (REPORT_DETAILS))
1748         fprintf (vect_dump, "induction not supported.");
1749       return false;
1750     }
1751 
1752   return true;
1753 }
1754 
1755 
1756 /* Function reduction_code_for_scalar_code
1757 
1758    Input:
1759    CODE - tree_code of a reduction operations.
1760 
1761    Output:
1762    REDUC_CODE - the corresponding tree-code to be used to reduce the
1763       vector of partial results into a single scalar result (which
1764       will also reside in a vector).
1765 
1766    Return TRUE if a corresponding REDUC_CODE was found, FALSE otherwise.  */
1767 
1768 bool
reduction_code_for_scalar_code(enum tree_code code,enum tree_code * reduc_code)1769 reduction_code_for_scalar_code (enum tree_code code,
1770                                 enum tree_code *reduc_code)
1771 {
1772   switch (code)
1773   {
1774   case MAX_EXPR:
1775     *reduc_code = REDUC_MAX_EXPR;
1776     return true;
1777 
1778   case MIN_EXPR:
1779     *reduc_code = REDUC_MIN_EXPR;
1780     return true;
1781 
1782   case PLUS_EXPR:
1783     *reduc_code = REDUC_PLUS_EXPR;
1784     return true;
1785 
1786   default:
1787     return false;
1788   }
1789 }
1790 
1791 
1792 /* Function vect_is_simple_reduction
1793 
1794    Detect a cross-iteration def-use cucle that represents a simple
1795    reduction computation. We look for the following pattern:
1796 
1797    loop_header:
1798      a1 = phi < a0, a2 >
1799      a3 = ...
1800      a2 = operation (a3, a1)
1801 
1802    such that:
1803    1. operation is commutative and associative and it is safe to
1804       change the order of the computation.
1805    2. no uses for a2 in the loop (a2 is used out of the loop)
1806    3. no uses of a1 in the loop besides the reduction operation.
1807 
1808    Condition 1 is tested here.
1809    Conditions 2,3 are tested in vect_mark_stmts_to_be_vectorized.  */
1810 
1811 tree
vect_is_simple_reduction(struct loop * loop,tree phi)1812 vect_is_simple_reduction (struct loop *loop, tree phi)
1813 {
1814   edge latch_e = loop_latch_edge (loop);
1815   tree loop_arg = PHI_ARG_DEF_FROM_EDGE (phi, latch_e);
1816   tree def_stmt, def1, def2;
1817   enum tree_code code;
1818   int op_type;
1819   tree operation, op1, op2;
1820   tree type;
1821 
1822   if (TREE_CODE (loop_arg) != SSA_NAME)
1823     {
1824       if (vect_print_dump_info (REPORT_DETAILS))
1825         {
1826           fprintf (vect_dump, "reduction: not ssa_name: ");
1827           print_generic_expr (vect_dump, loop_arg, TDF_SLIM);
1828         }
1829       return NULL_TREE;
1830     }
1831 
1832   def_stmt = SSA_NAME_DEF_STMT (loop_arg);
1833   if (!def_stmt)
1834     {
1835       if (vect_print_dump_info (REPORT_DETAILS))
1836         fprintf (vect_dump, "reduction: no def_stmt.");
1837       return NULL_TREE;
1838     }
1839 
1840   if (TREE_CODE (def_stmt) != MODIFY_EXPR)
1841     {
1842       if (vect_print_dump_info (REPORT_DETAILS))
1843         {
1844           print_generic_expr (vect_dump, def_stmt, TDF_SLIM);
1845         }
1846       return NULL_TREE;
1847     }
1848 
1849   operation = TREE_OPERAND (def_stmt, 1);
1850   code = TREE_CODE (operation);
1851   if (!commutative_tree_code (code) || !associative_tree_code (code))
1852     {
1853       if (vect_print_dump_info (REPORT_DETAILS))
1854         {
1855           fprintf (vect_dump, "reduction: not commutative/associative: ");
1856           print_generic_expr (vect_dump, operation, TDF_SLIM);
1857         }
1858       return NULL_TREE;
1859     }
1860 
1861   op_type = TREE_CODE_LENGTH (code);
1862   if (op_type != binary_op)
1863     {
1864       if (vect_print_dump_info (REPORT_DETAILS))
1865         {
1866           fprintf (vect_dump, "reduction: not binary operation: ");
1867           print_generic_expr (vect_dump, operation, TDF_SLIM);
1868         }
1869       return NULL_TREE;
1870     }
1871 
1872   op1 = TREE_OPERAND (operation, 0);
1873   op2 = TREE_OPERAND (operation, 1);
1874   if (TREE_CODE (op1) != SSA_NAME || TREE_CODE (op2) != SSA_NAME)
1875     {
1876       if (vect_print_dump_info (REPORT_DETAILS))
1877         {
1878           fprintf (vect_dump, "reduction: uses not ssa_names: ");
1879           print_generic_expr (vect_dump, operation, TDF_SLIM);
1880         }
1881       return NULL_TREE;
1882     }
1883 
1884   /* Check that it's ok to change the order of the computation.  */
1885   type = TREE_TYPE (operation);
1886   if (TYPE_MAIN_VARIANT (type) != TYPE_MAIN_VARIANT (TREE_TYPE (op1))
1887       || TYPE_MAIN_VARIANT (type) != TYPE_MAIN_VARIANT (TREE_TYPE (op2)))
1888     {
1889       if (vect_print_dump_info (REPORT_DETAILS))
1890         {
1891           fprintf (vect_dump, "reduction: multiple types: operation type: ");
1892           print_generic_expr (vect_dump, type, TDF_SLIM);
1893           fprintf (vect_dump, ", operands types: ");
1894           print_generic_expr (vect_dump, TREE_TYPE (op1), TDF_SLIM);
1895           fprintf (vect_dump, ",");
1896           print_generic_expr (vect_dump, TREE_TYPE (op2), TDF_SLIM);
1897         }
1898       return NULL_TREE;
1899     }
1900 
1901   /* CHECKME: check for !flag_finite_math_only too?  */
1902   if (SCALAR_FLOAT_TYPE_P (type) && !flag_unsafe_math_optimizations)
1903     {
1904       /* Changing the order of operations changes the semantics.  */
1905       if (vect_print_dump_info (REPORT_DETAILS))
1906         {
1907           fprintf (vect_dump, "reduction: unsafe fp math optimization: ");
1908           print_generic_expr (vect_dump, operation, TDF_SLIM);
1909         }
1910       return NULL_TREE;
1911     }
1912   else if (INTEGRAL_TYPE_P (type) && TYPE_OVERFLOW_TRAPS (type))
1913     {
1914       /* Changing the order of operations changes the semantics.  */
1915       if (vect_print_dump_info (REPORT_DETAILS))
1916         {
1917           fprintf (vect_dump, "reduction: unsafe int math optimization: ");
1918           print_generic_expr (vect_dump, operation, TDF_SLIM);
1919         }
1920       return NULL_TREE;
1921     }
1922 
1923   /* reduction is safe. we're dealing with one of the following:
1924      1) integer arithmetic and no trapv
1925      2) floating point arithmetic, and special flags permit this optimization.
1926    */
1927   def1 = SSA_NAME_DEF_STMT (op1);
1928   def2 = SSA_NAME_DEF_STMT (op2);
1929   if (!def1 || !def2)
1930     {
1931       if (vect_print_dump_info (REPORT_DETAILS))
1932         {
1933           fprintf (vect_dump, "reduction: no defs for operands: ");
1934           print_generic_expr (vect_dump, operation, TDF_SLIM);
1935         }
1936       return NULL_TREE;
1937     }
1938 
1939   if (TREE_CODE (def1) == MODIFY_EXPR
1940       && flow_bb_inside_loop_p (loop, bb_for_stmt (def1))
1941       && def2 == phi)
1942     {
1943       if (vect_print_dump_info (REPORT_DETAILS))
1944         {
1945           fprintf (vect_dump, "detected reduction:");
1946           print_generic_expr (vect_dump, operation, TDF_SLIM);
1947         }
1948       return def_stmt;
1949     }
1950   else if (TREE_CODE (def2) == MODIFY_EXPR
1951       && flow_bb_inside_loop_p (loop, bb_for_stmt (def2))
1952       && def1 == phi)
1953     {
1954       /* Swap operands (just for simplicity - so that the rest of the code
1955 	 can assume that the reduction variable is always the last (second)
1956 	 argument).  */
1957       if (vect_print_dump_info (REPORT_DETAILS))
1958         {
1959           fprintf (vect_dump, "detected reduction: need to swap operands:");
1960           print_generic_expr (vect_dump, operation, TDF_SLIM);
1961         }
1962       swap_tree_operands (def_stmt, &TREE_OPERAND (operation, 0),
1963 				    &TREE_OPERAND (operation, 1));
1964       return def_stmt;
1965     }
1966   else
1967     {
1968       if (vect_print_dump_info (REPORT_DETAILS))
1969         {
1970           fprintf (vect_dump, "reduction: unknown pattern.");
1971           print_generic_expr (vect_dump, operation, TDF_SLIM);
1972         }
1973       return NULL_TREE;
1974     }
1975 }
1976 
1977 
1978 /* Function vect_is_simple_iv_evolution.
1979 
1980    FORNOW: A simple evolution of an induction variables in the loop is
1981    considered a polynomial evolution with constant step.  */
1982 
1983 bool
vect_is_simple_iv_evolution(unsigned loop_nb,tree access_fn,tree * init,tree * step)1984 vect_is_simple_iv_evolution (unsigned loop_nb, tree access_fn, tree * init,
1985 			     tree * step)
1986 {
1987   tree init_expr;
1988   tree step_expr;
1989 
1990   tree evolution_part = evolution_part_in_loop_num (access_fn, loop_nb);
1991 
1992   /* When there is no evolution in this loop, the evolution function
1993      is not "simple".  */
1994   if (evolution_part == NULL_TREE)
1995     return false;
1996 
1997   /* When the evolution is a polynomial of degree >= 2
1998      the evolution function is not "simple".  */
1999   if (tree_is_chrec (evolution_part))
2000     return false;
2001 
2002   step_expr = evolution_part;
2003   init_expr = unshare_expr (initial_condition_in_loop_num (access_fn,
2004                                                            loop_nb));
2005 
2006   if (vect_print_dump_info (REPORT_DETAILS))
2007     {
2008       fprintf (vect_dump, "step: ");
2009       print_generic_expr (vect_dump, step_expr, TDF_SLIM);
2010       fprintf (vect_dump, ",  init: ");
2011       print_generic_expr (vect_dump, init_expr, TDF_SLIM);
2012     }
2013 
2014   *init = init_expr;
2015   *step = step_expr;
2016 
2017   if (TREE_CODE (step_expr) != INTEGER_CST)
2018     {
2019       if (vect_print_dump_info (REPORT_DETAILS))
2020         fprintf (vect_dump, "step unknown.");
2021       return false;
2022     }
2023 
2024   return true;
2025 }
2026 
2027 
2028 /* Function vectorize_loops.
2029 
2030    Entry Point to loop vectorization phase.  */
2031 
2032 void
vectorize_loops(struct loops * loops)2033 vectorize_loops (struct loops *loops)
2034 {
2035   unsigned int i;
2036   unsigned int num_vectorized_loops = 0;
2037 
2038   /* Fix the verbosity level if not defined explicitly by the user.  */
2039   vect_set_dump_settings ();
2040 
2041   /* Allocate the bitmap that records which virtual variables that
2042      need to be renamed.  */
2043   vect_vnames_to_rename = BITMAP_ALLOC (NULL);
2044 
2045   /*  ----------- Analyze loops. -----------  */
2046 
2047   /* If some loop was duplicated, it gets bigger number
2048      than all previously defined loops. This fact allows us to run
2049      only over initial loops skipping newly generated ones.  */
2050   vect_loops_num = loops->num;
2051   for (i = 1; i < vect_loops_num; i++)
2052     {
2053       loop_vec_info loop_vinfo;
2054       struct loop *loop = loops->parray[i];
2055 
2056       if (!loop)
2057         continue;
2058 
2059       vect_loop_location = find_loop_location (loop);
2060       loop_vinfo = vect_analyze_loop (loop);
2061       loop->aux = loop_vinfo;
2062 
2063       if (!loop_vinfo || !LOOP_VINFO_VECTORIZABLE_P (loop_vinfo))
2064 	continue;
2065 
2066       vect_transform_loop (loop_vinfo, loops);
2067       num_vectorized_loops++;
2068     }
2069   vect_loop_location = UNKNOWN_LOC;
2070 
2071   if (vect_print_dump_info (REPORT_VECTORIZED_LOOPS))
2072     fprintf (vect_dump, "vectorized %u loops in function.\n",
2073 	     num_vectorized_loops);
2074 
2075   /*  ----------- Finalize. -----------  */
2076 
2077   BITMAP_FREE (vect_vnames_to_rename);
2078 
2079   for (i = 1; i < vect_loops_num; i++)
2080     {
2081       struct loop *loop = loops->parray[i];
2082       loop_vec_info loop_vinfo;
2083 
2084       if (!loop)
2085 	continue;
2086       loop_vinfo = loop->aux;
2087       destroy_loop_vec_info (loop_vinfo);
2088       loop->aux = NULL;
2089     }
2090 }
2091