/* Vectorizer
   Copyright (C) 2003-2018 Free Software Foundation, Inc.
   Contributed by Dorit Naishlos <dorit@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

/* Loop and basic block vectorizer.

  This file contains drivers for the three vectorizers:
  (1) loop vectorizer (inter-iteration parallelism),
  (2) loop-aware SLP (intra-iteration parallelism) (invoked by the loop
      vectorizer)
  (3) BB vectorizer (out-of-loops), aka SLP

  The rest of the vectorizer's code is organized as follows:
  - tree-vect-loop.c - loop specific parts such as reductions, etc. These are
    used by drivers (1) and (2).
  - tree-vect-loop-manip.c - vectorizer's loop control-flow utilities, used by
    drivers (1) and (2).
  - tree-vect-slp.c - BB vectorization specific analysis and transformation,
    used by drivers (2) and (3).
  - tree-vect-stmts.c - statement analysis and transformation (used by all).
  - tree-vect-data-refs.c - vectorizer specific data-refs analysis and
    manipulations (used by all).
  - tree-vect-patterns.c - vectorizable code patterns detector (used by all)

  Here's a poor attempt at illustrating that:

     tree-vectorizer.c:
     loop_vect()  loop_aware_slp()  slp_vect()
          |        /           \          /
          |       /             \        /
          tree-vect-loop.c  tree-vect-slp.c
                | \      \  /      /   |
                |  \      \/      /    |
                |   \     /\     /     |
                |    \   /  \   /      |
         tree-vect-stmts.c  tree-vect-data-refs.c
                       \      /
                    tree-vect-patterns.c
*/

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "tree.h"
#include "gimple.h"
#include "predict.h"
#include "tree-pass.h"
#include "ssa.h"
#include "cgraph.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "gimple-iterator.h"
#include "gimple-walk.h"
#include "tree-ssa-loop-manip.h"
#include "tree-ssa-loop-niter.h"
#include "tree-cfg.h"
#include "cfgloop.h"
#include "tree-vectorizer.h"
#include "tree-ssa-propagate.h"
#include "dbgcnt.h"
#include "tree-scalar-evolution.h"
#include "stringpool.h"
#include "attribs.h"


/* Loop or bb location.  */
source_location vect_location;

/* Vector mapping GIMPLE stmt to stmt_vec_info.  */
vec<stmt_vec_info> stmt_vec_info_vec;

/* For mapping simduid to vectorization factor.  */

struct simduid_to_vf : free_ptr_hash<simduid_to_vf>
{
  unsigned int simduid;
  poly_uint64 vf;

  /* hash_table support.  */
  static inline hashval_t hash (const simduid_to_vf *);
  static inline int equal (const simduid_to_vf *, const simduid_to_vf *);
};

inline hashval_t
simduid_to_vf::hash (const simduid_to_vf *p)
{
  return p->simduid;
}

inline int
simduid_to_vf::equal (const simduid_to_vf *p1, const simduid_to_vf *p2)
{
  return p1->simduid == p2->simduid;
}

/* This hash maps the OMP simd array to the corresponding simduid used
   to index into it.  For example,

        _7 = GOMP_SIMD_LANE (simduid.0)
        ...
        ...
        D.1737[_7] = stuff;

   This hash maps from the OMP simd array (D.1737[]) to the DECL_UID of
   simduid.0.  */

struct simd_array_to_simduid : free_ptr_hash<simd_array_to_simduid>
{
  tree decl;
  unsigned int simduid;

  /* hash_table support.  */
  static inline hashval_t hash (const simd_array_to_simduid *);
  static inline int equal (const simd_array_to_simduid *,
			   const simd_array_to_simduid *);
};

inline hashval_t
simd_array_to_simduid::hash (const simd_array_to_simduid *p)
{
  return DECL_UID (p->decl);
}

inline int
simd_array_to_simduid::equal (const simd_array_to_simduid *p1,
			      const simd_array_to_simduid *p2)
{
  return p1->decl == p2->decl;
}

/* Fold IFN_GOMP_SIMD_LANE, IFN_GOMP_SIMD_VF and IFN_GOMP_SIMD_LAST_LANE
   into their corresponding constants and remove
   IFN_GOMP_SIMD_ORDERED_{START,END}.  */
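
/* A minimal sketch of the rewrite this performs, assuming a recorded
   vectorization factor of 4 for simduid.0 (the GIMPLE below is
   illustrative, not taken from a real dump):

	_5 = GOMP_SIMD_VF (simduid.0);		-> uses of _5 become 4
	_7 = GOMP_SIMD_LANE (simduid.0);	-> uses of _7 become 0
	_9 = GOMP_SIMD_LAST_LANE (simduid.0, _8);  -> uses of _9 become _8

   GOMP_SIMD_ORDERED_{START,END} calls are either turned into
   GOMP_ordered_{start,end} builtin calls (when their argument is 1)
   or removed entirely.  */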

static void
adjust_simduid_builtins (hash_table<simduid_to_vf> *htab)
{
  basic_block bb;

  FOR_EACH_BB_FN (bb, cfun)
    {
      gimple_stmt_iterator i;

      for (i = gsi_start_bb (bb); !gsi_end_p (i); )
	{
	  poly_uint64 vf = 1;
	  enum internal_fn ifn;
	  gimple *stmt = gsi_stmt (i);
	  tree t;
	  if (!is_gimple_call (stmt)
	      || !gimple_call_internal_p (stmt))
	    {
	      gsi_next (&i);
	      continue;
	    }
	  ifn = gimple_call_internal_fn (stmt);
	  switch (ifn)
	    {
	    case IFN_GOMP_SIMD_LANE:
	    case IFN_GOMP_SIMD_VF:
	    case IFN_GOMP_SIMD_LAST_LANE:
	      break;
	    case IFN_GOMP_SIMD_ORDERED_START:
	    case IFN_GOMP_SIMD_ORDERED_END:
	      if (integer_onep (gimple_call_arg (stmt, 0)))
		{
		  enum built_in_function bcode
		    = (ifn == IFN_GOMP_SIMD_ORDERED_START
		       ? BUILT_IN_GOMP_ORDERED_START
		       : BUILT_IN_GOMP_ORDERED_END);
		  gimple *g
		    = gimple_build_call (builtin_decl_explicit (bcode), 0);
		  tree vdef = gimple_vdef (stmt);
		  gimple_set_vdef (g, vdef);
		  SSA_NAME_DEF_STMT (vdef) = g;
		  gimple_set_vuse (g, gimple_vuse (stmt));
		  gsi_replace (&i, g, true);
		  continue;
		}
	      gsi_remove (&i, true);
	      unlink_stmt_vdef (stmt);
	      continue;
	    default:
	      gsi_next (&i);
	      continue;
	    }
	  tree arg = gimple_call_arg (stmt, 0);
	  gcc_assert (arg != NULL_TREE);
	  gcc_assert (TREE_CODE (arg) == SSA_NAME);
	  simduid_to_vf *p = NULL, data;
	  data.simduid = DECL_UID (SSA_NAME_VAR (arg));
	  /* Need to nullify loop safelen field since its value is not
	     valid after transformation.  */
	  if (bb->loop_father && bb->loop_father->safelen > 0)
	    bb->loop_father->safelen = 0;
	  if (htab)
	    {
	      p = htab->find (&data);
	      if (p)
		vf = p->vf;
	    }
	  switch (ifn)
	    {
	    case IFN_GOMP_SIMD_VF:
	      t = build_int_cst (unsigned_type_node, vf);
	      break;
	    case IFN_GOMP_SIMD_LANE:
	      t = build_int_cst (unsigned_type_node, 0);
	      break;
	    case IFN_GOMP_SIMD_LAST_LANE:
	      t = gimple_call_arg (stmt, 1);
	      break;
	    default:
	      gcc_unreachable ();
	    }
	  tree lhs = gimple_call_lhs (stmt);
	  if (lhs)
	    replace_uses_by (lhs, t);
	  release_defs (stmt);
	  gsi_remove (&i, true);
	}
    }
}

/* Helper structure for note_simd_array_uses.  */

struct note_simd_array_uses_struct
{
  hash_table<simd_array_to_simduid> **htab;
  unsigned int simduid;
};

/* Callback for note_simd_array_uses, called through walk_gimple_op.  */

static tree
note_simd_array_uses_cb (tree *tp, int *walk_subtrees, void *data)
{
  struct walk_stmt_info *wi = (struct walk_stmt_info *) data;
  struct note_simd_array_uses_struct *ns
    = (struct note_simd_array_uses_struct *) wi->info;

  if (TYPE_P (*tp))
    *walk_subtrees = 0;
  else if (VAR_P (*tp)
	   && lookup_attribute ("omp simd array", DECL_ATTRIBUTES (*tp))
	   && DECL_CONTEXT (*tp) == current_function_decl)
    {
      simd_array_to_simduid data;
      if (!*ns->htab)
	*ns->htab = new hash_table<simd_array_to_simduid> (15);
      data.decl = *tp;
      data.simduid = ns->simduid;
      simd_array_to_simduid **slot = (*ns->htab)->find_slot (&data, INSERT);
      if (*slot == NULL)
	{
	  simd_array_to_simduid *p = XNEW (simd_array_to_simduid);
	  *p = data;
	  *slot = p;
	}
      else if ((*slot)->simduid != ns->simduid)
	(*slot)->simduid = -1U;
      *walk_subtrees = 0;
    }
  return NULL_TREE;
}

/* Find "omp simd array" temporaries and map them to the corresponding
   simduid.  */

static void
note_simd_array_uses (hash_table<simd_array_to_simduid> **htab)
{
  basic_block bb;
  gimple_stmt_iterator gsi;
  struct walk_stmt_info wi;
  struct note_simd_array_uses_struct ns;

  memset (&wi, 0, sizeof (wi));
  wi.info = &ns;
  ns.htab = htab;

  FOR_EACH_BB_FN (bb, cfun)
    for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
      {
	gimple *stmt = gsi_stmt (gsi);
	if (!is_gimple_call (stmt) || !gimple_call_internal_p (stmt))
	  continue;
	switch (gimple_call_internal_fn (stmt))
	  {
	  case IFN_GOMP_SIMD_LANE:
	  case IFN_GOMP_SIMD_VF:
	  case IFN_GOMP_SIMD_LAST_LANE:
	    break;
	  default:
	    continue;
	  }
	tree lhs = gimple_call_lhs (stmt);
	if (lhs == NULL_TREE)
	  continue;
	imm_use_iterator use_iter;
	gimple *use_stmt;
	ns.simduid = DECL_UID (SSA_NAME_VAR (gimple_call_arg (stmt, 0)));
	FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, lhs)
	  if (!is_gimple_debug (use_stmt))
	    walk_gimple_op (use_stmt, note_simd_array_uses_cb, &wi);
      }
}

/* Shrink arrays with "omp simd array" attribute to the corresponding
   vectorization factor.  */
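
/* For instance (an illustrative sketch, not a real dump): an OMP simd
   array declared with a conservatively large element count, say

	float D.1737[64];

   whose simduid was vectorized with VF 8 would be shrunk here to

	float D.1737[8];

   by rebuilding its array type with build_array_type_nelts and
   re-laying out the decl.  */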

static void
shrink_simd_arrays
  (hash_table<simd_array_to_simduid> *simd_array_to_simduid_htab,
   hash_table<simduid_to_vf> *simduid_to_vf_htab)
{
  for (hash_table<simd_array_to_simduid>::iterator iter
	 = simd_array_to_simduid_htab->begin ();
       iter != simd_array_to_simduid_htab->end (); ++iter)
    if ((*iter)->simduid != -1U)
      {
	tree decl = (*iter)->decl;
	poly_uint64 vf = 1;
	if (simduid_to_vf_htab)
	  {
	    simduid_to_vf *p = NULL, data;
	    data.simduid = (*iter)->simduid;
	    p = simduid_to_vf_htab->find (&data);
	    if (p)
	      vf = p->vf;
	  }
	tree atype
	  = build_array_type_nelts (TREE_TYPE (TREE_TYPE (decl)), vf);
	TREE_TYPE (decl) = atype;
	relayout_decl (decl);
      }

  delete simd_array_to_simduid_htab;
}

/* Initialize the vec_info with kind KIND_IN and target cost data
   TARGET_COST_DATA_IN.  */

vec_info::vec_info (vec_info::vec_kind kind_in, void *target_cost_data_in)
  : kind (kind_in),
    datarefs (vNULL),
    ddrs (vNULL),
    target_cost_data (target_cost_data_in)
{
}

vec_info::~vec_info ()
{
  slp_instance instance;
  struct data_reference *dr;
  unsigned int i;

  FOR_EACH_VEC_ELT (datarefs, i, dr)
    if (dr->aux)
      {
        free (dr->aux);
        dr->aux = NULL;
      }

  FOR_EACH_VEC_ELT (slp_instances, i, instance)
    vect_free_slp_instance (instance);

  free_data_refs (datarefs);
  free_dependence_relations (ddrs);
  destroy_cost_data (target_cost_data);
}

/* A helper function to free scev and LOOP niter information, as well as
   clear loop constraint LOOP_C_FINITE.  */

void
vect_free_loop_info_assumptions (struct loop *loop)
{
  scev_reset_htab ();
  /* We need to explicitly reset upper bound information since it is
     used even after free_numbers_of_iterations_estimates.  */
  loop->any_upper_bound = false;
  loop->any_likely_upper_bound = false;
  free_numbers_of_iterations_estimates (loop);
  loop_constraint_clear (loop, LOOP_C_FINITE);
}

/* Return whether STMT is inside the region we try to vectorize.  */

bool
vect_stmt_in_region_p (vec_info *vinfo, gimple *stmt)
{
  if (!gimple_bb (stmt))
    return false;

  if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
    {
      struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
      if (!flow_bb_inside_loop_p (loop, gimple_bb (stmt)))
	return false;
    }
  else
    {
      bb_vec_info bb_vinfo = as_a <bb_vec_info> (vinfo);
      if (gimple_bb (stmt) != BB_VINFO_BB (bb_vinfo)
	  || gimple_uid (stmt) == -1U
	  || gimple_code (stmt) == GIMPLE_PHI)
	return false;
    }

  return true;
}


/* If LOOP has been versioned during ifcvt, return the internal call
   guarding it.  */
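
/* The shape being looked for (a sketch; the loop numbers are
   arbitrary):

	if (LOOP_VECTORIZED (2, 3))
	  loop2  (the version to be vectorized)
	else
	  loop3  (the scalar fallback copy)

   The call's two arguments are the loop numbers of the two versions,
   so the search below accepts the call if either argument matches
   LOOP->num.  */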

static gimple *
vect_loop_vectorized_call (struct loop *loop)
{
  basic_block bb = loop_preheader_edge (loop)->src;
  gimple *g;
  do
    {
      g = last_stmt (bb);
      if (g)
	break;
      if (!single_pred_p (bb))
	break;
      bb = single_pred (bb);
    }
  while (1);
  if (g && gimple_code (g) == GIMPLE_COND)
    {
      gimple_stmt_iterator gsi = gsi_for_stmt (g);
      gsi_prev (&gsi);
      if (!gsi_end_p (gsi))
	{
	  g = gsi_stmt (gsi);
	  if (gimple_call_internal_p (g, IFN_LOOP_VECTORIZED)
	      && (tree_to_shwi (gimple_call_arg (g, 0)) == loop->num
		  || tree_to_shwi (gimple_call_arg (g, 1)) == loop->num))
	    return g;
	}
    }
  return NULL;
}

/* If LOOP has been versioned during loop distribution, return the guarding
   internal call.  */
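
/* A sketch of the versioned shape (illustrative only):

	_cond = <runtime alias check>;
	if (LOOP_DIST_ALIAS (orig_loop_num, _cond))
	  distributed loops
	else
	  original loop

   The first argument must equal LOOP->orig_loop_num for the call to
   be considered the guard of LOOP.  */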

static gimple *
vect_loop_dist_alias_call (struct loop *loop)
{
  basic_block bb;
  basic_block entry;
  struct loop *outer, *orig;
  gimple_stmt_iterator gsi;
  gimple *g;

  if (loop->orig_loop_num == 0)
    return NULL;

  orig = get_loop (cfun, loop->orig_loop_num);
  if (orig == NULL)
    {
      /* The original loop is somehow destroyed.  Clear the information.  */
      loop->orig_loop_num = 0;
      return NULL;
    }

  if (loop != orig)
    bb = nearest_common_dominator (CDI_DOMINATORS, loop->header, orig->header);
  else
    bb = loop_preheader_edge (loop)->src;

  outer = bb->loop_father;
  entry = ENTRY_BLOCK_PTR_FOR_FN (cfun);

  /* Look upward in dominance tree.  */
  for (; bb != entry && flow_bb_inside_loop_p (outer, bb);
       bb = get_immediate_dominator (CDI_DOMINATORS, bb))
    {
      g = last_stmt (bb);
      if (g == NULL || gimple_code (g) != GIMPLE_COND)
	continue;

      gsi = gsi_for_stmt (g);
      gsi_prev (&gsi);
      if (gsi_end_p (gsi))
	continue;

      g = gsi_stmt (gsi);
      /* The guarding internal function call must have the same distribution
	 alias id.  */
      if (gimple_call_internal_p (g, IFN_LOOP_DIST_ALIAS)
	  && (tree_to_shwi (gimple_call_arg (g, 0)) == loop->orig_loop_num))
	return g;
    }
  return NULL;
}

/* Set the uids of all the statements in basic blocks inside loop
   represented by LOOP_VINFO.  LOOP_VECTORIZED_CALL is the internal
   call guarding the loop which has been if-converted.  */
static void
set_uid_loop_bbs (loop_vec_info loop_vinfo, gimple *loop_vectorized_call)
{
  tree arg = gimple_call_arg (loop_vectorized_call, 1);
  basic_block *bbs;
  unsigned int i;
  struct loop *scalar_loop = get_loop (cfun, tree_to_shwi (arg));

  LOOP_VINFO_SCALAR_LOOP (loop_vinfo) = scalar_loop;
  gcc_checking_assert (vect_loop_vectorized_call (scalar_loop)
		       == loop_vectorized_call);
  /* If we are going to vectorize the outer loop, prevent vectorization
     of the inner loop in the scalar loop - either the scalar loop
     version is thrown away, so the work would be wasted, or it is used
     only for a few iterations.  */
  if (scalar_loop->inner)
    {
      gimple *g = vect_loop_vectorized_call (scalar_loop->inner);
      if (g)
	{
	  arg = gimple_call_arg (g, 0);
	  get_loop (cfun, tree_to_shwi (arg))->dont_vectorize = true;
	  fold_loop_internal_call (g, boolean_false_node);
	}
    }
  bbs = get_loop_body (scalar_loop);
  for (i = 0; i < scalar_loop->num_nodes; i++)
    {
      basic_block bb = bbs[i];
      gimple_stmt_iterator gsi;
      for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
	{
	  gimple *phi = gsi_stmt (gsi);
	  gimple_set_uid (phi, 0);
	}
      for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
	{
	  gimple *stmt = gsi_stmt (gsi);
	  gimple_set_uid (stmt, 0);
	}
    }
  free (bbs);
}

/* Function vectorize_loops.

   Entry point to loop vectorization phase.  */

unsigned
vectorize_loops (void)
{
  unsigned int i;
  unsigned int num_vectorized_loops = 0;
  unsigned int vect_loops_num;
  struct loop *loop;
  hash_table<simduid_to_vf> *simduid_to_vf_htab = NULL;
  hash_table<simd_array_to_simduid> *simd_array_to_simduid_htab = NULL;
  bool any_ifcvt_loops = false;
  unsigned ret = 0;
  struct loop *new_loop;

  vect_loops_num = number_of_loops (cfun);

  /* Bail out if there are no loops.  */
  if (vect_loops_num <= 1)
    return 0;

  if (cfun->has_simduid_loops)
    note_simd_array_uses (&simd_array_to_simduid_htab);

  init_stmt_vec_info_vec ();

  /*  ----------- Analyze loops. -----------  */

  /* If a loop was duplicated, it gets a bigger number
     than all previously defined loops.  This fact allows us to run
     over just the initial loops, skipping newly generated ones.  */
  FOR_EACH_LOOP (loop, 0)
    if (loop->dont_vectorize)
      {
	any_ifcvt_loops = true;
	/* If-conversion sometimes versions both the outer loop
	   (for the case when outer loop vectorization might be
	   desirable) as well as the inner loop in the scalar version
	   of the loop.  So we have:
	    if (LOOP_VECTORIZED (1, 3))
	      {
		loop1
		  loop2
	      }
	    else
	      loop3 (copy of loop1)
		if (LOOP_VECTORIZED (4, 5))
		  loop4 (copy of loop2)
		else
		  loop5 (copy of loop4)
	   If FOR_EACH_LOOP gives us loop3 first (which has
	   dont_vectorize set), make sure to process loop1 before loop4;
	   so that we can prevent vectorization of loop4 if loop1
	   is successfully vectorized.  */
	if (loop->inner)
	  {
	    gimple *loop_vectorized_call
	      = vect_loop_vectorized_call (loop);
	    if (loop_vectorized_call
		&& vect_loop_vectorized_call (loop->inner))
	      {
		tree arg = gimple_call_arg (loop_vectorized_call, 0);
		struct loop *vector_loop
		  = get_loop (cfun, tree_to_shwi (arg));
		if (vector_loop && vector_loop != loop)
		  {
		    loop = vector_loop;
		    /* Make sure we don't vectorize it twice.  */
		    loop->dont_vectorize = true;
		    goto try_vectorize;
		  }
	      }
	  }
      }
    else
      {
	loop_vec_info loop_vinfo, orig_loop_vinfo;
	gimple *loop_vectorized_call, *loop_dist_alias_call;
       try_vectorize:
	if (!((flag_tree_loop_vectorize
	       && optimize_loop_nest_for_speed_p (loop))
	      || loop->force_vectorize))
	  continue;
	orig_loop_vinfo = NULL;
	loop_vectorized_call = vect_loop_vectorized_call (loop);
	loop_dist_alias_call = vect_loop_dist_alias_call (loop);
       vectorize_epilogue:
	vect_location = find_loop_location (loop);
	if (LOCATION_LOCUS (vect_location) != UNKNOWN_LOCATION
	    && dump_enabled_p ())
	  dump_printf (MSG_NOTE, "\nAnalyzing loop at %s:%d\n",
		       LOCATION_FILE (vect_location),
		       LOCATION_LINE (vect_location));

	loop_vinfo = vect_analyze_loop (loop, orig_loop_vinfo);
	loop->aux = loop_vinfo;

	if (!loop_vinfo || !LOOP_VINFO_VECTORIZABLE_P (loop_vinfo))
	  {
	    /* Free existing information if the loop was analyzed with some
	       assumptions.  */
	    if (loop_constraint_set_p (loop, LOOP_C_FINITE))
	      vect_free_loop_info_assumptions (loop);

	    /* If we applied if-conversion then try to vectorize the
	       BB of innermost loops.
	       ???  Ideally BB vectorization would learn to vectorize
	       control flow by applying if-conversion on-the-fly, the
	       following retains the if-converted loop body even when
	       only non-if-converted parts took part in BB vectorization.  */
	    if (flag_tree_slp_vectorize != 0
		&& loop_vectorized_call
		&& ! loop->inner)
	      {
		basic_block bb = loop->header;
		bool has_mask_load_store = false;
		for (gimple_stmt_iterator gsi = gsi_start_bb (bb);
		     !gsi_end_p (gsi); gsi_next (&gsi))
		  {
		    gimple *stmt = gsi_stmt (gsi);
		    if (is_gimple_call (stmt)
			&& gimple_call_internal_p (stmt)
			&& (gimple_call_internal_fn (stmt) == IFN_MASK_LOAD
			    || gimple_call_internal_fn (stmt) == IFN_MASK_STORE))
		      {
			has_mask_load_store = true;
			break;
		      }
		    gimple_set_uid (stmt, -1);
		    gimple_set_visited (stmt, false);
		  }
		if (! has_mask_load_store && vect_slp_bb (bb))
		  {
		    dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
				     "basic block vectorized\n");
		    fold_loop_internal_call (loop_vectorized_call,
					     boolean_true_node);
		    loop_vectorized_call = NULL;
		    ret |= TODO_cleanup_cfg;
		  }
	      }
	    /* If outer loop vectorization fails for LOOP_VECTORIZED guarded
	       loop, don't vectorize its inner loop; we'll attempt to
	       vectorize LOOP_VECTORIZED guarded inner loop of the scalar
	       loop version.  */
	    if (loop_vectorized_call && loop->inner)
	      loop->inner->dont_vectorize = true;
	    continue;
	  }

	if (!dbg_cnt (vect_loop))
	  {
	    /* We may miss some if-converted loops due to
	       the debug counter.  Set any_ifcvt_loops to visit
	       them at finalization.  */
	    any_ifcvt_loops = true;
	    /* Free existing information if the loop was analyzed with some
	       assumptions.  */
	    if (loop_constraint_set_p (loop, LOOP_C_FINITE))
	      vect_free_loop_info_assumptions (loop);

	    break;
	  }

	if (loop_vectorized_call)
	  set_uid_loop_bbs (loop_vinfo, loop_vectorized_call);
	if (LOCATION_LOCUS (vect_location) != UNKNOWN_LOCATION
	    && dump_enabled_p ())
	  dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
			   "loop vectorized\n");
	new_loop = vect_transform_loop (loop_vinfo);
	num_vectorized_loops++;
	/* Now that the loop has been vectorized, allow it to be unrolled
	   etc.  */
	loop->force_vectorize = false;

	if (loop->simduid)
	  {
	    simduid_to_vf *simduid_to_vf_data = XNEW (simduid_to_vf);
	    if (!simduid_to_vf_htab)
	      simduid_to_vf_htab = new hash_table<simduid_to_vf> (15);
	    simduid_to_vf_data->simduid = DECL_UID (loop->simduid);
	    simduid_to_vf_data->vf = loop_vinfo->vectorization_factor;
	    *simduid_to_vf_htab->find_slot (simduid_to_vf_data, INSERT)
	      = simduid_to_vf_data;
	  }

	if (loop_vectorized_call)
	  {
	    fold_loop_internal_call (loop_vectorized_call, boolean_true_node);
	    loop_vectorized_call = NULL;
	    ret |= TODO_cleanup_cfg;
	  }
	if (loop_dist_alias_call)
	  {
	    tree value = gimple_call_arg (loop_dist_alias_call, 1);
	    fold_loop_internal_call (loop_dist_alias_call, value);
	    loop_dist_alias_call = NULL;
	    ret |= TODO_cleanup_cfg;
	  }

	if (new_loop)
	  {
	    /* The epilogue of a vectorized loop must be vectorized too.  */
	    vect_loops_num = number_of_loops (cfun);
	    loop = new_loop;
	    orig_loop_vinfo = loop_vinfo;  /* To be passed to vect_analyze_loop.  */
	    goto vectorize_epilogue;
	  }
      }

  vect_location = UNKNOWN_LOCATION;

  statistics_counter_event (cfun, "Vectorized loops", num_vectorized_loops);
  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "vectorized %u loops in function.\n",
		     num_vectorized_loops);

  /*  ----------- Finalize. -----------  */

  if (any_ifcvt_loops)
    for (i = 1; i < vect_loops_num; i++)
      {
	loop = get_loop (cfun, i);
	if (loop && loop->dont_vectorize)
	  {
	    gimple *g = vect_loop_vectorized_call (loop);
	    if (g)
	      {
		fold_loop_internal_call (g, boolean_false_node);
		ret |= TODO_cleanup_cfg;
		g = NULL;
	      }
	    else
	      g = vect_loop_dist_alias_call (loop);

	    if (g)
	      {
		fold_loop_internal_call (g, boolean_false_node);
		ret |= TODO_cleanup_cfg;
	      }
	  }
      }

  for (i = 1; i < vect_loops_num; i++)
    {
      loop_vec_info loop_vinfo;
      bool has_mask_store;

      loop = get_loop (cfun, i);
      if (!loop)
	continue;
      loop_vinfo = (loop_vec_info) loop->aux;
      has_mask_store = false;
      if (loop_vinfo)
	has_mask_store = LOOP_VINFO_HAS_MASK_STORE (loop_vinfo);
      delete loop_vinfo;
      if (has_mask_store
	  && targetm.vectorize.empty_mask_is_expensive (IFN_MASK_STORE))
	optimize_mask_stores (loop);
      loop->aux = NULL;
    }

  free_stmt_vec_info_vec ();

  /* Fold IFN_GOMP_SIMD_{VF,LANE,LAST_LANE,ORDERED_{START,END}} builtins.  */
  if (cfun->has_simduid_loops)
    adjust_simduid_builtins (simduid_to_vf_htab);

  /* Shrink any "omp simd array" temporary arrays to the
     actual vectorization factors.  */
  if (simd_array_to_simduid_htab)
    shrink_simd_arrays (simd_array_to_simduid_htab, simduid_to_vf_htab);
  delete simduid_to_vf_htab;
  cfun->has_simduid_loops = false;

  if (num_vectorized_loops > 0)
    {
      /* If we vectorized any loop only virtual SSA form needs to be updated.
	 ???  Also while we try hard to update loop-closed SSA form we fail
	 to properly do this in some corner-cases (see PR56286).  */
      rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa_only_virtuals);
      return TODO_cleanup_cfg;
    }

  return ret;
}


/* Entry point to the simduid cleanup pass.  */

namespace {

const pass_data pass_data_simduid_cleanup =
{
  GIMPLE_PASS, /* type */
  "simduid", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_NONE, /* tv_id */
  ( PROP_ssa | PROP_cfg ), /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  0, /* todo_flags_finish */
};

class pass_simduid_cleanup : public gimple_opt_pass
{
public:
  pass_simduid_cleanup (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_simduid_cleanup, ctxt)
  {}

  /* opt_pass methods: */
  opt_pass * clone () { return new pass_simduid_cleanup (m_ctxt); }
  virtual bool gate (function *fun) { return fun->has_simduid_loops; }
  virtual unsigned int execute (function *);

}; // class pass_simduid_cleanup

unsigned int
pass_simduid_cleanup::execute (function *fun)
{
  hash_table<simd_array_to_simduid> *simd_array_to_simduid_htab = NULL;

  note_simd_array_uses (&simd_array_to_simduid_htab);

  /* Fold IFN_GOMP_SIMD_{VF,LANE,LAST_LANE,ORDERED_{START,END}} builtins.  */
  adjust_simduid_builtins (NULL);

  /* Shrink any "omp simd array" temporary arrays to the
     actual vectorization factors.  */
  if (simd_array_to_simduid_htab)
    shrink_simd_arrays (simd_array_to_simduid_htab, NULL);
  fun->has_simduid_loops = false;
  return 0;
}

}  // anon namespace

gimple_opt_pass *
make_pass_simduid_cleanup (gcc::context *ctxt)
{
  return new pass_simduid_cleanup (ctxt);
}


/*  Entry point to basic block SLP phase.  */

namespace {

const pass_data pass_data_slp_vectorize =
{
  GIMPLE_PASS, /* type */
  "slp", /* name */
  OPTGROUP_LOOP | OPTGROUP_VEC, /* optinfo_flags */
  TV_TREE_SLP_VECTORIZATION, /* tv_id */
  ( PROP_ssa | PROP_cfg ), /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  TODO_update_ssa, /* todo_flags_finish */
};

class pass_slp_vectorize : public gimple_opt_pass
{
public:
  pass_slp_vectorize (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_slp_vectorize, ctxt)
  {}

  /* opt_pass methods: */
  opt_pass * clone () { return new pass_slp_vectorize (m_ctxt); }
  virtual bool gate (function *) { return flag_tree_slp_vectorize != 0; }
  virtual unsigned int execute (function *);

}; // class pass_slp_vectorize

unsigned int
pass_slp_vectorize::execute (function *fun)
{
  basic_block bb;

  bool in_loop_pipeline = scev_initialized_p ();
  if (!in_loop_pipeline)
    {
      loop_optimizer_init (LOOPS_NORMAL);
      scev_initialize ();
    }

  /* Mark all stmts as not belonging to the current region and unvisited.  */
  FOR_EACH_BB_FN (bb, fun)
    {
      for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi);
	   gsi_next (&gsi))
	{
	  gimple *stmt = gsi_stmt (gsi);
	  gimple_set_uid (stmt, -1);
	  gimple_set_visited (stmt, false);
	}
    }

  init_stmt_vec_info_vec ();

  FOR_EACH_BB_FN (bb, fun)
    {
      if (vect_slp_bb (bb))
	dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
			 "basic block vectorized\n");
    }

  free_stmt_vec_info_vec ();

  if (!in_loop_pipeline)
    {
      scev_finalize ();
      loop_optimizer_finalize ();
    }

  return 0;
}

} // anon namespace

gimple_opt_pass *
make_pass_slp_vectorize (gcc::context *ctxt)
{
  return new pass_slp_vectorize (ctxt);
}


/* Increase alignment of global arrays to improve vectorization potential.
   TODO:
   - Consider also structs that have an array field.
   - Use ipa analysis to prune arrays that can't be vectorized?
     This should involve global alignment analysis and in the future also
     array padding.  */
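
/* For example (an illustrative sketch; the 16-byte vector size is an
   assumption about the target):

	static float a[1024];	-- default alignment, e.g. 4 bytes

   would have its alignment raised to that of the target's vector type
   (e.g. 16 bytes for a 4 x float vector), so that vector accesses to
   it can use aligned loads and stores.  */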

static unsigned get_vec_alignment_for_type (tree);
static hash_map<tree, unsigned> *type_align_map;

/* Return the alignment of the array's vector type corresponding to its
   scalar type, or 0 if no vector type exists.  */
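
/* A sketch of the intent, assuming a target whose vector type for
   float is a 16-byte V4SF: for "float[16]" this returns the TYPE_ALIGN
   of V4SF (128 bits), while for "float[2]" it returns 0 because the
   array is smaller than one vector.  */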
static unsigned
get_vec_alignment_for_array_type (tree type)
{
  gcc_assert (TREE_CODE (type) == ARRAY_TYPE);
  poly_uint64 array_size, vector_size;

  tree vectype = get_vectype_for_scalar_type (strip_array_types (type));
  if (!vectype
      || !poly_int_tree_p (TYPE_SIZE (type), &array_size)
      || !poly_int_tree_p (TYPE_SIZE (vectype), &vector_size)
      || maybe_lt (array_size, vector_size))
    return 0;

  return TYPE_ALIGN (vectype);
}

/* Return the alignment of the field having the maximum alignment of the
   vector type corresponding to its scalar type.  For now, we only
   consider fields whose offset is a multiple of their vector alignment.
   Returns 0 if no suitable field is found.  */
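
/* A worked sketch (assuming V4SF with 128-bit alignment is the vector
   type for float on the target):

	struct S { float x[4]; float y[4]; };

   Field x sits at bit offset 0 and y at bit offset 128; both offsets
   are multiples of 128, so max_align becomes 128 bits.  A field at a
   misaligned offset is skipped, and one at a variable offset stops
   the scan.  */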
static unsigned
get_vec_alignment_for_record_type (tree type)
{
  gcc_assert (TREE_CODE (type) == RECORD_TYPE);

  unsigned max_align = 0, alignment;
  HOST_WIDE_INT offset;
  tree offset_tree;

  if (TYPE_PACKED (type))
    return 0;

  unsigned *slot = type_align_map->get (type);
  if (slot)
    return *slot;

  for (tree field = first_field (type);
       field != NULL_TREE;
       field = DECL_CHAIN (field))
    {
      /* Skip if not FIELD_DECL or if alignment is set by user.  */
      if (TREE_CODE (field) != FIELD_DECL
	  || DECL_USER_ALIGN (field)
	  || DECL_ARTIFICIAL (field))
	continue;

      /* We don't need to process the type further if offset is variable,
	 since the offsets of remaining members will also be variable.  */
      if (TREE_CODE (DECL_FIELD_OFFSET (field)) != INTEGER_CST
	  || TREE_CODE (DECL_FIELD_BIT_OFFSET (field)) != INTEGER_CST)
	break;

      /* Similarly stop processing the type if offset_tree
	 does not fit in unsigned HOST_WIDE_INT.  */
      offset_tree = bit_position (field);
      if (!tree_fits_uhwi_p (offset_tree))
	break;

      offset = tree_to_uhwi (offset_tree);
      alignment = get_vec_alignment_for_type (TREE_TYPE (field));

      /* Get the maximum alignment of vectorized field/array among those
	 members whose offset is a multiple of the vector alignment.  */
      if (alignment
	  && (offset % alignment == 0)
	  && (alignment > max_align))
	max_align = alignment;
    }

  type_align_map->put (type, max_align);
  return max_align;
}

/* Return the alignment of the vector type corresponding to the type's
   scalar type, or 0 if it doesn't exist or if the vector alignment is
   less than the type's alignment.  */
static unsigned
get_vec_alignment_for_type (tree type)
{
  if (type == NULL_TREE)
    return 0;

  gcc_assert (TYPE_P (type));

  unsigned alignment = 0;
  switch (TREE_CODE (type))
    {
      case ARRAY_TYPE:
	alignment = get_vec_alignment_for_array_type (type);
	break;
      case RECORD_TYPE:
	alignment = get_vec_alignment_for_record_type (type);
	break;
      default:
	alignment = 0;
	break;
    }

  return (alignment > TYPE_ALIGN (type)) ? alignment : 0;
}

/* Entry point to the increase_alignment pass.  */
static unsigned int
increase_alignment (void)
{
  varpool_node *vnode;

  vect_location = UNKNOWN_LOCATION;
  type_align_map = new hash_map<tree, unsigned>;

  /* Increase the alignment of all global arrays for vectorization.  */
  FOR_EACH_DEFINED_VARIABLE (vnode)
    {
      tree decl = vnode->decl;
      unsigned int alignment;

      if ((decl_in_symtab_p (decl)
	  && !symtab_node::get (decl)->can_increase_alignment_p ())
	  || DECL_USER_ALIGN (decl) || DECL_ARTIFICIAL (decl))
	continue;

      alignment = get_vec_alignment_for_type (TREE_TYPE (decl));
      if (alignment && vect_can_force_dr_alignment_p (decl, alignment))
	{
	  vnode->increase_alignment (alignment);
	  dump_printf (MSG_NOTE, "Increasing alignment of decl: ");
	  dump_generic_expr (MSG_NOTE, TDF_SLIM, decl);
	  dump_printf (MSG_NOTE, "\n");
	}
    }

  delete type_align_map;
  return 0;
}


namespace {

const pass_data pass_data_ipa_increase_alignment =
{
  SIMPLE_IPA_PASS, /* type */
  "increase_alignment", /* name */
  OPTGROUP_LOOP | OPTGROUP_VEC, /* optinfo_flags */
  TV_IPA_OPT, /* tv_id */
  0, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  0, /* todo_flags_finish */
};

class pass_ipa_increase_alignment : public simple_ipa_opt_pass
{
public:
  pass_ipa_increase_alignment (gcc::context *ctxt)
    : simple_ipa_opt_pass (pass_data_ipa_increase_alignment, ctxt)
  {}

  /* opt_pass methods: */
  virtual bool gate (function *)
    {
      return flag_section_anchors && flag_tree_loop_vectorize;
    }

  virtual unsigned int execute (function *) { return increase_alignment (); }

}; // class pass_ipa_increase_alignment

} // anon namespace

simple_ipa_opt_pass *
make_pass_ipa_increase_alignment (gcc::context *ctxt)
{
  return new pass_ipa_increase_alignment (ctxt);
}