/* Vectorizer
   Copyright (C) 2003-2019 Free Software Foundation, Inc.
   Contributed by Dorit Naishlos <dorit@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

/* Loop and basic block vectorizer.

  This file contains drivers for the three vectorizers:
  (1) loop vectorizer (inter-iteration parallelism),
  (2) loop-aware SLP (intra-iteration parallelism) (invoked by the loop
      vectorizer)
  (3) BB vectorizer (out-of-loops), aka SLP

  The rest of the vectorizer's code is organized as follows:
  - tree-vect-loop.c - loop specific parts such as reductions, etc. These are
    used by drivers (1) and (2).
  - tree-vect-loop-manip.c - vectorizer's loop control-flow utilities, used by
    drivers (1) and (2).
  - tree-vect-slp.c - BB vectorization specific analysis and transformation,
    used by drivers (2) and (3).
  - tree-vect-stmts.c - statement analysis and transformation (used by all).
  - tree-vect-data-refs.c - vectorizer specific data-refs analysis and
    manipulations (used by all).
  - tree-vect-patterns.c - vectorizable code patterns detector (used by all)

  Here's a poor attempt at illustrating that:

     tree-vectorizer.c:
     loop_vect()  loop_aware_slp()  slp_vect()
          |        /           \          /
          |       /             \        /
          tree-vect-loop.c  tree-vect-slp.c
                | \      \  /      /   |
                |  \      \/      /    |
                |   \     /\     /     |
                |    \   /  \   /      |
         tree-vect-stmts.c  tree-vect-data-refs.c
                       \      /
                    tree-vect-patterns.c
*/

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "tree.h"
#include "gimple.h"
#include "predict.h"
#include "tree-pass.h"
#include "ssa.h"
#include "cgraph.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "gimple-iterator.h"
#include "gimple-walk.h"
#include "tree-ssa-loop-manip.h"
#include "tree-ssa-loop-niter.h"
#include "tree-cfg.h"
#include "cfgloop.h"
#include "tree-vectorizer.h"
#include "tree-ssa-propagate.h"
#include "dbgcnt.h"
#include "tree-scalar-evolution.h"
#include "stringpool.h"
#include "attribs.h"
#include "gimple-pretty-print.h"
#include "opt-problem.h"
#include "internal-fn.h"


/* Loop or bb location, with hotness information.  */
dump_user_location_t vect_location;

/* auto_purge_vect_location's dtor: reset the vect_location
   global, to avoid stale location_t values that could reference
   GC-ed blocks.  */

auto_purge_vect_location::~auto_purge_vect_location ()
{
  vect_location = dump_user_location_t ();
}

/* Dump a cost entry according to args to F.  */

void
dump_stmt_cost (FILE *f, void *data, int count, enum vect_cost_for_stmt kind,
		stmt_vec_info stmt_info, int misalign, unsigned cost,
		enum vect_cost_model_location where)
{
  fprintf (f, "%p ", data);
  if (stmt_info)
    {
      print_gimple_expr (f, STMT_VINFO_STMT (stmt_info), 0, TDF_SLIM);
      fprintf (f, " ");
    }
  else
    fprintf (f, "<unknown> ");
  fprintf (f, "%d times ", count);
  const char *ks = "unknown";
  switch (kind)
    {
    case scalar_stmt:
      ks = "scalar_stmt";
      break;
    case scalar_load:
      ks = "scalar_load";
      break;
    case scalar_store:
      ks = "scalar_store";
      break;
    case vector_stmt:
      ks = "vector_stmt";
      break;
    case vector_load:
      ks = "vector_load";
      break;
    case vector_gather_load:
      ks = "vector_gather_load";
      break;
    case unaligned_load:
      ks = "unaligned_load";
      break;
    case unaligned_store:
      ks = "unaligned_store";
      break;
    case vector_store:
      ks = "vector_store";
      break;
    case vector_scatter_store:
      ks = "vector_scatter_store";
      break;
    case vec_to_scalar:
      ks = "vec_to_scalar";
      break;
    case scalar_to_vec:
      ks = "scalar_to_vec";
      break;
    case cond_branch_not_taken:
      ks = "cond_branch_not_taken";
      break;
    case cond_branch_taken:
      ks = "cond_branch_taken";
      break;
    case vec_perm:
      ks = "vec_perm";
      break;
    case vec_promote_demote:
      ks = "vec_promote_demote";
      break;
    case vec_construct:
      ks = "vec_construct";
      break;
    }
  fprintf (f, "%s ", ks);
  if (kind == unaligned_load || kind == unaligned_store)
    fprintf (f, "(misalign %d) ", misalign);
  fprintf (f, "costs %u ", cost);
  const char *ws = "unknown";
  switch (where)
    {
    case vect_prologue:
      ws = "prologue";
      break;
    case vect_body:
      ws = "body";
      break;
    case vect_epilogue:
      ws = "epilogue";
      break;
    }
  fprintf (f, "in %s\n", ws);
}

/* For mapping simduid to vectorization factor.  */

struct simduid_to_vf : free_ptr_hash<simduid_to_vf>
{
  unsigned int simduid;
  poly_uint64 vf;

  /* hash_table support.  */
  static inline hashval_t hash (const simduid_to_vf *);
  static inline int equal (const simduid_to_vf *, const simduid_to_vf *);
};

inline hashval_t
simduid_to_vf::hash (const simduid_to_vf *p)
{
  return p->simduid;
}

inline int
simduid_to_vf::equal (const simduid_to_vf *p1, const simduid_to_vf *p2)
{
  return p1->simduid == p2->simduid;
}

/* This hash maps an OMP simd array to the corresponding simduid used
   to index into it, like so:

        _7 = GOMP_SIMD_LANE (simduid.0)
        ...
        ...
        D.1737[_7] = stuff;

   That is, it maps the OMP simd array (D.1737[]) to the DECL_UID of
   simduid.0.  */

struct simd_array_to_simduid : free_ptr_hash<simd_array_to_simduid>
{
  tree decl;
  unsigned int simduid;

  /* hash_table support.  */
  static inline hashval_t hash (const simd_array_to_simduid *);
  static inline int equal (const simd_array_to_simduid *,
			   const simd_array_to_simduid *);
};

inline hashval_t
simd_array_to_simduid::hash (const simd_array_to_simduid *p)
{
  return DECL_UID (p->decl);
}

inline int
simd_array_to_simduid::equal (const simd_array_to_simduid *p1,
			      const simd_array_to_simduid *p2)
{
  return p1->decl == p2->decl;
}

/* Fold IFN_GOMP_SIMD_LANE, IFN_GOMP_SIMD_VF and IFN_GOMP_SIMD_LAST_LANE
   into their corresponding constants and remove
   IFN_GOMP_SIMD_ORDERED_{START,END}.  */
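
/* For example (a sketch; the SSA names and the factor of 8 are purely
   illustrative), if the loop with simduid simduid.0 was recorded with a
   vectorization factor of 8, then in

        _5 = GOMP_SIMD_VF (simduid.0);
        _7 = GOMP_SIMD_LANE (simduid.0);

   uses of _5 are replaced by the constant 8, uses of _7 by the constant
   0, and both calls are removed.  */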

static void
adjust_simduid_builtins (hash_table<simduid_to_vf> *htab)
{
  basic_block bb;

  FOR_EACH_BB_FN (bb, cfun)
    {
      gimple_stmt_iterator i;

      for (i = gsi_start_bb (bb); !gsi_end_p (i); )
	{
	  poly_uint64 vf = 1;
	  enum internal_fn ifn;
	  gimple *stmt = gsi_stmt (i);
	  tree t;
	  if (!is_gimple_call (stmt)
	      || !gimple_call_internal_p (stmt))
	    {
	      gsi_next (&i);
	      continue;
	    }
	  ifn = gimple_call_internal_fn (stmt);
	  switch (ifn)
	    {
	    case IFN_GOMP_SIMD_LANE:
	    case IFN_GOMP_SIMD_VF:
	    case IFN_GOMP_SIMD_LAST_LANE:
	      break;
	    case IFN_GOMP_SIMD_ORDERED_START:
	    case IFN_GOMP_SIMD_ORDERED_END:
	      if (integer_onep (gimple_call_arg (stmt, 0)))
		{
		  enum built_in_function bcode
		    = (ifn == IFN_GOMP_SIMD_ORDERED_START
		       ? BUILT_IN_GOMP_ORDERED_START
		       : BUILT_IN_GOMP_ORDERED_END);
		  gimple *g
		    = gimple_build_call (builtin_decl_explicit (bcode), 0);
		  tree vdef = gimple_vdef (stmt);
		  gimple_set_vdef (g, vdef);
		  SSA_NAME_DEF_STMT (vdef) = g;
		  gimple_set_vuse (g, gimple_vuse (stmt));
		  gsi_replace (&i, g, true);
		  continue;
		}
	      gsi_remove (&i, true);
	      unlink_stmt_vdef (stmt);
	      continue;
	    default:
	      gsi_next (&i);
	      continue;
	    }
	  tree arg = gimple_call_arg (stmt, 0);
	  gcc_assert (arg != NULL_TREE);
	  gcc_assert (TREE_CODE (arg) == SSA_NAME);
	  simduid_to_vf *p = NULL, data;
	  data.simduid = DECL_UID (SSA_NAME_VAR (arg));
	  /* Need to nullify the loop safelen field since its value is not
	     valid after the transformation.  */
	  if (bb->loop_father && bb->loop_father->safelen > 0)
	    bb->loop_father->safelen = 0;
	  if (htab)
	    {
	      p = htab->find (&data);
	      if (p)
		vf = p->vf;
	    }
	  switch (ifn)
	    {
	    case IFN_GOMP_SIMD_VF:
	      t = build_int_cst (unsigned_type_node, vf);
	      break;
	    case IFN_GOMP_SIMD_LANE:
	      t = build_int_cst (unsigned_type_node, 0);
	      break;
	    case IFN_GOMP_SIMD_LAST_LANE:
	      t = gimple_call_arg (stmt, 1);
	      break;
	    default:
	      gcc_unreachable ();
	    }
	  tree lhs = gimple_call_lhs (stmt);
	  if (lhs)
	    replace_uses_by (lhs, t);
	  release_defs (stmt);
	  gsi_remove (&i, true);
	}
    }
}

/* Helper structure for note_simd_array_uses.  */

struct note_simd_array_uses_struct
{
  hash_table<simd_array_to_simduid> **htab;
  unsigned int simduid;
};

/* Callback for note_simd_array_uses, called through walk_gimple_op.  */

static tree
note_simd_array_uses_cb (tree *tp, int *walk_subtrees, void *data)
{
  struct walk_stmt_info *wi = (struct walk_stmt_info *) data;
  struct note_simd_array_uses_struct *ns
    = (struct note_simd_array_uses_struct *) wi->info;

  if (TYPE_P (*tp))
    *walk_subtrees = 0;
  else if (VAR_P (*tp)
	   && lookup_attribute ("omp simd array", DECL_ATTRIBUTES (*tp))
	   && DECL_CONTEXT (*tp) == current_function_decl)
    {
      simd_array_to_simduid data;
      if (!*ns->htab)
	*ns->htab = new hash_table<simd_array_to_simduid> (15);
      data.decl = *tp;
      data.simduid = ns->simduid;
      simd_array_to_simduid **slot = (*ns->htab)->find_slot (&data, INSERT);
      if (*slot == NULL)
	{
	  simd_array_to_simduid *p = XNEW (simd_array_to_simduid);
	  *p = data;
	  *slot = p;
	}
      else if ((*slot)->simduid != ns->simduid)
	(*slot)->simduid = -1U;
      *walk_subtrees = 0;
    }
  return NULL_TREE;
}

/* Find "omp simd array" temporaries and map them to the corresponding
   simduid.  */

static void
note_simd_array_uses (hash_table<simd_array_to_simduid> **htab)
{
  basic_block bb;
  gimple_stmt_iterator gsi;
  struct walk_stmt_info wi;
  struct note_simd_array_uses_struct ns;

  memset (&wi, 0, sizeof (wi));
  wi.info = &ns;
  ns.htab = htab;

  FOR_EACH_BB_FN (bb, cfun)
    for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
      {
	gimple *stmt = gsi_stmt (gsi);
	if (!is_gimple_call (stmt) || !gimple_call_internal_p (stmt))
	  continue;
	switch (gimple_call_internal_fn (stmt))
	  {
	  case IFN_GOMP_SIMD_LANE:
	  case IFN_GOMP_SIMD_VF:
	  case IFN_GOMP_SIMD_LAST_LANE:
	    break;
	  default:
	    continue;
	  }
	tree lhs = gimple_call_lhs (stmt);
	if (lhs == NULL_TREE)
	  continue;
	imm_use_iterator use_iter;
	gimple *use_stmt;
	ns.simduid = DECL_UID (SSA_NAME_VAR (gimple_call_arg (stmt, 0)));
	FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, lhs)
	  if (!is_gimple_debug (use_stmt))
	    walk_gimple_op (use_stmt, note_simd_array_uses_cb, &wi);
      }
}

/* Shrink arrays with "omp simd array" attribute to the corresponding
   vectorization factor.  */
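
/* For instance (sizes purely illustrative), an "omp simd array"
   temporary created as "int D.1737[16]" for a loop that ended up with a
   vectorization factor of 4 is re-typed here to "int D.1737[4]" and
   laid out again via relayout_decl.  */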

static void
shrink_simd_arrays
  (hash_table<simd_array_to_simduid> *simd_array_to_simduid_htab,
   hash_table<simduid_to_vf> *simduid_to_vf_htab)
{
  for (hash_table<simd_array_to_simduid>::iterator iter
	 = simd_array_to_simduid_htab->begin ();
       iter != simd_array_to_simduid_htab->end (); ++iter)
    if ((*iter)->simduid != -1U)
      {
	tree decl = (*iter)->decl;
	poly_uint64 vf = 1;
	if (simduid_to_vf_htab)
	  {
	    simduid_to_vf *p = NULL, data;
	    data.simduid = (*iter)->simduid;
	    p = simduid_to_vf_htab->find (&data);
	    if (p)
	      vf = p->vf;
	  }
	tree atype
	  = build_array_type_nelts (TREE_TYPE (TREE_TYPE (decl)), vf);
	TREE_TYPE (decl) = atype;
	relayout_decl (decl);
      }

  delete simd_array_to_simduid_htab;
}

/* Initialize the vec_info with kind KIND_IN and target cost data
   TARGET_COST_DATA_IN.  */

vec_info::vec_info (vec_info::vec_kind kind_in, void *target_cost_data_in,
		    vec_info_shared *shared_)
  : kind (kind_in),
    shared (shared_),
    target_cost_data (target_cost_data_in)
{
  stmt_vec_infos.create (50);
}

vec_info::~vec_info ()
{
  slp_instance instance;
  unsigned int i;

  FOR_EACH_VEC_ELT (slp_instances, i, instance)
    vect_free_slp_instance (instance, true);

  destroy_cost_data (target_cost_data);
  free_stmt_vec_infos ();
}

vec_info_shared::vec_info_shared ()
  : datarefs (vNULL),
    datarefs_copy (vNULL),
    ddrs (vNULL)
{
}

vec_info_shared::~vec_info_shared ()
{
  free_data_refs (datarefs);
  free_dependence_relations (ddrs);
  datarefs_copy.release ();
}

void
vec_info_shared::save_datarefs ()
{
  if (!flag_checking)
    return;
  datarefs_copy.reserve_exact (datarefs.length ());
  for (unsigned i = 0; i < datarefs.length (); ++i)
    datarefs_copy.quick_push (*datarefs[i]);
}

void
vec_info_shared::check_datarefs ()
{
  if (!flag_checking)
    return;
  gcc_assert (datarefs.length () == datarefs_copy.length ());
  for (unsigned i = 0; i < datarefs.length (); ++i)
    if (memcmp (&datarefs_copy[i], datarefs[i], sizeof (data_reference)) != 0)
      gcc_unreachable ();
}

/* Record that STMT belongs to the vectorizable region.  Create and return
   an associated stmt_vec_info.  */

stmt_vec_info
vec_info::add_stmt (gimple *stmt)
{
  stmt_vec_info res = new_stmt_vec_info (stmt);
  set_vinfo_for_stmt (stmt, res);
  return res;
}

/* If STMT has an associated stmt_vec_info, return that stmt_vec_info,
   otherwise return null.  It is safe to call this function on any
   statement, even if it might not be part of the vectorizable region.  */

stmt_vec_info
vec_info::lookup_stmt (gimple *stmt)
{
  unsigned int uid = gimple_uid (stmt);
  if (uid > 0 && uid - 1 < stmt_vec_infos.length ())
    {
      stmt_vec_info res = stmt_vec_infos[uid - 1];
      if (res && res->stmt == stmt)
	return res;
    }
  return NULL;
}

/* If NAME is an SSA_NAME and its definition has an associated stmt_vec_info,
   return that stmt_vec_info, otherwise return null.  It is safe to call
   this on arbitrary operands.  */

stmt_vec_info
vec_info::lookup_def (tree name)
{
  if (TREE_CODE (name) == SSA_NAME
      && !SSA_NAME_IS_DEFAULT_DEF (name))
    return lookup_stmt (SSA_NAME_DEF_STMT (name));
  return NULL;
}

/* See whether there is a single non-debug statement that uses LHS and
   whether that statement has an associated stmt_vec_info.  Return the
   stmt_vec_info if so, otherwise return null.  */

stmt_vec_info
vec_info::lookup_single_use (tree lhs)
{
  use_operand_p dummy;
  gimple *use_stmt;
  if (single_imm_use (lhs, &dummy, &use_stmt))
    return lookup_stmt (use_stmt);
  return NULL;
}

/* Return vectorization information about DR.  */

dr_vec_info *
vec_info::lookup_dr (data_reference *dr)
{
  stmt_vec_info stmt_info = lookup_stmt (DR_STMT (dr));
  /* DR_STMT should never refer to a stmt in a pattern replacement.  */
  gcc_checking_assert (!is_pattern_stmt_p (stmt_info));
  return STMT_VINFO_DR_INFO (stmt_info->dr_aux.stmt);
}

/* Record that NEW_STMT_INFO now implements the same data reference
   as OLD_STMT_INFO.  */

void
vec_info::move_dr (stmt_vec_info new_stmt_info, stmt_vec_info old_stmt_info)
{
  gcc_assert (!is_pattern_stmt_p (old_stmt_info));
  STMT_VINFO_DR_INFO (old_stmt_info)->stmt = new_stmt_info;
  new_stmt_info->dr_aux = old_stmt_info->dr_aux;
  STMT_VINFO_DR_WRT_VEC_LOOP (new_stmt_info)
    = STMT_VINFO_DR_WRT_VEC_LOOP (old_stmt_info);
  STMT_VINFO_GATHER_SCATTER_P (new_stmt_info)
    = STMT_VINFO_GATHER_SCATTER_P (old_stmt_info);
}

/* Permanently remove the statement described by STMT_INFO from the
   function.  */

void
vec_info::remove_stmt (stmt_vec_info stmt_info)
{
  gcc_assert (!stmt_info->pattern_stmt_p);
  set_vinfo_for_stmt (stmt_info->stmt, NULL);
  gimple_stmt_iterator si = gsi_for_stmt (stmt_info->stmt);
  unlink_stmt_vdef (stmt_info->stmt);
  gsi_remove (&si, true);
  release_defs (stmt_info->stmt);
  free_stmt_vec_info (stmt_info);
}

/* Replace the statement at GSI by NEW_STMT, both the vectorization
   information and the function itself.  STMT_INFO describes the statement
   at GSI.  */

void
vec_info::replace_stmt (gimple_stmt_iterator *gsi, stmt_vec_info stmt_info,
			gimple *new_stmt)
{
  gimple *old_stmt = stmt_info->stmt;
  gcc_assert (!stmt_info->pattern_stmt_p && old_stmt == gsi_stmt (*gsi));
  set_vinfo_for_stmt (old_stmt, NULL);
  set_vinfo_for_stmt (new_stmt, stmt_info);
  stmt_info->stmt = new_stmt;
  gsi_replace (gsi, new_stmt, true);
}

/* Create and initialize a new stmt_vec_info struct for STMT.  */

stmt_vec_info
vec_info::new_stmt_vec_info (gimple *stmt)
{
  stmt_vec_info res = XCNEW (struct _stmt_vec_info);
  res->vinfo = this;
  res->stmt = stmt;

  STMT_VINFO_TYPE (res) = undef_vec_info_type;
  STMT_VINFO_RELEVANT (res) = vect_unused_in_scope;
  STMT_VINFO_VECTORIZABLE (res) = true;
  STMT_VINFO_VEC_REDUCTION_TYPE (res) = TREE_CODE_REDUCTION;
  STMT_VINFO_VEC_CONST_COND_REDUC_CODE (res) = ERROR_MARK;

  if (gimple_code (stmt) == GIMPLE_PHI
      && is_loop_header_bb_p (gimple_bb (stmt)))
    STMT_VINFO_DEF_TYPE (res) = vect_unknown_def_type;
  else
    STMT_VINFO_DEF_TYPE (res) = vect_internal_def;

  STMT_VINFO_SAME_ALIGN_REFS (res).create (0);
  STMT_SLP_TYPE (res) = loop_vect;

  /* This is really "uninitialized" until vect_compute_data_ref_alignment.  */
  res->dr_aux.misalignment = DR_MISALIGNMENT_UNINITIALIZED;

  return res;
}

/* Associate STMT with INFO.  */
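/* The association is keyed on gimple uids: a uid of zero means STMT has
   no stmt_vec_info yet, while a uid of N > 0 means its stmt_vec_info
   lives at stmt_vec_infos[N - 1] (see lookup_stmt).  */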

void
vec_info::set_vinfo_for_stmt (gimple *stmt, stmt_vec_info info)
{
  unsigned int uid = gimple_uid (stmt);
  if (uid == 0)
    {
      gcc_checking_assert (info);
      uid = stmt_vec_infos.length () + 1;
      gimple_set_uid (stmt, uid);
      stmt_vec_infos.safe_push (info);
    }
  else
    {
      gcc_checking_assert (info == NULL);
      stmt_vec_infos[uid - 1] = info;
    }
}

/* Free the contents of stmt_vec_infos.  */

void
vec_info::free_stmt_vec_infos (void)
{
  unsigned int i;
  stmt_vec_info info;
  FOR_EACH_VEC_ELT (stmt_vec_infos, i, info)
    if (info != NULL)
      free_stmt_vec_info (info);
  stmt_vec_infos.release ();
}

/* Free STMT_INFO.  */

void
vec_info::free_stmt_vec_info (stmt_vec_info stmt_info)
{
  if (stmt_info->pattern_stmt_p)
    {
      gimple_set_bb (stmt_info->stmt, NULL);
      tree lhs = gimple_get_lhs (stmt_info->stmt);
      if (lhs && TREE_CODE (lhs) == SSA_NAME)
	release_ssa_name (lhs);
    }

  STMT_VINFO_SAME_ALIGN_REFS (stmt_info).release ();
  STMT_VINFO_SIMD_CLONE_INFO (stmt_info).release ();
  free (stmt_info);
}

/* A helper function to free scev and LOOP niter information, as well as
   clear loop constraint LOOP_C_FINITE.  */

void
vect_free_loop_info_assumptions (struct loop *loop)
{
  scev_reset_htab ();
  /* We need to explicitly reset upper bound information since it is
     used even after free_numbers_of_iterations_estimates.  */
  loop->any_upper_bound = false;
  loop->any_likely_upper_bound = false;
  free_numbers_of_iterations_estimates (loop);
  loop_constraint_clear (loop, LOOP_C_FINITE);
}

/* If LOOP has been versioned during ifcvt, return the internal call
   guarding it.  */
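
/* Such a guard looks like

        if (LOOP_VECTORIZED (m, n)) ...

   (M and N being illustrative placeholders here), where the arguments
   are the numbers of the loop to be vectorized and of its scalar copy;
   we recognize the call by either argument matching LOOP->num.  */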

static gimple *
vect_loop_vectorized_call (struct loop *loop)
{
  basic_block bb = loop_preheader_edge (loop)->src;
  gimple *g;
  do
    {
      g = last_stmt (bb);
      if (g)
	break;
      if (!single_pred_p (bb))
	break;
      bb = single_pred (bb);
    }
  while (1);
  if (g && gimple_code (g) == GIMPLE_COND)
    {
      gimple_stmt_iterator gsi = gsi_for_stmt (g);
      gsi_prev (&gsi);
      if (!gsi_end_p (gsi))
	{
	  g = gsi_stmt (gsi);
	  if (gimple_call_internal_p (g, IFN_LOOP_VECTORIZED)
	      && (tree_to_shwi (gimple_call_arg (g, 0)) == loop->num
		  || tree_to_shwi (gimple_call_arg (g, 1)) == loop->num))
	    return g;
	}
    }
  return NULL;
}

/* If LOOP has been versioned during loop distribution, return the
   guarding internal call.  */
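
/* A sketch of that guard: the call has the form LOOP_DIST_ALIAS (id, value),
   where argument 0 carries the distribution alias id matched against
   LOOP->orig_loop_num, and on successful vectorization the call is later
   folded to its second argument (see try_vectorize_loop_1).  */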

static gimple *
vect_loop_dist_alias_call (struct loop *loop)
{
  basic_block bb;
  basic_block entry;
  struct loop *outer, *orig;
  gimple_stmt_iterator gsi;
  gimple *g;

  if (loop->orig_loop_num == 0)
    return NULL;

  orig = get_loop (cfun, loop->orig_loop_num);
  if (orig == NULL)
    {
      /* The original loop has somehow been destroyed.  Clear the
	 information.  */
      loop->orig_loop_num = 0;
      return NULL;
    }

  if (loop != orig)
    bb = nearest_common_dominator (CDI_DOMINATORS, loop->header, orig->header);
  else
    bb = loop_preheader_edge (loop)->src;

  outer = bb->loop_father;
  entry = ENTRY_BLOCK_PTR_FOR_FN (cfun);

  /* Look upward in dominance tree.  */
  for (; bb != entry && flow_bb_inside_loop_p (outer, bb);
       bb = get_immediate_dominator (CDI_DOMINATORS, bb))
    {
      g = last_stmt (bb);
      if (g == NULL || gimple_code (g) != GIMPLE_COND)
	continue;

      gsi = gsi_for_stmt (g);
      gsi_prev (&gsi);
      if (gsi_end_p (gsi))
	continue;

      g = gsi_stmt (gsi);
      /* The guarding internal function call must have the same distribution
	 alias id.  */
      if (gimple_call_internal_p (g, IFN_LOOP_DIST_ALIAS)
	  && (tree_to_shwi (gimple_call_arg (g, 0)) == loop->orig_loop_num))
	return g;
    }
  return NULL;
}

/* Set the uids of all the statements in basic blocks inside loop
   represented by LOOP_VINFO.  LOOP_VECTORIZED_CALL is the internal
   call guarding the loop which has been if-converted.  */
static void
set_uid_loop_bbs (loop_vec_info loop_vinfo, gimple *loop_vectorized_call)
{
  tree arg = gimple_call_arg (loop_vectorized_call, 1);
  basic_block *bbs;
  unsigned int i;
  struct loop *scalar_loop = get_loop (cfun, tree_to_shwi (arg));

  LOOP_VINFO_SCALAR_LOOP (loop_vinfo) = scalar_loop;
  gcc_checking_assert (vect_loop_vectorized_call (scalar_loop)
		       == loop_vectorized_call);
  /* If we are going to vectorize the outer loop, prevent vectorization
     of the inner loop in the scalar loop - either the scalar loop is
     thrown away, so it is wasted work, or it is used only for
     a few iterations.  */
  if (scalar_loop->inner)
    {
      gimple *g = vect_loop_vectorized_call (scalar_loop->inner);
      if (g)
	{
	  arg = gimple_call_arg (g, 0);
	  get_loop (cfun, tree_to_shwi (arg))->dont_vectorize = true;
	  fold_loop_internal_call (g, boolean_false_node);
	}
    }
  bbs = get_loop_body (scalar_loop);
  for (i = 0; i < scalar_loop->num_nodes; i++)
    {
      basic_block bb = bbs[i];
      gimple_stmt_iterator gsi;
      for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
	{
	  gimple *phi = gsi_stmt (gsi);
	  gimple_set_uid (phi, 0);
	}
      for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
	{
	  gimple *stmt = gsi_stmt (gsi);
	  gimple_set_uid (stmt, 0);
	}
    }
  free (bbs);
}

/* Try to vectorize LOOP.  */

static unsigned
try_vectorize_loop_1 (hash_table<simduid_to_vf> *&simduid_to_vf_htab,
		      unsigned *num_vectorized_loops,
		      loop_p loop, loop_vec_info orig_loop_vinfo,
		      gimple *loop_vectorized_call,
		      gimple *loop_dist_alias_call)
{
  unsigned ret = 0;
  vec_info_shared shared;
  auto_purge_vect_location sentinel;
  vect_location = find_loop_location (loop);
  if (LOCATION_LOCUS (vect_location.get_location_t ()) != UNKNOWN_LOCATION
      && dump_enabled_p ())
    dump_printf (MSG_NOTE | MSG_PRIORITY_INTERNALS,
		 "\nAnalyzing loop at %s:%d\n",
		 LOCATION_FILE (vect_location.get_location_t ()),
		 LOCATION_LINE (vect_location.get_location_t ()));

  /* Try to analyze the loop, retaining an opt_problem if dump_enabled_p.  */
  opt_loop_vec_info loop_vinfo
    = vect_analyze_loop (loop, orig_loop_vinfo, &shared);
  loop->aux = loop_vinfo;

  if (!loop_vinfo)
    if (dump_enabled_p ())
      if (opt_problem *problem = loop_vinfo.get_problem ())
	{
	  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			   "couldn't vectorize loop\n");
	  problem->emit_and_clear ();
	}

  if (!loop_vinfo || !LOOP_VINFO_VECTORIZABLE_P (loop_vinfo))
    {
      /* Free existing information if the loop was analyzed with some
	 assumptions.  */
      if (loop_constraint_set_p (loop, LOOP_C_FINITE))
	vect_free_loop_info_assumptions (loop);

      /* If we applied if-conversion then try to vectorize the
	 BB of innermost loops.
	 ???  Ideally BB vectorization would learn to vectorize
	 control flow by applying if-conversion on-the-fly; the
	 following retains the if-converted loop body even when
	 only non-if-converted parts took part in BB vectorization.  */
      if (flag_tree_slp_vectorize != 0
	  && loop_vectorized_call
	  && ! loop->inner)
	{
	  basic_block bb = loop->header;
	  bool require_loop_vectorize = false;
	  for (gimple_stmt_iterator gsi = gsi_start_bb (bb);
	       !gsi_end_p (gsi); gsi_next (&gsi))
	    {
	      gimple *stmt = gsi_stmt (gsi);
	      gcall *call = dyn_cast <gcall *> (stmt);
	      if (call && gimple_call_internal_p (call))
		{
		  internal_fn ifn = gimple_call_internal_fn (call);
		  if (ifn == IFN_MASK_LOAD || ifn == IFN_MASK_STORE
		      /* Don't keep the if-converted parts when the ifn with
			 specific type is not supported by the backend.  */
		      || (direct_internal_fn_p (ifn)
			  && !direct_internal_fn_supported_p
			  (call, OPTIMIZE_FOR_SPEED)))
		    {
		      require_loop_vectorize = true;
		      break;
		    }
		}
	      gimple_set_uid (stmt, -1);
	      gimple_set_visited (stmt, false);
	    }
	  if (!require_loop_vectorize && vect_slp_bb (bb))
	    {
	      if (dump_enabled_p ())
		dump_printf_loc (MSG_NOTE, vect_location,
				 "basic block vectorized\n");
	      fold_loop_internal_call (loop_vectorized_call,
				       boolean_true_node);
	      loop_vectorized_call = NULL;
	      ret |= TODO_cleanup_cfg | TODO_update_ssa_only_virtuals;
	    }
	}
      /* If outer loop vectorization fails for LOOP_VECTORIZED guarded
	 loop, don't vectorize its inner loop; we'll attempt to
	 vectorize LOOP_VECTORIZED guarded inner loop of the scalar
	 loop version.  */
      if (loop_vectorized_call && loop->inner)
	loop->inner->dont_vectorize = true;
      return ret;
    }

  if (!dbg_cnt (vect_loop))
    {
      /* Free existing information if the loop was analyzed with some
	 assumptions.  */
      if (loop_constraint_set_p (loop, LOOP_C_FINITE))
	vect_free_loop_info_assumptions (loop);
      return ret;
    }

  if (loop_vectorized_call)
    set_uid_loop_bbs (loop_vinfo, loop_vectorized_call);

  unsigned HOST_WIDE_INT bytes;
  if (dump_enabled_p ())
    {
      if (current_vector_size.is_constant (&bytes))
	dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
			 "loop vectorized using %wu byte vectors\n", bytes);
      else
	dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
			 "loop vectorized using variable length vectors\n");
    }

  loop_p new_loop = vect_transform_loop (loop_vinfo);
  (*num_vectorized_loops)++;
  /* Now that the loop has been vectorized, allow it to be unrolled
     etc.  */
  loop->force_vectorize = false;

  if (loop->simduid)
    {
      simduid_to_vf *simduid_to_vf_data = XNEW (simduid_to_vf);
      if (!simduid_to_vf_htab)
	simduid_to_vf_htab = new hash_table<simduid_to_vf> (15);
      simduid_to_vf_data->simduid = DECL_UID (loop->simduid);
      simduid_to_vf_data->vf = loop_vinfo->vectorization_factor;
      *simduid_to_vf_htab->find_slot (simduid_to_vf_data, INSERT)
	  = simduid_to_vf_data;
    }

  if (loop_vectorized_call)
    {
      fold_loop_internal_call (loop_vectorized_call, boolean_true_node);
      loop_vectorized_call = NULL;
      ret |= TODO_cleanup_cfg;
    }
  if (loop_dist_alias_call)
    {
      tree value = gimple_call_arg (loop_dist_alias_call, 1);
      fold_loop_internal_call (loop_dist_alias_call, value);
      loop_dist_alias_call = NULL;
      ret |= TODO_cleanup_cfg;
    }

  /* The epilogue of a vectorized loop must be vectorized too.  */
  if (new_loop)
    ret |= try_vectorize_loop_1 (simduid_to_vf_htab, num_vectorized_loops,
				 new_loop, loop_vinfo, NULL, NULL);

  return ret;
}

/* Try to vectorize LOOP.  */

static unsigned
try_vectorize_loop (hash_table<simduid_to_vf> *&simduid_to_vf_htab,
		    unsigned *num_vectorized_loops, loop_p loop)
{
  if (!((flag_tree_loop_vectorize
	 && optimize_loop_nest_for_speed_p (loop))
	|| loop->force_vectorize))
    return 0;

  return try_vectorize_loop_1 (simduid_to_vf_htab, num_vectorized_loops,
			       loop, NULL,
			       vect_loop_vectorized_call (loop),
			       vect_loop_dist_alias_call (loop));
}


/* Function vectorize_loops.

   Entry point to loop vectorization phase.  */

unsigned
vectorize_loops (void)
{
  unsigned int i;
  unsigned int num_vectorized_loops = 0;
  unsigned int vect_loops_num;
  struct loop *loop;
  hash_table<simduid_to_vf> *simduid_to_vf_htab = NULL;
  hash_table<simd_array_to_simduid> *simd_array_to_simduid_htab = NULL;
  bool any_ifcvt_loops = false;
  unsigned ret = 0;

  vect_loops_num = number_of_loops (cfun);

  /* Bail out if there are no loops.  */
  if (vect_loops_num <= 1)
    return 0;

  if (cfun->has_simduid_loops)
    note_simd_array_uses (&simd_array_to_simduid_htab);

  /*  ----------- Analyze loops. -----------  */

  /* If some loop was duplicated, it gets a bigger number
     than all previously defined loops.  This fact allows us to run
     only over the initial loops, skipping newly generated ones.  */
  FOR_EACH_LOOP (loop, 0)
    if (loop->dont_vectorize)
      {
	any_ifcvt_loops = true;
	/* If-conversion sometimes versions both the outer loop
	   (for the case when outer loop vectorization might be
	   desirable) as well as the inner loop in the scalar version
	   of the loop.  So we have:
	    if (LOOP_VECTORIZED (1, 3))
	      {
		loop1
		  loop2
	      }
	    else
	      loop3 (copy of loop1)
		if (LOOP_VECTORIZED (4, 5))
		  loop4 (copy of loop2)
		else
		  loop5 (copy of loop4)
	   If FOR_EACH_LOOP gives us loop3 first (which has
	   dont_vectorize set), make sure to process loop1 before loop4,
	   so that we can prevent vectorization of loop4 if loop1
	   is successfully vectorized.  */
	if (loop->inner)
	  {
	    gimple *loop_vectorized_call
	      = vect_loop_vectorized_call (loop);
	    if (loop_vectorized_call
		&& vect_loop_vectorized_call (loop->inner))
	      {
		tree arg = gimple_call_arg (loop_vectorized_call, 0);
		struct loop *vector_loop
		  = get_loop (cfun, tree_to_shwi (arg));
		if (vector_loop && vector_loop != loop)
		  {
		    /* Make sure we don't vectorize it twice.  */
		    vector_loop->dont_vectorize = true;
		    ret |= try_vectorize_loop (simduid_to_vf_htab,
					       &num_vectorized_loops,
					       vector_loop);
		  }
	      }
	  }
      }
    else
      ret |= try_vectorize_loop (simduid_to_vf_htab, &num_vectorized_loops,
				 loop);
  vect_location = dump_user_location_t ();

  statistics_counter_event (cfun, "Vectorized loops", num_vectorized_loops);
  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "vectorized %u loops in function.\n",
		     num_vectorized_loops);

  /*  ----------- Finalize. -----------  */

  if (any_ifcvt_loops)
    for (i = 1; i < number_of_loops (cfun); i++)
      {
	loop = get_loop (cfun, i);
	if (loop && loop->dont_vectorize)
	  {
	    gimple *g = vect_loop_vectorized_call (loop);
	    if (g)
	      {
		fold_loop_internal_call (g, boolean_false_node);
		ret |= TODO_cleanup_cfg;
		g = NULL;
	      }
	    else
	      g = vect_loop_dist_alias_call (loop);

	    if (g)
	      {
		fold_loop_internal_call (g, boolean_false_node);
		ret |= TODO_cleanup_cfg;
	      }
	  }
      }

  for (i = 1; i < number_of_loops (cfun); i++)
    {
      loop_vec_info loop_vinfo;
      bool has_mask_store;

      loop = get_loop (cfun, i);
      if (!loop || !loop->aux)
	continue;
      loop_vinfo = (loop_vec_info) loop->aux;
      has_mask_store = LOOP_VINFO_HAS_MASK_STORE (loop_vinfo);
      delete loop_vinfo;
      if (has_mask_store
	  && targetm.vectorize.empty_mask_is_expensive (IFN_MASK_STORE))
	optimize_mask_stores (loop);
      loop->aux = NULL;
    }

  /* Fold IFN_GOMP_SIMD_{VF,LANE,LAST_LANE,ORDERED_{START,END}} builtins.  */
  if (cfun->has_simduid_loops)
    adjust_simduid_builtins (simduid_to_vf_htab);

  /* Shrink any "omp simd array" temporary arrays to the
     actual vectorization factors.  */
  if (simd_array_to_simduid_htab)
    shrink_simd_arrays (simd_array_to_simduid_htab, simduid_to_vf_htab);
  delete simduid_to_vf_htab;
  cfun->has_simduid_loops = false;

  if (num_vectorized_loops > 0)
    {
      /* If we vectorized any loop only virtual SSA form needs to be updated.
	 ???  Also while we try hard to update loop-closed SSA form we fail
	 to properly do this in some corner-cases (see PR56286).  */
      rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa_only_virtuals);
      return TODO_cleanup_cfg;
    }

  return ret;
}


/* Entry point to the simduid cleanup pass.  */

namespace {

const pass_data pass_data_simduid_cleanup =
{
  GIMPLE_PASS, /* type */
  "simduid", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_NONE, /* tv_id */
  ( PROP_ssa | PROP_cfg ), /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  0, /* todo_flags_finish */
};

class pass_simduid_cleanup : public gimple_opt_pass
{
public:
  pass_simduid_cleanup (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_simduid_cleanup, ctxt)
  {}

  /* opt_pass methods: */
  opt_pass * clone () { return new pass_simduid_cleanup (m_ctxt); }
  virtual bool gate (function *fun) { return fun->has_simduid_loops; }
  virtual unsigned int execute (function *);

}; // class pass_simduid_cleanup

unsigned int
pass_simduid_cleanup::execute (function *fun)
{
  hash_table<simd_array_to_simduid> *simd_array_to_simduid_htab = NULL;

  note_simd_array_uses (&simd_array_to_simduid_htab);

  /* Fold IFN_GOMP_SIMD_{VF,LANE,LAST_LANE,ORDERED_{START,END}} builtins.  */
  adjust_simduid_builtins (NULL);

  /* Shrink any "omp simd array" temporary arrays to the
     actual vectorization factors.  */
  if (simd_array_to_simduid_htab)
    shrink_simd_arrays (simd_array_to_simduid_htab, NULL);
  fun->has_simduid_loops = false;
  return 0;
}

}  // anon namespace

gimple_opt_pass *
make_pass_simduid_cleanup (gcc::context *ctxt)
{
  return new pass_simduid_cleanup (ctxt);
}


/*  Entry point to basic block SLP phase.  */

namespace {

const pass_data pass_data_slp_vectorize =
{
  GIMPLE_PASS, /* type */
  "slp", /* name */
  OPTGROUP_LOOP | OPTGROUP_VEC, /* optinfo_flags */
  TV_TREE_SLP_VECTORIZATION, /* tv_id */
  ( PROP_ssa | PROP_cfg ), /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  TODO_update_ssa, /* todo_flags_finish */
};

class pass_slp_vectorize : public gimple_opt_pass
{
public:
  pass_slp_vectorize (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_slp_vectorize, ctxt)
  {}

  /* opt_pass methods: */
  opt_pass * clone () { return new pass_slp_vectorize (m_ctxt); }
  virtual bool gate (function *) { return flag_tree_slp_vectorize != 0; }
  virtual unsigned int execute (function *);

}; // class pass_slp_vectorize

unsigned int
pass_slp_vectorize::execute (function *fun)
{
  auto_purge_vect_location sentinel;
  basic_block bb;

  bool in_loop_pipeline = scev_initialized_p ();
  if (!in_loop_pipeline)
    {
      loop_optimizer_init (LOOPS_NORMAL);
      scev_initialize ();
    }

  /* Mark all stmts as not belonging to the current region and unvisited.  */
  FOR_EACH_BB_FN (bb, fun)
    {
      for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi);
	   gsi_next (&gsi))
	{
	  gimple *stmt = gsi_stmt (gsi);
	  gimple_set_uid (stmt, -1);
	  gimple_set_visited (stmt, false);
	}
    }

  FOR_EACH_BB_FN (bb, fun)
    {
      if (vect_slp_bb (bb))
	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location, "basic block vectorized\n");
    }

  if (!in_loop_pipeline)
    {
      scev_finalize ();
      loop_optimizer_finalize ();
    }

  return 0;
}

} // anon namespace

gimple_opt_pass *
make_pass_slp_vectorize (gcc::context *ctxt)
{
  return new pass_slp_vectorize (ctxt);
}


/* Increase alignment of global arrays to improve vectorization potential.
   TODO:
   - Consider also structs that have an array field.
   - Use ipa analysis to prune arrays that can't be vectorized?
     This should involve global alignment analysis and in the future also
     array padding.  */
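
/* As an illustrative, target-dependent example: a global "int a[1024]"
   with natural 4-byte alignment may get its alignment raised to that of
   the target's preferred integer vector type, so that vectorized
   accesses to it can use aligned vector loads and stores.  */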

static unsigned get_vec_alignment_for_type (tree);
static hash_map<tree, unsigned> *type_align_map;

/* Return the alignment of the vector type corresponding to TYPE's scalar
   element type, or 0 if no such vector type exists.  */
static unsigned
get_vec_alignment_for_array_type (tree type)
{
  gcc_assert (TREE_CODE (type) == ARRAY_TYPE);
  poly_uint64 array_size, vector_size;

  tree vectype = get_vectype_for_scalar_type (strip_array_types (type));
  if (!vectype
      || !poly_int_tree_p (TYPE_SIZE (type), &array_size)
      || !poly_int_tree_p (TYPE_SIZE (vectype), &vector_size)
      || maybe_lt (array_size, vector_size))
    return 0;

  return TYPE_ALIGN (vectype);
}

/* Return the maximum alignment among the vector types corresponding to
   the fields' scalar types.  For now, we only consider fields whose
   offset is a multiple of their vector alignment.
   Return 0 if no suitable field is found.  */
static unsigned
get_vec_alignment_for_record_type (tree type)
{
  gcc_assert (TREE_CODE (type) == RECORD_TYPE);

  unsigned max_align = 0, alignment;
  HOST_WIDE_INT offset;
  tree offset_tree;

  if (TYPE_PACKED (type))
    return 0;

  unsigned *slot = type_align_map->get (type);
  if (slot)
    return *slot;

  for (tree field = first_field (type);
       field != NULL_TREE;
       field = DECL_CHAIN (field))
    {
      /* Skip if not FIELD_DECL or if alignment is set by user.  */
      if (TREE_CODE (field) != FIELD_DECL
	  || DECL_USER_ALIGN (field)
	  || DECL_ARTIFICIAL (field))
	continue;

      /* We don't need to process the type further if the offset is
	 variable, since the offsets of the remaining members will also
	 be variable.  */
      if (TREE_CODE (DECL_FIELD_OFFSET (field)) != INTEGER_CST
	  || TREE_CODE (DECL_FIELD_BIT_OFFSET (field)) != INTEGER_CST)
	break;

      /* Similarly stop processing the type if offset_tree
	 does not fit in unsigned HOST_WIDE_INT.  */
      offset_tree = bit_position (field);
      if (!tree_fits_uhwi_p (offset_tree))
	break;

      offset = tree_to_uhwi (offset_tree);
      alignment = get_vec_alignment_for_type (TREE_TYPE (field));

      /* Get the maximum alignment of vectorized field/array among those
	 members whose offset is a multiple of the vector alignment.  */
      if (alignment
	  && (offset % alignment == 0)
	  && (alignment > max_align))
	max_align = alignment;
    }

  type_align_map->put (type, max_align);
  return max_align;
}

/* Return the alignment of the vector type corresponding to TYPE's scalar
   type, or 0 if it doesn't exist or if that vector alignment is less
   than TYPE's own alignment.  */
static unsigned
get_vec_alignment_for_type (tree type)
{
  if (type == NULL_TREE)
    return 0;

  gcc_assert (TYPE_P (type));

  static unsigned alignment = 0;
  switch (TREE_CODE (type))
    {
      case ARRAY_TYPE:
	alignment = get_vec_alignment_for_array_type (type);
	break;
      case RECORD_TYPE:
	alignment = get_vec_alignment_for_record_type (type);
	break;
      default:
	alignment = 0;
	break;
    }

  return (alignment > TYPE_ALIGN (type)) ? alignment : 0;
}

/* Entry point to increase_alignment pass.  */
static unsigned int
increase_alignment (void)
{
  varpool_node *vnode;

  vect_location = dump_user_location_t ();
  type_align_map = new hash_map<tree, unsigned>;

  /* Increase the alignment of all global arrays for vectorization.  */
  FOR_EACH_DEFINED_VARIABLE (vnode)
    {
      tree decl = vnode->decl;
      unsigned int alignment;

      if ((decl_in_symtab_p (decl)
	  && !symtab_node::get (decl)->can_increase_alignment_p ())
	  || DECL_USER_ALIGN (decl) || DECL_ARTIFICIAL (decl))
	continue;

      alignment = get_vec_alignment_for_type (TREE_TYPE (decl));
      if (alignment && vect_can_force_dr_alignment_p (decl, alignment))
        {
	  vnode->increase_alignment (alignment);
	  if (dump_enabled_p ())
	    dump_printf (MSG_NOTE, "Increasing alignment of decl: %T\n", decl);
        }
    }

  delete type_align_map;
  return 0;
}


namespace {

const pass_data pass_data_ipa_increase_alignment =
{
  SIMPLE_IPA_PASS, /* type */
  "increase_alignment", /* name */
  OPTGROUP_LOOP | OPTGROUP_VEC, /* optinfo_flags */
  TV_IPA_OPT, /* tv_id */
  0, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  0, /* todo_flags_finish */
};

class pass_ipa_increase_alignment : public simple_ipa_opt_pass
{
public:
  pass_ipa_increase_alignment (gcc::context *ctxt)
    : simple_ipa_opt_pass (pass_data_ipa_increase_alignment, ctxt)
  {}

  /* opt_pass methods: */
  virtual bool gate (function *)
    {
      return flag_section_anchors && flag_tree_loop_vectorize;
    }

  virtual unsigned int execute (function *) { return increase_alignment (); }

}; // class pass_ipa_increase_alignment

} // anon namespace

simple_ipa_opt_pass *
make_pass_ipa_increase_alignment (gcc::context *ctxt)
{
  return new pass_ipa_increase_alignment (ctxt);
}