/* Vectorizer
   Copyright (C) 2003-2021 Free Software Foundation, Inc.
   Contributed by Dorit Naishlos <dorit@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

/* Loop and basic block vectorizer.

  This file contains drivers for the three vectorizers:
  (1) loop vectorizer (inter-iteration parallelism),
  (2) loop-aware SLP (intra-iteration parallelism) (invoked by the loop
      vectorizer)
  (3) BB vectorizer (out-of-loops), aka SLP

  The rest of the vectorizer's code is organized as follows:
  - tree-vect-loop.c - loop specific parts such as reductions, etc. These are
    used by drivers (1) and (2).
  - tree-vect-loop-manip.c - vectorizer's loop control-flow utilities, used by
    drivers (1) and (2).
  - tree-vect-slp.c - BB vectorization specific analysis and transformation,
    used by drivers (2) and (3).
  - tree-vect-stmts.c - statements analysis and transformation (used by all).
  - tree-vect-data-refs.c - vectorizer specific data-refs analysis and
    manipulations (used by all).
  - tree-vect-patterns.c - vectorizable code patterns detector (used by all)

  Here's a poor attempt at illustrating that:

     tree-vectorizer.c:
     loop_vect()  loop_aware_slp()  slp_vect()
          |        /           \          /
          |       /             \        /
          tree-vect-loop.c  tree-vect-slp.c
                | \      \  /      /   |
                |  \      \/      /    |
                |   \     /\     /     |
                |    \   /  \   /      |
         tree-vect-stmts.c  tree-vect-data-refs.c
                       \      /
                    tree-vect-patterns.c
*/

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "tree.h"
#include "gimple.h"
#include "predict.h"
#include "tree-pass.h"
#include "ssa.h"
#include "cgraph.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "gimple-iterator.h"
#include "gimple-walk.h"
#include "tree-ssa-loop-manip.h"
#include "tree-ssa-loop-niter.h"
#include "tree-cfg.h"
#include "cfgloop.h"
#include "tree-vectorizer.h"
#include "tree-ssa-propagate.h"
#include "dbgcnt.h"
#include "tree-scalar-evolution.h"
#include "stringpool.h"
#include "attribs.h"
#include "gimple-pretty-print.h"
#include "opt-problem.h"
#include "internal-fn.h"


/* Loop or bb location, with hotness information.  */
dump_user_location_t vect_location;

/* auto_purge_vect_location's dtor: reset the vect_location
   global, to avoid stale location_t values that could reference
   GC-ed blocks.  */

auto_purge_vect_location::~auto_purge_vect_location ()
{
  vect_location = dump_user_location_t ();
}
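
/* A typical use is as a stack sentinel at the start of a vectorizer
   entry point:

     auto_purge_vect_location sentinel;
     vect_location = find_loop_location (loop);

   so that vect_location is reset on every exit path (see
   try_vectorize_loop_1 below).  */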

/* Dump a cost entry according to args to F.  */

void
dump_stmt_cost (FILE *f, void *data, int count, enum vect_cost_for_stmt kind,
                stmt_vec_info stmt_info, tree, int misalign, unsigned cost,
                enum vect_cost_model_location where)
{
  fprintf (f, "%p ", data);
  if (stmt_info)
    {
      print_gimple_expr (f, STMT_VINFO_STMT (stmt_info), 0, TDF_SLIM);
      fprintf (f, " ");
    }
  else
    fprintf (f, "<unknown> ");
  fprintf (f, "%d times ", count);
  const char *ks = "unknown";
  switch (kind)
    {
    case scalar_stmt:
      ks = "scalar_stmt";
      break;
    case scalar_load:
      ks = "scalar_load";
      break;
    case scalar_store:
      ks = "scalar_store";
      break;
    case vector_stmt:
      ks = "vector_stmt";
      break;
    case vector_load:
      ks = "vector_load";
      break;
    case vector_gather_load:
      ks = "vector_gather_load";
      break;
    case unaligned_load:
      ks = "unaligned_load";
      break;
    case unaligned_store:
      ks = "unaligned_store";
      break;
    case vector_store:
      ks = "vector_store";
      break;
    case vector_scatter_store:
      ks = "vector_scatter_store";
      break;
    case vec_to_scalar:
      ks = "vec_to_scalar";
      break;
    case scalar_to_vec:
      ks = "scalar_to_vec";
      break;
    case cond_branch_not_taken:
      ks = "cond_branch_not_taken";
      break;
    case cond_branch_taken:
      ks = "cond_branch_taken";
      break;
    case vec_perm:
      ks = "vec_perm";
      break;
    case vec_promote_demote:
      ks = "vec_promote_demote";
      break;
    case vec_construct:
      ks = "vec_construct";
      break;
    }
  fprintf (f, "%s ", ks);
  if (kind == unaligned_load || kind == unaligned_store)
    fprintf (f, "(misalign %d) ", misalign);
  fprintf (f, "costs %u ", cost);
  const char *ws = "unknown";
  switch (where)
    {
    case vect_prologue:
      ws = "prologue";
      break;
    case vect_body:
      ws = "body";
      break;
    case vect_epilogue:
      ws = "epilogue";
      break;
    }
  fprintf (f, "in %s\n", ws);
}
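
/* For illustration, a line printed by dump_stmt_cost above might look
   like

     0x37d33a0 _1 = *a_9(D) 1 times scalar_load costs 12 in body

   i.e. the cost-data pointer, the statement, the count, the kind, the
   cost and the placement; the pointer value and cost shown here are of
   course hypothetical.  */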

/* For mapping simduid to vectorization factor.  */

class simduid_to_vf : public free_ptr_hash<simduid_to_vf>
{
public:
  unsigned int simduid;
  poly_uint64 vf;

  /* hash_table support.  */
  static inline hashval_t hash (const simduid_to_vf *);
  static inline int equal (const simduid_to_vf *, const simduid_to_vf *);
};

inline hashval_t
simduid_to_vf::hash (const simduid_to_vf *p)
{
  return p->simduid;
}

inline int
simduid_to_vf::equal (const simduid_to_vf *p1, const simduid_to_vf *p2)
{
  return p1->simduid == p2->simduid;
}

/* This hash maps an OMP simd array to the simduid used to index into
   it, as in

        _7 = GOMP_SIMD_LANE (simduid.0)
        ...
        ...
        D.1737[_7] = stuff;

   i.e. it maps the OMP simd array (D.1737[]) to the DECL_UID of
   simduid.0.  */

struct simd_array_to_simduid : free_ptr_hash<simd_array_to_simduid>
{
  tree decl;
  unsigned int simduid;

  /* hash_table support.  */
  static inline hashval_t hash (const simd_array_to_simduid *);
  static inline int equal (const simd_array_to_simduid *,
                           const simd_array_to_simduid *);
};

inline hashval_t
simd_array_to_simduid::hash (const simd_array_to_simduid *p)
{
  return DECL_UID (p->decl);
}

inline int
simd_array_to_simduid::equal (const simd_array_to_simduid *p1,
                              const simd_array_to_simduid *p2)
{
  return p1->decl == p2->decl;
}

/* Fold IFN_GOMP_SIMD_LANE, IFN_GOMP_SIMD_VF and IFN_GOMP_SIMD_LAST_LANE
   into their corresponding constants and remove
   IFN_GOMP_SIMD_ORDERED_{START,END}.  */
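
/* For instance, after a loop with simduid.0 has been vectorized with a
   vectorization factor of 8, the calls are folded as

     _5 = GOMP_SIMD_VF (simduid.0);            ->  _5 = 8;
     _7 = GOMP_SIMD_LANE (simduid.0);          ->  _7 = 0;
     _9 = GOMP_SIMD_LAST_LANE (simduid.0, _3); ->  _9 = _3;

   following the switch in the function below.  */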

static void
adjust_simduid_builtins (hash_table<simduid_to_vf> *htab)
{
  basic_block bb;

  FOR_EACH_BB_FN (bb, cfun)
    {
      gimple_stmt_iterator i;

      for (i = gsi_start_bb (bb); !gsi_end_p (i); )
        {
          poly_uint64 vf = 1;
          enum internal_fn ifn;
          gimple *stmt = gsi_stmt (i);
          tree t;
          if (!is_gimple_call (stmt)
              || !gimple_call_internal_p (stmt))
            {
              gsi_next (&i);
              continue;
            }
          ifn = gimple_call_internal_fn (stmt);
          switch (ifn)
            {
            case IFN_GOMP_SIMD_LANE:
            case IFN_GOMP_SIMD_VF:
            case IFN_GOMP_SIMD_LAST_LANE:
              break;
            case IFN_GOMP_SIMD_ORDERED_START:
            case IFN_GOMP_SIMD_ORDERED_END:
              if (integer_onep (gimple_call_arg (stmt, 0)))
                {
                  enum built_in_function bcode
                    = (ifn == IFN_GOMP_SIMD_ORDERED_START
                       ? BUILT_IN_GOMP_ORDERED_START
                       : BUILT_IN_GOMP_ORDERED_END);
                  gimple *g
                    = gimple_build_call (builtin_decl_explicit (bcode), 0);
                  gimple_move_vops (g, stmt);
                  gsi_replace (&i, g, true);
                  continue;
                }
              gsi_remove (&i, true);
              unlink_stmt_vdef (stmt);
              continue;
            default:
              gsi_next (&i);
              continue;
            }
          tree arg = gimple_call_arg (stmt, 0);
          gcc_assert (arg != NULL_TREE);
          gcc_assert (TREE_CODE (arg) == SSA_NAME);
          simduid_to_vf *p = NULL, data;
          data.simduid = DECL_UID (SSA_NAME_VAR (arg));
          /* Need to nullify the loop safelen field since its value is
             not valid after the transformation.  */
          if (bb->loop_father && bb->loop_father->safelen > 0)
            bb->loop_father->safelen = 0;
          if (htab)
            {
              p = htab->find (&data);
              if (p)
                vf = p->vf;
            }
          switch (ifn)
            {
            case IFN_GOMP_SIMD_VF:
              t = build_int_cst (unsigned_type_node, vf);
              break;
            case IFN_GOMP_SIMD_LANE:
              t = build_int_cst (unsigned_type_node, 0);
              break;
            case IFN_GOMP_SIMD_LAST_LANE:
              t = gimple_call_arg (stmt, 1);
              break;
            default:
              gcc_unreachable ();
            }
          tree lhs = gimple_call_lhs (stmt);
          if (lhs)
            replace_uses_by (lhs, t);
          release_defs (stmt);
          gsi_remove (&i, true);
        }
    }
}

/* Helper structure for note_simd_array_uses.  */

struct note_simd_array_uses_struct
{
  hash_table<simd_array_to_simduid> **htab;
  unsigned int simduid;
};

/* Callback for note_simd_array_uses, called through walk_gimple_op.  */

static tree
note_simd_array_uses_cb (tree *tp, int *walk_subtrees, void *data)
{
  struct walk_stmt_info *wi = (struct walk_stmt_info *) data;
  struct note_simd_array_uses_struct *ns
    = (struct note_simd_array_uses_struct *) wi->info;

  if (TYPE_P (*tp))
    *walk_subtrees = 0;
  else if (VAR_P (*tp)
           && lookup_attribute ("omp simd array", DECL_ATTRIBUTES (*tp))
           && DECL_CONTEXT (*tp) == current_function_decl)
    {
      simd_array_to_simduid data;
      if (!*ns->htab)
        *ns->htab = new hash_table<simd_array_to_simduid> (15);
      data.decl = *tp;
      data.simduid = ns->simduid;
      simd_array_to_simduid **slot = (*ns->htab)->find_slot (&data, INSERT);
      if (*slot == NULL)
        {
          simd_array_to_simduid *p = XNEW (simd_array_to_simduid);
          *p = data;
          *slot = p;
        }
      else if ((*slot)->simduid != ns->simduid)
        (*slot)->simduid = -1U;
      *walk_subtrees = 0;
    }
  return NULL_TREE;
}

/* Find "omp simd array" temporaries and map them to corresponding
   simduid.  */

static void
note_simd_array_uses (hash_table<simd_array_to_simduid> **htab)
{
  basic_block bb;
  gimple_stmt_iterator gsi;
  struct walk_stmt_info wi;
  struct note_simd_array_uses_struct ns;

  memset (&wi, 0, sizeof (wi));
  wi.info = &ns;
  ns.htab = htab;

  FOR_EACH_BB_FN (bb, cfun)
    for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
      {
        gimple *stmt = gsi_stmt (gsi);
        if (!is_gimple_call (stmt) || !gimple_call_internal_p (stmt))
          continue;
        switch (gimple_call_internal_fn (stmt))
          {
          case IFN_GOMP_SIMD_LANE:
          case IFN_GOMP_SIMD_VF:
          case IFN_GOMP_SIMD_LAST_LANE:
            break;
          default:
            continue;
          }
        tree lhs = gimple_call_lhs (stmt);
        if (lhs == NULL_TREE)
          continue;
        imm_use_iterator use_iter;
        gimple *use_stmt;
        ns.simduid = DECL_UID (SSA_NAME_VAR (gimple_call_arg (stmt, 0)));
        FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, lhs)
          if (!is_gimple_debug (use_stmt))
            walk_gimple_op (use_stmt, note_simd_array_uses_cb, &wi);
      }
}

/* Shrink arrays with "omp simd array" attribute to the corresponding
   vectorization factor.  */
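
/* For example, an "omp simd array" declared as "int D.1737[64]" whose
   simduid was vectorized with VF 8 is re-laid out below as
   "int D.1737[8]".  */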

static void
shrink_simd_arrays
  (hash_table<simd_array_to_simduid> *simd_array_to_simduid_htab,
   hash_table<simduid_to_vf> *simduid_to_vf_htab)
{
  for (hash_table<simd_array_to_simduid>::iterator iter
         = simd_array_to_simduid_htab->begin ();
       iter != simd_array_to_simduid_htab->end (); ++iter)
    if ((*iter)->simduid != -1U)
      {
        tree decl = (*iter)->decl;
        poly_uint64 vf = 1;
        if (simduid_to_vf_htab)
          {
            simduid_to_vf *p = NULL, data;
            data.simduid = (*iter)->simduid;
            p = simduid_to_vf_htab->find (&data);
            if (p)
              vf = p->vf;
          }
        tree atype
          = build_array_type_nelts (TREE_TYPE (TREE_TYPE (decl)), vf);
        TREE_TYPE (decl) = atype;
        relayout_decl (decl);
      }

  delete simd_array_to_simduid_htab;
}

/* Initialize the vec_info with kind KIND_IN and target cost data
   TARGET_COST_DATA_IN.  */

vec_info::vec_info (vec_info::vec_kind kind_in, void *target_cost_data_in,
                    vec_info_shared *shared_)
  : kind (kind_in),
    shared (shared_),
    stmt_vec_info_ro (false),
    target_cost_data (target_cost_data_in)
{
  stmt_vec_infos.create (50);
}

vec_info::~vec_info ()
{
  slp_instance instance;
  unsigned int i;

  FOR_EACH_VEC_ELT (slp_instances, i, instance)
    vect_free_slp_instance (instance);

  destroy_cost_data (target_cost_data);
  free_stmt_vec_infos ();
}

vec_info_shared::vec_info_shared ()
  : datarefs (vNULL),
    datarefs_copy (vNULL),
    ddrs (vNULL)
{
}

vec_info_shared::~vec_info_shared ()
{
  free_data_refs (datarefs);
  free_dependence_relations (ddrs);
  datarefs_copy.release ();
}

void
vec_info_shared::save_datarefs ()
{
  if (!flag_checking)
    return;
  datarefs_copy.reserve_exact (datarefs.length ());
  for (unsigned i = 0; i < datarefs.length (); ++i)
    datarefs_copy.quick_push (*datarefs[i]);
}

void
vec_info_shared::check_datarefs ()
{
  if (!flag_checking)
    return;
  gcc_assert (datarefs.length () == datarefs_copy.length ());
  for (unsigned i = 0; i < datarefs.length (); ++i)
    if (memcmp (&datarefs_copy[i], datarefs[i], sizeof (data_reference)) != 0)
      gcc_unreachable ();
}

/* Record that STMT belongs to the vectorizable region.  Create and return
   an associated stmt_vec_info.  */

stmt_vec_info
vec_info::add_stmt (gimple *stmt)
{
  stmt_vec_info res = new_stmt_vec_info (stmt);
  set_vinfo_for_stmt (stmt, res);
  return res;
}

/* Record that STMT belongs to the vectorizable region.  Create a new
   stmt_vec_info and mark VECINFO as being related and return the new
   stmt_vec_info.  */

stmt_vec_info
vec_info::add_pattern_stmt (gimple *stmt, stmt_vec_info stmt_info)
{
  stmt_vec_info res = new_stmt_vec_info (stmt);
  set_vinfo_for_stmt (stmt, res, false);
  STMT_VINFO_RELATED_STMT (res) = stmt_info;
  return res;
}

/* If STMT has an associated stmt_vec_info, return that stmt_vec_info,
   otherwise return null.  It is safe to call this function on any
   statement, even if it might not be part of the vectorizable region.  */

stmt_vec_info
vec_info::lookup_stmt (gimple *stmt)
{
  unsigned int uid = gimple_uid (stmt);
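  /* A zero UID means STMT has no stmt_vec_info; a nonzero UID is the
     1-based index into stmt_vec_infos (see set_vinfo_for_stmt below).  */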
  if (uid > 0 && uid - 1 < stmt_vec_infos.length ())
    {
      stmt_vec_info res = stmt_vec_infos[uid - 1];
      if (res && res->stmt == stmt)
        return res;
    }
  return NULL;
}

/* If NAME is an SSA_NAME and its definition has an associated stmt_vec_info,
   return that stmt_vec_info, otherwise return null.  It is safe to call
   this on arbitrary operands.  */

stmt_vec_info
vec_info::lookup_def (tree name)
{
  if (TREE_CODE (name) == SSA_NAME
      && !SSA_NAME_IS_DEFAULT_DEF (name))
    return lookup_stmt (SSA_NAME_DEF_STMT (name));
  return NULL;
}

/* See whether there is a single non-debug statement that uses LHS and
   whether that statement has an associated stmt_vec_info.  Return the
   stmt_vec_info if so, otherwise return null.  */

stmt_vec_info
vec_info::lookup_single_use (tree lhs)
{
  use_operand_p dummy;
  gimple *use_stmt;
  if (single_imm_use (lhs, &dummy, &use_stmt))
    return lookup_stmt (use_stmt);
  return NULL;
}

/* Return vectorization information about DR.  */

dr_vec_info *
vec_info::lookup_dr (data_reference *dr)
{
  stmt_vec_info stmt_info = lookup_stmt (DR_STMT (dr));
  /* DR_STMT should never refer to a stmt in a pattern replacement.  */
  gcc_checking_assert (!is_pattern_stmt_p (stmt_info));
  return STMT_VINFO_DR_INFO (stmt_info->dr_aux.stmt);
}

/* Record that NEW_STMT_INFO now implements the same data reference
   as OLD_STMT_INFO.  */

void
vec_info::move_dr (stmt_vec_info new_stmt_info, stmt_vec_info old_stmt_info)
{
  gcc_assert (!is_pattern_stmt_p (old_stmt_info));
  STMT_VINFO_DR_INFO (old_stmt_info)->stmt = new_stmt_info;
  new_stmt_info->dr_aux = old_stmt_info->dr_aux;
  STMT_VINFO_DR_WRT_VEC_LOOP (new_stmt_info)
    = STMT_VINFO_DR_WRT_VEC_LOOP (old_stmt_info);
  STMT_VINFO_GATHER_SCATTER_P (new_stmt_info)
    = STMT_VINFO_GATHER_SCATTER_P (old_stmt_info);
}

/* Permanently remove the statement described by STMT_INFO from the
   function.  */

void
vec_info::remove_stmt (stmt_vec_info stmt_info)
{
  gcc_assert (!stmt_info->pattern_stmt_p);
  set_vinfo_for_stmt (stmt_info->stmt, NULL);
  unlink_stmt_vdef (stmt_info->stmt);
  gimple_stmt_iterator si = gsi_for_stmt (stmt_info->stmt);
  gsi_remove (&si, true);
  release_defs (stmt_info->stmt);
  free_stmt_vec_info (stmt_info);
}

/* Replace the statement at GSI by NEW_STMT, both the vectorization
   information and the function itself.  STMT_INFO describes the statement
   at GSI.  */

void
vec_info::replace_stmt (gimple_stmt_iterator *gsi, stmt_vec_info stmt_info,
                        gimple *new_stmt)
{
  gimple *old_stmt = stmt_info->stmt;
  gcc_assert (!stmt_info->pattern_stmt_p && old_stmt == gsi_stmt (*gsi));
  gimple_set_uid (new_stmt, gimple_uid (old_stmt));
  stmt_info->stmt = new_stmt;
  gsi_replace (gsi, new_stmt, true);
}

/* Insert stmts in SEQ on the VEC_INFO region entry.  If CONTEXT is
   not NULL it specifies whether to use the sub-region entry
   determined by it, currently used for loop vectorization to insert
   on the inner loop entry vs. the outer loop entry.  */

void
vec_info::insert_seq_on_entry (stmt_vec_info context, gimple_seq seq)
{
  if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (this))
    {
      class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
      basic_block new_bb;
      edge pe;

      if (context && nested_in_vect_loop_p (loop, context))
        loop = loop->inner;

      pe = loop_preheader_edge (loop);
      new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
      gcc_assert (!new_bb);
    }
  else
    {
      bb_vec_info bb_vinfo = as_a <bb_vec_info> (this);
      gimple_stmt_iterator gsi_region_begin
        = gsi_after_labels (bb_vinfo->bbs[0]);
      gsi_insert_seq_before (&gsi_region_begin, seq, GSI_SAME_STMT);
    }
}

/* Like insert_seq_on_entry but just inserts the single stmt NEW_STMT.  */

void
vec_info::insert_on_entry (stmt_vec_info context, gimple *new_stmt)
{
  gimple_seq seq = NULL;
  gimple_stmt_iterator gsi = gsi_start (seq);
  gsi_insert_before_without_update (&gsi, new_stmt, GSI_SAME_STMT);
  insert_seq_on_entry (context, seq);
}

/* Create and initialize a new stmt_vec_info struct for STMT.  */

stmt_vec_info
vec_info::new_stmt_vec_info (gimple *stmt)
{
  stmt_vec_info res = XCNEW (class _stmt_vec_info);
  res->stmt = stmt;

  STMT_VINFO_TYPE (res) = undef_vec_info_type;
  STMT_VINFO_RELEVANT (res) = vect_unused_in_scope;
  STMT_VINFO_VECTORIZABLE (res) = true;
  STMT_VINFO_REDUC_TYPE (res) = TREE_CODE_REDUCTION;
  STMT_VINFO_REDUC_CODE (res) = ERROR_MARK;
  STMT_VINFO_REDUC_FN (res) = IFN_LAST;
  STMT_VINFO_REDUC_IDX (res) = -1;
  STMT_VINFO_SLP_VECT_ONLY (res) = false;
  STMT_VINFO_SLP_VECT_ONLY_PATTERN (res) = false;
  STMT_VINFO_VEC_STMTS (res) = vNULL;

  if (is_a <loop_vec_info> (this)
      && gimple_code (stmt) == GIMPLE_PHI
      && is_loop_header_bb_p (gimple_bb (stmt)))
    STMT_VINFO_DEF_TYPE (res) = vect_unknown_def_type;
  else
    STMT_VINFO_DEF_TYPE (res) = vect_internal_def;

  STMT_SLP_TYPE (res) = loop_vect;

  /* This is really "uninitialized" until vect_compute_data_ref_alignment.  */
  res->dr_aux.misalignment = DR_MISALIGNMENT_UNINITIALIZED;

  return res;
}

/* Associate STMT with INFO.  */

void
vec_info::set_vinfo_for_stmt (gimple *stmt, stmt_vec_info info, bool check_ro)
{
  unsigned int uid = gimple_uid (stmt);
  if (uid == 0)
    {
      gcc_assert (!check_ro || !stmt_vec_info_ro);
      gcc_checking_assert (info);
      uid = stmt_vec_infos.length () + 1;
      gimple_set_uid (stmt, uid);
      stmt_vec_infos.safe_push (info);
    }
  else
    {
      gcc_checking_assert (info == NULL);
      stmt_vec_infos[uid - 1] = info;
    }
}

/* Free the contents of stmt_vec_infos.  */

void
vec_info::free_stmt_vec_infos (void)
{
  unsigned int i;
  stmt_vec_info info;
  FOR_EACH_VEC_ELT (stmt_vec_infos, i, info)
    if (info != NULL)
      free_stmt_vec_info (info);
  stmt_vec_infos.release ();
}

/* Free STMT_INFO.  */

void
vec_info::free_stmt_vec_info (stmt_vec_info stmt_info)
{
  if (stmt_info->pattern_stmt_p)
    {
      gimple_set_bb (stmt_info->stmt, NULL);
      tree lhs = gimple_get_lhs (stmt_info->stmt);
      if (lhs && TREE_CODE (lhs) == SSA_NAME)
        release_ssa_name (lhs);
    }

  STMT_VINFO_SIMD_CLONE_INFO (stmt_info).release ();
  STMT_VINFO_VEC_STMTS (stmt_info).release ();
  free (stmt_info);
}

/* Returns true if S1 dominates S2.  */

bool
vect_stmt_dominates_stmt_p (gimple *s1, gimple *s2)
{
  basic_block bb1 = gimple_bb (s1), bb2 = gimple_bb (s2);

  /* If bb1 is NULL, it should be the GIMPLE_NOP defining stmt of a
     default definition (D) SSA_NAME.  Assume it lives at the beginning
     of the function and thus dominates everything.  */
  if (!bb1 || s1 == s2)
    return true;

  /* If bb2 is NULL, it doesn't dominate any stmt with a bb.  */
  if (!bb2)
    return false;

  if (bb1 != bb2)
    return dominated_by_p (CDI_DOMINATORS, bb2, bb1);

  /* PHIs in the same basic block are assumed to be executed in
     parallel; if only one of the two stmts is a PHI, it dominates the
     other stmt in the same basic block.  */
  if (gimple_code (s1) == GIMPLE_PHI)
    return true;

  if (gimple_code (s2) == GIMPLE_PHI)
    return false;

  /* Inserted vectorized stmts all have UID 0 while the original stmts
     in the IL have UID increasing within a BB.  Walk from both sides
     until we find the other stmt or a stmt with UID != 0.  */
  gimple_stmt_iterator gsi1 = gsi_for_stmt (s1);
  while (gimple_uid (gsi_stmt (gsi1)) == 0)
    {
      gsi_next (&gsi1);
      if (gsi_end_p (gsi1))
        return false;
      if (gsi_stmt (gsi1) == s2)
        return true;
    }
  if (gimple_uid (gsi_stmt (gsi1)) == -1u)
    return false;

  gimple_stmt_iterator gsi2 = gsi_for_stmt (s2);
  while (gimple_uid (gsi_stmt (gsi2)) == 0)
    {
      gsi_prev (&gsi2);
      if (gsi_end_p (gsi2))
        return false;
      if (gsi_stmt (gsi2) == s1)
        return true;
    }
  if (gimple_uid (gsi_stmt (gsi2)) == -1u)
    return false;

  if (gimple_uid (gsi_stmt (gsi1)) <= gimple_uid (gsi_stmt (gsi2)))
    return true;
  return false;
}

/* A helper function to free scev and LOOP niter information, as well as
   clear loop constraint LOOP_C_FINITE.  */

void
vect_free_loop_info_assumptions (class loop *loop)
{
  scev_reset_htab ();
  /* We need to explicitly reset upper bound information since it is
     used even after free_numbers_of_iterations_estimates.  */
  loop->any_upper_bound = false;
  loop->any_likely_upper_bound = false;
  free_numbers_of_iterations_estimates (loop);
  loop_constraint_clear (loop, LOOP_C_FINITE);
}

/* If LOOP has been versioned during ifcvt, return the internal call
   guarding it.  */

gimple *
vect_loop_vectorized_call (class loop *loop, gcond **cond)
{
  basic_block bb = loop_preheader_edge (loop)->src;
  gimple *g;
  do
    {
      g = last_stmt (bb);
      if (g)
        break;
      if (!single_pred_p (bb))
        break;
      bb = single_pred (bb);
    }
  while (1);
  if (g && gimple_code (g) == GIMPLE_COND)
    {
      if (cond)
        *cond = as_a <gcond *> (g);
      gimple_stmt_iterator gsi = gsi_for_stmt (g);
      gsi_prev (&gsi);
      if (!gsi_end_p (gsi))
        {
          g = gsi_stmt (gsi);
          if (gimple_call_internal_p (g, IFN_LOOP_VECTORIZED)
              && (tree_to_shwi (gimple_call_arg (g, 0)) == loop->num
                  || tree_to_shwi (gimple_call_arg (g, 1)) == loop->num))
            return g;
        }
    }
  return NULL;
}

/* If LOOP has been versioned during loop distribution, return the
   guarding internal call.  */

static gimple *
vect_loop_dist_alias_call (class loop *loop)
{
  basic_block bb;
  basic_block entry;
  class loop *outer, *orig;
  gimple_stmt_iterator gsi;
  gimple *g;

  if (loop->orig_loop_num == 0)
    return NULL;

  orig = get_loop (cfun, loop->orig_loop_num);
  if (orig == NULL)
    {
      /* The original loop is somehow destroyed.  Clear the information.  */
      loop->orig_loop_num = 0;
      return NULL;
    }

  if (loop != orig)
    bb = nearest_common_dominator (CDI_DOMINATORS, loop->header, orig->header);
  else
    bb = loop_preheader_edge (loop)->src;

  outer = bb->loop_father;
  entry = ENTRY_BLOCK_PTR_FOR_FN (cfun);

  /* Look upward in dominance tree.  */
  for (; bb != entry && flow_bb_inside_loop_p (outer, bb);
       bb = get_immediate_dominator (CDI_DOMINATORS, bb))
    {
      g = last_stmt (bb);
      if (g == NULL || gimple_code (g) != GIMPLE_COND)
        continue;

      gsi = gsi_for_stmt (g);
      gsi_prev (&gsi);
      if (gsi_end_p (gsi))
        continue;

      g = gsi_stmt (gsi);
      /* The guarding internal function call must have the same distribution
         alias id.  */
      if (gimple_call_internal_p (g, IFN_LOOP_DIST_ALIAS)
          && (tree_to_shwi (gimple_call_arg (g, 0)) == loop->orig_loop_num))
        return g;
    }
  return NULL;
}

/* Set the uids of all the statements in basic blocks inside the loop
   represented by LOOP_VINFO.  LOOP_VECTORIZED_CALL is the internal
   call guarding the loop which has been if-converted.  */
static void
set_uid_loop_bbs (loop_vec_info loop_vinfo, gimple *loop_vectorized_call)
{
  tree arg = gimple_call_arg (loop_vectorized_call, 1);
  basic_block *bbs;
  unsigned int i;
  class loop *scalar_loop = get_loop (cfun, tree_to_shwi (arg));

  LOOP_VINFO_SCALAR_LOOP (loop_vinfo) = scalar_loop;
  gcc_checking_assert (vect_loop_vectorized_call (scalar_loop)
                       == loop_vectorized_call);
  /* If we are going to vectorize the outer loop, prevent vectorization
     of the inner loop in the scalar loop - either the scalar loop is
     thrown away, so vectorizing it would be wasted work, or it is used
     only for a few iterations.  */
  if (scalar_loop->inner)
    {
      gimple *g = vect_loop_vectorized_call (scalar_loop->inner);
      if (g)
        {
          arg = gimple_call_arg (g, 0);
          get_loop (cfun, tree_to_shwi (arg))->dont_vectorize = true;
          fold_loop_internal_call (g, boolean_false_node);
        }
    }
  bbs = get_loop_body (scalar_loop);
  for (i = 0; i < scalar_loop->num_nodes; i++)
    {
      basic_block bb = bbs[i];
      gimple_stmt_iterator gsi;
      for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
        {
          gimple *phi = gsi_stmt (gsi);
          gimple_set_uid (phi, 0);
        }
      for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
        {
          gimple *stmt = gsi_stmt (gsi);
          gimple_set_uid (stmt, 0);
        }
    }
  free (bbs);
}

/* Try to vectorize LOOP.  */

static unsigned
try_vectorize_loop_1 (hash_table<simduid_to_vf> *&simduid_to_vf_htab,
                      unsigned *num_vectorized_loops, loop_p loop,
                      gimple *loop_vectorized_call,
                      gimple *loop_dist_alias_call)
{
  unsigned ret = 0;
  vec_info_shared shared;
  auto_purge_vect_location sentinel;
  vect_location = find_loop_location (loop);

  if (LOCATION_LOCUS (vect_location.get_location_t ()) != UNKNOWN_LOCATION
      && dump_enabled_p ())
    dump_printf (MSG_NOTE | MSG_PRIORITY_INTERNALS,
                 "\nAnalyzing loop at %s:%d\n",
                 LOCATION_FILE (vect_location.get_location_t ()),
                 LOCATION_LINE (vect_location.get_location_t ()));

  opt_loop_vec_info loop_vinfo = opt_loop_vec_info::success (NULL);
  /* In the case of epilogue vectorization the loop already has its
     loop_vec_info set, so we do not need to re-analyze it here.  */
  if (loop_vec_info vinfo = loop_vec_info_for_loop (loop))
    loop_vinfo = opt_loop_vec_info::success (vinfo);
  else
    {
      /* Try to analyze the loop, retaining an opt_problem if dump_enabled_p.  */
      loop_vinfo = vect_analyze_loop (loop, &shared);
      loop->aux = loop_vinfo;
    }

  if (!loop_vinfo)
    if (dump_enabled_p ())
      if (opt_problem *problem = loop_vinfo.get_problem ())
        {
          dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                           "couldn't vectorize loop\n");
          problem->emit_and_clear ();
        }

  if (!loop_vinfo || !LOOP_VINFO_VECTORIZABLE_P (loop_vinfo))
    {
      /* Free existing information if loop is analyzed with some
         assumptions.  */
      if (loop_constraint_set_p (loop, LOOP_C_FINITE))
        vect_free_loop_info_assumptions (loop);

      /* If we applied if-conversion then try to vectorize the
         BB of innermost loops.
         ???  Ideally BB vectorization would learn to vectorize
         control flow by applying if-conversion on-the-fly, the
         following retains the if-converted loop body even when
         only non-if-converted parts took part in BB vectorization.  */
      if (flag_tree_slp_vectorize != 0
          && loop_vectorized_call
          && ! loop->inner)
        {
          basic_block bb = loop->header;
          bool require_loop_vectorize = false;
          for (gimple_stmt_iterator gsi = gsi_start_bb (bb);
               !gsi_end_p (gsi); gsi_next (&gsi))
            {
              gimple *stmt = gsi_stmt (gsi);
              gcall *call = dyn_cast <gcall *> (stmt);
              if (call && gimple_call_internal_p (call))
                {
                  internal_fn ifn = gimple_call_internal_fn (call);
                  if (ifn == IFN_MASK_LOAD || ifn == IFN_MASK_STORE
                      /* Don't keep the if-converted parts when the ifn with
                         specific type is not supported by the backend.  */
                      || (direct_internal_fn_p (ifn)
                          && !direct_internal_fn_supported_p
                          (call, OPTIMIZE_FOR_SPEED)))
                    {
                      require_loop_vectorize = true;
                      break;
                    }
                }
              gimple_set_uid (stmt, -1);
              gimple_set_visited (stmt, false);
            }
          if (!require_loop_vectorize && vect_slp_bb (bb))
            {
              fold_loop_internal_call (loop_vectorized_call,
                                       boolean_true_node);
              loop_vectorized_call = NULL;
              ret |= TODO_cleanup_cfg | TODO_update_ssa_only_virtuals;
            }
        }
      /* If outer loop vectorization fails for LOOP_VECTORIZED guarded
         loop, don't vectorize its inner loop; we'll attempt to
         vectorize LOOP_VECTORIZED guarded inner loop of the scalar
         loop version.  */
      if (loop_vectorized_call && loop->inner)
        loop->inner->dont_vectorize = true;
      return ret;
    }

  /* Only count the original scalar loops.  */
  if (!LOOP_VINFO_EPILOGUE_P (loop_vinfo) && !dbg_cnt (vect_loop))
    {
      /* Free existing information if loop is analyzed with some
         assumptions.  */
      if (loop_constraint_set_p (loop, LOOP_C_FINITE))
        vect_free_loop_info_assumptions (loop);
      return ret;
    }

  if (loop_vectorized_call)
    set_uid_loop_bbs (loop_vinfo, loop_vectorized_call);

  unsigned HOST_WIDE_INT bytes;
  if (dump_enabled_p ())
    {
      if (GET_MODE_SIZE (loop_vinfo->vector_mode).is_constant (&bytes))
        dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
                         "loop vectorized using %wu byte vectors\n", bytes);
      else
        dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
                         "loop vectorized using variable length vectors\n");
    }

  loop_p new_loop = vect_transform_loop (loop_vinfo,
                                         loop_vectorized_call);
  (*num_vectorized_loops)++;
  /* Now that the loop has been vectorized, allow it to be unrolled
     etc.  */
  loop->force_vectorize = false;

  if (loop->simduid)
    {
      simduid_to_vf *simduid_to_vf_data = XNEW (simduid_to_vf);
      if (!simduid_to_vf_htab)
        simduid_to_vf_htab = new hash_table<simduid_to_vf> (15);
      simduid_to_vf_data->simduid = DECL_UID (loop->simduid);
      simduid_to_vf_data->vf = loop_vinfo->vectorization_factor;
      *simduid_to_vf_htab->find_slot (simduid_to_vf_data, INSERT)
          = simduid_to_vf_data;
    }

  if (loop_vectorized_call)
    {
      fold_loop_internal_call (loop_vectorized_call, boolean_true_node);
      loop_vectorized_call = NULL;
      ret |= TODO_cleanup_cfg;
    }
  if (loop_dist_alias_call)
    {
      tree value = gimple_call_arg (loop_dist_alias_call, 1);
      fold_loop_internal_call (loop_dist_alias_call, value);
      loop_dist_alias_call = NULL;
      ret |= TODO_cleanup_cfg;
    }

  /* Epilogue of vectorized loop must be vectorized too.  */
  if (new_loop)
    {
      /* Don't include vectorized epilogues in the "vectorized loops"
         count.  */
      unsigned dont_count = *num_vectorized_loops;
      ret |= try_vectorize_loop_1 (simduid_to_vf_htab, &dont_count,
                                   new_loop, NULL, NULL);
    }

  return ret;
}

/* Try to vectorize LOOP.  */

static unsigned
try_vectorize_loop (hash_table<simduid_to_vf> *&simduid_to_vf_htab,
                    unsigned *num_vectorized_loops, loop_p loop)
{
  if (!((flag_tree_loop_vectorize
         && optimize_loop_nest_for_speed_p (loop))
        || loop->force_vectorize))
    return 0;

  return try_vectorize_loop_1 (simduid_to_vf_htab, num_vectorized_loops, loop,
                               vect_loop_vectorized_call (loop),
                               vect_loop_dist_alias_call (loop));
}


/* Function vectorize_loops.

   Entry point to loop vectorization phase.  */

unsigned
vectorize_loops (void)
{
  unsigned int i;
  unsigned int num_vectorized_loops = 0;
  unsigned int vect_loops_num;
  class loop *loop;
  hash_table<simduid_to_vf> *simduid_to_vf_htab = NULL;
  hash_table<simd_array_to_simduid> *simd_array_to_simduid_htab = NULL;
  bool any_ifcvt_loops = false;
  unsigned ret = 0;

  vect_loops_num = number_of_loops (cfun);

  /* Bail out if there are no loops.  */
  if (vect_loops_num <= 1)
    return 0;

  vect_slp_init ();

  if (cfun->has_simduid_loops)
    note_simd_array_uses (&simd_array_to_simduid_htab);

  /*  ----------- Analyze loops. -----------  */

  /* If some loop was duplicated, it gets a bigger number
     than all previously defined loops.  This fact allows us to run
     only over the initial loops, skipping newly generated ones.  */
  FOR_EACH_LOOP (loop, 0)
    if (loop->dont_vectorize)
      {
        any_ifcvt_loops = true;
        /* If-conversion sometimes versions both the outer loop
           (for the case when outer loop vectorization might be
           desirable) as well as the inner loop in the scalar version
           of the loop.  So we have:
            if (LOOP_VECTORIZED (1, 3))
              {
                loop1
                  loop2
              }
            else
              loop3 (copy of loop1)
                if (LOOP_VECTORIZED (4, 5))
                  loop4 (copy of loop2)
                else
                  loop5 (copy of loop4)
           If FOR_EACH_LOOP gives us loop3 first (which has
           dont_vectorize set), make sure to process loop1 before loop4,
           so that we can prevent vectorization of loop4 if loop1
           is successfully vectorized.  */
        if (loop->inner)
          {
            gimple *loop_vectorized_call
              = vect_loop_vectorized_call (loop);
            if (loop_vectorized_call
                && vect_loop_vectorized_call (loop->inner))
              {
                tree arg = gimple_call_arg (loop_vectorized_call, 0);
                class loop *vector_loop
                  = get_loop (cfun, tree_to_shwi (arg));
                if (vector_loop && vector_loop != loop)
                  {
                    /* Make sure we don't vectorize it twice.  */
                    vector_loop->dont_vectorize = true;
                    ret |= try_vectorize_loop (simduid_to_vf_htab,
                                               &num_vectorized_loops,
                                               vector_loop);
                  }
              }
          }
      }
    else
      ret |= try_vectorize_loop (simduid_to_vf_htab, &num_vectorized_loops,
                                 loop);

  vect_location = dump_user_location_t ();

  statistics_counter_event (cfun, "Vectorized loops", num_vectorized_loops);
  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "vectorized %u loops in function.\n",
                     num_vectorized_loops);

  /*  ----------- Finalize. -----------  */

  if (any_ifcvt_loops)
    for (i = 1; i < number_of_loops (cfun); i++)
      {
        loop = get_loop (cfun, i);
        if (loop && loop->dont_vectorize)
          {
            gimple *g = vect_loop_vectorized_call (loop);
            if (g)
              {
                fold_loop_internal_call (g, boolean_false_node);
                ret |= TODO_cleanup_cfg;
                g = NULL;
              }
            else
              g = vect_loop_dist_alias_call (loop);

            if (g)
              {
                fold_loop_internal_call (g, boolean_false_node);
                ret |= TODO_cleanup_cfg;
              }
          }
      }

  for (i = 1; i < number_of_loops (cfun); i++)
    {
      loop_vec_info loop_vinfo;
      bool has_mask_store;

      loop = get_loop (cfun, i);
      if (!loop || !loop->aux)
        continue;
      loop_vinfo = (loop_vec_info) loop->aux;
      has_mask_store = LOOP_VINFO_HAS_MASK_STORE (loop_vinfo);
      delete loop_vinfo;
      if (has_mask_store
          && targetm.vectorize.empty_mask_is_expensive (IFN_MASK_STORE))
        optimize_mask_stores (loop);
      loop->aux = NULL;
    }

  /* Fold IFN_GOMP_SIMD_{VF,LANE,LAST_LANE,ORDERED_{START,END}} builtins.  */
  if (cfun->has_simduid_loops)
    {
      adjust_simduid_builtins (simduid_to_vf_htab);
      /* Avoid stale SCEV cache entries for the SIMD_LANE defs.  */
      scev_reset ();
    }

  /* Shrink any "omp simd array" temporary arrays to the
     actual vectorization factors.  */
  if (simd_array_to_simduid_htab)
    shrink_simd_arrays (simd_array_to_simduid_htab, simduid_to_vf_htab);
  delete simduid_to_vf_htab;
  cfun->has_simduid_loops = false;
  vect_slp_fini ();

  if (num_vectorized_loops > 0)
    {
      /* If we vectorized any loop only virtual SSA form needs to be updated.
         ???  Also while we try hard to update loop-closed SSA form we fail
         to properly do this in some corner-cases (see PR56286).  */
      rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa_only_virtuals);
      return TODO_cleanup_cfg;
    }

  return ret;
}


/* Entry point to the simduid cleanup pass.  */

namespace {

const pass_data pass_data_simduid_cleanup =
{
  GIMPLE_PASS, /* type */
  "simduid", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_NONE, /* tv_id */
  ( PROP_ssa | PROP_cfg ), /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  0, /* todo_flags_finish */
};

class pass_simduid_cleanup : public gimple_opt_pass
{
public:
  pass_simduid_cleanup (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_simduid_cleanup, ctxt)
  {}

  /* opt_pass methods: */
  opt_pass * clone () { return new pass_simduid_cleanup (m_ctxt); }
  virtual bool gate (function *fun) { return fun->has_simduid_loops; }
  virtual unsigned int execute (function *);

}; // class pass_simduid_cleanup

unsigned int
pass_simduid_cleanup::execute (function *fun)
{
  hash_table<simd_array_to_simduid> *simd_array_to_simduid_htab = NULL;

  note_simd_array_uses (&simd_array_to_simduid_htab);

  /* Fold IFN_GOMP_SIMD_{VF,LANE,LAST_LANE,ORDERED_{START,END}} builtins.  */
  adjust_simduid_builtins (NULL);

  /* Shrink any "omp simd array" temporary arrays to the
     actual vectorization factors.  */
  if (simd_array_to_simduid_htab)
    shrink_simd_arrays (simd_array_to_simduid_htab, NULL);
  fun->has_simduid_loops = false;
  return 0;
}

}  // anon namespace

gimple_opt_pass *
make_pass_simduid_cleanup (gcc::context *ctxt)
{
  return new pass_simduid_cleanup (ctxt);
}


/*  Entry point to basic block SLP phase.  */

namespace {

const pass_data pass_data_slp_vectorize =
{
  GIMPLE_PASS, /* type */
  "slp", /* name */
  OPTGROUP_LOOP | OPTGROUP_VEC, /* optinfo_flags */
  TV_TREE_SLP_VECTORIZATION, /* tv_id */
  ( PROP_ssa | PROP_cfg ), /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  TODO_update_ssa, /* todo_flags_finish */
};

class pass_slp_vectorize : public gimple_opt_pass
{
public:
  pass_slp_vectorize (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_slp_vectorize, ctxt)
  {}

  /* opt_pass methods: */
  opt_pass * clone () { return new pass_slp_vectorize (m_ctxt); }
  virtual bool gate (function *) { return flag_tree_slp_vectorize != 0; }
  virtual unsigned int execute (function *);

}; // class pass_slp_vectorize

unsigned int
pass_slp_vectorize::execute (function *fun)
{
  auto_purge_vect_location sentinel;
  basic_block bb;

  bool in_loop_pipeline = scev_initialized_p ();
  if (!in_loop_pipeline)
    {
      loop_optimizer_init (LOOPS_NORMAL);
      scev_initialize ();
    }

  /* Mark all stmts as not belonging to the current region and unvisited.  */
  FOR_EACH_BB_FN (bb, fun)
    {
      for (gphi_iterator gsi = gsi_start_phis (bb); !gsi_end_p (gsi);
           gsi_next (&gsi))
        {
          gphi *stmt = gsi.phi ();
          gimple_set_uid (stmt, -1);
          gimple_set_visited (stmt, false);
        }
      for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi);
           gsi_next (&gsi))
        {
          gimple *stmt = gsi_stmt (gsi);
          gimple_set_uid (stmt, -1);
          gimple_set_visited (stmt, false);
        }
    }

  vect_slp_init ();

  vect_slp_function (fun);

  vect_slp_fini ();

  if (!in_loop_pipeline)
    {
      scev_finalize ();
      loop_optimizer_finalize ();
    }

  return 0;
}

} // anon namespace

gimple_opt_pass *
make_pass_slp_vectorize (gcc::context *ctxt)
{
  return new pass_slp_vectorize (ctxt);
}


/* Increase alignment of global arrays to improve vectorization potential.
   TODO:
   - Consider also structs that have an array field.
   - Use ipa analysis to prune arrays that can't be vectorized?
     This should involve global alignment analysis and in the future also
     array padding.  */

static unsigned get_vec_alignment_for_type (tree);
static hash_map<tree, unsigned> *type_align_map;

/* Return the alignment of the vector type corresponding to the array's
   element type, or 0 if no such vector type exists or the array is
   smaller than one vector.  */
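/* E.g. on a target with 128-bit vectors, a global "int a[256]" would
   yield the alignment of the corresponding V4SI type (128 bits), while
   an array smaller than one vector yields 0.  */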
static unsigned
get_vec_alignment_for_array_type (tree type)
{
  gcc_assert (TREE_CODE (type) == ARRAY_TYPE);
  poly_uint64 array_size, vector_size;

  tree scalar_type = strip_array_types (type);
  tree vectype = get_related_vectype_for_scalar_type (VOIDmode, scalar_type);
  if (!vectype
      || !poly_int_tree_p (TYPE_SIZE (type), &array_size)
      || !poly_int_tree_p (TYPE_SIZE (vectype), &vector_size)
      || maybe_lt (array_size, vector_size))
    return 0;

  return TYPE_ALIGN (vectype);
}

/* Return the alignment of the field having the maximum alignment of the
   vector type corresponding to its scalar type.  For now, we only
   consider fields whose offset is a multiple of their vector alignment.
   Returns 0 if no suitable field is found.  */
static unsigned
get_vec_alignment_for_record_type (tree type)
{
  gcc_assert (TREE_CODE (type) == RECORD_TYPE);

  unsigned max_align = 0, alignment;
  HOST_WIDE_INT offset;
  tree offset_tree;

  if (TYPE_PACKED (type))
    return 0;

  unsigned *slot = type_align_map->get (type);
  if (slot)
    return *slot;

  for (tree field = first_field (type);
       field != NULL_TREE;
       field = DECL_CHAIN (field))
    {
      /* Skip if not FIELD_DECL or if alignment is set by user.  */
      if (TREE_CODE (field) != FIELD_DECL
          || DECL_USER_ALIGN (field)
          || DECL_ARTIFICIAL (field))
        continue;

      /* We don't need to process the type further if offset is variable,
         since the offsets of remaining members will also be variable.  */
      if (TREE_CODE (DECL_FIELD_OFFSET (field)) != INTEGER_CST
          || TREE_CODE (DECL_FIELD_BIT_OFFSET (field)) != INTEGER_CST)
        break;

      /* Similarly stop processing the type if offset_tree
         does not fit in unsigned HOST_WIDE_INT.  */
      offset_tree = bit_position (field);
      if (!tree_fits_uhwi_p (offset_tree))
        break;

      offset = tree_to_uhwi (offset_tree);
      alignment = get_vec_alignment_for_type (TREE_TYPE (field));

      /* Get maximum alignment of vectorized field/array among those members
         whose offset is multiple of the vector alignment.  */
      if (alignment
          && (offset % alignment == 0)
          && (alignment > max_align))
        max_align = alignment;
    }

  type_align_map->put (type, max_align);
  return max_align;
}

/* Return the alignment of the vector type corresponding to TYPE's
   scalar type, or 0 if no such type exists or if its alignment is not
   greater than TYPE's own alignment.  */
static unsigned
get_vec_alignment_for_type (tree type)
{
  if (type == NULL_TREE)
    return 0;

  gcc_assert (TYPE_P (type));

  static unsigned alignment = 0;
  switch (TREE_CODE (type))
    {
      case ARRAY_TYPE:
        alignment = get_vec_alignment_for_array_type (type);
        break;
      case RECORD_TYPE:
        alignment = get_vec_alignment_for_record_type (type);
        break;
      default:
        alignment = 0;
        break;
    }

  return (alignment > TYPE_ALIGN (type)) ? alignment : 0;
}

/* Entry point to increase_alignment pass.  */
static unsigned int
increase_alignment (void)
{
  varpool_node *vnode;

  vect_location = dump_user_location_t ();
  type_align_map = new hash_map<tree, unsigned>;

  /* Increase the alignment of all global arrays for vectorization.  */
  FOR_EACH_DEFINED_VARIABLE (vnode)
    {
      tree decl = vnode->decl;
      unsigned int alignment;

      if ((decl_in_symtab_p (decl)
          && !symtab_node::get (decl)->can_increase_alignment_p ())
          || DECL_USER_ALIGN (decl) || DECL_ARTIFICIAL (decl))
        continue;

      alignment = get_vec_alignment_for_type (TREE_TYPE (decl));
      if (alignment && vect_can_force_dr_alignment_p (decl, alignment))
        {
          vnode->increase_alignment (alignment);
          if (dump_enabled_p ())
            dump_printf (MSG_NOTE, "Increasing alignment of decl: %T\n", decl);
        }
    }

  delete type_align_map;
  return 0;
}


namespace {

const pass_data pass_data_ipa_increase_alignment =
{
  SIMPLE_IPA_PASS, /* type */
  "increase_alignment", /* name */
  OPTGROUP_LOOP | OPTGROUP_VEC, /* optinfo_flags */
  TV_IPA_OPT, /* tv_id */
  0, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  0, /* todo_flags_finish */
};

class pass_ipa_increase_alignment : public simple_ipa_opt_pass
{
public:
  pass_ipa_increase_alignment (gcc::context *ctxt)
    : simple_ipa_opt_pass (pass_data_ipa_increase_alignment, ctxt)
  {}

  /* opt_pass methods: */
  virtual bool gate (function *)
    {
      return flag_section_anchors && flag_tree_loop_vectorize;
    }

  virtual unsigned int execute (function *) { return increase_alignment (); }

}; // class pass_ipa_increase_alignment

} // anon namespace

simple_ipa_opt_pass *
make_pass_ipa_increase_alignment (gcc::context *ctxt)
{
  return new pass_ipa_increase_alignment (ctxt);
}

/* If the condition represented by T is a comparison or the SSA name
   result of a comparison, extract the comparison's operands.  Represent
   T as NE_EXPR <T, 0> otherwise.  */
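/* For example, a comparison tree "a_1 < b_2" yields {LT_EXPR, a_1, b_2};
   an SSA name _3 defined by "_3 = a_1 < b_2" yields the same; any other
   tree T yields {NE_EXPR, T, 0}.  */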

void
scalar_cond_masked_key::get_cond_ops_from_tree (tree t)
{
  if (TREE_CODE_CLASS (TREE_CODE (t)) == tcc_comparison)
    {
      this->code = TREE_CODE (t);
      this->op0 = TREE_OPERAND (t, 0);
      this->op1 = TREE_OPERAND (t, 1);
      return;
    }

  if (TREE_CODE (t) == SSA_NAME)
    if (gassign *stmt = dyn_cast<gassign *> (SSA_NAME_DEF_STMT (t)))
      {
        tree_code code = gimple_assign_rhs_code (stmt);
        if (TREE_CODE_CLASS (code) == tcc_comparison)
          {
            this->code = code;
            this->op0 = gimple_assign_rhs1 (stmt);
            this->op1 = gimple_assign_rhs2 (stmt);
            return;
          }
      }

  this->code = NE_EXPR;
  this->op0 = t;
  this->op1 = build_zero_cst (TREE_TYPE (t));
}