1 /* Vectorizer
2    Copyright (C) 2003-2022 Free Software Foundation, Inc.
3    Contributed by Dorit Naishlos <dorit@il.ibm.com>
4 
5 This file is part of GCC.
6 
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
11 
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
15 for more details.
16 
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3.  If not see
19 <http://www.gnu.org/licenses/>.  */
20 
21 /* Loop and basic block vectorizer.
22 
23   This file contains drivers for the three vectorizers:
24   (1) loop vectorizer (inter-iteration parallelism),
25   (2) loop-aware SLP (intra-iteration parallelism) (invoked by the loop
26       vectorizer)
27   (3) BB vectorizer (out-of-loops), aka SLP
28 
29   The rest of the vectorizer's code is organized as follows:
30   - tree-vect-loop.cc - loop specific parts such as reductions, etc. These are
31     used by drivers (1) and (2).
32   - tree-vect-loop-manip.cc - vectorizer's loop control-flow utilities, used by
33     drivers (1) and (2).
34   - tree-vect-slp.cc - BB vectorization specific analysis and transformation,
35     used by drivers (2) and (3).
36   - tree-vect-stmts.cc - statements analysis and transformation (used by all).
37   - tree-vect-data-refs.cc - vectorizer specific data-refs analysis and
38     manipulations (used by all).
39   - tree-vect-patterns.cc - vectorizable code patterns detector (used by all)
40 
41   Here's a poor attempt at illustrating that:
42 
43      tree-vectorizer.cc:
44      loop_vect()  loop_aware_slp()  slp_vect()
45           |        /           \          /
46           |       /             \        /
47           tree-vect-loop.cc  tree-vect-slp.cc
48                 | \      \  /      /   |
49                 |  \      \/      /    |
50                 |   \     /\     /     |
51                 |    \   /  \   /      |
52          tree-vect-stmts.cc  tree-vect-data-refs.cc
53                        \      /
54                     tree-vect-patterns.cc
55 */
56 
57 #include "config.h"
58 #include "system.h"
59 #include "coretypes.h"
60 #include "backend.h"
61 #include "tree.h"
62 #include "gimple.h"
63 #include "predict.h"
64 #include "tree-pass.h"
65 #include "ssa.h"
66 #include "cgraph.h"
67 #include "fold-const.h"
68 #include "stor-layout.h"
69 #include "gimple-iterator.h"
70 #include "gimple-walk.h"
71 #include "tree-ssa-loop-manip.h"
72 #include "tree-ssa-loop-niter.h"
73 #include "tree-cfg.h"
74 #include "cfgloop.h"
75 #include "tree-vectorizer.h"
76 #include "tree-ssa-propagate.h"
77 #include "dbgcnt.h"
78 #include "tree-scalar-evolution.h"
79 #include "stringpool.h"
80 #include "attribs.h"
81 #include "gimple-pretty-print.h"
82 #include "opt-problem.h"
83 #include "internal-fn.h"
84 #include "tree-ssa-sccvn.h"
85 
86 /* Loop or bb location, with hotness information.  */
87 dump_user_location_t vect_location;
88 
89 /* auto_purge_vect_location's dtor: reset the vect_location
90    global, to avoid stale location_t values that could reference
91    GC-ed blocks.  */
92 
93 auto_purge_vect_location::~auto_purge_vect_location ()
94 {
95   vect_location = dump_user_location_t ();
96 }
97 
98 /* Dump a cost entry according to args to F.  */
99 
100 void
101 dump_stmt_cost (FILE *f, int count, enum vect_cost_for_stmt kind,
102 		stmt_vec_info stmt_info, slp_tree node, tree,
103 		int misalign, unsigned cost,
104 		enum vect_cost_model_location where)
105 {
106   if (stmt_info)
107     {
108       print_gimple_expr (f, STMT_VINFO_STMT (stmt_info), 0, TDF_SLIM);
109       fprintf (f, " ");
110     }
111   else if (node)
112     fprintf (f, "node %p ", (void *)node);
113   else
114     fprintf (f, "<unknown> ");
115   fprintf (f, "%d times ", count);
116   const char *ks = "unknown";
117   switch (kind)
118     {
119     case scalar_stmt:
120       ks = "scalar_stmt";
121       break;
122     case scalar_load:
123       ks = "scalar_load";
124       break;
125     case scalar_store:
126       ks = "scalar_store";
127       break;
128     case vector_stmt:
129       ks = "vector_stmt";
130       break;
131     case vector_load:
132       ks = "vector_load";
133       break;
134     case vector_gather_load:
135       ks = "vector_gather_load";
136       break;
137     case unaligned_load:
138       ks = "unaligned_load";
139       break;
140     case unaligned_store:
141       ks = "unaligned_store";
142       break;
143     case vector_store:
144       ks = "vector_store";
145       break;
146     case vector_scatter_store:
147       ks = "vector_scatter_store";
148       break;
149     case vec_to_scalar:
150       ks = "vec_to_scalar";
151       break;
152     case scalar_to_vec:
153       ks = "scalar_to_vec";
154       break;
155     case cond_branch_not_taken:
156       ks = "cond_branch_not_taken";
157       break;
158     case cond_branch_taken:
159       ks = "cond_branch_taken";
160       break;
161     case vec_perm:
162       ks = "vec_perm";
163       break;
164     case vec_promote_demote:
165       ks = "vec_promote_demote";
166       break;
167     case vec_construct:
168       ks = "vec_construct";
169       break;
170     }
171   fprintf (f, "%s ", ks);
172   if (kind == unaligned_load || kind == unaligned_store)
173     fprintf (f, "(misalign %d) ", misalign);
174   fprintf (f, "costs %u ", cost);
175   const char *ws = "unknown";
176   switch (where)
177     {
178     case vect_prologue:
179       ws = "prologue";
180       break;
181     case vect_body:
182       ws = "body";
183       break;
184     case vect_epilogue:
185       ws = "epilogue";
186       break;
187     }
188   fprintf (f, "in %s\n", ws);
189 }
190 
191 /* For mapping simduid to vectorization factor.  */
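/* For illustration (numbers are hypothetical): after vectorizing a simd
   loop whose simduid decl is simduid.0 with a vectorization factor of 8,
   the table would hold roughly

        { simduid = DECL_UID (simduid.0), vf = 8 }

   which adjust_simduid_builtins later uses to fold GOMP_SIMD_VF (simduid.0)
   to the constant 8.  */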
192 
193 class simduid_to_vf : public free_ptr_hash<simduid_to_vf>
194 {
195 public:
196   unsigned int simduid;
197   poly_uint64 vf;
198 
199   /* hash_table support.  */
200   static inline hashval_t hash (const simduid_to_vf *);
201   static inline int equal (const simduid_to_vf *, const simduid_to_vf *);
202 };
203 
204 inline hashval_t
205 simduid_to_vf::hash (const simduid_to_vf *p)
206 {
207   return p->simduid;
208 }
209 
210 inline int
211 simduid_to_vf::equal (const simduid_to_vf *p1, const simduid_to_vf *p2)
212 {
213   return p1->simduid == p2->simduid;
214 }
215 
216 /* This hash maps the OMP simd array to the corresponding simduid used
217    to index into it.  For example, given
218 
219         _7 = GOMP_SIMD_LANE (simduid.0)
220         ...
221         ...
222         D.1737[_7] = stuff;
223 
224 
225    This hash maps from the OMP simd array (D.1737[]) to DECL_UID of
226    simduid.0.  */
227 
228 struct simd_array_to_simduid : free_ptr_hash<simd_array_to_simduid>
229 {
230   tree decl;
231   unsigned int simduid;
232 
233   /* hash_table support.  */
234   static inline hashval_t hash (const simd_array_to_simduid *);
235   static inline int equal (const simd_array_to_simduid *,
236 			   const simd_array_to_simduid *);
237 };
238 
239 inline hashval_t
240 simd_array_to_simduid::hash (const simd_array_to_simduid *p)
241 {
242   return DECL_UID (p->decl);
243 }
244 
245 inline int
246 simd_array_to_simduid::equal (const simd_array_to_simduid *p1,
247 			      const simd_array_to_simduid *p2)
248 {
249   return p1->decl == p2->decl;
250 }
251 
252 /* Fold IFN_GOMP_SIMD_LANE, IFN_GOMP_SIMD_VF, IFN_GOMP_SIMD_LAST_LANE,
253    into their corresponding constants and remove
254    IFN_GOMP_SIMD_ORDERED_{START,END}.  */
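/* As a rough illustration (assuming a recorded vectorization factor of 8
   for simduid.0), the folding turns

        _5 = GOMP_SIMD_VF (simduid.0);
        _7 = GOMP_SIMD_LANE (simduid.0);

   into uses of the constants 8 and 0 respectively, after which the calls
   themselves are removed.  */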
255 
256 static void
257 adjust_simduid_builtins (hash_table<simduid_to_vf> *htab, function *fun)
258 {
259   basic_block bb;
260 
261   FOR_EACH_BB_FN (bb, fun)
262     {
263       gimple_stmt_iterator i;
264 
265       for (i = gsi_start_bb (bb); !gsi_end_p (i); )
266 	{
267 	  poly_uint64 vf = 1;
268 	  enum internal_fn ifn;
269 	  gimple *stmt = gsi_stmt (i);
270 	  tree t;
271 	  if (!is_gimple_call (stmt)
272 	      || !gimple_call_internal_p (stmt))
273 	    {
274 	      gsi_next (&i);
275 	      continue;
276 	    }
277 	  ifn = gimple_call_internal_fn (stmt);
278 	  switch (ifn)
279 	    {
280 	    case IFN_GOMP_SIMD_LANE:
281 	    case IFN_GOMP_SIMD_VF:
282 	    case IFN_GOMP_SIMD_LAST_LANE:
283 	      break;
284 	    case IFN_GOMP_SIMD_ORDERED_START:
285 	    case IFN_GOMP_SIMD_ORDERED_END:
286 	      if (integer_onep (gimple_call_arg (stmt, 0)))
287 		{
288 		  enum built_in_function bcode
289 		    = (ifn == IFN_GOMP_SIMD_ORDERED_START
290 		       ? BUILT_IN_GOMP_ORDERED_START
291 		       : BUILT_IN_GOMP_ORDERED_END);
292 		  gimple *g
293 		    = gimple_build_call (builtin_decl_explicit (bcode), 0);
294 		  gimple_move_vops (g, stmt);
295 		  gsi_replace (&i, g, true);
296 		  continue;
297 		}
298 	      gsi_remove (&i, true);
299 	      unlink_stmt_vdef (stmt);
300 	      continue;
301 	    default:
302 	      gsi_next (&i);
303 	      continue;
304 	    }
305 	  tree arg = gimple_call_arg (stmt, 0);
306 	  gcc_assert (arg != NULL_TREE);
307 	  gcc_assert (TREE_CODE (arg) == SSA_NAME);
308 	  simduid_to_vf *p = NULL, data;
309 	  data.simduid = DECL_UID (SSA_NAME_VAR (arg));
310 	  /* Need to nullify loop safelen field since its value is not
311 	     valid after transformation.  */
312 	  if (bb->loop_father && bb->loop_father->safelen > 0)
313 	    bb->loop_father->safelen = 0;
314 	  if (htab)
315 	    {
316 	      p = htab->find (&data);
317 	      if (p)
318 		vf = p->vf;
319 	    }
320 	  switch (ifn)
321 	    {
322 	    case IFN_GOMP_SIMD_VF:
323 	      t = build_int_cst (unsigned_type_node, vf);
324 	      break;
325 	    case IFN_GOMP_SIMD_LANE:
326 	      t = build_int_cst (unsigned_type_node, 0);
327 	      break;
328 	    case IFN_GOMP_SIMD_LAST_LANE:
329 	      t = gimple_call_arg (stmt, 1);
330 	      break;
331 	    default:
332 	      gcc_unreachable ();
333 	    }
334 	  tree lhs = gimple_call_lhs (stmt);
335 	  if (lhs)
336 	    replace_uses_by (lhs, t);
337 	  release_defs (stmt);
338 	  gsi_remove (&i, true);
339 	}
340     }
341 }
342 
343 /* Helper structure for note_simd_array_uses.  */
344 
345 struct note_simd_array_uses_struct
346 {
347   hash_table<simd_array_to_simduid> **htab;
348   unsigned int simduid;
349 };
350 
351 /* Callback for note_simd_array_uses, called through walk_gimple_op.  */
352 
353 static tree
354 note_simd_array_uses_cb (tree *tp, int *walk_subtrees, void *data)
355 {
356   struct walk_stmt_info *wi = (struct walk_stmt_info *) data;
357   struct note_simd_array_uses_struct *ns
358     = (struct note_simd_array_uses_struct *) wi->info;
359 
360   if (TYPE_P (*tp))
361     *walk_subtrees = 0;
362   else if (VAR_P (*tp)
363 	   && lookup_attribute ("omp simd array", DECL_ATTRIBUTES (*tp))
364 	   && DECL_CONTEXT (*tp) == current_function_decl)
365     {
366       simd_array_to_simduid data;
367       if (!*ns->htab)
368 	*ns->htab = new hash_table<simd_array_to_simduid> (15);
369       data.decl = *tp;
370       data.simduid = ns->simduid;
371       simd_array_to_simduid **slot = (*ns->htab)->find_slot (&data, INSERT);
372       if (*slot == NULL)
373 	{
374 	  simd_array_to_simduid *p = XNEW (simd_array_to_simduid);
375 	  *p = data;
376 	  *slot = p;
377 	}
378       else if ((*slot)->simduid != ns->simduid)
379 	(*slot)->simduid = -1U;
380       *walk_subtrees = 0;
381     }
382   return NULL_TREE;
383 }
384 
385 /* Find "omp simd array" temporaries and map them to corresponding
386    simduid.  */
387 
388 static void
389 note_simd_array_uses (hash_table<simd_array_to_simduid> **htab, function *fun)
390 {
391   basic_block bb;
392   gimple_stmt_iterator gsi;
393   struct walk_stmt_info wi;
394   struct note_simd_array_uses_struct ns;
395 
396   memset (&wi, 0, sizeof (wi));
397   wi.info = &ns;
398   ns.htab = htab;
399 
400   FOR_EACH_BB_FN (bb, fun)
401     for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
402       {
403 	gimple *stmt = gsi_stmt (gsi);
404 	if (!is_gimple_call (stmt) || !gimple_call_internal_p (stmt))
405 	  continue;
406 	switch (gimple_call_internal_fn (stmt))
407 	  {
408 	  case IFN_GOMP_SIMD_LANE:
409 	  case IFN_GOMP_SIMD_VF:
410 	  case IFN_GOMP_SIMD_LAST_LANE:
411 	    break;
412 	  default:
413 	    continue;
414 	  }
415 	tree lhs = gimple_call_lhs (stmt);
416 	if (lhs == NULL_TREE)
417 	  continue;
418 	imm_use_iterator use_iter;
419 	gimple *use_stmt;
420 	ns.simduid = DECL_UID (SSA_NAME_VAR (gimple_call_arg (stmt, 0)));
421 	FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, lhs)
422 	  if (!is_gimple_debug (use_stmt))
423 	    walk_gimple_op (use_stmt, note_simd_array_uses_cb, &wi);
424       }
425 }
426 
427 /* Shrink arrays with "omp simd array" attribute to the corresponding
428    vectorization factor.  */
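/* Sketch of the effect (array name and sizes are made up): an "omp simd
   array" temporary that was created with a worst-case element count, e.g.

        int D.1737[16];

   is re-laid out as D.1737[vf] once the real vectorization factor vf of
   the owning loop is known, via build_array_type_nelts and
   relayout_decl below.  */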
429 
430 static void
431 shrink_simd_arrays
432   (hash_table<simd_array_to_simduid> *simd_array_to_simduid_htab,
433    hash_table<simduid_to_vf> *simduid_to_vf_htab)
434 {
435   for (hash_table<simd_array_to_simduid>::iterator iter
436 	 = simd_array_to_simduid_htab->begin ();
437        iter != simd_array_to_simduid_htab->end (); ++iter)
438     if ((*iter)->simduid != -1U)
439       {
440 	tree decl = (*iter)->decl;
441 	poly_uint64 vf = 1;
442 	if (simduid_to_vf_htab)
443 	  {
444 	    simduid_to_vf *p = NULL, data;
445 	    data.simduid = (*iter)->simduid;
446 	    p = simduid_to_vf_htab->find (&data);
447 	    if (p)
448 	      vf = p->vf;
449 	  }
450 	tree atype
451 	  = build_array_type_nelts (TREE_TYPE (TREE_TYPE (decl)), vf);
452 	TREE_TYPE (decl) = atype;
453 	relayout_decl (decl);
454       }
455 
456   delete simd_array_to_simduid_htab;
457 }
458 
459 /* Initialize the vec_info with kind KIND_IN and shared analysis data
460    SHARED_.  */
461 
462 vec_info::vec_info (vec_info::vec_kind kind_in, vec_info_shared *shared_)
463   : kind (kind_in),
464     shared (shared_),
465     stmt_vec_info_ro (false)
466 {
467   stmt_vec_infos.create (50);
468 }
469 
470 vec_info::~vec_info ()
471 {
472   for (slp_instance &instance : slp_instances)
473     vect_free_slp_instance (instance);
474 
475   free_stmt_vec_infos ();
476 }
477 
478 vec_info_shared::vec_info_shared ()
479   : n_stmts (0),
480     datarefs (vNULL),
481     datarefs_copy (vNULL),
482     ddrs (vNULL)
483 {
484 }
485 
486 vec_info_shared::~vec_info_shared ()
487 {
488   free_data_refs (datarefs);
489   free_dependence_relations (ddrs);
490   datarefs_copy.release ();
491 }
492 
493 void
494 vec_info_shared::save_datarefs ()
495 {
496   if (!flag_checking)
497     return;
498   datarefs_copy.reserve_exact (datarefs.length ());
499   for (unsigned i = 0; i < datarefs.length (); ++i)
500     datarefs_copy.quick_push (*datarefs[i]);
501 }
502 
503 void
504 vec_info_shared::check_datarefs ()
505 {
506   if (!flag_checking)
507     return;
508   gcc_assert (datarefs.length () == datarefs_copy.length ());
509   for (unsigned i = 0; i < datarefs.length (); ++i)
510     if (memcmp (&datarefs_copy[i], datarefs[i],
511 		offsetof (data_reference, alt_indices)) != 0)
512       gcc_unreachable ();
513 }
514 
515 /* Record that STMT belongs to the vectorizable region.  Create and return
516    an associated stmt_vec_info.  */
517 
518 stmt_vec_info
519 vec_info::add_stmt (gimple *stmt)
520 {
521   stmt_vec_info res = new_stmt_vec_info (stmt);
522   set_vinfo_for_stmt (stmt, res);
523   return res;
524 }
525 
526 /* Record that STMT belongs to the vectorizable region.  Create a new
527    stmt_vec_info, mark it as related to STMT_INFO, and return the new
528    stmt_vec_info.  */
529 
530 stmt_vec_info
531 vec_info::add_pattern_stmt (gimple *stmt, stmt_vec_info stmt_info)
532 {
533   stmt_vec_info res = new_stmt_vec_info (stmt);
534   set_vinfo_for_stmt (stmt, res, false);
535   STMT_VINFO_RELATED_STMT (res) = stmt_info;
536   return res;
537 }
538 
539 /* If STMT has an associated stmt_vec_info, return that vec_info, otherwise
540    return null.  It is safe to call this function on any statement, even if
541    it might not be part of the vectorizable region.  */
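/* The lookup relies on the uid scheme used by set_vinfo_for_stmt below:
   a gimple uid of 0 means "no stmt_vec_info", while uid N > 0 means the
   info lives at stmt_vec_infos[N - 1].  For example, the first statement
   added to the region gets uid 1 and its stmt_vec_info is stored at
   stmt_vec_infos[0].  */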
542 
543 stmt_vec_info
544 vec_info::lookup_stmt (gimple *stmt)
545 {
546   unsigned int uid = gimple_uid (stmt);
547   if (uid > 0 && uid - 1 < stmt_vec_infos.length ())
548     {
549       stmt_vec_info res = stmt_vec_infos[uid - 1];
550       if (res && res->stmt == stmt)
551 	return res;
552     }
553   return NULL;
554 }
555 
556 /* If NAME is an SSA_NAME and its definition has an associated stmt_vec_info,
557    return that stmt_vec_info, otherwise return null.  It is safe to call
558    this on arbitrary operands.  */
559 
560 stmt_vec_info
561 vec_info::lookup_def (tree name)
562 {
563   if (TREE_CODE (name) == SSA_NAME
564       && !SSA_NAME_IS_DEFAULT_DEF (name))
565     return lookup_stmt (SSA_NAME_DEF_STMT (name));
566   return NULL;
567 }
568 
569 /* See whether there is a single non-debug statement that uses LHS and
570    whether that statement has an associated stmt_vec_info.  Return the
571    stmt_vec_info if so, otherwise return null.  */
572 
573 stmt_vec_info
574 vec_info::lookup_single_use (tree lhs)
575 {
576   use_operand_p dummy;
577   gimple *use_stmt;
578   if (single_imm_use (lhs, &dummy, &use_stmt))
579     return lookup_stmt (use_stmt);
580   return NULL;
581 }
582 
583 /* Return vectorization information about DR.  */
584 
585 dr_vec_info *
586 vec_info::lookup_dr (data_reference *dr)
587 {
588   stmt_vec_info stmt_info = lookup_stmt (DR_STMT (dr));
589   /* DR_STMT should never refer to a stmt in a pattern replacement.  */
590   gcc_checking_assert (!is_pattern_stmt_p (stmt_info));
591   return STMT_VINFO_DR_INFO (stmt_info->dr_aux.stmt);
592 }
593 
594 /* Record that NEW_STMT_INFO now implements the same data reference
595    as OLD_STMT_INFO.  */
596 
597 void
598 vec_info::move_dr (stmt_vec_info new_stmt_info, stmt_vec_info old_stmt_info)
599 {
600   gcc_assert (!is_pattern_stmt_p (old_stmt_info));
601   STMT_VINFO_DR_INFO (old_stmt_info)->stmt = new_stmt_info;
602   new_stmt_info->dr_aux = old_stmt_info->dr_aux;
603   STMT_VINFO_DR_WRT_VEC_LOOP (new_stmt_info)
604     = STMT_VINFO_DR_WRT_VEC_LOOP (old_stmt_info);
605   STMT_VINFO_GATHER_SCATTER_P (new_stmt_info)
606     = STMT_VINFO_GATHER_SCATTER_P (old_stmt_info);
607 }
608 
609 /* Permanently remove the statement described by STMT_INFO from the
610    function.  */
611 
612 void
613 vec_info::remove_stmt (stmt_vec_info stmt_info)
614 {
615   gcc_assert (!stmt_info->pattern_stmt_p);
616   set_vinfo_for_stmt (stmt_info->stmt, NULL);
617   unlink_stmt_vdef (stmt_info->stmt);
618   gimple_stmt_iterator si = gsi_for_stmt (stmt_info->stmt);
619   gsi_remove (&si, true);
620   release_defs (stmt_info->stmt);
621   free_stmt_vec_info (stmt_info);
622 }
623 
624 /* Replace the statement at GSI by NEW_STMT, both the vectorization
625    information and the function itself.  STMT_INFO describes the statement
626    at GSI.  */
627 
628 void
629 vec_info::replace_stmt (gimple_stmt_iterator *gsi, stmt_vec_info stmt_info,
630 			gimple *new_stmt)
631 {
632   gimple *old_stmt = stmt_info->stmt;
633   gcc_assert (!stmt_info->pattern_stmt_p && old_stmt == gsi_stmt (*gsi));
634   gimple_set_uid (new_stmt, gimple_uid (old_stmt));
635   stmt_info->stmt = new_stmt;
636   gsi_replace (gsi, new_stmt, true);
637 }
638 
639 /* Insert stmts in SEQ on the VEC_INFO region entry.  If CONTEXT is
640    not NULL it specifies whether to use the sub-region entry
641    determined by it, currently used for loop vectorization to insert
642    on the inner loop entry vs. the outer loop entry.  */
643 
644 void
645 vec_info::insert_seq_on_entry (stmt_vec_info context, gimple_seq seq)
646 {
647   if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (this))
648     {
649       class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
650       basic_block new_bb;
651       edge pe;
652 
653       if (context && nested_in_vect_loop_p (loop, context))
654 	loop = loop->inner;
655 
656       pe = loop_preheader_edge (loop);
657       new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
658       gcc_assert (!new_bb);
659     }
660   else
661     {
662       bb_vec_info bb_vinfo = as_a <bb_vec_info> (this);
663       gimple_stmt_iterator gsi_region_begin
664 	= gsi_after_labels (bb_vinfo->bbs[0]);
665       gsi_insert_seq_before (&gsi_region_begin, seq, GSI_SAME_STMT);
666     }
667 }
668 
669 /* Like insert_seq_on_entry but just inserts the single stmt NEW_STMT.  */
670 
671 void
672 vec_info::insert_on_entry (stmt_vec_info context, gimple *new_stmt)
673 {
674   gimple_seq seq = NULL;
675   gimple_stmt_iterator gsi = gsi_start (seq);
676   gsi_insert_before_without_update (&gsi, new_stmt, GSI_SAME_STMT);
677   insert_seq_on_entry (context, seq);
678 }
679 
680 /* Create and initialize a new stmt_vec_info struct for STMT.  */
681 
682 stmt_vec_info
683 vec_info::new_stmt_vec_info (gimple *stmt)
684 {
685   stmt_vec_info res = XCNEW (class _stmt_vec_info);
686   res->stmt = stmt;
687 
688   STMT_VINFO_TYPE (res) = undef_vec_info_type;
689   STMT_VINFO_RELEVANT (res) = vect_unused_in_scope;
690   STMT_VINFO_VECTORIZABLE (res) = true;
691   STMT_VINFO_REDUC_TYPE (res) = TREE_CODE_REDUCTION;
692   STMT_VINFO_REDUC_CODE (res) = ERROR_MARK;
693   STMT_VINFO_REDUC_FN (res) = IFN_LAST;
694   STMT_VINFO_REDUC_IDX (res) = -1;
695   STMT_VINFO_SLP_VECT_ONLY (res) = false;
696   STMT_VINFO_SLP_VECT_ONLY_PATTERN (res) = false;
697   STMT_VINFO_VEC_STMTS (res) = vNULL;
698   res->reduc_initial_values = vNULL;
699   res->reduc_scalar_results = vNULL;
700 
701   if (is_a <loop_vec_info> (this)
702       && gimple_code (stmt) == GIMPLE_PHI
703       && is_loop_header_bb_p (gimple_bb (stmt)))
704     STMT_VINFO_DEF_TYPE (res) = vect_unknown_def_type;
705   else
706     STMT_VINFO_DEF_TYPE (res) = vect_internal_def;
707 
708   STMT_SLP_TYPE (res) = loop_vect;
709 
710   /* This is really "uninitialized" until vect_compute_data_ref_alignment.  */
711   res->dr_aux.misalignment = DR_MISALIGNMENT_UNINITIALIZED;
712 
713   return res;
714 }
715 
716 /* Associate STMT with INFO.  */
717 
718 void
719 vec_info::set_vinfo_for_stmt (gimple *stmt, stmt_vec_info info, bool check_ro)
720 {
721   unsigned int uid = gimple_uid (stmt);
722   if (uid == 0)
723     {
724       gcc_assert (!check_ro || !stmt_vec_info_ro);
725       gcc_checking_assert (info);
726       uid = stmt_vec_infos.length () + 1;
727       gimple_set_uid (stmt, uid);
728       stmt_vec_infos.safe_push (info);
729     }
730   else
731     {
732       gcc_checking_assert (info == NULL);
733       stmt_vec_infos[uid - 1] = info;
734     }
735 }
736 
737 /* Free the contents of stmt_vec_infos.  */
738 
739 void
740 vec_info::free_stmt_vec_infos (void)
741 {
742   for (stmt_vec_info &info : stmt_vec_infos)
743     if (info != NULL)
744       free_stmt_vec_info (info);
745   stmt_vec_infos.release ();
746 }
747 
748 /* Free STMT_INFO.  */
749 
750 void
751 vec_info::free_stmt_vec_info (stmt_vec_info stmt_info)
752 {
753   if (stmt_info->pattern_stmt_p)
754     {
755       gimple_set_bb (stmt_info->stmt, NULL);
756       tree lhs = gimple_get_lhs (stmt_info->stmt);
757       if (lhs && TREE_CODE (lhs) == SSA_NAME)
758 	release_ssa_name (lhs);
759     }
760 
761   stmt_info->reduc_initial_values.release ();
762   stmt_info->reduc_scalar_results.release ();
763   STMT_VINFO_SIMD_CLONE_INFO (stmt_info).release ();
764   STMT_VINFO_VEC_STMTS (stmt_info).release ();
765   free (stmt_info);
766 }
767 
768 /* Returns true if S1 dominates S2.  */
769 
770 bool
771 vect_stmt_dominates_stmt_p (gimple *s1, gimple *s2)
772 {
773   basic_block bb1 = gimple_bb (s1), bb2 = gimple_bb (s2);
774 
775   /* If bb1 is NULL, S1 should be the GIMPLE_NOP def stmt of a
776      default-definition (D) SSA_NAME.  Assume it lives at the beginning
777      of the function and thus dominates everything.  */
778   if (!bb1 || s1 == s2)
779     return true;
780 
781   /* If bb2 is NULL, it doesn't dominate any stmt with a bb.  */
782   if (!bb2)
783     return false;
784 
785   if (bb1 != bb2)
786     return dominated_by_p (CDI_DOMINATORS, bb2, bb1);
787 
788   /* PHIs in the same basic block are assumed to be
789      executed all in parallel; if only one of the stmts is a PHI,
790      it dominates the other stmt in the same basic block.  */
791   if (gimple_code (s1) == GIMPLE_PHI)
792     return true;
793 
794   if (gimple_code (s2) == GIMPLE_PHI)
795     return false;
796 
797   /* Inserted vectorized stmts all have UID 0 while the original stmts
798      in the IL have UID increasing within a BB.  Walk from both sides
799      until we find the other stmt or a stmt with UID != 0.  */
800   gimple_stmt_iterator gsi1 = gsi_for_stmt (s1);
801   while (gimple_uid (gsi_stmt (gsi1)) == 0)
802     {
803       gsi_next (&gsi1);
804       if (gsi_end_p (gsi1))
805 	return false;
806       if (gsi_stmt (gsi1) == s2)
807 	return true;
808     }
809   if (gimple_uid (gsi_stmt (gsi1)) == -1u)
810     return false;
811 
812   gimple_stmt_iterator gsi2 = gsi_for_stmt (s2);
813   while (gimple_uid (gsi_stmt (gsi2)) == 0)
814     {
815       gsi_prev (&gsi2);
816       if (gsi_end_p (gsi2))
817 	return false;
818       if (gsi_stmt (gsi2) == s1)
819 	return true;
820     }
821   if (gimple_uid (gsi_stmt (gsi2)) == -1u)
822     return false;
823 
824   if (gimple_uid (gsi_stmt (gsi1)) <= gimple_uid (gsi_stmt (gsi2)))
825     return true;
826   return false;
827 }
828 
829 /* A helper function to free scev and LOOP niter information, as well as
830    clear loop constraint LOOP_C_FINITE.  */
831 
832 void
833 vect_free_loop_info_assumptions (class loop *loop)
834 {
835   scev_reset_htab ();
836   /* We need to explicitly reset the upper bound information since it is
837      used even after free_numbers_of_iterations_estimates.  */
838   loop->any_upper_bound = false;
839   loop->any_likely_upper_bound = false;
840   free_numbers_of_iterations_estimates (loop);
841   loop_constraint_clear (loop, LOOP_C_FINITE);
842 }
843 
844 /* If LOOP has been versioned during ifcvt, return the internal call
845    guarding it.  */
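/* For reference, the CFG shape searched for looks roughly like

        if (.LOOP_VECTORIZED (1, 2))
          <loop 1, to be vectorized>
        else
          <loop 2, scalar fallback>

   where the call's arguments are loop numbers; the walk below climbs
   single-predecessor blocks from LOOP's preheader to find the guarding
   GIMPLE_COND and the internal call just before it.  */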
846 
847 gimple *
848 vect_loop_vectorized_call (class loop *loop, gcond **cond)
849 {
850   basic_block bb = loop_preheader_edge (loop)->src;
851   gimple *g;
852   do
853     {
854       g = last_stmt (bb);
855       if ((g && gimple_code (g) == GIMPLE_COND)
856 	  || !single_succ_p (bb))
857 	break;
858       if (!single_pred_p (bb))
859 	break;
860       bb = single_pred (bb);
861     }
862   while (1);
863   if (g && gimple_code (g) == GIMPLE_COND)
864     {
865       if (cond)
866 	*cond = as_a <gcond *> (g);
867       gimple_stmt_iterator gsi = gsi_for_stmt (g);
868       gsi_prev (&gsi);
869       if (!gsi_end_p (gsi))
870 	{
871 	  g = gsi_stmt (gsi);
872 	  if (gimple_call_internal_p (g, IFN_LOOP_VECTORIZED)
873 	      && (tree_to_shwi (gimple_call_arg (g, 0)) == loop->num
874 		  || tree_to_shwi (gimple_call_arg (g, 1)) == loop->num))
875 	    return g;
876 	}
877     }
878   return NULL;
879 }
880 
881 /* If LOOP has been versioned during loop distribution, return the guarding
882    internal call.  */
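/* Sketch of what is searched for (numbers are illustrative): loop
   distribution versions the loop under a runtime alias check,

        if (.LOOP_DIST_ALIAS (3, <alias check>))
          <distributed loops>
        else
          <original loop 3>

   and the call is found by walking up the dominator tree from LOOP,
   looking for a GIMPLE_COND preceded by an IFN_LOOP_DIST_ALIAS call
   whose first argument matches loop->orig_loop_num.  */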
883 
884 static gimple *
885 vect_loop_dist_alias_call (class loop *loop, function *fun)
886 {
887   basic_block bb;
888   basic_block entry;
889   class loop *outer, *orig;
890   gimple_stmt_iterator gsi;
891   gimple *g;
892 
893   if (loop->orig_loop_num == 0)
894     return NULL;
895 
896   orig = get_loop (fun, loop->orig_loop_num);
897   if (orig == NULL)
898     {
899       /* The original loop is somehow destroyed.  Clear the information.  */
900       loop->orig_loop_num = 0;
901       return NULL;
902     }
903 
904   if (loop != orig)
905     bb = nearest_common_dominator (CDI_DOMINATORS, loop->header, orig->header);
906   else
907     bb = loop_preheader_edge (loop)->src;
908 
909   outer = bb->loop_father;
910   entry = ENTRY_BLOCK_PTR_FOR_FN (fun);
911 
912   /* Look upward in dominance tree.  */
913   for (; bb != entry && flow_bb_inside_loop_p (outer, bb);
914        bb = get_immediate_dominator (CDI_DOMINATORS, bb))
915     {
916       g = last_stmt (bb);
917       if (g == NULL || gimple_code (g) != GIMPLE_COND)
918 	continue;
919 
920       gsi = gsi_for_stmt (g);
921       gsi_prev (&gsi);
922       if (gsi_end_p (gsi))
923 	continue;
924 
925       g = gsi_stmt (gsi);
926       /* The guarding internal function call must have the same distribution
927 	 alias id.  */
928       if (gimple_call_internal_p (g, IFN_LOOP_DIST_ALIAS)
929 	  && (tree_to_shwi (gimple_call_arg (g, 0)) == loop->orig_loop_num))
930 	return g;
931     }
932   return NULL;
933 }
934 
935 /* Set the uids of all the statements in basic blocks inside loop
936    represented by LOOP_VINFO. LOOP_VECTORIZED_CALL is the internal
937    call guarding the loop which has been if converted.  */
938 static void
939 set_uid_loop_bbs (loop_vec_info loop_vinfo, gimple *loop_vectorized_call,
940 		  function *fun)
941 {
942   tree arg = gimple_call_arg (loop_vectorized_call, 1);
943   basic_block *bbs;
944   unsigned int i;
945   class loop *scalar_loop = get_loop (fun, tree_to_shwi (arg));
946 
947   LOOP_VINFO_SCALAR_LOOP (loop_vinfo) = scalar_loop;
948   gcc_checking_assert (vect_loop_vectorized_call (scalar_loop)
949 		       == loop_vectorized_call);
950   /* If we are going to vectorize the outer loop, prevent vectorization
951      of the inner loop in the scalar loop - either the scalar loop is
952      thrown away, so vectorizing it would be wasted work, or it is used
953      only for a few iterations.  */
954   if (scalar_loop->inner)
955     {
956       gimple *g = vect_loop_vectorized_call (scalar_loop->inner);
957       if (g)
958 	{
959 	  arg = gimple_call_arg (g, 0);
960 	  get_loop (fun, tree_to_shwi (arg))->dont_vectorize = true;
961 	  fold_loop_internal_call (g, boolean_false_node);
962 	}
963     }
964   bbs = get_loop_body (scalar_loop);
965   for (i = 0; i < scalar_loop->num_nodes; i++)
966     {
967       basic_block bb = bbs[i];
968       gimple_stmt_iterator gsi;
969       for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
970 	{
971 	  gimple *phi = gsi_stmt (gsi);
972 	  gimple_set_uid (phi, 0);
973 	}
974       for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
975 	{
976 	  gimple *stmt = gsi_stmt (gsi);
977 	  gimple_set_uid (stmt, 0);
978 	}
979     }
980   free (bbs);
981 }
982 
983 /* Generate vectorized code for LOOP and its epilogues.  */
984 
985 static void
986 vect_transform_loops (hash_table<simduid_to_vf> *&simduid_to_vf_htab,
987 		      loop_p loop, gimple *loop_vectorized_call,
988 		      function *fun)
989 {
990   loop_vec_info loop_vinfo = loop_vec_info_for_loop (loop);
991 
992   if (loop_vectorized_call)
993     set_uid_loop_bbs (loop_vinfo, loop_vectorized_call, fun);
994 
995   unsigned HOST_WIDE_INT bytes;
996   if (dump_enabled_p ())
997     {
998       if (GET_MODE_SIZE (loop_vinfo->vector_mode).is_constant (&bytes))
999 	dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
1000 			 "loop vectorized using %wu byte vectors\n", bytes);
1001       else
1002 	dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
1003 			 "loop vectorized using variable length vectors\n");
1004     }
1005 
1006   loop_p new_loop = vect_transform_loop (loop_vinfo,
1007 					 loop_vectorized_call);
1008   /* Now that the loop has been vectorized, allow it to be unrolled
1009      etc.  */
1010   loop->force_vectorize = false;
1011 
1012   if (loop->simduid)
1013     {
1014       simduid_to_vf *simduid_to_vf_data = XNEW (simduid_to_vf);
1015       if (!simduid_to_vf_htab)
1016 	simduid_to_vf_htab = new hash_table<simduid_to_vf> (15);
1017       simduid_to_vf_data->simduid = DECL_UID (loop->simduid);
1018       simduid_to_vf_data->vf = loop_vinfo->vectorization_factor;
1019       *simduid_to_vf_htab->find_slot (simduid_to_vf_data, INSERT)
1020 	  = simduid_to_vf_data;
1021     }
1022 
1023   /* Epilogue of vectorized loop must be vectorized too.  */
1024   if (new_loop)
1025     vect_transform_loops (simduid_to_vf_htab, new_loop, NULL, fun);
1026 }
1027 
1028 /* Try to vectorize LOOP.  */
1029 
1030 static unsigned
1031 try_vectorize_loop_1 (hash_table<simduid_to_vf> *&simduid_to_vf_htab,
1032 		      unsigned *num_vectorized_loops, loop_p loop,
1033 		      gimple *loop_vectorized_call,
1034 		      gimple *loop_dist_alias_call,
1035 		      function *fun)
1036 {
1037   unsigned ret = 0;
1038   vec_info_shared shared;
1039   auto_purge_vect_location sentinel;
1040   vect_location = find_loop_location (loop);
1041 
1042   if (LOCATION_LOCUS (vect_location.get_location_t ()) != UNKNOWN_LOCATION
1043       && dump_enabled_p ())
1044     dump_printf (MSG_NOTE | MSG_PRIORITY_INTERNALS,
1045 		 "\nAnalyzing loop at %s:%d\n",
1046 		 LOCATION_FILE (vect_location.get_location_t ()),
1047 		 LOCATION_LINE (vect_location.get_location_t ()));
1048 
1049   /* Try to analyze the loop, retaining an opt_problem if dump_enabled_p.  */
1050   opt_loop_vec_info loop_vinfo = vect_analyze_loop (loop, &shared);
1051   loop->aux = loop_vinfo;
1052 
1053   if (!loop_vinfo)
1054     if (dump_enabled_p ())
1055       if (opt_problem *problem = loop_vinfo.get_problem ())
1056 	{
1057 	  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1058 			   "couldn't vectorize loop\n");
1059 	  problem->emit_and_clear ();
1060 	}
1061 
1062   if (!loop_vinfo || !LOOP_VINFO_VECTORIZABLE_P (loop_vinfo))
1063     {
1064       /* Free existing information if loop is analyzed with some
1065 	 assumptions.  */
1066       if (loop_constraint_set_p (loop, LOOP_C_FINITE))
1067 	vect_free_loop_info_assumptions (loop);
1068 
1069       /* If we applied if-conversion then try to vectorize the
1070 	 BB of innermost loops.
1071 	 ???  Ideally BB vectorization would learn to vectorize
1072 	 control flow by applying if-conversion on-the-fly; the
1073 	 following retains the if-converted loop body even when
1074 	 only non-if-converted parts took part in BB vectorization.  */
1075       if (flag_tree_slp_vectorize != 0
1076 	  && loop_vectorized_call
1077 	  && ! loop->inner)
1078 	{
1079 	  basic_block bb = loop->header;
1080 	  bool require_loop_vectorize = false;
1081 	  for (gimple_stmt_iterator gsi = gsi_start_bb (bb);
1082 	       !gsi_end_p (gsi); gsi_next (&gsi))
1083 	    {
1084 	      gimple *stmt = gsi_stmt (gsi);
1085 	      gcall *call = dyn_cast <gcall *> (stmt);
1086 	      if (call && gimple_call_internal_p (call))
1087 		{
1088 		  internal_fn ifn = gimple_call_internal_fn (call);
1089 		  if (ifn == IFN_MASK_LOAD || ifn == IFN_MASK_STORE
1090 		      /* Don't keep the if-converted parts when the ifn with
1091 			 specific type is not supported by the backend.  */
1092 		      || (direct_internal_fn_p (ifn)
1093 			  && !direct_internal_fn_supported_p
1094 			  (call, OPTIMIZE_FOR_SPEED)))
1095 		    {
1096 		      require_loop_vectorize = true;
1097 		      break;
1098 		    }
1099 		}
1100 	      gimple_set_uid (stmt, -1);
1101 	      gimple_set_visited (stmt, false);
1102 	    }
1103 	  if (!require_loop_vectorize)
1104 	    {
1105 	      tree arg = gimple_call_arg (loop_vectorized_call, 1);
1106 	      class loop *scalar_loop = get_loop (fun, tree_to_shwi (arg));
1107 	      if (vect_slp_if_converted_bb (bb, scalar_loop))
1108 		{
1109 		  fold_loop_internal_call (loop_vectorized_call,
1110 					   boolean_true_node);
1111 		  loop_vectorized_call = NULL;
1112 		  ret |= TODO_cleanup_cfg | TODO_update_ssa_only_virtuals;
1113 		}
1114 	    }
1115 	}
1116       /* If outer loop vectorization fails for LOOP_VECTORIZED guarded
1117 	 loop, don't vectorize its inner loop; we'll attempt to
1118 	 vectorize LOOP_VECTORIZED guarded inner loop of the scalar
1119 	 loop version.  */
1120       if (loop_vectorized_call && loop->inner)
1121 	loop->inner->dont_vectorize = true;
1122       return ret;
1123     }
1124 
1125   if (!dbg_cnt (vect_loop))
1126     {
1127       /* Free existing information if loop is analyzed with some
1128 	 assumptions.  */
1129       if (loop_constraint_set_p (loop, LOOP_C_FINITE))
1130 	vect_free_loop_info_assumptions (loop);
1131       return ret;
1132     }
1133 
1134   (*num_vectorized_loops)++;
1135   /* Transform LOOP and its epilogues.  */
1136   vect_transform_loops (simduid_to_vf_htab, loop, loop_vectorized_call, fun);
1137 
1138   if (loop_vectorized_call)
1139     {
1140       fold_loop_internal_call (loop_vectorized_call, boolean_true_node);
1141       ret |= TODO_cleanup_cfg;
1142     }
1143   if (loop_dist_alias_call)
1144     {
1145       tree value = gimple_call_arg (loop_dist_alias_call, 1);
1146       fold_loop_internal_call (loop_dist_alias_call, value);
1147       ret |= TODO_cleanup_cfg;
1148     }
1149 
1150   return ret;
1151 }
1152 
1153 /* Try to vectorize LOOP.  */
1154 
1155 static unsigned
1156 try_vectorize_loop (hash_table<simduid_to_vf> *&simduid_to_vf_htab,
1157 		    unsigned *num_vectorized_loops, loop_p loop,
1158 		    function *fun)
1159 {
1160   if (!((flag_tree_loop_vectorize
1161 	 && optimize_loop_nest_for_speed_p (loop))
1162 	|| loop->force_vectorize))
1163     return 0;
1164 
1165   return try_vectorize_loop_1 (simduid_to_vf_htab, num_vectorized_loops, loop,
1166 			       vect_loop_vectorized_call (loop),
1167 			       vect_loop_dist_alias_call (loop, fun), fun);
1168 }
1169 
1170 
1171 /* Loop autovectorization.  */
1172 
1173 namespace {
1174 
1175 const pass_data pass_data_vectorize =
1176 {
1177   GIMPLE_PASS, /* type */
1178   "vect", /* name */
1179   OPTGROUP_LOOP | OPTGROUP_VEC, /* optinfo_flags */
1180   TV_TREE_VECTORIZATION, /* tv_id */
1181   ( PROP_cfg | PROP_ssa ), /* properties_required */
1182   0, /* properties_provided */
1183   0, /* properties_destroyed */
1184   0, /* todo_flags_start */
1185   0, /* todo_flags_finish */
1186 };
1187 
1188 class pass_vectorize : public gimple_opt_pass
1189 {
1190 public:
1191   pass_vectorize (gcc::context *ctxt)
1192     : gimple_opt_pass (pass_data_vectorize, ctxt)
1193   {}
1194 
1195   /* opt_pass methods: */
1196   virtual bool gate (function *fun)
1197     {
1198       return flag_tree_loop_vectorize || fun->has_force_vectorize_loops;
1199     }
1200 
1201   virtual unsigned int execute (function *);
1202 
1203 }; // class pass_vectorize
1204 
1205 /* Function vectorize_loops.
1206 
1207    Entry point to loop vectorization phase.  */
1208 
1209 unsigned
1210 pass_vectorize::execute (function *fun)
1211 {
1212   unsigned int i;
1213   unsigned int num_vectorized_loops = 0;
1214   unsigned int vect_loops_num;
1215   hash_table<simduid_to_vf> *simduid_to_vf_htab = NULL;
1216   hash_table<simd_array_to_simduid> *simd_array_to_simduid_htab = NULL;
1217   bool any_ifcvt_loops = false;
1218   unsigned ret = 0;
1219 
1220   vect_loops_num = number_of_loops (fun);
1221 
1222   /* Bail out if there are no loops.  */
1223   if (vect_loops_num <= 1)
1224     return 0;
1225 
1226   vect_slp_init ();
1227 
1228   if (fun->has_simduid_loops)
1229     note_simd_array_uses (&simd_array_to_simduid_htab, fun);
1230 
1231   /*  ----------- Analyze loops. -----------  */
1232 
1233   /* If some loop was duplicated, it gets a bigger number
1234      than all previously defined loops.  This fact allows us to run
1235      only over the initial loops, skipping newly generated ones.  */
1236   for (auto loop : loops_list (fun, 0))
1237     if (loop->dont_vectorize)
1238       {
1239 	any_ifcvt_loops = true;
1240 	/* If-conversion sometimes versions both the outer loop
1241 	   (for the case when outer loop vectorization might be
1242 	   desirable) as well as the inner loop in the scalar version
1243 	   of the loop.  So we have:
1244 	    if (LOOP_VECTORIZED (1, 3))
1245 	      {
1246 		loop1
1247 		  loop2
1248 	      }
1249 	    else
1250 	      loop3 (copy of loop1)
1251 		if (LOOP_VECTORIZED (4, 5))
1252 		  loop4 (copy of loop2)
1253 		else
1254 		  loop5 (copy of loop4)
1255 	   If loops' iteration gives us loop3 first (which has
1256 	   dont_vectorize set), make sure to process loop1 before loop4;
1257 	   so that we can prevent vectorization of loop4 if loop1
1258 	   is successfully vectorized.  */
1259 	if (loop->inner)
1260 	  {
1261 	    gimple *loop_vectorized_call
1262 	      = vect_loop_vectorized_call (loop);
1263 	    if (loop_vectorized_call
1264 		&& vect_loop_vectorized_call (loop->inner))
1265 	      {
1266 		tree arg = gimple_call_arg (loop_vectorized_call, 0);
1267 		class loop *vector_loop
1268 		  = get_loop (fun, tree_to_shwi (arg));
1269 		if (vector_loop && vector_loop != loop)
1270 		  {
1271 		    /* Make sure we don't vectorize it twice.  */
1272 		    vector_loop->dont_vectorize = true;
1273 		    ret |= try_vectorize_loop (simduid_to_vf_htab,
1274 					       &num_vectorized_loops,
1275 					       vector_loop, fun);
1276 		  }
1277 	      }
1278 	  }
1279       }
1280     else
1281       ret |= try_vectorize_loop (simduid_to_vf_htab, &num_vectorized_loops,
1282 				 loop, fun);
1283 
1284   vect_location = dump_user_location_t ();
1285 
1286   statistics_counter_event (fun, "Vectorized loops", num_vectorized_loops);
1287   if (dump_enabled_p ()
1288       || (num_vectorized_loops > 0 && dump_enabled_p ()))
1289     dump_printf_loc (MSG_NOTE, vect_location,
1290                      "vectorized %u loops in function.\n",
1291                      num_vectorized_loops);
1292 
1293   /*  ----------- Finalize. -----------  */
1294 
1295   if (any_ifcvt_loops)
1296     for (i = 1; i < number_of_loops (fun); i++)
1297       {
1298 	class loop *loop = get_loop (fun, i);
1299 	if (loop && loop->dont_vectorize)
1300 	  {
1301 	    gimple *g = vect_loop_vectorized_call (loop);
1302 	    if (g)
1303 	      {
1304 		fold_loop_internal_call (g, boolean_false_node);
1305 		ret |= TODO_cleanup_cfg;
1306 		g = NULL;
1307 	      }
1308 	    else
1309 	      g = vect_loop_dist_alias_call (loop, fun);
1310 
1311 	    if (g)
1312 	      {
1313 		fold_loop_internal_call (g, boolean_false_node);
1314 		ret |= TODO_cleanup_cfg;
1315 	      }
1316 	  }
1317       }
1318 
1319   /* Fold IFN_GOMP_SIMD_{VF,LANE,LAST_LANE,ORDERED_{START,END}} builtins.  */
1320   if (fun->has_simduid_loops)
1321     {
1322       adjust_simduid_builtins (simduid_to_vf_htab, fun);
1323       /* Avoid stale SCEV cache entries for the SIMD_LANE defs.  */
1324       scev_reset ();
1325     }
1326   /* Shrink any "omp simd array" temporary arrays to the
1327      actual vectorization factors.  */
1328   if (simd_array_to_simduid_htab)
1329     shrink_simd_arrays (simd_array_to_simduid_htab, simduid_to_vf_htab);
1330   delete simduid_to_vf_htab;
1331   fun->has_simduid_loops = false;
1332 
1333   if (num_vectorized_loops > 0)
1334     {
1335       /* If we vectorized any loop, only virtual SSA form needs to be updated.
1336 	 ???  Also, while we try hard to update loop-closed SSA form we fail
1337 	 to properly do this in some corner-cases (see PR56286).  */
1338       rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa_only_virtuals);
1339       ret |= TODO_cleanup_cfg;
1340     }
1341 
1342   for (i = 1; i < number_of_loops (fun); i++)
1343     {
1344       loop_vec_info loop_vinfo;
1345       bool has_mask_store;
1346 
1347       class loop *loop = get_loop (fun, i);
1348       if (!loop || !loop->aux)
1349 	continue;
1350       loop_vinfo = (loop_vec_info) loop->aux;
1351       has_mask_store = LOOP_VINFO_HAS_MASK_STORE (loop_vinfo);
1352       delete loop_vinfo;
1353       if (has_mask_store
1354 	  && targetm.vectorize.empty_mask_is_expensive (IFN_MASK_STORE))
1355 	optimize_mask_stores (loop);
1356 
1357       auto_bitmap exit_bbs;
1358       /* Perform local CSE; this especially helps because we emit code for
1359 	 predicates that need to be shared for optimal predicate usage.
1360 	 However reassoc will re-order them and prevent CSE from working
1361 	 as it should.  CSE only the loop body, not the entry.  */
1362       bitmap_set_bit (exit_bbs, single_exit (loop)->dest->index);
1363 
1364       edge entry = EDGE_PRED (loop_preheader_edge (loop)->src, 0);
1365       do_rpo_vn (fun, entry, exit_bbs);
1366 
1367       loop->aux = NULL;
1368     }
1369 
1370   vect_slp_fini ();
1371 
1372   return ret;
1373 }
1374 
1375 } // anon namespace
1376 
1377 gimple_opt_pass *
1378 make_pass_vectorize (gcc::context *ctxt)
1379 {
1380   return new pass_vectorize (ctxt);
1381 }
1382 
1383 /* Entry point to the simduid cleanup pass.  */
1384 
1385 namespace {
1386 
1387 const pass_data pass_data_simduid_cleanup =
1388 {
1389   GIMPLE_PASS, /* type */
1390   "simduid", /* name */
1391   OPTGROUP_NONE, /* optinfo_flags */
1392   TV_NONE, /* tv_id */
1393   ( PROP_ssa | PROP_cfg ), /* properties_required */
1394   0, /* properties_provided */
1395   0, /* properties_destroyed */
1396   0, /* todo_flags_start */
1397   0, /* todo_flags_finish */
1398 };
1399 
1400 class pass_simduid_cleanup : public gimple_opt_pass
1401 {
1402 public:
1403   pass_simduid_cleanup (gcc::context *ctxt)
1404     : gimple_opt_pass (pass_data_simduid_cleanup, ctxt)
1405   {}
1406 
1407   /* opt_pass methods: */
1408   opt_pass * clone () { return new pass_simduid_cleanup (m_ctxt); }
1409   virtual bool gate (function *fun) { return fun->has_simduid_loops; }
1410   virtual unsigned int execute (function *);
1411 
1412 }; // class pass_simduid_cleanup
1413 
1414 unsigned int
1415 pass_simduid_cleanup::execute (function *fun)
1416 {
1417   hash_table<simd_array_to_simduid> *simd_array_to_simduid_htab = NULL;
1418 
1419   note_simd_array_uses (&simd_array_to_simduid_htab, fun);
1420 
1421   /* Fold IFN_GOMP_SIMD_{VF,LANE,LAST_LANE,ORDERED_{START,END}} builtins.  */
1422   adjust_simduid_builtins (NULL, fun);
1423 
1424   /* Shrink any "omp simd array" temporary arrays to the
1425      actual vectorization factors.  */
1426   if (simd_array_to_simduid_htab)
1427     shrink_simd_arrays (simd_array_to_simduid_htab, NULL);
1428   fun->has_simduid_loops = false;
1429   return 0;
1430 }
1431 
1432 }  // anon namespace
1433 
1434 gimple_opt_pass *
1435 make_pass_simduid_cleanup (gcc::context *ctxt)
1436 {
1437   return new pass_simduid_cleanup (ctxt);
1438 }
1439 
1440 
1441 /*  Entry point to basic block SLP phase.  */
1442 
1443 namespace {
1444 
1445 const pass_data pass_data_slp_vectorize =
1446 {
1447   GIMPLE_PASS, /* type */
1448   "slp", /* name */
1449   OPTGROUP_LOOP | OPTGROUP_VEC, /* optinfo_flags */
1450   TV_TREE_SLP_VECTORIZATION, /* tv_id */
1451   ( PROP_ssa | PROP_cfg ), /* properties_required */
1452   0, /* properties_provided */
1453   0, /* properties_destroyed */
1454   0, /* todo_flags_start */
1455   TODO_update_ssa, /* todo_flags_finish */
1456 };
1457 
1458 class pass_slp_vectorize : public gimple_opt_pass
1459 {
1460 public:
1461   pass_slp_vectorize (gcc::context *ctxt)
1462     : gimple_opt_pass (pass_data_slp_vectorize, ctxt)
1463   {}
1464 
1465   /* opt_pass methods: */
1466   opt_pass * clone () { return new pass_slp_vectorize (m_ctxt); }
1467   virtual bool gate (function *) { return flag_tree_slp_vectorize != 0; }
1468   virtual unsigned int execute (function *);
1469 
1470 }; // class pass_slp_vectorize
1471 
1472 unsigned int
1473 pass_slp_vectorize::execute (function *fun)
1474 {
1475   auto_purge_vect_location sentinel;
1476   basic_block bb;
1477 
1478   bool in_loop_pipeline = scev_initialized_p ();
1479   if (!in_loop_pipeline)
1480     {
1481       loop_optimizer_init (LOOPS_NORMAL);
1482       scev_initialize ();
1483     }
1484 
1485   /* Mark all stmts as not belonging to the current region and unvisited.  */
1486   FOR_EACH_BB_FN (bb, fun)
1487     {
1488       for (gphi_iterator gsi = gsi_start_phis (bb); !gsi_end_p (gsi);
1489 	   gsi_next (&gsi))
1490 	{
1491 	  gphi *stmt = gsi.phi ();
1492 	  gimple_set_uid (stmt, -1);
1493 	  gimple_set_visited (stmt, false);
1494 	}
1495       for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi);
1496 	   gsi_next (&gsi))
1497 	{
1498 	  gimple *stmt = gsi_stmt (gsi);
1499 	  gimple_set_uid (stmt, -1);
1500 	  gimple_set_visited (stmt, false);
1501 	}
1502     }
1503 
1504   vect_slp_init ();
1505 
1506   vect_slp_function (fun);
1507 
1508   vect_slp_fini ();
1509 
1510   if (!in_loop_pipeline)
1511     {
1512       scev_finalize ();
1513       loop_optimizer_finalize ();
1514     }
1515 
1516   return 0;
1517 }
1518 
1519 } // anon namespace
1520 
1521 gimple_opt_pass *
1522 make_pass_slp_vectorize (gcc::context *ctxt)
1523 {
1524   return new pass_slp_vectorize (ctxt);
1525 }
1526 
1527 
1528 /* Increase alignment of global arrays to improve vectorization potential.
1529    TODO:
1530    - Consider also structs that have an array field.
1531    - Use ipa analysis to prune arrays that can't be vectorized?
1532      This should involve global alignment analysis and in the future also
1533      array padding.  */
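/* As a hypothetical example: on a target with 256-bit vectors, a global

        static int a[1024];

   that is only 4-byte aligned may force the vectorizer to use unaligned
   or peeled accesses; raising the decl's alignment to the vector
   alignment (32 bytes here) lets loops over it use aligned vector
   loads and stores.  */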
1534 
1535 static unsigned get_vec_alignment_for_type (tree);
1536 static hash_map<tree, unsigned> *type_align_map;
1537 
1538 /* Return the alignment of the vector type corresponding to the array's
1539    scalar element type, or 0 if no such vector type exists.  */
1540 static unsigned
1541 get_vec_alignment_for_array_type (tree type)
1542 {
1543   gcc_assert (TREE_CODE (type) == ARRAY_TYPE);
1544   poly_uint64 array_size, vector_size;
1545 
1546   tree scalar_type = strip_array_types (type);
1547   tree vectype = get_related_vectype_for_scalar_type (VOIDmode, scalar_type);
1548   if (!vectype
1549       || !poly_int_tree_p (TYPE_SIZE (type), &array_size)
1550       || !poly_int_tree_p (TYPE_SIZE (vectype), &vector_size)
1551       || maybe_lt (array_size, vector_size))
1552     return 0;
1553 
1554   return TYPE_ALIGN (vectype);
1555 }
1556 
1557 /* Return the alignment of the field having the maximum alignment of the
1558    vector type corresponding to its scalar type.  For now, we only consider
1559    fields whose offset is a multiple of their vector alignment.
1560    Return 0 if no suitable field is found.  */
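/* A made-up example: for

        struct S { int a[64]; double x; };

   the field "a" maps to a vector type (say V4SI with 128-bit alignment)
   and its bit offset (0) is a multiple of that alignment, so 128 would be
   returned; fields with a variable offset, or one that does not fit an
   unsigned HOST_WIDE_INT, stop the scan instead.  */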
1561 static unsigned
1562 get_vec_alignment_for_record_type (tree type)
1563 {
1564   gcc_assert (TREE_CODE (type) == RECORD_TYPE);
1565 
1566   unsigned max_align = 0, alignment;
1567   HOST_WIDE_INT offset;
1568   tree offset_tree;
1569 
1570   if (TYPE_PACKED (type))
1571     return 0;
1572 
1573   unsigned *slot = type_align_map->get (type);
1574   if (slot)
1575     return *slot;
1576 
1577   for (tree field = first_field (type);
1578        field != NULL_TREE;
1579        field = DECL_CHAIN (field))
1580     {
1581       /* Skip if not FIELD_DECL or if alignment is set by user.  */
1582       if (TREE_CODE (field) != FIELD_DECL
1583 	  || DECL_USER_ALIGN (field)
1584 	  || DECL_ARTIFICIAL (field))
1585 	continue;
1586 
1587       /* We don't need to process the type further if offset is variable,
1588 	 since the offsets of remaining members will also be variable.  */
1589       if (TREE_CODE (DECL_FIELD_OFFSET (field)) != INTEGER_CST
1590 	  || TREE_CODE (DECL_FIELD_BIT_OFFSET (field)) != INTEGER_CST)
1591 	break;
1592 
1593       /* Similarly stop processing the type if offset_tree
1594 	 does not fit in unsigned HOST_WIDE_INT.  */
1595       offset_tree = bit_position (field);
1596       if (!tree_fits_uhwi_p (offset_tree))
1597 	break;
1598 
1599       offset = tree_to_uhwi (offset_tree);
1600       alignment = get_vec_alignment_for_type (TREE_TYPE (field));
1601 
1602       /* Get maximum alignment of vectorized field/array among those members
1603 	 whose offset is multiple of the vector alignment.  */
1604       if (alignment
1605 	  && (offset % alignment == 0)
1606 	  && (alignment > max_align))
1607 	max_align = alignment;
1608     }
1609 
1610   type_align_map->put (type, max_align);
1611   return max_align;
1612 }
1613 
1614 /* Return the alignment of the vector type corresponding to TYPE's scalar
1615    type, or 0 if it doesn't exist or the vector alignment is less than
1616    TYPE's alignment.  */
1617 static unsigned
1618 get_vec_alignment_for_type (tree type)
1619 {
1620   if (type == NULL_TREE)
1621     return 0;
1622 
1623   gcc_assert (TYPE_P (type));
1624 
1625   static unsigned alignment = 0;
1626   switch (TREE_CODE (type))
1627     {
1628       case ARRAY_TYPE:
1629 	alignment = get_vec_alignment_for_array_type (type);
1630 	break;
1631       case RECORD_TYPE:
1632 	alignment = get_vec_alignment_for_record_type (type);
1633 	break;
1634       default:
1635 	alignment = 0;
1636 	break;
1637     }
1638 
1639   return (alignment > TYPE_ALIGN (type)) ? alignment : 0;
1640 }
1641 
1642 /* Entry point to increase_alignment pass.  */
1643 static unsigned int
1644 increase_alignment (void)
1645 {
1646   varpool_node *vnode;
1647 
1648   vect_location = dump_user_location_t ();
1649   type_align_map = new hash_map<tree, unsigned>;
1650 
1651   /* Increase the alignment of all global arrays for vectorization.  */
1652   FOR_EACH_DEFINED_VARIABLE (vnode)
1653     {
1654       tree decl = vnode->decl;
1655       unsigned int alignment;
1656 
1657       if ((decl_in_symtab_p (decl)
1658 	  && !symtab_node::get (decl)->can_increase_alignment_p ())
1659 	  || DECL_USER_ALIGN (decl) || DECL_ARTIFICIAL (decl))
1660 	continue;
1661 
1662       alignment = get_vec_alignment_for_type (TREE_TYPE (decl));
1663       if (alignment && vect_can_force_dr_alignment_p (decl, alignment))
1664         {
1665 	  vnode->increase_alignment (alignment);
1666 	  if (dump_enabled_p ())
1667 	    dump_printf (MSG_NOTE, "Increasing alignment of decl: %T\n", decl);
1668         }
1669     }
1670 
1671   delete type_align_map;
1672   return 0;
1673 }
1674 
1675 
1676 namespace {
1677 
1678 const pass_data pass_data_ipa_increase_alignment =
1679 {
1680   SIMPLE_IPA_PASS, /* type */
1681   "increase_alignment", /* name */
1682   OPTGROUP_LOOP | OPTGROUP_VEC, /* optinfo_flags */
1683   TV_IPA_OPT, /* tv_id */
1684   0, /* properties_required */
1685   0, /* properties_provided */
1686   0, /* properties_destroyed */
1687   0, /* todo_flags_start */
1688   0, /* todo_flags_finish */
1689 };
1690 
1691 class pass_ipa_increase_alignment : public simple_ipa_opt_pass
1692 {
1693 public:
1694   pass_ipa_increase_alignment (gcc::context *ctxt)
1695     : simple_ipa_opt_pass (pass_data_ipa_increase_alignment, ctxt)
1696   {}
1697 
1698   /* opt_pass methods: */
1699   virtual bool gate (function *)
1700     {
1701       return flag_section_anchors && flag_tree_loop_vectorize;
1702     }
1703 
1704   virtual unsigned int execute (function *) { return increase_alignment (); }
1705 
1706 }; // class pass_ipa_increase_alignment
1707 
1708 } // anon namespace
1709 
1710 simple_ipa_opt_pass *
1711 make_pass_ipa_increase_alignment (gcc::context *ctxt)
1712 {
1713   return new pass_ipa_increase_alignment (ctxt);
1714 }
1715 
1716 /* If the condition represented by T is a comparison or the SSA name
1717    result of a comparison, extract the comparison's operands.  Represent
1718    T as NE_EXPR <T, 0> otherwise.  */
1719 
1720 void
1721 scalar_cond_masked_key::get_cond_ops_from_tree (tree t)
1722 {
1723   if (TREE_CODE_CLASS (TREE_CODE (t)) == tcc_comparison)
1724     {
1725       this->code = TREE_CODE (t);
1726       this->op0 = TREE_OPERAND (t, 0);
1727       this->op1 = TREE_OPERAND (t, 1);
1728       this->inverted_p = false;
1729       return;
1730     }
1731 
1732   if (TREE_CODE (t) == SSA_NAME)
1733     if (gassign *stmt = dyn_cast<gassign *> (SSA_NAME_DEF_STMT (t)))
1734       {
1735 	tree_code code = gimple_assign_rhs_code (stmt);
1736 	if (TREE_CODE_CLASS (code) == tcc_comparison)
1737 	  {
1738 	    this->code = code;
1739 	    this->op0 = gimple_assign_rhs1 (stmt);
1740 	    this->op1 = gimple_assign_rhs2 (stmt);
1741 	    this->inverted_p = false;
1742 	    return;
1743 	  }
1744 	else if (code == BIT_NOT_EXPR)
1745 	  {
1746 	    tree n_op = gimple_assign_rhs1 (stmt);
1747 	    if ((stmt = dyn_cast<gassign *> (SSA_NAME_DEF_STMT (n_op))))
1748 	      {
1749 		code = gimple_assign_rhs_code (stmt);
1750 		if (TREE_CODE_CLASS (code) == tcc_comparison)
1751 		  {
1752 		    this->code = code;
1753 		    this->op0 = gimple_assign_rhs1 (stmt);
1754 		    this->op1 = gimple_assign_rhs2 (stmt);
1755 		    this->inverted_p = true;
1756 		    return;
1757 		  }
1758 	      }
1759 	  }
1760       }
1761 
1762   this->code = NE_EXPR;
1763   this->op0 = t;
1764   this->op1 = build_zero_cst (TREE_TYPE (t));
1765   this->inverted_p = false;
1766 }
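
/* Examples of the mapping above (GIMPLE sketches, names are hypothetical):

     T is a_1 < b_2                  ->  code = LT_EXPR, op0 = a_1, op1 = b_2
     x_3 = a_1 < b_2;  T is x_3      ->  code = LT_EXPR, op0 = a_1, op1 = b_2
     y_4 = ~x_3;       T is y_4      ->  as above, but with inverted_p = true
     any other T                     ->  code = NE_EXPR, op0 = T, op1 = 0  */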
1767 
1768 /* See the comment above the declaration for details.  */
1769 
1770 unsigned int
1771 vector_costs::add_stmt_cost (int count, vect_cost_for_stmt kind,
1772 			     stmt_vec_info stmt_info, slp_tree,
1773 			     tree vectype, int misalign,
1774 			     vect_cost_model_location where)
1775 {
1776   unsigned int cost
1777     = builtin_vectorization_cost (kind, vectype, misalign) * count;
1778   return record_stmt_cost (stmt_info, where, cost);
1779 }
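
/* Worked example (target costs are hypothetical): with COUNT == 4,
   KIND == vector_store and a builtin_vectorization_cost of 2 per store,
   the base cost is 2 * 4 == 8 units, which record_stmt_cost then scales
   for execution frequency and accumulates against WHERE.  */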
1780 
1781 /* See the comment above the declaration for details.  */
1782 
1783 void
1784 vector_costs::finish_cost (const vector_costs *)
1785 {
1786   gcc_assert (!m_finished);
1787   m_finished = true;
1788 }
1789 
1790 /* Record a base cost of COST units against WHERE.  If STMT_INFO is
1791    nonnull, use it to adjust the cost based on execution frequency
1792    (where appropriate).  */
1793 
1794 unsigned int
1795 vector_costs::record_stmt_cost (stmt_vec_info stmt_info,
1796 				vect_cost_model_location where,
1797 				unsigned int cost)
1798 {
1799   cost = adjust_cost_for_freq (stmt_info, where, cost);
1800   m_costs[where] += cost;
1801   return cost;
1802 }
1803 
1804 /* COST is the base cost we have calculated for an operation in location WHERE.
1805    If STMT_INFO is nonnull, use it to adjust the cost based on execution
1806    frequency (where appropriate).  Return the adjusted cost.  */
1807 
1808 unsigned int
1809 vector_costs::adjust_cost_for_freq (stmt_vec_info stmt_info,
1810 				    vect_cost_model_location where,
1811 				    unsigned int cost)
1812 {
1813   /* Statements in an inner loop relative to the loop being
1814      vectorized are weighted more heavily.  The value here is
1815      arbitrary and could potentially be improved with analysis.  */
1816   if (where == vect_body
1817       && stmt_info
1818       && stmt_in_inner_loop_p (m_vinfo, stmt_info))
1819     {
1820       loop_vec_info loop_vinfo = as_a<loop_vec_info> (m_vinfo);
1821       cost *= LOOP_VINFO_INNER_LOOP_COST_FACTOR (loop_vinfo);
1822     }
1823   return cost;
1824 }
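
/* For example (the factor is param_vect_inner_loop_cost_factor, 50 by
   default): a vect_body statement that sits in a loop nested inside the
   loop being vectorized and has a base cost of 2 is accounted as
   2 * 50 == 100 units.  */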
1825 
1826 /* See the comment above the declaration for details.  */
1827 
1828 bool
1829 vector_costs::better_main_loop_than_p (const vector_costs *other) const
1830 {
1831   int diff = compare_inside_loop_cost (other);
1832   if (diff != 0)
1833     return diff < 0;
1834 
1835   /* If there's nothing to choose between the loop bodies, see whether
1836      there's a difference in the prologue and epilogue costs.  */
1837   diff = compare_outside_loop_cost (other);
1838   if (diff != 0)
1839     return diff < 0;
1840 
1841   return false;
1842 }
1843 
1844 
1845 /* See the comment above the declaration for details.  */
1846 
1847 bool
1848 vector_costs::better_epilogue_loop_than_p (const vector_costs *other,
1849 					   loop_vec_info main_loop) const
1850 {
1851   loop_vec_info this_loop_vinfo = as_a<loop_vec_info> (this->m_vinfo);
1852   loop_vec_info other_loop_vinfo = as_a<loop_vec_info> (other->m_vinfo);
1853 
1854   poly_int64 this_vf = LOOP_VINFO_VECT_FACTOR (this_loop_vinfo);
1855   poly_int64 other_vf = LOOP_VINFO_VECT_FACTOR (other_loop_vinfo);
1856 
1857   poly_uint64 main_poly_vf = LOOP_VINFO_VECT_FACTOR (main_loop);
1858   unsigned HOST_WIDE_INT main_vf;
1859   unsigned HOST_WIDE_INT other_factor, this_factor, other_cost, this_cost;
1860   /* If we can determine how many iterations are left for the epilogue
1861      loop, that is if both the main loop's vectorization factor and number
1862      of iterations are constant, then we use them to calculate the cost of
1863      the epilogue loop together with a 'likely value' for the epilogue's
1864      vectorization factor.  Otherwise we use the main loop's vectorization
1865      factor and the maximum poly value for the epilogue's.  If the target
1866      has not provided a sensible upper bound, poly vectorization factors
1867      are likely to be favored over constant ones.  */
1868   if (main_poly_vf.is_constant (&main_vf)
1869       && LOOP_VINFO_NITERS_KNOWN_P (main_loop))
1870     {
1871       unsigned HOST_WIDE_INT niters
1872 	= LOOP_VINFO_INT_NITERS (main_loop) % main_vf;
1873       HOST_WIDE_INT other_likely_vf
1874 	= estimated_poly_value (other_vf, POLY_VALUE_LIKELY);
1875       HOST_WIDE_INT this_likely_vf
1876 	= estimated_poly_value (this_vf, POLY_VALUE_LIKELY);
1877 
1878       /* If the epilogue is using partial vectors we account for the
1879 	 partial iteration here too.  */
1880       other_factor = niters / other_likely_vf;
1881       if (LOOP_VINFO_USING_PARTIAL_VECTORS_P (other_loop_vinfo)
1882 	  && niters % other_likely_vf != 0)
1883 	other_factor++;
1884 
1885       this_factor = niters / this_likely_vf;
1886       if (LOOP_VINFO_USING_PARTIAL_VECTORS_P (this_loop_vinfo)
1887 	  && niters % this_likely_vf != 0)
1888 	this_factor++;
1889     }
1890   else
1891     {
1892       unsigned HOST_WIDE_INT main_vf_max
1893 	= estimated_poly_value (main_poly_vf, POLY_VALUE_MAX);
1894       unsigned HOST_WIDE_INT other_vf_max
1895 	= estimated_poly_value (other_vf, POLY_VALUE_MAX);
1896       unsigned HOST_WIDE_INT this_vf_max
1897 	= estimated_poly_value (this_vf, POLY_VALUE_MAX);
1898 
1899       other_factor = CEIL (main_vf_max, other_vf_max);
1900       this_factor = CEIL (main_vf_max, this_vf_max);
1901 
1902       /* If the loop is not using partial vectors then it will iterate one
1903 	 fewer time than a loop that does.  It is safe to subtract one here,
1904 	 because the main loop's VF is always at least twice as big as an
1905 	 epilogue's.  */
1906       if (!LOOP_VINFO_USING_PARTIAL_VECTORS_P (other_loop_vinfo))
1907 	other_factor -= 1;
1908       if (!LOOP_VINFO_USING_PARTIAL_VECTORS_P (this_loop_vinfo))
1909 	this_factor -= 1;
1910     }
1911 
1912   /* Compute the costs by multiplying the inside costs by the factor and
1913      adding the outside costs, for a more complete picture.  The factor is
1914      the number of times we expect this epilogue to iterate.  */
1915   other_cost = other->body_cost () * other_factor;
1916   this_cost = this->body_cost () * this_factor;
1917   other_cost += other->outside_cost ();
1918   this_cost += this->outside_cost ();
1919   return this_cost < other_cost;
1920 }
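
/* Worked example (all numbers hypothetical): suppose the main loop has
   VF 16 and 100 known iterations, leaving 100 % 16 == 4 scalar iterations
   for the epilogue.  If THIS has a likely VF of 8 and uses partial vectors,
   its factor is 4 / 8 == 0 plus 1 for the partial iteration; if OTHER has
   VF 4 and does not use partial vectors, its factor is 4 / 4 == 1.  With
   body costs 20 (THIS) and 12 (OTHER) and outside costs 10 and 8, the
   totals are 20 * 1 + 10 == 30 versus 12 * 1 + 8 == 20, so OTHER is
   cheaper and the function returns false.  */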
1921 
1922 /* A <=>-style subroutine of better_main_loop_than_p.  Check whether we can
1923    determine the return value of better_main_loop_than_p by comparing the
1924    inside (loop body) costs of THIS and OTHER.  Return:
1925 
1926    * -1 if better_main_loop_than_p should return true.
1927    * 1 if better_main_loop_than_p should return false.
1928    * 0 if we can't decide.  */
1929 
1930 int
1931 vector_costs::compare_inside_loop_cost (const vector_costs *other) const
1932 {
1933   loop_vec_info this_loop_vinfo = as_a<loop_vec_info> (this->m_vinfo);
1934   loop_vec_info other_loop_vinfo = as_a<loop_vec_info> (other->m_vinfo);
1935 
1936   struct loop *loop = LOOP_VINFO_LOOP (this_loop_vinfo);
1937   gcc_assert (LOOP_VINFO_LOOP (other_loop_vinfo) == loop);
1938 
1939   poly_int64 this_vf = LOOP_VINFO_VECT_FACTOR (this_loop_vinfo);
1940   poly_int64 other_vf = LOOP_VINFO_VECT_FACTOR (other_loop_vinfo);
1941 
1942   /* Limit the VFs to what is likely to be the maximum number of iterations,
1943      to handle cases in which at least one loop_vinfo is fully-masked.  */
1944   HOST_WIDE_INT estimated_max_niter = likely_max_stmt_executions_int (loop);
1945   if (estimated_max_niter != -1)
1946     {
1947       if (known_le (estimated_max_niter, this_vf))
1948 	this_vf = estimated_max_niter;
1949       if (known_le (estimated_max_niter, other_vf))
1950 	other_vf = estimated_max_niter;
1951     }
1952 
1953   /* Check whether the (fractional) cost per scalar iteration is lower or
1954      higher: this_inside_cost / this_vf vs. other_inside_cost / other_vf.  */
1955   poly_int64 rel_this = this_loop_vinfo->vector_costs->body_cost () * other_vf;
1956   poly_int64 rel_other
1957     = other_loop_vinfo->vector_costs->body_cost () * this_vf;
1958 
1959   HOST_WIDE_INT est_rel_this_min
1960     = estimated_poly_value (rel_this, POLY_VALUE_MIN);
1961   HOST_WIDE_INT est_rel_this_max
1962     = estimated_poly_value (rel_this, POLY_VALUE_MAX);
1963 
1964   HOST_WIDE_INT est_rel_other_min
1965     = estimated_poly_value (rel_other, POLY_VALUE_MIN);
1966   HOST_WIDE_INT est_rel_other_max
1967     = estimated_poly_value (rel_other, POLY_VALUE_MAX);
1968 
1969   /* Check first whether we can establish an unambiguous total order from
1970      the minimum and maximum estimates.  */
1971   if (est_rel_this_min < est_rel_other_min
1972       && est_rel_this_max < est_rel_other_max)
1973     return -1;
1974 
1975   if (est_rel_other_min < est_rel_this_min
1976       && est_rel_other_max < est_rel_this_max)
1977     return 1;
1978 
1979   /* When other_loop_vinfo uses a variable vectorization factor,
1980      we know that it has a lower cost for at least one runtime VF.
1981      However, we don't know how likely that VF is.
1982 
1983      One option would be to compare the costs for the estimated VFs.
1984      The problem is that that can put too much pressure on the cost
1985      model.  E.g. if the estimated VF is also the lowest possible VF,
1986      and if other_loop_vinfo is 1 unit worse than this_loop_vinfo
1987      for the estimated VF, we'd then choose this_loop_vinfo even
1988      though (a) this_loop_vinfo might not actually be better than
1989      other_loop_vinfo for that VF and (b) it would be significantly
1990      worse at larger VFs.
1991 
1992      Here we go for a hacky compromise: pick this_loop_vinfo if it is
1993      no more expensive than other_loop_vinfo even after doubling the
1994      estimated other_loop_vinfo VF.  For all but trivial loops, this
1995      ensures that we only pick this_loop_vinfo if it is significantly
1996      better than other_loop_vinfo at the estimated VF.  */
1997   if (est_rel_other_min != est_rel_this_min
1998       || est_rel_other_max != est_rel_this_max)
1999     {
2000       HOST_WIDE_INT est_rel_this_likely
2001 	= estimated_poly_value (rel_this, POLY_VALUE_LIKELY);
2002       HOST_WIDE_INT est_rel_other_likely
2003 	= estimated_poly_value (rel_other, POLY_VALUE_LIKELY);
2004 
2005       return est_rel_this_likely * 2 <= est_rel_other_likely ? -1 : 1;
2006     }
2007 
2008   return 0;
2009 }
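
/* Worked example (hypothetical constant VFs): with THIS having body cost
   10 at VF 4 and OTHER having body cost 18 at VF 8, the cross-multiplied
   costs are rel_this = 10 * 8 == 80 and rel_other = 18 * 4 == 72.  All
   estimates of rel_other are below those of rel_this, so OTHER is cheaper
   per scalar iteration and the function returns 1.  */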
2010 
2011 /* A <=>-style subroutine of better_main_loop_than_p, used when there is
2012    nothing to choose between the inside (loop body) costs of THIS and OTHER.
2013    Check whether we can determine the return value of better_main_loop_than_p
2014    by comparing the outside (prologue and epilogue) costs of THIS and OTHER.
2015    Return:
2016 
2017    * -1 if better_main_loop_than_p should return true.
2018    * 1 if better_main_loop_than_p should return false.
2019    * 0 if we can't decide.  */
2020 
2021 int
2022 vector_costs::compare_outside_loop_cost (const vector_costs *other) const
2023 {
2024   auto this_outside_cost = this->outside_cost ();
2025   auto other_outside_cost = other->outside_cost ();
2026   if (this_outside_cost != other_outside_cost)
2027     return this_outside_cost < other_outside_cost ? -1 : 1;
2028 
2029   return 0;
2030 }
2031