/* Vectorizer
   Copyright (C) 2003-2020 Free Software Foundation, Inc.
   Contributed by Dorit Naishlos <dorit@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

/* Loop and basic block vectorizer.

  This file contains drivers for the three vectorizers:
  (1) loop vectorizer (inter-iteration parallelism),
  (2) loop-aware SLP (intra-iteration parallelism) (invoked by the loop
      vectorizer)
  (3) BB vectorizer (out-of-loops), aka SLP

  The rest of the vectorizer's code is organized as follows:
  - tree-vect-loop.c - loop specific parts such as reductions, etc. These are
    used by drivers (1) and (2).
  - tree-vect-loop-manip.c - vectorizer's loop control-flow utilities, used by
    drivers (1) and (2).
  - tree-vect-slp.c - BB vectorization specific analysis and transformation,
    used by drivers (2) and (3).
  - tree-vect-stmts.c - statement analysis and transformation (used by all).
  - tree-vect-data-refs.c - vectorizer specific data-refs analysis and
    manipulations (used by all).
  - tree-vect-patterns.c - vectorizable code patterns detector (used by all)

  Here's a poor attempt at illustrating that:

     tree-vectorizer.c:
     loop_vect()  loop_aware_slp()  slp_vect()
          |        /           \          /
          |       /             \        /
          tree-vect-loop.c  tree-vect-slp.c
                | \      \  /      /   |
                |  \      \/      /    |
                |   \     /\     /     |
                |    \   /  \   /      |
         tree-vect-stmts.c  tree-vect-data-refs.c
                       \      /
                    tree-vect-patterns.c
*/

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "tree.h"
#include "gimple.h"
#include "predict.h"
#include "tree-pass.h"
#include "ssa.h"
#include "cgraph.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "gimple-iterator.h"
#include "gimple-walk.h"
#include "tree-ssa-loop-manip.h"
#include "tree-ssa-loop-niter.h"
#include "tree-cfg.h"
#include "cfgloop.h"
#include "tree-vectorizer.h"
#include "tree-ssa-propagate.h"
#include "dbgcnt.h"
#include "tree-scalar-evolution.h"
#include "stringpool.h"
#include "attribs.h"
#include "gimple-pretty-print.h"
#include "opt-problem.h"
#include "internal-fn.h"


/* Loop or bb location, with hotness information.  */
dump_user_location_t vect_location;

/* auto_purge_vect_location's dtor: reset the vect_location
   global, to avoid stale location_t values that could reference
   GC-ed blocks.  */

auto_purge_vect_location::~auto_purge_vect_location ()
{
  vect_location = dump_user_location_t ();
}

/* Dump a cost entry according to args to F.  */
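/* For example (with an illustrative pointer value and statement), a
   vector load costed twice in the loop body at cost 1 prints as:

       0x4a3be08 _5 = *a_9 2 times vector_load costs 1 in body  */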

void
dump_stmt_cost (FILE *f, void *data, int count, enum vect_cost_for_stmt kind,
		stmt_vec_info stmt_info, int misalign, unsigned cost,
		enum vect_cost_model_location where)
{
  fprintf (f, "%p ", data);
  if (stmt_info)
    {
      print_gimple_expr (f, STMT_VINFO_STMT (stmt_info), 0, TDF_SLIM);
      fprintf (f, " ");
    }
  else
    fprintf (f, "<unknown> ");
  fprintf (f, "%d times ", count);
  const char *ks = "unknown";
  switch (kind)
    {
    case scalar_stmt:
      ks = "scalar_stmt";
      break;
    case scalar_load:
      ks = "scalar_load";
      break;
    case scalar_store:
      ks = "scalar_store";
      break;
    case vector_stmt:
      ks = "vector_stmt";
      break;
    case vector_load:
      ks = "vector_load";
      break;
    case vector_gather_load:
      ks = "vector_gather_load";
      break;
    case unaligned_load:
      ks = "unaligned_load";
      break;
    case unaligned_store:
      ks = "unaligned_store";
      break;
    case vector_store:
      ks = "vector_store";
      break;
    case vector_scatter_store:
      ks = "vector_scatter_store";
      break;
    case vec_to_scalar:
      ks = "vec_to_scalar";
      break;
    case scalar_to_vec:
      ks = "scalar_to_vec";
      break;
    case cond_branch_not_taken:
      ks = "cond_branch_not_taken";
      break;
    case cond_branch_taken:
      ks = "cond_branch_taken";
      break;
    case vec_perm:
      ks = "vec_perm";
      break;
    case vec_promote_demote:
      ks = "vec_promote_demote";
      break;
    case vec_construct:
      ks = "vec_construct";
      break;
    }
  fprintf (f, "%s ", ks);
  if (kind == unaligned_load || kind == unaligned_store)
    fprintf (f, "(misalign %d) ", misalign);
  fprintf (f, "costs %u ", cost);
  const char *ws = "unknown";
  switch (where)
    {
    case vect_prologue:
      ws = "prologue";
      break;
    case vect_body:
      ws = "body";
      break;
    case vect_epilogue:
      ws = "epilogue";
      break;
    }
  fprintf (f, "in %s\n", ws);
}

/* For mapping simduid to vectorization factor.  */

class simduid_to_vf : public free_ptr_hash<simduid_to_vf>
{
public:
  unsigned int simduid;
  poly_uint64 vf;

  /* hash_table support.  */
  static inline hashval_t hash (const simduid_to_vf *);
  static inline int equal (const simduid_to_vf *, const simduid_to_vf *);
};

inline hashval_t
simduid_to_vf::hash (const simduid_to_vf *p)
{
  return p->simduid;
}

inline int
simduid_to_vf::equal (const simduid_to_vf *p1, const simduid_to_vf *p2)
{
  return p1->simduid == p2->simduid;
}

/* This hash maps the OMP simd array to the corresponding simduid used
   to index into it.  For example:

        _7 = GOMP_SIMD_LANE (simduid.0)
        ...
        ...
        D.1737[_7] = stuff;

   This hash maps from the OMP simd array (D.1737[]) to DECL_UID of
   simduid.0.  */

struct simd_array_to_simduid : free_ptr_hash<simd_array_to_simduid>
{
  tree decl;
  unsigned int simduid;

  /* hash_table support.  */
  static inline hashval_t hash (const simd_array_to_simduid *);
  static inline int equal (const simd_array_to_simduid *,
			   const simd_array_to_simduid *);
};

inline hashval_t
simd_array_to_simduid::hash (const simd_array_to_simduid *p)
{
  return DECL_UID (p->decl);
}

inline int
simd_array_to_simduid::equal (const simd_array_to_simduid *p1,
			      const simd_array_to_simduid *p2)
{
  return p1->decl == p2->decl;
}

/* Fold IFN_GOMP_SIMD_LANE, IFN_GOMP_SIMD_VF and IFN_GOMP_SIMD_LAST_LANE
   into their corresponding constants and remove
   IFN_GOMP_SIMD_ORDERED_{START,END}.  */
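/* For example, suppose the vectorization factor recorded in HTAB for
   simduid.0 is 8 (an illustrative value).  Then for

        _5 = GOMP_SIMD_VF (simduid.0);
        _7 = GOMP_SIMD_LANE (simduid.0);

   every use of _5 is replaced by 8, every use of _7 by 0, and both
   calls are removed.  */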

static void
adjust_simduid_builtins (hash_table<simduid_to_vf> *htab)
{
  basic_block bb;

  FOR_EACH_BB_FN (bb, cfun)
    {
      gimple_stmt_iterator i;

      for (i = gsi_start_bb (bb); !gsi_end_p (i); )
	{
	  poly_uint64 vf = 1;
	  enum internal_fn ifn;
	  gimple *stmt = gsi_stmt (i);
	  tree t;
	  if (!is_gimple_call (stmt)
	      || !gimple_call_internal_p (stmt))
	    {
	      gsi_next (&i);
	      continue;
	    }
	  ifn = gimple_call_internal_fn (stmt);
	  switch (ifn)
	    {
	    case IFN_GOMP_SIMD_LANE:
	    case IFN_GOMP_SIMD_VF:
	    case IFN_GOMP_SIMD_LAST_LANE:
	      break;
	    case IFN_GOMP_SIMD_ORDERED_START:
	    case IFN_GOMP_SIMD_ORDERED_END:
	      if (integer_onep (gimple_call_arg (stmt, 0)))
		{
		  enum built_in_function bcode
		    = (ifn == IFN_GOMP_SIMD_ORDERED_START
		       ? BUILT_IN_GOMP_ORDERED_START
		       : BUILT_IN_GOMP_ORDERED_END);
		  gimple *g
		    = gimple_build_call (builtin_decl_explicit (bcode), 0);
		  gimple_move_vops (g, stmt);
		  gsi_replace (&i, g, true);
		  continue;
		}
	      gsi_remove (&i, true);
	      unlink_stmt_vdef (stmt);
	      continue;
	    default:
	      gsi_next (&i);
	      continue;
	    }
	  tree arg = gimple_call_arg (stmt, 0);
	  gcc_assert (arg != NULL_TREE);
	  gcc_assert (TREE_CODE (arg) == SSA_NAME);
	  simduid_to_vf *p = NULL, data;
	  data.simduid = DECL_UID (SSA_NAME_VAR (arg));
	  /* Need to nullify the loop safelen field since its value is not
	     valid after the transformation.  */
	  if (bb->loop_father && bb->loop_father->safelen > 0)
	    bb->loop_father->safelen = 0;
	  if (htab)
	    {
	      p = htab->find (&data);
	      if (p)
		vf = p->vf;
	    }
	  switch (ifn)
	    {
	    case IFN_GOMP_SIMD_VF:
	      t = build_int_cst (unsigned_type_node, vf);
	      break;
	    case IFN_GOMP_SIMD_LANE:
	      t = build_int_cst (unsigned_type_node, 0);
	      break;
	    case IFN_GOMP_SIMD_LAST_LANE:
	      t = gimple_call_arg (stmt, 1);
	      break;
	    default:
	      gcc_unreachable ();
	    }
	  tree lhs = gimple_call_lhs (stmt);
	  if (lhs)
	    replace_uses_by (lhs, t);
	  release_defs (stmt);
	  gsi_remove (&i, true);
	}
    }
}

/* Helper structure for note_simd_array_uses.  */

struct note_simd_array_uses_struct
{
  hash_table<simd_array_to_simduid> **htab;
  unsigned int simduid;
};

/* Callback for note_simd_array_uses, called through walk_gimple_op.  */

static tree
note_simd_array_uses_cb (tree *tp, int *walk_subtrees, void *data)
{
  struct walk_stmt_info *wi = (struct walk_stmt_info *) data;
  struct note_simd_array_uses_struct *ns
    = (struct note_simd_array_uses_struct *) wi->info;

  if (TYPE_P (*tp))
    *walk_subtrees = 0;
  else if (VAR_P (*tp)
	   && lookup_attribute ("omp simd array", DECL_ATTRIBUTES (*tp))
	   && DECL_CONTEXT (*tp) == current_function_decl)
    {
      simd_array_to_simduid data;
      if (!*ns->htab)
	*ns->htab = new hash_table<simd_array_to_simduid> (15);
      data.decl = *tp;
      data.simduid = ns->simduid;
      simd_array_to_simduid **slot = (*ns->htab)->find_slot (&data, INSERT);
      if (*slot == NULL)
	{
	  simd_array_to_simduid *p = XNEW (simd_array_to_simduid);
	  *p = data;
	  *slot = p;
	}
      else if ((*slot)->simduid != ns->simduid)
	(*slot)->simduid = -1U;
      *walk_subtrees = 0;
    }
  return NULL_TREE;
}

/* Find "omp simd array" temporaries and map them to corresponding
   simduid.  */

static void
note_simd_array_uses (hash_table<simd_array_to_simduid> **htab)
{
  basic_block bb;
  gimple_stmt_iterator gsi;
  struct walk_stmt_info wi;
  struct note_simd_array_uses_struct ns;

  memset (&wi, 0, sizeof (wi));
  wi.info = &ns;
  ns.htab = htab;

  FOR_EACH_BB_FN (bb, cfun)
    for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
      {
	gimple *stmt = gsi_stmt (gsi);
	if (!is_gimple_call (stmt) || !gimple_call_internal_p (stmt))
	  continue;
	switch (gimple_call_internal_fn (stmt))
	  {
	  case IFN_GOMP_SIMD_LANE:
	  case IFN_GOMP_SIMD_VF:
	  case IFN_GOMP_SIMD_LAST_LANE:
	    break;
	  default:
	    continue;
	  }
	tree lhs = gimple_call_lhs (stmt);
	if (lhs == NULL_TREE)
	  continue;
	imm_use_iterator use_iter;
	gimple *use_stmt;
	ns.simduid = DECL_UID (SSA_NAME_VAR (gimple_call_arg (stmt, 0)));
	FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, lhs)
	  if (!is_gimple_debug (use_stmt))
	    walk_gimple_op (use_stmt, note_simd_array_uses_cb, &wi);
      }
}

/* Shrink arrays with "omp simd array" attribute to the corresponding
   vectorization factor.  */
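/* For example, if the loop using the array D.1737 from the comment
   above was vectorized with a vectorization factor of 8 (an
   illustrative value), D.1737's type is rebuilt as an 8-element
   array and the decl is re-laid out to match.  */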

static void
shrink_simd_arrays
  (hash_table<simd_array_to_simduid> *simd_array_to_simduid_htab,
   hash_table<simduid_to_vf> *simduid_to_vf_htab)
{
  for (hash_table<simd_array_to_simduid>::iterator iter
	 = simd_array_to_simduid_htab->begin ();
       iter != simd_array_to_simduid_htab->end (); ++iter)
    if ((*iter)->simduid != -1U)
      {
	tree decl = (*iter)->decl;
	poly_uint64 vf = 1;
	if (simduid_to_vf_htab)
	  {
	    simduid_to_vf *p = NULL, data;
	    data.simduid = (*iter)->simduid;
	    p = simduid_to_vf_htab->find (&data);
	    if (p)
	      vf = p->vf;
	  }
	tree atype
	  = build_array_type_nelts (TREE_TYPE (TREE_TYPE (decl)), vf);
	TREE_TYPE (decl) = atype;
	relayout_decl (decl);
      }

  delete simd_array_to_simduid_htab;
}

/* Initialize the vec_info with kind KIND_IN and target cost data
   TARGET_COST_DATA_IN.  */

vec_info::vec_info (vec_info::vec_kind kind_in, void *target_cost_data_in,
		    vec_info_shared *shared_)
  : kind (kind_in),
    shared (shared_),
    target_cost_data (target_cost_data_in)
{
  stmt_vec_infos.create (50);
}

vec_info::~vec_info ()
{
  slp_instance instance;
  unsigned int i;

  FOR_EACH_VEC_ELT (slp_instances, i, instance)
    vect_free_slp_instance (instance, true);

  destroy_cost_data (target_cost_data);
  free_stmt_vec_infos ();
}

vec_info_shared::vec_info_shared ()
  : datarefs (vNULL),
    datarefs_copy (vNULL),
    ddrs (vNULL)
{
}

vec_info_shared::~vec_info_shared ()
{
  free_data_refs (datarefs);
  free_dependence_relations (ddrs);
  datarefs_copy.release ();
}

void
vec_info_shared::save_datarefs ()
{
  if (!flag_checking)
    return;
  datarefs_copy.reserve_exact (datarefs.length ());
  for (unsigned i = 0; i < datarefs.length (); ++i)
    datarefs_copy.quick_push (*datarefs[i]);
}

void
vec_info_shared::check_datarefs ()
{
  if (!flag_checking)
    return;
  gcc_assert (datarefs.length () == datarefs_copy.length ());
  for (unsigned i = 0; i < datarefs.length (); ++i)
    if (memcmp (&datarefs_copy[i], datarefs[i], sizeof (data_reference)) != 0)
      gcc_unreachable ();
}

/* Record that STMT belongs to the vectorizable region.  Create and return
   an associated stmt_vec_info.  */

stmt_vec_info
vec_info::add_stmt (gimple *stmt)
{
  stmt_vec_info res = new_stmt_vec_info (stmt);
  set_vinfo_for_stmt (stmt, res);
  return res;
}

/* If STMT has an associated stmt_vec_info, return that vec_info, otherwise
   return null.  It is safe to call this function on any statement, even if
   it might not be part of the vectorizable region.  */

stmt_vec_info
vec_info::lookup_stmt (gimple *stmt)
{
  unsigned int uid = gimple_uid (stmt);
  if (uid > 0 && uid - 1 < stmt_vec_infos.length ())
    {
      stmt_vec_info res = stmt_vec_infos[uid - 1];
      if (res && res->stmt == stmt)
	return res;
    }
  return NULL;
}

/* If NAME is an SSA_NAME and its definition has an associated stmt_vec_info,
   return that stmt_vec_info, otherwise return null.  It is safe to call
   this on arbitrary operands.  */

stmt_vec_info
vec_info::lookup_def (tree name)
{
  if (TREE_CODE (name) == SSA_NAME
      && !SSA_NAME_IS_DEFAULT_DEF (name))
    return lookup_stmt (SSA_NAME_DEF_STMT (name));
  return NULL;
}

/* See whether there is a single non-debug statement that uses LHS and
   whether that statement has an associated stmt_vec_info.  Return the
   stmt_vec_info if so, otherwise return null.  */

stmt_vec_info
vec_info::lookup_single_use (tree lhs)
{
  use_operand_p dummy;
  gimple *use_stmt;
  if (single_imm_use (lhs, &dummy, &use_stmt))
    return lookup_stmt (use_stmt);
  return NULL;
}

/* Return vectorization information about DR.  */

dr_vec_info *
vec_info::lookup_dr (data_reference *dr)
{
  stmt_vec_info stmt_info = lookup_stmt (DR_STMT (dr));
  /* DR_STMT should never refer to a stmt in a pattern replacement.  */
  gcc_checking_assert (!is_pattern_stmt_p (stmt_info));
  return STMT_VINFO_DR_INFO (stmt_info->dr_aux.stmt);
}

/* Record that NEW_STMT_INFO now implements the same data reference
   as OLD_STMT_INFO.  */

void
vec_info::move_dr (stmt_vec_info new_stmt_info, stmt_vec_info old_stmt_info)
{
  gcc_assert (!is_pattern_stmt_p (old_stmt_info));
  STMT_VINFO_DR_INFO (old_stmt_info)->stmt = new_stmt_info;
  new_stmt_info->dr_aux = old_stmt_info->dr_aux;
  STMT_VINFO_DR_WRT_VEC_LOOP (new_stmt_info)
    = STMT_VINFO_DR_WRT_VEC_LOOP (old_stmt_info);
  STMT_VINFO_GATHER_SCATTER_P (new_stmt_info)
    = STMT_VINFO_GATHER_SCATTER_P (old_stmt_info);
}

/* Permanently remove the statement described by STMT_INFO from the
   function.  */

void
vec_info::remove_stmt (stmt_vec_info stmt_info)
{
  gcc_assert (!stmt_info->pattern_stmt_p);
  set_vinfo_for_stmt (stmt_info->stmt, NULL);
  gimple_stmt_iterator si = gsi_for_stmt (stmt_info->stmt);
  unlink_stmt_vdef (stmt_info->stmt);
  gsi_remove (&si, true);
  release_defs (stmt_info->stmt);
  free_stmt_vec_info (stmt_info);
}

/* Replace the statement at GSI by NEW_STMT, both the vectorization
   information and the function itself.  STMT_INFO describes the statement
   at GSI.  */

void
vec_info::replace_stmt (gimple_stmt_iterator *gsi, stmt_vec_info stmt_info,
			gimple *new_stmt)
{
  gimple *old_stmt = stmt_info->stmt;
  gcc_assert (!stmt_info->pattern_stmt_p && old_stmt == gsi_stmt (*gsi));
  set_vinfo_for_stmt (old_stmt, NULL);
  set_vinfo_for_stmt (new_stmt, stmt_info);
  stmt_info->stmt = new_stmt;
  gsi_replace (gsi, new_stmt, true);
}

/* Create and initialize a new stmt_vec_info struct for STMT.  */

stmt_vec_info
vec_info::new_stmt_vec_info (gimple *stmt)
{
  stmt_vec_info res = XCNEW (class _stmt_vec_info);
  res->vinfo = this;
  res->stmt = stmt;

  STMT_VINFO_TYPE (res) = undef_vec_info_type;
  STMT_VINFO_RELEVANT (res) = vect_unused_in_scope;
  STMT_VINFO_VECTORIZABLE (res) = true;
  STMT_VINFO_REDUC_TYPE (res) = TREE_CODE_REDUCTION;
  STMT_VINFO_REDUC_CODE (res) = ERROR_MARK;
  STMT_VINFO_REDUC_FN (res) = IFN_LAST;
  STMT_VINFO_REDUC_IDX (res) = -1;
  STMT_VINFO_SLP_VECT_ONLY (res) = false;

  if (gimple_code (stmt) == GIMPLE_PHI
      && is_loop_header_bb_p (gimple_bb (stmt)))
    STMT_VINFO_DEF_TYPE (res) = vect_unknown_def_type;
  else
    STMT_VINFO_DEF_TYPE (res) = vect_internal_def;

  STMT_VINFO_SAME_ALIGN_REFS (res).create (0);
  STMT_SLP_TYPE (res) = loop_vect;

  /* This is really "uninitialized" until vect_compute_data_ref_alignment.  */
  res->dr_aux.misalignment = DR_MISALIGNMENT_UNINITIALIZED;

  return res;
}

/* Associate STMT with INFO.  */
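/* A gimple uid of zero means "no stmt_vec_info"; a nonzero uid N refers
   to stmt_vec_infos[N - 1].  Clearing an association keeps the nonzero
   uid and only nulls the table slot.  */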

void
vec_info::set_vinfo_for_stmt (gimple *stmt, stmt_vec_info info)
{
  unsigned int uid = gimple_uid (stmt);
  if (uid == 0)
    {
      gcc_checking_assert (info);
      uid = stmt_vec_infos.length () + 1;
      gimple_set_uid (stmt, uid);
      stmt_vec_infos.safe_push (info);
    }
  else
    {
      gcc_checking_assert (info == NULL);
      stmt_vec_infos[uid - 1] = info;
    }
}

/* Free the contents of stmt_vec_infos.  */

void
vec_info::free_stmt_vec_infos (void)
{
  unsigned int i;
  stmt_vec_info info;
  FOR_EACH_VEC_ELT (stmt_vec_infos, i, info)
    if (info != NULL)
      free_stmt_vec_info (info);
  stmt_vec_infos.release ();
}

/* Free STMT_INFO.  */

void
vec_info::free_stmt_vec_info (stmt_vec_info stmt_info)
{
  if (stmt_info->pattern_stmt_p)
    {
      gimple_set_bb (stmt_info->stmt, NULL);
      tree lhs = gimple_get_lhs (stmt_info->stmt);
      if (lhs && TREE_CODE (lhs) == SSA_NAME)
	release_ssa_name (lhs);
    }

  STMT_VINFO_SAME_ALIGN_REFS (stmt_info).release ();
  STMT_VINFO_SIMD_CLONE_INFO (stmt_info).release ();
  free (stmt_info);
}

/* A helper function to free scev and LOOP niter information, as well as
   clear loop constraint LOOP_C_FINITE.  */

void
vect_free_loop_info_assumptions (class loop *loop)
{
  scev_reset_htab ();
  /* We need to explicitly reset upper bound information since it is
     used even after free_numbers_of_iterations_estimates.  */
  loop->any_upper_bound = false;
  loop->any_likely_upper_bound = false;
  free_numbers_of_iterations_estimates (loop);
  loop_constraint_clear (loop, LOOP_C_FINITE);
}

/* If LOOP has been versioned during ifcvt, return the internal call
   guarding it.  */

gimple *
vect_loop_vectorized_call (class loop *loop, gcond **cond)
{
  basic_block bb = loop_preheader_edge (loop)->src;
  gimple *g;
  do
    {
      g = last_stmt (bb);
      if (g)
	break;
      if (!single_pred_p (bb))
	break;
      bb = single_pred (bb);
    }
  while (1);
  if (g && gimple_code (g) == GIMPLE_COND)
    {
      if (cond)
	*cond = as_a <gcond *> (g);
      gimple_stmt_iterator gsi = gsi_for_stmt (g);
      gsi_prev (&gsi);
      if (!gsi_end_p (gsi))
	{
	  g = gsi_stmt (gsi);
	  if (gimple_call_internal_p (g, IFN_LOOP_VECTORIZED)
	      && (tree_to_shwi (gimple_call_arg (g, 0)) == loop->num
		  || tree_to_shwi (gimple_call_arg (g, 1)) == loop->num))
	    return g;
	}
    }
  return NULL;
}
/* If LOOP has been versioned during loop distribution, return the
   internal call guarding it.  */

static gimple *
vect_loop_dist_alias_call (class loop *loop)
{
  basic_block bb;
  basic_block entry;
  class loop *outer, *orig;
  gimple_stmt_iterator gsi;
  gimple *g;

  if (loop->orig_loop_num == 0)
    return NULL;

  orig = get_loop (cfun, loop->orig_loop_num);
  if (orig == NULL)
    {
      /* The original loop has somehow been destroyed.  Clear the
	 information.  */
      loop->orig_loop_num = 0;
      return NULL;
    }

  if (loop != orig)
    bb = nearest_common_dominator (CDI_DOMINATORS, loop->header, orig->header);
  else
    bb = loop_preheader_edge (loop)->src;

  outer = bb->loop_father;
  entry = ENTRY_BLOCK_PTR_FOR_FN (cfun);

  /* Look upward in dominance tree.  */
  for (; bb != entry && flow_bb_inside_loop_p (outer, bb);
       bb = get_immediate_dominator (CDI_DOMINATORS, bb))
    {
      g = last_stmt (bb);
      if (g == NULL || gimple_code (g) != GIMPLE_COND)
	continue;

      gsi = gsi_for_stmt (g);
      gsi_prev (&gsi);
      if (gsi_end_p (gsi))
	continue;

      g = gsi_stmt (gsi);
      /* The guarding internal function call must have the same distribution
	 alias id.  */
      if (gimple_call_internal_p (g, IFN_LOOP_DIST_ALIAS)
	  && (tree_to_shwi (gimple_call_arg (g, 0)) == loop->orig_loop_num))
	return g;
    }
  return NULL;
}
/* Set the uids of all the statements in basic blocks inside the loop
   represented by LOOP_VINFO.  LOOP_VECTORIZED_CALL is the internal
   call guarding the loop that has been if-converted.  */
static void
set_uid_loop_bbs (loop_vec_info loop_vinfo, gimple *loop_vectorized_call)
{
  tree arg = gimple_call_arg (loop_vectorized_call, 1);
  basic_block *bbs;
  unsigned int i;
  class loop *scalar_loop = get_loop (cfun, tree_to_shwi (arg));

  LOOP_VINFO_SCALAR_LOOP (loop_vinfo) = scalar_loop;
  gcc_checking_assert (vect_loop_vectorized_call (scalar_loop)
		       == loop_vectorized_call);
  /* If we are going to vectorize the outer loop, prevent vectorization
     of the inner loop in the scalar loop - either the scalar loop is
     thrown away, so the work would be wasted, or it is used only for
     a few iterations.  */
  if (scalar_loop->inner)
    {
      gimple *g = vect_loop_vectorized_call (scalar_loop->inner);
      if (g)
	{
	  arg = gimple_call_arg (g, 0);
	  get_loop (cfun, tree_to_shwi (arg))->dont_vectorize = true;
	  fold_loop_internal_call (g, boolean_false_node);
	}
    }
  bbs = get_loop_body (scalar_loop);
  for (i = 0; i < scalar_loop->num_nodes; i++)
    {
      basic_block bb = bbs[i];
      gimple_stmt_iterator gsi;
      for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
	{
	  gimple *phi = gsi_stmt (gsi);
	  gimple_set_uid (phi, 0);
	}
      for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
	{
	  gimple *stmt = gsi_stmt (gsi);
	  gimple_set_uid (stmt, 0);
	}
    }
  free (bbs);
}

/* Try to vectorize LOOP.  */

static unsigned
try_vectorize_loop_1 (hash_table<simduid_to_vf> *&simduid_to_vf_htab,
		      unsigned *num_vectorized_loops, loop_p loop,
		      gimple *loop_vectorized_call,
		      gimple *loop_dist_alias_call)
{
  unsigned ret = 0;
  vec_info_shared shared;
  auto_purge_vect_location sentinel;
  vect_location = find_loop_location (loop);

  if (LOCATION_LOCUS (vect_location.get_location_t ()) != UNKNOWN_LOCATION
      && dump_enabled_p ())
    dump_printf (MSG_NOTE | MSG_PRIORITY_INTERNALS,
		 "\nAnalyzing loop at %s:%d\n",
		 LOCATION_FILE (vect_location.get_location_t ()),
		 LOCATION_LINE (vect_location.get_location_t ()));

  opt_loop_vec_info loop_vinfo = opt_loop_vec_info::success (NULL);
  /* In the case of epilogue vectorization the loop already has its
     loop_vec_info set, so we do not need to analyze the loop in this
     case.  */
  if (loop_vec_info vinfo = loop_vec_info_for_loop (loop))
    loop_vinfo = opt_loop_vec_info::success (vinfo);
  else
    {
      /* Try to analyze the loop, retaining an opt_problem if dump_enabled_p.  */
      loop_vinfo = vect_analyze_loop (loop, &shared);
      loop->aux = loop_vinfo;
    }

  if (!loop_vinfo)
    if (dump_enabled_p ())
      if (opt_problem *problem = loop_vinfo.get_problem ())
	{
	  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			   "couldn't vectorize loop\n");
	  problem->emit_and_clear ();
	}

  if (!loop_vinfo || !LOOP_VINFO_VECTORIZABLE_P (loop_vinfo))
    {
      /* Free existing information if the loop was analyzed with some
	 assumptions.  */
      if (loop_constraint_set_p (loop, LOOP_C_FINITE))
	vect_free_loop_info_assumptions (loop);

      /* If we applied if-conversion then try to vectorize the
	 BB of innermost loops.
	 ???  Ideally BB vectorization would learn to vectorize
	 control flow by applying if-conversion on-the-fly, the
	 following retains the if-converted loop body even when
	 only non-if-converted parts took part in BB vectorization.  */
      if (flag_tree_slp_vectorize != 0
	  && loop_vectorized_call
	  && ! loop->inner)
	{
	  basic_block bb = loop->header;
	  bool require_loop_vectorize = false;
	  for (gimple_stmt_iterator gsi = gsi_start_bb (bb);
	       !gsi_end_p (gsi); gsi_next (&gsi))
	    {
	      gimple *stmt = gsi_stmt (gsi);
	      gcall *call = dyn_cast <gcall *> (stmt);
	      if (call && gimple_call_internal_p (call))
		{
		  internal_fn ifn = gimple_call_internal_fn (call);
		  if (ifn == IFN_MASK_LOAD || ifn == IFN_MASK_STORE
		      /* Don't keep the if-converted parts when the ifn with
			 a specific type is not supported by the backend.  */
		      || (direct_internal_fn_p (ifn)
			  && !direct_internal_fn_supported_p
			  (call, OPTIMIZE_FOR_SPEED)))
		    {
		      require_loop_vectorize = true;
		      break;
		    }
		}
	      gimple_set_uid (stmt, -1);
	      gimple_set_visited (stmt, false);
	    }
	  if (!require_loop_vectorize && vect_slp_bb (bb))
	    {
	      if (dump_enabled_p ())
		dump_printf_loc (MSG_NOTE, vect_location,
				 "basic block vectorized\n");
	      fold_loop_internal_call (loop_vectorized_call,
				       boolean_true_node);
	      loop_vectorized_call = NULL;
	      ret |= TODO_cleanup_cfg | TODO_update_ssa_only_virtuals;
	    }
	}
      /* If outer loop vectorization fails for LOOP_VECTORIZED guarded
	 loop, don't vectorize its inner loop; we'll attempt to
	 vectorize LOOP_VECTORIZED guarded inner loop of the scalar
	 loop version.  */
      if (loop_vectorized_call && loop->inner)
	loop->inner->dont_vectorize = true;
      return ret;
    }

  if (!dbg_cnt (vect_loop))
    {
      /* Free existing information if the loop was analyzed with some
	 assumptions.  */
      if (loop_constraint_set_p (loop, LOOP_C_FINITE))
	vect_free_loop_info_assumptions (loop);
      return ret;
    }

  if (loop_vectorized_call)
    set_uid_loop_bbs (loop_vinfo, loop_vectorized_call);

  unsigned HOST_WIDE_INT bytes;
  if (dump_enabled_p ())
    {
      if (GET_MODE_SIZE (loop_vinfo->vector_mode).is_constant (&bytes))
	dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
			 "loop vectorized using %wu byte vectors\n", bytes);
      else
	dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
			 "loop vectorized using variable length vectors\n");
    }

  loop_p new_loop = vect_transform_loop (loop_vinfo,
					 loop_vectorized_call);
  (*num_vectorized_loops)++;
  /* Now that the loop has been vectorized, allow it to be unrolled
     etc.  */
  loop->force_vectorize = false;

  if (loop->simduid)
    {
      simduid_to_vf *simduid_to_vf_data = XNEW (simduid_to_vf);
      if (!simduid_to_vf_htab)
	simduid_to_vf_htab = new hash_table<simduid_to_vf> (15);
      simduid_to_vf_data->simduid = DECL_UID (loop->simduid);
      simduid_to_vf_data->vf = loop_vinfo->vectorization_factor;
      *simduid_to_vf_htab->find_slot (simduid_to_vf_data, INSERT)
	  = simduid_to_vf_data;
    }

  if (loop_vectorized_call)
    {
      fold_loop_internal_call (loop_vectorized_call, boolean_true_node);
      loop_vectorized_call = NULL;
      ret |= TODO_cleanup_cfg;
    }
  if (loop_dist_alias_call)
    {
      tree value = gimple_call_arg (loop_dist_alias_call, 1);
      fold_loop_internal_call (loop_dist_alias_call, value);
      loop_dist_alias_call = NULL;
      ret |= TODO_cleanup_cfg;
    }

  /* Epilogue of vectorized loop must be vectorized too.  */
  if (new_loop)
    {
      /* Don't include vectorized epilogues in the "vectorized loops"
	 count.  */
      unsigned dont_count = *num_vectorized_loops;
      ret |= try_vectorize_loop_1 (simduid_to_vf_htab, &dont_count,
				   new_loop, NULL, NULL);
    }

  return ret;
}

/* Try to vectorize LOOP.  */

static unsigned
try_vectorize_loop (hash_table<simduid_to_vf> *&simduid_to_vf_htab,
		    unsigned *num_vectorized_loops, loop_p loop)
{
  if (!((flag_tree_loop_vectorize
	 && optimize_loop_nest_for_speed_p (loop))
	|| loop->force_vectorize))
    return 0;

  return try_vectorize_loop_1 (simduid_to_vf_htab, num_vectorized_loops, loop,
			       vect_loop_vectorized_call (loop),
			       vect_loop_dist_alias_call (loop));
}


/* Function vectorize_loops.

   Entry point to loop vectorization phase.  */

unsigned
vectorize_loops (void)
{
  unsigned int i;
  unsigned int num_vectorized_loops = 0;
  unsigned int vect_loops_num;
  class loop *loop;
  hash_table<simduid_to_vf> *simduid_to_vf_htab = NULL;
  hash_table<simd_array_to_simduid> *simd_array_to_simduid_htab = NULL;
  bool any_ifcvt_loops = false;
  unsigned ret = 0;

  vect_loops_num = number_of_loops (cfun);

  /* Bail out if there are no loops.  */
  if (vect_loops_num <= 1)
    return 0;

  if (cfun->has_simduid_loops)
    note_simd_array_uses (&simd_array_to_simduid_htab);

  /*  ----------- Analyze loops. -----------  */

  /* If some loop was duplicated, it gets a bigger number
     than all previously defined loops.  This fact allows us to run
     only over the initial loops, skipping newly generated ones.  */
  FOR_EACH_LOOP (loop, 0)
    if (loop->dont_vectorize)
      {
	any_ifcvt_loops = true;
	/* If-conversion sometimes versions both the outer loop
	   (for the case when outer loop vectorization might be
	   desirable) as well as the inner loop in the scalar version
	   of the loop.  So we have:
	    if (LOOP_VECTORIZED (1, 3))
	      {
		loop1
		  loop2
	      }
	    else
	      loop3 (copy of loop1)
		if (LOOP_VECTORIZED (4, 5))
		  loop4 (copy of loop2)
		else
		  loop5 (copy of loop4)
	   If FOR_EACH_LOOP gives us loop3 first (which has
	   dont_vectorize set), make sure to process loop1 before loop4,
	   so that we can prevent vectorization of loop4 if loop1
	   is successfully vectorized.  */
	if (loop->inner)
	  {
	    gimple *loop_vectorized_call
	      = vect_loop_vectorized_call (loop);
	    if (loop_vectorized_call
		&& vect_loop_vectorized_call (loop->inner))
	      {
		tree arg = gimple_call_arg (loop_vectorized_call, 0);
		class loop *vector_loop
		  = get_loop (cfun, tree_to_shwi (arg));
		if (vector_loop && vector_loop != loop)
		  {
		    /* Make sure we don't vectorize it twice.  */
		    vector_loop->dont_vectorize = true;
		    ret |= try_vectorize_loop (simduid_to_vf_htab,
					       &num_vectorized_loops,
					       vector_loop);
		  }
	      }
	  }
      }
    else
      ret |= try_vectorize_loop (simduid_to_vf_htab, &num_vectorized_loops,
				 loop);

  vect_location = dump_user_location_t ();

  statistics_counter_event (cfun, "Vectorized loops", num_vectorized_loops);
  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "vectorized %u loops in function.\n",
                     num_vectorized_loops);

  /*  ----------- Finalize. -----------  */

  if (any_ifcvt_loops)
    for (i = 1; i < number_of_loops (cfun); i++)
      {
	loop = get_loop (cfun, i);
	if (loop && loop->dont_vectorize)
	  {
	    gimple *g = vect_loop_vectorized_call (loop);
	    if (g)
	      {
		fold_loop_internal_call (g, boolean_false_node);
		ret |= TODO_cleanup_cfg;
		g = NULL;
	      }
	    else
	      g = vect_loop_dist_alias_call (loop);

	    if (g)
	      {
		fold_loop_internal_call (g, boolean_false_node);
		ret |= TODO_cleanup_cfg;
	      }
	  }
      }

  for (i = 1; i < number_of_loops (cfun); i++)
    {
      loop_vec_info loop_vinfo;
      bool has_mask_store;

      loop = get_loop (cfun, i);
      if (!loop || !loop->aux)
	continue;
      loop_vinfo = (loop_vec_info) loop->aux;
      has_mask_store = LOOP_VINFO_HAS_MASK_STORE (loop_vinfo);
      delete loop_vinfo;
      if (has_mask_store
	  && targetm.vectorize.empty_mask_is_expensive (IFN_MASK_STORE))
	optimize_mask_stores (loop);
      loop->aux = NULL;
    }

  /* Fold IFN_GOMP_SIMD_{VF,LANE,LAST_LANE,ORDERED_{START,END}} builtins.  */
  if (cfun->has_simduid_loops)
    adjust_simduid_builtins (simduid_to_vf_htab);

  /* Shrink any "omp simd array" temporary arrays to the
     actual vectorization factors.  */
  if (simd_array_to_simduid_htab)
    shrink_simd_arrays (simd_array_to_simduid_htab, simduid_to_vf_htab);
  delete simduid_to_vf_htab;
  cfun->has_simduid_loops = false;

  if (num_vectorized_loops > 0)
    {
      /* If we vectorized any loop, only virtual SSA form needs to be
	 updated.
	 ???  Also while we try hard to update loop-closed SSA form we fail
	 to properly do this in some corner-cases (see PR56286).  */
      rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa_only_virtuals);
      return TODO_cleanup_cfg;
    }

  return ret;
}


/* Entry point to the simduid cleanup pass.  */

namespace {

const pass_data pass_data_simduid_cleanup =
{
  GIMPLE_PASS, /* type */
  "simduid", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_NONE, /* tv_id */
  ( PROP_ssa | PROP_cfg ), /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  0, /* todo_flags_finish */
};

class pass_simduid_cleanup : public gimple_opt_pass
{
public:
  pass_simduid_cleanup (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_simduid_cleanup, ctxt)
  {}

  /* opt_pass methods: */
  opt_pass * clone () { return new pass_simduid_cleanup (m_ctxt); }
  virtual bool gate (function *fun) { return fun->has_simduid_loops; }
  virtual unsigned int execute (function *);

}; // class pass_simduid_cleanup

unsigned int
pass_simduid_cleanup::execute (function *fun)
{
  hash_table<simd_array_to_simduid> *simd_array_to_simduid_htab = NULL;

  note_simd_array_uses (&simd_array_to_simduid_htab);

  /* Fold IFN_GOMP_SIMD_{VF,LANE,LAST_LANE,ORDERED_{START,END}} builtins.  */
  adjust_simduid_builtins (NULL);

  /* Shrink any "omp simd array" temporary arrays to the
     actual vectorization factors.  */
  if (simd_array_to_simduid_htab)
    shrink_simd_arrays (simd_array_to_simduid_htab, NULL);
  fun->has_simduid_loops = false;
  return 0;
}

}  // anon namespace

gimple_opt_pass *
make_pass_simduid_cleanup (gcc::context *ctxt)
{
  return new pass_simduid_cleanup (ctxt);
}


/*  Entry point to basic block SLP phase.  */

namespace {

const pass_data pass_data_slp_vectorize =
{
  GIMPLE_PASS, /* type */
  "slp", /* name */
  OPTGROUP_LOOP | OPTGROUP_VEC, /* optinfo_flags */
  TV_TREE_SLP_VECTORIZATION, /* tv_id */
  ( PROP_ssa | PROP_cfg ), /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  TODO_update_ssa, /* todo_flags_finish */
};

class pass_slp_vectorize : public gimple_opt_pass
{
public:
  pass_slp_vectorize (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_slp_vectorize, ctxt)
  {}

  /* opt_pass methods: */
  opt_pass * clone () { return new pass_slp_vectorize (m_ctxt); }
  virtual bool gate (function *) { return flag_tree_slp_vectorize != 0; }
  virtual unsigned int execute (function *);

}; // class pass_slp_vectorize

unsigned int
pass_slp_vectorize::execute (function *fun)
{
  auto_purge_vect_location sentinel;
  basic_block bb;

  bool in_loop_pipeline = scev_initialized_p ();
  if (!in_loop_pipeline)
    {
      loop_optimizer_init (LOOPS_NORMAL);
      scev_initialize ();
    }

  /* Mark all stmts as not belonging to the current region and unvisited.  */
  FOR_EACH_BB_FN (bb, fun)
    {
      for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi);
	   gsi_next (&gsi))
	{
	  gimple *stmt = gsi_stmt (gsi);
	  gimple_set_uid (stmt, -1);
	  gimple_set_visited (stmt, false);
	}
    }

  FOR_EACH_BB_FN (bb, fun)
    {
      if (vect_slp_bb (bb))
	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location, "basic block vectorized\n");
    }

  if (!in_loop_pipeline)
    {
      scev_finalize ();
      loop_optimizer_finalize ();
    }

  return 0;
}

} // anon namespace

gimple_opt_pass *
make_pass_slp_vectorize (gcc::context *ctxt)
{
  return new pass_slp_vectorize (ctxt);
}


/* Increase alignment of global arrays to improve vectorization potential.
   TODO:
   - Consider also structs that have an array field.
   - Use ipa analysis to prune arrays that can't be vectorized?
     This should involve global alignment analysis and in the future also
     array padding.  */

static unsigned get_vec_alignment_for_type (tree);
static hash_map<tree, unsigned> *type_align_map;

/* Return the alignment of the vector type corresponding to the array's
   scalar type, or 0 if no vector type exists.  */
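/* For example, for "int a[256]" on a target whose vector type for int
   is V4SI (an illustrative choice), this returns V4SI's alignment;
   for "int a[2]", whose size is smaller than the vector type's,
   it returns 0.  */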
static unsigned
get_vec_alignment_for_array_type (tree type)
{
  gcc_assert (TREE_CODE (type) == ARRAY_TYPE);
  poly_uint64 array_size, vector_size;

  tree scalar_type = strip_array_types (type);
  tree vectype = get_related_vectype_for_scalar_type (VOIDmode, scalar_type);
  if (!vectype
      || !poly_int_tree_p (TYPE_SIZE (type), &array_size)
      || !poly_int_tree_p (TYPE_SIZE (vectype), &vector_size)
      || maybe_lt (array_size, vector_size))
    return 0;

  return TYPE_ALIGN (vectype);
}

/* Return the alignment of the field having the maximum alignment of the
   vector type corresponding to its scalar type.  For now, we only consider
   fields whose offset is a multiple of their vector alignment.
   Return 0 if no suitable field is found.  */
static unsigned
get_vec_alignment_for_record_type (tree type)
{
  gcc_assert (TREE_CODE (type) == RECORD_TYPE);

  unsigned max_align = 0, alignment;
  HOST_WIDE_INT offset;
  tree offset_tree;

  if (TYPE_PACKED (type))
    return 0;

  unsigned *slot = type_align_map->get (type);
  if (slot)
    return *slot;

  for (tree field = first_field (type);
       field != NULL_TREE;
       field = DECL_CHAIN (field))
    {
      /* Skip if not FIELD_DECL or if alignment is set by user.  */
      if (TREE_CODE (field) != FIELD_DECL
	  || DECL_USER_ALIGN (field)
	  || DECL_ARTIFICIAL (field))
	continue;

      /* We don't need to process the type further if offset is variable,
	 since the offsets of remaining members will also be variable.  */
      if (TREE_CODE (DECL_FIELD_OFFSET (field)) != INTEGER_CST
	  || TREE_CODE (DECL_FIELD_BIT_OFFSET (field)) != INTEGER_CST)
	break;

      /* Similarly stop processing the type if offset_tree
	 does not fit in unsigned HOST_WIDE_INT.  */
      offset_tree = bit_position (field);
      if (!tree_fits_uhwi_p (offset_tree))
	break;

      offset = tree_to_uhwi (offset_tree);
      alignment = get_vec_alignment_for_type (TREE_TYPE (field));

      /* Get maximum alignment of vectorized field/array among those members
	 whose offset is multiple of the vector alignment.  */
      if (alignment
	  && (offset % alignment == 0)
	  && (alignment > max_align))
	max_align = alignment;
    }

  type_align_map->put (type, max_align);
  return max_align;
}

/* Return the alignment of the vector type corresponding to TYPE's scalar
   type, or 0 if it doesn't exist or if the vector alignment is less than
   TYPE's alignment.  */
static unsigned
get_vec_alignment_for_type (tree type)
{
  if (type == NULL_TREE)
    return 0;

  gcc_assert (TYPE_P (type));

  static unsigned alignment = 0;
  switch (TREE_CODE (type))
    {
      case ARRAY_TYPE:
	alignment = get_vec_alignment_for_array_type (type);
	break;
      case RECORD_TYPE:
	alignment = get_vec_alignment_for_record_type (type);
	break;
      default:
	alignment = 0;
	break;
    }

  return (alignment > TYPE_ALIGN (type)) ? alignment : 0;
}

/* Entry point to increase_alignment pass.  */
static unsigned int
increase_alignment (void)
{
  varpool_node *vnode;

  vect_location = dump_user_location_t ();
  type_align_map = new hash_map<tree, unsigned>;

  /* Increase the alignment of all global arrays for vectorization.  */
  FOR_EACH_DEFINED_VARIABLE (vnode)
    {
      tree decl = vnode->decl;
      unsigned int alignment;

      if ((decl_in_symtab_p (decl)
	  && !symtab_node::get (decl)->can_increase_alignment_p ())
	  || DECL_USER_ALIGN (decl) || DECL_ARTIFICIAL (decl))
	continue;

      alignment = get_vec_alignment_for_type (TREE_TYPE (decl));
      if (alignment && vect_can_force_dr_alignment_p (decl, alignment))
        {
	  vnode->increase_alignment (alignment);
	  if (dump_enabled_p ())
	    dump_printf (MSG_NOTE, "Increasing alignment of decl: %T\n", decl);
        }
    }

  delete type_align_map;
  return 0;
}


namespace {

const pass_data pass_data_ipa_increase_alignment =
{
  SIMPLE_IPA_PASS, /* type */
  "increase_alignment", /* name */
  OPTGROUP_LOOP | OPTGROUP_VEC, /* optinfo_flags */
  TV_IPA_OPT, /* tv_id */
  0, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  0, /* todo_flags_finish */
};

class pass_ipa_increase_alignment : public simple_ipa_opt_pass
{
public:
  pass_ipa_increase_alignment (gcc::context *ctxt)
    : simple_ipa_opt_pass (pass_data_ipa_increase_alignment, ctxt)
  {}

  /* opt_pass methods: */
  virtual bool gate (function *)
    {
      return flag_section_anchors && flag_tree_loop_vectorize;
    }

  virtual unsigned int execute (function *) { return increase_alignment (); }

}; // class pass_ipa_increase_alignment

} // anon namespace

simple_ipa_opt_pass *
make_pass_ipa_increase_alignment (gcc::context *ctxt)
{
  return new pass_ipa_increase_alignment (ctxt);
}

/* If the condition represented by T is a comparison or the SSA name
   result of a comparison, extract the comparison's operands.  Represent
   T as NE_EXPR <T, 0> otherwise.  */
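/* For example (with illustrative SSA names), if T is _3 and _3 is
   defined by "_3 = a_1 < b_2", the extracted operands are code LT_EXPR,
   op0 a_1 and op1 b_2; a plain boolean T is represented as
   NE_EXPR <T, 0>.  */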

void
scalar_cond_masked_key::get_cond_ops_from_tree (tree t)
{
  if (TREE_CODE_CLASS (TREE_CODE (t)) == tcc_comparison)
    {
      this->code = TREE_CODE (t);
      this->op0 = TREE_OPERAND (t, 0);
      this->op1 = TREE_OPERAND (t, 1);
      return;
    }

  if (TREE_CODE (t) == SSA_NAME)
    if (gassign *stmt = dyn_cast<gassign *> (SSA_NAME_DEF_STMT (t)))
      {
	tree_code code = gimple_assign_rhs_code (stmt);
	if (TREE_CODE_CLASS (code) == tcc_comparison)
	  {
	    this->code = code;
	    this->op0 = gimple_assign_rhs1 (stmt);
	    this->op1 = gimple_assign_rhs2 (stmt);
	    return;
	  }
      }

  this->code = NE_EXPR;
  this->op0 = t;
  this->op1 = build_zero_cst (TREE_TYPE (t));
}