1 /* Vectorizer
2    Copyright (C) 2003-2022 Free Software Foundation, Inc.
3    Contributed by Dorit Naishlos <dorit@il.ibm.com>
4 
5 This file is part of GCC.
6 
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
11 
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
15 for more details.
16 
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3.  If not see
19 <http://www.gnu.org/licenses/>.  */
20 
21 #ifndef GCC_TREE_VECTORIZER_H
22 #define GCC_TREE_VECTORIZER_H
23 
24 typedef class _stmt_vec_info *stmt_vec_info;
25 typedef struct _slp_tree *slp_tree;
26 
27 #include "tree-data-ref.h"
28 #include "tree-hash-traits.h"
29 #include "target.h"
30 #include "internal-fn.h"
31 #include "tree-ssa-operands.h"
32 #include "gimple-match.h"
33 
34 /* Used for naming of new temporaries.  */
35 enum vect_var_kind {
36   vect_simple_var,
37   vect_pointer_var,
38   vect_scalar_var,
39   vect_mask_var
40 };
41 
42 /* Defines type of operation.  */
43 enum operation_type {
44   unary_op = 1,
45   binary_op,
46   ternary_op
47 };
48 
49 /* Define type of available alignment support.  */
50 enum dr_alignment_support {
51   dr_unaligned_unsupported,
52   dr_unaligned_supported,
53   dr_explicit_realign,
54   dr_explicit_realign_optimized,
55   dr_aligned
56 };
57 
58 /* Define type of def-use cross-iteration cycle.  */
59 enum vect_def_type {
60   vect_uninitialized_def = 0,
61   vect_constant_def = 1,
62   vect_external_def,
63   vect_internal_def,
64   vect_induction_def,
65   vect_reduction_def,
66   vect_double_reduction_def,
67   vect_nested_cycle,
68   vect_unknown_def_type
69 };
70 
71 /* Define type of reduction.  */
72 enum vect_reduction_type {
73   TREE_CODE_REDUCTION,
74   COND_REDUCTION,
75   INTEGER_INDUC_COND_REDUCTION,
76   CONST_COND_REDUCTION,
77 
78   /* Retain a scalar phi and use a FOLD_EXTRACT_LAST within the loop
79      to implement:
80 
81        for (int i = 0; i < VF; ++i)
82          res = cond[i] ? val[i] : res;  */
83   EXTRACT_LAST_REDUCTION,
84 
85   /* Use a folding reduction within the loop to implement:
86 
87        for (int i = 0; i < VF; ++i)
88 	 res = res OP val[i];
89 
90      (with no reassocation).  */
91   FOLD_LEFT_REDUCTION
92 };
93 
94 #define VECTORIZABLE_CYCLE_DEF(D) (((D) == vect_reduction_def)           \
95                                    || ((D) == vect_double_reduction_def) \
96                                    || ((D) == vect_nested_cycle))
97 
98 /* Structure to encapsulate information about a group of like
99    instructions to be presented to the target cost model.  */
100 struct stmt_info_for_cost {
101   int count;
102   enum vect_cost_for_stmt kind;
103   enum vect_cost_model_location where;
104   stmt_vec_info stmt_info;
105   slp_tree node;
106   tree vectype;
107   int misalign;
108 };
109 
110 typedef vec<stmt_info_for_cost> stmt_vector_for_cost;
111 
112 /* Maps base addresses to an innermost_loop_behavior and the stmt it was
113    derived from that gives the maximum known alignment for that base.  */
114 typedef hash_map<tree_operand_hash,
115 		 std::pair<stmt_vec_info, innermost_loop_behavior *> >
116 	  vec_base_alignments;
117 
118 /* Represents elements [START, START + LENGTH) of cyclical array OPS*
119    (i.e. OPS repeated to give at least START + LENGTH elements)  */
120 struct vect_scalar_ops_slice
121 {
122   tree op (unsigned int i) const;
123   bool all_same_p () const;
124 
125   vec<tree> *ops;
126   unsigned int start;
127   unsigned int length;
128 };
129 
130 /* Return element I of the slice.  */
131 inline tree
op(unsigned int i)132 vect_scalar_ops_slice::op (unsigned int i) const
133 {
134   return (*ops)[(i + start) % ops->length ()];
135 }
136 
137 /* Hash traits for vect_scalar_ops_slice.  */
138 struct vect_scalar_ops_slice_hash : typed_noop_remove<vect_scalar_ops_slice>
139 {
140   typedef vect_scalar_ops_slice value_type;
141   typedef vect_scalar_ops_slice compare_type;
142 
143   static const bool empty_zero_p = true;
144 
mark_deletedvect_scalar_ops_slice_hash145   static void mark_deleted (value_type &s) { s.length = ~0U; }
mark_emptyvect_scalar_ops_slice_hash146   static void mark_empty (value_type &s) { s.length = 0; }
is_deletedvect_scalar_ops_slice_hash147   static bool is_deleted (const value_type &s) { return s.length == ~0U; }
is_emptyvect_scalar_ops_slice_hash148   static bool is_empty (const value_type &s) { return s.length == 0; }
149   static hashval_t hash (const value_type &);
150   static bool equal (const value_type &, const compare_type &);
151 };
152 
153 /************************************************************************
154   SLP
155  ************************************************************************/
156 typedef vec<std::pair<unsigned, unsigned> > lane_permutation_t;
157 typedef vec<unsigned> load_permutation_t;
158 
159 /* A computation tree of an SLP instance.  Each node corresponds to a group of
160    stmts to be packed in a SIMD stmt.  */
161 struct _slp_tree {
162   _slp_tree ();
163   ~_slp_tree ();
164 
165   /* Nodes that contain def-stmts of this node statements operands.  */
166   vec<slp_tree> children;
167 
168   /* A group of scalar stmts to be vectorized together.  */
169   vec<stmt_vec_info> stmts;
170   /* A group of scalar operands to be vectorized together.  */
171   vec<tree> ops;
172   /* The representative that should be used for analysis and
173      code generation.  */
174   stmt_vec_info representative;
175 
176   /* Load permutation relative to the stores, NULL if there is no
177      permutation.  */
178   load_permutation_t load_permutation;
179   /* Lane permutation of the operands scalar lanes encoded as pairs
180      of { operand number, lane number }.  The number of elements
181      denotes the number of output lanes.  */
182   lane_permutation_t lane_permutation;
183 
184   tree vectype;
185   /* Vectorized stmt/s.  */
186   vec<gimple *> vec_stmts;
187   vec<tree> vec_defs;
188   /* Number of vector stmts that are created to replace the group of scalar
189      stmts. It is calculated during the transformation phase as the number of
190      scalar elements in one scalar iteration (GROUP_SIZE) multiplied by VF
191      divided by vector size.  */
192   unsigned int vec_stmts_size;
193 
194   /* Reference count in the SLP graph.  */
195   unsigned int refcnt;
196   /* The maximum number of vector elements for the subtree rooted
197      at this node.  */
198   poly_uint64 max_nunits;
199   /* The DEF type of this node.  */
200   enum vect_def_type def_type;
201   /* The number of scalar lanes produced by this node.  */
202   unsigned int lanes;
203   /* The operation of this node.  */
204   enum tree_code code;
205 
206   int vertex;
207 
208   /* If not NULL this is a cached failed SLP discovery attempt with
209      the lanes that failed during SLP discovery as 'false'.  This is
210      a copy of the matches array.  */
211   bool *failed;
212 
213   /* Allocate from slp_tree_pool.  */
214   static void *operator new (size_t);
215 
216   /* Return memory to slp_tree_pool.  */
217   static void operator delete (void *, size_t);
218 
219   /* Linked list of nodes to release when we free the slp_tree_pool.  */
220   slp_tree next_node;
221   slp_tree prev_node;
222 };
223 
224 /* The enum describes the type of operations that an SLP instance
225    can perform. */
226 
227 enum slp_instance_kind {
228     slp_inst_kind_store,
229     slp_inst_kind_reduc_group,
230     slp_inst_kind_reduc_chain,
231     slp_inst_kind_bb_reduc,
232     slp_inst_kind_ctor
233 };
234 
235 /* SLP instance is a sequence of stmts in a loop that can be packed into
236    SIMD stmts.  */
237 typedef class _slp_instance {
238 public:
239   /* The root of SLP tree.  */
240   slp_tree root;
241 
242   /* For vector constructors, the constructor stmt that the SLP tree is built
243      from, NULL otherwise.  */
244   vec<stmt_vec_info> root_stmts;
245 
246   /* The unrolling factor required to vectorized this SLP instance.  */
247   poly_uint64 unrolling_factor;
248 
249   /* The group of nodes that contain loads of this SLP instance.  */
250   vec<slp_tree> loads;
251 
252   /* The SLP node containing the reduction PHIs.  */
253   slp_tree reduc_phis;
254 
255   /* Vector cost of this entry to the SLP graph.  */
256   stmt_vector_for_cost cost_vec;
257 
258   /* If this instance is the main entry of a subgraph the set of
259      entries into the same subgraph, including itself.  */
260   vec<_slp_instance *> subgraph_entries;
261 
262   /* The type of operation the SLP instance is performing.  */
263   slp_instance_kind kind;
264 
265   dump_user_location_t location () const;
266 } *slp_instance;
267 
268 
269 /* Access Functions.  */
270 #define SLP_INSTANCE_TREE(S)                     (S)->root
271 #define SLP_INSTANCE_UNROLLING_FACTOR(S)         (S)->unrolling_factor
272 #define SLP_INSTANCE_LOADS(S)                    (S)->loads
273 #define SLP_INSTANCE_ROOT_STMTS(S)               (S)->root_stmts
274 #define SLP_INSTANCE_KIND(S)                     (S)->kind
275 
276 #define SLP_TREE_CHILDREN(S)                     (S)->children
277 #define SLP_TREE_SCALAR_STMTS(S)                 (S)->stmts
278 #define SLP_TREE_SCALAR_OPS(S)                   (S)->ops
279 #define SLP_TREE_REF_COUNT(S)                    (S)->refcnt
280 #define SLP_TREE_VEC_STMTS(S)                    (S)->vec_stmts
281 #define SLP_TREE_VEC_DEFS(S)                     (S)->vec_defs
282 #define SLP_TREE_NUMBER_OF_VEC_STMTS(S)          (S)->vec_stmts_size
283 #define SLP_TREE_LOAD_PERMUTATION(S)             (S)->load_permutation
284 #define SLP_TREE_LANE_PERMUTATION(S)             (S)->lane_permutation
285 #define SLP_TREE_DEF_TYPE(S)			 (S)->def_type
286 #define SLP_TREE_VECTYPE(S)			 (S)->vectype
287 #define SLP_TREE_REPRESENTATIVE(S)		 (S)->representative
288 #define SLP_TREE_LANES(S)			 (S)->lanes
289 #define SLP_TREE_CODE(S)			 (S)->code
290 
291 /* Key for map that records association between
292    scalar conditions and corresponding loop mask, and
293    is populated by vect_record_loop_mask.  */
294 
295 struct scalar_cond_masked_key
296 {
scalar_cond_masked_keyscalar_cond_masked_key297   scalar_cond_masked_key (tree t, unsigned ncopies_)
298     : ncopies (ncopies_)
299   {
300     get_cond_ops_from_tree (t);
301   }
302 
303   void get_cond_ops_from_tree (tree);
304 
305   unsigned ncopies;
306   bool inverted_p;
307   tree_code code;
308   tree op0;
309   tree op1;
310 };
311 
312 template<>
313 struct default_hash_traits<scalar_cond_masked_key>
314 {
315   typedef scalar_cond_masked_key compare_type;
316   typedef scalar_cond_masked_key value_type;
317 
318   static inline hashval_t
319   hash (value_type v)
320   {
321     inchash::hash h;
322     h.add_int (v.code);
323     inchash::add_expr (v.op0, h, 0);
324     inchash::add_expr (v.op1, h, 0);
325     h.add_int (v.ncopies);
326     h.add_flag (v.inverted_p);
327     return h.end ();
328   }
329 
330   static inline bool
331   equal (value_type existing, value_type candidate)
332   {
333     return (existing.ncopies == candidate.ncopies
334 	    && existing.code == candidate.code
335 	    && existing.inverted_p == candidate.inverted_p
336 	    && operand_equal_p (existing.op0, candidate.op0, 0)
337 	    && operand_equal_p (existing.op1, candidate.op1, 0));
338   }
339 
340   static const bool empty_zero_p = true;
341 
342   static inline void
343   mark_empty (value_type &v)
344   {
345     v.ncopies = 0;
346     v.inverted_p = false;
347   }
348 
349   static inline bool
350   is_empty (value_type v)
351   {
352     return v.ncopies == 0;
353   }
354 
355   static inline void mark_deleted (value_type &) {}
356 
357   static inline bool is_deleted (const value_type &)
358   {
359     return false;
360   }
361 
362   static inline void remove (value_type &) {}
363 };
364 
365 typedef hash_set<scalar_cond_masked_key> scalar_cond_masked_set_type;
366 
367 /* Key and map that records association between vector conditions and
368    corresponding loop mask, and is populated by prepare_vec_mask.  */
369 
370 typedef pair_hash<tree_operand_hash, tree_operand_hash> tree_cond_mask_hash;
371 typedef hash_set<tree_cond_mask_hash> vec_cond_masked_set_type;
372 
373 /* Describes two objects whose addresses must be unequal for the vectorized
374    loop to be valid.  */
375 typedef std::pair<tree, tree> vec_object_pair;
376 
377 /* Records that vectorization is only possible if abs (EXPR) >= MIN_VALUE.
378    UNSIGNED_P is true if we can assume that abs (EXPR) == EXPR.  */
379 class vec_lower_bound {
380 public:
381   vec_lower_bound () {}
382   vec_lower_bound (tree e, bool u, poly_uint64 m)
383     : expr (e), unsigned_p (u), min_value (m) {}
384 
385   tree expr;
386   bool unsigned_p;
387   poly_uint64 min_value;
388 };
389 
390 /* Vectorizer state shared between different analyses like vector sizes
391    of the same CFG region.  */
392 class vec_info_shared {
393 public:
394   vec_info_shared();
395   ~vec_info_shared();
396 
397   void save_datarefs();
398   void check_datarefs();
399 
400   /* The number of scalar stmts.  */
401   unsigned n_stmts;
402 
403   /* All data references.  Freed by free_data_refs, so not an auto_vec.  */
404   vec<data_reference_p> datarefs;
405   vec<data_reference> datarefs_copy;
406 
407   /* The loop nest in which the data dependences are computed.  */
408   auto_vec<loop_p> loop_nest;
409 
410   /* All data dependences.  Freed by free_dependence_relations, so not
411      an auto_vec.  */
412   vec<ddr_p> ddrs;
413 };
414 
415 /* Vectorizer state common between loop and basic-block vectorization.  */
416 class vec_info {
417 public:
418   typedef hash_set<int_hash<machine_mode, E_VOIDmode, E_BLKmode> > mode_set;
419   enum vec_kind { bb, loop };
420 
421   vec_info (vec_kind, vec_info_shared *);
422   ~vec_info ();
423 
424   stmt_vec_info add_stmt (gimple *);
425   stmt_vec_info add_pattern_stmt (gimple *, stmt_vec_info);
426   stmt_vec_info lookup_stmt (gimple *);
427   stmt_vec_info lookup_def (tree);
428   stmt_vec_info lookup_single_use (tree);
429   class dr_vec_info *lookup_dr (data_reference *);
430   void move_dr (stmt_vec_info, stmt_vec_info);
431   void remove_stmt (stmt_vec_info);
432   void replace_stmt (gimple_stmt_iterator *, stmt_vec_info, gimple *);
433   void insert_on_entry (stmt_vec_info, gimple *);
434   void insert_seq_on_entry (stmt_vec_info, gimple_seq);
435 
436   /* The type of vectorization.  */
437   vec_kind kind;
438 
439   /* Shared vectorizer state.  */
440   vec_info_shared *shared;
441 
442   /* The mapping of GIMPLE UID to stmt_vec_info.  */
443   vec<stmt_vec_info> stmt_vec_infos;
444   /* Whether the above mapping is complete.  */
445   bool stmt_vec_info_ro;
446 
447   /* The SLP graph.  */
448   auto_vec<slp_instance> slp_instances;
449 
450   /* Maps base addresses to an innermost_loop_behavior that gives the maximum
451      known alignment for that base.  */
452   vec_base_alignments base_alignments;
453 
454   /* All interleaving chains of stores, represented by the first
455      stmt in the chain.  */
456   auto_vec<stmt_vec_info> grouped_stores;
457 
458   /* The set of vector modes used in the vectorized region.  */
459   mode_set used_vector_modes;
460 
461   /* The argument we should pass to related_vector_mode when looking up
462      the vector mode for a scalar mode, or VOIDmode if we haven't yet
463      made any decisions about which vector modes to use.  */
464   machine_mode vector_mode;
465 
466 private:
467   stmt_vec_info new_stmt_vec_info (gimple *stmt);
468   void set_vinfo_for_stmt (gimple *, stmt_vec_info, bool = true);
469   void free_stmt_vec_infos ();
470   void free_stmt_vec_info (stmt_vec_info);
471 };
472 
473 class _loop_vec_info;
474 class _bb_vec_info;
475 
476 template<>
477 template<>
478 inline bool
479 is_a_helper <_loop_vec_info *>::test (vec_info *i)
480 {
481   return i->kind == vec_info::loop;
482 }
483 
484 template<>
485 template<>
486 inline bool
487 is_a_helper <_bb_vec_info *>::test (vec_info *i)
488 {
489   return i->kind == vec_info::bb;
490 }
491 
492 /* In general, we can divide the vector statements in a vectorized loop
493    into related groups ("rgroups") and say that for each rgroup there is
494    some nS such that the rgroup operates on nS values from one scalar
495    iteration followed by nS values from the next.  That is, if VF is the
496    vectorization factor of the loop, the rgroup operates on a sequence:
497 
498      (1,1) (1,2) ... (1,nS) (2,1) ... (2,nS) ... (VF,1) ... (VF,nS)
499 
500    where (i,j) represents a scalar value with index j in a scalar
501    iteration with index i.
502 
503    [ We use the term "rgroup" to emphasise that this grouping isn't
504      necessarily the same as the grouping of statements used elsewhere.
505      For example, if we implement a group of scalar loads using gather
506      loads, we'll use a separate gather load for each scalar load, and
507      thus each gather load will belong to its own rgroup. ]
508 
509    In general this sequence will occupy nV vectors concatenated
510    together.  If these vectors have nL lanes each, the total number
511    of scalar values N is given by:
512 
513        N = nS * VF = nV * nL
514 
515    None of nS, VF, nV and nL are required to be a power of 2.  nS and nV
516    are compile-time constants but VF and nL can be variable (if the target
517    supports variable-length vectors).
518 
519    In classical vectorization, each iteration of the vector loop would
520    handle exactly VF iterations of the original scalar loop.  However,
521    in vector loops that are able to operate on partial vectors, a
522    particular iteration of the vector loop might handle fewer than VF
523    iterations of the scalar loop.  The vector lanes that correspond to
524    iterations of the scalar loop are said to be "active" and the other
525    lanes are said to be "inactive".
526 
527    In such vector loops, many rgroups need to be controlled to ensure
528    that they have no effect for the inactive lanes.  Conceptually, each
529    such rgroup needs a sequence of booleans in the same order as above,
530    but with each (i,j) replaced by a boolean that indicates whether
531    iteration i is active.  This sequence occupies nV vector controls
532    that again have nL lanes each.  Thus the control sequence as a whole
533    consists of VF independent booleans that are each repeated nS times.
534 
535    Taking mask-based approach as a partially-populated vectors example.
536    We make the simplifying assumption that if a sequence of nV masks is
537    suitable for one (nS,nL) pair, we can reuse it for (nS/2,nL/2) by
538    VIEW_CONVERTing it.  This holds for all current targets that support
539    fully-masked loops.  For example, suppose the scalar loop is:
540 
541      float *f;
542      double *d;
543      for (int i = 0; i < n; ++i)
544        {
545 	 f[i * 2 + 0] += 1.0f;
546 	 f[i * 2 + 1] += 2.0f;
547 	 d[i] += 3.0;
548        }
549 
550    and suppose that vectors have 256 bits.  The vectorized f accesses
551    will belong to one rgroup and the vectorized d access to another:
552 
553      f rgroup: nS = 2, nV = 1, nL = 8
554      d rgroup: nS = 1, nV = 1, nL = 4
555 	       VF = 4
556 
557      [ In this simple example the rgroups do correspond to the normal
558        SLP grouping scheme. ]
559 
560    If only the first three lanes are active, the masks we need are:
561 
562      f rgroup: 1 1 | 1 1 | 1 1 | 0 0
563      d rgroup:  1  |  1  |  1  |  0
564 
565    Here we can use a mask calculated for f's rgroup for d's, but not
566    vice versa.
567 
568    Thus for each value of nV, it is enough to provide nV masks, with the
569    mask being calculated based on the highest nL (or, equivalently, based
570    on the highest nS) required by any rgroup with that nV.  We therefore
571    represent the entire collection of masks as a two-level table, with the
572    first level being indexed by nV - 1 (since nV == 0 doesn't exist) and
573    the second being indexed by the mask index 0 <= i < nV.  */
574 
575 /* The controls (like masks or lengths) needed by rgroups with nV vectors,
576    according to the description above.  */
577 struct rgroup_controls {
578   /* The largest nS for all rgroups that use these controls.  */
579   unsigned int max_nscalars_per_iter;
580 
581   /* For the largest nS recorded above, the loop controls divide each scalar
582      into FACTOR equal-sized pieces.  This is useful if we need to split
583      element-based accesses into byte-based accesses.  */
584   unsigned int factor;
585 
586   /* This is a vector type with MAX_NSCALARS_PER_ITER * VF / nV elements.
587      For mask-based controls, it is the type of the masks in CONTROLS.
588      For length-based controls, it can be any vector type that has the
589      specified number of elements; the type of the elements doesn't matter.  */
590   tree type;
591 
592   /* A vector of nV controls, in iteration order.  */
593   vec<tree> controls;
594 
595   /* In case of len_load and len_store with a bias there is only one
596      rgroup.  This holds the adjusted loop length for the this rgroup.  */
597   tree bias_adjusted_ctrl;
598 };
599 
600 typedef auto_vec<rgroup_controls> vec_loop_masks;
601 
602 typedef auto_vec<rgroup_controls> vec_loop_lens;
603 
604 typedef auto_vec<std::pair<data_reference*, tree> > drs_init_vec;
605 
606 /* Information about a reduction accumulator from the main loop that could
607    conceivably be reused as the input to a reduction in an epilogue loop.  */
608 struct vect_reusable_accumulator {
609   /* The final value of the accumulator, which forms the input to the
610      reduction operation.  */
611   tree reduc_input;
612 
613   /* The stmt_vec_info that describes the reduction (i.e. the one for
614      which is_reduc_info is true).  */
615   stmt_vec_info reduc_info;
616 };
617 
618 /*-----------------------------------------------------------------*/
619 /* Info on vectorized loops.                                       */
620 /*-----------------------------------------------------------------*/
621 typedef class _loop_vec_info : public vec_info {
622 public:
623   _loop_vec_info (class loop *, vec_info_shared *);
624   ~_loop_vec_info ();
625 
626   /* The loop to which this info struct refers to.  */
627   class loop *loop;
628 
629   /* The loop basic blocks.  */
630   basic_block *bbs;
631 
632   /* Number of latch executions.  */
633   tree num_itersm1;
634   /* Number of iterations.  */
635   tree num_iters;
636   /* Number of iterations of the original loop.  */
637   tree num_iters_unchanged;
638   /* Condition under which this loop is analyzed and versioned.  */
639   tree num_iters_assumptions;
640 
641   /* The cost of the vector code.  */
642   class vector_costs *vector_costs;
643 
644   /* The cost of the scalar code.  */
645   class vector_costs *scalar_costs;
646 
647   /* Threshold of number of iterations below which vectorization will not be
648      performed. It is calculated from MIN_PROFITABLE_ITERS and
649      param_min_vect_loop_bound.  */
650   unsigned int th;
651 
652   /* When applying loop versioning, the vector form should only be used
653      if the number of scalar iterations is >= this value, on top of all
654      the other requirements.  Ignored when loop versioning is not being
655      used.  */
656   poly_uint64 versioning_threshold;
657 
658   /* Unrolling factor  */
659   poly_uint64 vectorization_factor;
660 
661   /* If this loop is an epilogue loop whose main loop can be skipped,
662      MAIN_LOOP_EDGE is the edge from the main loop to this loop's
663      preheader.  SKIP_MAIN_LOOP_EDGE is then the edge that skips the
664      main loop and goes straight to this loop's preheader.
665 
666      Both fields are null otherwise.  */
667   edge main_loop_edge;
668   edge skip_main_loop_edge;
669 
670   /* If this loop is an epilogue loop that might be skipped after executing
671      the main loop, this edge is the one that skips the epilogue.  */
672   edge skip_this_loop_edge;
673 
674   /* The vectorized form of a standard reduction replaces the original
675      scalar code's final result (a loop-closed SSA PHI) with the result
676      of a vector-to-scalar reduction operation.  After vectorization,
677      this variable maps these vector-to-scalar results to information
678      about the reductions that generated them.  */
679   hash_map<tree, vect_reusable_accumulator> reusable_accumulators;
680 
681   /* The number of times that the target suggested we unroll the vector loop
682      in order to promote more ILP.  This value will be used to re-analyze the
683      loop for vectorization and if successful the value will be folded into
684      vectorization_factor (and therefore exactly divides
685      vectorization_factor).  */
686   unsigned int suggested_unroll_factor;
687 
688   /* Maximum runtime vectorization factor, or MAX_VECTORIZATION_FACTOR
689      if there is no particular limit.  */
690   unsigned HOST_WIDE_INT max_vectorization_factor;
691 
692   /* The masks that a fully-masked loop should use to avoid operating
693      on inactive scalars.  */
694   vec_loop_masks masks;
695 
696   /* The lengths that a loop with length should use to avoid operating
697      on inactive scalars.  */
698   vec_loop_lens lens;
699 
700   /* Set of scalar conditions that have loop mask applied.  */
701   scalar_cond_masked_set_type scalar_cond_masked_set;
702 
703   /* Set of vector conditions that have loop mask applied.  */
704   vec_cond_masked_set_type vec_cond_masked_set;
705 
706   /* If we are using a loop mask to align memory addresses, this variable
707      contains the number of vector elements that we should skip in the
708      first iteration of the vector loop (i.e. the number of leading
709      elements that should be false in the first mask).  */
710   tree mask_skip_niters;
711 
712   /* The type that the loop control IV should be converted to before
713      testing which of the VF scalars are active and inactive.
714      Only meaningful if LOOP_VINFO_USING_PARTIAL_VECTORS_P.  */
715   tree rgroup_compare_type;
716 
717   /* For #pragma omp simd if (x) loops the x expression.  If constant 0,
718      the loop should not be vectorized, if constant non-zero, simd_if_cond
719      shouldn't be set and loop vectorized normally, if SSA_NAME, the loop
720      should be versioned on that condition, using scalar loop if the condition
721      is false and vectorized loop otherwise.  */
722   tree simd_if_cond;
723 
724   /* The type that the vector loop control IV should have when
725      LOOP_VINFO_USING_PARTIAL_VECTORS_P is true.  */
726   tree rgroup_iv_type;
727 
728   /* Unknown DRs according to which loop was peeled.  */
729   class dr_vec_info *unaligned_dr;
730 
731   /* peeling_for_alignment indicates whether peeling for alignment will take
732      place, and what the peeling factor should be:
733      peeling_for_alignment = X means:
734         If X=0: Peeling for alignment will not be applied.
735         If X>0: Peel first X iterations.
736         If X=-1: Generate a runtime test to calculate the number of iterations
737                  to be peeled, using the dataref recorded in the field
738                  unaligned_dr.  */
739   int peeling_for_alignment;
740 
741   /* The mask used to check the alignment of pointers or arrays.  */
742   int ptr_mask;
743 
744   /* Data Dependence Relations defining address ranges that are candidates
745      for a run-time aliasing check.  */
746   auto_vec<ddr_p> may_alias_ddrs;
747 
748   /* Data Dependence Relations defining address ranges together with segment
749      lengths from which the run-time aliasing check is built.  */
750   auto_vec<dr_with_seg_len_pair_t> comp_alias_ddrs;
751 
752   /* Check that the addresses of each pair of objects is unequal.  */
753   auto_vec<vec_object_pair> check_unequal_addrs;
754 
755   /* List of values that are required to be nonzero.  This is used to check
756      whether things like "x[i * n] += 1;" are safe and eventually gets added
757      to the checks for lower bounds below.  */
758   auto_vec<tree> check_nonzero;
759 
760   /* List of values that need to be checked for a minimum value.  */
761   auto_vec<vec_lower_bound> lower_bounds;
762 
763   /* Statements in the loop that have data references that are candidates for a
764      runtime (loop versioning) misalignment check.  */
765   auto_vec<stmt_vec_info> may_misalign_stmts;
766 
767   /* Reduction cycles detected in the loop. Used in loop-aware SLP.  */
768   auto_vec<stmt_vec_info> reductions;
769 
770   /* All reduction chains in the loop, represented by the first
771      stmt in the chain.  */
772   auto_vec<stmt_vec_info> reduction_chains;
773 
774   /* Cost vector for a single scalar iteration.  */
775   auto_vec<stmt_info_for_cost> scalar_cost_vec;
776 
777   /* Map of IV base/step expressions to inserted name in the preheader.  */
778   hash_map<tree_operand_hash, tree> *ivexpr_map;
779 
780   /* Map of OpenMP "omp simd array" scan variables to corresponding
781      rhs of the store of the initializer.  */
782   hash_map<tree, tree> *scan_map;
783 
784   /* The unrolling factor needed to SLP the loop. In case of that pure SLP is
785      applied to the loop, i.e., no unrolling is needed, this is 1.  */
786   poly_uint64 slp_unrolling_factor;
787 
788   /* The factor used to over weight those statements in an inner loop
789      relative to the loop being vectorized.  */
790   unsigned int inner_loop_cost_factor;
791 
792   /* Is the loop vectorizable? */
793   bool vectorizable;
794 
795   /* Records whether we still have the option of vectorizing this loop
796      using partially-populated vectors; in other words, whether it is
797      still possible for one iteration of the vector loop to handle
798      fewer than VF scalars.  */
799   bool can_use_partial_vectors_p;
800 
801   /* True if we've decided to use partially-populated vectors, so that
802      the vector loop can handle fewer than VF scalars.  */
803   bool using_partial_vectors_p;
804 
805   /* True if we've decided to use partially-populated vectors for the
806      epilogue of loop.  */
807   bool epil_using_partial_vectors_p;
808 
809   /* The bias for len_load and len_store.  For now, only 0 and -1 are
810      supported.  -1 must be used when a backend does not support
811      len_load/len_store with a length of zero.  */
812   signed char partial_load_store_bias;
813 
814   /* When we have grouped data accesses with gaps, we may introduce invalid
815      memory accesses.  We peel the last iteration of the loop to prevent
816      this.  */
817   bool peeling_for_gaps;
818 
819   /* When the number of iterations is not a multiple of the vector size
820      we need to peel off iterations at the end to form an epilogue loop.  */
821   bool peeling_for_niter;
822 
823   /* True if there are no loop carried data dependencies in the loop.
824      If loop->safelen <= 1, then this is always true, either the loop
825      didn't have any loop carried data dependencies, or the loop is being
826      vectorized guarded with some runtime alias checks, or couldn't
827      be vectorized at all, but then this field shouldn't be used.
828      For loop->safelen >= 2, the user has asserted that there are no
829      backward dependencies, but there still could be loop carried forward
830      dependencies in such loops.  This flag will be false if normal
831      vectorizer data dependency analysis would fail or require versioning
832      for alias, but because of loop->safelen >= 2 it has been vectorized
833      even without versioning for alias.  E.g. in:
834      #pragma omp simd
835      for (int i = 0; i < m; i++)
836        a[i] = a[i + k] * c;
837      (or #pragma simd or #pragma ivdep) we can vectorize this and it will
838      DTRT even for k > 0 && k < m, but without safelen we would not
839      vectorize this, so this field would be false.  */
840   bool no_data_dependencies;
841 
842   /* Mark loops having masked stores.  */
843   bool has_mask_store;
844 
845   /* Queued scaling factor for the scalar loop.  */
846   profile_probability scalar_loop_scaling;
847 
848   /* If if-conversion versioned this loop before conversion, this is the
849      loop version without if-conversion.  */
850   class loop *scalar_loop;
851 
852   /* For loops being epilogues of already vectorized loops
853      this points to the original vectorized loop.  Otherwise NULL.  */
854   _loop_vec_info *orig_loop_info;
855 
856   /* Used to store loop_vec_infos of epilogues of this loop during
857      analysis.  */
858   vec<_loop_vec_info *> epilogue_vinfos;
859 
860 } *loop_vec_info;
861 
862 /* Access Functions.  */
863 #define LOOP_VINFO_LOOP(L)                 (L)->loop
864 #define LOOP_VINFO_BBS(L)                  (L)->bbs
865 #define LOOP_VINFO_NITERSM1(L)             (L)->num_itersm1
866 #define LOOP_VINFO_NITERS(L)               (L)->num_iters
867 /* Since LOOP_VINFO_NITERS and LOOP_VINFO_NITERSM1 can change after
868    prologue peeling retain total unchanged scalar loop iterations for
869    cost model.  */
870 #define LOOP_VINFO_NITERS_UNCHANGED(L)     (L)->num_iters_unchanged
871 #define LOOP_VINFO_NITERS_ASSUMPTIONS(L)   (L)->num_iters_assumptions
872 #define LOOP_VINFO_COST_MODEL_THRESHOLD(L) (L)->th
873 #define LOOP_VINFO_VERSIONING_THRESHOLD(L) (L)->versioning_threshold
874 #define LOOP_VINFO_VECTORIZABLE_P(L)       (L)->vectorizable
875 #define LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P(L) (L)->can_use_partial_vectors_p
876 #define LOOP_VINFO_USING_PARTIAL_VECTORS_P(L) (L)->using_partial_vectors_p
877 #define LOOP_VINFO_EPIL_USING_PARTIAL_VECTORS_P(L)                             \
878   (L)->epil_using_partial_vectors_p
879 #define LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS(L) (L)->partial_load_store_bias
880 #define LOOP_VINFO_VECT_FACTOR(L)          (L)->vectorization_factor
881 #define LOOP_VINFO_MAX_VECT_FACTOR(L)      (L)->max_vectorization_factor
882 #define LOOP_VINFO_MASKS(L)                (L)->masks
883 #define LOOP_VINFO_LENS(L)                 (L)->lens
884 #define LOOP_VINFO_MASK_SKIP_NITERS(L)     (L)->mask_skip_niters
885 #define LOOP_VINFO_RGROUP_COMPARE_TYPE(L)  (L)->rgroup_compare_type
886 #define LOOP_VINFO_RGROUP_IV_TYPE(L)       (L)->rgroup_iv_type
887 #define LOOP_VINFO_PTR_MASK(L)             (L)->ptr_mask
888 #define LOOP_VINFO_N_STMTS(L)		   (L)->shared->n_stmts
889 #define LOOP_VINFO_LOOP_NEST(L)            (L)->shared->loop_nest
890 #define LOOP_VINFO_DATAREFS(L)             (L)->shared->datarefs
891 #define LOOP_VINFO_DDRS(L)                 (L)->shared->ddrs
892 #define LOOP_VINFO_INT_NITERS(L)           (TREE_INT_CST_LOW ((L)->num_iters))
893 #define LOOP_VINFO_PEELING_FOR_ALIGNMENT(L) (L)->peeling_for_alignment
894 #define LOOP_VINFO_UNALIGNED_DR(L)         (L)->unaligned_dr
895 #define LOOP_VINFO_MAY_MISALIGN_STMTS(L)   (L)->may_misalign_stmts
896 #define LOOP_VINFO_MAY_ALIAS_DDRS(L)       (L)->may_alias_ddrs
897 #define LOOP_VINFO_COMP_ALIAS_DDRS(L)      (L)->comp_alias_ddrs
898 #define LOOP_VINFO_CHECK_UNEQUAL_ADDRS(L)  (L)->check_unequal_addrs
899 #define LOOP_VINFO_CHECK_NONZERO(L)        (L)->check_nonzero
900 #define LOOP_VINFO_LOWER_BOUNDS(L)         (L)->lower_bounds
901 #define LOOP_VINFO_GROUPED_STORES(L)       (L)->grouped_stores
902 #define LOOP_VINFO_SLP_INSTANCES(L)        (L)->slp_instances
903 #define LOOP_VINFO_SLP_UNROLLING_FACTOR(L) (L)->slp_unrolling_factor
904 #define LOOP_VINFO_REDUCTIONS(L)           (L)->reductions
905 #define LOOP_VINFO_REDUCTION_CHAINS(L)     (L)->reduction_chains
906 #define LOOP_VINFO_PEELING_FOR_GAPS(L)     (L)->peeling_for_gaps
907 #define LOOP_VINFO_PEELING_FOR_NITER(L)    (L)->peeling_for_niter
908 #define LOOP_VINFO_NO_DATA_DEPENDENCIES(L) (L)->no_data_dependencies
909 #define LOOP_VINFO_SCALAR_LOOP(L)	   (L)->scalar_loop
910 #define LOOP_VINFO_SCALAR_LOOP_SCALING(L)  (L)->scalar_loop_scaling
911 #define LOOP_VINFO_HAS_MASK_STORE(L)       (L)->has_mask_store
912 #define LOOP_VINFO_SCALAR_ITERATION_COST(L) (L)->scalar_cost_vec
913 #define LOOP_VINFO_ORIG_LOOP_INFO(L)       (L)->orig_loop_info
914 #define LOOP_VINFO_SIMD_IF_COND(L)         (L)->simd_if_cond
915 #define LOOP_VINFO_INNER_LOOP_COST_FACTOR(L) (L)->inner_loop_cost_factor
916 
917 #define LOOP_VINFO_FULLY_MASKED_P(L)		\
918   (LOOP_VINFO_USING_PARTIAL_VECTORS_P (L)	\
919    && !LOOP_VINFO_MASKS (L).is_empty ())
920 
921 #define LOOP_VINFO_FULLY_WITH_LENGTH_P(L)	\
922   (LOOP_VINFO_USING_PARTIAL_VECTORS_P (L)	\
923    && !LOOP_VINFO_LENS (L).is_empty ())
924 
925 #define LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT(L)	\
926   ((L)->may_misalign_stmts.length () > 0)
927 #define LOOP_REQUIRES_VERSIONING_FOR_ALIAS(L)		\
928   ((L)->comp_alias_ddrs.length () > 0 \
929    || (L)->check_unequal_addrs.length () > 0 \
930    || (L)->lower_bounds.length () > 0)
931 #define LOOP_REQUIRES_VERSIONING_FOR_NITERS(L)		\
932   (LOOP_VINFO_NITERS_ASSUMPTIONS (L))
933 #define LOOP_REQUIRES_VERSIONING_FOR_SIMD_IF_COND(L)	\
934   (LOOP_VINFO_SIMD_IF_COND (L))
935 #define LOOP_REQUIRES_VERSIONING(L)			\
936   (LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (L)		\
937    || LOOP_REQUIRES_VERSIONING_FOR_ALIAS (L)		\
938    || LOOP_REQUIRES_VERSIONING_FOR_NITERS (L)		\
939    || LOOP_REQUIRES_VERSIONING_FOR_SIMD_IF_COND (L))
940 
941 #define LOOP_VINFO_NITERS_KNOWN_P(L)          \
942   (tree_fits_shwi_p ((L)->num_iters) && tree_to_shwi ((L)->num_iters) > 0)
943 
944 #define LOOP_VINFO_EPILOGUE_P(L) \
945   (LOOP_VINFO_ORIG_LOOP_INFO (L) != NULL)
946 
947 #define LOOP_VINFO_ORIG_MAX_VECT_FACTOR(L) \
948   (LOOP_VINFO_MAX_VECT_FACTOR (LOOP_VINFO_ORIG_LOOP_INFO (L)))
949 
950 /* Wrapper for loop_vec_info, for tracking success/failure, where a non-NULL
951    value signifies success, and a NULL value signifies failure, supporting
952    propagating an opt_problem * describing the failure back up the call
953    stack.  */
954 typedef opt_pointer_wrapper <loop_vec_info> opt_loop_vec_info;
955 
956 static inline loop_vec_info
957 loop_vec_info_for_loop (class loop *loop)
958 {
959   return (loop_vec_info) loop->aux;
960 }
961 
962 struct slp_root
963 {
964   slp_root (slp_instance_kind kind_, vec<stmt_vec_info> stmts_,
965 	    vec<stmt_vec_info> roots_)
966     : kind(kind_), stmts(stmts_), roots(roots_) {}
967   slp_instance_kind kind;
968   vec<stmt_vec_info> stmts;
969   vec<stmt_vec_info> roots;
970 };
971 
972 typedef class _bb_vec_info : public vec_info
973 {
974 public:
975   _bb_vec_info (vec<basic_block> bbs, vec_info_shared *);
976   ~_bb_vec_info ();
977 
978   /* The region we are operating on.  bbs[0] is the entry, excluding
979      its PHI nodes.  In the future we might want to track an explicit
980      entry edge to cover bbs[0] PHI nodes and have a region entry
981      insert location.  */
982   vec<basic_block> bbs;
983 
984   vec<slp_root> roots;
985 } *bb_vec_info;
986 
987 #define BB_VINFO_BB(B)               (B)->bb
988 #define BB_VINFO_GROUPED_STORES(B)   (B)->grouped_stores
989 #define BB_VINFO_SLP_INSTANCES(B)    (B)->slp_instances
990 #define BB_VINFO_DATAREFS(B)         (B)->shared->datarefs
991 #define BB_VINFO_DDRS(B)             (B)->shared->ddrs
992 
993 /*-----------------------------------------------------------------*/
994 /* Info on vectorized defs.                                        */
995 /*-----------------------------------------------------------------*/
996 enum stmt_vec_info_type {
997   undef_vec_info_type = 0,
998   load_vec_info_type,
999   store_vec_info_type,
1000   shift_vec_info_type,
1001   op_vec_info_type,
1002   call_vec_info_type,
1003   call_simd_clone_vec_info_type,
1004   assignment_vec_info_type,
1005   condition_vec_info_type,
1006   comparison_vec_info_type,
1007   reduc_vec_info_type,
1008   induc_vec_info_type,
1009   type_promotion_vec_info_type,
1010   type_demotion_vec_info_type,
1011   type_conversion_vec_info_type,
1012   cycle_phi_info_type,
1013   lc_phi_info_type,
1014   phi_info_type,
1015   loop_exit_ctrl_vec_info_type
1016 };
1017 
1018 /* Indicates whether/how a variable is used in the scope of loop/basic
1019    block.  */
1020 enum vect_relevant {
1021   vect_unused_in_scope = 0,
1022 
1023   /* The def is only used outside the loop.  */
1024   vect_used_only_live,
1025   /* The def is in the inner loop, and the use is in the outer loop, and the
1026      use is a reduction stmt.  */
1027   vect_used_in_outer_by_reduction,
1028   /* The def is in the inner loop, and the use is in the outer loop (and is
1029      not part of reduction).  */
1030   vect_used_in_outer,
1031 
1032   /* defs that feed computations that end up (only) in a reduction. These
1033      defs may be used by non-reduction stmts, but eventually, any
1034      computations/values that are affected by these defs are used to compute
1035      a reduction (i.e. don't get stored to memory, for example). We use this
1036      to identify computations that we can change the order in which they are
1037      computed.  */
1038   vect_used_by_reduction,
1039 
1040   vect_used_in_scope
1041 };
1042 
1043 /* The type of vectorization that can be applied to the stmt: regular loop-based
1044    vectorization; pure SLP - the stmt is a part of SLP instances and does not
1045    have uses outside SLP instances; or hybrid SLP and loop-based - the stmt is
1046    a part of SLP instance and also must be loop-based vectorized, since it has
1047    uses outside SLP sequences.
1048 
1049    In the loop context the meanings of pure and hybrid SLP are slightly
1050    different. By saying that pure SLP is applied to the loop, we mean that we
1051    exploit only intra-iteration parallelism in the loop; i.e., the loop can be
1052    vectorized without doing any conceptual unrolling, cause we don't pack
1053    together stmts from different iterations, only within a single iteration.
1054    Loop hybrid SLP means that we exploit both intra-iteration and
1055    inter-iteration parallelism (e.g., number of elements in the vector is 4
1056    and the slp-group-size is 2, in which case we don't have enough parallelism
1057    within an iteration, so we obtain the rest of the parallelism from subsequent
1058    iterations by unrolling the loop by 2).  */
1059 enum slp_vect_type {
1060   loop_vect = 0,
1061   pure_slp,
1062   hybrid
1063 };
1064 
1065 /* Says whether a statement is a load, a store of a vectorized statement
1066    result, or a store of an invariant value.  */
1067 enum vec_load_store_type {
1068   VLS_LOAD,
1069   VLS_STORE,
1070   VLS_STORE_INVARIANT
1071 };
1072 
1073 /* Describes how we're going to vectorize an individual load or store,
1074    or a group of loads or stores.  */
1075 enum vect_memory_access_type {
1076   /* An access to an invariant address.  This is used only for loads.  */
1077   VMAT_INVARIANT,
1078 
1079   /* A simple contiguous access.  */
1080   VMAT_CONTIGUOUS,
1081 
1082   /* A contiguous access that goes down in memory rather than up,
1083      with no additional permutation.  This is used only for stores
1084      of invariants.  */
1085   VMAT_CONTIGUOUS_DOWN,
1086 
1087   /* A simple contiguous access in which the elements need to be permuted
1088      after loading or before storing.  Only used for loop vectorization;
1089      SLP uses separate permutes.  */
1090   VMAT_CONTIGUOUS_PERMUTE,
1091 
1092   /* A simple contiguous access in which the elements need to be reversed
1093      after loading or before storing.  */
1094   VMAT_CONTIGUOUS_REVERSE,
1095 
1096   /* An access that uses IFN_LOAD_LANES or IFN_STORE_LANES.  */
1097   VMAT_LOAD_STORE_LANES,
1098 
1099   /* An access in which each scalar element is loaded or stored
1100      individually.  */
1101   VMAT_ELEMENTWISE,
1102 
1103   /* A hybrid of VMAT_CONTIGUOUS and VMAT_ELEMENTWISE, used for grouped
1104      SLP accesses.  Each unrolled iteration uses a contiguous load
1105      or store for the whole group, but the groups from separate iterations
1106      are combined in the same way as for VMAT_ELEMENTWISE.  */
1107   VMAT_STRIDED_SLP,
1108 
1109   /* The access uses gather loads or scatter stores.  */
1110   VMAT_GATHER_SCATTER
1111 };
1112 
1113 class dr_vec_info {
1114 public:
1115   /* The data reference itself.  */
1116   data_reference *dr;
1117   /* The statement that contains the data reference.  */
1118   stmt_vec_info stmt;
1119   /* The analysis group this DR belongs to when doing BB vectorization.
1120      DRs of the same group belong to the same conditional execution context.  */
1121   unsigned group;
1122   /* The misalignment in bytes of the reference, or -1 if not known.  */
1123   int misalignment;
1124   /* The byte alignment that we'd ideally like the reference to have,
1125      and the value that misalignment is measured against.  */
1126   poly_uint64 target_alignment;
1127   /* If true the alignment of base_decl needs to be increased.  */
1128   bool base_misaligned;
1129   tree base_decl;
1130 
1131   /* Stores current vectorized loop's offset.  To be added to the DR's
1132      offset to calculate current offset of data reference.  */
1133   tree offset;
1134 };
1135 
1136 typedef struct data_reference *dr_p;
1137 
1138 class _stmt_vec_info {
1139 public:
1140 
1141   enum stmt_vec_info_type type;
1142 
1143   /* Indicates whether this stmts is part of a computation whose result is
1144      used outside the loop.  */
1145   bool live;
1146 
1147   /* Stmt is part of some pattern (computation idiom)  */
1148   bool in_pattern_p;
1149 
1150   /* True if the statement was created during pattern recognition as
1151      part of the replacement for RELATED_STMT.  This implies that the
1152      statement isn't part of any basic block, although for convenience
1153      its gimple_bb is the same as for RELATED_STMT.  */
1154   bool pattern_stmt_p;
1155 
1156   /* Is this statement vectorizable or should it be skipped in (partial)
1157      vectorization.  */
1158   bool vectorizable;
1159 
1160   /* The stmt to which this info struct refers to.  */
1161   gimple *stmt;
1162 
1163   /* The vector type to be used for the LHS of this statement.  */
1164   tree vectype;
1165 
1166   /* The vectorized stmts.  */
1167   vec<gimple *> vec_stmts;
1168 
1169   /* The following is relevant only for stmts that contain a non-scalar
1170      data-ref (array/pointer/struct access). A GIMPLE stmt is expected to have
1171      at most one such data-ref.  */
1172 
1173   dr_vec_info dr_aux;
1174 
1175   /* Information about the data-ref relative to this loop
1176      nest (the loop that is being considered for vectorization).  */
1177   innermost_loop_behavior dr_wrt_vec_loop;
1178 
1179   /* For loop PHI nodes, the base and evolution part of it.  This makes sure
1180      this information is still available in vect_update_ivs_after_vectorizer
1181      where we may not be able to re-analyze the PHI nodes evolution as
1182      peeling for the prologue loop can make it unanalyzable.  The evolution
1183      part is still correct after peeling, but the base may have changed from
1184      the version here.  */
1185   tree loop_phi_evolution_base_unchanged;
1186   tree loop_phi_evolution_part;
1187 
1188   /* Used for various bookkeeping purposes, generally holding a pointer to
1189      some other stmt S that is in some way "related" to this stmt.
1190      Current use of this field is:
1191         If this stmt is part of a pattern (i.e. the field 'in_pattern_p' is
1192         true): S is the "pattern stmt" that represents (and replaces) the
1193         sequence of stmts that constitutes the pattern.  Similarly, the
1194         related_stmt of the "pattern stmt" points back to this stmt (which is
1195         the last stmt in the original sequence of stmts that constitutes the
1196         pattern).  */
1197   stmt_vec_info related_stmt;
1198 
1199   /* Used to keep a sequence of def stmts of a pattern stmt if such exists.
1200      The sequence is attached to the original statement rather than the
1201      pattern statement.  */
1202   gimple_seq pattern_def_seq;
1203 
1204   /* Selected SIMD clone's function info.  First vector element
1205      is SIMD clone's function decl, followed by a pair of trees (base + step)
1206      for linear arguments (pair of NULLs for other arguments).  */
1207   vec<tree> simd_clone_info;
1208 
1209   /* Classify the def of this stmt.  */
1210   enum vect_def_type def_type;
1211 
1212   /*  Whether the stmt is SLPed, loop-based vectorized, or both.  */
1213   enum slp_vect_type slp_type;
1214 
1215   /* Interleaving and reduction chains info.  */
1216   /* First element in the group.  */
1217   stmt_vec_info first_element;
1218   /* Pointer to the next element in the group.  */
1219   stmt_vec_info next_element;
1220   /* The size of the group.  */
1221   unsigned int size;
1222   /* For stores, number of stores from this group seen. We vectorize the last
1223      one.  */
1224   unsigned int store_count;
1225   /* For loads only, the gap from the previous load. For consecutive loads, GAP
1226      is 1.  */
1227   unsigned int gap;
1228 
1229   /* The minimum negative dependence distance this stmt participates in
1230      or zero if none.  */
1231   unsigned int min_neg_dist;
1232 
1233   /* Not all stmts in the loop need to be vectorized. e.g, the increment
1234      of the loop induction variable and computation of array indexes. relevant
1235      indicates whether the stmt needs to be vectorized.  */
1236   enum vect_relevant relevant;
1237 
1238   /* For loads if this is a gather, for stores if this is a scatter.  */
1239   bool gather_scatter_p;
1240 
1241   /* True if this is an access with loop-invariant stride.  */
1242   bool strided_p;
1243 
1244   /* For both loads and stores.  */
1245   unsigned simd_lane_access_p : 3;
1246 
1247   /* Classifies how the load or store is going to be implemented
1248      for loop vectorization.  */
1249   vect_memory_access_type memory_access_type;
1250 
1251   /* For INTEGER_INDUC_COND_REDUCTION, the initial value to be used.  */
1252   tree induc_cond_initial_val;
1253 
1254   /* If not NULL the value to be added to compute final reduction value.  */
1255   tree reduc_epilogue_adjustment;
1256 
1257   /* On a reduction PHI the reduction type as detected by
1258      vect_is_simple_reduction and vectorizable_reduction.  */
1259   enum vect_reduction_type reduc_type;
1260 
1261   /* The original reduction code, to be used in the epilogue.  */
1262   code_helper reduc_code;
1263   /* An internal function we should use in the epilogue.  */
1264   internal_fn reduc_fn;
1265 
1266   /* On a stmt participating in the reduction the index of the operand
1267      on the reduction SSA cycle.  */
1268   int reduc_idx;
1269 
1270   /* On a reduction PHI the def returned by vect_force_simple_reduction.
1271      On the def returned by vect_force_simple_reduction the
1272      corresponding PHI.  */
1273   stmt_vec_info reduc_def;
1274 
1275   /* The vector input type relevant for reduction vectorization.  */
1276   tree reduc_vectype_in;
1277 
1278   /* The vector type for performing the actual reduction.  */
1279   tree reduc_vectype;
1280 
1281   /* If IS_REDUC_INFO is true and if the vector code is performing
1282      N scalar reductions in parallel, this variable gives the initial
1283      scalar values of those N reductions.  */
1284   vec<tree> reduc_initial_values;
1285 
1286   /* If IS_REDUC_INFO is true and if the vector code is performing
1287      N scalar reductions in parallel, this variable gives the vectorized code's
1288      final (scalar) result for each of those N reductions.  In other words,
1289      REDUC_SCALAR_RESULTS[I] replaces the original scalar code's loop-closed
1290      SSA PHI for reduction number I.  */
1291   vec<tree> reduc_scalar_results;
1292 
1293   /* Only meaningful if IS_REDUC_INFO.  If non-null, the reduction is
1294      being performed by an epilogue loop and we have decided to reuse
1295      this accumulator from the main loop.  */
1296   vect_reusable_accumulator *reused_accumulator;
1297 
1298   /* Whether we force a single cycle PHI during reduction vectorization.  */
1299   bool force_single_cycle;
1300 
1301   /* Whether on this stmt reduction meta is recorded.  */
1302   bool is_reduc_info;
1303 
1304   /* If nonzero, the lhs of the statement could be truncated to this
1305      many bits without affecting any users of the result.  */
1306   unsigned int min_output_precision;
1307 
1308   /* If nonzero, all non-boolean input operands have the same precision,
1309      and they could each be truncated to this many bits without changing
1310      the result.  */
1311   unsigned int min_input_precision;
1312 
1313   /* If OPERATION_BITS is nonzero, the statement could be performed on
1314      an integer with the sign and number of bits given by OPERATION_SIGN
1315      and OPERATION_BITS without changing the result.  */
1316   unsigned int operation_precision;
1317   signop operation_sign;
1318 
1319   /* If the statement produces a boolean result, this value describes
1320      how we should choose the associated vector type.  The possible
1321      values are:
1322 
1323      - an integer precision N if we should use the vector mask type
1324        associated with N-bit integers.  This is only used if all relevant
1325        input booleans also want the vector mask type for N-bit integers,
1326        or if we can convert them into that form by pattern-matching.
1327 
1328      - ~0U if we considered choosing a vector mask type but decided
1329        to treat the boolean as a normal integer type instead.
1330 
1331      - 0 otherwise.  This means either that the operation isn't one that
1332        could have a vector mask type (and so should have a normal vector
1333        type instead) or that we simply haven't made a choice either way.  */
1334   unsigned int mask_precision;
1335 
1336   /* True if this is only suitable for SLP vectorization.  */
1337   bool slp_vect_only_p;
1338 
1339   /* True if this is a pattern that can only be handled by SLP
1340      vectorization.  */
1341   bool slp_vect_pattern_only_p;
1342 };
1343 
1344 /* Information about a gather/scatter call.  */
1345 struct gather_scatter_info {
1346   /* The internal function to use for the gather/scatter operation,
1347      or IFN_LAST if a built-in function should be used instead.  */
1348   internal_fn ifn;
1349 
1350   /* The FUNCTION_DECL for the built-in gather/scatter function,
1351      or null if an internal function should be used instead.  */
1352   tree decl;
1353 
1354   /* The loop-invariant base value.  */
1355   tree base;
1356 
1357   /* The original scalar offset, which is a non-loop-invariant SSA_NAME.  */
1358   tree offset;
1359 
1360   /* Each offset element should be multiplied by this amount before
1361      being added to the base.  */
1362   int scale;
1363 
1364   /* The definition type for the vectorized offset.  */
1365   enum vect_def_type offset_dt;
1366 
1367   /* The type of the vectorized offset.  */
1368   tree offset_vectype;
1369 
1370   /* The type of the scalar elements after loading or before storing.  */
1371   tree element_type;
1372 
1373   /* The type of the scalar elements being loaded or stored.  */
1374   tree memory_type;
1375 };
1376 
1377 /* Access Functions.  */
1378 #define STMT_VINFO_TYPE(S)                 (S)->type
1379 #define STMT_VINFO_STMT(S)                 (S)->stmt
1380 #define STMT_VINFO_RELEVANT(S)             (S)->relevant
1381 #define STMT_VINFO_LIVE_P(S)               (S)->live
1382 #define STMT_VINFO_VECTYPE(S)              (S)->vectype
1383 #define STMT_VINFO_VEC_STMTS(S)            (S)->vec_stmts
1384 #define STMT_VINFO_VECTORIZABLE(S)         (S)->vectorizable
1385 #define STMT_VINFO_DATA_REF(S)             ((S)->dr_aux.dr + 0)
1386 #define STMT_VINFO_GATHER_SCATTER_P(S)	   (S)->gather_scatter_p
1387 #define STMT_VINFO_STRIDED_P(S)	   	   (S)->strided_p
1388 #define STMT_VINFO_MEMORY_ACCESS_TYPE(S)   (S)->memory_access_type
1389 #define STMT_VINFO_SIMD_LANE_ACCESS_P(S)   (S)->simd_lane_access_p
1390 #define STMT_VINFO_VEC_INDUC_COND_INITIAL_VAL(S) (S)->induc_cond_initial_val
1391 #define STMT_VINFO_REDUC_EPILOGUE_ADJUSTMENT(S) (S)->reduc_epilogue_adjustment
1392 #define STMT_VINFO_REDUC_IDX(S)		   (S)->reduc_idx
1393 #define STMT_VINFO_FORCE_SINGLE_CYCLE(S)   (S)->force_single_cycle
1394 
1395 #define STMT_VINFO_DR_WRT_VEC_LOOP(S)      (S)->dr_wrt_vec_loop
1396 #define STMT_VINFO_DR_BASE_ADDRESS(S)      (S)->dr_wrt_vec_loop.base_address
1397 #define STMT_VINFO_DR_INIT(S)              (S)->dr_wrt_vec_loop.init
1398 #define STMT_VINFO_DR_OFFSET(S)            (S)->dr_wrt_vec_loop.offset
1399 #define STMT_VINFO_DR_STEP(S)              (S)->dr_wrt_vec_loop.step
1400 #define STMT_VINFO_DR_BASE_ALIGNMENT(S)    (S)->dr_wrt_vec_loop.base_alignment
1401 #define STMT_VINFO_DR_BASE_MISALIGNMENT(S) \
1402   (S)->dr_wrt_vec_loop.base_misalignment
1403 #define STMT_VINFO_DR_OFFSET_ALIGNMENT(S) \
1404   (S)->dr_wrt_vec_loop.offset_alignment
1405 #define STMT_VINFO_DR_STEP_ALIGNMENT(S) \
1406   (S)->dr_wrt_vec_loop.step_alignment
1407 
1408 #define STMT_VINFO_DR_INFO(S) \
1409   (gcc_checking_assert ((S)->dr_aux.stmt == (S)), &(S)->dr_aux)
1410 
1411 #define STMT_VINFO_IN_PATTERN_P(S)         (S)->in_pattern_p
1412 #define STMT_VINFO_RELATED_STMT(S)         (S)->related_stmt
1413 #define STMT_VINFO_PATTERN_DEF_SEQ(S)      (S)->pattern_def_seq
1414 #define STMT_VINFO_SIMD_CLONE_INFO(S)	   (S)->simd_clone_info
1415 #define STMT_VINFO_DEF_TYPE(S)             (S)->def_type
1416 #define STMT_VINFO_GROUPED_ACCESS(S) \
1417   ((S)->dr_aux.dr && DR_GROUP_FIRST_ELEMENT(S))
1418 #define STMT_VINFO_LOOP_PHI_EVOLUTION_BASE_UNCHANGED(S) (S)->loop_phi_evolution_base_unchanged
1419 #define STMT_VINFO_LOOP_PHI_EVOLUTION_PART(S) (S)->loop_phi_evolution_part
1420 #define STMT_VINFO_MIN_NEG_DIST(S)	(S)->min_neg_dist
1421 #define STMT_VINFO_REDUC_TYPE(S)	(S)->reduc_type
1422 #define STMT_VINFO_REDUC_CODE(S)	(S)->reduc_code
1423 #define STMT_VINFO_REDUC_FN(S)		(S)->reduc_fn
1424 #define STMT_VINFO_REDUC_DEF(S)		(S)->reduc_def
1425 #define STMT_VINFO_REDUC_VECTYPE(S)     (S)->reduc_vectype
1426 #define STMT_VINFO_REDUC_VECTYPE_IN(S)  (S)->reduc_vectype_in
1427 #define STMT_VINFO_SLP_VECT_ONLY(S)     (S)->slp_vect_only_p
1428 #define STMT_VINFO_SLP_VECT_ONLY_PATTERN(S) (S)->slp_vect_pattern_only_p
1429 
1430 #define DR_GROUP_FIRST_ELEMENT(S) \
1431   (gcc_checking_assert ((S)->dr_aux.dr), (S)->first_element)
1432 #define DR_GROUP_NEXT_ELEMENT(S) \
1433   (gcc_checking_assert ((S)->dr_aux.dr), (S)->next_element)
1434 #define DR_GROUP_SIZE(S) \
1435   (gcc_checking_assert ((S)->dr_aux.dr), (S)->size)
1436 #define DR_GROUP_STORE_COUNT(S) \
1437   (gcc_checking_assert ((S)->dr_aux.dr), (S)->store_count)
1438 #define DR_GROUP_GAP(S) \
1439   (gcc_checking_assert ((S)->dr_aux.dr), (S)->gap)
1440 
1441 #define REDUC_GROUP_FIRST_ELEMENT(S) \
1442   (gcc_checking_assert (!(S)->dr_aux.dr), (S)->first_element)
1443 #define REDUC_GROUP_NEXT_ELEMENT(S) \
1444   (gcc_checking_assert (!(S)->dr_aux.dr), (S)->next_element)
1445 #define REDUC_GROUP_SIZE(S) \
1446   (gcc_checking_assert (!(S)->dr_aux.dr), (S)->size)
1447 
1448 #define STMT_VINFO_RELEVANT_P(S)          ((S)->relevant != vect_unused_in_scope)
1449 
1450 #define HYBRID_SLP_STMT(S)                ((S)->slp_type == hybrid)
1451 #define PURE_SLP_STMT(S)                  ((S)->slp_type == pure_slp)
1452 #define STMT_SLP_TYPE(S)                   (S)->slp_type
1453 
1454 /* Contains the scalar or vector costs for a vec_info.  */
1455 class vector_costs
1456 {
1457 public:
1458   vector_costs (vec_info *, bool);
1459   virtual ~vector_costs () {}
1460 
1461   /* Update the costs in response to adding COUNT copies of a statement.
1462 
1463      - WHERE specifies whether the cost occurs in the loop prologue,
1464        the loop body, or the loop epilogue.
1465      - KIND is the kind of statement, which is always meaningful.
1466      - STMT_INFO or NODE, if nonnull, describe the statement that will be
1467        vectorized.
1468      - VECTYPE, if nonnull, is the vector type that the vectorized
1469        statement will operate on.  Note that this should be used in
1470        preference to STMT_VINFO_VECTYPE (STMT_INFO) since the latter
1471        is not correct for SLP.
1472      - for unaligned_load and unaligned_store statements, MISALIGN is
1473        the byte misalignment of the load or store relative to the target's
1474        preferred alignment for VECTYPE, or DR_MISALIGNMENT_UNKNOWN
1475        if the misalignment is not known.
1476 
1477      Return the calculated cost as well as recording it.  The return
1478      value is used for dumping purposes.  */
1479   virtual unsigned int add_stmt_cost (int count, vect_cost_for_stmt kind,
1480 				      stmt_vec_info stmt_info,
1481 				      slp_tree node,
1482 				      tree vectype, int misalign,
1483 				      vect_cost_model_location where);
1484 
1485   /* Finish calculating the cost of the code.  The results can be
1486      read back using the functions below.
1487 
1488      If the costs describe vector code, SCALAR_COSTS gives the costs
1489      of the corresponding scalar code, otherwise it is null.  */
1490   virtual void finish_cost (const vector_costs *scalar_costs);
1491 
1492   /* The costs in THIS and OTHER both describe ways of vectorizing
1493      a main loop.  Return true if the costs described by THIS are
1494      cheaper than the costs described by OTHER.  Return false if any
1495      of the following are true:
1496 
1497      - THIS and OTHER are of equal cost
1498      - OTHER is better than THIS
1499      - we can't be sure about the relative costs of THIS and OTHER.  */
1500   virtual bool better_main_loop_than_p (const vector_costs *other) const;
1501 
1502   /* Likewise, but the costs in THIS and OTHER both describe ways of
1503      vectorizing an epilogue loop of MAIN_LOOP.  */
1504   virtual bool better_epilogue_loop_than_p (const vector_costs *other,
1505 					    loop_vec_info main_loop) const;
1506 
1507   unsigned int prologue_cost () const;
1508   unsigned int body_cost () const;
1509   unsigned int epilogue_cost () const;
1510   unsigned int outside_cost () const;
1511   unsigned int total_cost () const;
1512   unsigned int suggested_unroll_factor () const;
1513 
1514 protected:
1515   unsigned int record_stmt_cost (stmt_vec_info, vect_cost_model_location,
1516 				 unsigned int);
1517   unsigned int adjust_cost_for_freq (stmt_vec_info, vect_cost_model_location,
1518 				     unsigned int);
1519   int compare_inside_loop_cost (const vector_costs *) const;
1520   int compare_outside_loop_cost (const vector_costs *) const;
1521 
1522   /* The region of code that we're considering vectorizing.  */
1523   vec_info *m_vinfo;
1524 
1525   /* True if we're costing the scalar code, false if we're costing
1526      the vector code.  */
1527   bool m_costing_for_scalar;
1528 
1529   /* The costs of the three regions, indexed by vect_cost_model_location.  */
1530   unsigned int m_costs[3];
1531 
1532   /* The suggested unrolling factor determined at finish_cost.  */
1533   unsigned int m_suggested_unroll_factor;
1534 
1535   /* True if finish_cost has been called.  */
1536   bool m_finished;
1537 };
1538 
1539 /* Create costs for VINFO.  COSTING_FOR_SCALAR is true if the costs
1540    are for scalar code, false if they are for vector code.  */
1541 
1542 inline
1543 vector_costs::vector_costs (vec_info *vinfo, bool costing_for_scalar)
1544   : m_vinfo (vinfo),
1545     m_costing_for_scalar (costing_for_scalar),
1546     m_costs (),
1547     m_suggested_unroll_factor(1),
1548     m_finished (false)
1549 {
1550 }
1551 
1552 /* Return the cost of the prologue code (in abstract units).  */
1553 
1554 inline unsigned int
1555 vector_costs::prologue_cost () const
1556 {
1557   gcc_checking_assert (m_finished);
1558   return m_costs[vect_prologue];
1559 }
1560 
1561 /* Return the cost of the body code (in abstract units).  */
1562 
1563 inline unsigned int
1564 vector_costs::body_cost () const
1565 {
1566   gcc_checking_assert (m_finished);
1567   return m_costs[vect_body];
1568 }
1569 
1570 /* Return the cost of the epilogue code (in abstract units).  */
1571 
1572 inline unsigned int
1573 vector_costs::epilogue_cost () const
1574 {
1575   gcc_checking_assert (m_finished);
1576   return m_costs[vect_epilogue];
1577 }
1578 
1579 /* Return the cost of the prologue and epilogue code (in abstract units).  */
1580 
1581 inline unsigned int
1582 vector_costs::outside_cost () const
1583 {
1584   return prologue_cost () + epilogue_cost ();
1585 }
1586 
1587 /* Return the cost of the prologue, body and epilogue code
1588    (in abstract units).  */
1589 
1590 inline unsigned int
1591 vector_costs::total_cost () const
1592 {
1593   return body_cost () + outside_cost ();
1594 }
1595 
1596 /* Return the suggested unroll factor.  */
1597 
1598 inline unsigned int
1599 vector_costs::suggested_unroll_factor () const
1600 {
1601   gcc_checking_assert (m_finished);
1602   return m_suggested_unroll_factor;
1603 }
1604 
1605 #define VECT_MAX_COST 1000
1606 
1607 /* The maximum number of intermediate steps required in multi-step type
1608    conversion.  */
1609 #define MAX_INTERM_CVT_STEPS         3
1610 
1611 #define MAX_VECTORIZATION_FACTOR INT_MAX
1612 
1613 /* Nonzero if TYPE represents a (scalar) boolean type or type
1614    in the middle-end compatible with it (unsigned precision 1 integral
1615    types).  Used to determine which types should be vectorized as
1616    VECTOR_BOOLEAN_TYPE_P.  */
1617 
1618 #define VECT_SCALAR_BOOLEAN_TYPE_P(TYPE) \
1619   (TREE_CODE (TYPE) == BOOLEAN_TYPE		\
1620    || ((TREE_CODE (TYPE) == INTEGER_TYPE	\
1621 	|| TREE_CODE (TYPE) == ENUMERAL_TYPE)	\
1622        && TYPE_PRECISION (TYPE) == 1		\
1623        && TYPE_UNSIGNED (TYPE)))
1624 
1625 static inline bool
1626 nested_in_vect_loop_p (class loop *loop, stmt_vec_info stmt_info)
1627 {
1628   return (loop->inner
1629 	  && (loop->inner == (gimple_bb (stmt_info->stmt))->loop_father));
1630 }
1631 
1632 /* PHI is either a scalar reduction phi or a scalar induction phi.
1633    Return the initial value of the variable on entry to the containing
1634    loop.  */
1635 
1636 static inline tree
1637 vect_phi_initial_value (gphi *phi)
1638 {
1639   basic_block bb = gimple_bb (phi);
1640   edge pe = loop_preheader_edge (bb->loop_father);
1641   gcc_assert (pe->dest == bb);
1642   return PHI_ARG_DEF_FROM_EDGE (phi, pe);
1643 }
1644 
1645 /* Return true if STMT_INFO should produce a vector mask type rather than
1646    a normal nonmask type.  */
1647 
1648 static inline bool
1649 vect_use_mask_type_p (stmt_vec_info stmt_info)
1650 {
1651   return stmt_info->mask_precision && stmt_info->mask_precision != ~0U;
1652 }
1653 
1654 /* Return TRUE if a statement represented by STMT_INFO is a part of a
1655    pattern.  */
1656 
1657 static inline bool
1658 is_pattern_stmt_p (stmt_vec_info stmt_info)
1659 {
1660   return stmt_info->pattern_stmt_p;
1661 }
1662 
1663 /* If STMT_INFO is a pattern statement, return the statement that it
1664    replaces, otherwise return STMT_INFO itself.  */
1665 
1666 inline stmt_vec_info
1667 vect_orig_stmt (stmt_vec_info stmt_info)
1668 {
1669   if (is_pattern_stmt_p (stmt_info))
1670     return STMT_VINFO_RELATED_STMT (stmt_info);
1671   return stmt_info;
1672 }
1673 
1674 /* Return the later statement between STMT1_INFO and STMT2_INFO.  */
1675 
1676 static inline stmt_vec_info
1677 get_later_stmt (stmt_vec_info stmt1_info, stmt_vec_info stmt2_info)
1678 {
1679   if (gimple_uid (vect_orig_stmt (stmt1_info)->stmt)
1680       > gimple_uid (vect_orig_stmt (stmt2_info)->stmt))
1681     return stmt1_info;
1682   else
1683     return stmt2_info;
1684 }
1685 
1686 /* If STMT_INFO has been replaced by a pattern statement, return the
1687    replacement statement, otherwise return STMT_INFO itself.  */
1688 
1689 inline stmt_vec_info
1690 vect_stmt_to_vectorize (stmt_vec_info stmt_info)
1691 {
1692   if (STMT_VINFO_IN_PATTERN_P (stmt_info))
1693     return STMT_VINFO_RELATED_STMT (stmt_info);
1694   return stmt_info;
1695 }
1696 
1697 /* Return true if BB is a loop header.  */
1698 
1699 static inline bool
1700 is_loop_header_bb_p (basic_block bb)
1701 {
1702   if (bb == (bb->loop_father)->header)
1703     return true;
1704   gcc_checking_assert (EDGE_COUNT (bb->preds) == 1);
1705   return false;
1706 }
1707 
1708 /* Return pow2 (X).  */
1709 
1710 static inline int
1711 vect_pow2 (int x)
1712 {
1713   int i, res = 1;
1714 
1715   for (i = 0; i < x; i++)
1716     res *= 2;
1717 
1718   return res;
1719 }
1720 
1721 /* Alias targetm.vectorize.builtin_vectorization_cost.  */
1722 
1723 static inline int
1724 builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
1725 			    tree vectype, int misalign)
1726 {
1727   return targetm.vectorize.builtin_vectorization_cost (type_of_cost,
1728 						       vectype, misalign);
1729 }
1730 
1731 /* Get cost by calling cost target builtin.  */
1732 
1733 static inline
1734 int vect_get_stmt_cost (enum vect_cost_for_stmt type_of_cost)
1735 {
1736   return builtin_vectorization_cost (type_of_cost, NULL, 0);
1737 }
1738 
1739 /* Alias targetm.vectorize.init_cost.  */
1740 
1741 static inline vector_costs *
1742 init_cost (vec_info *vinfo, bool costing_for_scalar)
1743 {
1744   return targetm.vectorize.create_costs (vinfo, costing_for_scalar);
1745 }
1746 
1747 extern void dump_stmt_cost (FILE *, int, enum vect_cost_for_stmt,
1748 			    stmt_vec_info, slp_tree, tree, int, unsigned,
1749 			    enum vect_cost_model_location);
1750 
1751 /* Alias targetm.vectorize.add_stmt_cost.  */
1752 
1753 static inline unsigned
1754 add_stmt_cost (vector_costs *costs, int count,
1755 	       enum vect_cost_for_stmt kind,
1756 	       stmt_vec_info stmt_info, slp_tree node,
1757 	       tree vectype, int misalign,
1758 	       enum vect_cost_model_location where)
1759 {
1760   unsigned cost = costs->add_stmt_cost (count, kind, stmt_info, node, vectype,
1761 					misalign, where);
1762   if (dump_file && (dump_flags & TDF_DETAILS))
1763     dump_stmt_cost (dump_file, count, kind, stmt_info, node, vectype, misalign,
1764 		    cost, where);
1765   return cost;
1766 }
1767 
1768 static inline unsigned
1769 add_stmt_cost (vector_costs *costs, int count, enum vect_cost_for_stmt kind,
1770 	       enum vect_cost_model_location where)
1771 {
1772   gcc_assert (kind == cond_branch_taken || kind == cond_branch_not_taken
1773 	      || kind == scalar_stmt);
1774   return add_stmt_cost (costs, count, kind, NULL, NULL, NULL_TREE, 0, where);
1775 }
1776 
1777 /* Alias targetm.vectorize.add_stmt_cost.  */
1778 
1779 static inline unsigned
1780 add_stmt_cost (vector_costs *costs, stmt_info_for_cost *i)
1781 {
1782   return add_stmt_cost (costs, i->count, i->kind, i->stmt_info, i->node,
1783 			i->vectype, i->misalign, i->where);
1784 }
1785 
1786 /* Alias targetm.vectorize.finish_cost.  */
1787 
1788 static inline void
1789 finish_cost (vector_costs *costs, const vector_costs *scalar_costs,
1790 	     unsigned *prologue_cost, unsigned *body_cost,
1791 	     unsigned *epilogue_cost, unsigned *suggested_unroll_factor = NULL)
1792 {
1793   costs->finish_cost (scalar_costs);
1794   *prologue_cost = costs->prologue_cost ();
1795   *body_cost = costs->body_cost ();
1796   *epilogue_cost = costs->epilogue_cost ();
1797   if (suggested_unroll_factor)
1798     *suggested_unroll_factor = costs->suggested_unroll_factor ();
1799 }
1800 
1801 inline void
1802 add_stmt_costs (vector_costs *costs, stmt_vector_for_cost *cost_vec)
1803 {
1804   stmt_info_for_cost *cost;
1805   unsigned i;
1806   FOR_EACH_VEC_ELT (*cost_vec, i, cost)
1807     add_stmt_cost (costs, cost->count, cost->kind, cost->stmt_info,
1808 		   cost->node, cost->vectype, cost->misalign, cost->where);
1809 }
1810 
1811 /*-----------------------------------------------------------------*/
1812 /* Info on data references alignment.                              */
1813 /*-----------------------------------------------------------------*/
1814 #define DR_MISALIGNMENT_UNKNOWN (-1)
1815 #define DR_MISALIGNMENT_UNINITIALIZED (-2)
1816 
1817 inline void
1818 set_dr_misalignment (dr_vec_info *dr_info, int val)
1819 {
1820   dr_info->misalignment = val;
1821 }
1822 
1823 extern int dr_misalignment (dr_vec_info *dr_info, tree vectype,
1824 			    poly_int64 offset = 0);
1825 
1826 #define SET_DR_MISALIGNMENT(DR, VAL) set_dr_misalignment (DR, VAL)
1827 
1828 /* Only defined once DR_MISALIGNMENT is defined.  */
1829 static inline const poly_uint64
1830 dr_target_alignment (dr_vec_info *dr_info)
1831 {
1832   if (STMT_VINFO_GROUPED_ACCESS (dr_info->stmt))
1833     dr_info = STMT_VINFO_DR_INFO (DR_GROUP_FIRST_ELEMENT (dr_info->stmt));
1834   return dr_info->target_alignment;
1835 }
1836 #define DR_TARGET_ALIGNMENT(DR) dr_target_alignment (DR)
1837 
1838 static inline void
1839 set_dr_target_alignment (dr_vec_info *dr_info, poly_uint64 val)
1840 {
1841   dr_info->target_alignment = val;
1842 }
1843 #define SET_DR_TARGET_ALIGNMENT(DR, VAL) set_dr_target_alignment (DR, VAL)
1844 
1845 /* Return true if data access DR_INFO is aligned to the targets
1846    preferred alignment for VECTYPE (which may be less than a full vector).  */
1847 
1848 static inline bool
1849 aligned_access_p (dr_vec_info *dr_info, tree vectype)
1850 {
1851   return (dr_misalignment (dr_info, vectype) == 0);
1852 }
1853 
1854 /* Return TRUE if the (mis-)alignment of the data access is known with
1855    respect to the targets preferred alignment for VECTYPE, and FALSE
1856    otherwise.  */
1857 
1858 static inline bool
1859 known_alignment_for_access_p (dr_vec_info *dr_info, tree vectype)
1860 {
1861   return (dr_misalignment (dr_info, vectype) != DR_MISALIGNMENT_UNKNOWN);
1862 }
1863 
1864 /* Return the minimum alignment in bytes that the vectorized version
1865    of DR_INFO is guaranteed to have.  */
1866 
1867 static inline unsigned int
1868 vect_known_alignment_in_bytes (dr_vec_info *dr_info, tree vectype)
1869 {
1870   int misalignment = dr_misalignment (dr_info, vectype);
1871   if (misalignment == DR_MISALIGNMENT_UNKNOWN)
1872     return TYPE_ALIGN_UNIT (TREE_TYPE (DR_REF (dr_info->dr)));
1873   else if (misalignment == 0)
1874     return known_alignment (DR_TARGET_ALIGNMENT (dr_info));
1875   return misalignment & -misalignment;
1876 }
1877 
1878 /* Return the behavior of DR_INFO with respect to the vectorization context
1879    (which for outer loop vectorization might not be the behavior recorded
1880    in DR_INFO itself).  */
1881 
1882 static inline innermost_loop_behavior *
1883 vect_dr_behavior (vec_info *vinfo, dr_vec_info *dr_info)
1884 {
1885   stmt_vec_info stmt_info = dr_info->stmt;
1886   loop_vec_info loop_vinfo = dyn_cast<loop_vec_info> (vinfo);
1887   if (loop_vinfo == NULL
1888       || !nested_in_vect_loop_p (LOOP_VINFO_LOOP (loop_vinfo), stmt_info))
1889     return &DR_INNERMOST (dr_info->dr);
1890   else
1891     return &STMT_VINFO_DR_WRT_VEC_LOOP (stmt_info);
1892 }
1893 
1894 /* Return the offset calculated by adding the offset of this DR_INFO to the
1895    corresponding data_reference's offset.  If CHECK_OUTER then use
1896    vect_dr_behavior to select the appropriate data_reference to use.  */
1897 
1898 inline tree
1899 get_dr_vinfo_offset (vec_info *vinfo,
1900 		     dr_vec_info *dr_info, bool check_outer = false)
1901 {
1902   innermost_loop_behavior *base;
1903   if (check_outer)
1904     base = vect_dr_behavior (vinfo, dr_info);
1905   else
1906     base = &dr_info->dr->innermost;
1907 
1908   tree offset = base->offset;
1909 
1910   if (!dr_info->offset)
1911     return offset;
1912 
1913   offset = fold_convert (sizetype, offset);
1914   return fold_build2 (PLUS_EXPR, TREE_TYPE (dr_info->offset), offset,
1915 		      dr_info->offset);
1916 }
1917 
1918 
1919 /* Return the vect cost model for LOOP.  */
1920 static inline enum vect_cost_model
1921 loop_cost_model (loop_p loop)
1922 {
1923   if (loop != NULL
1924       && loop->force_vectorize
1925       && flag_simd_cost_model != VECT_COST_MODEL_DEFAULT)
1926     return flag_simd_cost_model;
1927   return flag_vect_cost_model;
1928 }
1929 
1930 /* Return true if the vect cost model is unlimited.  */
1931 static inline bool
1932 unlimited_cost_model (loop_p loop)
1933 {
1934   return loop_cost_model (loop) == VECT_COST_MODEL_UNLIMITED;
1935 }
1936 
1937 /* Return true if the loop described by LOOP_VINFO is fully-masked and
1938    if the first iteration should use a partial mask in order to achieve
1939    alignment.  */
1940 
1941 static inline bool
1942 vect_use_loop_mask_for_alignment_p (loop_vec_info loop_vinfo)
1943 {
1944   return (LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
1945 	  && LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo));
1946 }
1947 
1948 /* Return the number of vectors of type VECTYPE that are needed to get
1949    NUNITS elements.  NUNITS should be based on the vectorization factor,
1950    so it is always a known multiple of the number of elements in VECTYPE.  */
1951 
1952 static inline unsigned int
1953 vect_get_num_vectors (poly_uint64 nunits, tree vectype)
1954 {
1955   return exact_div (nunits, TYPE_VECTOR_SUBPARTS (vectype)).to_constant ();
1956 }
1957 
1958 /* Return the number of copies needed for loop vectorization when
1959    a statement operates on vectors of type VECTYPE.  This is the
1960    vectorization factor divided by the number of elements in
1961    VECTYPE and is always known at compile time.  */
1962 
1963 static inline unsigned int
1964 vect_get_num_copies (loop_vec_info loop_vinfo, tree vectype)
1965 {
1966   return vect_get_num_vectors (LOOP_VINFO_VECT_FACTOR (loop_vinfo), vectype);
1967 }
1968 
1969 /* Update maximum unit count *MAX_NUNITS so that it accounts for
1970    NUNITS.  *MAX_NUNITS can be 1 if we haven't yet recorded anything.  */
1971 
1972 static inline void
1973 vect_update_max_nunits (poly_uint64 *max_nunits, poly_uint64 nunits)
1974 {
1975   /* All unit counts have the form vec_info::vector_size * X for some
1976      rational X, so two unit sizes must have a common multiple.
1977      Everything is a multiple of the initial value of 1.  */
1978   *max_nunits = force_common_multiple (*max_nunits, nunits);
1979 }
1980 
1981 /* Update maximum unit count *MAX_NUNITS so that it accounts for
1982    the number of units in vector type VECTYPE.  *MAX_NUNITS can be 1
1983    if we haven't yet recorded any vector types.  */
1984 
1985 static inline void
1986 vect_update_max_nunits (poly_uint64 *max_nunits, tree vectype)
1987 {
1988   vect_update_max_nunits (max_nunits, TYPE_VECTOR_SUBPARTS (vectype));
1989 }
1990 
1991 /* Return the vectorization factor that should be used for costing
1992    purposes while vectorizing the loop described by LOOP_VINFO.
1993    Pick a reasonable estimate if the vectorization factor isn't
1994    known at compile time.  */
1995 
1996 static inline unsigned int
1997 vect_vf_for_cost (loop_vec_info loop_vinfo)
1998 {
1999   return estimated_poly_value (LOOP_VINFO_VECT_FACTOR (loop_vinfo));
2000 }
2001 
2002 /* Estimate the number of elements in VEC_TYPE for costing purposes.
2003    Pick a reasonable estimate if the exact number isn't known at
2004    compile time.  */
2005 
2006 static inline unsigned int
2007 vect_nunits_for_cost (tree vec_type)
2008 {
2009   return estimated_poly_value (TYPE_VECTOR_SUBPARTS (vec_type));
2010 }
2011 
2012 /* Return the maximum possible vectorization factor for LOOP_VINFO.  */
2013 
2014 static inline unsigned HOST_WIDE_INT
2015 vect_max_vf (loop_vec_info loop_vinfo)
2016 {
2017   unsigned HOST_WIDE_INT vf;
2018   if (LOOP_VINFO_VECT_FACTOR (loop_vinfo).is_constant (&vf))
2019     return vf;
2020   return MAX_VECTORIZATION_FACTOR;
2021 }
2022 
2023 /* Return the size of the value accessed by unvectorized data reference
2024    DR_INFO.  This is only valid once STMT_VINFO_VECTYPE has been calculated
2025    for the associated gimple statement, since that guarantees that DR_INFO
2026    accesses either a scalar or a scalar equivalent.  ("Scalar equivalent"
2027    here includes things like V1SI, which can be vectorized in the same way
2028    as a plain SI.)  */
2029 
2030 inline unsigned int
2031 vect_get_scalar_dr_size (dr_vec_info *dr_info)
2032 {
2033   return tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr_info->dr))));
2034 }
2035 
2036 /* Return true if LOOP_VINFO requires a runtime check for whether the
2037    vector loop is profitable.  */
2038 
2039 inline bool
2040 vect_apply_runtime_profitability_check_p (loop_vec_info loop_vinfo)
2041 {
2042   unsigned int th = LOOP_VINFO_COST_MODEL_THRESHOLD (loop_vinfo);
2043   return (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
2044 	  && th >= vect_vf_for_cost (loop_vinfo));
2045 }
2046 
2047 /* Source location + hotness information. */
2048 extern dump_user_location_t vect_location;
2049 
2050 /* A macro for calling:
2051      dump_begin_scope (MSG, vect_location);
2052    via an RAII object, thus printing "=== MSG ===\n" to the dumpfile etc,
2053    and then calling
2054      dump_end_scope ();
2055    once the object goes out of scope, thus capturing the nesting of
2056    the scopes.
2057 
2058    These scopes affect dump messages within them: dump messages at the
2059    top level implicitly default to MSG_PRIORITY_USER_FACING, whereas those
2060    in a nested scope implicitly default to MSG_PRIORITY_INTERNALS.  */
2061 
2062 #define DUMP_VECT_SCOPE(MSG) \
2063   AUTO_DUMP_SCOPE (MSG, vect_location)
2064 
2065 /* A sentinel class for ensuring that the "vect_location" global gets
2066    reset at the end of a scope.
2067 
2068    The "vect_location" global is used during dumping and contains a
2069    location_t, which could contain references to a tree block via the
2070    ad-hoc data.  This data is used for tracking inlining information,
2071    but it's not a GC root; it's simply assumed that such locations never
2072    get accessed if the blocks are optimized away.
2073 
2074    Hence we need to ensure that such locations are purged at the end
2075    of any operations using them (e.g. via this class).  */
2076 
2077 class auto_purge_vect_location
2078 {
2079  public:
2080   ~auto_purge_vect_location ();
2081 };
2082 
2083 /*-----------------------------------------------------------------*/
2084 /* Function prototypes.                                            */
2085 /*-----------------------------------------------------------------*/
2086 
2087 /* Simple loop peeling and versioning utilities for vectorizer's purposes -
2088    in tree-vect-loop-manip.cc.  */
2089 extern void vect_set_loop_condition (class loop *, loop_vec_info,
2090 				     tree, tree, tree, bool);
2091 extern bool slpeel_can_duplicate_loop_p (const class loop *, const_edge);
2092 class loop *slpeel_tree_duplicate_loop_to_edge_cfg (class loop *,
2093 						     class loop *, edge);
2094 class loop *vect_loop_versioning (loop_vec_info, gimple *);
2095 extern class loop *vect_do_peeling (loop_vec_info, tree, tree,
2096 				    tree *, tree *, tree *, int, bool, bool,
2097 				    tree *);
2098 extern tree vect_get_main_loop_result (loop_vec_info, tree, tree);
2099 extern void vect_prepare_for_masked_peels (loop_vec_info);
2100 extern dump_user_location_t find_loop_location (class loop *);
2101 extern bool vect_can_advance_ivs_p (loop_vec_info);
2102 extern void vect_update_inits_of_drs (loop_vec_info, tree, tree_code);
2103 
2104 /* In tree-vect-stmts.cc.  */
2105 extern tree get_related_vectype_for_scalar_type (machine_mode, tree,
2106 						 poly_uint64 = 0);
2107 extern tree get_vectype_for_scalar_type (vec_info *, tree, unsigned int = 0);
2108 extern tree get_vectype_for_scalar_type (vec_info *, tree, slp_tree);
2109 extern tree get_mask_type_for_scalar_type (vec_info *, tree, unsigned int = 0);
2110 extern tree get_same_sized_vectype (tree, tree);
2111 extern bool vect_chooses_same_modes_p (vec_info *, machine_mode);
2112 extern bool vect_get_loop_mask_type (loop_vec_info);
2113 extern bool vect_is_simple_use (tree, vec_info *, enum vect_def_type *,
2114 				stmt_vec_info * = NULL, gimple ** = NULL);
2115 extern bool vect_is_simple_use (tree, vec_info *, enum vect_def_type *,
2116 				tree *, stmt_vec_info * = NULL,
2117 				gimple ** = NULL);
2118 extern bool vect_is_simple_use (vec_info *, stmt_vec_info, slp_tree,
2119 				unsigned, tree *, slp_tree *,
2120 				enum vect_def_type *,
2121 				tree *, stmt_vec_info * = NULL);
2122 extern bool vect_maybe_update_slp_op_vectype (slp_tree, tree);
2123 extern bool supportable_widening_operation (vec_info *,
2124 					    enum tree_code, stmt_vec_info,
2125 					    tree, tree, enum tree_code *,
2126 					    enum tree_code *, int *,
2127 					    vec<tree> *);
2128 extern bool supportable_narrowing_operation (enum tree_code, tree, tree,
2129 					     enum tree_code *, int *,
2130 					     vec<tree> *);
2131 
2132 extern unsigned record_stmt_cost (stmt_vector_for_cost *, int,
2133 				  enum vect_cost_for_stmt, stmt_vec_info,
2134 				  tree, int, enum vect_cost_model_location);
2135 extern unsigned record_stmt_cost (stmt_vector_for_cost *, int,
2136 				  enum vect_cost_for_stmt, slp_tree,
2137 				  tree, int, enum vect_cost_model_location);
2138 extern unsigned record_stmt_cost (stmt_vector_for_cost *, int,
2139 				  enum vect_cost_for_stmt,
2140 				  enum vect_cost_model_location);
2141 
2142 /* Overload of record_stmt_cost with VECTYPE derived from STMT_INFO.  */
2143 
2144 static inline unsigned
2145 record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
2146 		  enum vect_cost_for_stmt kind, stmt_vec_info stmt_info,
2147 		  int misalign, enum vect_cost_model_location where)
2148 {
2149   return record_stmt_cost (body_cost_vec, count, kind, stmt_info,
2150 			   STMT_VINFO_VECTYPE (stmt_info), misalign, where);
2151 }
2152 
2153 extern void vect_finish_replace_stmt (vec_info *, stmt_vec_info, gimple *);
2154 extern void vect_finish_stmt_generation (vec_info *, stmt_vec_info, gimple *,
2155 					 gimple_stmt_iterator *);
2156 extern opt_result vect_mark_stmts_to_be_vectorized (loop_vec_info, bool *);
2157 extern tree vect_get_store_rhs (stmt_vec_info);
2158 void vect_get_vec_defs_for_operand (vec_info *vinfo, stmt_vec_info, unsigned,
2159 				    tree op, vec<tree> *, tree = NULL);
2160 void vect_get_vec_defs (vec_info *, stmt_vec_info, slp_tree, unsigned,
2161 			tree, vec<tree> *,
2162 			tree = NULL, vec<tree> * = NULL,
2163 			tree = NULL, vec<tree> * = NULL,
2164 			tree = NULL, vec<tree> * = NULL);
2165 void vect_get_vec_defs (vec_info *, stmt_vec_info, slp_tree, unsigned,
2166 			tree, vec<tree> *, tree,
2167 			tree = NULL, vec<tree> * = NULL, tree = NULL,
2168 			tree = NULL, vec<tree> * = NULL, tree = NULL,
2169 			tree = NULL, vec<tree> * = NULL, tree = NULL);
2170 extern tree vect_init_vector (vec_info *, stmt_vec_info, tree, tree,
2171                               gimple_stmt_iterator *);
2172 extern tree vect_get_slp_vect_def (slp_tree, unsigned);
2173 extern bool vect_transform_stmt (vec_info *, stmt_vec_info,
2174 				 gimple_stmt_iterator *,
2175 				 slp_tree, slp_instance);
2176 extern void vect_remove_stores (vec_info *, stmt_vec_info);
2177 extern bool vect_nop_conversion_p (stmt_vec_info);
2178 extern opt_result vect_analyze_stmt (vec_info *, stmt_vec_info, bool *,
2179 				     slp_tree,
2180 				     slp_instance, stmt_vector_for_cost *);
2181 extern void vect_get_load_cost (vec_info *, stmt_vec_info, int,
2182 				dr_alignment_support, int, bool,
2183 				unsigned int *, unsigned int *,
2184 				stmt_vector_for_cost *,
2185 				stmt_vector_for_cost *, bool);
2186 extern void vect_get_store_cost (vec_info *, stmt_vec_info, int,
2187 				 dr_alignment_support, int,
2188 				 unsigned int *, stmt_vector_for_cost *);
2189 extern bool vect_supportable_shift (vec_info *, enum tree_code, tree);
2190 extern tree vect_gen_perm_mask_any (tree, const vec_perm_indices &);
2191 extern tree vect_gen_perm_mask_checked (tree, const vec_perm_indices &);
2192 extern void optimize_mask_stores (class loop*);
2193 extern tree vect_gen_while (gimple_seq *, tree, tree, tree,
2194 			    const char * = nullptr);
2195 extern tree vect_gen_while_not (gimple_seq *, tree, tree, tree);
2196 extern opt_result vect_get_vector_types_for_stmt (vec_info *,
2197 						  stmt_vec_info, tree *,
2198 						  tree *, unsigned int = 0);
2199 extern opt_tree vect_get_mask_type_for_stmt (stmt_vec_info, unsigned int = 0);
2200 
2201 /* In tree-vect-data-refs.cc.  */
2202 extern bool vect_can_force_dr_alignment_p (const_tree, poly_uint64);
2203 extern enum dr_alignment_support vect_supportable_dr_alignment
2204 				   (vec_info *, dr_vec_info *, tree, int);
2205 extern tree vect_get_smallest_scalar_type (stmt_vec_info, tree);
2206 extern opt_result vect_analyze_data_ref_dependences (loop_vec_info, unsigned int *);
2207 extern bool vect_slp_analyze_instance_dependence (vec_info *, slp_instance);
2208 extern opt_result vect_enhance_data_refs_alignment (loop_vec_info);
2209 extern opt_result vect_analyze_data_refs_alignment (loop_vec_info);
2210 extern bool vect_slp_analyze_instance_alignment (vec_info *, slp_instance);
2211 extern opt_result vect_analyze_data_ref_accesses (vec_info *, vec<int> *);
2212 extern opt_result vect_prune_runtime_alias_test_list (loop_vec_info);
2213 extern bool vect_gather_scatter_fn_p (vec_info *, bool, bool, tree, tree,
2214 				      tree, int, internal_fn *, tree *);
2215 extern bool vect_check_gather_scatter (stmt_vec_info, loop_vec_info,
2216 				       gather_scatter_info *);
2217 extern opt_result vect_find_stmt_data_reference (loop_p, gimple *,
2218 						 vec<data_reference_p> *,
2219 						 vec<int> *, int);
2220 extern opt_result vect_analyze_data_refs (vec_info *, poly_uint64 *, bool *);
2221 extern void vect_record_base_alignments (vec_info *);
2222 extern tree vect_create_data_ref_ptr (vec_info *,
2223 				      stmt_vec_info, tree, class loop *, tree,
2224 				      tree *, gimple_stmt_iterator *,
2225 				      gimple **, bool,
2226 				      tree = NULL_TREE);
2227 extern tree bump_vector_ptr (vec_info *, tree, gimple *, gimple_stmt_iterator *,
2228 			     stmt_vec_info, tree);
2229 extern void vect_copy_ref_info (tree, tree);
2230 extern tree vect_create_destination_var (tree, tree);
2231 extern bool vect_grouped_store_supported (tree, unsigned HOST_WIDE_INT);
2232 extern bool vect_store_lanes_supported (tree, unsigned HOST_WIDE_INT, bool);
2233 extern bool vect_grouped_load_supported (tree, bool, unsigned HOST_WIDE_INT);
2234 extern bool vect_load_lanes_supported (tree, unsigned HOST_WIDE_INT, bool);
2235 extern void vect_permute_store_chain (vec_info *, vec<tree> &,
2236 				      unsigned int, stmt_vec_info,
2237 				      gimple_stmt_iterator *, vec<tree> *);
2238 extern tree vect_setup_realignment (vec_info *,
2239 				    stmt_vec_info, gimple_stmt_iterator *,
2240 				    tree *, enum dr_alignment_support, tree,
2241 	                            class loop **);
2242 extern void vect_transform_grouped_load (vec_info *, stmt_vec_info, vec<tree>,
2243 					 int, gimple_stmt_iterator *);
2244 extern void vect_record_grouped_load_vectors (vec_info *,
2245 					      stmt_vec_info, vec<tree>);
2246 extern tree vect_get_new_vect_var (tree, enum vect_var_kind, const char *);
2247 extern tree vect_get_new_ssa_name (tree, enum vect_var_kind,
2248 				   const char * = NULL);
2249 extern tree vect_create_addr_base_for_vector_ref (vec_info *,
2250 						  stmt_vec_info, gimple_seq *,
2251 						  tree);
2252 
2253 /* In tree-vect-loop.cc.  */
2254 extern tree neutral_op_for_reduction (tree, code_helper, tree);
2255 extern widest_int vect_iv_limit_for_partial_vectors (loop_vec_info loop_vinfo);
2256 bool vect_rgroup_iv_might_wrap_p (loop_vec_info, rgroup_controls *);
2257 /* Used in tree-vect-loop-manip.cc */
2258 extern opt_result vect_determine_partial_vectors_and_peeling (loop_vec_info,
2259 							      bool);
2260 /* Used in gimple-loop-interchange.c and tree-parloops.cc.  */
2261 extern bool check_reduction_path (dump_user_location_t, loop_p, gphi *, tree,
2262 				  enum tree_code);
2263 extern bool needs_fold_left_reduction_p (tree, code_helper);
2264 /* Drive for loop analysis stage.  */
2265 extern opt_loop_vec_info vect_analyze_loop (class loop *, vec_info_shared *);
2266 extern tree vect_build_loop_niters (loop_vec_info, bool * = NULL);
2267 extern void vect_gen_vector_loop_niters (loop_vec_info, tree, tree *,
2268 					 tree *, bool);
2269 extern tree vect_halve_mask_nunits (tree, machine_mode);
2270 extern tree vect_double_mask_nunits (tree, machine_mode);
2271 extern void vect_record_loop_mask (loop_vec_info, vec_loop_masks *,
2272 				   unsigned int, tree, tree);
2273 extern tree vect_get_loop_mask (gimple_stmt_iterator *, vec_loop_masks *,
2274 				unsigned int, tree, unsigned int);
2275 extern void vect_record_loop_len (loop_vec_info, vec_loop_lens *, unsigned int,
2276 				  tree, unsigned int);
2277 extern tree vect_get_loop_len (loop_vec_info, vec_loop_lens *, unsigned int,
2278 			       unsigned int);
2279 extern gimple_seq vect_gen_len (tree, tree, tree, tree);
2280 extern stmt_vec_info info_for_reduction (vec_info *, stmt_vec_info);
2281 extern bool reduction_fn_for_scalar_code (code_helper, internal_fn *);
2282 
2283 /* Drive for loop transformation stage.  */
2284 extern class loop *vect_transform_loop (loop_vec_info, gimple *);
2285 struct vect_loop_form_info
2286 {
2287   tree number_of_iterations;
2288   tree number_of_iterationsm1;
2289   tree assumptions;
2290   gcond *loop_cond;
2291   gcond *inner_loop_cond;
2292 };
2293 extern opt_result vect_analyze_loop_form (class loop *, vect_loop_form_info *);
2294 extern loop_vec_info vect_create_loop_vinfo (class loop *, vec_info_shared *,
2295 					     const vect_loop_form_info *,
2296 					     loop_vec_info = nullptr);
2297 extern bool vectorizable_live_operation (vec_info *,
2298 					 stmt_vec_info, gimple_stmt_iterator *,
2299 					 slp_tree, slp_instance, int,
2300 					 bool, stmt_vector_for_cost *);
2301 extern bool vectorizable_reduction (loop_vec_info, stmt_vec_info,
2302 				    slp_tree, slp_instance,
2303 				    stmt_vector_for_cost *);
2304 extern bool vectorizable_induction (loop_vec_info, stmt_vec_info,
2305 				    gimple **, slp_tree,
2306 				    stmt_vector_for_cost *);
2307 extern bool vect_transform_reduction (loop_vec_info, stmt_vec_info,
2308 				      gimple_stmt_iterator *,
2309 				      gimple **, slp_tree);
2310 extern bool vect_transform_cycle_phi (loop_vec_info, stmt_vec_info,
2311 				      gimple **,
2312 				      slp_tree, slp_instance);
2313 extern bool vectorizable_lc_phi (loop_vec_info, stmt_vec_info,
2314 				 gimple **, slp_tree);
2315 extern bool vectorizable_phi (vec_info *, stmt_vec_info, gimple **, slp_tree,
2316 			      stmt_vector_for_cost *);
2317 extern bool vect_emulated_vector_p (tree);
2318 extern bool vect_can_vectorize_without_simd_p (tree_code);
2319 extern bool vect_can_vectorize_without_simd_p (code_helper);
2320 extern int vect_get_known_peeling_cost (loop_vec_info, int, int *,
2321 					stmt_vector_for_cost *,
2322 					stmt_vector_for_cost *,
2323 					stmt_vector_for_cost *);
2324 extern tree cse_and_gimplify_to_preheader (loop_vec_info, tree);
2325 
2326 /* In tree-vect-slp.cc.  */
2327 extern void vect_slp_init (void);
2328 extern void vect_slp_fini (void);
2329 extern void vect_free_slp_instance (slp_instance);
2330 extern bool vect_transform_slp_perm_load (vec_info *, slp_tree, const vec<tree> &,
2331 					  gimple_stmt_iterator *, poly_uint64,
2332 					  bool, unsigned *,
2333 					  unsigned * = nullptr, bool = false);
2334 extern bool vect_slp_analyze_operations (vec_info *);
2335 extern void vect_schedule_slp (vec_info *, const vec<slp_instance> &);
2336 extern opt_result vect_analyze_slp (vec_info *, unsigned);
2337 extern bool vect_make_slp_decision (loop_vec_info);
2338 extern void vect_detect_hybrid_slp (loop_vec_info);
2339 extern void vect_optimize_slp (vec_info *);
2340 extern void vect_gather_slp_loads (vec_info *);
2341 extern void vect_get_slp_defs (slp_tree, vec<tree> *);
2342 extern void vect_get_slp_defs (vec_info *, slp_tree, vec<vec<tree> > *,
2343 			       unsigned n = -1U);
2344 extern bool vect_slp_if_converted_bb (basic_block bb, loop_p orig_loop);
2345 extern bool vect_slp_function (function *);
2346 extern stmt_vec_info vect_find_last_scalar_stmt_in_slp (slp_tree);
2347 extern stmt_vec_info vect_find_first_scalar_stmt_in_slp (slp_tree);
2348 extern bool is_simple_and_all_uses_invariant (stmt_vec_info, loop_vec_info);
2349 extern bool can_duplicate_and_interleave_p (vec_info *, unsigned int, tree,
2350 					    unsigned int * = NULL,
2351 					    tree * = NULL, tree * = NULL);
2352 extern void duplicate_and_interleave (vec_info *, gimple_seq *, tree,
2353 				      const vec<tree> &, unsigned int, vec<tree> &);
2354 extern int vect_get_place_in_interleaving_chain (stmt_vec_info, stmt_vec_info);
2355 extern slp_tree vect_create_new_slp_node (unsigned, tree_code);
2356 extern void vect_free_slp_tree (slp_tree);
2357 extern bool compatible_calls_p (gcall *, gcall *);
2358 
2359 /* In tree-vect-patterns.cc.  */
2360 extern void
2361 vect_mark_pattern_stmts (vec_info *, stmt_vec_info, gimple *, tree);
2362 
2363 /* Pattern recognition functions.
2364    Additional pattern recognition functions can (and will) be added
2365    in the future.  */
2366 void vect_pattern_recog (vec_info *);
2367 
2368 /* In tree-vectorizer.cc.  */
2369 unsigned vectorize_loops (void);
2370 void vect_free_loop_info_assumptions (class loop *);
2371 gimple *vect_loop_vectorized_call (class loop *, gcond **cond = NULL);
2372 bool vect_stmt_dominates_stmt_p (gimple *, gimple *);
2373 
2374 /* SLP Pattern matcher types, tree-vect-slp-patterns.cc.  */
2375 
2376 /* Forward declaration of possible two operands operation that can be matched
2377    by the complex numbers pattern matchers.  */
2378 enum _complex_operation : unsigned;
2379 
2380 /* All possible load permute values that could result from the partial data-flow
2381    analysis.  */
2382 typedef enum _complex_perm_kinds {
2383    PERM_UNKNOWN,
2384    PERM_EVENODD,
2385    PERM_ODDEVEN,
2386    PERM_ODDODD,
2387    PERM_EVENEVEN,
2388    /* Can be combined with any other PERM values.  */
2389    PERM_TOP
2390 } complex_perm_kinds_t;
2391 
2392 /* Cache from nodes to the load permutation they represent.  */
2393 typedef hash_map <slp_tree, complex_perm_kinds_t>
2394   slp_tree_to_load_perm_map_t;
2395 
2396 /* Cache from nodes pair to being compatible or not.  */
2397 typedef pair_hash <nofree_ptr_hash <_slp_tree>,
2398 		   nofree_ptr_hash <_slp_tree>> slp_node_hash;
2399 typedef hash_map <slp_node_hash, bool> slp_compat_nodes_map_t;
2400 
2401 
2402 /* Vector pattern matcher base class.  All SLP pattern matchers must inherit
2403    from this type.  */
2404 
2405 class vect_pattern
2406 {
2407   protected:
2408     /* The number of arguments that the IFN requires.  */
2409     unsigned m_num_args;
2410 
2411     /* The internal function that will be used when a pattern is created.  */
2412     internal_fn m_ifn;
2413 
2414     /* The current node being inspected.  */
2415     slp_tree *m_node;
2416 
2417     /* The list of operands to be the children for the node produced when the
2418        internal function is created.  */
2419     vec<slp_tree> m_ops;
2420 
2421     /* Default constructor where NODE is the root of the tree to inspect.  */
2422     vect_pattern (slp_tree *node, vec<slp_tree> *m_ops, internal_fn ifn)
2423     {
2424       this->m_ifn = ifn;
2425       this->m_node = node;
2426       this->m_ops.create (0);
2427       if (m_ops)
2428 	this->m_ops.safe_splice (*m_ops);
2429     }
2430 
2431   public:
2432 
2433     /* Create a new instance of the pattern matcher class of the given type.  */
2434     static vect_pattern* recognize (slp_tree_to_load_perm_map_t *,
2435 				    slp_compat_nodes_map_t *, slp_tree *);
2436 
2437     /* Build the pattern from the data collected so far.  */
2438     virtual void build (vec_info *) = 0;
2439 
2440     /* Default destructor.  */
2441     virtual ~vect_pattern ()
2442     {
2443 	this->m_ops.release ();
2444     }
2445 };
2446 
2447 /* Function pointer to create a new pattern matcher from a generic type.  */
2448 typedef vect_pattern* (*vect_pattern_decl_t) (slp_tree_to_load_perm_map_t *,
2449 					      slp_compat_nodes_map_t *,
2450 					      slp_tree *);
2451 
2452 /* List of supported pattern matchers.  */
2453 extern vect_pattern_decl_t slp_patterns[];
2454 
2455 /* Number of supported pattern matchers.  */
2456 extern size_t num__slp_patterns;
2457 
2458 /* ----------------------------------------------------------------------
2459    Target support routines
2460    -----------------------------------------------------------------------
2461    The following routines are provided to simplify costing decisions in
2462    target code.  Please add more as needed.  */
2463 
2464 /* Return true if an operaton of kind KIND for STMT_INFO represents
2465    the extraction of an element from a vector in preparation for
2466    storing the element to memory.  */
2467 inline bool
2468 vect_is_store_elt_extraction (vect_cost_for_stmt kind, stmt_vec_info stmt_info)
2469 {
2470   return (kind == vec_to_scalar
2471 	  && STMT_VINFO_DATA_REF (stmt_info)
2472 	  && DR_IS_WRITE (STMT_VINFO_DATA_REF (stmt_info)));
2473 }
2474 
2475 /* Return true if STMT_INFO represents part of a reduction.  */
2476 inline bool
2477 vect_is_reduction (stmt_vec_info stmt_info)
2478 {
2479   return STMT_VINFO_REDUC_IDX (stmt_info) >= 0;
2480 }
2481 
2482 /* If STMT_INFO describes a reduction, return the vect_reduction_type
2483    of the reduction it describes, otherwise return -1.  */
2484 inline int
2485 vect_reduc_type (vec_info *vinfo, stmt_vec_info stmt_info)
2486 {
2487   if (loop_vec_info loop_vinfo = dyn_cast<loop_vec_info> (vinfo))
2488     if (STMT_VINFO_REDUC_DEF (stmt_info))
2489       {
2490 	stmt_vec_info reduc_info = info_for_reduction (loop_vinfo, stmt_info);
2491 	return int (STMT_VINFO_REDUC_TYPE (reduc_info));
2492       }
2493   return -1;
2494 }
2495 
2496 /* If STMT_INFO is a COND_EXPR that includes an embedded comparison, return the
2497    scalar type of the values being compared.  Return null otherwise.  */
2498 inline tree
2499 vect_embedded_comparison_type (stmt_vec_info stmt_info)
2500 {
2501   if (auto *assign = dyn_cast<gassign *> (stmt_info->stmt))
2502     if (gimple_assign_rhs_code (assign) == COND_EXPR)
2503       {
2504 	tree cond = gimple_assign_rhs1 (assign);
2505 	if (COMPARISON_CLASS_P (cond))
2506 	  return TREE_TYPE (TREE_OPERAND (cond, 0));
2507       }
2508   return NULL_TREE;
2509 }
2510 
2511 /* If STMT_INFO is a comparison or contains an embedded comparison, return the
2512    scalar type of the values being compared.  Return null otherwise.  */
2513 inline tree
2514 vect_comparison_type (stmt_vec_info stmt_info)
2515 {
2516   if (auto *assign = dyn_cast<gassign *> (stmt_info->stmt))
2517     if (TREE_CODE_CLASS (gimple_assign_rhs_code (assign)) == tcc_comparison)
2518       return TREE_TYPE (gimple_assign_rhs1 (assign));
2519   return vect_embedded_comparison_type (stmt_info);
2520 }
2521 
2522 /* Return true if STMT_INFO extends the result of a load.  */
2523 inline bool
2524 vect_is_extending_load (class vec_info *vinfo, stmt_vec_info stmt_info)
2525 {
2526   /* Although this is quite large for an inline function, this part
2527      at least should be inline.  */
2528   gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
2529   if (!assign || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (assign)))
2530     return false;
2531 
2532   tree rhs = gimple_assign_rhs1 (stmt_info->stmt);
2533   tree lhs_type = TREE_TYPE (gimple_assign_lhs (assign));
2534   tree rhs_type = TREE_TYPE (rhs);
2535   if (!INTEGRAL_TYPE_P (lhs_type)
2536       || !INTEGRAL_TYPE_P (rhs_type)
2537       || TYPE_PRECISION (lhs_type) <= TYPE_PRECISION (rhs_type))
2538     return false;
2539 
2540   stmt_vec_info def_stmt_info = vinfo->lookup_def (rhs);
2541   return (def_stmt_info
2542 	  && STMT_VINFO_DATA_REF (def_stmt_info)
2543 	  && DR_IS_READ (STMT_VINFO_DATA_REF (def_stmt_info)));
2544 }
2545 
2546 /* Return true if STMT_INFO is an integer truncation.  */
2547 inline bool
2548 vect_is_integer_truncation (stmt_vec_info stmt_info)
2549 {
2550   gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
2551   if (!assign || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (assign)))
2552     return false;
2553 
2554   tree lhs_type = TREE_TYPE (gimple_assign_lhs (assign));
2555   tree rhs_type = TREE_TYPE (gimple_assign_rhs1 (assign));
2556   return (INTEGRAL_TYPE_P (lhs_type)
2557 	  && INTEGRAL_TYPE_P (rhs_type)
2558 	  && TYPE_PRECISION (lhs_type) < TYPE_PRECISION (rhs_type));
2559 }
2560 
2561 #endif  /* GCC_TREE_VECTORIZER_H  */
2562