/* Vectorizer
   Copyright (C) 2003-2020 Free Software Foundation, Inc.
   Contributed by Dorit Naishlos <dorit@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#ifndef GCC_TREE_VECTORIZER_H
#define GCC_TREE_VECTORIZER_H

typedef class _stmt_vec_info *stmt_vec_info;

#include "tree-data-ref.h"
#include "tree-hash-traits.h"
#include "target.h"
#include <utility>

/* Used for naming of new temporaries.  */
enum vect_var_kind {
  vect_simple_var,
  vect_pointer_var,
  vect_scalar_var,
  vect_mask_var
};

/* Defines the type of operation.  */
enum operation_type {
  unary_op = 1,
  binary_op,
  ternary_op
};

/* Define type of available alignment support.  */
enum dr_alignment_support {
  dr_unaligned_unsupported,
  dr_unaligned_supported,
  dr_explicit_realign,
  dr_explicit_realign_optimized,
  dr_aligned
};

/* Define type of def-use cross-iteration cycle.  */
enum vect_def_type {
  vect_uninitialized_def = 0,
  vect_constant_def = 1,
  vect_external_def,
  vect_internal_def,
  vect_induction_def,
  vect_reduction_def,
  vect_double_reduction_def,
  vect_nested_cycle,
  vect_unknown_def_type
};

/* Define type of reduction.  */
enum vect_reduction_type {
  TREE_CODE_REDUCTION,
  COND_REDUCTION,
  INTEGER_INDUC_COND_REDUCTION,
  CONST_COND_REDUCTION,

  /* Retain a scalar phi and use a FOLD_EXTRACT_LAST within the loop
     to implement:

       for (int i = 0; i < VF; ++i)
         res = cond[i] ? val[i] : res;  */
  EXTRACT_LAST_REDUCTION,

  /* Use a folding reduction within the loop to implement:

       for (int i = 0; i < VF; ++i)
         res = res OP val[i];

     (with no reassociation).  */
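  /* An illustrative aside (not vectorizer code): with OP == + on floats,
     the folded order
       res = (((res + val[0]) + val[1]) + val[2]) + val[3];
     need not equal a reassociated order such as
       res = res + ((val[0] + val[1]) + (val[2] + val[3]));
     because floating-point addition is not associative, which is why this
     kind of reduction must preserve the original evaluation order.  */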
  FOLD_LEFT_REDUCTION
};

#define VECTORIZABLE_CYCLE_DEF(D) (((D) == vect_reduction_def)           \
                                   || ((D) == vect_double_reduction_def) \
                                   || ((D) == vect_nested_cycle))

/* Structure to encapsulate information about a group of like
   instructions to be presented to the target cost model.  */
struct stmt_info_for_cost {
  int count;
  enum vect_cost_for_stmt kind;
  enum vect_cost_model_location where;
  stmt_vec_info stmt_info;
  int misalign;
};

typedef vec<stmt_info_for_cost> stmt_vector_for_cost;

/* Maps base addresses to an innermost_loop_behavior that gives the maximum
   known alignment for that base.  */
typedef hash_map<tree_operand_hash,
                 innermost_loop_behavior *> vec_base_alignments;

/************************************************************************
  SLP
 ************************************************************************/
typedef struct _slp_tree *slp_tree;

/* A computation tree of an SLP instance.  Each node corresponds to a group of
   stmts to be packed in a SIMD stmt.  */
struct _slp_tree {
  /* Nodes that contain the def-stmts of this node's statements' operands.  */
  vec<slp_tree> children;
  /* A group of scalar stmts to be vectorized together.  */
  vec<stmt_vec_info> stmts;
  /* A group of scalar operands to be vectorized together.  */
  vec<tree> ops;
  /* Load permutation relative to the stores, NULL if there is no
     permutation.  */
  vec<unsigned> load_permutation;
  /* Vectorized stmt/s.  */
  vec<stmt_vec_info> vec_stmts;
  /* Number of vector stmts that are created to replace the group of scalar
     stmts.  It is calculated during the transformation phase as the number of
     scalar elements in one scalar iteration (GROUP_SIZE) multiplied by VF
     divided by the vector size.  */
  unsigned int vec_stmts_size;
  /* Reference count in the SLP graph.  */
  unsigned int refcnt;
  /* The maximum number of vector elements for the subtree rooted
     at this node.  */
  poly_uint64 max_nunits;
  /* Whether the scalar computations use two different operators.  */
  bool two_operators;
  /* The DEF type of this node.  */
  enum vect_def_type def_type;
};


/* An SLP instance is a sequence of stmts in a loop that can be packed into
   SIMD stmts.  */
typedef class _slp_instance {
public:
  /* The root of the SLP tree.  */
  slp_tree root;

  /* For vector constructors, the constructor stmt that the SLP tree is built
     from, NULL otherwise.  */
  stmt_vec_info root_stmt;

  /* Size of the groups of scalar stmts that will be replaced by SIMD
     stmt/s.  */
  unsigned int group_size;

  /* The unrolling factor required to vectorize this SLP instance.  */
  poly_uint64 unrolling_factor;

  /* The group of nodes that contain loads of this SLP instance.  */
  vec<slp_tree> loads;

  /* The SLP node containing the reduction PHIs.  */
  slp_tree reduc_phis;
} *slp_instance;


/* Access Functions.  */
#define SLP_INSTANCE_TREE(S)                     (S)->root
#define SLP_INSTANCE_GROUP_SIZE(S)               (S)->group_size
#define SLP_INSTANCE_UNROLLING_FACTOR(S)         (S)->unrolling_factor
#define SLP_INSTANCE_LOADS(S)                    (S)->loads
#define SLP_INSTANCE_ROOT_STMT(S)                (S)->root_stmt

#define SLP_TREE_CHILDREN(S)                     (S)->children
#define SLP_TREE_SCALAR_STMTS(S)                 (S)->stmts
#define SLP_TREE_SCALAR_OPS(S)                   (S)->ops
#define SLP_TREE_VEC_STMTS(S)                    (S)->vec_stmts
#define SLP_TREE_NUMBER_OF_VEC_STMTS(S)          (S)->vec_stmts_size
#define SLP_TREE_LOAD_PERMUTATION(S)             (S)->load_permutation
#define SLP_TREE_TWO_OPERATORS(S)                (S)->two_operators
#define SLP_TREE_DEF_TYPE(S)                     (S)->def_type

/* Key for a map that records the association between scalar conditions
   and the corresponding loop mask, and that is populated by
   vect_record_loop_mask.  */

struct scalar_cond_masked_key
{
  scalar_cond_masked_key (tree t, unsigned ncopies_)
    : ncopies (ncopies_)
  {
    get_cond_ops_from_tree (t);
  }

  void get_cond_ops_from_tree (tree);

  unsigned ncopies;
  tree_code code;
  tree op0;
  tree op1;
};

template<>
struct default_hash_traits<scalar_cond_masked_key>
{
  typedef scalar_cond_masked_key compare_type;
  typedef scalar_cond_masked_key value_type;

  static inline hashval_t
  hash (value_type v)
  {
    inchash::hash h;
    h.add_int (v.code);
    inchash::add_expr (v.op0, h, 0);
    inchash::add_expr (v.op1, h, 0);
    h.add_int (v.ncopies);
    return h.end ();
  }

  static inline bool
  equal (value_type existing, value_type candidate)
  {
    return (existing.ncopies == candidate.ncopies
            && existing.code == candidate.code
            && operand_equal_p (existing.op0, candidate.op0, 0)
            && operand_equal_p (existing.op1, candidate.op1, 0));
  }

  static const bool empty_zero_p = true;

  static inline void
  mark_empty (value_type &v)
  {
    v.ncopies = 0;
  }

  static inline bool
  is_empty (value_type v)
  {
    return v.ncopies == 0;
  }

  static inline void mark_deleted (value_type &) {}

  static inline bool is_deleted (const value_type &)
  {
    return false;
  }

  static inline void remove (value_type &) {}
};

typedef hash_set<scalar_cond_masked_key> scalar_cond_masked_set_type;
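/* A usage sketch (hypothetical code, not a call site in this file): the
   set lets the vectorizer notice that a loop mask for an equivalent
   scalar condition has already been recorded, e.g.

     scalar_cond_masked_key key (cond_expr, ncopies);
     if (!loop_vinfo->scalar_cond_masked_set.contains (key))
       loop_vinfo->scalar_cond_masked_set.add (key);

   where scalar_cond_masked_set is the _loop_vec_info member declared
   below; keys compare equal when the condition code, both operands and
   the number of copies all match.  */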
/* Describes two objects whose addresses must be unequal for the vectorized
   loop to be valid.  */
typedef std::pair<tree, tree> vec_object_pair;

/* Records that vectorization is only possible if abs (EXPR) >= MIN_VALUE.
   UNSIGNED_P is true if we can assume that abs (EXPR) == EXPR.  */
class vec_lower_bound {
public:
  vec_lower_bound () {}
  vec_lower_bound (tree e, bool u, poly_uint64 m)
    : expr (e), unsigned_p (u), min_value (m) {}

  tree expr;
  bool unsigned_p;
  poly_uint64 min_value;
};

/* Vectorizer state shared between different analyses (e.g. for different
   vector sizes) of the same CFG region.  */
class vec_info_shared {
public:
  vec_info_shared ();
  ~vec_info_shared ();

  void save_datarefs ();
  void check_datarefs ();

  /* All data references.  Freed by free_data_refs, so not an auto_vec.  */
  vec<data_reference_p> datarefs;
  vec<data_reference> datarefs_copy;

  /* The loop nest in which the data dependences are computed.  */
  auto_vec<loop_p> loop_nest;

  /* All data dependences.  Freed by free_dependence_relations, so not
     an auto_vec.  */
  vec<ddr_p> ddrs;
};

/* Vectorizer state common between loop and basic-block vectorization.  */
class vec_info {
public:
  typedef hash_set<int_hash<machine_mode, E_VOIDmode, E_BLKmode> > mode_set;
  enum vec_kind { bb, loop };

  vec_info (vec_kind, void *, vec_info_shared *);
  ~vec_info ();

  stmt_vec_info add_stmt (gimple *);
  stmt_vec_info lookup_stmt (gimple *);
  stmt_vec_info lookup_def (tree);
  stmt_vec_info lookup_single_use (tree);
  class dr_vec_info *lookup_dr (data_reference *);
  void move_dr (stmt_vec_info, stmt_vec_info);
  void remove_stmt (stmt_vec_info);
  void replace_stmt (gimple_stmt_iterator *, stmt_vec_info, gimple *);

  /* The type of vectorization.  */
  vec_kind kind;

  /* Shared vectorizer state.  */
  vec_info_shared *shared;

  /* The mapping of GIMPLE UID to stmt_vec_info.  */
  vec<stmt_vec_info> stmt_vec_infos;

  /* All SLP instances.  */
  auto_vec<slp_instance> slp_instances;

  /* Maps base addresses to an innermost_loop_behavior that gives the maximum
     known alignment for that base.  */
  vec_base_alignments base_alignments;

  /* All interleaving chains of stores, represented by the first
     stmt in the chain.  */
  auto_vec<stmt_vec_info> grouped_stores;

  /* Cost data used by the target cost model.  */
  void *target_cost_data;

  /* The set of vector modes used in the vectorized region.  */
  mode_set used_vector_modes;

  /* The argument we should pass to related_vector_mode when looking up
     the vector mode for a scalar mode, or VOIDmode if we haven't yet
     made any decisions about which vector modes to use.  */
  machine_mode vector_mode;

private:
  stmt_vec_info new_stmt_vec_info (gimple *stmt);
  void set_vinfo_for_stmt (gimple *, stmt_vec_info);
  void free_stmt_vec_infos ();
  void free_stmt_vec_info (stmt_vec_info);
};

class _loop_vec_info;
class _bb_vec_info;

template<>
template<>
inline bool
is_a_helper <_loop_vec_info *>::test (vec_info *i)
{
  return i->kind == vec_info::loop;
}

template<>
template<>
inline bool
is_a_helper <_bb_vec_info *>::test (vec_info *i)
{
  return i->kind == vec_info::bb;
}


/* In general, we can divide the vector statements in a vectorized loop
   into related groups ("rgroups") and say that for each rgroup there is
   some nS such that the rgroup operates on nS values from one scalar
   iteration followed by nS values from the next.  That is, if VF is the
   vectorization factor of the loop, the rgroup operates on a sequence:

     (1,1) (1,2) ... (1,nS) (2,1) ... (2,nS) ... (VF,1) ... (VF,nS)

   where (i,j) represents a scalar value with index j in a scalar
   iteration with index i.

   [ We use the term "rgroup" to emphasise that this grouping isn't
     necessarily the same as the grouping of statements used elsewhere.
     For example, if we implement a group of scalar loads using gather
     loads, we'll use a separate gather load for each scalar load, and
     thus each gather load will belong to its own rgroup. ]

   In general this sequence will occupy nV vectors concatenated
   together.  If these vectors have nL lanes each, the total number
   of scalar values N is given by:

     N = nS * VF = nV * nL

   None of nS, VF, nV and nL are required to be a power of 2.  nS and nV
   are compile-time constants but VF and nL can be variable (if the target
   supports variable-length vectors).
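
   As a worked instance of the formula (illustrative numbers only):
   nS = 2, VF = 8 and nL = 4 give N = 2 * 8 = 16 scalar values,
   occupying nV = 16 / 4 = 4 vectors.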

   In classical vectorization, each iteration of the vector loop would
   handle exactly VF iterations of the original scalar loop.  However,
   in a fully-masked loop, a particular iteration of the vector loop
   might handle fewer than VF iterations of the scalar loop.  The vector
   lanes that correspond to iterations of the scalar loop are said to be
   "active" and the other lanes are said to be "inactive".

   In a fully-masked loop, many rgroups need to be masked to ensure that
   they have no effect for the inactive lanes.  Each such rgroup needs a
   sequence of booleans in the same order as above, but with each (i,j)
   replaced by a boolean that indicates whether iteration i is active.
   This sequence occupies nV vector masks that again have nL lanes each.
   Thus the mask sequence as a whole consists of VF independent booleans
   that are each repeated nS times.

   We make the simplifying assumption that if a sequence of nV masks is
   suitable for one (nS,nL) pair, we can reuse it for (nS/2,nL/2) by
   VIEW_CONVERTing it.  This holds for all current targets that support
   fully-masked loops.  For example, suppose the scalar loop is:

     float *f;
     double *d;
     for (int i = 0; i < n; ++i)
       {
         f[i * 2 + 0] += 1.0f;
         f[i * 2 + 1] += 2.0f;
         d[i] += 3.0;
       }

   and suppose that vectors have 256 bits.  The vectorized f accesses
   will belong to one rgroup and the vectorized d access to another:

     f rgroup: nS = 2, nV = 1, nL = 8
     d rgroup: nS = 1, nV = 1, nL = 4
     VF = 4

   [ In this simple example the rgroups do correspond to the normal
     SLP grouping scheme. ]

   If only the first three lanes are active, the masks we need are:

     f rgroup: 1 1 | 1 1 | 1 1 | 0 0
     d rgroup:  1  |  1  |  1  |  0

   Here we can use a mask calculated for f's rgroup for d's, but not
   vice versa.

   Thus for each value of nV, it is enough to provide nV masks, with the
   mask being calculated based on the highest nL (or, equivalently, based
   on the highest nS) required by any rgroup with that nV.  We therefore
   represent the entire collection of masks as a two-level table, with the
   first level being indexed by nV - 1 (since nV == 0 doesn't exist) and
   the second being indexed by the mask index 0 <= i < nV.  */

/* The masks needed by rgroups with nV vectors, according to the
   description above.  */
struct rgroup_masks {
  /* The largest nS for all rgroups that use these masks.  */
  unsigned int max_nscalars_per_iter;

  /* The type of mask to use, based on the highest nS recorded above.  */
  tree mask_type;

  /* A vector of nV masks, in iteration order.  */
  vec<tree> masks;
};
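/* An indexing sketch for the two-level table just described
   (illustrative, using the vec_loop_masks typedef below): mask I of an
   rgroup that needs NV vectors lives at

     masks[NV - 1].masks[I]

   with 0 <= I < NV, so all rgroups with the same NV share one
   rgroup_masks entry.  */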
typedef auto_vec<rgroup_masks> vec_loop_masks;

typedef auto_vec<std::pair<data_reference *, tree> > drs_init_vec;

/*-----------------------------------------------------------------*/
/* Info on vectorized loops.                                       */
/*-----------------------------------------------------------------*/
typedef class _loop_vec_info : public vec_info {
public:
  _loop_vec_info (class loop *, vec_info_shared *);
  ~_loop_vec_info ();

  /* The loop to which this info struct refers.  */
  class loop *loop;

  /* The loop basic blocks.  */
  basic_block *bbs;

  /* Number of latch executions.  */
  tree num_itersm1;
  /* Number of iterations.  */
  tree num_iters;
  /* Number of iterations of the original loop.  */
  tree num_iters_unchanged;
  /* Condition under which this loop is analyzed and versioned.  */
  tree num_iters_assumptions;

  /* Threshold of the number of iterations below which vectorization will
     not be performed.  It is calculated from MIN_PROFITABLE_ITERS and
     param_min_vect_loop_bound.  */
  unsigned int th;

  /* When applying loop versioning, the vector form should only be used
     if the number of scalar iterations is >= this value, on top of all
     the other requirements.  Ignored when loop versioning is not being
     used.  */
  poly_uint64 versioning_threshold;

  /* Unrolling factor.  */
  poly_uint64 vectorization_factor;

  /* Maximum runtime vectorization factor, or MAX_VECTORIZATION_FACTOR
     if there is no particular limit.  */
  unsigned HOST_WIDE_INT max_vectorization_factor;

  /* The masks that a fully-masked loop should use to avoid operating
     on inactive scalars.  */
  vec_loop_masks masks;

  /* The set of scalar conditions that have a loop mask applied.  */
  scalar_cond_masked_set_type scalar_cond_masked_set;

  /* If we are using a loop mask to align memory addresses, this variable
     contains the number of vector elements that we should skip in the
     first iteration of the vector loop (i.e. the number of leading
     elements that should be false in the first mask).  */
  tree mask_skip_niters;

  /* Type of the variables to use in the WHILE_ULT call for fully-masked
     loops.  */
  tree mask_compare_type;

  /* For #pragma omp simd if (x) loops, the x expression.  If constant 0,
     the loop should not be vectorized; if constant non-zero, simd_if_cond
     shouldn't be set and the loop vectorized normally; if an SSA_NAME, the
     loop should be versioned on that condition, using the scalar loop if
     the condition is false and the vectorized loop otherwise.  */
  tree simd_if_cond;

  /* Type of the IV to use in the WHILE_ULT call for fully-masked
     loops.  */
  tree iv_type;

  /* The data reference of unknown alignment according to which the loop
     was peeled.  */
  class dr_vec_info *unaligned_dr;

  /* peeling_for_alignment indicates whether peeling for alignment will take
     place, and what the peeling factor should be:
     peeling_for_alignment = X means:
        If X=0: Peeling for alignment will not be applied.
        If X>0: Peel first X iterations.
        If X=-1: Generate a runtime test to calculate the number of iterations
                 to be peeled, using the dataref recorded in the field
                 unaligned_dr.  */
  int peeling_for_alignment;

  /* The mask used to check the alignment of pointers or arrays.  */
  int ptr_mask;

  /* Data Dependence Relations defining address ranges that are candidates
     for a run-time aliasing check.  */
  auto_vec<ddr_p> may_alias_ddrs;

  /* Data Dependence Relations defining address ranges together with segment
     lengths from which the run-time aliasing check is built.  */
  auto_vec<dr_with_seg_len_pair_t> comp_alias_ddrs;

  /* Check that the addresses of each pair of objects are unequal.  */
  auto_vec<vec_object_pair> check_unequal_addrs;

  /* List of values that are required to be nonzero.  This is used to check
     whether things like "x[i * n] += 1;" are safe and eventually gets added
     to the checks for lower bounds below.  */
  auto_vec<tree> check_nonzero;

  /* List of values that need to be checked for a minimum value.  */
  auto_vec<vec_lower_bound> lower_bounds;

  /* Statements in the loop that have data references that are candidates for
     a runtime (loop versioning) misalignment check.  */
  auto_vec<stmt_vec_info> may_misalign_stmts;

  /* Reduction cycles detected in the loop.  Used in loop-aware SLP.  */
  auto_vec<stmt_vec_info> reductions;

  /* All reduction chains in the loop, represented by the first
     stmt in the chain.  */
  auto_vec<stmt_vec_info> reduction_chains;

  /* Cost vector for a single scalar iteration.  */
  auto_vec<stmt_info_for_cost> scalar_cost_vec;

  /* Map of IV base/step expressions to the inserted name in the
     preheader.  */
  hash_map<tree_operand_hash, tree> *ivexpr_map;

  /* Map of OpenMP "omp simd array" scan variables to the corresponding
     rhs of the store of the initializer.  */
  hash_map<tree, tree> *scan_map;

  /* The unrolling factor needed to SLP the loop.  If pure SLP is applied
     to the loop, i.e., no unrolling is needed, this is 1.  */
  poly_uint64 slp_unrolling_factor;

  /* Cost of a single scalar iteration.  */
  int single_scalar_iteration_cost;

  /* The cost of the vector prologue and epilogue, including peeled
     iterations and set-up code.  */
  int vec_outside_cost;

  /* The cost of the vector loop body.  */
  int vec_inside_cost;

  /* Is the loop vectorizable?  */
  bool vectorizable;

  /* Records whether we still have the option of using a fully-masked
     loop.  */
  bool can_fully_mask_p;

  /* True if we have decided to use a fully-masked loop.  */
  bool fully_masked_p;

  /* When we have grouped data accesses with gaps, we may introduce invalid
     memory accesses.  We peel the last iteration of the loop to prevent
     this.  */
  bool peeling_for_gaps;

  /* When the number of iterations is not a multiple of the vector size
     we need to peel off iterations at the end to form an epilogue loop.  */
  bool peeling_for_niter;

  /* True if there are no loop carried data dependencies in the loop.
     If loop->safelen <= 1, then this is always true, either the loop
     didn't have any loop carried data dependencies, or the loop is being
     vectorized guarded with some runtime alias checks, or couldn't
     be vectorized at all, but then this field shouldn't be used.
     For loop->safelen >= 2, the user has asserted that there are no
     backward dependencies, but there still could be loop carried forward
     dependencies in such loops.  This flag will be false if normal
     vectorizer data dependency analysis would fail or require versioning
     for alias, but because of loop->safelen >= 2 it has been vectorized
     even without versioning for alias.  E.g. in:
       #pragma omp simd
       for (int i = 0; i < m; i++)
         a[i] = a[i + k] * c;
     (or #pragma simd or #pragma ivdep) we can vectorize this and it will
     DTRT even for k > 0 && k < m, but without safelen we would not
     vectorize this, so this field would be false.  */
  bool no_data_dependencies;

  /* Mark loops having masked stores.  */
  bool has_mask_store;

  /* Queued scaling factor for the scalar loop.  */
  profile_probability scalar_loop_scaling;

  /* If if-conversion versioned this loop before conversion, this is the
     loop version without if-conversion.  */
  class loop *scalar_loop;

  /* For loops being epilogues of already vectorized loops
     this points to the original vectorized loop.  Otherwise NULL.  */
  _loop_vec_info *orig_loop_info;

  /* Used to store loop_vec_infos of epilogues of this loop during
     analysis.  */
  vec<_loop_vec_info *> epilogue_vinfos;

} *loop_vec_info;

/* Access Functions.  */
#define LOOP_VINFO_LOOP(L)                 (L)->loop
#define LOOP_VINFO_BBS(L)                  (L)->bbs
#define LOOP_VINFO_NITERSM1(L)             (L)->num_itersm1
#define LOOP_VINFO_NITERS(L)               (L)->num_iters
/* Since LOOP_VINFO_NITERS and LOOP_VINFO_NITERSM1 can change after
   prologue peeling, retain the total unchanged scalar loop iterations
   for the cost model.  */
#define LOOP_VINFO_NITERS_UNCHANGED(L)     (L)->num_iters_unchanged
#define LOOP_VINFO_NITERS_ASSUMPTIONS(L)   (L)->num_iters_assumptions
#define LOOP_VINFO_COST_MODEL_THRESHOLD(L) (L)->th
#define LOOP_VINFO_VERSIONING_THRESHOLD(L) (L)->versioning_threshold
#define LOOP_VINFO_VECTORIZABLE_P(L)       (L)->vectorizable
#define LOOP_VINFO_CAN_FULLY_MASK_P(L)     (L)->can_fully_mask_p
#define LOOP_VINFO_FULLY_MASKED_P(L)       (L)->fully_masked_p
#define LOOP_VINFO_VECT_FACTOR(L)          (L)->vectorization_factor
#define LOOP_VINFO_MAX_VECT_FACTOR(L)      (L)->max_vectorization_factor
#define LOOP_VINFO_MASKS(L)                (L)->masks
#define LOOP_VINFO_MASK_SKIP_NITERS(L)     (L)->mask_skip_niters
#define LOOP_VINFO_MASK_COMPARE_TYPE(L)    (L)->mask_compare_type
#define LOOP_VINFO_MASK_IV_TYPE(L)         (L)->iv_type
#define LOOP_VINFO_PTR_MASK(L)             (L)->ptr_mask
#define LOOP_VINFO_LOOP_NEST(L)            (L)->shared->loop_nest
#define LOOP_VINFO_DATAREFS(L)             (L)->shared->datarefs
#define LOOP_VINFO_DDRS(L)                 (L)->shared->ddrs
#define LOOP_VINFO_INT_NITERS(L)           (TREE_INT_CST_LOW ((L)->num_iters))
#define LOOP_VINFO_PEELING_FOR_ALIGNMENT(L) (L)->peeling_for_alignment
#define LOOP_VINFO_UNALIGNED_DR(L)         (L)->unaligned_dr
#define LOOP_VINFO_MAY_MISALIGN_STMTS(L)   (L)->may_misalign_stmts
#define LOOP_VINFO_MAY_ALIAS_DDRS(L)       (L)->may_alias_ddrs
#define LOOP_VINFO_COMP_ALIAS_DDRS(L)      (L)->comp_alias_ddrs
#define LOOP_VINFO_CHECK_UNEQUAL_ADDRS(L)  (L)->check_unequal_addrs
#define LOOP_VINFO_CHECK_NONZERO(L)        (L)->check_nonzero
#define LOOP_VINFO_LOWER_BOUNDS(L)         (L)->lower_bounds
#define LOOP_VINFO_GROUPED_STORES(L)       (L)->grouped_stores
#define LOOP_VINFO_SLP_INSTANCES(L)        (L)->slp_instances
#define LOOP_VINFO_SLP_UNROLLING_FACTOR(L) (L)->slp_unrolling_factor
#define LOOP_VINFO_REDUCTIONS(L)           (L)->reductions
#define LOOP_VINFO_REDUCTION_CHAINS(L)     (L)->reduction_chains
#define LOOP_VINFO_TARGET_COST_DATA(L)     (L)->target_cost_data
#define LOOP_VINFO_PEELING_FOR_GAPS(L)     (L)->peeling_for_gaps
#define LOOP_VINFO_PEELING_FOR_NITER(L)    (L)->peeling_for_niter
#define LOOP_VINFO_NO_DATA_DEPENDENCIES(L) (L)->no_data_dependencies
#define LOOP_VINFO_SCALAR_LOOP(L)          (L)->scalar_loop
#define LOOP_VINFO_SCALAR_LOOP_SCALING(L)  (L)->scalar_loop_scaling
#define LOOP_VINFO_HAS_MASK_STORE(L)       (L)->has_mask_store
#define LOOP_VINFO_SCALAR_ITERATION_COST(L) (L)->scalar_cost_vec
#define LOOP_VINFO_SINGLE_SCALAR_ITERATION_COST(L) (L)->single_scalar_iteration_cost
#define LOOP_VINFO_ORIG_LOOP_INFO(L)       (L)->orig_loop_info
#define LOOP_VINFO_SIMD_IF_COND(L)         (L)->simd_if_cond

#define LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT(L)       \
  ((L)->may_misalign_stmts.length () > 0)
#define LOOP_REQUIRES_VERSIONING_FOR_ALIAS(L)           \
  ((L)->comp_alias_ddrs.length () > 0                   \
   || (L)->check_unequal_addrs.length () > 0            \
   || (L)->lower_bounds.length () > 0)
#define LOOP_REQUIRES_VERSIONING_FOR_NITERS(L)          \
  (LOOP_VINFO_NITERS_ASSUMPTIONS (L))
#define LOOP_REQUIRES_VERSIONING_FOR_SIMD_IF_COND(L)    \
  (LOOP_VINFO_SIMD_IF_COND (L))
#define LOOP_REQUIRES_VERSIONING(L)                     \
  (LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (L)           \
   || LOOP_REQUIRES_VERSIONING_FOR_ALIAS (L)            \
   || LOOP_REQUIRES_VERSIONING_FOR_NITERS (L)           \
   || LOOP_REQUIRES_VERSIONING_FOR_SIMD_IF_COND (L))

#define LOOP_VINFO_NITERS_KNOWN_P(L)    \
  (tree_fits_shwi_p ((L)->num_iters) && tree_to_shwi ((L)->num_iters) > 0)

#define LOOP_VINFO_EPILOGUE_P(L) \
  (LOOP_VINFO_ORIG_LOOP_INFO (L) != NULL)

#define LOOP_VINFO_ORIG_MAX_VECT_FACTOR(L) \
  (LOOP_VINFO_MAX_VECT_FACTOR (LOOP_VINFO_ORIG_LOOP_INFO (L)))

/* Wrapper for loop_vec_info, for tracking success/failure, where a non-NULL
   value signifies success, and a NULL value signifies failure, supporting
   propagating an opt_problem * describing the failure back up the call
   stack.  */
typedef opt_pointer_wrapper <loop_vec_info> opt_loop_vec_info;

static inline loop_vec_info
loop_vec_info_for_loop (class loop *loop)
{
  return (loop_vec_info) loop->aux;
}

typedef class _bb_vec_info : public vec_info
{
public:
  _bb_vec_info (gimple_stmt_iterator, gimple_stmt_iterator, vec_info_shared *);
  ~_bb_vec_info ();

  basic_block bb;
  gimple_stmt_iterator region_begin;
  gimple_stmt_iterator region_end;
} *bb_vec_info;

#define BB_VINFO_BB(B)               (B)->bb
#define BB_VINFO_GROUPED_STORES(B)   (B)->grouped_stores
#define BB_VINFO_SLP_INSTANCES(B)    (B)->slp_instances
#define BB_VINFO_DATAREFS(B)         (B)->shared->datarefs
#define BB_VINFO_DDRS(B)             (B)->shared->ddrs
#define BB_VINFO_TARGET_COST_DATA(B) (B)->target_cost_data

static inline bb_vec_info
vec_info_for_bb (basic_block bb)
{
  return (bb_vec_info) bb->aux;
}

/*-----------------------------------------------------------------*/
/* Info on vectorized defs.                                        */
/*-----------------------------------------------------------------*/
enum stmt_vec_info_type {
  undef_vec_info_type = 0,
  load_vec_info_type,
  store_vec_info_type,
  shift_vec_info_type,
  op_vec_info_type,
  call_vec_info_type,
  call_simd_clone_vec_info_type,
  assignment_vec_info_type,
  condition_vec_info_type,
  comparison_vec_info_type,
  reduc_vec_info_type,
  induc_vec_info_type,
  type_promotion_vec_info_type,
  type_demotion_vec_info_type,
  type_conversion_vec_info_type,
  cycle_phi_info_type,
  lc_phi_info_type,
  loop_exit_ctrl_vec_info_type
};

/* Indicates whether/how a variable is used in the scope of a loop/basic
   block.  */
enum vect_relevant {
  vect_unused_in_scope = 0,

  /* The def is only used outside the loop.  */
  vect_used_only_live,
  /* The def is in the inner loop, and the use is in the outer loop, and the
     use is a reduction stmt.  */
  vect_used_in_outer_by_reduction,
  /* The def is in the inner loop, and the use is in the outer loop (and is
     not part of a reduction).  */
  vect_used_in_outer,

  /* Defs that feed computations that end up (only) in a reduction.  These
     defs may be used by non-reduction stmts, but eventually, any
     computations/values that are affected by these defs are used to compute
     a reduction (i.e. don't get stored to memory, for example).  We use this
     to identify computations whose order of evaluation we may change.  */
  vect_used_by_reduction,

  vect_used_in_scope
};

/* The type of vectorization that can be applied to the stmt: regular
   loop-based vectorization; pure SLP - the stmt is a part of SLP instances
   and does not have uses outside SLP instances; or hybrid SLP and loop-based
   - the stmt is a part of an SLP instance and also must be loop-based
   vectorized, since it has uses outside SLP sequences.

   In the loop context the meanings of pure and hybrid SLP are slightly
   different.  By saying that pure SLP is applied to the loop, we mean that we
   exploit only intra-iteration parallelism in the loop; i.e., the loop can be
   vectorized without doing any conceptual unrolling, because we don't pack
   together stmts from different iterations, only within a single iteration.
   Loop hybrid SLP means that we exploit both intra-iteration and
   inter-iteration parallelism (e.g., the number of elements in the vector is
   4 and the slp-group-size is 2, in which case we don't have enough
   parallelism within an iteration, so we obtain the rest of the parallelism
   from subsequent iterations by unrolling the loop by 2).  */
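/* To make the unrolling arithmetic above concrete (illustrative numbers
   only): with 4-element vectors and an SLP group of 2 scalar stmts, one
   vector stmt covers the group from 4 / 2 = 2 consecutive scalar
   iterations, so a pure-SLP loop is transformed as if unrolled by 2 even
   though no explicit unrolling is performed.  */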
enum slp_vect_type {
  loop_vect = 0,
  pure_slp,
  hybrid
};

/* Says whether a statement is a load, a store of a vectorized statement
   result, or a store of an invariant value.  */
enum vec_load_store_type {
  VLS_LOAD,
  VLS_STORE,
  VLS_STORE_INVARIANT
};

/* Describes how we're going to vectorize an individual load or store,
   or a group of loads or stores.  */
enum vect_memory_access_type {
  /* An access to an invariant address.  This is used only for loads.  */
  VMAT_INVARIANT,

  /* A simple contiguous access.  */
  VMAT_CONTIGUOUS,

  /* A contiguous access that goes down in memory rather than up,
     with no additional permutation.  This is used only for stores
     of invariants.  */
  VMAT_CONTIGUOUS_DOWN,

  /* A simple contiguous access in which the elements need to be permuted
     after loading or before storing.  Only used for loop vectorization;
     SLP uses separate permutes.  */
  VMAT_CONTIGUOUS_PERMUTE,

  /* A simple contiguous access in which the elements need to be reversed
     after loading or before storing.  */
  VMAT_CONTIGUOUS_REVERSE,

  /* An access that uses IFN_LOAD_LANES or IFN_STORE_LANES.  */
  VMAT_LOAD_STORE_LANES,

  /* An access in which each scalar element is loaded or stored
     individually.  */
  VMAT_ELEMENTWISE,

  /* A hybrid of VMAT_CONTIGUOUS and VMAT_ELEMENTWISE, used for grouped
     SLP accesses.  Each unrolled iteration uses a contiguous load
     or store for the whole group, but the groups from separate iterations
     are combined in the same way as for VMAT_ELEMENTWISE.  */
  VMAT_STRIDED_SLP,

  /* The access uses gather loads or scatter stores.  */
  VMAT_GATHER_SCATTER
};

class dr_vec_info {
public:
  /* The data reference itself.  */
  data_reference *dr;
  /* The statement that contains the data reference.  */
  stmt_vec_info stmt;
  /* The misalignment in bytes of the reference, or -1 if not known.  */
  int misalignment;
  /* The byte alignment that we'd ideally like the reference to have,
     and the value that misalignment is measured against.  */
  poly_uint64 target_alignment;
  /* If true the alignment of base_decl needs to be increased.  */
  bool base_misaligned;
  tree base_decl;

  /* Stores the current vectorized loop's offset.  To be added to the DR's
     offset to calculate the current offset of the data reference.  */
  tree offset;
};
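/* An illustrative reading of the alignment fields above: if
   target_alignment is 16 and misalignment is 4, the access is known to be
   4 bytes past a 16-byte boundary; misalignment == 0 means the access is
   aligned to target_alignment, and misalignment == -1 means the offset
   from that boundary is unknown.  */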
typedef struct data_reference *dr_p;

class _stmt_vec_info {
public:

  enum stmt_vec_info_type type;

  /* Indicates whether this stmt is part of a computation whose result is
     used outside the loop.  */
  bool live;

  /* Stmt is part of some pattern (computation idiom).  */
  bool in_pattern_p;

  /* True if the statement was created during pattern recognition as
     part of the replacement for RELATED_STMT.  This implies that the
     statement isn't part of any basic block, although for convenience
     its gimple_bb is the same as for RELATED_STMT.  */
  bool pattern_stmt_p;

  /* Is this statement vectorizable or should it be skipped in (partial)
     vectorization.  */
  bool vectorizable;

  /* The stmt to which this info struct refers.  */
  gimple *stmt;

  /* The vec_info with respect to which STMT is vectorized.  */
  vec_info *vinfo;

  /* The vector type to be used for the LHS of this statement.  */
  tree vectype;

  /* The vectorized version of the stmt.  */
  stmt_vec_info vectorized_stmt;


  /* The following is relevant only for stmts that contain a non-scalar
     data-ref (array/pointer/struct access).  A GIMPLE stmt is expected to
     have at most one such data-ref.  */

  dr_vec_info dr_aux;

  /* Information about the data-ref relative to this loop
     nest (the loop that is being considered for vectorization).  */
  innermost_loop_behavior dr_wrt_vec_loop;

  /* For loop PHI nodes, the base and evolution part of it.  This makes sure
     this information is still available in vect_update_ivs_after_vectorizer
     where we may not be able to re-analyze the PHI node's evolution as
     peeling for the prologue loop can make it unanalyzable.  The evolution
     part is still correct after peeling, but the base may have changed from
     the version here.  */
  tree loop_phi_evolution_base_unchanged;
  tree loop_phi_evolution_part;

  /* Used for various bookkeeping purposes, generally holding a pointer to
     some other stmt S that is in some way "related" to this stmt.
     Current use of this field is:
        If this stmt is part of a pattern (i.e. the field 'in_pattern_p' is
        true): S is the "pattern stmt" that represents (and replaces) the
        sequence of stmts that constitutes the pattern.  Similarly, the
        related_stmt of the "pattern stmt" points back to this stmt (which is
        the last stmt in the original sequence of stmts that constitutes the
        pattern).  */
  stmt_vec_info related_stmt;

  /* Used to keep a sequence of def stmts of a pattern stmt if such exists.
     The sequence is attached to the original statement rather than the
     pattern statement.  */
  gimple_seq pattern_def_seq;

  /* List of datarefs that are known to have the same alignment as the dataref
     of this stmt.  */
  vec<dr_p> same_align_refs;

  /* Selected SIMD clone's function info.  First vector element
     is the SIMD clone's function decl, followed by a pair of trees
     (base + step) for linear arguments (pair of NULLs for other
     arguments).  */
  vec<tree> simd_clone_info;

  /* Classify the def of this stmt.  */
  enum vect_def_type def_type;

  /* Whether the stmt is SLPed, loop-based vectorized, or both.  */
  enum slp_vect_type slp_type;

  /* Interleaving and reduction chains info.  */
  /* First element in the group.  */
  stmt_vec_info first_element;
  /* Pointer to the next element in the group.  */
  stmt_vec_info next_element;
  /* The size of the group.  */
  unsigned int size;
  /* For stores, the number of stores from this group seen.  We vectorize
     the last one.  */
  unsigned int store_count;
  /* For loads only, the gap from the previous load.  For consecutive loads,
     GAP is 1.  */
  unsigned int gap;

  /* The minimum negative dependence distance this stmt participates in
     or zero if none.  */
  unsigned int min_neg_dist;

  /* Not all stmts in the loop need to be vectorized, e.g., the increment
     of the loop induction variable and the computation of array indexes.
     'relevant' indicates whether the stmt needs to be vectorized.  */
  enum vect_relevant relevant;

  /* For loads if this is a gather, for stores if this is a scatter.  */
  bool gather_scatter_p;

  /* True if this is an access with loop-invariant stride.  */
  bool strided_p;

  /* For both loads and stores.  */
  unsigned simd_lane_access_p : 3;

  /* Classifies how the load or store is going to be implemented
     for loop vectorization.  */
  vect_memory_access_type memory_access_type;

  /* For INTEGER_INDUC_COND_REDUCTION, the initial value to be used.  */
  tree induc_cond_initial_val;

  /* If not NULL, the value to be added to compute the final reduction
     value.  */
  tree reduc_epilogue_adjustment;

  /* On a reduction PHI the reduction type as detected by
     vect_is_simple_reduction and vectorizable_reduction.  */
  enum vect_reduction_type reduc_type;

  /* The original reduction code, to be used in the epilogue.  */
  enum tree_code reduc_code;
  /* An internal function we should use in the epilogue.  */
  internal_fn reduc_fn;

  /* On a stmt participating in the reduction, the index of the operand
     on the reduction SSA cycle.  */
  int reduc_idx;

  /* On a reduction PHI the def returned by vect_force_simple_reduction.
     On the def returned by vect_force_simple_reduction the
     corresponding PHI.  */
  stmt_vec_info reduc_def;

  /* The vector input type relevant for reduction vectorization.  */
  tree reduc_vectype_in;

  /* The vector type for performing the actual reduction.  */
  tree reduc_vectype;

  /* Whether we force a single cycle PHI during reduction vectorization.  */
  bool force_single_cycle;

  /* Whether reduction meta is recorded on this stmt.  */
  bool is_reduc_info;

  /* The number of scalar stmt references from active SLP instances.  */
  unsigned int num_slp_uses;

  /* If nonzero, the lhs of the statement could be truncated to this
     many bits without affecting any users of the result.  */
  unsigned int min_output_precision;

  /* If nonzero, all non-boolean input operands have the same precision,
     and they could each be truncated to this many bits without changing
     the result.  */
  unsigned int min_input_precision;

  /* If OPERATION_BITS is nonzero, the statement could be performed on
     an integer with the sign and number of bits given by OPERATION_SIGN
     and OPERATION_BITS without changing the result.  */
  unsigned int operation_precision;
  signop operation_sign;

  /* If the statement produces a boolean result, this value describes
     how we should choose the associated vector type.  The possible
     values are:

     - an integer precision N if we should use the vector mask type
       associated with N-bit integers.  This is only used if all relevant
       input booleans also want the vector mask type for N-bit integers,
       or if we can convert them into that form by pattern-matching.

     - ~0U if we considered choosing a vector mask type but decided
       to treat the boolean as a normal integer type instead.

     - 0 otherwise.  This means either that the operation isn't one that
       could have a vector mask type (and so should have a normal vector
       type instead) or that we simply haven't made a choice either way.  */
  unsigned int mask_precision;

  /* True if this is only suitable for SLP vectorization.  */
  bool slp_vect_only_p;
};

/* Information about a gather/scatter call.  */
struct gather_scatter_info {
  /* The internal function to use for the gather/scatter operation,
     or IFN_LAST if a built-in function should be used instead.  */
  internal_fn ifn;

  /* The FUNCTION_DECL for the built-in gather/scatter function,
     or null if an internal function should be used instead.  */
  tree decl;

  /* The loop-invariant base value.  */
  tree base;

  /* The original scalar offset, which is a non-loop-invariant SSA_NAME.  */
  tree offset;

  /* Each offset element should be multiplied by this amount before
     being added to the base.  */
  int scale;

  /* The definition type for the vectorized offset.  */
  enum vect_def_type offset_dt;

  /* The type of the vectorized offset.  */
  tree offset_vectype;

  /* The type of the scalar elements after loading or before storing.  */
  tree element_type;

  /* The type of the scalar elements being loaded or stored.  */
  tree memory_type;
};
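/* An addressing sketch (illustrative, derived from the field comments
   above): lane I of a gather load reads a MEMORY_TYPE element at

     BASE + OFFSET[I] * SCALE

   and a scatter store writes the corresponding lane to the same
   address.  */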
/* Access Functions.  */
#define STMT_VINFO_TYPE(S)                 (S)->type
#define STMT_VINFO_STMT(S)                 (S)->stmt
inline loop_vec_info
STMT_VINFO_LOOP_VINFO (stmt_vec_info stmt_vinfo)
{
  if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (stmt_vinfo->vinfo))
    return loop_vinfo;
  return NULL;
}
inline bb_vec_info
STMT_VINFO_BB_VINFO (stmt_vec_info stmt_vinfo)
{
  if (bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (stmt_vinfo->vinfo))
    return bb_vinfo;
  return NULL;
}
#define STMT_VINFO_RELEVANT(S)             (S)->relevant
#define STMT_VINFO_LIVE_P(S)               (S)->live
#define STMT_VINFO_VECTYPE(S)              (S)->vectype
#define STMT_VINFO_VEC_STMT(S)             (S)->vectorized_stmt
#define STMT_VINFO_VECTORIZABLE(S)         (S)->vectorizable
#define STMT_VINFO_DATA_REF(S)             ((S)->dr_aux.dr + 0)
#define STMT_VINFO_GATHER_SCATTER_P(S)     (S)->gather_scatter_p
#define STMT_VINFO_STRIDED_P(S)            (S)->strided_p
#define STMT_VINFO_MEMORY_ACCESS_TYPE(S)   (S)->memory_access_type
#define STMT_VINFO_SIMD_LANE_ACCESS_P(S)   (S)->simd_lane_access_p
#define STMT_VINFO_VEC_INDUC_COND_INITIAL_VAL(S) (S)->induc_cond_initial_val
#define STMT_VINFO_REDUC_EPILOGUE_ADJUSTMENT(S) (S)->reduc_epilogue_adjustment
#define STMT_VINFO_REDUC_IDX(S)            (S)->reduc_idx
#define STMT_VINFO_FORCE_SINGLE_CYCLE(S)   (S)->force_single_cycle

#define STMT_VINFO_DR_WRT_VEC_LOOP(S)      (S)->dr_wrt_vec_loop
#define STMT_VINFO_DR_BASE_ADDRESS(S)      (S)->dr_wrt_vec_loop.base_address
#define STMT_VINFO_DR_INIT(S)              (S)->dr_wrt_vec_loop.init
#define STMT_VINFO_DR_OFFSET(S)            (S)->dr_wrt_vec_loop.offset
#define STMT_VINFO_DR_STEP(S)              (S)->dr_wrt_vec_loop.step
#define STMT_VINFO_DR_BASE_ALIGNMENT(S)    (S)->dr_wrt_vec_loop.base_alignment
#define STMT_VINFO_DR_BASE_MISALIGNMENT(S) \
  (S)->dr_wrt_vec_loop.base_misalignment
#define STMT_VINFO_DR_INFO(S) \
  (gcc_checking_assert ((S)->dr_aux.stmt == (S)), &(S)->dr_aux)

#define STMT_VINFO_IN_PATTERN_P(S)         (S)->in_pattern_p
#define STMT_VINFO_RELATED_STMT(S)         (S)->related_stmt
#define STMT_VINFO_PATTERN_DEF_SEQ(S)      (S)->pattern_def_seq
#define STMT_VINFO_SAME_ALIGN_REFS(S)      (S)->same_align_refs
#define STMT_VINFO_SIMD_CLONE_INFO(S)      (S)->simd_clone_info
#define STMT_VINFO_DEF_TYPE(S)             (S)->def_type
#define STMT_VINFO_GROUPED_ACCESS(S) \
  ((S)->dr_aux.dr && DR_GROUP_FIRST_ELEMENT (S))
#define STMT_VINFO_LOOP_PHI_EVOLUTION_BASE_UNCHANGED(S) \
  (S)->loop_phi_evolution_base_unchanged
#define STMT_VINFO_LOOP_PHI_EVOLUTION_PART(S) (S)->loop_phi_evolution_part
#define STMT_VINFO_MIN_NEG_DIST(S)         (S)->min_neg_dist
#define STMT_VINFO_NUM_SLP_USES(S)         (S)->num_slp_uses
#define STMT_VINFO_REDUC_TYPE(S)           (S)->reduc_type
#define STMT_VINFO_REDUC_CODE(S)           (S)->reduc_code
#define STMT_VINFO_REDUC_FN(S)             (S)->reduc_fn
#define STMT_VINFO_REDUC_DEF(S)            (S)->reduc_def
#define STMT_VINFO_REDUC_VECTYPE(S)        (S)->reduc_vectype
#define STMT_VINFO_REDUC_VECTYPE_IN(S)     (S)->reduc_vectype_in
#define STMT_VINFO_SLP_VECT_ONLY(S)        (S)->slp_vect_only_p

#define DR_GROUP_FIRST_ELEMENT(S) \
  (gcc_checking_assert ((S)->dr_aux.dr), (S)->first_element)
#define DR_GROUP_NEXT_ELEMENT(S) \
  (gcc_checking_assert ((S)->dr_aux.dr), (S)->next_element)
#define DR_GROUP_SIZE(S) \
  (gcc_checking_assert ((S)->dr_aux.dr), (S)->size)
#define DR_GROUP_STORE_COUNT(S) \
  (gcc_checking_assert ((S)->dr_aux.dr), (S)->store_count)
#define DR_GROUP_GAP(S) \
  (gcc_checking_assert ((S)->dr_aux.dr), (S)->gap)

#define REDUC_GROUP_FIRST_ELEMENT(S) \
  (gcc_checking_assert (!(S)->dr_aux.dr), (S)->first_element)
#define REDUC_GROUP_NEXT_ELEMENT(S) \
  (gcc_checking_assert (!(S)->dr_aux.dr), (S)->next_element)
#define REDUC_GROUP_SIZE(S) \
  (gcc_checking_assert (!(S)->dr_aux.dr), (S)->size)
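/* Usage sketch (illustrative only): the DR_GROUP_* accessors above are
   typically used to walk an interleaving chain, e.g.:

     for (stmt_vec_info s = DR_GROUP_FIRST_ELEMENT (stmt_info);
          s; s = DR_GROUP_NEXT_ELEMENT (s))
       ... visit each member of the grouped access ...

   The checking asserts guarantee that the DR_GROUP_* forms are only
   applied to statements that have a data reference, and the
   REDUC_GROUP_* forms only to statements that do not.  */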
#define STMT_VINFO_RELEVANT_P(S) ((S)->relevant != vect_unused_in_scope)

#define HYBRID_SLP_STMT(S)                 ((S)->slp_type == hybrid)
#define PURE_SLP_STMT(S)                   ((S)->slp_type == pure_slp)
#define STMT_SLP_TYPE(S)                   (S)->slp_type

#define VECT_MAX_COST 1000

/* The maximum number of intermediate steps required in multi-step type
   conversion.  */
#define MAX_INTERM_CVT_STEPS 3

#define MAX_VECTORIZATION_FACTOR INT_MAX

/* Nonzero if TYPE represents a (scalar) boolean type or type
   in the middle-end compatible with it (unsigned precision 1 integral
   types).  Used to determine which types should be vectorized as
   VECTOR_BOOLEAN_TYPE_P.  */

#define VECT_SCALAR_BOOLEAN_TYPE_P(TYPE) \
  (TREE_CODE (TYPE) == BOOLEAN_TYPE \
   || ((TREE_CODE (TYPE) == INTEGER_TYPE \
        || TREE_CODE (TYPE) == ENUMERAL_TYPE) \
       && TYPE_PRECISION (TYPE) == 1 \
       && TYPE_UNSIGNED (TYPE)))

static inline bool
nested_in_vect_loop_p (class loop *loop, stmt_vec_info stmt_info)
{
  return (loop->inner
          && (loop->inner == (gimple_bb (stmt_info->stmt))->loop_father));
}

/* Return true if STMT_INFO should produce a vector mask type rather than
   a normal nonmask type.  */

static inline bool
vect_use_mask_type_p (stmt_vec_info stmt_info)
{
  return stmt_info->mask_precision && stmt_info->mask_precision != ~0U;
}

/* Return TRUE if a statement represented by STMT_INFO is a part of a
   pattern.  */

static inline bool
is_pattern_stmt_p (stmt_vec_info stmt_info)
{
  return stmt_info->pattern_stmt_p;
}

/* If STMT_INFO is a pattern statement, return the statement that it
   replaces, otherwise return STMT_INFO itself.  */

inline stmt_vec_info
vect_orig_stmt (stmt_vec_info stmt_info)
{
  if (is_pattern_stmt_p (stmt_info))
    return STMT_VINFO_RELATED_STMT (stmt_info);
  return stmt_info;
}

/* Return the later statement between STMT1_INFO and STMT2_INFO.  */

static inline stmt_vec_info
get_later_stmt (stmt_vec_info stmt1_info, stmt_vec_info stmt2_info)
{
  if (gimple_uid (vect_orig_stmt (stmt1_info)->stmt)
      > gimple_uid (vect_orig_stmt (stmt2_info)->stmt))
    return stmt1_info;
  else
    return stmt2_info;
}

/* If STMT_INFO has been replaced by a pattern statement, return the
   replacement statement, otherwise return STMT_INFO itself.  */

inline stmt_vec_info
vect_stmt_to_vectorize (stmt_vec_info stmt_info)
{
  if (STMT_VINFO_IN_PATTERN_P (stmt_info))
    return STMT_VINFO_RELATED_STMT (stmt_info);
  return stmt_info;
}
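/* Worked example (illustrative): if a scalar statement S was replaced
   by a pattern statement P, then STMT_VINFO_IN_PATTERN_P (S) is true
   and the two statements point at each other via
   STMT_VINFO_RELATED_STMT, so:

     vect_orig_stmt (P) == S
     vect_stmt_to_vectorize (S) == P
     vect_stmt_to_vectorize (vect_orig_stmt (P)) == P

   For a statement that is not involved in a pattern, both functions
   are the identity.  */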
/* Return true if BB is a loop header.  */

static inline bool
is_loop_header_bb_p (basic_block bb)
{
  if (bb == (bb->loop_father)->header)
    return true;
  gcc_checking_assert (EDGE_COUNT (bb->preds) == 1);
  return false;
}

/* Return pow2 (X), i.e. 2 raised to the power X.  */

static inline int
vect_pow2 (int x)
{
  int i, res = 1;

  for (i = 0; i < x; i++)
    res *= 2;

  return res;
}

/* Alias targetm.vectorize.builtin_vectorization_cost.  */

static inline int
builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
                            tree vectype, int misalign)
{
  return targetm.vectorize.builtin_vectorization_cost (type_of_cost,
                                                       vectype, misalign);
}

/* Get the cost of a statement of kind TYPE_OF_COST by calling the
   target's cost hook.  */

static inline int
vect_get_stmt_cost (enum vect_cost_for_stmt type_of_cost)
{
  return builtin_vectorization_cost (type_of_cost, NULL, 0);
}

/* Alias targetm.vectorize.init_cost.  */

static inline void *
init_cost (class loop *loop_info)
{
  return targetm.vectorize.init_cost (loop_info);
}

extern void dump_stmt_cost (FILE *, void *, int, enum vect_cost_for_stmt,
                            stmt_vec_info, int, unsigned,
                            enum vect_cost_model_location);

/* Alias targetm.vectorize.add_stmt_cost.  */

static inline unsigned
add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
               stmt_vec_info stmt_info, int misalign,
               enum vect_cost_model_location where)
{
  unsigned cost = targetm.vectorize.add_stmt_cost (data, count, kind,
                                                   stmt_info, misalign, where);
  if (dump_file && (dump_flags & TDF_DETAILS))
    dump_stmt_cost (dump_file, data, count, kind, stmt_info, misalign,
                    cost, where);
  return cost;
}

/* Alias targetm.vectorize.finish_cost.  */

static inline void
finish_cost (void *data, unsigned *prologue_cost,
             unsigned *body_cost, unsigned *epilogue_cost)
{
  targetm.vectorize.finish_cost (data, prologue_cost, body_cost,
                                 epilogue_cost);
}

/* Alias targetm.vectorize.destroy_cost_data.  */

static inline void
destroy_cost_data (void *data)
{
  targetm.vectorize.destroy_cost_data (data);
}

inline void
add_stmt_costs (void *data, stmt_vector_for_cost *cost_vec)
{
  stmt_info_for_cost *cost;
  unsigned i;
  FOR_EACH_VEC_ELT (*cost_vec, i, cost)
    add_stmt_cost (data, cost->count, cost->kind, cost->stmt_info,
                   cost->misalign, cost->where);
}
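/* Usage sketch (illustrative only; the particular kind and location
   values are made up): the cost hooks above follow an
   init/add/finish/destroy protocol:

     void *data = init_cost (loop);
     add_stmt_cost (data, 1, vector_stmt, stmt_info, 0, vect_body);
     unsigned prologue_cost, body_cost, epilogue_cost;
     finish_cost (data, &prologue_cost, &body_cost, &epilogue_cost);
     destroy_cost_data (data);

   add_stmt_costs simply replays a recorded stmt_vector_for_cost
   through add_stmt_cost.  */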
/*-----------------------------------------------------------------*/
/* Info on data references alignment.                              */
/*-----------------------------------------------------------------*/
#define DR_MISALIGNMENT_UNKNOWN (-1)
#define DR_MISALIGNMENT_UNINITIALIZED (-2)

inline void
set_dr_misalignment (dr_vec_info *dr_info, int val)
{
  dr_info->misalignment = val;
}

inline int
dr_misalignment (dr_vec_info *dr_info)
{
  int misalign = dr_info->misalignment;
  gcc_assert (misalign != DR_MISALIGNMENT_UNINITIALIZED);
  return misalign;
}

/* Reflects actual alignment of first access in the vectorized loop,
   taking into account peeling/versioning if applied.  */
#define DR_MISALIGNMENT(DR) dr_misalignment (DR)
#define SET_DR_MISALIGNMENT(DR, VAL) set_dr_misalignment (DR, VAL)

/* Only defined once DR_MISALIGNMENT is defined.  */
#define DR_TARGET_ALIGNMENT(DR) ((DR)->target_alignment)

/* Return true if data access DR_INFO is aligned to its target alignment
   (which may be less than a full vector).  */

static inline bool
aligned_access_p (dr_vec_info *dr_info)
{
  return (DR_MISALIGNMENT (dr_info) == 0);
}

/* Return TRUE if the alignment of the data access is known, and FALSE
   otherwise.  */

static inline bool
known_alignment_for_access_p (dr_vec_info *dr_info)
{
  return (DR_MISALIGNMENT (dr_info) != DR_MISALIGNMENT_UNKNOWN);
}

/* Return the minimum alignment in bytes that the vectorized version
   of DR_INFO is guaranteed to have.  */

static inline unsigned int
vect_known_alignment_in_bytes (dr_vec_info *dr_info)
{
  if (DR_MISALIGNMENT (dr_info) == DR_MISALIGNMENT_UNKNOWN)
    return TYPE_ALIGN_UNIT (TREE_TYPE (DR_REF (dr_info->dr)));
  if (DR_MISALIGNMENT (dr_info) == 0)
    return known_alignment (DR_TARGET_ALIGNMENT (dr_info));
  return DR_MISALIGNMENT (dr_info) & -DR_MISALIGNMENT (dr_info);
}
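/* Worked example (illustrative): misalignments are measured in bytes
   relative to DR_TARGET_ALIGNMENT.  After analysis, DR_MISALIGNMENT is
   DR_MISALIGNMENT_UNKNOWN, 0 (fully aligned), or a positive byte
   offset.  E.g. with a 16-byte target alignment:

     SET_DR_MISALIGNMENT (dr_info, 4);
     known_alignment_for_access_p (dr_info)   => true
     aligned_access_p (dr_info)               => false

   yet the access is still guaranteed 4-byte alignment, which
   vect_known_alignment_in_bytes above recovers as the lowest set bit
   of the misalignment (4 & -4 == 4).  */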
/* Return the behavior of DR_INFO with respect to the vectorization context
   (which for outer loop vectorization might not be the behavior recorded
   in DR_INFO itself).  */

static inline innermost_loop_behavior *
vect_dr_behavior (dr_vec_info *dr_info)
{
  stmt_vec_info stmt_info = dr_info->stmt;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  if (loop_vinfo == NULL
      || !nested_in_vect_loop_p (LOOP_VINFO_LOOP (loop_vinfo), stmt_info))
    return &DR_INNERMOST (dr_info->dr);
  else
    return &STMT_VINFO_DR_WRT_VEC_LOOP (stmt_info);
}

/* Return the offset calculated by adding the offset of this DR_INFO to the
   corresponding data_reference's offset.  If CHECK_OUTER then use
   vect_dr_behavior to select the appropriate data_reference to use.  */

inline tree
get_dr_vinfo_offset (dr_vec_info *dr_info, bool check_outer = false)
{
  innermost_loop_behavior *base;
  if (check_outer)
    base = vect_dr_behavior (dr_info);
  else
    base = &dr_info->dr->innermost;

  tree offset = base->offset;

  if (!dr_info->offset)
    return offset;

  offset = fold_convert (sizetype, offset);
  return fold_build2 (PLUS_EXPR, TREE_TYPE (dr_info->offset), offset,
                      dr_info->offset);
}

/* Return true if the vect cost model is unlimited.  */
static inline bool
unlimited_cost_model (loop_p loop)
{
  if (loop != NULL && loop->force_vectorize
      && flag_simd_cost_model != VECT_COST_MODEL_DEFAULT)
    return flag_simd_cost_model == VECT_COST_MODEL_UNLIMITED;
  return (flag_vect_cost_model == VECT_COST_MODEL_UNLIMITED);
}

/* Return true if the loop described by LOOP_VINFO is fully-masked and
   if the first iteration should use a partial mask in order to achieve
   alignment.  */

static inline bool
vect_use_loop_mask_for_alignment_p (loop_vec_info loop_vinfo)
{
  return (LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
          && LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo));
}
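/* Sketch of the intended use (editorial, simplified): for an access in
   the inner loop of a nest where the outer loop is the one being
   vectorized, the innermost_loop_behavior stored in the data_reference
   describes the access relative to the inner loop, whereas the
   vectorizer needs it relative to the outer loop; vect_dr_behavior
   above makes that choice:

     innermost_loop_behavior *b = vect_dr_behavior (dr_info);
     tree base = b->base_address;
     tree step = b->step;

   Likewise get_dr_vinfo_offset (dr_info, true) folds any extra byte
   offset recorded in DR_INFO on top of the behavior selected this
   way.  */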
/* Return the number of vectors of type VECTYPE that are needed to get
   NUNITS elements.  NUNITS should be based on the vectorization factor,
   so it is always a known multiple of the number of elements in VECTYPE.  */

static inline unsigned int
vect_get_num_vectors (poly_uint64 nunits, tree vectype)
{
  return exact_div (nunits, TYPE_VECTOR_SUBPARTS (vectype)).to_constant ();
}

/* Return the number of copies needed for loop vectorization when
   a statement operates on vectors of type VECTYPE.  This is the
   vectorization factor divided by the number of elements in
   VECTYPE and is always known at compile time.  */

static inline unsigned int
vect_get_num_copies (loop_vec_info loop_vinfo, tree vectype)
{
  return vect_get_num_vectors (LOOP_VINFO_VECT_FACTOR (loop_vinfo), vectype);
}

/* Update maximum unit count *MAX_NUNITS so that it accounts for
   NUNITS.  *MAX_NUNITS can be 1 if we haven't yet recorded anything.  */

static inline void
vect_update_max_nunits (poly_uint64 *max_nunits, poly_uint64 nunits)
{
  /* All unit counts have the form vec_info::vector_size * X for some
     rational X, so two unit counts must have a common multiple.
     Everything is a multiple of the initial value of 1.  */
  *max_nunits = force_common_multiple (*max_nunits, nunits);
}

/* Update maximum unit count *MAX_NUNITS so that it accounts for
   the number of units in vector type VECTYPE.  *MAX_NUNITS can be 1
   if we haven't yet recorded any vector types.  */

static inline void
vect_update_max_nunits (poly_uint64 *max_nunits, tree vectype)
{
  vect_update_max_nunits (max_nunits, TYPE_VECTOR_SUBPARTS (vectype));
}

/* Return the vectorization factor that should be used for costing
   purposes while vectorizing the loop described by LOOP_VINFO.
   Pick a reasonable estimate if the vectorization factor isn't
   known at compile time.  */

static inline unsigned int
vect_vf_for_cost (loop_vec_info loop_vinfo)
{
  return estimated_poly_value (LOOP_VINFO_VECT_FACTOR (loop_vinfo));
}

/* Estimate the number of elements in VEC_TYPE for costing purposes.
   Pick a reasonable estimate if the exact number isn't known at
   compile time.  */

static inline unsigned int
vect_nunits_for_cost (tree vec_type)
{
  return estimated_poly_value (TYPE_VECTOR_SUBPARTS (vec_type));
}
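/* Worked example (illustrative): with a vectorization factor of 8 and
   a V4SI vector type (4 elements), vect_get_num_copies returns
   8 / 4 = 2: each scalar statement becomes two V4SI statements.  The
   same arithmetic holds for variable-length vectors in poly_uint64
   form, e.g. a VF of 8*X over a type with 4*X elements also yields
   exactly 2.  */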
/* Return the maximum possible vectorization factor for LOOP_VINFO.  */

static inline unsigned HOST_WIDE_INT
vect_max_vf (loop_vec_info loop_vinfo)
{
  unsigned HOST_WIDE_INT vf;
  if (LOOP_VINFO_VECT_FACTOR (loop_vinfo).is_constant (&vf))
    return vf;
  return MAX_VECTORIZATION_FACTOR;
}

/* Return the size of the value accessed by unvectorized data reference
   DR_INFO.  This is only valid once STMT_VINFO_VECTYPE has been calculated
   for the associated gimple statement, since that guarantees that DR_INFO
   accesses either a scalar or a scalar equivalent.  ("Scalar equivalent"
   here includes things like V1SI, which can be vectorized in the same way
   as a plain SI.)  */

inline unsigned int
vect_get_scalar_dr_size (dr_vec_info *dr_info)
{
  return tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr_info->dr))));
}

/* Return true if LOOP_VINFO requires a runtime check for whether the
   vector loop is profitable.  */

inline bool
vect_apply_runtime_profitability_check_p (loop_vec_info loop_vinfo)
{
  unsigned int th = LOOP_VINFO_COST_MODEL_THRESHOLD (loop_vinfo);
  return (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
          && th >= vect_vf_for_cost (loop_vinfo));
}

/* Source location + hotness information.  */
extern dump_user_location_t vect_location;

/* A macro for calling:
     dump_begin_scope (MSG, vect_location);
   via an RAII object, thus printing "=== MSG ===\n" to the dumpfile etc,
   and then calling
     dump_end_scope ();
   once the object goes out of scope, thus capturing the nesting of
   the scopes.

   These scopes affect dump messages within them: dump messages at the
   top level implicitly default to MSG_PRIORITY_USER_FACING, whereas those
   in a nested scope implicitly default to MSG_PRIORITY_INTERNALS.  */

#define DUMP_VECT_SCOPE(MSG) \
  AUTO_DUMP_SCOPE (MSG, vect_location)

/* A sentinel class for ensuring that the "vect_location" global gets
   reset at the end of a scope.

   The "vect_location" global is used during dumping and contains a
   location_t, which could contain references to a tree block via the
   ad-hoc data.  This data is used for tracking inlining information,
   but it's not a GC root; it's simply assumed that such locations never
   get accessed if the blocks are optimized away.

   Hence we need to ensure that such locations are purged at the end
   of any operations using them (e.g. via this class).  */

class auto_purge_vect_location
{
 public:
  ~auto_purge_vect_location ();
};
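/* Typical usage (this pattern appears throughout the vectorizer
   sources): placing

     DUMP_VECT_SCOPE ("vect_analyze_loop_form");

   at the top of a function prints "=== vect_analyze_loop_form ===" to
   the dump file on entry and closes the scope on exit, demoting dump
   messages emitted inside it to MSG_PRIORITY_INTERNALS.  */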
/*-----------------------------------------------------------------*/
/* Function prototypes.                                            */
/*-----------------------------------------------------------------*/

/* Simple loop peeling and versioning utilities for vectorizer's purposes -
   in tree-vect-loop-manip.c.  */
extern void vect_set_loop_condition (class loop *, loop_vec_info,
                                     tree, tree, tree, bool);
extern bool slpeel_can_duplicate_loop_p (const class loop *, const_edge);
class loop *slpeel_tree_duplicate_loop_to_edge_cfg (class loop *,
                                                    class loop *, edge);
class loop *vect_loop_versioning (loop_vec_info, gimple *);
extern class loop *vect_do_peeling (loop_vec_info, tree, tree,
                                    tree *, tree *, tree *, int, bool, bool,
                                    tree *);
extern void vect_prepare_for_masked_peels (loop_vec_info);
extern dump_user_location_t find_loop_location (class loop *);
extern bool vect_can_advance_ivs_p (loop_vec_info);
extern void vect_update_inits_of_drs (loop_vec_info, tree, tree_code);

/* In tree-vect-stmts.c.  */
extern tree get_related_vectype_for_scalar_type (machine_mode, tree,
                                                 poly_uint64 = 0);
extern tree get_vectype_for_scalar_type (vec_info *, tree, unsigned int = 0);
extern tree get_vectype_for_scalar_type (vec_info *, tree, slp_tree);
extern tree get_mask_type_for_scalar_type (vec_info *, tree, unsigned int = 0);
extern tree get_same_sized_vectype (tree, tree);
extern bool vect_chooses_same_modes_p (vec_info *, machine_mode);
extern bool vect_get_loop_mask_type (loop_vec_info);
extern bool vect_is_simple_use (tree, vec_info *, enum vect_def_type *,
                                stmt_vec_info * = NULL, gimple ** = NULL);
extern bool vect_is_simple_use (tree, vec_info *, enum vect_def_type *,
                                tree *, stmt_vec_info * = NULL,
                                gimple ** = NULL);
extern bool supportable_widening_operation (enum tree_code, stmt_vec_info,
                                            tree, tree, enum tree_code *,
                                            enum tree_code *, int *,
                                            vec<tree> *);
extern bool supportable_narrowing_operation (enum tree_code, tree, tree,
                                             enum tree_code *, int *,
                                             vec<tree> *);
extern unsigned record_stmt_cost (stmt_vector_for_cost *, int,
                                  enum vect_cost_for_stmt, stmt_vec_info,
                                  int, enum vect_cost_model_location);
extern stmt_vec_info vect_finish_replace_stmt (stmt_vec_info, gimple *);
extern stmt_vec_info vect_finish_stmt_generation (stmt_vec_info, gimple *,
                                                  gimple_stmt_iterator *);
extern opt_result vect_mark_stmts_to_be_vectorized (loop_vec_info, bool *);
extern tree vect_get_store_rhs (stmt_vec_info);
extern tree vect_get_vec_def_for_operand_1 (stmt_vec_info,
                                            enum vect_def_type);
extern tree vect_get_vec_def_for_operand (tree, stmt_vec_info, tree = NULL);
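/* Usage sketch (illustrative only): a typical operand query during
   analysis, using vect_is_simple_use declared above:

     enum vect_def_type dt;
     stmt_vec_info def_stmt_info;
     if (!vect_is_simple_use (op, vinfo, &dt, &def_stmt_info))
       return false;   // OP cannot be handled by the vectorizer
     if (dt == vect_constant_def || dt == vect_external_def)
       ... OP is invariant in the vectorized region ...

   The overload that takes a tree * additionally returns the vector
   type of the definition.  */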
extern void vect_get_vec_defs (tree, tree, stmt_vec_info, vec<tree> *,
                               vec<tree> *, slp_tree);
extern void vect_get_vec_defs_for_stmt_copy (vec_info *,
                                             vec<tree> *, vec<tree> *);
extern tree vect_init_vector (stmt_vec_info, tree, tree,
                              gimple_stmt_iterator *);
extern tree vect_get_vec_def_for_stmt_copy (vec_info *, tree);
extern bool vect_transform_stmt (stmt_vec_info, gimple_stmt_iterator *,
                                 slp_tree, slp_instance);
extern void vect_remove_stores (stmt_vec_info);
extern bool vect_nop_conversion_p (stmt_vec_info);
extern opt_result vect_analyze_stmt (stmt_vec_info, bool *, slp_tree,
                                     slp_instance, stmt_vector_for_cost *);
extern void vect_get_load_cost (stmt_vec_info, int, bool,
                                unsigned int *, unsigned int *,
                                stmt_vector_for_cost *,
                                stmt_vector_for_cost *, bool);
extern void vect_get_store_cost (stmt_vec_info, int,
                                 unsigned int *, stmt_vector_for_cost *);
extern bool vect_supportable_shift (vec_info *, enum tree_code, tree);
extern tree vect_gen_perm_mask_any (tree, const vec_perm_indices &);
extern tree vect_gen_perm_mask_checked (tree, const vec_perm_indices &);
extern void optimize_mask_stores (class loop *);
extern gcall *vect_gen_while (tree, tree, tree);
extern tree vect_gen_while_not (gimple_seq *, tree, tree, tree);
extern opt_result vect_get_vector_types_for_stmt (stmt_vec_info, tree *,
                                                  tree *, unsigned int = 0);
extern opt_tree vect_get_mask_type_for_stmt (stmt_vec_info, unsigned int = 0);
/* In tree-vect-data-refs.c.  */
extern bool vect_can_force_dr_alignment_p (const_tree, poly_uint64);
extern enum dr_alignment_support vect_supportable_dr_alignment
  (dr_vec_info *, bool);
extern tree vect_get_smallest_scalar_type (stmt_vec_info, HOST_WIDE_INT *,
                                           HOST_WIDE_INT *);
extern opt_result vect_analyze_data_ref_dependences (loop_vec_info,
                                                     unsigned int *);
extern bool vect_slp_analyze_instance_dependence (slp_instance);
extern opt_result vect_enhance_data_refs_alignment (loop_vec_info);
extern opt_result vect_analyze_data_refs_alignment (loop_vec_info);
extern opt_result vect_verify_datarefs_alignment (loop_vec_info);
extern bool vect_slp_analyze_and_verify_instance_alignment (slp_instance);
extern opt_result vect_analyze_data_ref_accesses (vec_info *);
extern opt_result vect_prune_runtime_alias_test_list (loop_vec_info);
extern bool vect_gather_scatter_fn_p (vec_info *, bool, bool, tree, tree,
                                      tree, int, internal_fn *, tree *);
extern bool vect_check_gather_scatter (stmt_vec_info, loop_vec_info,
                                       gather_scatter_info *);
extern opt_result vect_find_stmt_data_reference (loop_p, gimple *,
                                                 vec<data_reference_p> *);
extern opt_result vect_analyze_data_refs (vec_info *, poly_uint64 *, bool *);
extern void vect_record_base_alignments (vec_info *);
extern tree vect_create_data_ref_ptr (stmt_vec_info, tree, class loop *, tree,
                                      tree *, gimple_stmt_iterator *,
                                      gimple **, bool,
                                      tree = NULL_TREE, tree = NULL_TREE);
extern tree bump_vector_ptr (tree, gimple *, gimple_stmt_iterator *,
                             stmt_vec_info, tree);
extern void vect_copy_ref_info (tree, tree);
extern tree vect_create_destination_var (tree, tree);
extern bool vect_grouped_store_supported (tree, unsigned HOST_WIDE_INT);
extern bool vect_store_lanes_supported (tree, unsigned HOST_WIDE_INT, bool);
extern bool vect_grouped_load_supported (tree, bool, unsigned HOST_WIDE_INT);
extern bool vect_load_lanes_supported (tree, unsigned HOST_WIDE_INT, bool);
extern void vect_permute_store_chain (vec<tree>, unsigned int, stmt_vec_info,
                                      gimple_stmt_iterator *, vec<tree> *);
extern tree vect_setup_realignment (stmt_vec_info, gimple_stmt_iterator *,
                                    tree *, enum dr_alignment_support, tree,
                                    class loop **);
extern void vect_transform_grouped_load (stmt_vec_info, vec<tree>, int,
                                         gimple_stmt_iterator *);
extern void vect_record_grouped_load_vectors (stmt_vec_info, vec<tree>);
extern tree vect_get_new_vect_var (tree, enum vect_var_kind, const char *);
extern tree vect_get_new_ssa_name (tree, enum vect_var_kind,
                                   const char * = NULL);
extern tree vect_create_addr_base_for_vector_ref (stmt_vec_info, gimple_seq *,
                                                  tree, tree = NULL_TREE);
/* In tree-vect-loop.c.  */
extern widest_int vect_iv_limit_for_full_masking (loop_vec_info loop_vinfo);
/* Used in tree-vect-loop-manip.c.  */
extern void determine_peel_for_niter (loop_vec_info);
/* Used in gimple-loop-interchange.c and tree-parloops.c.  */
extern bool check_reduction_path (dump_user_location_t, loop_p, gphi *, tree,
                                  enum tree_code);
extern bool needs_fold_left_reduction_p (tree, tree_code);
/* Drive for loop analysis stage.  */
extern opt_loop_vec_info vect_analyze_loop (class loop *, vec_info_shared *);
extern tree vect_build_loop_niters (loop_vec_info, bool * = NULL);
extern void vect_gen_vector_loop_niters (loop_vec_info, tree, tree *,
                                         tree *, bool);
extern tree vect_halve_mask_nunits (tree, machine_mode);
extern tree vect_double_mask_nunits (tree, machine_mode);
extern void vect_record_loop_mask (loop_vec_info, vec_loop_masks *,
                                   unsigned int, tree, tree);
extern tree vect_get_loop_mask (gimple_stmt_iterator *, vec_loop_masks *,
                                unsigned int, tree, unsigned int);
extern stmt_vec_info info_for_reduction (stmt_vec_info);

/* Drive for loop transformation stage.  */
extern class loop *vect_transform_loop (loop_vec_info, gimple *);
extern opt_loop_vec_info vect_analyze_loop_form (class loop *,
                                                 vec_info_shared *);
extern bool vectorizable_live_operation (stmt_vec_info,
                                         gimple_stmt_iterator *,
                                         slp_tree, slp_instance, int,
                                         bool, stmt_vector_for_cost *);
extern bool vectorizable_reduction (stmt_vec_info, slp_tree, slp_instance,
                                    stmt_vector_for_cost *);
extern bool vectorizable_induction (stmt_vec_info, gimple_stmt_iterator *,
                                    stmt_vec_info *, slp_tree,
                                    stmt_vector_for_cost *);
extern bool vect_transform_reduction (stmt_vec_info, gimple_stmt_iterator *,
                                      stmt_vec_info *, slp_tree);
extern bool vect_transform_cycle_phi (stmt_vec_info, stmt_vec_info *,
                                      slp_tree, slp_instance);
extern bool vectorizable_lc_phi (stmt_vec_info, stmt_vec_info *, slp_tree);
extern bool vect_worthwhile_without_simd_p (vec_info *, tree_code);
extern int vect_get_known_peeling_cost (loop_vec_info, int, int *,
                                        stmt_vector_for_cost *,
                                        stmt_vector_for_cost *,
                                        stmt_vector_for_cost *);
extern tree cse_and_gimplify_to_preheader (loop_vec_info, tree);
/* In tree-vect-slp.c.  */
extern void vect_free_slp_instance (slp_instance, bool);
extern bool vect_transform_slp_perm_load (slp_tree, vec<tree>,
                                          gimple_stmt_iterator *, poly_uint64,
                                          slp_instance, bool, unsigned *);
extern bool vect_slp_analyze_operations (vec_info *);
extern void vect_schedule_slp (vec_info *);
extern opt_result vect_analyze_slp (vec_info *, unsigned);
extern bool vect_make_slp_decision (loop_vec_info);
extern void vect_detect_hybrid_slp (loop_vec_info);
extern void vect_get_slp_defs (slp_tree, vec<vec<tree> > *, unsigned n = -1U);
extern bool vect_slp_bb (basic_block);
extern stmt_vec_info vect_find_last_scalar_stmt_in_slp (slp_tree);
extern bool is_simple_and_all_uses_invariant (stmt_vec_info, loop_vec_info);
extern bool can_duplicate_and_interleave_p (vec_info *, unsigned int, tree,
                                            unsigned int * = NULL,
                                            tree * = NULL, tree * = NULL);
extern void duplicate_and_interleave (vec_info *, gimple_seq *, tree,
                                      vec<tree>, unsigned int, vec<tree> &);
extern int vect_get_place_in_interleaving_chain (stmt_vec_info,
                                                 stmt_vec_info);

/* In tree-vect-patterns.c.  */
/* Pattern recognition functions.
   Additional pattern recognition functions can (and will) be added
   in the future.  */
void vect_pattern_recog (vec_info *);

/* In tree-vectorizer.c.  */
unsigned vectorize_loops (void);
void vect_free_loop_info_assumptions (class loop *);
gimple *vect_loop_vectorized_call (class loop *, gcond **cond = NULL);

#endif  /* GCC_TREE_VECTORIZER_H  */