1 /* A representation of vector permutation indices. 2 Copyright (C) 2017-2018 Free Software Foundation, Inc. 3 4 This file is part of GCC. 5 6 GCC is free software; you can redistribute it and/or modify it under 7 the terms of the GNU General Public License as published by the Free 8 Software Foundation; either version 3, or (at your option) any later 9 version. 10 11 GCC is distributed in the hope that it will be useful, but WITHOUT ANY 12 WARRANTY; without even the implied warranty of MERCHANTABILITY or 13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 14 for more details. 15 16 You should have received a copy of the GNU General Public License 17 along with GCC; see the file COPYING3. If not see 18 <http://www.gnu.org/licenses/>. */ 19 20 #include "config.h" 21 #include "system.h" 22 #include "coretypes.h" 23 #include "vec-perm-indices.h" 24 #include "tree.h" 25 #include "fold-const.h" 26 #include "tree-vector-builder.h" 27 #include "backend.h" 28 #include "rtl.h" 29 #include "memmodel.h" 30 #include "emit-rtl.h" 31 #include "selftest.h" 32 #include "rtx-vector-builder.h" 33 34 /* Switch to a new permutation vector that selects between NINPUTS vector 35 inputs that have NELTS_PER_INPUT elements each. Take the elements of the 36 new permutation vector from ELEMENTS, clamping each one to be in range. */ 37 38 void 39 vec_perm_indices::new_vector (const vec_perm_builder &elements, 40 unsigned int ninputs, 41 poly_uint64 nelts_per_input) 42 { 43 m_ninputs = ninputs; 44 m_nelts_per_input = nelts_per_input; 45 /* If the vector has a constant number of elements, expand the 46 encoding and clamp each element. E.g. { 0, 2, 4, ... } might 47 wrap halfway if there is only one vector input, and we want 48 the wrapped form to be the canonical one. 49 50 If the vector has a variable number of elements, just copy 51 the encoding. In that case the unwrapped form is canonical 52 and there is no way of representing the wrapped form. */ 53 poly_uint64 full_nelts = elements.full_nelts (); 54 unsigned HOST_WIDE_INT copy_nelts; 55 if (full_nelts.is_constant (©_nelts)) 56 m_encoding.new_vector (full_nelts, copy_nelts, 1); 57 else 58 { 59 copy_nelts = elements.encoded_nelts (); 60 m_encoding.new_vector (full_nelts, elements.npatterns (), 61 elements.nelts_per_pattern ()); 62 } 63 unsigned int npatterns = m_encoding.npatterns (); 64 for (unsigned int i = 0; i < npatterns; ++i) 65 m_encoding.quick_push (clamp (elements.elt (i))); 66 /* Use the fact that: 67 68 (a + b) % c == ((a % c) + (b % c)) % c 69 70 to simplify the clamping of variable-length vectors. */ 71 for (unsigned int i = npatterns; i < copy_nelts; ++i) 72 { 73 element_type step = clamp (elements.elt (i) 74 - elements.elt (i - npatterns)); 75 m_encoding.quick_push (clamp (m_encoding[i - npatterns] + step)); 76 } 77 m_encoding.finalize (); 78 } 79 80 /* Switch to a new permutation vector that selects the same input elements 81 as ORIG, but with each element split into FACTOR pieces. For example, 82 if ORIG is { 1, 2, 0, 3 } and FACTOR is 2, the new permutation is 83 { 2, 3, 4, 5, 0, 1, 6, 7 }. */ 84 85 void 86 vec_perm_indices::new_expanded_vector (const vec_perm_indices &orig, 87 unsigned int factor) 88 { 89 m_ninputs = orig.m_ninputs; 90 m_nelts_per_input = orig.m_nelts_per_input * factor; 91 m_encoding.new_vector (orig.m_encoding.full_nelts () * factor, 92 orig.m_encoding.npatterns () * factor, 93 orig.m_encoding.nelts_per_pattern ()); 94 unsigned int encoded_nelts = orig.m_encoding.encoded_nelts (); 95 for (unsigned int i = 0; i < encoded_nelts; ++i) 96 { 97 element_type base = orig.m_encoding[i] * factor; 98 for (unsigned int j = 0; j < factor; ++j) 99 m_encoding.quick_push (base + j); 100 } 101 m_encoding.finalize (); 102 } 103 104 /* Rotate the inputs of the permutation right by DELTA inputs. This changes 105 the values of the permutation vector but it doesn't change the way that 106 the elements are encoded. */ 107 108 void 109 vec_perm_indices::rotate_inputs (int delta) 110 { 111 element_type element_delta = delta * m_nelts_per_input; 112 for (unsigned int i = 0; i < m_encoding.length (); ++i) 113 m_encoding[i] = clamp (m_encoding[i] + element_delta); 114 } 115 116 /* Return true if index OUT_BASE + I * OUT_STEP selects input 117 element IN_BASE + I * IN_STEP. For example, the call to test 118 whether a permute reverses a vector of N elements would be: 119 120 series_p (0, 1, N - 1, -1) 121 122 which would return true for { N - 1, N - 2, N - 3, ... }. 123 The calls to test for an interleaving of elements starting 124 at N1 and N2 would be: 125 126 series_p (0, 2, N1, 1) && series_p (1, 2, N2, 1). 127 128 which would return true for { N1, N2, N1 + 1, N2 + 1, ... }. */ 129 130 bool 131 vec_perm_indices::series_p (unsigned int out_base, unsigned int out_step, 132 element_type in_base, element_type in_step) const 133 { 134 /* Check the base value. */ 135 if (maybe_ne (clamp (m_encoding.elt (out_base)), clamp (in_base))) 136 return false; 137 138 element_type full_nelts = m_encoding.full_nelts (); 139 unsigned int npatterns = m_encoding.npatterns (); 140 141 /* Calculate which multiple of OUT_STEP elements we need to get 142 back to the same pattern. */ 143 unsigned int cycle_length = least_common_multiple (out_step, npatterns); 144 145 /* Check the steps. */ 146 in_step = clamp (in_step); 147 out_base += out_step; 148 unsigned int limit = 0; 149 for (;;) 150 { 151 /* Succeed if we've checked all the elements in the vector. */ 152 if (known_ge (out_base, full_nelts)) 153 return true; 154 155 if (out_base >= npatterns) 156 { 157 /* We've got to the end of the "foreground" values. Check 158 2 elements from each pattern in the "background" values. */ 159 if (limit == 0) 160 limit = out_base + cycle_length * 2; 161 else if (out_base >= limit) 162 return true; 163 } 164 165 element_type v0 = m_encoding.elt (out_base - out_step); 166 element_type v1 = m_encoding.elt (out_base); 167 if (maybe_ne (clamp (v1 - v0), in_step)) 168 return false; 169 170 out_base += out_step; 171 } 172 return true; 173 } 174 175 /* Return true if all elements of the permutation vector are in the range 176 [START, START + SIZE). */ 177 178 bool 179 vec_perm_indices::all_in_range_p (element_type start, element_type size) const 180 { 181 /* Check the first two elements of each pattern. */ 182 unsigned int npatterns = m_encoding.npatterns (); 183 unsigned int nelts_per_pattern = m_encoding.nelts_per_pattern (); 184 unsigned int base_nelts = npatterns * MIN (nelts_per_pattern, 2); 185 for (unsigned int i = 0; i < base_nelts; ++i) 186 if (!known_in_range_p (m_encoding[i], start, size)) 187 return false; 188 189 /* For stepped encodings, check the full range of the series. */ 190 if (nelts_per_pattern == 3) 191 { 192 element_type limit = input_nelts (); 193 194 /* The number of elements in each pattern beyond the first two 195 that we checked above. */ 196 poly_int64 step_nelts = exact_div (m_encoding.full_nelts (), 197 npatterns) - 2; 198 for (unsigned int i = 0; i < npatterns; ++i) 199 { 200 /* BASE1 has been checked but BASE2 hasn't. */ 201 element_type base1 = m_encoding[i + npatterns]; 202 element_type base2 = m_encoding[i + base_nelts]; 203 204 /* The step to add to get from BASE1 to each subsequent value. */ 205 element_type step = clamp (base2 - base1); 206 207 /* STEP has no inherent sign, so a value near LIMIT can 208 act as a negative step. The series is in range if it 209 is in range according to one of the two interpretations. 210 211 Since we're dealing with clamped values, ELEMENT_TYPE is 212 wide enough for overflow not to be a problem. */ 213 element_type headroom_down = base1 - start; 214 element_type headroom_up = size - headroom_down - 1; 215 HOST_WIDE_INT diff; 216 if ((!step.is_constant (&diff) 217 || maybe_lt (headroom_up, diff * step_nelts)) 218 && (!(limit - step).is_constant (&diff) 219 || maybe_lt (headroom_down, diff * step_nelts))) 220 return false; 221 } 222 } 223 return true; 224 } 225 226 /* Try to read the contents of VECTOR_CST CST as a constant permutation 227 vector. Return true and add the elements to BUILDER on success, 228 otherwise return false without modifying BUILDER. */ 229 230 bool 231 tree_to_vec_perm_builder (vec_perm_builder *builder, tree cst) 232 { 233 unsigned int encoded_nelts = vector_cst_encoded_nelts (cst); 234 for (unsigned int i = 0; i < encoded_nelts; ++i) 235 if (!tree_fits_poly_int64_p (VECTOR_CST_ENCODED_ELT (cst, i))) 236 return false; 237 238 builder->new_vector (TYPE_VECTOR_SUBPARTS (TREE_TYPE (cst)), 239 VECTOR_CST_NPATTERNS (cst), 240 VECTOR_CST_NELTS_PER_PATTERN (cst)); 241 for (unsigned int i = 0; i < encoded_nelts; ++i) 242 builder->quick_push (tree_to_poly_int64 (VECTOR_CST_ENCODED_ELT (cst, i))); 243 return true; 244 } 245 246 /* Return a VECTOR_CST of type TYPE for the permutation vector in INDICES. */ 247 248 tree 249 vec_perm_indices_to_tree (tree type, const vec_perm_indices &indices) 250 { 251 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (type), indices.length ())); 252 tree_vector_builder sel (type, indices.encoding ().npatterns (), 253 indices.encoding ().nelts_per_pattern ()); 254 unsigned int encoded_nelts = sel.encoded_nelts (); 255 for (unsigned int i = 0; i < encoded_nelts; i++) 256 sel.quick_push (build_int_cst (TREE_TYPE (type), indices[i])); 257 return sel.build (); 258 } 259 260 /* Return a CONST_VECTOR of mode MODE that contains the elements of 261 INDICES. */ 262 263 rtx 264 vec_perm_indices_to_rtx (machine_mode mode, const vec_perm_indices &indices) 265 { 266 gcc_assert (GET_MODE_CLASS (mode) == MODE_VECTOR_INT 267 && known_eq (GET_MODE_NUNITS (mode), indices.length ())); 268 rtx_vector_builder sel (mode, indices.encoding ().npatterns (), 269 indices.encoding ().nelts_per_pattern ()); 270 unsigned int encoded_nelts = sel.encoded_nelts (); 271 for (unsigned int i = 0; i < encoded_nelts; i++) 272 sel.quick_push (gen_int_mode (indices[i], GET_MODE_INNER (mode))); 273 return sel.build (); 274 } 275 276 #if CHECKING_P 277 278 namespace selftest { 279 280 /* Test a 12-element vector. */ 281 282 static void 283 test_vec_perm_12 (void) 284 { 285 vec_perm_builder builder (12, 12, 1); 286 for (unsigned int i = 0; i < 4; ++i) 287 { 288 builder.quick_push (i * 5); 289 builder.quick_push (3 + i); 290 builder.quick_push (2 + 3 * i); 291 } 292 vec_perm_indices indices (builder, 1, 12); 293 ASSERT_TRUE (indices.series_p (0, 3, 0, 5)); 294 ASSERT_FALSE (indices.series_p (0, 3, 3, 5)); 295 ASSERT_FALSE (indices.series_p (0, 3, 0, 8)); 296 ASSERT_TRUE (indices.series_p (1, 3, 3, 1)); 297 ASSERT_TRUE (indices.series_p (2, 3, 2, 3)); 298 299 ASSERT_TRUE (indices.series_p (0, 4, 0, 4)); 300 ASSERT_FALSE (indices.series_p (1, 4, 3, 4)); 301 302 ASSERT_TRUE (indices.series_p (0, 6, 0, 10)); 303 ASSERT_FALSE (indices.series_p (0, 6, 0, 100)); 304 305 ASSERT_FALSE (indices.series_p (1, 10, 3, 7)); 306 ASSERT_TRUE (indices.series_p (1, 10, 3, 8)); 307 308 ASSERT_TRUE (indices.series_p (0, 12, 0, 10)); 309 ASSERT_TRUE (indices.series_p (0, 12, 0, 11)); 310 ASSERT_TRUE (indices.series_p (0, 12, 0, 100)); 311 } 312 313 /* Run selftests for this file. */ 314 315 void 316 vec_perm_indices_c_tests () 317 { 318 test_vec_perm_12 (); 319 } 320 321 } // namespace selftest 322 323 #endif 324