1 /* A class for building vector constant patterns. 2 Copyright (C) 2017-2018 Free Software Foundation, Inc. 3 4 This file is part of GCC. 5 6 GCC is free software; you can redistribute it and/or modify it under 7 the terms of the GNU General Public License as published by the Free 8 Software Foundation; either version 3, or (at your option) any later 9 version. 10 11 GCC is distributed in the hope that it will be useful, but WITHOUT ANY 12 WARRANTY; without even the implied warranty of MERCHANTABILITY or 13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 14 for more details. 15 16 You should have received a copy of the GNU General Public License 17 along with GCC; see the file COPYING3. If not see 18 <http://www.gnu.org/licenses/>. */ 19 20 #ifndef GCC_VECTOR_BUILDER_H 21 #define GCC_VECTOR_BUILDER_H 22 23 /* This class is a wrapper around auto_vec<T> for building vectors of T. 24 It aims to encode each vector as npatterns interleaved patterns, 25 where each pattern represents a sequence: 26 27 { BASE0, BASE1, BASE1 + STEP, BASE1 + STEP*2, BASE1 + STEP*3, ... } 28 29 The first three elements in each pattern provide enough information 30 to derive the other elements. If all patterns have a STEP of zero, 31 we only need to encode the first two elements in each pattern. 32 If BASE1 is also equal to BASE0 for all patterns, we only need to 33 encode the first element in each pattern. The number of encoded 34 elements per pattern is given by nelts_per_pattern. 35 36 The class can be used in two ways: 37 38 1. It can be used to build a full image of the vector, which is then 39 canonicalized by finalize (). In this case npatterns is initially 40 the number of elements in the vector and nelts_per_pattern is 41 initially 1. 42 43 2. It can be used to build a vector that already has a known encoding. 44 This is preferred since it is more efficient and copes with 45 variable-length vectors. finalize () then canonicalizes the encoding 46 to a simpler form if possible. 47 48 The derived class Derived provides this functionality for specific Ts. 49 Derived needs to provide the following interface: 50 51 bool equal_p (T elt1, T elt2) const; 52 53 Return true if elements ELT1 and ELT2 are equal. 54 55 bool allow_steps_p () const; 56 57 Return true if a stepped representation is OK. We don't allow 58 linear series for anything other than integers, to avoid problems 59 with rounding. 60 61 bool integral_p (T elt) const; 62 63 Return true if element ELT can be interpreted as an integer. 64 65 StepType step (T elt1, T elt2) const; 66 67 Return the value of element ELT2 minus the value of element ELT1, 68 given integral_p (ELT1) && integral_p (ELT2). There is no fixed 69 choice of StepType. 70 71 T apply_step (T base, unsigned int factor, StepType step) const; 72 73 Return a vector element with the value BASE + FACTOR * STEP. 74 75 bool can_elide_p (T elt) const; 76 77 Return true if we can drop element ELT, even if the retained 78 elements are different. This is provided for TREE_OVERFLOW 79 handling. 80 81 void note_representative (T *elt1_ptr, T elt2); 82 83 Record that ELT2 is being elided, given that ELT1_PTR points to 84 the last encoded element for the containing pattern. This is 85 again provided for TREE_OVERFLOW handling. */ 86 87 template<typename T, typename Derived> 88 class vector_builder : public auto_vec<T, 32> 89 { 90 public: 91 vector_builder (); 92 93 poly_uint64 full_nelts () const { return m_full_nelts; } 94 unsigned int npatterns () const { return m_npatterns; } 95 unsigned int nelts_per_pattern () const { return m_nelts_per_pattern; } 96 unsigned int encoded_nelts () const; 97 bool encoded_full_vector_p () const; 98 T elt (unsigned int) const; 99 100 bool operator == (const Derived &) const; 101 bool operator != (const Derived &x) const { return !operator == (x); } 102 103 void finalize (); 104 105 protected: 106 void new_vector (poly_uint64, unsigned int, unsigned int); 107 void reshape (unsigned int, unsigned int); 108 bool repeating_sequence_p (unsigned int, unsigned int, unsigned int); 109 bool stepped_sequence_p (unsigned int, unsigned int, unsigned int); 110 bool try_npatterns (unsigned int); 111 112 private: 113 vector_builder (const vector_builder &); 114 vector_builder &operator= (const vector_builder &); 115 Derived *derived () { return static_cast<Derived *> (this); } 116 const Derived *derived () const; 117 118 poly_uint64 m_full_nelts; 119 unsigned int m_npatterns; 120 unsigned int m_nelts_per_pattern; 121 }; 122 123 template<typename T, typename Derived> 124 inline const Derived * 125 vector_builder<T, Derived>::derived () const 126 { 127 return static_cast<const Derived *> (this); 128 } 129 130 template<typename T, typename Derived> 131 inline 132 vector_builder<T, Derived>::vector_builder () 133 : m_full_nelts (0), 134 m_npatterns (0), 135 m_nelts_per_pattern (0) 136 {} 137 138 /* Return the number of elements that are explicitly encoded. The vec 139 starts with these explicitly-encoded elements and may contain additional 140 elided elements. */ 141 142 template<typename T, typename Derived> 143 inline unsigned int 144 vector_builder<T, Derived>::encoded_nelts () const 145 { 146 return m_npatterns * m_nelts_per_pattern; 147 } 148 149 /* Return true if every element of the vector is explicitly encoded. */ 150 151 template<typename T, typename Derived> 152 inline bool 153 vector_builder<T, Derived>::encoded_full_vector_p () const 154 { 155 return known_eq (m_npatterns * m_nelts_per_pattern, m_full_nelts); 156 } 157 158 /* Start building a vector that has FULL_NELTS elements. Initially 159 encode it using NPATTERNS patterns with NELTS_PER_PATTERN each. */ 160 161 template<typename T, typename Derived> 162 void 163 vector_builder<T, Derived>::new_vector (poly_uint64 full_nelts, 164 unsigned int npatterns, 165 unsigned int nelts_per_pattern) 166 { 167 m_full_nelts = full_nelts; 168 m_npatterns = npatterns; 169 m_nelts_per_pattern = nelts_per_pattern; 170 this->reserve (encoded_nelts ()); 171 this->truncate (0); 172 } 173 174 /* Return true if this vector and OTHER have the same elements and 175 are encoded in the same way. */ 176 177 template<typename T, typename Derived> 178 bool 179 vector_builder<T, Derived>::operator == (const Derived &other) const 180 { 181 if (maybe_ne (m_full_nelts, other.m_full_nelts) 182 || m_npatterns != other.m_npatterns 183 || m_nelts_per_pattern != other.m_nelts_per_pattern) 184 return false; 185 186 unsigned int nelts = encoded_nelts (); 187 for (unsigned int i = 0; i < nelts; ++i) 188 if (!derived ()->equal_p ((*this)[i], other[i])) 189 return false; 190 191 return true; 192 } 193 194 /* Return the value of vector element I, which might or might not be 195 encoded explicitly. */ 196 197 template<typename T, typename Derived> 198 T 199 vector_builder<T, Derived>::elt (unsigned int i) const 200 { 201 /* This only makes sense if the encoding has been fully populated. */ 202 gcc_checking_assert (encoded_nelts () <= this->length ()); 203 204 /* First handle elements that are already present in the underlying 205 vector, regardless of whether they're part of the encoding or not. */ 206 if (i < this->length ()) 207 return (*this)[i]; 208 209 /* Identify the pattern that contains element I and work out the index of 210 the last encoded element for that pattern. */ 211 unsigned int pattern = i % m_npatterns; 212 unsigned int count = i / m_npatterns; 213 unsigned int final_i = encoded_nelts () - m_npatterns + pattern; 214 T final = (*this)[final_i]; 215 216 /* If there are no steps, the final encoded value is the right one. */ 217 if (m_nelts_per_pattern <= 2) 218 return final; 219 220 /* Otherwise work out the value from the last two encoded elements. */ 221 T prev = (*this)[final_i - m_npatterns]; 222 return derived ()->apply_step (final, count - 2, 223 derived ()->step (prev, final)); 224 } 225 226 /* Change the encoding to NPATTERNS patterns of NELTS_PER_PATTERN each, 227 but without changing the underlying vector. */ 228 229 template<typename T, typename Derived> 230 void 231 vector_builder<T, Derived>::reshape (unsigned int npatterns, 232 unsigned int nelts_per_pattern) 233 { 234 unsigned int old_encoded_nelts = encoded_nelts (); 235 unsigned int new_encoded_nelts = npatterns * nelts_per_pattern; 236 gcc_checking_assert (new_encoded_nelts <= old_encoded_nelts); 237 unsigned int next = new_encoded_nelts - npatterns; 238 for (unsigned int i = new_encoded_nelts; i < old_encoded_nelts; ++i) 239 { 240 derived ()->note_representative (&(*this)[next], (*this)[i]); 241 next += 1; 242 if (next == new_encoded_nelts) 243 next -= npatterns; 244 } 245 m_npatterns = npatterns; 246 m_nelts_per_pattern = nelts_per_pattern; 247 } 248 249 /* Return true if elements [START, END) contain a repeating sequence of 250 STEP elements. */ 251 252 template<typename T, typename Derived> 253 bool 254 vector_builder<T, Derived>::repeating_sequence_p (unsigned int start, 255 unsigned int end, 256 unsigned int step) 257 { 258 for (unsigned int i = start; i < end - step; ++i) 259 if (!derived ()->equal_p ((*this)[i], (*this)[i + step])) 260 return false; 261 return true; 262 } 263 264 /* Return true if elements [START, END) contain STEP interleaved linear 265 series. */ 266 267 template<typename T, typename Derived> 268 bool 269 vector_builder<T, Derived>::stepped_sequence_p (unsigned int start, 270 unsigned int end, 271 unsigned int step) 272 { 273 if (!derived ()->allow_steps_p ()) 274 return false; 275 276 for (unsigned int i = start + step * 2; i < end; ++i) 277 { 278 T elt1 = (*this)[i - step * 2]; 279 T elt2 = (*this)[i - step]; 280 T elt3 = (*this)[i]; 281 282 if (!derived ()->integral_p (elt1) 283 || !derived ()->integral_p (elt2) 284 || !derived ()->integral_p (elt3)) 285 return false; 286 287 if (maybe_ne (derived ()->step (elt1, elt2), 288 derived ()->step (elt2, elt3))) 289 return false; 290 291 if (!derived ()->can_elide_p (elt3)) 292 return false; 293 } 294 return true; 295 } 296 297 /* Try to change the number of encoded patterns to NPATTERNS, returning 298 true on success. */ 299 300 template<typename T, typename Derived> 301 bool 302 vector_builder<T, Derived>::try_npatterns (unsigned int npatterns) 303 { 304 if (m_nelts_per_pattern == 1) 305 { 306 /* See whether NPATTERNS is valid with the current 1-element-per-pattern 307 encoding. */ 308 if (repeating_sequence_p (0, encoded_nelts (), npatterns)) 309 { 310 reshape (npatterns, 1); 311 return true; 312 } 313 314 /* We can only increase the number of elements per pattern if all 315 elements are still encoded explicitly. */ 316 if (!encoded_full_vector_p ()) 317 return false; 318 } 319 320 if (m_nelts_per_pattern <= 2) 321 { 322 /* See whether NPATTERNS is valid with a 2-element-per-pattern 323 encoding. */ 324 if (repeating_sequence_p (npatterns, encoded_nelts (), npatterns)) 325 { 326 reshape (npatterns, 2); 327 return true; 328 } 329 330 /* We can only increase the number of elements per pattern if all 331 elements are still encoded explicitly. */ 332 if (!encoded_full_vector_p ()) 333 return false; 334 } 335 336 if (m_nelts_per_pattern <= 3) 337 { 338 /* See whether we have NPATTERNS interleaved linear series, 339 giving a 3-element-per-pattern encoding. */ 340 if (stepped_sequence_p (npatterns, encoded_nelts (), npatterns)) 341 { 342 reshape (npatterns, 3); 343 return true; 344 } 345 return false; 346 } 347 348 gcc_unreachable (); 349 } 350 351 /* Replace the current encoding with the canonical form. */ 352 353 template<typename T, typename Derived> 354 void 355 vector_builder<T, Derived>::finalize () 356 { 357 /* The encoding requires the same number of elements to come from each 358 pattern. */ 359 gcc_assert (multiple_p (m_full_nelts, m_npatterns)); 360 361 /* Allow the caller to build more elements than necessary. For example, 362 it's often convenient to build a stepped vector from the natural 363 encoding of three elements even if the vector itself only has two. */ 364 unsigned HOST_WIDE_INT const_full_nelts; 365 if (m_full_nelts.is_constant (&const_full_nelts) 366 && const_full_nelts <= encoded_nelts ()) 367 { 368 m_npatterns = const_full_nelts; 369 m_nelts_per_pattern = 1; 370 } 371 372 /* Try to whittle down the number of elements per pattern. That is: 373 374 1. If we have stepped patterns whose steps are all 0, reduce the 375 number of elements per pattern from 3 to 2. 376 377 2. If we have background fill values that are the same as the 378 foreground values, reduce the number of elements per pattern 379 from 2 to 1. */ 380 while (m_nelts_per_pattern > 1 381 && repeating_sequence_p (encoded_nelts () - m_npatterns * 2, 382 encoded_nelts (), m_npatterns)) 383 /* The last two sequences of M_NPATTERNS elements are equal, 384 so remove the last one. */ 385 reshape (m_npatterns, m_nelts_per_pattern - 1); 386 387 if (pow2p_hwi (m_npatterns)) 388 { 389 /* Try to halve the number of patterns while doing so gives a 390 valid pattern. This approach is linear in the number of 391 elements, whereas searcing from 1 up would be O(n*log(n)). 392 393 Each halving step tries to keep the number of elements per pattern 394 the same. If that isn't possible, and if all elements are still 395 explicitly encoded, the halving step can instead increase the number 396 of elements per pattern. 397 398 E.g. for: 399 400 { 0, 2, 3, 4, 5, 6, 7, 8 } npatterns == 8 full_nelts == 8 401 402 we first realize that the second half of the sequence is not 403 equal to the first, so we cannot maintain 1 element per pattern 404 for npatterns == 4. Instead we halve the number of patterns 405 and double the number of elements per pattern, treating this 406 as a "foreground" { 0, 2, 3, 4 } against a "background" of 407 { 5, 6, 7, 8 | 5, 6, 7, 8 ... }: 408 409 { 0, 2, 3, 4 | 5, 6, 7, 8 } npatterns == 4 410 411 Next we realize that this is *not* a foreround of { 0, 2 } 412 against a background of { 3, 4 | 3, 4 ... }, so the only 413 remaining option for reducing the number of patterns is 414 to use a foreground of { 0, 2 } against a stepped background 415 of { 1, 2 | 3, 4 | 5, 6 ... }. This is valid because we still 416 haven't elided any elements: 417 418 { 0, 2 | 3, 4 | 5, 6 } npatterns == 2 419 420 This in turn can be reduced to a foreground of { 0 } against a 421 stepped background of { 1 | 2 | 3 ... }: 422 423 { 0 | 2 | 3 } npatterns == 1 424 425 This last step would not have been possible for: 426 427 { 0, 0 | 3, 4 | 5, 6 } npatterns == 2. */ 428 while ((m_npatterns & 1) == 0 && try_npatterns (m_npatterns / 2)) 429 continue; 430 431 /* Builders of arbitrary fixed-length vectors can use: 432 433 new_vector (x, x, 1) 434 435 so that every element is specified explicitly. Handle cases 436 that are actually wrapping series, like { 0, 1, 2, 3, 0, 1, 2, 3 } 437 would be for 2-bit elements. We'll have treated them as 438 duplicates in the loop above. */ 439 if (m_nelts_per_pattern == 1 440 && m_full_nelts.is_constant (&const_full_nelts) 441 && this->length () >= const_full_nelts 442 && (m_npatterns & 3) == 0 443 && stepped_sequence_p (m_npatterns / 4, const_full_nelts, 444 m_npatterns / 4)) 445 { 446 reshape (m_npatterns / 4, 3); 447 while ((m_npatterns & 1) == 0 && try_npatterns (m_npatterns / 2)) 448 continue; 449 } 450 } 451 else 452 /* For the non-power-of-2 case, do a simple search up from 1. */ 453 for (unsigned int i = 1; i <= m_npatterns / 2; ++i) 454 if (m_npatterns % i == 0 && try_npatterns (i)) 455 break; 456 } 457 458 #endif 459