1 /* A representation of vector permutation indices.
2 Copyright (C) 2017-2019 Free Software Foundation, Inc.
3
4 This file is part of GCC.
5
6 GCC is free software; you can redistribute it and/or modify it under
7 the terms of the GNU General Public License as published by the Free
8 Software Foundation; either version 3, or (at your option) any later
9 version.
10
11 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
12 WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
19
20 #include "config.h"
21 #include "system.h"
22 #include "coretypes.h"
23 #include "vec-perm-indices.h"
24 #include "tree.h"
25 #include "fold-const.h"
26 #include "tree-vector-builder.h"
27 #include "backend.h"
28 #include "rtl.h"
29 #include "memmodel.h"
30 #include "emit-rtl.h"
31 #include "selftest.h"
32 #include "rtx-vector-builder.h"
33
34 /* Switch to a new permutation vector that selects between NINPUTS vector
35 inputs that have NELTS_PER_INPUT elements each. Take the elements of the
36 new permutation vector from ELEMENTS, clamping each one to be in range. */
37
38 void
new_vector(const vec_perm_builder & elements,unsigned int ninputs,poly_uint64 nelts_per_input)39 vec_perm_indices::new_vector (const vec_perm_builder &elements,
40 unsigned int ninputs,
41 poly_uint64 nelts_per_input)
42 {
43 m_ninputs = ninputs;
44 m_nelts_per_input = nelts_per_input;
45 /* If the vector has a constant number of elements, expand the
46 encoding and clamp each element. E.g. { 0, 2, 4, ... } might
47 wrap halfway if there is only one vector input, and we want
48 the wrapped form to be the canonical one.
49
50 If the vector has a variable number of elements, just copy
51 the encoding. In that case the unwrapped form is canonical
52 and there is no way of representing the wrapped form. */
53 poly_uint64 full_nelts = elements.full_nelts ();
54 unsigned HOST_WIDE_INT copy_nelts;
55 if (full_nelts.is_constant (©_nelts))
56 m_encoding.new_vector (full_nelts, copy_nelts, 1);
57 else
58 {
59 copy_nelts = elements.encoded_nelts ();
60 m_encoding.new_vector (full_nelts, elements.npatterns (),
61 elements.nelts_per_pattern ());
62 }
63 unsigned int npatterns = m_encoding.npatterns ();
64 for (unsigned int i = 0; i < npatterns; ++i)
65 m_encoding.quick_push (clamp (elements.elt (i)));
66 /* Use the fact that:
67
68 (a + b) % c == ((a % c) + (b % c)) % c
69
70 to simplify the clamping of variable-length vectors. */
71 for (unsigned int i = npatterns; i < copy_nelts; ++i)
72 {
73 element_type step = clamp (elements.elt (i)
74 - elements.elt (i - npatterns));
75 m_encoding.quick_push (clamp (m_encoding[i - npatterns] + step));
76 }
77 m_encoding.finalize ();
78 }
79
80 /* Switch to a new permutation vector that selects the same input elements
81 as ORIG, but with each element split into FACTOR pieces. For example,
82 if ORIG is { 1, 2, 0, 3 } and FACTOR is 2, the new permutation is
83 { 2, 3, 4, 5, 0, 1, 6, 7 }. */
84
85 void
new_expanded_vector(const vec_perm_indices & orig,unsigned int factor)86 vec_perm_indices::new_expanded_vector (const vec_perm_indices &orig,
87 unsigned int factor)
88 {
89 m_ninputs = orig.m_ninputs;
90 m_nelts_per_input = orig.m_nelts_per_input * factor;
91 m_encoding.new_vector (orig.m_encoding.full_nelts () * factor,
92 orig.m_encoding.npatterns () * factor,
93 orig.m_encoding.nelts_per_pattern ());
94 unsigned int encoded_nelts = orig.m_encoding.encoded_nelts ();
95 for (unsigned int i = 0; i < encoded_nelts; ++i)
96 {
97 element_type base = orig.m_encoding[i] * factor;
98 for (unsigned int j = 0; j < factor; ++j)
99 m_encoding.quick_push (base + j);
100 }
101 m_encoding.finalize ();
102 }
103
104 /* Rotate the inputs of the permutation right by DELTA inputs. This changes
105 the values of the permutation vector but it doesn't change the way that
106 the elements are encoded. */
107
108 void
rotate_inputs(int delta)109 vec_perm_indices::rotate_inputs (int delta)
110 {
111 element_type element_delta = delta * m_nelts_per_input;
112 for (unsigned int i = 0; i < m_encoding.length (); ++i)
113 m_encoding[i] = clamp (m_encoding[i] + element_delta);
114 }
115
116 /* Return true if index OUT_BASE + I * OUT_STEP selects input
117 element IN_BASE + I * IN_STEP. For example, the call to test
118 whether a permute reverses a vector of N elements would be:
119
120 series_p (0, 1, N - 1, -1)
121
122 which would return true for { N - 1, N - 2, N - 3, ... }.
123 The calls to test for an interleaving of elements starting
124 at N1 and N2 would be:
125
126 series_p (0, 2, N1, 1) && series_p (1, 2, N2, 1).
127
128 which would return true for { N1, N2, N1 + 1, N2 + 1, ... }. */
129
130 bool
series_p(unsigned int out_base,unsigned int out_step,element_type in_base,element_type in_step)131 vec_perm_indices::series_p (unsigned int out_base, unsigned int out_step,
132 element_type in_base, element_type in_step) const
133 {
134 /* Check the base value. */
135 if (maybe_ne (clamp (m_encoding.elt (out_base)), clamp (in_base)))
136 return false;
137
138 element_type full_nelts = m_encoding.full_nelts ();
139 unsigned int npatterns = m_encoding.npatterns ();
140
141 /* Calculate which multiple of OUT_STEP elements we need to get
142 back to the same pattern. */
143 unsigned int cycle_length = least_common_multiple (out_step, npatterns);
144
145 /* Check the steps. */
146 in_step = clamp (in_step);
147 out_base += out_step;
148 unsigned int limit = 0;
149 for (;;)
150 {
151 /* Succeed if we've checked all the elements in the vector. */
152 if (known_ge (out_base, full_nelts))
153 return true;
154
155 if (out_base >= npatterns)
156 {
157 /* We've got to the end of the "foreground" values. Check
158 2 elements from each pattern in the "background" values. */
159 if (limit == 0)
160 limit = out_base + cycle_length * 2;
161 else if (out_base >= limit)
162 return true;
163 }
164
165 element_type v0 = m_encoding.elt (out_base - out_step);
166 element_type v1 = m_encoding.elt (out_base);
167 if (maybe_ne (clamp (v1 - v0), in_step))
168 return false;
169
170 out_base += out_step;
171 }
172 return true;
173 }
174
175 /* Return true if all elements of the permutation vector are in the range
176 [START, START + SIZE). */
177
178 bool
all_in_range_p(element_type start,element_type size)179 vec_perm_indices::all_in_range_p (element_type start, element_type size) const
180 {
181 /* Check the first two elements of each pattern. */
182 unsigned int npatterns = m_encoding.npatterns ();
183 unsigned int nelts_per_pattern = m_encoding.nelts_per_pattern ();
184 unsigned int base_nelts = npatterns * MIN (nelts_per_pattern, 2);
185 for (unsigned int i = 0; i < base_nelts; ++i)
186 if (!known_in_range_p (m_encoding[i], start, size))
187 return false;
188
189 /* For stepped encodings, check the full range of the series. */
190 if (nelts_per_pattern == 3)
191 {
192 element_type limit = input_nelts ();
193
194 /* The number of elements in each pattern beyond the first two
195 that we checked above. */
196 poly_int64 step_nelts = exact_div (m_encoding.full_nelts (),
197 npatterns) - 2;
198 for (unsigned int i = 0; i < npatterns; ++i)
199 {
200 /* BASE1 has been checked but BASE2 hasn't. */
201 element_type base1 = m_encoding[i + npatterns];
202 element_type base2 = m_encoding[i + base_nelts];
203
204 /* The step to add to get from BASE1 to each subsequent value. */
205 element_type step = clamp (base2 - base1);
206
207 /* STEP has no inherent sign, so a value near LIMIT can
208 act as a negative step. The series is in range if it
209 is in range according to one of the two interpretations.
210
211 Since we're dealing with clamped values, ELEMENT_TYPE is
212 wide enough for overflow not to be a problem. */
213 element_type headroom_down = base1 - start;
214 element_type headroom_up = size - headroom_down - 1;
215 HOST_WIDE_INT diff;
216 if ((!step.is_constant (&diff)
217 || maybe_lt (headroom_up, diff * step_nelts))
218 && (!(limit - step).is_constant (&diff)
219 || maybe_lt (headroom_down, diff * step_nelts)))
220 return false;
221 }
222 }
223 return true;
224 }
225
226 /* Try to read the contents of VECTOR_CST CST as a constant permutation
227 vector. Return true and add the elements to BUILDER on success,
228 otherwise return false without modifying BUILDER. */
229
230 bool
tree_to_vec_perm_builder(vec_perm_builder * builder,tree cst)231 tree_to_vec_perm_builder (vec_perm_builder *builder, tree cst)
232 {
233 unsigned int encoded_nelts = vector_cst_encoded_nelts (cst);
234 for (unsigned int i = 0; i < encoded_nelts; ++i)
235 if (!tree_fits_poly_int64_p (VECTOR_CST_ENCODED_ELT (cst, i)))
236 return false;
237
238 builder->new_vector (TYPE_VECTOR_SUBPARTS (TREE_TYPE (cst)),
239 VECTOR_CST_NPATTERNS (cst),
240 VECTOR_CST_NELTS_PER_PATTERN (cst));
241 for (unsigned int i = 0; i < encoded_nelts; ++i)
242 builder->quick_push (tree_to_poly_int64 (VECTOR_CST_ENCODED_ELT (cst, i)));
243 return true;
244 }
245
246 /* Return a VECTOR_CST of type TYPE for the permutation vector in INDICES. */
247
248 tree
vec_perm_indices_to_tree(tree type,const vec_perm_indices & indices)249 vec_perm_indices_to_tree (tree type, const vec_perm_indices &indices)
250 {
251 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (type), indices.length ()));
252 tree_vector_builder sel (type, indices.encoding ().npatterns (),
253 indices.encoding ().nelts_per_pattern ());
254 unsigned int encoded_nelts = sel.encoded_nelts ();
255 for (unsigned int i = 0; i < encoded_nelts; i++)
256 sel.quick_push (build_int_cst (TREE_TYPE (type), indices[i]));
257 return sel.build ();
258 }
259
260 /* Return a CONST_VECTOR of mode MODE that contains the elements of
261 INDICES. */
262
263 rtx
vec_perm_indices_to_rtx(machine_mode mode,const vec_perm_indices & indices)264 vec_perm_indices_to_rtx (machine_mode mode, const vec_perm_indices &indices)
265 {
266 gcc_assert (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
267 && known_eq (GET_MODE_NUNITS (mode), indices.length ()));
268 rtx_vector_builder sel (mode, indices.encoding ().npatterns (),
269 indices.encoding ().nelts_per_pattern ());
270 unsigned int encoded_nelts = sel.encoded_nelts ();
271 for (unsigned int i = 0; i < encoded_nelts; i++)
272 sel.quick_push (gen_int_mode (indices[i], GET_MODE_INNER (mode)));
273 return sel.build ();
274 }
275
276 #if CHECKING_P
277
278 namespace selftest {
279
280 /* Test a 12-element vector. */
281
282 static void
test_vec_perm_12(void)283 test_vec_perm_12 (void)
284 {
285 vec_perm_builder builder (12, 12, 1);
286 for (unsigned int i = 0; i < 4; ++i)
287 {
288 builder.quick_push (i * 5);
289 builder.quick_push (3 + i);
290 builder.quick_push (2 + 3 * i);
291 }
292 vec_perm_indices indices (builder, 1, 12);
293 ASSERT_TRUE (indices.series_p (0, 3, 0, 5));
294 ASSERT_FALSE (indices.series_p (0, 3, 3, 5));
295 ASSERT_FALSE (indices.series_p (0, 3, 0, 8));
296 ASSERT_TRUE (indices.series_p (1, 3, 3, 1));
297 ASSERT_TRUE (indices.series_p (2, 3, 2, 3));
298
299 ASSERT_TRUE (indices.series_p (0, 4, 0, 4));
300 ASSERT_FALSE (indices.series_p (1, 4, 3, 4));
301
302 ASSERT_TRUE (indices.series_p (0, 6, 0, 10));
303 ASSERT_FALSE (indices.series_p (0, 6, 0, 100));
304
305 ASSERT_FALSE (indices.series_p (1, 10, 3, 7));
306 ASSERT_TRUE (indices.series_p (1, 10, 3, 8));
307
308 ASSERT_TRUE (indices.series_p (0, 12, 0, 10));
309 ASSERT_TRUE (indices.series_p (0, 12, 0, 11));
310 ASSERT_TRUE (indices.series_p (0, 12, 0, 100));
311 }
312
313 /* Run selftests for this file. */
314
315 void
vec_perm_indices_c_tests()316 vec_perm_indices_c_tests ()
317 {
318 test_vec_perm_12 ();
319 }
320
321 } // namespace selftest
322
323 #endif
324