1 /* A representation of vector permutation indices.
2    Copyright (C) 2017-2019 Free Software Foundation, Inc.
3 
4 This file is part of GCC.
5 
6 GCC is free software; you can redistribute it and/or modify it under
7 the terms of the GNU General Public License as published by the Free
8 Software Foundation; either version 3, or (at your option) any later
9 version.
10 
11 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
12 WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
14 for more details.
15 
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3.  If not see
18 <http://www.gnu.org/licenses/>.  */
19 
20 #include "config.h"
21 #include "system.h"
22 #include "coretypes.h"
23 #include "vec-perm-indices.h"
24 #include "tree.h"
25 #include "fold-const.h"
26 #include "tree-vector-builder.h"
27 #include "backend.h"
28 #include "rtl.h"
29 #include "memmodel.h"
30 #include "emit-rtl.h"
31 #include "selftest.h"
32 #include "rtx-vector-builder.h"
33 
34 /* Switch to a new permutation vector that selects between NINPUTS vector
35    inputs that have NELTS_PER_INPUT elements each.  Take the elements of the
36    new permutation vector from ELEMENTS, clamping each one to be in range.  */
37 
38 void
new_vector(const vec_perm_builder & elements,unsigned int ninputs,poly_uint64 nelts_per_input)39 vec_perm_indices::new_vector (const vec_perm_builder &elements,
40 			      unsigned int ninputs,
41 			      poly_uint64 nelts_per_input)
42 {
43   m_ninputs = ninputs;
44   m_nelts_per_input = nelts_per_input;
45   /* If the vector has a constant number of elements, expand the
46      encoding and clamp each element.  E.g. { 0, 2, 4, ... } might
47      wrap halfway if there is only one vector input, and we want
48      the wrapped form to be the canonical one.
49 
50      If the vector has a variable number of elements, just copy
51      the encoding.  In that case the unwrapped form is canonical
52      and there is no way of representing the wrapped form.  */
53   poly_uint64 full_nelts = elements.full_nelts ();
54   unsigned HOST_WIDE_INT copy_nelts;
55   if (full_nelts.is_constant (&copy_nelts))
56     m_encoding.new_vector (full_nelts, copy_nelts, 1);
57   else
58     {
59       copy_nelts = elements.encoded_nelts ();
60       m_encoding.new_vector (full_nelts, elements.npatterns (),
61 			     elements.nelts_per_pattern ());
62     }
63   unsigned int npatterns = m_encoding.npatterns ();
64   for (unsigned int i = 0; i < npatterns; ++i)
65     m_encoding.quick_push (clamp (elements.elt (i)));
66   /* Use the fact that:
67 
68 	(a + b) % c == ((a % c) + (b % c)) % c
69 
70      to simplify the clamping of variable-length vectors.  */
71   for (unsigned int i = npatterns; i < copy_nelts; ++i)
72     {
73       element_type step = clamp (elements.elt (i)
74 				 - elements.elt (i - npatterns));
75       m_encoding.quick_push (clamp (m_encoding[i - npatterns] + step));
76     }
77   m_encoding.finalize ();
78 }
79 
80 /* Switch to a new permutation vector that selects the same input elements
81    as ORIG, but with each element split into FACTOR pieces.  For example,
82    if ORIG is { 1, 2, 0, 3 } and FACTOR is 2, the new permutation is
83    { 2, 3, 4, 5, 0, 1, 6, 7 }.  */
84 
85 void
new_expanded_vector(const vec_perm_indices & orig,unsigned int factor)86 vec_perm_indices::new_expanded_vector (const vec_perm_indices &orig,
87 				       unsigned int factor)
88 {
89   m_ninputs = orig.m_ninputs;
90   m_nelts_per_input = orig.m_nelts_per_input * factor;
91   m_encoding.new_vector (orig.m_encoding.full_nelts () * factor,
92 			 orig.m_encoding.npatterns () * factor,
93 			 orig.m_encoding.nelts_per_pattern ());
94   unsigned int encoded_nelts = orig.m_encoding.encoded_nelts ();
95   for (unsigned int i = 0; i < encoded_nelts; ++i)
96     {
97       element_type base = orig.m_encoding[i] * factor;
98       for (unsigned int j = 0; j < factor; ++j)
99 	m_encoding.quick_push (base + j);
100     }
101   m_encoding.finalize ();
102 }
103 
104 /* Rotate the inputs of the permutation right by DELTA inputs.  This changes
105    the values of the permutation vector but it doesn't change the way that
106    the elements are encoded.  */
107 
108 void
rotate_inputs(int delta)109 vec_perm_indices::rotate_inputs (int delta)
110 {
111   element_type element_delta = delta * m_nelts_per_input;
112   for (unsigned int i = 0; i < m_encoding.length (); ++i)
113     m_encoding[i] = clamp (m_encoding[i] + element_delta);
114 }
115 
116 /* Return true if index OUT_BASE + I * OUT_STEP selects input
117    element IN_BASE + I * IN_STEP.  For example, the call to test
118    whether a permute reverses a vector of N elements would be:
119 
120      series_p (0, 1, N - 1, -1)
121 
122    which would return true for { N - 1, N - 2, N - 3, ... }.
123    The calls to test for an interleaving of elements starting
124    at N1 and N2 would be:
125 
126      series_p (0, 2, N1, 1) && series_p (1, 2, N2, 1).
127 
128    which would return true for { N1, N2, N1 + 1, N2 + 1, ... }.  */
129 
130 bool
series_p(unsigned int out_base,unsigned int out_step,element_type in_base,element_type in_step)131 vec_perm_indices::series_p (unsigned int out_base, unsigned int out_step,
132 			    element_type in_base, element_type in_step) const
133 {
134   /* Check the base value.  */
135   if (maybe_ne (clamp (m_encoding.elt (out_base)), clamp (in_base)))
136     return false;
137 
138   element_type full_nelts = m_encoding.full_nelts ();
139   unsigned int npatterns = m_encoding.npatterns ();
140 
141   /* Calculate which multiple of OUT_STEP elements we need to get
142      back to the same pattern.  */
143   unsigned int cycle_length = least_common_multiple (out_step, npatterns);
144 
145   /* Check the steps.  */
146   in_step = clamp (in_step);
147   out_base += out_step;
148   unsigned int limit = 0;
149   for (;;)
150     {
151       /* Succeed if we've checked all the elements in the vector.  */
152       if (known_ge (out_base, full_nelts))
153 	return true;
154 
155       if (out_base >= npatterns)
156 	{
157 	  /* We've got to the end of the "foreground" values.  Check
158 	     2 elements from each pattern in the "background" values.  */
159 	  if (limit == 0)
160 	    limit = out_base + cycle_length * 2;
161 	  else if (out_base >= limit)
162 	    return true;
163 	}
164 
165       element_type v0 = m_encoding.elt (out_base - out_step);
166       element_type v1 = m_encoding.elt (out_base);
167       if (maybe_ne (clamp (v1 - v0), in_step))
168 	return false;
169 
170       out_base += out_step;
171     }
172   return true;
173 }
174 
175 /* Return true if all elements of the permutation vector are in the range
176    [START, START + SIZE).  */
177 
178 bool
all_in_range_p(element_type start,element_type size)179 vec_perm_indices::all_in_range_p (element_type start, element_type size) const
180 {
181   /* Check the first two elements of each pattern.  */
182   unsigned int npatterns = m_encoding.npatterns ();
183   unsigned int nelts_per_pattern = m_encoding.nelts_per_pattern ();
184   unsigned int base_nelts = npatterns * MIN (nelts_per_pattern, 2);
185   for (unsigned int i = 0; i < base_nelts; ++i)
186     if (!known_in_range_p (m_encoding[i], start, size))
187       return false;
188 
189   /* For stepped encodings, check the full range of the series.  */
190   if (nelts_per_pattern == 3)
191     {
192       element_type limit = input_nelts ();
193 
194       /* The number of elements in each pattern beyond the first two
195 	 that we checked above.  */
196       poly_int64 step_nelts = exact_div (m_encoding.full_nelts (),
197 					 npatterns) - 2;
198       for (unsigned int i = 0; i < npatterns; ++i)
199 	{
200 	  /* BASE1 has been checked but BASE2 hasn't.   */
201 	  element_type base1 = m_encoding[i + npatterns];
202 	  element_type base2 = m_encoding[i + base_nelts];
203 
204 	  /* The step to add to get from BASE1 to each subsequent value.  */
205 	  element_type step = clamp (base2 - base1);
206 
207 	  /* STEP has no inherent sign, so a value near LIMIT can
208 	     act as a negative step.  The series is in range if it
209 	     is in range according to one of the two interpretations.
210 
211 	     Since we're dealing with clamped values, ELEMENT_TYPE is
212 	     wide enough for overflow not to be a problem.  */
213 	  element_type headroom_down = base1 - start;
214 	  element_type headroom_up = size - headroom_down - 1;
215 	  HOST_WIDE_INT diff;
216 	  if ((!step.is_constant (&diff)
217 	       || maybe_lt (headroom_up, diff * step_nelts))
218 	      && (!(limit - step).is_constant (&diff)
219 		  || maybe_lt (headroom_down, diff * step_nelts)))
220 	    return false;
221 	}
222     }
223   return true;
224 }
225 
226 /* Try to read the contents of VECTOR_CST CST as a constant permutation
227    vector.  Return true and add the elements to BUILDER on success,
228    otherwise return false without modifying BUILDER.  */
229 
230 bool
tree_to_vec_perm_builder(vec_perm_builder * builder,tree cst)231 tree_to_vec_perm_builder (vec_perm_builder *builder, tree cst)
232 {
233   unsigned int encoded_nelts = vector_cst_encoded_nelts (cst);
234   for (unsigned int i = 0; i < encoded_nelts; ++i)
235     if (!tree_fits_poly_int64_p (VECTOR_CST_ENCODED_ELT (cst, i)))
236       return false;
237 
238   builder->new_vector (TYPE_VECTOR_SUBPARTS (TREE_TYPE (cst)),
239 		       VECTOR_CST_NPATTERNS (cst),
240 		       VECTOR_CST_NELTS_PER_PATTERN (cst));
241   for (unsigned int i = 0; i < encoded_nelts; ++i)
242     builder->quick_push (tree_to_poly_int64 (VECTOR_CST_ENCODED_ELT (cst, i)));
243   return true;
244 }
245 
246 /* Return a VECTOR_CST of type TYPE for the permutation vector in INDICES.  */
247 
248 tree
vec_perm_indices_to_tree(tree type,const vec_perm_indices & indices)249 vec_perm_indices_to_tree (tree type, const vec_perm_indices &indices)
250 {
251   gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (type), indices.length ()));
252   tree_vector_builder sel (type, indices.encoding ().npatterns (),
253 			   indices.encoding ().nelts_per_pattern ());
254   unsigned int encoded_nelts = sel.encoded_nelts ();
255   for (unsigned int i = 0; i < encoded_nelts; i++)
256     sel.quick_push (build_int_cst (TREE_TYPE (type), indices[i]));
257   return sel.build ();
258 }
259 
260 /* Return a CONST_VECTOR of mode MODE that contains the elements of
261    INDICES.  */
262 
263 rtx
vec_perm_indices_to_rtx(machine_mode mode,const vec_perm_indices & indices)264 vec_perm_indices_to_rtx (machine_mode mode, const vec_perm_indices &indices)
265 {
266   gcc_assert (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
267 	      && known_eq (GET_MODE_NUNITS (mode), indices.length ()));
268   rtx_vector_builder sel (mode, indices.encoding ().npatterns (),
269 			  indices.encoding ().nelts_per_pattern ());
270   unsigned int encoded_nelts = sel.encoded_nelts ();
271   for (unsigned int i = 0; i < encoded_nelts; i++)
272     sel.quick_push (gen_int_mode (indices[i], GET_MODE_INNER (mode)));
273   return sel.build ();
274 }
275 
276 #if CHECKING_P
277 
278 namespace selftest {
279 
280 /* Test a 12-element vector.  */
281 
282 static void
test_vec_perm_12(void)283 test_vec_perm_12 (void)
284 {
285   vec_perm_builder builder (12, 12, 1);
286   for (unsigned int i = 0; i < 4; ++i)
287     {
288       builder.quick_push (i * 5);
289       builder.quick_push (3 + i);
290       builder.quick_push (2 + 3 * i);
291     }
292   vec_perm_indices indices (builder, 1, 12);
293   ASSERT_TRUE (indices.series_p (0, 3, 0, 5));
294   ASSERT_FALSE (indices.series_p (0, 3, 3, 5));
295   ASSERT_FALSE (indices.series_p (0, 3, 0, 8));
296   ASSERT_TRUE (indices.series_p (1, 3, 3, 1));
297   ASSERT_TRUE (indices.series_p (2, 3, 2, 3));
298 
299   ASSERT_TRUE (indices.series_p (0, 4, 0, 4));
300   ASSERT_FALSE (indices.series_p (1, 4, 3, 4));
301 
302   ASSERT_TRUE (indices.series_p (0, 6, 0, 10));
303   ASSERT_FALSE (indices.series_p (0, 6, 0, 100));
304 
305   ASSERT_FALSE (indices.series_p (1, 10, 3, 7));
306   ASSERT_TRUE (indices.series_p (1, 10, 3, 8));
307 
308   ASSERT_TRUE (indices.series_p (0, 12, 0, 10));
309   ASSERT_TRUE (indices.series_p (0, 12, 0, 11));
310   ASSERT_TRUE (indices.series_p (0, 12, 0, 100));
311 }
312 
313 /* Run selftests for this file.  */
314 
315 void
vec_perm_indices_c_tests()316 vec_perm_indices_c_tests ()
317 {
318   test_vec_perm_12 ();
319 }
320 
321 } // namespace selftest
322 
323 #endif
324