1 // Optimizations for random number extensions, aarch64 version -*- C++ -*-
2 
3 // Copyright (C) 2017-2021 Free Software Foundation, Inc.
4 //
5 // This file is part of the GNU ISO C++ Library.  This library is free
6 // software; you can redistribute it and/or modify it under the
7 // terms of the GNU General Public License as published by the
8 // Free Software Foundation; either version 3, or (at your option)
9 // any later version.
10 
11 // This library is distributed in the hope that it will be useful,
12 // but WITHOUT ANY WARRANTY; without even the implied warranty of
13 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14 // GNU General Public License for more details.
15 
16 // Under Section 7 of GPL version 3, you are granted additional
17 // permissions described in the GCC Runtime Library Exception, version
18 // 3.1, as published by the Free Software Foundation.
19 
20 // You should have received a copy of the GNU General Public License and
21 // a copy of the GCC Runtime Library Exception along with this program;
22 // see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
23 // <http://www.gnu.org/licenses/>.
24 
/** @file ext/opt_random.h
26  *  This is an internal header file, included by other library headers.
27  *  Do not attempt to use it directly. @headername{ext/random}
28  */
29 
30 #ifndef _EXT_OPT_RANDOM_H
31 #define _EXT_OPT_RANDOM_H 1
32 
33 #pragma GCC system_header
34 
35 #ifdef __ARM_NEON
36 
// __VEXT (_A, _B, _C) picks sixteen consecutive bytes out of a pair of
// 16-byte vectors with GCC's __builtin_shuffle.  In the shuffle mask,
// indices 0-15 address the first shuffle operand and 16-31 the second;
// the operand order and the mask are selected per byte order below.
//
// _C is parenthesized in every mask element: callers pass expressions
// such as 16 - __c, and without the parentheses 16-_C would expand to
// 16-16 - __c instead of 16-(16 - __c).
#ifdef __ARM_BIG_ENDIAN
# define __VEXT(_A,_B,_C) __builtin_shuffle (_A, _B, (__Uint8x16_t) \
    {16-(_C), 17-(_C), 18-(_C), 19-(_C), 20-(_C), 21-(_C), 22-(_C), 23-(_C), \
     24-(_C), 25-(_C), 26-(_C), 27-(_C), 28-(_C), 29-(_C), 30-(_C), 31-(_C)})
#else
# define __VEXT(_A,_B,_C) __builtin_shuffle (_B, _A, (__Uint8x16_t) \
    {(_C), (_C)+1, (_C)+2, (_C)+3, (_C)+4, (_C)+5, (_C)+6, (_C)+7, \
     (_C)+8, (_C)+9, (_C)+10, (_C)+11, (_C)+12, (_C)+13, (_C)+14, (_C)+15})
#endif
46 
47 #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
namespace __gnu_cxx _GLIBCXX_VISIBILITY (default)
49 {
50 _GLIBCXX_BEGIN_NAMESPACE_VERSION
51 
52   namespace {
53     // Logical Shift right 128-bits by c * 8 bits
54 
55     __extension__ extern __inline __Uint32x4_t
56     __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
57     __aarch64_lsr_128 (__Uint8x16_t __a, __const int __c)
58     {
59       const __Uint8x16_t __zero = {0, 0, 0, 0, 0, 0, 0, 0,
60 				   0, 0, 0, 0, 0, 0, 0, 0};
61 
62       return (__Uint32x4_t) __VEXT (__zero, __a, __c);
63     }
64 
65     // Logical Shift left 128-bits by c * 8 bits
66 
67     __extension__ extern __inline __Uint32x4_t
68     __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
69     __aarch64_lsl_128 (__Uint8x16_t __a, __const int __c)
70     {
71       const __Uint8x16_t __zero = {0, 0, 0, 0, 0, 0, 0, 0,
72 				   0, 0, 0, 0, 0, 0, 0, 0};
73 
74       return (__Uint32x4_t) __VEXT (__a, __zero, 16 - __c);
75     }
76 
77     template<size_t __sl1, size_t __sl2, size_t __sr1, size_t __sr2>
78       inline __Uint32x4_t __aarch64_recursion (__Uint32x4_t __a,
79 					       __Uint32x4_t __b,
80 					       __Uint32x4_t __c,
81 					       __Uint32x4_t __d,
82 					       __Uint32x4_t __e)
83     {
84       __Uint32x4_t __y = (__b >> __sr1);
85       __Uint32x4_t __z = __aarch64_lsr_128 ((__Uint8x16_t) __c, __sr2);
86 
87       __Uint32x4_t __v = __d << __sl1;
88 
89       __z = __z ^ __a;
90       __z = __z ^ __v;
91 
92       __Uint32x4_t __x = __aarch64_lsl_128 ((__Uint8x16_t) __a, __sl2);
93 
94       __y = __y & __e;
95       __z = __z ^ __x;
96       return __z ^ __y;
97     }
98 }
99 
100 #define _GLIBCXX_OPT_HAVE_RANDOM_SFMT_GEN_READ	1
101   template<typename _UIntType, size_t __m,
102 	   size_t __pos1, size_t __sl1, size_t __sl2,
103 	   size_t __sr1, size_t __sr2,
104 	   uint32_t __msk1, uint32_t __msk2,
105 	   uint32_t __msk3, uint32_t __msk4,
106 	   uint32_t __parity1, uint32_t __parity2,
107 	   uint32_t __parity3, uint32_t __parity4>
108     void simd_fast_mersenne_twister_engine<_UIntType, __m,
109 					   __pos1, __sl1, __sl2, __sr1, __sr2,
110 					   __msk1, __msk2, __msk3, __msk4,
111 					   __parity1, __parity2, __parity3,
112 					   __parity4>::
113     _M_gen_rand (void)
114     {
115       __Uint32x4_t __r1 = _M_state[_M_nstate - 2];
116       __Uint32x4_t __r2 = _M_state[_M_nstate - 1];
117 
118       __Uint32x4_t __aData = {__msk1, __msk2, __msk3, __msk4};
119 
120       size_t __i;
121       for (__i = 0; __i < _M_nstate - __pos1; ++__i)
122 	{
123 	  __Uint32x4_t __r = __aarch64_recursion<__sl1, __sl2, __sr1, __sr2>
124 	    (_M_state[__i], _M_state[__i + __pos1], __r1, __r2, __aData);
125 
126 	  _M_state[__i] = __r;
127 
128 	  __r1 = __r2;
129 	  __r2 = __r;
130 	}
131       for (; __i < _M_nstate; ++__i)
132 	{
133 	  __Uint32x4_t __r = __aarch64_recursion<__sl1, __sl2, __sr1, __sr2>
134 	    (_M_state[__i], _M_state[__i + __pos1 - _M_nstate], __r1, __r2,
135 	     __aData);
136 
137 	  _M_state[__i] = __r;
138 
139 	  __r1 = __r2;
140 	  __r2 = __r;
141 	}
142 
143       _M_pos = 0;
144     }
145 
146 
147 #define _GLIBCXX_OPT_HAVE_RANDOM_SFMT_OPERATOREQUAL	1
148   template<typename _UIntType, size_t __m,
149 	   size_t __pos1, size_t __sl1, size_t __sl2,
150 	   size_t __sr1, size_t __sr2,
151 	   uint32_t __msk1, uint32_t __msk2,
152 	   uint32_t __msk3, uint32_t __msk4,
153 	   uint32_t __parity1, uint32_t __parity2,
154 	   uint32_t __parity3, uint32_t __parity4>
155     bool
156     operator==(const __gnu_cxx::simd_fast_mersenne_twister_engine<_UIntType,
157 	       __m, __pos1, __sl1, __sl2, __sr1, __sr2,
158 	       __msk1, __msk2, __msk3, __msk4,
159 	       __parity1, __parity2, __parity3, __parity4>& __lhs,
160 	       const __gnu_cxx::simd_fast_mersenne_twister_engine<_UIntType,
161 	       __m, __pos1, __sl1, __sl2, __sr1, __sr2,
162 	       __msk1, __msk2, __msk3, __msk4,
163 	       __parity1, __parity2, __parity3, __parity4>& __rhs)
164     {
165       if (__lhs._M_pos != __rhs._M_pos)
166 	return false;
167 
168       __Uint32x4_t __res = __lhs._M_state[0] ^ __rhs._M_state[0];
169 
170       for (size_t __i = 1; __i < __lhs._M_nstate; ++__i)
171 	__res |= __lhs._M_state[__i] ^ __rhs._M_state[__i];
172 
173       return (__int128) __res == 0;
174     }
175 
176 _GLIBCXX_END_NAMESPACE_VERSION
177   } // namespace
178 
179 #endif // __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
180 #endif // __ARM_NEON
181 
182 #endif // _EXT_OPT_RANDOM_H
183