1 /*===---- ammintrin.h - SSE4a intrinsics -----------------------------------===
2 *
3 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 * See https://llvm.org/LICENSE.txt for license information.
5 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 *
7 *===-----------------------------------------------------------------------===
8 */
9
10 #ifndef __AMMINTRIN_H
11 #define __AMMINTRIN_H
12
13 #include <pmmintrin.h>
14
/* Define the default attributes for the functions in this file: each inline
 * intrinsic below is declared with __always_inline__, __nodebug__, the
 * "sse4a" target feature and a 128-bit minimum vector width. The macro is
 * private to this header. */
#define __DEFAULT_FN_ATTRS                                                     \
  __attribute__((__always_inline__, __nodebug__, __target__("sse4a"),          \
                 __min_vector_width__(128)))
17
/// Extracts the specified bits from the lower 64 bits of the 128-bit
///    integer vector operand at the index \a idx and of the length \a len.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// __m128i _mm_extracti_si64(__m128i x, const int len, const int idx);
/// \endcode
///
/// This intrinsic corresponds to the <c> EXTRQ </c> instruction.
///
/// \param x
///    The value from which bits are extracted.
/// \param len
///    Bits [5:0] specify the length; the other bits are ignored. If bits [5:0]
///    are zero, the length is interpreted as 64.
/// \param idx
///    Bits [5:0] specify the index of the least significant bit; the other
///    bits are ignored. If the sum of the index and length is greater than 64,
///    the result is undefined. If the length and index are both zero, bits
///    [63:0] of parameter \a x are extracted. If the length is zero but the
///    index is non-zero, the result is undefined.
/// \returns A 128-bit integer vector whose lower 64 bits contain the bits
///    extracted from the source operand.
#define _mm_extracti_si64(x, len, idx)                                         \
  ((__m128i)__builtin_ia32_extrqi((__v2di)(__m128i)(x), (char)(len),           \
                                  (char)(idx)))
45
46 /// Extracts the specified bits from the lower 64 bits of the 128-bit
47 /// integer vector operand at the index and of the length specified by
48 /// \a __y.
49 ///
50 /// \headerfile <x86intrin.h>
51 ///
52 /// This intrinsic corresponds to the <c> EXTRQ </c> instruction.
53 ///
54 /// \param __x
55 /// The value from which bits are extracted.
56 /// \param __y
57 /// Specifies the index of the least significant bit at [13:8] and the
58 /// length at [5:0]; all other bits are ignored. If bits [5:0] are zero, the
59 /// length is interpreted as 64. If the sum of the index and length is
60 /// greater than 64, the result is undefined. If the length and index are
61 /// both zero, bits [63:0] of parameter \a __x are extracted. If the length
62 /// is zero but the index is non-zero, the result is undefined.
63 /// \returns A 128-bit vector whose lower 64 bits contain the bits extracted
64 /// from the source operand.
65 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_extract_si64(__m128i __x,__m128i __y)66 _mm_extract_si64(__m128i __x, __m128i __y)
67 {
68 return (__m128i)__builtin_ia32_extrq((__v2di)__x, (__v16qi)__y);
69 }
70
/// Inserts bits of a specified length from the source integer vector
///    \a y into the lower 64 bits of the destination integer vector \a x at
///    the index \a idx and of the length \a len.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// __m128i _mm_inserti_si64(__m128i x, __m128i y, const int len,
///                          const int idx);
/// \endcode
///
/// This intrinsic corresponds to the <c> INSERTQ </c> instruction.
///
/// \param x
///    The destination operand where bits will be inserted. The inserted bits
///    are defined by the length \a len and by the index \a idx specifying the
///    least significant bit.
/// \param y
///    The source operand containing the bits to be extracted. The extracted
///    bits are the least significant bits of operand \a y of length \a len.
/// \param len
///    Bits [5:0] specify the length; the other bits are ignored. If bits [5:0]
///    are zero, the length is interpreted as 64.
/// \param idx
///    Bits [5:0] specify the index of the least significant bit; the other
///    bits are ignored. If the sum of the index and length is greater than 64,
///    the result is undefined. If the length and index are both zero, bits
///    [63:0] of parameter \a y are inserted into parameter \a x. If the length
///    is zero but the index is non-zero, the result is undefined.
/// \returns A 128-bit integer vector containing the original lower 64 bits of
///    destination operand \a x with the specified bitfields replaced by the
///    lower bits of source operand \a y. The upper 64 bits of the return value
///    are undefined.
#define _mm_inserti_si64(x, y, len, idx)                                       \
  ((__m128i)__builtin_ia32_insertqi((__v2di)(__m128i)(x),                      \
                                    (__v2di)(__m128i)(y), (char)(len),         \
                                    (char)(idx)))
108
109 /// Inserts bits of a specified length from the source integer vector
110 /// \a __y into the lower 64 bits of the destination integer vector \a __x
111 /// at the index and of the length specified by \a __y.
112 ///
113 /// \headerfile <x86intrin.h>
114 ///
115 /// This intrinsic corresponds to the <c> INSERTQ </c> instruction.
116 ///
117 /// \param __x
118 /// The destination operand where bits will be inserted. The inserted bits
119 /// are defined by the length and by the index of the least significant bit
120 /// specified by operand \a __y.
121 /// \param __y
122 /// The source operand containing the bits to be extracted. The extracted
123 /// bits are the least significant bits of operand \a __y with length
124 /// specified by bits [69:64]. These are inserted into the destination at the
125 /// index specified by bits [77:72]; all other bits are ignored. If bits
126 /// [69:64] are zero, the length is interpreted as 64. If the sum of the
127 /// index and length is greater than 64, the result is undefined. If the
128 /// length and index are both zero, bits [63:0] of parameter \a __y are
129 /// inserted into parameter \a __x. If the length is zero but the index is
130 /// non-zero, the result is undefined.
131 /// \returns A 128-bit integer vector containing the original lower 64-bits of
132 /// destination operand \a __x with the specified bitfields replaced by the
133 /// lower bits of source operand \a __y. The upper 64 bits of the return
134 /// value are undefined.
135 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_insert_si64(__m128i __x,__m128i __y)136 _mm_insert_si64(__m128i __x, __m128i __y)
137 {
138 return (__m128i)__builtin_ia32_insertq((__v2di)__x, (__v2di)__y);
139 }
140
141 /// Stores a 64-bit double-precision value in a 64-bit memory location.
142 /// To minimize caching, the data is flagged as non-temporal (unlikely to be
143 /// used again soon).
144 ///
145 /// \headerfile <x86intrin.h>
146 ///
147 /// This intrinsic corresponds to the <c> MOVNTSD </c> instruction.
148 ///
149 /// \param __p
150 /// The 64-bit memory location used to store the register value.
151 /// \param __a
152 /// The 64-bit double-precision floating-point register value to be stored.
153 static __inline__ void __DEFAULT_FN_ATTRS
_mm_stream_sd(double * __p,__m128d __a)154 _mm_stream_sd(double *__p, __m128d __a)
155 {
156 __builtin_ia32_movntsd(__p, (__v2df)__a);
157 }
158
159 /// Stores a 32-bit single-precision floating-point value in a 32-bit
160 /// memory location. To minimize caching, the data is flagged as
161 /// non-temporal (unlikely to be used again soon).
162 ///
163 /// \headerfile <x86intrin.h>
164 ///
165 /// This intrinsic corresponds to the <c> MOVNTSS </c> instruction.
166 ///
167 /// \param __p
168 /// The 32-bit memory location used to store the register value.
169 /// \param __a
170 /// The 32-bit single-precision floating-point register value to be stored.
171 static __inline__ void __DEFAULT_FN_ATTRS
_mm_stream_ss(float * __p,__m128 __a)172 _mm_stream_ss(float *__p, __m128 __a)
173 {
174 __builtin_ia32_movntss(__p, (__v4sf)__a);
175 }
176
177 #undef __DEFAULT_FN_ATTRS
178
179 #endif /* __AMMINTRIN_H */
180