1/// @ref core
2/// @file glm/detail/func_common_simd.inl
3
4#if GLM_ARCH & GLM_ARCH_SSE2_BIT
5
6#include "../simd/common.h"
7
8#include <immintrin.h>
9
10namespace glm{
11namespace detail
12{
13	template <precision P>
14	struct compute_abs_vector<float, P, tvec4, true>
15	{
16		GLM_FUNC_QUALIFIER static tvec4<float, P> call(tvec4<float, P> const & v)
17		{
18			tvec4<float, P> result(uninitialize);
19			result.data = glm_vec4_abs(v.data);
20			return result;
21		}
22	};
23
24	template <precision P>
25	struct compute_abs_vector<int, P, tvec4, true>
26	{
27		GLM_FUNC_QUALIFIER static tvec4<int, P> call(tvec4<int, P> const & v)
28		{
29			tvec4<int, P> result(uninitialize);
30			result.data = glm_ivec4_abs(v.data);
31			return result;
32		}
33	};
34
35	template <precision P>
36	struct compute_floor<float, P, tvec4, true>
37	{
38		GLM_FUNC_QUALIFIER static tvec4<float, P> call(tvec4<float, P> const & v)
39		{
40			tvec4<float, P> result(uninitialize);
41			result.data = glm_vec4_floor(v.data);
42			return result;
43		}
44	};
45
46	template <precision P>
47	struct compute_ceil<float, P, tvec4, true>
48	{
49		GLM_FUNC_QUALIFIER static tvec4<float, P> call(tvec4<float, P> const & v)
50		{
51			tvec4<float, P> result(uninitialize);
52			result.data = glm_vec4_ceil(v.data);
53			return result;
54		}
55	};
56
57	template <precision P>
58	struct compute_fract<float, P, tvec4, true>
59	{
60		GLM_FUNC_QUALIFIER static tvec4<float, P> call(tvec4<float, P> const & v)
61		{
62			tvec4<float, P> result(uninitialize);
63			result.data = glm_vec4_fract(v.data);
64			return result;
65		}
66	};
67
68	template <precision P>
69	struct compute_round<float, P, tvec4, true>
70	{
71		GLM_FUNC_QUALIFIER static tvec4<float, P> call(tvec4<float, P> const & v)
72		{
73			tvec4<float, P> result(uninitialize);
74			result.data = glm_vec4_round(v.data);
75			return result;
76		}
77	};
78
79	template <precision P>
80	struct compute_mod<float, P, tvec4, true>
81	{
82		GLM_FUNC_QUALIFIER static tvec4<float, P> call(tvec4<float, P> const & x, tvec4<float, P> const & y)
83		{
84			tvec4<float, P> result(uninitialize);
85			result.data = glm_vec4_mod(x.data, y.data);
86			return result;
87		}
88	};
89
90	template <precision P>
91	struct compute_min_vector<float, P, tvec4, true>
92	{
93		GLM_FUNC_QUALIFIER static tvec4<float, P> call(tvec4<float, P> const & v1, tvec4<float, P> const & v2)
94		{
95			tvec4<float, P> result(uninitialize);
96			result.data = _mm_min_ps(v1.data, v2.data);
97			return result;
98		}
99	};
100
101	template <precision P>
102	struct compute_min_vector<int32, P, tvec4, true>
103	{
104		GLM_FUNC_QUALIFIER static tvec4<int32, P> call(tvec4<int32, P> const & v1, tvec4<int32, P> const & v2)
105		{
106			tvec4<int32, P> result(uninitialize);
107			result.data = _mm_min_epi32(v1.data, v2.data);
108			return result;
109		}
110	};
111
112	template <precision P>
113	struct compute_min_vector<uint32, P, tvec4, true>
114	{
115		GLM_FUNC_QUALIFIER static tvec4<int32, P> call(tvec4<uint32, P> const & v1, tvec4<uint32, P> const & v2)
116		{
117			tvec4<uint32, P> result(uninitialize);
118			result.data = _mm_min_epu32(v1.data, v2.data);
119			return result;
120		}
121	};
122
123	template <precision P>
124	struct compute_max_vector<float, P, tvec4, true>
125	{
126		GLM_FUNC_QUALIFIER static tvec4<float, P> call(tvec4<float, P> const & v1, tvec4<float, P> const & v2)
127		{
128			tvec4<float, P> result(uninitialize);
129			result.data = _mm_max_ps(v1.data, v2.data);
130			return result;
131		}
132	};
133
134	template <precision P>
135	struct compute_max_vector<int32, P, tvec4, true>
136	{
137		GLM_FUNC_QUALIFIER static tvec4<int32, P> call(tvec4<int32, P> const & v1, tvec4<int32, P> const & v2)
138		{
139			tvec4<int32, P> result(uninitialize);
140			result.data = _mm_max_epi32(v1.data, v2.data);
141			return result;
142		}
143	};
144
145	template <precision P>
146	struct compute_max_vector<uint32, P, tvec4, true>
147	{
148		GLM_FUNC_QUALIFIER static tvec4<uint32, P> call(tvec4<uint32, P> const & v1, tvec4<uint32, P> const & v2)
149		{
150			tvec4<uint32, P> result(uninitialize);
151			result.data = _mm_max_epu32(v1.data, v2.data);
152			return result;
153		}
154	};
155
156	template <precision P>
157	struct compute_clamp_vector<float, P, tvec4, true>
158	{
159		GLM_FUNC_QUALIFIER static tvec4<float, P> call(tvec4<float, P> const & x, tvec4<float, P> const & minVal, tvec4<float, P> const & maxVal)
160		{
161			tvec4<float, P> result(uninitialize);
162			result.data = _mm_min_ps(_mm_max_ps(x.data, minVal.data), maxVal.data);
163			return result;
164		}
165	};
166
167	template <precision P>
168	struct compute_clamp_vector<int32, P, tvec4, true>
169	{
170		GLM_FUNC_QUALIFIER static tvec4<int32, P> call(tvec4<int32, P> const & x, tvec4<int32, P> const & minVal, tvec4<int32, P> const & maxVal)
171		{
172			tvec4<int32, P> result(uninitialize);
173			result.data = _mm_min_epi32(_mm_max_epi32(x.data, minVal.data), maxVal.data);
174			return result;
175		}
176	};
177
178	template <precision P>
179	struct compute_clamp_vector<uint32, P, tvec4, true>
180	{
181		GLM_FUNC_QUALIFIER static tvec4<uint32, P> call(tvec4<uint32, P> const & x, tvec4<uint32, P> const & minVal, tvec4<uint32, P> const & maxVal)
182		{
183			tvec4<uint32, P> result(uninitialize);
184			result.data = _mm_min_epu32(_mm_max_epu32(x.data, minVal.data), maxVal.data);
185			return result;
186		}
187	};
188
189	template <precision P>
190	struct compute_mix_vector<float, bool, P, tvec4, true>
191	{
192		GLM_FUNC_QUALIFIER static tvec4<float, P> call(tvec4<float, P> const & x, tvec4<float, P> const & y, tvec4<bool, P> const & a)
193		{
194			__m128i const Load = _mm_set_epi32(-(int)a.w, -(int)a.z, -(int)a.y, -(int)a.x);
195			__m128 const Mask = _mm_castsi128_ps(Load);
196
197			tvec4<float, P> Result(uninitialize);
198#			if 0 && GLM_ARCH & GLM_ARCH_AVX
199				Result.data = _mm_blendv_ps(x.data, y.data, Mask);
200#			else
201				Result.data = _mm_or_ps(_mm_and_ps(Mask, y.data), _mm_andnot_ps(Mask, x.data));
202#			endif
203			return Result;
204		}
205	};
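/* FIXME: this compute_step_vector specialization is disabled. Unlike the enabled specializations in this file, its template argument list does not end with the 'true' flag.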
207	template <precision P>
208	struct compute_step_vector<float, P, tvec4>
209	{
210		GLM_FUNC_QUALIFIER static tvec4<float, P> call(tvec4<float, P> const& edge, tvec4<float, P> const& x)
211		{
212			tvec4<float, P> result(uninitialize);
213			result.data = glm_vec4_step(edge.data, x.data);
214			return result;
215		}
216	};
217*/
218	template <precision P>
219	struct compute_smoothstep_vector<float, P, tvec4, true>
220	{
221		GLM_FUNC_QUALIFIER static tvec4<float, P> call(tvec4<float, P> const& edge0, tvec4<float, P> const& edge1, tvec4<float, P> const& x)
222		{
223			tvec4<float, P> result(uninitialize);
224			result.data = glm_vec4_smoothstep(edge0.data, edge1.data, x.data);
225			return result;
226		}
227	};
228}//namespace detail
229}//namespace glm
230
231#endif//GLM_ARCH & GLM_ARCH_SSE2_BIT
232