1/// @ref gtx_simd_quat
2/// @file glm/gtx/simd_quat.inl
3
4namespace glm{
5namespace detail{
6
7
8//////////////////////////////////////
9// Debugging
10#if 0
11void print(__m128 v)
12{
13    GLM_ALIGN(16) float result[4];
14    _mm_store_ps(result, v);
15
16    printf("__m128:    %f %f %f %f\n", result[0], result[1], result[2], result[3]);
17}
18
19void print(const fvec4SIMD &v)
20{
21    printf("fvec4SIMD: %f %f %f %f\n", v.x, v.y, v.z, v.w);
22}
23#endif
24
25//////////////////////////////////////
26// Implicit basic constructors
27
28#	if !GLM_HAS_DEFAULTED_FUNCTIONS || !defined(GLM_FORCE_NO_CTOR_INIT)
29	GLM_FUNC_QUALIFIER fquatSIMD::fquatSIMD()
30#		ifdef GLM_FORCE_NO_CTOR_INIT
31			: Data(_mm_set_ps(1.0f, 0.0f, 0.0f, 0.0f))
32#		endif
33	{}
34#	endif
35
36#	if !GLM_HAS_DEFAULTED_FUNCTIONS
37	GLM_FUNC_QUALIFIER fquatSIMD::fquatSIMD(fquatSIMD const & q) :
38		Data(q.Data)
39	{}
40#	endif//!GLM_HAS_DEFAULTED_FUNCTIONS
41
42GLM_FUNC_QUALIFIER fquatSIMD::fquatSIMD(__m128 const & Data) :
43	Data(Data)
44{}
45
46//////////////////////////////////////
47// Explicit basic constructors
48
49GLM_FUNC_QUALIFIER fquatSIMD::fquatSIMD(float const & w, float const & x, float const & y, float const & z) :
50	Data(_mm_set_ps(w, z, y, x))
51{}
52
53GLM_FUNC_QUALIFIER fquatSIMD::fquatSIMD(quat const & q) :
54	Data(_mm_set_ps(q.w, q.z, q.y, q.x))
55{}
56
57GLM_FUNC_QUALIFIER fquatSIMD::fquatSIMD(vec3 const & eulerAngles)
58{
59	vec3 c = glm::cos(eulerAngles * 0.5f);
60	vec3 s = glm::sin(eulerAngles * 0.5f);
61
62	Data = _mm_set_ps(
63		(c.x * c.y * c.z) + (s.x * s.y * s.z),
64		(c.x * c.y * s.z) - (s.x * s.y * c.z),
65		(c.x * s.y * c.z) + (s.x * c.y * s.z),
66		(s.x * c.y * c.z) - (c.x * s.y * s.z));
67}
68
69
70//////////////////////////////////////
71// Unary arithmetic operators
72
73#if !GLM_HAS_DEFAULTED_FUNCTIONS
74	GLM_FUNC_QUALIFIER fquatSIMD& fquatSIMD::operator=(fquatSIMD const & q)
75	{
76		this->Data = q.Data;
77		return *this;
78	}
79#endif//!GLM_HAS_DEFAULTED_FUNCTIONS
80
81GLM_FUNC_QUALIFIER fquatSIMD& fquatSIMD::operator*=(float const & s)
82{
83	this->Data = _mm_mul_ps(this->Data, _mm_set_ps1(s));
84	return *this;
85}
86
87GLM_FUNC_QUALIFIER fquatSIMD& fquatSIMD::operator/=(float const & s)
88{
89	this->Data = _mm_div_ps(Data, _mm_set1_ps(s));
90	return *this;
91}
92
93
94
95// negate operator
96GLM_FUNC_QUALIFIER fquatSIMD operator- (fquatSIMD const & q)
97{
98    return fquatSIMD(_mm_mul_ps(q.Data, _mm_set_ps(-1.0f, -1.0f, -1.0f, -1.0f)));
99}
100
101// operator+
102GLM_FUNC_QUALIFIER fquatSIMD operator+ (fquatSIMD const & q1, fquatSIMD const & q2)
103{
104	return fquatSIMD(_mm_add_ps(q1.Data, q2.Data));
105}
106
107//operator*
108GLM_FUNC_QUALIFIER fquatSIMD operator* (fquatSIMD const & q1, fquatSIMD const & q2)
109{
110    // SSE2 STATS:
111    //    11 shuffle
112    //    8  mul
113    //    8  add
114
115    // SSE4 STATS:
116    //    3 shuffle
117    //    4 mul
118    //    4 dpps
119
120    __m128 mul0 = _mm_mul_ps(q1.Data, _mm_shuffle_ps(q2.Data, q2.Data, _MM_SHUFFLE(0, 1, 2, 3)));
121    __m128 mul1 = _mm_mul_ps(q1.Data, _mm_shuffle_ps(q2.Data, q2.Data, _MM_SHUFFLE(1, 0, 3, 2)));
122    __m128 mul2 = _mm_mul_ps(q1.Data, _mm_shuffle_ps(q2.Data, q2.Data, _MM_SHUFFLE(2, 3, 0, 1)));
123    __m128 mul3 = _mm_mul_ps(q1.Data, q2.Data);
124
125#   if(GLM_ARCH & GLM_ARCH_SSE41_BIT)
126    __m128 add0 = _mm_dp_ps(mul0, _mm_set_ps(1.0f, -1.0f,  1.0f,  1.0f), 0xff);
127    __m128 add1 = _mm_dp_ps(mul1, _mm_set_ps(1.0f,  1.0f,  1.0f, -1.0f), 0xff);
128    __m128 add2 = _mm_dp_ps(mul2, _mm_set_ps(1.0f,  1.0f, -1.0f,  1.0f), 0xff);
129    __m128 add3 = _mm_dp_ps(mul3, _mm_set_ps(1.0f, -1.0f, -1.0f, -1.0f), 0xff);
130#   else
131           mul0 = _mm_mul_ps(mul0, _mm_set_ps(1.0f, -1.0f,  1.0f,  1.0f));
132    __m128 add0 = _mm_add_ps(mul0, _mm_movehl_ps(mul0, mul0));
133           add0 = _mm_add_ss(add0, _mm_shuffle_ps(add0, add0, 1));
134
135           mul1 = _mm_mul_ps(mul1, _mm_set_ps(1.0f,  1.0f,  1.0f, -1.0f));
136    __m128 add1 = _mm_add_ps(mul1, _mm_movehl_ps(mul1, mul1));
137           add1 = _mm_add_ss(add1, _mm_shuffle_ps(add1, add1, 1));
138
139           mul2 = _mm_mul_ps(mul2, _mm_set_ps(1.0f,  1.0f, -1.0f,  1.0f));
140    __m128 add2 = _mm_add_ps(mul2, _mm_movehl_ps(mul2, mul2));
141           add2 = _mm_add_ss(add2, _mm_shuffle_ps(add2, add2, 1));
142
143           mul3 = _mm_mul_ps(mul3, _mm_set_ps(1.0f, -1.0f, -1.0f, -1.0f));
144    __m128 add3 = _mm_add_ps(mul3, _mm_movehl_ps(mul3, mul3));
145           add3 = _mm_add_ss(add3, _mm_shuffle_ps(add3, add3, 1));
146#endif
147
148
149    // This SIMD code is a politically correct way of doing this, but in every test I've tried it has been slower than
150    // the final code below. I'll keep this here for reference - maybe somebody else can do something better...
151    //
152    //__m128 xxyy = _mm_shuffle_ps(add0, add1, _MM_SHUFFLE(0, 0, 0, 0));
153    //__m128 zzww = _mm_shuffle_ps(add2, add3, _MM_SHUFFLE(0, 0, 0, 0));
154    //
155    //return _mm_shuffle_ps(xxyy, zzww, _MM_SHUFFLE(2, 0, 2, 0));
156
157    float x;
158    float y;
159    float z;
160    float w;
161
162    _mm_store_ss(&x, add0);
163    _mm_store_ss(&y, add1);
164    _mm_store_ss(&z, add2);
165    _mm_store_ss(&w, add3);
166
167    return detail::fquatSIMD(w, x, y, z);
168}
169
170GLM_FUNC_QUALIFIER fvec4SIMD operator* (fquatSIMD const & q, fvec4SIMD const & v)
171{
172    static const __m128 two = _mm_set1_ps(2.0f);
173
174    __m128 q_wwww  = _mm_shuffle_ps(q.Data, q.Data, _MM_SHUFFLE(3, 3, 3, 3));
175    __m128 q_swp0  = _mm_shuffle_ps(q.Data, q.Data, _MM_SHUFFLE(3, 0, 2, 1));
176	__m128 q_swp1  = _mm_shuffle_ps(q.Data, q.Data, _MM_SHUFFLE(3, 1, 0, 2));
177	__m128 v_swp0  = _mm_shuffle_ps(v.Data, v.Data, _MM_SHUFFLE(3, 0, 2, 1));
178	__m128 v_swp1  = _mm_shuffle_ps(v.Data, v.Data, _MM_SHUFFLE(3, 1, 0, 2));
179
180	__m128 uv      = _mm_sub_ps(_mm_mul_ps(q_swp0, v_swp1), _mm_mul_ps(q_swp1, v_swp0));
181    __m128 uv_swp0 = _mm_shuffle_ps(uv, uv, _MM_SHUFFLE(3, 0, 2, 1));
182    __m128 uv_swp1 = _mm_shuffle_ps(uv, uv, _MM_SHUFFLE(3, 1, 0, 2));
183    __m128 uuv     = _mm_sub_ps(_mm_mul_ps(q_swp0, uv_swp1), _mm_mul_ps(q_swp1, uv_swp0));
184
185
186    uv  = _mm_mul_ps(uv,  _mm_mul_ps(q_wwww, two));
187    uuv = _mm_mul_ps(uuv, two);
188
189    return _mm_add_ps(v.Data, _mm_add_ps(uv, uuv));
190}
191
192GLM_FUNC_QUALIFIER fvec4SIMD operator* (fvec4SIMD const & v, fquatSIMD const & q)
193{
194	return glm::inverse(q) * v;
195}
196
197GLM_FUNC_QUALIFIER fquatSIMD operator* (fquatSIMD const & q, float s)
198{
199	return fquatSIMD(_mm_mul_ps(q.Data, _mm_set1_ps(s)));
200}
201
202GLM_FUNC_QUALIFIER fquatSIMD operator* (float s, fquatSIMD const & q)
203{
204	return fquatSIMD(_mm_mul_ps(_mm_set1_ps(s), q.Data));
205}
206
207
208//operator/
209GLM_FUNC_QUALIFIER fquatSIMD operator/ (fquatSIMD const & q, float s)
210{
211	return fquatSIMD(_mm_div_ps(q.Data, _mm_set1_ps(s)));
212}
213
214
215}//namespace detail
216
217
218GLM_FUNC_QUALIFIER quat quat_cast
219(
220	detail::fquatSIMD const & x
221)
222{
223	GLM_ALIGN(16) quat Result;
224	_mm_store_ps(&Result[0], x.Data);
225
226	return Result;
227}
228
229template <typename T>
230GLM_FUNC_QUALIFIER detail::fquatSIMD quatSIMD_cast_impl(const T m0[], const T m1[], const T m2[])
231{
232    T trace = m0[0] + m1[1] + m2[2] + T(1.0);
233    if (trace > T(0))
234    {
235        T s = static_cast<T>(0.5) / sqrt(trace);
236
237        return _mm_set_ps(
238            static_cast<float>(T(0.25) / s),
239            static_cast<float>((m0[1] - m1[0]) * s),
240            static_cast<float>((m2[0] - m0[2]) * s),
241            static_cast<float>((m1[2] - m2[1]) * s));
242    }
243    else
244    {
245        if (m0[0] > m1[1])
246        {
247            if (m0[0] > m2[2])
248            {
249                // X is biggest.
250                T s = sqrt(m0[0] - m1[1] - m2[2] + T(1.0)) * T(0.5);
251
252                return _mm_set_ps(
253                    static_cast<float>((m1[2] - m2[1]) * s),
254                    static_cast<float>((m2[0] + m0[2]) * s),
255                    static_cast<float>((m0[1] + m1[0]) * s),
256                    static_cast<float>(T(0.5)          * s));
257            }
258        }
259        else
260        {
261            if (m1[1] > m2[2])
262            {
263                // Y is biggest.
264                T s = sqrt(m1[1] - m0[0] - m2[2] + T(1.0)) * T(0.5);
265
266                return _mm_set_ps(
267                    static_cast<float>((m2[0] - m0[2]) * s),
268                    static_cast<float>((m1[2] + m2[1]) * s),
269                    static_cast<float>(T(0.5)          * s),
270                    static_cast<float>((m0[1] + m1[0]) * s));
271            }
272        }
273
274        // Z is biggest.
275        T s = sqrt(m2[2] - m0[0] - m1[1] + T(1.0)) * T(0.5);
276
277        return _mm_set_ps(
278            static_cast<float>((m0[1] - m1[0]) * s),
279            static_cast<float>(T(0.5)          * s),
280            static_cast<float>((m1[2] + m2[1]) * s),
281            static_cast<float>((m2[0] + m0[2]) * s));
282    }
283}
284
285GLM_FUNC_QUALIFIER detail::fquatSIMD quatSIMD_cast
286(
287	detail::fmat4x4SIMD const & m
288)
289{
290    // Scalar implementation for now.
291    GLM_ALIGN(16) float m0[4];
292    GLM_ALIGN(16) float m1[4];
293    GLM_ALIGN(16) float m2[4];
294
295    _mm_store_ps(m0, m[0].Data);
296    _mm_store_ps(m1, m[1].Data);
297    _mm_store_ps(m2, m[2].Data);
298
299    return quatSIMD_cast_impl(m0, m1, m2);
300}
301
302template <typename T, precision P>
303GLM_FUNC_QUALIFIER detail::fquatSIMD quatSIMD_cast
304(
305    tmat4x4<T, P> const & m
306)
307{
308    return quatSIMD_cast_impl(&m[0][0], &m[1][0], &m[2][0]);
309}
310
311template <typename T, precision P>
312GLM_FUNC_QUALIFIER detail::fquatSIMD quatSIMD_cast
313(
314    tmat3x3<T, P> const & m
315)
316{
317    return quatSIMD_cast_impl(&m[0][0], &m[1][0], &m[2][0]);
318}
319
320
321GLM_FUNC_QUALIFIER detail::fmat4x4SIMD mat4SIMD_cast
322(
323	detail::fquatSIMD const & q
324)
325{
326    detail::fmat4x4SIMD result;
327
328    __m128 _wwww  = _mm_shuffle_ps(q.Data, q.Data, _MM_SHUFFLE(3, 3, 3, 3));
329    __m128 _xyzw  = q.Data;
330    __m128 _zxyw  = _mm_shuffle_ps(q.Data, q.Data, _MM_SHUFFLE(3, 1, 0, 2));
331    __m128 _yzxw  = _mm_shuffle_ps(q.Data, q.Data, _MM_SHUFFLE(3, 0, 2, 1));
332
333    __m128 _xyzw2 = _mm_add_ps(_xyzw, _xyzw);
334    __m128 _zxyw2 = _mm_shuffle_ps(_xyzw2, _xyzw2, _MM_SHUFFLE(3, 1, 0, 2));
335    __m128 _yzxw2 = _mm_shuffle_ps(_xyzw2, _xyzw2, _MM_SHUFFLE(3, 0, 2, 1));
336
337    __m128 _tmp0  = _mm_sub_ps(_mm_set1_ps(1.0f), _mm_mul_ps(_yzxw2, _yzxw));
338           _tmp0  = _mm_sub_ps(_tmp0, _mm_mul_ps(_zxyw2, _zxyw));
339
340    __m128 _tmp1  = _mm_mul_ps(_yzxw2, _xyzw);
341           _tmp1  = _mm_add_ps(_tmp1, _mm_mul_ps(_zxyw2, _wwww));
342
343    __m128 _tmp2  = _mm_mul_ps(_zxyw2, _xyzw);
344           _tmp2  = _mm_sub_ps(_tmp2, _mm_mul_ps(_yzxw2, _wwww));
345
346
347    // There's probably a better, more politically correct way of doing this...
348    result[0].Data = _mm_set_ps(
349        0.0f,
350        reinterpret_cast<float*>(&_tmp2)[0],
351        reinterpret_cast<float*>(&_tmp1)[0],
352        reinterpret_cast<float*>(&_tmp0)[0]);
353
354    result[1].Data = _mm_set_ps(
355        0.0f,
356        reinterpret_cast<float*>(&_tmp1)[1],
357        reinterpret_cast<float*>(&_tmp0)[1],
358        reinterpret_cast<float*>(&_tmp2)[1]);
359
360    result[2].Data = _mm_set_ps(
361        0.0f,
362        reinterpret_cast<float*>(&_tmp0)[2],
363        reinterpret_cast<float*>(&_tmp2)[2],
364        reinterpret_cast<float*>(&_tmp1)[2]);
365
366   result[3].Data = _mm_set_ps(
367        1.0f,
368        0.0f,
369        0.0f,
370        0.0f);
371
372
373    return result;
374}
375
376GLM_FUNC_QUALIFIER mat4 mat4_cast
377(
378	detail::fquatSIMD const & q
379)
380{
381    return mat4_cast(mat4SIMD_cast(q));
382}
383
384
385
386GLM_FUNC_QUALIFIER float length
387(
388	detail::fquatSIMD const & q
389)
390{
391    return glm::sqrt(dot(q, q));
392}
393
394GLM_FUNC_QUALIFIER detail::fquatSIMD normalize
395(
396	detail::fquatSIMD const & q
397)
398{
399    return _mm_mul_ps(q.Data, _mm_set1_ps(1.0f / length(q)));
400}
401
402GLM_FUNC_QUALIFIER float dot
403(
404	detail::fquatSIMD const & q1,
405	detail::fquatSIMD const & q2
406)
407{
408    float result;
409    _mm_store_ss(&result, detail::sse_dot_ps(q1.Data, q2.Data));
410
411    return result;
412}
413
414GLM_FUNC_QUALIFIER detail::fquatSIMD mix
415(
416	detail::fquatSIMD const & x,
417	detail::fquatSIMD const & y,
418	float const & a
419)
420{
421	float cosTheta = dot(x, y);
422
423    if (cosTheta > 1.0f - glm::epsilon<float>())
424    {
425	    return _mm_add_ps(x.Data, _mm_mul_ps(_mm_set1_ps(a), _mm_sub_ps(y.Data, x.Data)));
426    }
427    else
428    {
429        float angle = glm::acos(cosTheta);
430
431
432        float s0 = glm::sin((1.0f - a) * angle);
433        float s1 = glm::sin(a * angle);
434        float d  = 1.0f / glm::sin(angle);
435
436        return (s0 * x + s1 * y) * d;
437    }
438}
439
440GLM_FUNC_QUALIFIER detail::fquatSIMD lerp
441(
442	detail::fquatSIMD const & x,
443	detail::fquatSIMD const & y,
444	float const & a
445)
446{
447	// Lerp is only defined in [0, 1]
448	assert(a >= 0.0f);
449	assert(a <= 1.0f);
450
451    return _mm_add_ps(x.Data, _mm_mul_ps(_mm_set1_ps(a), _mm_sub_ps(y.Data, x.Data)));
452}
453
454GLM_FUNC_QUALIFIER detail::fquatSIMD slerp
455(
456	detail::fquatSIMD const & x,
457	detail::fquatSIMD const & y,
458	float const & a
459)
460{
461	detail::fquatSIMD z = y;
462
463	float cosTheta = dot(x, y);
464
465	// If cosTheta < 0, the interpolation will take the long way around the sphere.
466	// To fix this, one quat must be negated.
467	if (cosTheta < 0.0f)
468	{
469		z        = -y;
470		cosTheta = -cosTheta;
471	}
472
473	// Perform a linear interpolation when cosTheta is close to 1 to avoid side effect of sin(angle) becoming a zero denominator
474	if(cosTheta > 1.0f - epsilon<float>())
475	{
476		return _mm_add_ps(x.Data, _mm_mul_ps(_mm_set1_ps(a), _mm_sub_ps(y.Data, x.Data)));
477	}
478	else
479	{
480        float angle = glm::acos(cosTheta);
481
482
483		float s0 = glm::sin((1.0f - a) * angle);
484        float s1 = glm::sin(a * angle);
485        float d  = 1.0f / glm::sin(angle);
486
487        return (s0 * x + s1 * y) * d;
488	}
489}
490
491
492GLM_FUNC_QUALIFIER detail::fquatSIMD fastMix
493(
494	detail::fquatSIMD const & x,
495	detail::fquatSIMD const & y,
496	float const & a
497)
498{
499	float cosTheta = dot(x, y);
500
501    if (cosTheta > 1.0f - glm::epsilon<float>())
502    {
503	    return _mm_add_ps(x.Data, _mm_mul_ps(_mm_set1_ps(a), _mm_sub_ps(y.Data, x.Data)));
504    }
505    else
506    {
507        float angle = glm::fastAcos(cosTheta);
508
509
510        __m128 s  = glm::fastSin(_mm_set_ps((1.0f - a) * angle, a * angle, angle, 0.0f));
511
512        __m128 s0 =                               _mm_shuffle_ps(s, s, _MM_SHUFFLE(3, 3, 3, 3));
513        __m128 s1 =                               _mm_shuffle_ps(s, s, _MM_SHUFFLE(2, 2, 2, 2));
514        __m128 d  = _mm_div_ps(_mm_set1_ps(1.0f), _mm_shuffle_ps(s, s, _MM_SHUFFLE(1, 1, 1, 1)));
515
516        return _mm_mul_ps(_mm_add_ps(_mm_mul_ps(s0, x.Data), _mm_mul_ps(s1, y.Data)), d);
517    }
518}
519
520GLM_FUNC_QUALIFIER detail::fquatSIMD fastSlerp
521(
522	detail::fquatSIMD const & x,
523	detail::fquatSIMD const & y,
524	float const & a
525)
526{
527	detail::fquatSIMD z = y;
528
529	float cosTheta = dot(x, y);
530	if (cosTheta < 0.0f)
531	{
532		z        = -y;
533		cosTheta = -cosTheta;
534	}
535
536
537	if(cosTheta > 1.0f - epsilon<float>())
538	{
539		return _mm_add_ps(x.Data, _mm_mul_ps(_mm_set1_ps(a), _mm_sub_ps(y.Data, x.Data)));
540	}
541	else
542	{
543        float angle = glm::fastAcos(cosTheta);
544
545
546        __m128 s  = glm::fastSin(_mm_set_ps((1.0f - a) * angle, a * angle, angle, 0.0f));
547
548        __m128 s0 =                               _mm_shuffle_ps(s, s, _MM_SHUFFLE(3, 3, 3, 3));
549        __m128 s1 =                               _mm_shuffle_ps(s, s, _MM_SHUFFLE(2, 2, 2, 2));
550        __m128 d  = _mm_div_ps(_mm_set1_ps(1.0f), _mm_shuffle_ps(s, s, _MM_SHUFFLE(1, 1, 1, 1)));
551
552        return _mm_mul_ps(_mm_add_ps(_mm_mul_ps(s0, x.Data), _mm_mul_ps(s1, y.Data)), d);
553	}
554}
555
556
557
558GLM_FUNC_QUALIFIER detail::fquatSIMD conjugate
559(
560	detail::fquatSIMD const & q
561)
562{
563	return detail::fquatSIMD(_mm_mul_ps(q.Data, _mm_set_ps(1.0f, -1.0f, -1.0f, -1.0f)));
564}
565
566GLM_FUNC_QUALIFIER detail::fquatSIMD inverse
567(
568	detail::fquatSIMD const & q
569)
570{
571	return conjugate(q) / dot(q, q);
572}
573
574
575GLM_FUNC_QUALIFIER detail::fquatSIMD angleAxisSIMD
576(
577	float const & angle,
578	vec3 const & v
579)
580{
581	float s = glm::sin(angle * 0.5f);
582
583	return _mm_set_ps(
584		glm::cos(angle * 0.5f),
585		v.z * s,
586		v.y * s,
587		v.x * s);
588}
589
590GLM_FUNC_QUALIFIER detail::fquatSIMD angleAxisSIMD
591(
592	float const & angle,
593	float const & x,
594	float const & y,
595	float const & z
596)
597{
598	return angleAxisSIMD(angle, vec3(x, y, z));
599}
600
601
602GLM_FUNC_QUALIFIER __m128 fastSin(__m128 x)
603{
604	static const __m128 c0 = _mm_set1_ps(0.16666666666666666666666666666667f);
605	static const __m128 c1 = _mm_set1_ps(0.00833333333333333333333333333333f);
606	static const __m128 c2 = _mm_set1_ps(0.00019841269841269841269841269841f);
607
608	__m128 x3 = _mm_mul_ps(x,  _mm_mul_ps(x, x));
609	__m128 x5 = _mm_mul_ps(x3, _mm_mul_ps(x, x));
610	__m128 x7 = _mm_mul_ps(x5, _mm_mul_ps(x, x));
611
612	__m128 y0 = _mm_mul_ps(x3, c0);
613	__m128 y1 = _mm_mul_ps(x5, c1);
614	__m128 y2 = _mm_mul_ps(x7, c2);
615
616	return _mm_sub_ps(_mm_add_ps(_mm_sub_ps(x, y0), y1), y2);
617}
618
619
620}//namespace glm
621