/* Copyright 2005 Nicholas Bishop
 *
 * This file is part of SharpConstruct.
 *
 * SharpConstruct is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * SharpConstruct is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with SharpConstruct; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA */

#include "Optimized.h"

#include <algorithm>
#include <functional>
#include <iostream>

using namespace SharpConstruct::Optimized;

FastDistance(const __m128 in,float & d)27 void Point3D::FastDistance( const __m128 in, float& d )
28 {
29 	register __m128 t( data_ );
30 	const register __m128 v( _mm_set_ps1( -0.0 ) );
31 	t = _mm_sub_ps( t, in );
32 	t = _mm_andnot_ps( v, t ); // Get absolute value
33 	t = _mm_add_ps( t, _mm_shuffle_ps( t, t, _MM_SHUFFLE( 2, 1, 0, 3 ) ) ); // Add components
34 	t = _mm_add_ps( t, _mm_shuffle_ps( t, t, _MM_SHUFFLE( 2, 2, 2, 2 ) ) ); // Final add, answer is in [0]
35 	_mm_store_ss( &d, t );
36 }
37 
Midpoint(const Point3D & a,const Point3D & b)38 void Point3D::Midpoint( const Point3D& a, const Point3D& b )
39 {
40 	*this = ( a + b ) / 2;
41 }
42 
CalculatePlaneNormal(const __m128 p1,const __m128 p2,const __m128 p3)43 void Point3D::CalculatePlaneNormal( const __m128 p1, const __m128 p2,
44 				    const __m128 p3 )
45 {
46 	register __m128 a, b, tmp1, tmp2, shuff1, shuff2;
47 
48 	a = _mm_sub_ps( p1, p2 );
49 	b = _mm_sub_ps( p3, p1 );
50 	// Shuffles a into Y,Z,X format
51 	shuff1 = _mm_shuffle_ps( a, a, _MM_SHUFFLE( 3, 0, 2, 1 ) );
52 	// Shuffles b into Z,X,Y format
53 	shuff2 = _mm_shuffle_ps( b, b, _MM_SHUFFLE( 1, 1, 0, 2 ) );
54 	tmp1 = _mm_mul_ps( shuff1, shuff2 );
55 	shuff1 = _mm_shuffle_ps( a, a, _MM_SHUFFLE( 1, 1, 0, 2 ) );
56 	shuff2 = _mm_shuffle_ps( b, b, _MM_SHUFFLE( 3, 0, 2, 1 ) );
57 	tmp2 = _mm_mul_ps( shuff1, shuff2 );
58 	a = _mm_sub_ps( tmp1, tmp2 );
59 
60 	// And normalize 'a':
61 	b = _mm_mul_ps( a, a );
62 
63 	b = _mm_add_ps( b, _mm_shuffle_ps( b, b, _MM_SHUFFLE( 2, 1, 0, 3 ) ) );
64 	b = _mm_add_ps( b, _mm_shuffle_ps( b, b, _MM_SHUFFLE( 2, 2, 2, 2 ) ) );
65 
66 	b = _mm_rsqrt_ss( b );
67 	b = _mm_shuffle_ps( b, b, _MM_SHUFFLE( 0, 0, 0, 0 ) );
68 	data_ = _mm_mul_ps( a, b );
69 }
CalculatePlaneUnormal(const __m128 p1,const __m128 p2,const __m128 p3)70 void Point3D::CalculatePlaneUnormal( const __m128 p1, const __m128 p2,
71 				     const __m128 p3 )
72 {
73 	register __m128 a, b, tmp1, tmp2, shuff1, shuff2;
74 
75 	a = _mm_sub_ps( p1, p2 );
76 	b = _mm_sub_ps( p3, p1 );
77 	// Shuffles a into Y,Z,X format
78 	shuff1 = _mm_shuffle_ps( a, a, _MM_SHUFFLE( 3, 0, 2, 1 ) );
79 	// Shuffles b into Z,X,Y format
80 	shuff2 = _mm_shuffle_ps( b, b, _MM_SHUFFLE( 1, 1, 0, 2 ) );
81 	tmp1 = _mm_mul_ps( shuff1, shuff2 );
82 	shuff1 = _mm_shuffle_ps( a, a, _MM_SHUFFLE( 1, 1, 0, 2 ) );
83 	shuff2 = _mm_shuffle_ps( b, b, _MM_SHUFFLE( 3, 0, 2, 1 ) );
84 	tmp2 = _mm_mul_ps( shuff1, shuff2 );
85 	data_ = _mm_sub_ps( tmp1, tmp2 );
86 }
87 
Normalize(Point3D & p)88 void SharpConstruct::Optimized::Normalize( Point3D& p )
89 {
90 	register __m128 tmp( p );
91 	register __m128 length;
92 	register __m128 shuff;
93 	// Square all the components
94 	tmp = _mm_mul_ps( tmp, tmp );
95 	// Next six lines are to put X + Y + Z into length
96 	shuff = _mm_shuffle_ps( tmp, tmp, _MM_SHUFFLE( 1, 2, 3, 0 ) );
97 	length = shuff;
98 	shuff = _mm_shuffle_ps( tmp, tmp, _MM_SHUFFLE( 2, 3, 0, 1 ) );
99 	length = _mm_add_ss( length, shuff );
100 	shuff = _mm_shuffle_ps( tmp, tmp, _MM_SHUFFLE( 3, 0, 1, 2 ) );
101 	length = _mm_add_ss( length, shuff );
102 	// Take the reciprocal square root
103 	length = _mm_rsqrt_ss( length );
104 	// Move the rsqrt into all four positions
105 	length = _mm_shuffle_ps( length, length, _MM_SHUFFLE( 0, 0, 0, 0 ) );
106 	// Multiply it by data
107 	p = _mm_mul_ps( *p.RawData(), length );
108 }
109 
Normalize(Point3DVector & v)110 void SharpConstruct::Optimized::Normalize( Point3DVector& v )
111 {
112 	const unsigned size( v.size() );
113 	for( unsigned i = 0; i < size; ++i )
114 		Normalize( v[ i ] );
115 }
/*void Point3DVector::NormalizeAll()
{
	for( unsigned i = 0; i < size(); i++ )
	{
		register __m128 sq( data_[ i ] );

		// Square each component
		sq = _mm_mul_ps( sq, sq );

		// Horizontal add
		sq = _mm_add_ps( sq, _mm_shuffle_ps( sq, sq, _MM_SHUFFLE( 2, 1, 0, 3 ) ) );
		sq = _mm_add_ps( sq, _mm_shuffle_ps( sq, sq, _MM_SHUFFLE( 2, 2, 2, 2 ) ) );

		sq = _mm_rsqrt_ss( sq );
		sq = _mm_shuffle_ps( sq, sq, _MM_SHUFFLE( 0, 0, 0, 0 ) );

		data_[ i ] = _mm_mul_ps( data_[ i ], sq );
	}
}*/