1 /* Copyright 2004, 2005 Nicholas Bishop
2  *
3  * This file is part of SharpConstruct.
4  *
5  * SharpConstruct is free software; you can redistribute it and/or modify
6  * it under the terms of the GNU General Public License as published by
7  * the Free Software Foundation; either version 2 of the License, or
8  * (at your option) any later version.
9  *
10  * SharpConstruct is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13  * GNU General Public License for more details.
14  *
15  * You should have received a copy of the GNU General Public License
16  * along with SharpConstruct; if not, write to the Free Software
17  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA */
18 
19 #ifndef OPTIMIZED_H
20 #define OPTIMIZED_H
21 
22 #include "Align.hh"
23 #include <xmmintrin.h>
24 #include <vector>
25 
26 namespace SharpConstruct
27 {
28 	namespace Optimized
29 	{
30 		class Point3D
31 		{
32 		public:
Point3D()33 			inline Point3D()
34 			: data_( _mm_setzero_ps() )
35 			{}
Point3D(const __m128 & in)36 			inline Point3D( const __m128& in )
37 			: data_( in )
38 			{}
Point3D(const float x,const float y,const float z)39 			inline Point3D( const float x, const float y, const float z )
40 			: data_( _mm_set_ps( 0, z, y, x ) )
41 			{}
42 
43 			void CalculatePlaneNormal( const __m128 p1, const __m128 p2,
44 			                           const __m128 p3 );
45 			void CalculatePlaneUnormal( const __m128 p1, const __m128 p2,
46 			                            const __m128 p3 );
Distance(const Point3D & in)47 			inline float Distance( const Point3D& in )
48 			{
49 				register __m128 p1 = data_, shuff, dis;
50 				p1 = _mm_sub_ps( p1, in.data_ );
51 				p1 = _mm_mul_ps( p1, p1 );
52 				shuff = _mm_shuffle_ps( p1, p1, _MM_SHUFFLE( 1, 2, 3, 0 ) );
53 				dis = shuff;
54 				shuff = _mm_shuffle_ps( p1, p1, _MM_SHUFFLE( 2, 3, 0, 1 ) );
55 				dis = _mm_add_ss( dis, shuff );
56 				shuff = _mm_shuffle_ps( p1, p1, _MM_SHUFFLE( 3, 0, 1, 2 ) );
57 				dis = _mm_add_ss( dis, shuff );
58 				dis = _mm_rsqrt_ss( dis );
59 				dis = _mm_rcp_ss( dis );
60 				// Is there a faster way to do this?
61 				//return ((float*)&dis)[0];
62 				// Yes: _mm_store_ss
63 				float d;
64 				_mm_store_ss( &d, dis );
65 				return d;
66 			}
67 			// Not a true distance!
68 			void FastDistance( const __m128 in, float& d );
69 
70 			void Midpoint( const Point3D& a, const Point3D& b );
CopyTo(float * loc)71 			inline void CopyTo( float* loc )
72 			{
73 				_mm_storeu_ps( loc, data_ );
74 			}
CopyToAligned(float * loc)75 			inline void CopyToAligned( float* loc )
76 			{
77 				_mm_store_ps( loc, data_ );
78 			}
HorizAdd()79 			inline float HorizAdd() const
80 			{
81 				float a = 0;
82 				register __m128 t = data_;
83 				t = _mm_add_ps( t, _mm_shuffle_ps( t, t, _MM_SHUFFLE( 2, 1, 0, 3 ) ) );
84 				t = _mm_add_ps( t, _mm_shuffle_ps( t, t, _MM_SHUFFLE( 2, 2, 2, 2 ) ) );
85 				_mm_store_ss( &a, t );
86 				return a;
87 			}
Zero()88 			inline void Zero()
89 			{
90 				data_ = _mm_setzero_ps();
91 			}
Abs()92 			inline Point3D& Abs()
93 			{
94 				register __m128 v( _mm_set_ps1( -0.0 ) );
95 				data_ = _mm_andnot_ps( v, data_ );
96 				return *this;
97 			}
98 
X()99 			inline float& X()
100 			{
101 				return ( ( float* )&data_ )[ 0 ];
102 			}
Y()103 			inline float& Y()
104 			{
105 				return ( ( float* )&data_ )[ 1 ];
106 			}
Z()107 			inline float& Z()
108 			{
109 				return ( ( float* )&data_ )[ 2 ];
110 			}
W()111 			inline float& W()
112 			{
113 				return ( ( float* )&data_ )[ 3 ];
114 			}
X()115 			inline const float& X() const
116 			{
117 				return ( ( float* )&data_ )[ 0 ];
118 			}
Y()119 			inline const float& Y() const
120 			{
121 				return ( ( float* )&data_ )[ 1 ];
122 			}
Z()123 			inline const float& Z() const
124 			{
125 				return ( ( float* )&data_ )[ 2 ];
126 			}
127 
__m128()128 			inline operator __m128() const
129 			{
130 				return data_;
131 			}
132 
133 			/*inline bool operator==( const Point3D& p ) const
134 			{
135 				return X() == p.X() && Y() == p.Y() && Z() == p.Z();
136 				}*/
137 			inline Point3D operator+( const Point3D& in ) const
138 			{
139 				return Point3D( _mm_add_ps( data_, in.data_ ) );
140 			}
141 			inline Point3D operator-( const Point3D& in ) const
142 			{
143 				return Point3D( _mm_sub_ps( data_, in.data_ ) );
144 			}
145 			inline Point3D operator*( const Point3D& in ) const
146 			{
147 				return Point3D( _mm_mul_ps( data_, in.data_ ) );
148 			}
149 			inline Point3D operator*( const float in ) const
150 			{
151 				return Point3D( X() * in, Y() * in, Z() * in );
152 			}
153 			inline Point3D operator/( const float in ) const
154 			{
155 				return Point3D( X() / in, Y() / in, Z() / in );
156 			}
157 			inline void operator+=( const Point3D& in )
158 			{
159 				data_ = _mm_add_ps( data_, in.data_ );
160 			}
161 			inline void operator-=( const Point3D& in )
162 			{
163 				data_ = _mm_sub_ps( data_, in.data_ );
164 			}
165 			inline void operator*=( const Point3D& in )
166 			{
167 				data_ = _mm_mul_ps( data_, in.data_ );
168 			}
169 			inline void operator/=( const float in )
170 			{
171 				register __m128 d = data_, div = _mm_set1_ps( in );
172 				d = _mm_div_ps( d, div );
173 				data_ = d;
174 			}
175 
Data()176 			inline const __m128& Data() const
177 			{
178 				return data_;
179 			}
180 
RawData()181 			inline const __m128* RawData() const
182 			{
183 				return &data_;
184 			}
RawData()185 			inline __m128* RawData()
186 			{
187 				return &data_;
188 			}
189 		private:
190 			__m128 data_;
191 		};
192 
193 		void Normalize( Point3D& );
194 
195 		typedef Point3D Normal3D;
196 		typedef std::vector< Point3D > Point3DVector;
197 
198 		void Normalize( Point3DVector& );
199 		/*class Point3DVector
200 		{
201 		public:
202 			inline Point3DVector() : _data( 0 ), _size( 0 ), _real_size( 0 )
203 			{}
204 			inline Point3DVector( const Point3DVector& in ) : _data( 0 ), _size( 0 ), _real_size( 0 )
205 			{
206 				resize( in._size );
207 				for( int i = 0; i < _size; ++i )
208 					_data[ i ] = in._data[ i ];
209 			}
210 			inline ~Point3DVector()
211 			{
212 				clear();
213 			}
214 
215 			void NormalizeAll();
216 
217 			inline unsigned size() const
218 			{
219 				return _size;
220 			}
221 			inline void clear()
222 			{
223 				free( _data );
224 				_data = 0;
225 				_size = 0;
226 				_real_size = 0;
227 			}
228 			inline void resize( int size )
229 			{
230 				if( size > _real_size )
231 				{
232 					Point3DVector tmp;
233 					if( size > 0 )
234 						tmp = *this;
235 
236 					clear();
237 
238 					_real_size = static_cast< int >(
239 						pow( 2, round( log( size ) / log( 2 ) ) ) );
240 					if( _real_size < size )
241 						_real_size *= 2;
242 
243 					void* mem;
244 					posix_memalign( &mem, __alignof( __m128 ),
245 									_real_size * sizeof( __m128 ) );
246 					_data = ( __m128* )mem;
247 					for( int i = 0; i < _real_size; ++i )
248 						new( &_data[ i ] ) __m128;
249 
250 					if( size > 0 )
251 					{
252 						for( int i = 0; i < tmp._size; ++i )
253 							_data[ i ] = tmp._data[ i ];
254 					}
255 				}
256 
257 				_size = size;
258 			}
259 			inline void push_back( const Point3D& in )
260 			{
261 				resize( _size + 1 );
262 				_data[ _size - 1 ] = in;
263 			}
264 			inline Point3DProxy operator[]( int i )
265 			{
266 				//_proxy.Set( &_data[ i ] );
267 				//return _proxy;
268 				return Point3DProxy( &_data[ i ] );
269 			}
270 			inline Point3DVector& operator=( const Point3DVector& in )
271 			{
272 				resize( in._size );
273 				for( int i = 0; i < _size; ++i )
274 					_data[ i ] = in._data[ i ];
275 
276 				return ( *this );
277 			}
278 			inline __m128* RawData() const
279 			{
280 				return _data;
281 			}
282 		private:
283 			__m128* _data;
284 			int _size;
285 			int _real_size;
286 			};*/
287 	}
288 }
289 
290 #endif // OPTIMIZED_H
291