1 /* Copyright 2004, 2005 Nicholas Bishop 2 * 3 * This file is part of SharpConstruct. 4 * 5 * SharpConstruct is free software; you can redistribute it and/or modify 6 * it under the terms of the GNU General Public License as published by 7 * the Free Software Foundation; either version 2 of the License, or 8 * (at your option) any later version. 9 * 10 * SharpConstruct is distributed in the hope that it will be useful, 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 * GNU General Public License for more details. 14 * 15 * You should have received a copy of the GNU General Public License 16 * along with SharpConstruct; if not, write to the Free Software 17 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ 18 19 #ifndef OPTIMIZED_H 20 #define OPTIMIZED_H 21 22 #include "Align.hh" 23 #include <xmmintrin.h> 24 #include <vector> 25 26 namespace SharpConstruct 27 { 28 namespace Optimized 29 { 30 class Point3D 31 { 32 public: Point3D()33 inline Point3D() 34 : data_( _mm_setzero_ps() ) 35 {} Point3D(const __m128 & in)36 inline Point3D( const __m128& in ) 37 : data_( in ) 38 {} Point3D(const float x,const float y,const float z)39 inline Point3D( const float x, const float y, const float z ) 40 : data_( _mm_set_ps( 0, z, y, x ) ) 41 {} 42 43 void CalculatePlaneNormal( const __m128 p1, const __m128 p2, 44 const __m128 p3 ); 45 void CalculatePlaneUnormal( const __m128 p1, const __m128 p2, 46 const __m128 p3 ); Distance(const Point3D & in)47 inline float Distance( const Point3D& in ) 48 { 49 register __m128 p1 = data_, shuff, dis; 50 p1 = _mm_sub_ps( p1, in.data_ ); 51 p1 = _mm_mul_ps( p1, p1 ); 52 shuff = _mm_shuffle_ps( p1, p1, _MM_SHUFFLE( 1, 2, 3, 0 ) ); 53 dis = shuff; 54 shuff = _mm_shuffle_ps( p1, p1, _MM_SHUFFLE( 2, 3, 0, 1 ) ); 55 dis = _mm_add_ss( dis, shuff ); 56 shuff = _mm_shuffle_ps( p1, p1, _MM_SHUFFLE( 3, 0, 1, 2 ) ); 57 dis = _mm_add_ss( dis, shuff ); 58 dis = _mm_rsqrt_ss( dis ); 59 dis = _mm_rcp_ss( dis ); 60 // Is there a faster way to do this? 61 //return ((float*)&dis)[0]; 62 // Yes: _mm_store_ss 63 float d; 64 _mm_store_ss( &d, dis ); 65 return d; 66 } 67 // Not a true distance! 68 void FastDistance( const __m128 in, float& d ); 69 70 void Midpoint( const Point3D& a, const Point3D& b ); CopyTo(float * loc)71 inline void CopyTo( float* loc ) 72 { 73 _mm_storeu_ps( loc, data_ ); 74 } CopyToAligned(float * loc)75 inline void CopyToAligned( float* loc ) 76 { 77 _mm_store_ps( loc, data_ ); 78 } HorizAdd()79 inline float HorizAdd() const 80 { 81 float a = 0; 82 register __m128 t = data_; 83 t = _mm_add_ps( t, _mm_shuffle_ps( t, t, _MM_SHUFFLE( 2, 1, 0, 3 ) ) ); 84 t = _mm_add_ps( t, _mm_shuffle_ps( t, t, _MM_SHUFFLE( 2, 2, 2, 2 ) ) ); 85 _mm_store_ss( &a, t ); 86 return a; 87 } Zero()88 inline void Zero() 89 { 90 data_ = _mm_setzero_ps(); 91 } Abs()92 inline Point3D& Abs() 93 { 94 register __m128 v( _mm_set_ps1( -0.0 ) ); 95 data_ = _mm_andnot_ps( v, data_ ); 96 return *this; 97 } 98 X()99 inline float& X() 100 { 101 return ( ( float* )&data_ )[ 0 ]; 102 } Y()103 inline float& Y() 104 { 105 return ( ( float* )&data_ )[ 1 ]; 106 } Z()107 inline float& Z() 108 { 109 return ( ( float* )&data_ )[ 2 ]; 110 } W()111 inline float& W() 112 { 113 return ( ( float* )&data_ )[ 3 ]; 114 } X()115 inline const float& X() const 116 { 117 return ( ( float* )&data_ )[ 0 ]; 118 } Y()119 inline const float& Y() const 120 { 121 return ( ( float* )&data_ )[ 1 ]; 122 } Z()123 inline const float& Z() const 124 { 125 return ( ( float* )&data_ )[ 2 ]; 126 } 127 __m128()128 inline operator __m128() const 129 { 130 return data_; 131 } 132 133 /*inline bool operator==( const Point3D& p ) const 134 { 135 return X() == p.X() && Y() == p.Y() && Z() == p.Z(); 136 }*/ 137 inline Point3D operator+( const Point3D& in ) const 138 { 139 return Point3D( _mm_add_ps( data_, in.data_ ) ); 140 } 141 inline Point3D operator-( const Point3D& in ) const 142 { 143 return Point3D( _mm_sub_ps( data_, in.data_ ) ); 144 } 145 inline Point3D operator*( const Point3D& in ) const 146 { 147 return Point3D( _mm_mul_ps( data_, in.data_ ) ); 148 } 149 inline Point3D operator*( const float in ) const 150 { 151 return Point3D( X() * in, Y() * in, Z() * in ); 152 } 153 inline Point3D operator/( const float in ) const 154 { 155 return Point3D( X() / in, Y() / in, Z() / in ); 156 } 157 inline void operator+=( const Point3D& in ) 158 { 159 data_ = _mm_add_ps( data_, in.data_ ); 160 } 161 inline void operator-=( const Point3D& in ) 162 { 163 data_ = _mm_sub_ps( data_, in.data_ ); 164 } 165 inline void operator*=( const Point3D& in ) 166 { 167 data_ = _mm_mul_ps( data_, in.data_ ); 168 } 169 inline void operator/=( const float in ) 170 { 171 register __m128 d = data_, div = _mm_set1_ps( in ); 172 d = _mm_div_ps( d, div ); 173 data_ = d; 174 } 175 Data()176 inline const __m128& Data() const 177 { 178 return data_; 179 } 180 RawData()181 inline const __m128* RawData() const 182 { 183 return &data_; 184 } RawData()185 inline __m128* RawData() 186 { 187 return &data_; 188 } 189 private: 190 __m128 data_; 191 }; 192 193 void Normalize( Point3D& ); 194 195 typedef Point3D Normal3D; 196 typedef std::vector< Point3D > Point3DVector; 197 198 void Normalize( Point3DVector& ); 199 /*class Point3DVector 200 { 201 public: 202 inline Point3DVector() : _data( 0 ), _size( 0 ), _real_size( 0 ) 203 {} 204 inline Point3DVector( const Point3DVector& in ) : _data( 0 ), _size( 0 ), _real_size( 0 ) 205 { 206 resize( in._size ); 207 for( int i = 0; i < _size; ++i ) 208 _data[ i ] = in._data[ i ]; 209 } 210 inline ~Point3DVector() 211 { 212 clear(); 213 } 214 215 void NormalizeAll(); 216 217 inline unsigned size() const 218 { 219 return _size; 220 } 221 inline void clear() 222 { 223 free( _data ); 224 _data = 0; 225 _size = 0; 226 _real_size = 0; 227 } 228 inline void resize( int size ) 229 { 230 if( size > _real_size ) 231 { 232 Point3DVector tmp; 233 if( size > 0 ) 234 tmp = *this; 235 236 clear(); 237 238 _real_size = static_cast< int >( 239 pow( 2, round( log( size ) / log( 2 ) ) ) ); 240 if( _real_size < size ) 241 _real_size *= 2; 242 243 void* mem; 244 posix_memalign( &mem, __alignof( __m128 ), 245 _real_size * sizeof( __m128 ) ); 246 _data = ( __m128* )mem; 247 for( int i = 0; i < _real_size; ++i ) 248 new( &_data[ i ] ) __m128; 249 250 if( size > 0 ) 251 { 252 for( int i = 0; i < tmp._size; ++i ) 253 _data[ i ] = tmp._data[ i ]; 254 } 255 } 256 257 _size = size; 258 } 259 inline void push_back( const Point3D& in ) 260 { 261 resize( _size + 1 ); 262 _data[ _size - 1 ] = in; 263 } 264 inline Point3DProxy operator[]( int i ) 265 { 266 //_proxy.Set( &_data[ i ] ); 267 //return _proxy; 268 return Point3DProxy( &_data[ i ] ); 269 } 270 inline Point3DVector& operator=( const Point3DVector& in ) 271 { 272 resize( in._size ); 273 for( int i = 0; i < _size; ++i ) 274 _data[ i ] = in._data[ i ]; 275 276 return ( *this ); 277 } 278 inline __m128* RawData() const 279 { 280 return _data; 281 } 282 private: 283 __m128* _data; 284 int _size; 285 int _real_size; 286 };*/ 287 } 288 } 289 290 #endif // OPTIMIZED_H 291