// Optimized.h - OpenGrok cross reference for /dports/graphics/sharpconstruct/sharpconstruct-0.11/include/Optimized.h

/* Copyright 2004, 2005 Nicholas Bishop
 *
 * This file is part of SharpConstruct.
 *
 * SharpConstruct is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * SharpConstruct is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with SharpConstruct; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA */

#ifndef OPTIMIZED_H
#define OPTIMIZED_H

#include "Align.hh"
#include <xmmintrin.h>
#include <vector>

namespace SharpConstruct
{
	namespace Optimized
	{
		// Three-component point/vector held in a single SSE register.
		//
		// Lane layout of data_, lowest lane first: X, Y, Z, W.  The
		// three-float constructor sets W to zero; the __m128 constructor
		// copies all four lanes verbatim, so W is whatever the caller
		// passed in.  Operations below note explicitly whether they touch W.
		class Point3D
		{
		public:
			// All four lanes zero.
			inline Point3D()
			: data_( _mm_setzero_ps() )
			{}
			// Wrap an existing SSE value; all four lanes are copied as-is.
			// Intentionally implicit so __m128 results convert back to
			// Point3D without ceremony.
			inline Point3D( const __m128& in )
			: data_( in )
			{}
			// Build from components; W is set to zero.
			// (_mm_set_ps takes its arguments highest lane first.)
			inline Point3D( const float x, const float y, const float z )
			: data_( _mm_set_ps( 0, z, y, x ) )
			{}

			// Defined out of line.  NOTE(review): presumably stores the
			// (normalized / un-normalized) normal of the plane through
			// p1, p2, p3 into this object -- confirm against the .cpp.
			void CalculatePlaneNormal( const __m128 p1, const __m128 p2,
			                           const __m128 p3 );
			void CalculatePlaneUnormal( const __m128 p1, const __m128 p2,
			                            const __m128 p3 );

			// Euclidean distance between this point and `in`, using the
			// X, Y and Z lanes only (W is ignored).
			//
			// The square root is taken with _mm_sqrt_ss instead of chaining
			// the _mm_rsqrt_ss and _mm_rcp_ss approximations (each only good
			// to roughly 12 bits), so the result is correctly rounded.
			inline float Distance( const Point3D& in ) const
			{
				const __m128 diff = _mm_sub_ps( data_, in.data_ );
				const __m128 sq = _mm_mul_ps( diff, diff );

				// Broadcast dy^2 and dz^2 so they can be added into lane 0.
				const __m128 y2 = _mm_shuffle_ps( sq, sq, _MM_SHUFFLE( 1, 1, 1, 1 ) );
				const __m128 z2 = _mm_shuffle_ps( sq, sq, _MM_SHUFFLE( 2, 2, 2, 2 ) );

				// lane 0 = ( dx^2 + dy^2 ) + dz^2
				__m128 sum = _mm_add_ss( sq, y2 );
				sum = _mm_add_ss( sum, z2 );

				float d;
				_mm_store_ss( &d, _mm_sqrt_ss( sum ) );
				return d;
			}
			// Not a true distance!
			void FastDistance( const __m128 in, float& d );

			// Defined out of line.  NOTE(review): presumably sets this
			// object to the midpoint of a and b -- confirm against the .cpp.
			void Midpoint( const Point3D& a, const Point3D& b );

			// Write all four lanes (X, Y, Z, W) to loc[0..3].
			// No alignment requirement on loc.
			inline void CopyTo( float* loc ) const
			{
				_mm_storeu_ps( loc, data_ );
			}
			// Write all four lanes (X, Y, Z, W) to loc[0..3].
			// loc must be 16-byte aligned (_mm_store_ps requirement).
			inline void CopyToAligned( float* loc ) const
			{
				_mm_store_ps( loc, data_ );
			}
			// Sum of all four lanes, W included, evaluated as
			// ( X + W ) + ( Z + Y ).
			inline float HorizAdd() const
			{
				float a = 0;
				__m128 t = data_;
				// After this add: lane 0 = X + W, lane 2 = Z + Y.
				t = _mm_add_ps( t, _mm_shuffle_ps( t, t, _MM_SHUFFLE( 2, 1, 0, 3 ) ) );
				// Add the broadcast lane 2 so lane 0 holds the full sum.
				t = _mm_add_ps( t, _mm_shuffle_ps( t, t, _MM_SHUFFLE( 2, 2, 2, 2 ) ) );
				_mm_store_ss( &a, t );
				return a;
			}
			// Reset all four lanes to zero.
			inline void Zero()
			{
				data_ = _mm_setzero_ps();
			}
			// Replace every lane (W included) with its absolute value by
			// clearing the sign bit.  Returns *this to allow chaining.
			inline Point3D& Abs()
			{
				// -0.0f has only the sign bit set; andnot computes ~mask & data_.
				const __m128 signMask = _mm_set1_ps( -0.0f );
				data_ = _mm_andnot_ps( signMask, data_ );
				return *this;
			}

			// Mutable access to the individual lanes.
			inline float& X()
			{
				return reinterpret_cast< float* >( &data_ )[ 0 ];
			}
			inline float& Y()
			{
				return reinterpret_cast< float* >( &data_ )[ 1 ];
			}
			inline float& Z()
			{
				return reinterpret_cast< float* >( &data_ )[ 2 ];
			}
			inline float& W()
			{
				return reinterpret_cast< float* >( &data_ )[ 3 ];
			}
			// Read-only access to the individual lanes.  The cast keeps
			// constness instead of stripping it.
			inline const float& X() const
			{
				return reinterpret_cast< const float* >( &data_ )[ 0 ];
			}
			inline const float& Y() const
			{
				return reinterpret_cast< const float* >( &data_ )[ 1 ];
			}
			inline const float& Z() const
			{
				return reinterpret_cast< const float* >( &data_ )[ 2 ];
			}
			inline const float& W() const
			{
				return reinterpret_cast< const float* >( &data_ )[ 3 ];
			}

			// Implicit conversion so a Point3D can be passed straight to
			// intrinsics and to the __m128-taking members above.
			inline operator __m128() const
			{
				return data_;
			}

			/*inline bool operator==( const Point3D& p ) const
			{
				return X() == p.X() && Y() == p.Y() && Z() == p.Z();
				}*/

			// Lane-wise arithmetic on all four lanes (W included).
			inline Point3D operator+( const Point3D& in ) const
			{
				return Point3D( _mm_add_ps( data_, in.data_ ) );
			}
			inline Point3D operator-( const Point3D& in ) const
			{
				return Point3D( _mm_sub_ps( data_, in.data_ ) );
			}
			inline Point3D operator*( const Point3D& in ) const
			{
				return Point3D( _mm_mul_ps( data_, in.data_ ) );
			}
			// Scale X, Y and Z by a scalar.  The result is built with the
			// three-float constructor, so its W lane is zero regardless of
			// this object's W.
			inline Point3D operator*( const float in ) const
			{
				return Point3D( X() * in, Y() * in, Z() * in );
			}
			// Divide X, Y and Z by a scalar; the result's W lane is zero
			// (see operator*( float )).
			inline Point3D operator/( const float in ) const
			{
				return Point3D( X() / in, Y() / in, Z() / in );
			}
			// In-place lane-wise arithmetic on all four lanes (W included).
			inline void operator+=( const Point3D& in )
			{
				data_ = _mm_add_ps( data_, in.data_ );
			}
			inline void operator-=( const Point3D& in )
			{
				data_ = _mm_sub_ps( data_, in.data_ );
			}
			inline void operator*=( const Point3D& in )
			{
				data_ = _mm_mul_ps( data_, in.data_ );
			}
			// Divide every lane by `in`.  Unlike operator/( float ), this
			// also divides the W lane.
			inline void operator/=( const float in )
			{
				data_ = _mm_div_ps( data_, _mm_set1_ps( in ) );
			}

			// Read-only reference to the underlying SSE value.
			inline const __m128& Data() const
			{
				return data_;
			}

			// Pointer to the underlying SSE value.
			inline const __m128* RawData() const
			{
				return &data_;
			}
			inline __m128* RawData()
			{
				return &data_;
			}
		private:
			// Lanes, lowest first: X, Y, Z, W.
			__m128 data_;
		};

		void Normalize( Point3D& );

		typedef Point3D Normal3D;
		typedef std::vector< Point3D > Point3DVector;

		void Normalize( Point3DVector& );
		/*class Point3DVector
		{
		public:
			inline Point3DVector() : _data( 0 ), _size( 0 ), _real_size( 0 )
			{}
			inline Point3DVector( const Point3DVector& in ) : _data( 0 ), _size( 0 ), _real_size( 0 )
			{
				resize( in._size );
				for( int i = 0; i < _size; ++i )
					_data[ i ] = in._data[ i ];
			}
			inline ~Point3DVector()
			{
				clear();
			}

			void NormalizeAll();

			inline unsigned size() const
			{
				return _size;
			}
			inline void clear()
			{
				free( _data );
				_data = 0;
				_size = 0;
				_real_size = 0;
			}
			inline void resize( int size )
			{
				if( size > _real_size )
				{
					Point3DVector tmp;
					if( size > 0 )
						tmp = *this;

					clear();

					_real_size = static_cast< int >(
						pow( 2, round( log( size ) / log( 2 ) ) ) );
					if( _real_size < size )
						_real_size *= 2;

					void* mem;
					posix_memalign( &mem, __alignof( __m128 ),
									_real_size * sizeof( __m128 ) );
					_data = ( __m128* )mem;
					for( int i = 0; i < _real_size; ++i )
						new( &_data[ i ] ) __m128;

					if( size > 0 )
					{
						for( int i = 0; i < tmp._size; ++i )
							_data[ i ] = tmp._data[ i ];
					}
				}

				_size = size;
			}
			inline void push_back( const Point3D& in )
			{
				resize( _size + 1 );
				_data[ _size - 1 ] = in;
			}
			inline Point3DProxy operator[]( int i )
			{
				//_proxy.Set( &_data[ i ] );
				//return _proxy;
				return Point3DProxy( &_data[ i ] );
			}
			inline Point3DVector& operator=( const Point3DVector& in )
			{
				resize( in._size );
				for( int i = 0; i < _size; ++i )
					_data[ i ] = in._data[ i ];

				return ( *this );
			}
			inline __m128* RawData() const
			{
				return _data;
			}
		private:
			__m128* _data;
			int _size;
			int _real_size;
			};*/
	}
}

#endif // OPTIMIZED_H