1 /* Copyright 2005 Nicholas Bishop
2 *
3 * This file is part of SharpConstruct.
4 *
5 * SharpConstruct is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
9 *
10 * SharpConstruct is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with SharpConstruct; if not, write to the Free Software
17 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
18
19 #include "Optimized.h"
20 #include <algorithm>
21 #include <functional>
22
23 #include <iostream>
24
25 using namespace SharpConstruct::Optimized;
26
FastDistance(const __m128 in,float & d)27 void Point3D::FastDistance( const __m128 in, float& d )
28 {
29 register __m128 t( data_ );
30 const register __m128 v( _mm_set_ps1( -0.0 ) );
31 t = _mm_sub_ps( t, in );
32 t = _mm_andnot_ps( v, t ); // Get absolute value
33 t = _mm_add_ps( t, _mm_shuffle_ps( t, t, _MM_SHUFFLE( 2, 1, 0, 3 ) ) ); // Add components
34 t = _mm_add_ps( t, _mm_shuffle_ps( t, t, _MM_SHUFFLE( 2, 2, 2, 2 ) ) ); // Final add, answer is in [0]
35 _mm_store_ss( &d, t );
36 }
37
Midpoint(const Point3D & a,const Point3D & b)38 void Point3D::Midpoint( const Point3D& a, const Point3D& b )
39 {
40 *this = ( a + b ) / 2;
41 }
42
CalculatePlaneNormal(const __m128 p1,const __m128 p2,const __m128 p3)43 void Point3D::CalculatePlaneNormal( const __m128 p1, const __m128 p2,
44 const __m128 p3 )
45 {
46 register __m128 a, b, tmp1, tmp2, shuff1, shuff2;
47
48 a = _mm_sub_ps( p1, p2 );
49 b = _mm_sub_ps( p3, p1 );
50 // Shuffles a into Y,Z,X format
51 shuff1 = _mm_shuffle_ps( a, a, _MM_SHUFFLE( 3, 0, 2, 1 ) );
52 // Shuffles b into Z,X,Y format
53 shuff2 = _mm_shuffle_ps( b, b, _MM_SHUFFLE( 1, 1, 0, 2 ) );
54 tmp1 = _mm_mul_ps( shuff1, shuff2 );
55 shuff1 = _mm_shuffle_ps( a, a, _MM_SHUFFLE( 1, 1, 0, 2 ) );
56 shuff2 = _mm_shuffle_ps( b, b, _MM_SHUFFLE( 3, 0, 2, 1 ) );
57 tmp2 = _mm_mul_ps( shuff1, shuff2 );
58 a = _mm_sub_ps( tmp1, tmp2 );
59
60 // And normalize 'a':
61 b = _mm_mul_ps( a, a );
62
63 b = _mm_add_ps( b, _mm_shuffle_ps( b, b, _MM_SHUFFLE( 2, 1, 0, 3 ) ) );
64 b = _mm_add_ps( b, _mm_shuffle_ps( b, b, _MM_SHUFFLE( 2, 2, 2, 2 ) ) );
65
66 b = _mm_rsqrt_ss( b );
67 b = _mm_shuffle_ps( b, b, _MM_SHUFFLE( 0, 0, 0, 0 ) );
68 data_ = _mm_mul_ps( a, b );
69 }
CalculatePlaneUnormal(const __m128 p1,const __m128 p2,const __m128 p3)70 void Point3D::CalculatePlaneUnormal( const __m128 p1, const __m128 p2,
71 const __m128 p3 )
72 {
73 register __m128 a, b, tmp1, tmp2, shuff1, shuff2;
74
75 a = _mm_sub_ps( p1, p2 );
76 b = _mm_sub_ps( p3, p1 );
77 // Shuffles a into Y,Z,X format
78 shuff1 = _mm_shuffle_ps( a, a, _MM_SHUFFLE( 3, 0, 2, 1 ) );
79 // Shuffles b into Z,X,Y format
80 shuff2 = _mm_shuffle_ps( b, b, _MM_SHUFFLE( 1, 1, 0, 2 ) );
81 tmp1 = _mm_mul_ps( shuff1, shuff2 );
82 shuff1 = _mm_shuffle_ps( a, a, _MM_SHUFFLE( 1, 1, 0, 2 ) );
83 shuff2 = _mm_shuffle_ps( b, b, _MM_SHUFFLE( 3, 0, 2, 1 ) );
84 tmp2 = _mm_mul_ps( shuff1, shuff2 );
85 data_ = _mm_sub_ps( tmp1, tmp2 );
86 }
87
Normalize(Point3D & p)88 void SharpConstruct::Optimized::Normalize( Point3D& p )
89 {
90 register __m128 tmp( p );
91 register __m128 length;
92 register __m128 shuff;
93 // Square all the components
94 tmp = _mm_mul_ps( tmp, tmp );
95 // Next six lines are to put X + Y + Z into length
96 shuff = _mm_shuffle_ps( tmp, tmp, _MM_SHUFFLE( 1, 2, 3, 0 ) );
97 length = shuff;
98 shuff = _mm_shuffle_ps( tmp, tmp, _MM_SHUFFLE( 2, 3, 0, 1 ) );
99 length = _mm_add_ss( length, shuff );
100 shuff = _mm_shuffle_ps( tmp, tmp, _MM_SHUFFLE( 3, 0, 1, 2 ) );
101 length = _mm_add_ss( length, shuff );
102 // Take the reciprocal square root
103 length = _mm_rsqrt_ss( length );
104 // Move the rsqrt into all four positions
105 length = _mm_shuffle_ps( length, length, _MM_SHUFFLE( 0, 0, 0, 0 ) );
106 // Multiply it by data
107 p = _mm_mul_ps( *p.RawData(), length );
108 }
109
Normalize(Point3DVector & v)110 void SharpConstruct::Optimized::Normalize( Point3DVector& v )
111 {
112 const unsigned size( v.size() );
113 for( unsigned i = 0; i < size; ++i )
114 Normalize( v[ i ] );
115 }
116 /*void Point3DVector::NormalizeAll()
117 {
118 for( unsigned i = 0; i < size(); i++ )
119 {
120 register __m128 sq( data_[ i ] );
121
122 // Square each component
123 sq = _mm_mul_ps( sq, sq );
124
125 // Horizontal add
126 sq = _mm_add_ps( sq, _mm_shuffle_ps( sq, sq, _MM_SHUFFLE( 2, 1, 0, 3 ) ) );
127 sq = _mm_add_ps( sq, _mm_shuffle_ps( sq, sq, _MM_SHUFFLE( 2, 2, 2, 2 ) ) );
128
129 sq = _mm_rsqrt_ss( sq );
130 sq = _mm_shuffle_ps( sq, sq, _MM_SHUFFLE( 0, 0, 0, 0 ) );
131
132 data_[ i ] = _mm_mul_ps( data_[ i ], sq );
133 }
134 }*/
135