1 /*! \file altivec.c */
2 //
3 // ALTIVEC specific utility functions
4 // tips from Doug
5 // from http://home.san.rr.com/altivec/Pages/TipArchive.html
6 //First we have the two missing logical operators:
7 #ifdef ALTIVEC
8 #include "migration.h"
9 #include "altivec.h"
10 #include "sighandler.h"
11
12 MYINLINE vector float vector_square_root( vector float v );
13 MYINLINE vector float vector_divide( vector float a, vector float b );
14 MYINLINE vector float vector_reciprocal( vector float v );
15 MYINLINE vector float vector_reciprocal_square_root( vector float v );
16
17 //++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
18 //functions
19 //++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
20 /* load a single float and spread it to all vector elements */
load_float_splat(float * thisptr)21 MYINLINE FloatVec load_float_splat(float *thisptr)
22 {
23 vector unsigned char tempc;
24 FloatVec temp;
25 temp.v = vec_lde(0,thisptr);
26 tempc = vec_lvsl(0,thisptr);
27 temp.v = vec_perm(temp.v,temp.v,tempc);
28 temp.v = vec_splat(temp.v,0);
29 return temp;
30 }
31
vec_float_splat(float * thisptr)32 MYINLINE vector float vec_float_splat(float *thisptr)
33 {
34 vector unsigned char tempc;
35 vector float temp;
36 temp = vec_lde(0,thisptr);
37 tempc = vec_lvsl(0,thisptr);
38 temp = vec_perm(temp,temp,tempc);
39 temp = vec_splat(temp,0);
40 return temp;
41 }
42
43
vec_zero(void)44 MYINLINE vector float vec_zero(void)
45 {
46 return vec_ctf(vec_splat_u32(0),0);
47 }
48
vec_float_one(void)49 MYINLINE vector float vec_float_one(void)
50 {
51 return vec_ctf(vec_splat_u32(1),0);
52 }
53
load_double_floatvec(FloatVec * out,double * in,long size)54 MYINLINE void load_double_floatvec(FloatVec *out, double *in, long size)
55 {
56 long i;
57 float *tmp = (float *) mycalloc(size, sizeof(float));
58 for(i=0; i < size; i++)
59 tmp[i] = (float) in[i];
60 memcpy(out,tmp,sizeof(float)*size);
61 myfree(tmp);
62 }
63
load_float_floatvec(FloatVec * out,float * in,long size)64 MYINLINE void load_float_floatvec(FloatVec *out, float *in, long size)
65 {
66 long i;
67 float *tmp = (float *) mycalloc(size, sizeof(float));
68 for(i=0; i < size; i++)
69 tmp[i] = (float) in[i];
70 memcpy(out,tmp,sizeof(float)*size);
71 myfree(tmp);
72 }
73
74 ///
75 /// Vector multiply operation, for vectors 4 floats long,
76 /// this is not very efficient because the pipelines are not full, try to use
77 /// the vdot_product() or then use standard FPU and for-loops.
vector_multiply(vector float * v1,vector float * v2)78 MYINLINE MYREAL vector_multiply(vector float *v1, vector float *v2)
79 {
80 float result;
81 vector float temp;
82 vector float zero = vec_zero();
83 temp = vec_madd(*v1, *v2, zero);
84 temp = vec_add(temp, vec_sld(temp,temp,4));
85 temp = vec_add(temp, vec_sld(temp,temp,8));
86 vec_ste(temp,0,&result);
87 return (MYREAL) result;
88 }
89
90 ///
91 /// Calculates the dot product of two vectors
92 /// of size of 4 floats each
93 MYREAL
vdot_product(FloatVec * v1,FloatVec * v2,int size)94 vdot_product (FloatVec *v1, FloatVec *v2, int size)
95 {
96 // size needs to be a multiple of 4 !!!!!!
97 vector float temp1 = (vector float) vec_splat_s8(0);
98 vector float temp2 = temp1;
99 vector float temp3 = temp1;
100 vector float temp4 = temp1;
101 float result;
102 int i;
103 for (i = 0; i < size; i += 4)
104 {
105 temp1 = vec_madd(v1[i].v, v2[i].v, temp1);
106 temp2 = vec_madd(v1[i+1].v, v2[i+1].v, temp2);
107 temp3 = vec_madd(v1[i+2].v, v2[i+2].v, temp3);
108 temp4 = vec_madd(v1[i+3].v, v2[i+3].v, temp4);
109 }
110 // sum the vectors
111 temp1 = vec_add(temp1,temp2);
112 temp3 = vec_add(temp3,temp4);
113 temp1 = vec_add(temp1,temp3);
114
115 // sum across vector
116 temp1 = vec_add(temp1, vec_sld(temp1,temp1,4));
117 temp1 = vec_add(temp1, vec_sld(temp1,temp1,8));
118
119 // copy result to the stach to return to FPU
120 vec_ste(temp1,0,&result);
121 return (MYREAL) result;
122 }
123
124 //result = a + b
vector_add(FloatVec a,FloatVec b)125 MYINLINE FloatVec vector_add(FloatVec a, FloatVec b )
126 {
127 FloatVec temp;
128 temp.v = vec_add( a.v, b.v);
129 return temp;
130 }
131
132 //result = a/b
vector_divide(vector float a,vector float b)133 MYINLINE vector float vector_divide( vector float a, vector float b )
134 {
135 return vec_madd( a, vector_reciprocal( b ), (vector float)(0) );
136 }
137
138
139 //result = v^0.5
vector_square_root(vector float v)140 MYINLINE vector float vector_square_root( vector float v )
141 {
142
143 return vec_madd( v, vector_reciprocal_square_root( v ), (vector float)(0) );
144
145
146 }
147
148 ///
149 ///result = b^-1
vector_reciprocal(vector float v)150 MYINLINE vector float vector_reciprocal( vector float v )
151 {
152
153 //Get the reciprocal estimate
154 vector float estimate = vec_re( v );
155
156 //One round of Newton-Raphson refinement
157 return vec_madd( vec_nmsub( estimate, v, (vector float) (1.0) ), estimate, estimate );
158
159
160
161 }
162
163 ///
164 ///result = v^-0.5
vector_reciprocal_square_root(vector float v)165 MYINLINE vector float vector_reciprocal_square_root( vector float v )
166 {
167
168 //Get the square root reciprocal estimate
169 vector float zero = (vector float)(0);
170 vector float oneHalf = (vector float)(0.5);
171 vector float one = (vector float)(1.0);
172 vector float estimate = vec_rsqrte( v );
173
174 //One round of Newton-Raphson refinement
175 vector float estimateSquared = vec_madd( estimate, estimate, zero );
176 vector float halfEstimate = vec_madd( estimate, oneHalf, zero );
177 return vec_madd( vec_nmsub( v, estimateSquared, one ), halfEstimate, estimate );
178 }
179
180 #endif /*ALTIVEC*/
181