1 /*! \file altivec.c */
2 //
3 // ALTIVEC specific utility functions
4 // tips from Doug
5 // from http://home.san.rr.com/altivec/Pages/TipArchive.html
6 //First we have the two missing logical operators:
7 #ifdef ALTIVEC
8 #include "migration.h"
9 #include "altivec.h"
10 #include "sighandler.h"
11 
12 MYINLINE vector float vector_square_root( vector float v );
13 MYINLINE vector float vector_divide( vector float a, vector float b );
14 MYINLINE vector float vector_reciprocal( vector float v );
15 MYINLINE vector float vector_reciprocal_square_root( vector float v );
16 
17 //++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
18 //functions
19 //++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
20 /* load a single float and spread it to all vector elements */
load_float_splat(float * thisptr)21 MYINLINE FloatVec load_float_splat(float *thisptr)
22 {
23     vector unsigned char tempc;
24     FloatVec temp;
25     temp.v = vec_lde(0,thisptr);
26     tempc  = vec_lvsl(0,thisptr);
27     temp.v  = vec_perm(temp.v,temp.v,tempc);
28     temp.v = vec_splat(temp.v,0);
29     return temp;
30 }
31 
vec_float_splat(float * thisptr)32 MYINLINE vector float vec_float_splat(float *thisptr)
33 {
34     vector unsigned char tempc;
35     vector float temp;
36     temp = vec_lde(0,thisptr);
37     tempc  = vec_lvsl(0,thisptr);
38     temp  = vec_perm(temp,temp,tempc);
39     temp = vec_splat(temp,0);
40     return temp;
41 }
42 
43 
vec_zero(void)44 MYINLINE vector float vec_zero(void)
45 {
46     return vec_ctf(vec_splat_u32(0),0);
47 }
48 
vec_float_one(void)49 MYINLINE vector float vec_float_one(void)
50 {
51     return vec_ctf(vec_splat_u32(1),0);
52 }
53 
load_double_floatvec(FloatVec * out,double * in,long size)54 MYINLINE void load_double_floatvec(FloatVec *out, double *in, long size)
55 {
56     long i;
57     float *tmp = (float *) mycalloc(size, sizeof(float));
58     for(i=0; i < size; i++)
59         tmp[i] = (float) in[i];
60     memcpy(out,tmp,sizeof(float)*size);
61     myfree(tmp);
62 }
63 
load_float_floatvec(FloatVec * out,float * in,long size)64 MYINLINE void load_float_floatvec(FloatVec *out, float *in, long size)
65 {
66     long i;
67     float *tmp = (float *) mycalloc(size, sizeof(float));
68     for(i=0; i < size; i++)
69         tmp[i] = (float) in[i];
70     memcpy(out,tmp,sizeof(float)*size);
71     myfree(tmp);
72 }
73 
74 ///
75 /// Vector multiply operation, for vectors 4 floats long,
76 /// this is not very efficient because the pipelines are not full, try to use
77 /// the vdot_product() or then use standard FPU and for-loops.
vector_multiply(vector float * v1,vector float * v2)78 MYINLINE MYREAL vector_multiply(vector float *v1, vector float *v2)
79 {
80     float result;
81     vector float temp;
82     vector float zero = vec_zero();
83     temp = vec_madd(*v1, *v2, zero);
84     temp = vec_add(temp, vec_sld(temp,temp,4));
85     temp = vec_add(temp, vec_sld(temp,temp,8));
86     vec_ste(temp,0,&result);
87     return (MYREAL) result;
88 }
89 
90 ///
91 /// Calculates the dot product of two vectors
92 /// of size of 4 floats each
93 MYREAL
vdot_product(FloatVec * v1,FloatVec * v2,int size)94 vdot_product (FloatVec *v1, FloatVec *v2, int size)
95 {
96     // size needs to be a multiple of 4 !!!!!!
97     vector float temp1 = (vector float) vec_splat_s8(0);
98     vector float temp2 = temp1;
99     vector float temp3 = temp1;
100     vector float temp4 = temp1;
101     float result;
102     int i;
103     for (i = 0; i < size; i += 4)
104     {
105         temp1 = vec_madd(v1[i].v, v2[i].v, temp1);
106         temp2 = vec_madd(v1[i+1].v, v2[i+1].v, temp2);
107         temp3 = vec_madd(v1[i+2].v, v2[i+2].v, temp3);
108         temp4 = vec_madd(v1[i+3].v, v2[i+3].v, temp4);
109     }
110     // sum the vectors
111     temp1 = vec_add(temp1,temp2);
112     temp3 = vec_add(temp3,temp4);
113     temp1 = vec_add(temp1,temp3);
114 
115     // sum across vector
116     temp1 = vec_add(temp1, vec_sld(temp1,temp1,4));
117     temp1 = vec_add(temp1, vec_sld(temp1,temp1,8));
118 
119     // copy result to the stach to return to FPU
120     vec_ste(temp1,0,&result);
121     return (MYREAL) result;
122 }
123 
124 //result = a + b
vector_add(FloatVec a,FloatVec b)125 MYINLINE FloatVec vector_add(FloatVec a, FloatVec b )
126 {
127     FloatVec temp;
128     temp.v = vec_add( a.v, b.v);
129     return temp;
130 }
131 
132 //result = a/b
vector_divide(vector float a,vector float b)133 MYINLINE vector float vector_divide( vector float a, vector float b )
134 {
135     return vec_madd( a, vector_reciprocal( b ), (vector float)(0) );
136 }
137 
138 
139 //result = v^0.5
vector_square_root(vector float v)140 MYINLINE vector float vector_square_root( vector float v )
141 {
142 
143     return vec_madd( v, vector_reciprocal_square_root( v ), (vector float)(0) );
144 
145 
146 }
147 
148 ///
149 ///result = b^-1
vector_reciprocal(vector float v)150 MYINLINE vector float vector_reciprocal( vector float v )
151 {
152 
153     //Get the reciprocal estimate
154     vector float estimate = vec_re( v );
155 
156     //One round of Newton-Raphson refinement
157     return vec_madd( vec_nmsub( estimate, v, (vector float) (1.0) ), estimate, estimate );
158 
159 
160 
161 }
162 
163 ///
164 ///result = v^-0.5
vector_reciprocal_square_root(vector float v)165 MYINLINE vector float vector_reciprocal_square_root( vector float v )
166 {
167 
168     //Get the square root reciprocal estimate
169     vector float zero = (vector float)(0);
170     vector float oneHalf = (vector float)(0.5);
171     vector float one = (vector float)(1.0);
172     vector float estimate = vec_rsqrte( v );
173 
174     //One round of Newton-Raphson refinement
175     vector float estimateSquared = vec_madd( estimate, estimate, zero );
176     vector float halfEstimate = vec_madd( estimate, oneHalf, zero );
177     return vec_madd( vec_nmsub( v, estimateSquared, one ), halfEstimate, estimate );
178 }
179 
180 #endif /*ALTIVEC*/
181