1 //  altivec vector class
2 //
3 //  Copyright (C) 2011 Tim Blechmann
4 //
5 //  This program is free software; you can redistribute it and/or modify
6 //  it under the terms of the GNU General Public License as published by
7 //  the Free Software Foundation; either version 2 of the License, or
8 //  (at your option) any later version.
9 //
10 //  This program is distributed in the hope that it will be useful,
11 //  but WITHOUT ANY WARRANTY; without even the implied warranty of
12 //  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13 //  GNU General Public License for more details.
14 //
15 //  You should have received a copy of the GNU General Public License
16 //  along with this program; see the file COPYING.  If not, write to
17 //  the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
18 //  Boston, MA 02111-1307, USA.
19 
20 #ifndef VEC_ALTIVEC_HPP
21 #define VEC_ALTIVEC_HPP
22 
#include <altivec.h>
#undef bool

#include <stdint.h>

#include "../detail/vec_math.hpp"
#include "vec_int_altivec.hpp"
#include "../detail/math.hpp"
#include "vec_base.hpp"
30 
/* `always_inline` expands to a forced-inline specifier only for GCC-compatible
 * compilers building with NDEBUG; every other configuration gets plain `inline`.
 * The macro is removed again at the end of this header. */
#if !(defined(__GNUC__) && defined(NDEBUG))
#define always_inline inline
#else
#define always_inline inline  __attribute__((always_inline))
#endif
36 
37 namespace nova
38 {
39 
40 template <>
41 struct vec<float>:
42     vec_base<float, vector float, 4>
43 {
44     typedef vector float internal_vector_type;
45     typedef float float_type;
46 
47 private:
48     typedef vec_base<float, vector float, 4> base;
49 
set_vectornova::vec50     static internal_vector_type set_vector(float f0, float f1, float f2, float f3)
51     {
52         union {
53             float f[4];
54             internal_vector_type v;
55         } ret;
56 
57         ret.f[0] = f0;
58         ret.f[1] = f1;
59         ret.f[2] = f2;
60         ret.f[3] = f3;
61         return ret.v;
62     }
63 
set_vectornova::vec64     static internal_vector_type set_vector(float f)
65     {
66         return set_vector(f, f, f, f);
67     }
68 
69 public:
70     static const bool has_compare_bitmask = true;
71 
gen_sign_masknova::vec72     static inline internal_vector_type gen_sign_mask(void)
73     {
74         return set_bitmask(0x80000000);
75     }
76 
gen_abs_masknova::vec77     static inline internal_vector_type gen_abs_mask(void)
78     {
79         return set_bitmask(0x7fffffff);
80     }
81 
gen_onenova::vec82     static inline internal_vector_type gen_one(void)
83     {
84         return set_vector(1.f);
85     }
86 
gen_05nova::vec87     static inline internal_vector_type gen_05(void)
88     {
89         return set_vector(0.5f);
90     }
91 
set_bitmasknova::vec92     static inline internal_vector_type set_bitmask(unsigned int mask)
93     {
94         union {
95             unsigned int i;
96             float f;
97         } u;
98         u.i = mask;
99         return set_vector(u.f);
100     }
101 
gen_exp_masknova::vec102     static inline internal_vector_type gen_exp_mask(void)
103     {
104         return set_bitmask(0x7F800000);
105     }
106 
gen_exp_mask_1nova::vec107     static inline internal_vector_type gen_exp_mask_1(void)
108     {
109         return set_bitmask(0x3F000000);
110     }
111 
gen_onesnova::vec112     static inline internal_vector_type gen_ones(void)
113     {
114         return set_bitmask(0xFFFFFFFF);
115     }
116 
gen_zeronova::vec117     static inline internal_vector_type gen_zero(void)
118     {
119         return (internal_vector_type)vec_splat_u32(0);
120     }
121 
vecnova::vec122     vec(internal_vector_type const & arg):
123         base(arg)
124     {}
125 
126 public:
127     static const int size = 4;
128     static const int objects_per_cacheline = 64/sizeof(float);
129 
is_alignednova::vec130     static bool is_aligned(float* ptr)
131     {
132         return ((intptr_t)(ptr) & (intptr_t)(size * sizeof(float) - 1)) == 0;
133     }
134 
135     /* @{ */
136     /** constructors */
vecnova::vec137     vec(void)
138     {}
139 
vecnova::vec140     vec(float f)
141     {
142         set_vec(f);
143     }
144 
vecnova::vec145     vec(vec const & rhs):
146         base(rhs.data_)
147     {}
148     /* @} */
149 
150     /* @{ */
151     /** io */
loadnova::vec152     void load(const float * data)
153     {
154         base::data_ = vec_ld(0, data);
155     }
156 
load_alignednova::vec157     void load_aligned(const float * data)
158     {
159         base::data_ = vec_ld(0, data);
160     }
161 
load_firstnova::vec162     void load_first(const float * data)
163     {
164         clear();
165         base::set(0, *data);
166     }
167 
storenova::vec168     void store(float * dest) const
169     {
170         vec_st(base::data_, 0, dest);
171     }
172 
store_alignednova::vec173     void store_aligned(float * dest) const
174     {
175         vec_st(base::data_, 0, dest);
176     }
177 
store_aligned_streamnova::vec178     void store_aligned_stream(float * dest) const
179     {
180         vec_st(base::data_, 0, dest);
181     }
182 
clearnova::vec183     void clear(void)
184     {
185         base::data_ = gen_zero();
186     }
187 
operator internal_vector_typenova::vec188     operator internal_vector_type (void) const
189     {
190         return base::data_;
191     }
192 
193     /* @} */
194 
195     /* @{ */
196     /** element access */
set_vecnova::vec197     void set_vec (float value)
198     {
199         data_ = set_vector(value, value, value, value);
200     }
201 
set_slopenova::vec202     float set_slope(float start, float slope)
203     {
204         float v1 = start + slope;
205         float v2 = start + slope + slope;
206         float v3 = start + slope + slope + slope;
207         data_ = set_vector(start, v1, v2, v3);
208         return slope + slope + slope + slope;
209     }
210 
set_expnova::vec211     float set_exp(float start, float curve)
212     {
213         float v1 = start * curve;
214         float v2 = v1 * curve;
215         float v3 = v2 * curve;
216         data_ = set_vector(start, v1, v2, v3);
217         return v3 * curve;
218     }
219     /* @} */
220 
221     /* @{ */
222 
223 private:
vec_mulnova::vec224     static internal_vector_type vec_mul(internal_vector_type const & lhs, internal_vector_type const & rhs)
225     {
226         return vec_madd(lhs, rhs, gen_zero());
227     }
228 
vec_reciprocalnova::vec229     static internal_vector_type vec_reciprocal(internal_vector_type const & arg)
230     {
231         // adapted from http://developer.apple.com/hardwaredrivers/ve/algorithms.html
232 
233         // Get the reciprocal estimate
234         vector float estimate = vec_re(arg);
235 
236         // One round of Newton-Raphson refinement
237         return vec_madd(vec_nmsub(estimate, arg, gen_one()), estimate, estimate);
238     }
239 
vec_divnova::vec240     static internal_vector_type vec_div(internal_vector_type const & lhs, internal_vector_type const & rhs)
241     {
242         return vec_mul(lhs, vec_reciprocal(rhs));
243     }
244 
245 public:
246     /** arithmetic operators */
247 #define OPERATOR_ASSIGNMENT(op, opcode) \
248     vec & operator op(vec const & rhs) \
249     { \
250         data_ = opcode(data_, rhs.data_);\
251         return *this;\
252     }
253 
254     OPERATOR_ASSIGNMENT(+=, vec_add)
255     OPERATOR_ASSIGNMENT(-=, vec_sub)
256     OPERATOR_ASSIGNMENT(*=, vec_mul)
257     OPERATOR_ASSIGNMENT(/=, vec_div)
258 
259 #define ARITHMETIC_OPERATOR(op, opcode) \
260     vec operator op(vec const & rhs) const \
261     { \
262         return opcode(data_, rhs.data_); \
263     } \
264  \
265     friend vec operator op(vec const & lhs, float f)  \
266     { \
267         return opcode(lhs.data_, vec(f).data_); \
268     } \
269     \
270     friend vec operator op(float f, vec const & rhs)  \
271     { \
272         return opcode(vec(f).data_, rhs.data_); \
273     }
274 
275     ARITHMETIC_OPERATOR(+, vec_add)
276     ARITHMETIC_OPERATOR(-, vec_sub)
277     ARITHMETIC_OPERATOR(*, vec_mul)
278     ARITHMETIC_OPERATOR(/, vec_div)
279 
operator -(const vec & arg)280     friend vec operator -(const vec & arg)
281     {
282         return vec_xor(arg.data_, gen_sign_mask());
283     }
284 
fast_reciprocal(const vec & arg)285     friend vec fast_reciprocal(const vec & arg)
286     {
287         vector float estimate = vec_re(arg);
288         return estimate;
289     }
290 
reciprocal(const vec & arg)291     friend vec reciprocal(const vec & arg)
292     {
293         return vec_reciprocal(arg.data_);
294     }
295 
madd(vec const & arg1,vec const & arg2,vec const & arg3)296     friend vec madd(vec const & arg1, vec const & arg2, vec const & arg3)
297     {
298         return vec_madd(arg1.data_, arg2.data_, arg3.data_);
299     }
300 
301 private:
vec_notnova::vec302     static internal_vector_type vec_not(internal_vector_type const & arg)
303     {
304         return vec_nor(arg, arg);
305     }
306 
vec_cmpneqnova::vec307     static internal_vector_type vec_cmpneq(internal_vector_type const & lhs, internal_vector_type const & rhs)
308     {
309         internal_vector_type equal = (internal_vector_type)vec_cmpeq(lhs, rhs);
310         return vec_not(equal);
311     }
312 
313 public:
314 
315 #define RELATIONAL_OPERATOR(op, opcode) \
316     vec operator op(vec const & rhs) const \
317     { \
318         const internal_vector_type one = gen_one(); \
319         vector unsigned int mask = (vector unsigned int)opcode(data_, rhs.data_); \
320         return (internal_vector_type)vec_and(mask, (vector unsigned int)one); \
321     }
322 
323 #define vec_cmple_(a, b) vec_cmpge(b, a)
324 
325     RELATIONAL_OPERATOR(<, vec_cmplt)
326     RELATIONAL_OPERATOR(<=, vec_cmple_)
327     RELATIONAL_OPERATOR(>, vec_cmpgt)
328     RELATIONAL_OPERATOR(>=, vec_cmpge)
329     RELATIONAL_OPERATOR(==, vec_cmpeq)
330     RELATIONAL_OPERATOR(!=, vec_cmpneq)
331 
332 
333 #undef RELATIONAL_OPERATOR
334 
335     /* @{ */
336 #define BITWISE_OPERATOR(op, opcode) \
337     vec operator op(vec const & rhs) const \
338     { \
339         return opcode(data_, rhs.data_); \
340     }
341 
342     BITWISE_OPERATOR(&, vec_and)
343     BITWISE_OPERATOR(|, vec_or)
344     BITWISE_OPERATOR(^, vec_xor)
345 
andnot(vec const & lhs,vec const & rhs)346     friend inline vec andnot(vec const & lhs, vec const & rhs)
347     {
348         return vec_andc(lhs.data_, rhs.data_);
349     }
350 
351 #undef BITWISE_OPERATOR
352 
353 #define RELATIONAL_MASK_OPERATOR(op, opcode) \
354     friend vec mask_##op(vec const & lhs, vec const & rhs) \
355     { \
356         return internal_vector_type(opcode(lhs.data_, rhs.data_)); \
357     }
358 
RELATIONAL_MASK_OPERATOR(lt,vec_cmplt)359     RELATIONAL_MASK_OPERATOR(lt, vec_cmplt)
360     RELATIONAL_MASK_OPERATOR(le, vec_cmple_)
361     RELATIONAL_MASK_OPERATOR(gt, vec_cmpgt)
362     RELATIONAL_MASK_OPERATOR(ge, vec_cmpge)
363     RELATIONAL_MASK_OPERATOR(eq, vec_cmpeq)
364     RELATIONAL_MASK_OPERATOR(neq, vec_cmpneq)
365 
366 #undef RELATIONAL_MASK_OPERATOR
367 
368     friend inline vec select(vec lhs, vec rhs, vec bitmask)
369     {
370         return vec_sel(lhs.data_, rhs.data_, (vector unsigned int)bitmask.data_);
371     }
372 
373     /* @} */
374 
375     /* @{ */
376     /** unary functions */
abs(vec const & arg)377     friend inline vec abs(vec const & arg)
378     {
379         return vec_abs(arg.data_);
380     }
381 
sign(vec const & arg)382     friend always_inline vec sign(vec const & arg)
383     {
384         return detail::vec_sign(arg);
385     }
386 
square(vec const & arg)387     friend inline vec square(vec const & arg)
388     {
389         return vec_mul(arg.data_, arg.data_);
390     }
391 
392 private:
vec_rsqrtnova::vec393     static internal_vector_type vec_rsqrt(internal_vector_type const & arg)
394     {
395         // adapted from http://developer.apple.com/hardwaredrivers/ve/algorithms.html
396 
397         //Get the square root reciprocal estimate
398         vector float zero =    gen_zero();
399         vector float oneHalf = gen_05();
400         vector float one =     gen_one();
401         vector float estimate = vec_rsqrte(arg);
402 
403         //One round of Newton-Raphson refinement
404         vector float estimateSquared = vec_madd(estimate, estimate, zero);
405         vector float halfEstimate = vec_madd(estimate, oneHalf, zero);
406         return vec_madd(vec_nmsub(arg, estimateSquared, one), halfEstimate, estimate);
407     }
408 
vec_sqrtnova::vec409     static internal_vector_type vec_sqrt(internal_vector_type const & arg)
410     {
411         // adapted from http://developer.apple.com/hardwaredrivers/ve/algorithms.html
412         return vec_mul(arg, vec_rsqrt(arg));
413     }
414 
415 public:
sqrt(vec const & arg)416     friend inline vec sqrt(vec const & arg)
417     {
418         return vec_sqrt(arg.data_);
419     }
420 
cube(vec const & arg)421     friend inline vec cube(vec const & arg)
422     {
423         return vec_mul(arg.data_, vec_mul(arg.data_, arg.data_));
424     }
425     /* @} */
426 
427     /* @{ */
428     /** binary functions */
max_(vec const & lhs,vec const & rhs)429     friend inline vec max_(vec const & lhs, vec const & rhs)
430     {
431         return vec_max(lhs.data_, rhs.data_);
432     }
433 
min_(vec const & lhs,vec const & rhs)434     friend inline vec min_(vec const & lhs, vec const & rhs)
435     {
436         return vec_min(lhs.data_, rhs.data_);
437     }
438     /* @} */
439 
440     /* @{ */
441     /** rounding functions */
round(vec const & arg)442     friend inline vec round(vec const & arg)
443     {
444         return detail::vec_round_float(arg);
445         // return vec_round(arg.data_); testsuite fails: seems to round differently than we do?
446     }
447 
frac(vec const & arg)448     friend inline vec frac(vec const & arg)
449     {
450         vec floor_result = floor(arg);
451         return arg - floor_result;
452     }
453 
floor(vec const & arg)454     friend inline vec floor(vec const & arg)
455     {
456         return vec_floor(arg.data_);
457     }
458 
ceil(vec const & arg)459     friend inline vec ceil(vec const & arg)
460     {
461         return vec_ceil(arg.data_);
462     }
463 
trunc(vec const & arg)464     friend inline vec trunc(vec const & arg)
465     {
466         return arg.truncate_to_int().convert_to_float();
467     }
468 
469     typedef detail::int_vec_altivec int_vec;
470 
vecnova::vec471     vec (int_vec const & rhs):
472         base((internal_vector_type)rhs.data_)
473     {}
474 
truncate_to_intnova::vec475     int_vec truncate_to_int(void) const
476     {
477         return int_vec(vec_ctu(data_, 0));
478     }
479     /* @} */
480 
481 
482     /* @{ */
483     /** mathematical functions */
484 
485 #if 0
486     // FIXME: vector math support seems to be broken
487     typedef nova::detail::int_vec_altivec int_vec;
488 
489     friend inline vec exp(vec const & arg)
490     {
491         return detail::vec_exp_float(arg);
492     }
493 
494     friend inline vec log(vec const & arg)
495     {
496         return detail::vec_log_float(arg);
497     }
498 
499     friend inline vec pow(vec const & arg1, vec const & arg2)
500     {
501         return detail::vec_pow(arg1, arg2);
502     }
503 
504     friend inline vec sin(vec const & arg)
505     {
506         return detail::vec_sin_float(arg);
507     }
508 
509     friend inline vec cos(vec const & arg)
510     {
511         return detail::vec_cos_float(arg);
512     }
513 
514     friend inline vec tan(vec const & arg)
515     {
516         return detail::vec_tan_float(arg);
517     }
518 
519     friend inline vec asin(vec const & arg)
520     {
521         return detail::vec_asin_float(arg);
522     }
523 
524     friend inline vec acos(vec const & arg)
525     {
526         return detail::vec_acos_float(arg);
527     }
528 
529     friend inline vec atan(vec const & arg)
530     {
531         return detail::vec_atan_float(arg);
532     }
533 
534     friend inline vec tanh(vec const & arg)
535     {
536         return detail::vec_tanh_float(arg);
537     }
538 
539     friend inline vec signed_pow(vec const & lhs, vec const & rhs)
540     {
541         return detail::vec_signed_pow(lhs, rhs);
542     }
543 
544     friend inline vec signed_sqrt(vec const & arg)
545     {
546         return detail::vec_signed_sqrt(arg);
547     }
548 
549     friend inline vec log2(vec const & arg)
550     {
551         return detail::vec_log2(arg);
552     }
553 
554     friend inline vec log10(vec const & arg)
555     {
556         return detail::vec_log10(arg);
557     }
558 
559 
560 
561 #else
562 
563     NOVA_SIMD_DELEGATE_BINARY_TO_BASE(pow)
564     NOVA_SIMD_DELEGATE_BINARY_TO_BASE(signed_pow)
565 
566     NOVA_SIMD_DELEGATE_UNARY_TO_BASE(log)
567     NOVA_SIMD_DELEGATE_UNARY_TO_BASE(log2)
568     NOVA_SIMD_DELEGATE_UNARY_TO_BASE(log10)
569     NOVA_SIMD_DELEGATE_UNARY_TO_BASE(exp)
570 
571     NOVA_SIMD_DELEGATE_UNARY_TO_BASE(sin)
572     NOVA_SIMD_DELEGATE_UNARY_TO_BASE(cos)
573     NOVA_SIMD_DELEGATE_UNARY_TO_BASE(tan)
574 
575     NOVA_SIMD_DELEGATE_UNARY_TO_BASE(asin)
576     NOVA_SIMD_DELEGATE_UNARY_TO_BASE(acos)
577     NOVA_SIMD_DELEGATE_UNARY_TO_BASE(atan)
578 
579     NOVA_SIMD_DELEGATE_UNARY_TO_BASE(tanh)
580 
581     NOVA_SIMD_DELEGATE_UNARY_TO_BASE(signed_sqrt)
582 
583 #endif
584     /* @} */
585 };
586 
587 } /* namespace nova */
588 
/* drop the helper macros of this header so they do not leak into including code;
 * vec_cmple_ is the comparison helper defined inside vec<float> */
#undef always_inline
#undef vec_cmple_
591 
592 #endif /* VEC_ALTIVEC_HPP */
593