1 // altivec vector class 2 // 3 // Copyright (C) 2011 Tim Blechmann 4 // 5 // This program is free software; you can redistribute it and/or modify 6 // it under the terms of the GNU General Public License as published by 7 // the Free Software Foundation; either version 2 of the License, or 8 // (at your option) any later version. 9 // 10 // This program is distributed in the hope that it will be useful, 11 // but WITHOUT ANY WARRANTY; without even the implied warranty of 12 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 // GNU General Public License for more details. 14 // 15 // You should have received a copy of the GNU General Public License 16 // along with this program; see the file COPYING. If not, write to 17 // the Free Software Foundation, Inc., 59 Temple Place - Suite 330, 18 // Boston, MA 02111-1307, USA. 19 20 #ifndef VEC_ALTIVEC_HPP 21 #define VEC_ALTIVEC_HPP 22 23 #include <altivec.h> 24 #undef bool 25 26 #include "../detail/vec_math.hpp" 27 #include "vec_int_altivec.hpp" 28 #include "../detail/math.hpp" 29 #include "vec_base.hpp" 30 31 #if defined(__GNUC__) && defined(NDEBUG) 32 #define always_inline inline __attribute__((always_inline)) 33 #else 34 #define always_inline inline 35 #endif 36 37 namespace nova 38 { 39 40 template <> 41 struct vec<float>: 42 vec_base<float, vector float, 4> 43 { 44 typedef vector float internal_vector_type; 45 typedef float float_type; 46 47 private: 48 typedef vec_base<float, vector float, 4> base; 49 set_vectornova::vec50 static internal_vector_type set_vector(float f0, float f1, float f2, float f3) 51 { 52 union { 53 float f[4]; 54 internal_vector_type v; 55 } ret; 56 57 ret.f[0] = f0; 58 ret.f[1] = f1; 59 ret.f[2] = f2; 60 ret.f[3] = f3; 61 return ret.v; 62 } 63 set_vectornova::vec64 static internal_vector_type set_vector(float f) 65 { 66 return set_vector(f, f, f, f); 67 } 68 69 public: 70 static const bool has_compare_bitmask = true; 71 gen_sign_masknova::vec72 static inline internal_vector_type gen_sign_mask(void) 73 { 74 return set_bitmask(0x80000000); 75 } 76 gen_abs_masknova::vec77 static inline internal_vector_type gen_abs_mask(void) 78 { 79 return set_bitmask(0x7fffffff); 80 } 81 gen_onenova::vec82 static inline internal_vector_type gen_one(void) 83 { 84 return set_vector(1.f); 85 } 86 gen_05nova::vec87 static inline internal_vector_type gen_05(void) 88 { 89 return set_vector(0.5f); 90 } 91 set_bitmasknova::vec92 static inline internal_vector_type set_bitmask(unsigned int mask) 93 { 94 union { 95 unsigned int i; 96 float f; 97 } u; 98 u.i = mask; 99 return set_vector(u.f); 100 } 101 gen_exp_masknova::vec102 static inline internal_vector_type gen_exp_mask(void) 103 { 104 return set_bitmask(0x7F800000); 105 } 106 gen_exp_mask_1nova::vec107 static inline internal_vector_type gen_exp_mask_1(void) 108 { 109 return set_bitmask(0x3F000000); 110 } 111 gen_onesnova::vec112 static inline internal_vector_type gen_ones(void) 113 { 114 return set_bitmask(0xFFFFFFFF); 115 } 116 gen_zeronova::vec117 static inline internal_vector_type gen_zero(void) 118 { 119 return (internal_vector_type)vec_splat_u32(0); 120 } 121 vecnova::vec122 vec(internal_vector_type const & arg): 123 base(arg) 124 {} 125 126 public: 127 static const int size = 4; 128 static const int objects_per_cacheline = 64/sizeof(float); 129 is_alignednova::vec130 static bool is_aligned(float* ptr) 131 { 132 return ((intptr_t)(ptr) & (intptr_t)(size * sizeof(float) - 1)) == 0; 133 } 134 135 /* @{ */ 136 /** constructors */ vecnova::vec137 vec(void) 138 {} 139 vecnova::vec140 vec(float f) 141 { 142 set_vec(f); 143 } 144 vecnova::vec145 vec(vec const & rhs): 146 base(rhs.data_) 147 {} 148 /* @} */ 149 150 /* @{ */ 151 /** io */ loadnova::vec152 void load(const float * data) 153 { 154 base::data_ = vec_ld(0, data); 155 } 156 load_alignednova::vec157 void load_aligned(const float * data) 158 { 159 base::data_ = vec_ld(0, data); 160 } 161 load_firstnova::vec162 void load_first(const float * data) 163 { 164 clear(); 165 base::set(0, *data); 166 } 167 storenova::vec168 void store(float * dest) const 169 { 170 vec_st(base::data_, 0, dest); 171 } 172 store_alignednova::vec173 void store_aligned(float * dest) const 174 { 175 vec_st(base::data_, 0, dest); 176 } 177 store_aligned_streamnova::vec178 void store_aligned_stream(float * dest) const 179 { 180 vec_st(base::data_, 0, dest); 181 } 182 clearnova::vec183 void clear(void) 184 { 185 base::data_ = gen_zero(); 186 } 187 operator internal_vector_typenova::vec188 operator internal_vector_type (void) const 189 { 190 return base::data_; 191 } 192 193 /* @} */ 194 195 /* @{ */ 196 /** element access */ set_vecnova::vec197 void set_vec (float value) 198 { 199 data_ = set_vector(value, value, value, value); 200 } 201 set_slopenova::vec202 float set_slope(float start, float slope) 203 { 204 float v1 = start + slope; 205 float v2 = start + slope + slope; 206 float v3 = start + slope + slope + slope; 207 data_ = set_vector(start, v1, v2, v3); 208 return slope + slope + slope + slope; 209 } 210 set_expnova::vec211 float set_exp(float start, float curve) 212 { 213 float v1 = start * curve; 214 float v2 = v1 * curve; 215 float v3 = v2 * curve; 216 data_ = set_vector(start, v1, v2, v3); 217 return v3 * curve; 218 } 219 /* @} */ 220 221 /* @{ */ 222 223 private: vec_mulnova::vec224 static internal_vector_type vec_mul(internal_vector_type const & lhs, internal_vector_type const & rhs) 225 { 226 return vec_madd(lhs, rhs, gen_zero()); 227 } 228 vec_reciprocalnova::vec229 static internal_vector_type vec_reciprocal(internal_vector_type const & arg) 230 { 231 // adapted from http://developer.apple.com/hardwaredrivers/ve/algorithms.html 232 233 // Get the reciprocal estimate 234 vector float estimate = vec_re(arg); 235 236 // One round of Newton-Raphson refinement 237 return vec_madd(vec_nmsub(estimate, arg, gen_one()), estimate, estimate); 238 } 239 vec_divnova::vec240 static internal_vector_type vec_div(internal_vector_type const & lhs, internal_vector_type const & rhs) 241 { 242 return vec_mul(lhs, vec_reciprocal(rhs)); 243 } 244 245 public: 246 /** arithmetic operators */ 247 #define OPERATOR_ASSIGNMENT(op, opcode) \ 248 vec & operator op(vec const & rhs) \ 249 { \ 250 data_ = opcode(data_, rhs.data_);\ 251 return *this;\ 252 } 253 254 OPERATOR_ASSIGNMENT(+=, vec_add) 255 OPERATOR_ASSIGNMENT(-=, vec_sub) 256 OPERATOR_ASSIGNMENT(*=, vec_mul) 257 OPERATOR_ASSIGNMENT(/=, vec_div) 258 259 #define ARITHMETIC_OPERATOR(op, opcode) \ 260 vec operator op(vec const & rhs) const \ 261 { \ 262 return opcode(data_, rhs.data_); \ 263 } \ 264 \ 265 friend vec operator op(vec const & lhs, float f) \ 266 { \ 267 return opcode(lhs.data_, vec(f).data_); \ 268 } \ 269 \ 270 friend vec operator op(float f, vec const & rhs) \ 271 { \ 272 return opcode(vec(f).data_, rhs.data_); \ 273 } 274 275 ARITHMETIC_OPERATOR(+, vec_add) 276 ARITHMETIC_OPERATOR(-, vec_sub) 277 ARITHMETIC_OPERATOR(*, vec_mul) 278 ARITHMETIC_OPERATOR(/, vec_div) 279 operator -(const vec & arg)280 friend vec operator -(const vec & arg) 281 { 282 return vec_xor(arg.data_, gen_sign_mask()); 283 } 284 fast_reciprocal(const vec & arg)285 friend vec fast_reciprocal(const vec & arg) 286 { 287 vector float estimate = vec_re(arg); 288 return estimate; 289 } 290 reciprocal(const vec & arg)291 friend vec reciprocal(const vec & arg) 292 { 293 return vec_reciprocal(arg.data_); 294 } 295 madd(vec const & arg1,vec const & arg2,vec const & arg3)296 friend vec madd(vec const & arg1, vec const & arg2, vec const & arg3) 297 { 298 return vec_madd(arg1.data_, arg2.data_, arg3.data_); 299 } 300 301 private: vec_notnova::vec302 static internal_vector_type vec_not(internal_vector_type const & arg) 303 { 304 return vec_nor(arg, arg); 305 } 306 vec_cmpneqnova::vec307 static internal_vector_type vec_cmpneq(internal_vector_type const & lhs, internal_vector_type const & rhs) 308 { 309 internal_vector_type equal = (internal_vector_type)vec_cmpeq(lhs, rhs); 310 return vec_not(equal); 311 } 312 313 public: 314 315 #define RELATIONAL_OPERATOR(op, opcode) \ 316 vec operator op(vec const & rhs) const \ 317 { \ 318 const internal_vector_type one = gen_one(); \ 319 vector unsigned int mask = (vector unsigned int)opcode(data_, rhs.data_); \ 320 return (internal_vector_type)vec_and(mask, (vector unsigned int)one); \ 321 } 322 323 #define vec_cmple_(a, b) vec_cmpge(b, a) 324 325 RELATIONAL_OPERATOR(<, vec_cmplt) 326 RELATIONAL_OPERATOR(<=, vec_cmple_) 327 RELATIONAL_OPERATOR(>, vec_cmpgt) 328 RELATIONAL_OPERATOR(>=, vec_cmpge) 329 RELATIONAL_OPERATOR(==, vec_cmpeq) 330 RELATIONAL_OPERATOR(!=, vec_cmpneq) 331 332 333 #undef RELATIONAL_OPERATOR 334 335 /* @{ */ 336 #define BITWISE_OPERATOR(op, opcode) \ 337 vec operator op(vec const & rhs) const \ 338 { \ 339 return opcode(data_, rhs.data_); \ 340 } 341 342 BITWISE_OPERATOR(&, vec_and) 343 BITWISE_OPERATOR(|, vec_or) 344 BITWISE_OPERATOR(^, vec_xor) 345 andnot(vec const & lhs,vec const & rhs)346 friend inline vec andnot(vec const & lhs, vec const & rhs) 347 { 348 return vec_andc(lhs.data_, rhs.data_); 349 } 350 351 #undef BITWISE_OPERATOR 352 353 #define RELATIONAL_MASK_OPERATOR(op, opcode) \ 354 friend vec mask_##op(vec const & lhs, vec const & rhs) \ 355 { \ 356 return internal_vector_type(opcode(lhs.data_, rhs.data_)); \ 357 } 358 RELATIONAL_MASK_OPERATOR(lt,vec_cmplt)359 RELATIONAL_MASK_OPERATOR(lt, vec_cmplt) 360 RELATIONAL_MASK_OPERATOR(le, vec_cmple_) 361 RELATIONAL_MASK_OPERATOR(gt, vec_cmpgt) 362 RELATIONAL_MASK_OPERATOR(ge, vec_cmpge) 363 RELATIONAL_MASK_OPERATOR(eq, vec_cmpeq) 364 RELATIONAL_MASK_OPERATOR(neq, vec_cmpneq) 365 366 #undef RELATIONAL_MASK_OPERATOR 367 368 friend inline vec select(vec lhs, vec rhs, vec bitmask) 369 { 370 return vec_sel(lhs.data_, rhs.data_, (vector unsigned int)bitmask.data_); 371 } 372 373 /* @} */ 374 375 /* @{ */ 376 /** unary functions */ abs(vec const & arg)377 friend inline vec abs(vec const & arg) 378 { 379 return vec_abs(arg.data_); 380 } 381 sign(vec const & arg)382 friend always_inline vec sign(vec const & arg) 383 { 384 return detail::vec_sign(arg); 385 } 386 square(vec const & arg)387 friend inline vec square(vec const & arg) 388 { 389 return vec_mul(arg.data_, arg.data_); 390 } 391 392 private: vec_rsqrtnova::vec393 static internal_vector_type vec_rsqrt(internal_vector_type const & arg) 394 { 395 // adapted from http://developer.apple.com/hardwaredrivers/ve/algorithms.html 396 397 //Get the square root reciprocal estimate 398 vector float zero = gen_zero(); 399 vector float oneHalf = gen_05(); 400 vector float one = gen_one(); 401 vector float estimate = vec_rsqrte(arg); 402 403 //One round of Newton-Raphson refinement 404 vector float estimateSquared = vec_madd(estimate, estimate, zero); 405 vector float halfEstimate = vec_madd(estimate, oneHalf, zero); 406 return vec_madd(vec_nmsub(arg, estimateSquared, one), halfEstimate, estimate); 407 } 408 vec_sqrtnova::vec409 static internal_vector_type vec_sqrt(internal_vector_type const & arg) 410 { 411 // adapted from http://developer.apple.com/hardwaredrivers/ve/algorithms.html 412 return vec_mul(arg, vec_rsqrt(arg)); 413 } 414 415 public: sqrt(vec const & arg)416 friend inline vec sqrt(vec const & arg) 417 { 418 return vec_sqrt(arg.data_); 419 } 420 cube(vec const & arg)421 friend inline vec cube(vec const & arg) 422 { 423 return vec_mul(arg.data_, vec_mul(arg.data_, arg.data_)); 424 } 425 /* @} */ 426 427 /* @{ */ 428 /** binary functions */ max_(vec const & lhs,vec const & rhs)429 friend inline vec max_(vec const & lhs, vec const & rhs) 430 { 431 return vec_max(lhs.data_, rhs.data_); 432 } 433 min_(vec const & lhs,vec const & rhs)434 friend inline vec min_(vec const & lhs, vec const & rhs) 435 { 436 return vec_min(lhs.data_, rhs.data_); 437 } 438 /* @} */ 439 440 /* @{ */ 441 /** rounding functions */ round(vec const & arg)442 friend inline vec round(vec const & arg) 443 { 444 return detail::vec_round_float(arg); 445 // return vec_round(arg.data_); testsuite fails: seems to round differently than we do? 446 } 447 frac(vec const & arg)448 friend inline vec frac(vec const & arg) 449 { 450 vec floor_result = floor(arg); 451 return arg - floor_result; 452 } 453 floor(vec const & arg)454 friend inline vec floor(vec const & arg) 455 { 456 return vec_floor(arg.data_); 457 } 458 ceil(vec const & arg)459 friend inline vec ceil(vec const & arg) 460 { 461 return vec_ceil(arg.data_); 462 } 463 trunc(vec const & arg)464 friend inline vec trunc(vec const & arg) 465 { 466 return arg.truncate_to_int().convert_to_float(); 467 } 468 469 typedef detail::int_vec_altivec int_vec; 470 vecnova::vec471 vec (int_vec const & rhs): 472 base((internal_vector_type)rhs.data_) 473 {} 474 truncate_to_intnova::vec475 int_vec truncate_to_int(void) const 476 { 477 return int_vec(vec_ctu(data_, 0)); 478 } 479 /* @} */ 480 481 482 /* @{ */ 483 /** mathematical functions */ 484 485 #if 0 486 // FIXME: vector math support seems to be broken 487 typedef nova::detail::int_vec_altivec int_vec; 488 489 friend inline vec exp(vec const & arg) 490 { 491 return detail::vec_exp_float(arg); 492 } 493 494 friend inline vec log(vec const & arg) 495 { 496 return detail::vec_log_float(arg); 497 } 498 499 friend inline vec pow(vec const & arg1, vec const & arg2) 500 { 501 return detail::vec_pow(arg1, arg2); 502 } 503 504 friend inline vec sin(vec const & arg) 505 { 506 return detail::vec_sin_float(arg); 507 } 508 509 friend inline vec cos(vec const & arg) 510 { 511 return detail::vec_cos_float(arg); 512 } 513 514 friend inline vec tan(vec const & arg) 515 { 516 return detail::vec_tan_float(arg); 517 } 518 519 friend inline vec asin(vec const & arg) 520 { 521 return detail::vec_asin_float(arg); 522 } 523 524 friend inline vec acos(vec const & arg) 525 { 526 return detail::vec_acos_float(arg); 527 } 528 529 friend inline vec atan(vec const & arg) 530 { 531 return detail::vec_atan_float(arg); 532 } 533 534 friend inline vec tanh(vec const & arg) 535 { 536 return detail::vec_tanh_float(arg); 537 } 538 539 friend inline vec signed_pow(vec const & lhs, vec const & rhs) 540 { 541 return detail::vec_signed_pow(lhs, rhs); 542 } 543 544 friend inline vec signed_sqrt(vec const & arg) 545 { 546 return detail::vec_signed_sqrt(arg); 547 } 548 549 friend inline vec log2(vec const & arg) 550 { 551 return detail::vec_log2(arg); 552 } 553 554 friend inline vec log10(vec const & arg) 555 { 556 return detail::vec_log10(arg); 557 } 558 559 560 561 #else 562 563 NOVA_SIMD_DELEGATE_BINARY_TO_BASE(pow) 564 NOVA_SIMD_DELEGATE_BINARY_TO_BASE(signed_pow) 565 566 NOVA_SIMD_DELEGATE_UNARY_TO_BASE(log) 567 NOVA_SIMD_DELEGATE_UNARY_TO_BASE(log2) 568 NOVA_SIMD_DELEGATE_UNARY_TO_BASE(log10) 569 NOVA_SIMD_DELEGATE_UNARY_TO_BASE(exp) 570 571 NOVA_SIMD_DELEGATE_UNARY_TO_BASE(sin) 572 NOVA_SIMD_DELEGATE_UNARY_TO_BASE(cos) 573 NOVA_SIMD_DELEGATE_UNARY_TO_BASE(tan) 574 575 NOVA_SIMD_DELEGATE_UNARY_TO_BASE(asin) 576 NOVA_SIMD_DELEGATE_UNARY_TO_BASE(acos) 577 NOVA_SIMD_DELEGATE_UNARY_TO_BASE(atan) 578 579 NOVA_SIMD_DELEGATE_UNARY_TO_BASE(tanh) 580 581 NOVA_SIMD_DELEGATE_UNARY_TO_BASE(signed_sqrt) 582 583 #endif 584 /* @} */ 585 }; 586 587 } /* namespace nova */ 588 589 #undef always_inline 590 #undef vec_cmplt_ 591 592 #endif /* VEC_ALTIVEC_HPP */ 593