1 // license:BSD-3-Clause 2 // copyright-holders:Vas Crabb, Ryan Holtz 3 /*************************************************************************** 4 5 rgbvmx.h 6 7 VMX/Altivec optimised RGB utilities. 8 9 ***************************************************************************/ 10 11 #ifndef MAME_EMU_VIDEO_RGBVMX_H 12 #define MAME_EMU_VIDEO_RGBVMX_H 13 14 #pragma once 15 16 #include <altivec.h> 17 18 /*************************************************************************** 19 TYPE DEFINITIONS 20 ***************************************************************************/ 21 22 class rgbaint_t 23 { 24 protected: 25 typedef __vector signed char VECS8; 26 typedef __vector unsigned char VECU8; 27 typedef __vector signed short VECS16; 28 typedef __vector unsigned short VECU16; 29 typedef __vector signed int VECS32; 30 typedef __vector unsigned int VECU32; 31 32 public: rgbaint_t()33 rgbaint_t() { set(0, 0, 0, 0); } rgbaint_t(u32 rgba)34 explicit rgbaint_t(u32 rgba) { set(rgba); } rgbaint_t(s32 a,s32 r,s32 g,s32 b)35 rgbaint_t(s32 a, s32 r, s32 g, s32 b) { set(a, r, g, b); } rgbaint_t(const rgb_t & rgb)36 explicit rgbaint_t(const rgb_t& rgb) { set(rgb); } rgbaint_t(VECS32 rgba)37 explicit rgbaint_t(VECS32 rgba) : m_value(rgba) { } 38 39 rgbaint_t(const rgbaint_t& other) = default; 40 rgbaint_t &operator=(const rgbaint_t& other) = default; 41 set(const rgbaint_t & other)42 void set(const rgbaint_t& other) { m_value = other.m_value; } 43 set(u32 rgba)44 void set(u32 rgba) 45 { 46 const VECU32 zero = { 0, 0, 0, 0 }; 47 #ifdef __LITTLE_ENDIAN__ 48 const VECS8 temp = *reinterpret_cast<const VECS8 *>(&rgba); 49 m_value = VECS32(vec_mergeh(VECS16(vec_mergeh(temp, VECS8(zero))), VECS16(zero))); 50 #else 51 const VECS8 temp = VECS8(vec_perm(vec_lde(0, &rgba), zero, vec_lvsl(0, &rgba))); 52 m_value = VECS32(vec_mergeh(VECS16(zero), VECS16(vec_mergeh(VECS8(zero), temp)))); 53 #endif 54 } 55 set(s32 a,s32 r,s32 g,s32 b)56 void set(s32 a, s32 r, s32 g, s32 b) 57 { 58 #ifdef __LITTLE_ENDIAN__ 59 const VECS32 result = { b, g, r, a }; 60 #else 61 const VECS32 result = { a, r, g, b }; 62 #endif 63 m_value = result; 64 } 65 set(const rgb_t & rgb)66 void set(const rgb_t& rgb) 67 { 68 const VECU32 zero = { 0, 0, 0, 0 }; 69 #ifdef __LITTLE_ENDIAN__ 70 const VECS8 temp = *reinterpret_cast<const VECS8 *>(rgb.ptr()); 71 m_value = VECS32(vec_mergeh(VECS16(vec_mergeh(temp, VECS8(zero))), VECS16(zero))); 72 #else 73 const VECS8 temp = VECS8(vec_perm(vec_lde(0, rgb.ptr()), zero, vec_lvsl(0, rgb.ptr()))); 74 m_value = VECS32(vec_mergeh(VECS16(zero), VECS16(vec_mergeh(VECS8(zero), temp)))); 75 #endif 76 } 77 78 // This function sets all elements to the same val set_all(const s32 & val)79 void set_all(const s32& val) { set(val, val, val, val); } 80 // This function zeros all elements zero()81 void zero() { set_all(0); } 82 // This function zeros only the alpha element zero_alpha()83 void zero_alpha() { set_a(0); } 84 to_rgba()85 inline rgb_t to_rgba() const 86 { 87 VECU32 temp = VECU32(vec_packs(m_value, m_value)); 88 temp = VECU32(vec_packsu(VECS16(temp), VECS16(temp))); 89 u32 result; 90 vec_ste(temp, 0, &result); 91 return result; 92 } 93 to_rgba_clamp()94 inline rgb_t to_rgba_clamp() const 95 { 96 VECU32 temp = VECU32(vec_packs(m_value, m_value)); 97 temp = VECU32(vec_packsu(VECS16(temp), VECS16(temp))); 98 u32 result; 99 vec_ste(temp, 0, &result); 100 return result; 101 } 102 set_a16(const s32 value)103 void set_a16(const s32 value) 104 { 105 const VECS32 temp = { value, value, value, value }; 106 m_value = vec_perm(m_value, temp, alpha_perm); 107 } 108 set_a(const s32 value)109 void set_a(const s32 value) 110 { 111 const VECS32 temp = { value, value, value, value }; 112 m_value = vec_perm(m_value, temp, alpha_perm); 113 } 114 set_r(const s32 value)115 void set_r(const s32 value) 116 { 117 const VECS32 temp = { value, value, value, value }; 118 m_value = vec_perm(m_value, temp, red_perm); 119 } 120 set_g(const s32 value)121 void set_g(const s32 value) 122 { 123 const VECS32 temp = { value, value, value, value }; 124 m_value = vec_perm(m_value, temp, green_perm); 125 } 126 set_b(const s32 value)127 void set_b(const s32 value) 128 { 129 const VECS32 temp = { value, value, value, value }; 130 m_value = vec_perm(m_value, temp, blue_perm); 131 } 132 get_a()133 u8 get_a() const 134 { 135 u8 result; 136 #ifdef __LITTLE_ENDIAN__ 137 vec_ste(vec_splat(VECU8(m_value), 12), 0, &result); 138 #else 139 vec_ste(vec_splat(VECU8(m_value), 3), 0, &result); 140 #endif 141 return result; 142 } 143 get_r()144 u8 get_r() const 145 { 146 u8 result; 147 #ifdef __LITTLE_ENDIAN__ 148 vec_ste(vec_splat(VECU8(m_value), 8), 0, &result); 149 #else 150 vec_ste(vec_splat(VECU8(m_value), 7), 0, &result); 151 #endif 152 return result; 153 } 154 get_g()155 u8 get_g() const 156 { 157 u8 result; 158 #ifdef __LITTLE_ENDIAN__ 159 vec_ste(vec_splat(VECU8(m_value), 4), 0, &result); 160 #else 161 vec_ste(vec_splat(VECU8(m_value), 11), 0, &result); 162 #endif 163 return result; 164 } 165 get_b()166 u8 get_b() const 167 { 168 u8 result; 169 #ifdef __LITTLE_ENDIAN__ 170 vec_ste(vec_splat(VECU8(m_value), 0), 0, &result); 171 #else 172 vec_ste(vec_splat(VECU8(m_value), 15), 0, &result); 173 #endif 174 return result; 175 } 176 get_a32()177 s32 get_a32() const 178 { 179 s32 result; 180 #ifdef __LITTLE_ENDIAN__ 181 vec_ste(vec_splat(m_value, 3), 0, &result); 182 #else 183 vec_ste(vec_splat(m_value, 0), 0, &result); 184 #endif 185 return result; 186 } 187 get_r32()188 s32 get_r32() const 189 { 190 s32 result; 191 #ifdef __LITTLE_ENDIAN__ 192 vec_ste(vec_splat(m_value, 2), 0, &result); 193 #else 194 vec_ste(vec_splat(m_value, 1), 0, &result); 195 #endif 196 return result; 197 } 198 get_g32()199 s32 get_g32() const 200 { 201 s32 result; 202 #ifdef __LITTLE_ENDIAN__ 203 vec_ste(vec_splat(m_value, 1), 0, &result); 204 #else 205 vec_ste(vec_splat(m_value, 2), 0, &result); 206 #endif 207 return result; 208 } 209 get_b32()210 s32 get_b32() const 211 { 212 s32 result; 213 #ifdef __LITTLE_ENDIAN__ 214 vec_ste(vec_splat(m_value, 0), 0, &result); 215 #else 216 vec_ste(vec_splat(m_value, 3), 0, &result); 217 #endif 218 return result; 219 } 220 221 // These selects return an rgbaint_t with all fields set to the element choosen (a, r, g, or b) select_alpha32()222 rgbaint_t select_alpha32() const { return rgbaint_t(get_a32(), get_a32(), get_a32(), get_a32()); } select_red32()223 rgbaint_t select_red32() const { return rgbaint_t(get_r32(), get_r32(), get_r32(), get_r32()); } select_green32()224 rgbaint_t select_green32() const { return rgbaint_t(get_g32(), get_g32(), get_g32(), get_g32()); } select_blue32()225 rgbaint_t select_blue32() const { return rgbaint_t(get_b32(), get_b32(), get_b32(), get_b32()); } 226 add(const rgbaint_t & color2)227 inline void add(const rgbaint_t& color2) 228 { 229 m_value = vec_add(m_value, color2.m_value); 230 } 231 add_imm(const s32 imm)232 inline void add_imm(const s32 imm) 233 { 234 const VECS32 temp = { imm, imm, imm, imm }; 235 m_value = vec_add(m_value, temp); 236 } 237 add_imm_rgba(const s32 a,const s32 r,const s32 g,const s32 b)238 inline void add_imm_rgba(const s32 a, const s32 r, const s32 g, const s32 b) 239 { 240 #ifdef __LITTLE_ENDIAN__ 241 const VECS32 temp = { b, g, r, a }; 242 #else 243 const VECS32 temp = { a, r, g, b }; 244 #endif 245 m_value = vec_add(m_value, temp); 246 } 247 sub(const rgbaint_t & color2)248 inline void sub(const rgbaint_t& color2) 249 { 250 m_value = vec_sub(m_value, color2.m_value); 251 } 252 sub_imm(const s32 imm)253 inline void sub_imm(const s32 imm) 254 { 255 const VECS32 temp = { imm, imm, imm, imm }; 256 m_value = vec_sub(m_value, temp); 257 } 258 sub_imm_rgba(const s32 a,const s32 r,const s32 g,const s32 b)259 inline void sub_imm_rgba(const s32 a, const s32 r, const s32 g, const s32 b) 260 { 261 #ifdef __LITTLE_ENDIAN__ 262 const VECS32 temp = { b, g, r, a }; 263 #else 264 const VECS32 temp = { a, r, g, b }; 265 #endif 266 m_value = vec_sub(m_value, temp); 267 } 268 subr(const rgbaint_t & color2)269 inline void subr(const rgbaint_t& color2) 270 { 271 m_value = vec_sub(color2.m_value, m_value); 272 } 273 subr_imm(const s32 imm)274 inline void subr_imm(const s32 imm) 275 { 276 const VECS32 temp = { imm, imm, imm, imm }; 277 m_value = vec_sub(temp, m_value); 278 } 279 subr_imm_rgba(const s32 a,const s32 r,const s32 g,const s32 b)280 inline void subr_imm_rgba(const s32 a, const s32 r, const s32 g, const s32 b) 281 { 282 #ifdef __LITTLE_ENDIAN__ 283 const VECS32 temp = { b, g, r, a }; 284 #else 285 const VECS32 temp = { a, r, g, b }; 286 #endif 287 m_value = vec_sub(temp, m_value); 288 } 289 mul(const rgbaint_t & color)290 inline void mul(const rgbaint_t& color) 291 { 292 const VECU32 shift = vec_splat_u32(-16); 293 const VECU32 temp = vec_msum(VECU16(m_value), VECU16(vec_rl(color.m_value, shift)), vec_splat_u32(0)); 294 #ifdef __LITTLE_ENDIAN__ 295 m_value = VECS32(vec_add(vec_sl(temp, shift), vec_mule(VECU16(m_value), VECU16(color.m_value)))); 296 #else 297 m_value = VECS32(vec_add(vec_sl(temp, shift), vec_mulo(VECU16(m_value), VECU16(color.m_value)))); 298 #endif 299 } 300 mul_imm(const s32 imm)301 inline void mul_imm(const s32 imm) 302 { 303 const VECU32 value = { u32(imm), u32(imm), u32(imm), u32(imm) }; 304 const VECU32 shift = vec_splat_u32(-16); 305 const VECU32 temp = vec_msum(VECU16(m_value), VECU16(vec_rl(value, shift)), vec_splat_u32(0)); 306 #ifdef __LITTLE_ENDIAN__ 307 m_value = VECS32(vec_add(vec_sl(temp, shift), vec_mule(VECU16(m_value), VECU16(value)))); 308 #else 309 m_value = VECS32(vec_add(vec_sl(temp, shift), vec_mulo(VECU16(m_value), VECU16(value)))); 310 #endif 311 } 312 mul_imm_rgba(const s32 a,const s32 r,const s32 g,const s32 b)313 inline void mul_imm_rgba(const s32 a, const s32 r, const s32 g, const s32 b) 314 { 315 #ifdef __LITTLE_ENDIAN__ 316 const VECU32 value = { u32(b), u32(g), u32(r), u32(a) }; 317 #else 318 const VECU32 value = { u32(a), u32(r), u32(g), u32(b) }; 319 #endif 320 const VECU32 shift = vec_splat_u32(-16); 321 const VECU32 temp = vec_msum(VECU16(m_value), VECU16(vec_rl(value, shift)), vec_splat_u32(0)); 322 #ifdef __LITTLE_ENDIAN__ 323 m_value = VECS32(vec_add(vec_sl(temp, shift), vec_mule(VECU16(m_value), VECU16(value)))); 324 #else 325 m_value = VECS32(vec_add(vec_sl(temp, shift), vec_mulo(VECU16(m_value), VECU16(value)))); 326 #endif 327 } 328 shl(const rgbaint_t & shift)329 inline void shl(const rgbaint_t& shift) 330 { 331 const VECU32 limit = { 32, 32, 32, 32 }; 332 m_value = vec_and(vec_sl(m_value, VECU32(shift.m_value)), vec_cmpgt(limit, VECU32(shift.m_value))); 333 } 334 shl_imm(const u8 shift)335 inline void shl_imm(const u8 shift) 336 { 337 const VECU32 temp = { shift, shift, shift, shift }; 338 m_value = vec_sl(m_value, temp); 339 } 340 shr(const rgbaint_t & shift)341 inline void shr(const rgbaint_t& shift) 342 { 343 const VECU32 limit = { 32, 32, 32, 32 }; 344 m_value = vec_and(vec_sr(m_value, VECU32(shift.m_value)), vec_cmpgt(limit, VECU32(shift.m_value))); 345 } 346 shr_imm(const u8 shift)347 inline void shr_imm(const u8 shift) 348 { 349 const VECU32 temp = { shift, shift, shift, shift }; 350 m_value = vec_sr(m_value, temp); 351 } 352 sra(const rgbaint_t & shift)353 inline void sra(const rgbaint_t& shift) 354 { 355 const VECU32 limit = { 31, 31, 31, 31 }; 356 m_value = vec_sra(m_value, vec_min(VECU32(shift.m_value), limit)); 357 } 358 sra_imm(const u8 shift)359 inline void sra_imm(const u8 shift) 360 { 361 const VECU32 temp = { shift, shift, shift, shift }; 362 m_value = vec_sra(m_value, temp); 363 } 364 or_reg(const rgbaint_t & color2)365 inline void or_reg(const rgbaint_t& color2) 366 { 367 m_value = vec_or(m_value, color2.m_value); 368 } 369 or_imm(const s32 value)370 inline void or_imm(const s32 value) 371 { 372 const VECS32 temp = { value, value, value, value }; 373 m_value = vec_or(m_value, temp); 374 } 375 or_imm_rgba(const s32 a,const s32 r,const s32 g,const s32 b)376 inline void or_imm_rgba(const s32 a, const s32 r, const s32 g, const s32 b) 377 { 378 #ifdef __LITTLE_ENDIAN__ 379 const VECS32 temp = { b, g, r, a }; 380 #else 381 const VECS32 temp = { a, r, g, b }; 382 #endif 383 m_value = vec_or(m_value, temp); 384 } 385 and_reg(const rgbaint_t & color)386 inline void and_reg(const rgbaint_t& color) 387 { 388 m_value = vec_and(m_value, color.m_value); 389 } 390 andnot_reg(const rgbaint_t & color)391 inline void andnot_reg(const rgbaint_t& color) 392 { 393 m_value = vec_andc(m_value, color.m_value); 394 } 395 and_imm(const s32 value)396 inline void and_imm(const s32 value) 397 { 398 const VECS32 temp = { value, value, value, value }; 399 m_value = vec_and(m_value, temp); 400 } 401 and_imm_rgba(const s32 a,const s32 r,const s32 g,const s32 b)402 inline void and_imm_rgba(const s32 a, const s32 r, const s32 g, const s32 b) 403 { 404 #ifdef __LITTLE_ENDIAN__ 405 const VECS32 temp = { b, g, r, a }; 406 #else 407 const VECS32 temp = { a, r, g, b }; 408 #endif 409 m_value = vec_and(m_value, temp); 410 } 411 xor_reg(const rgbaint_t & color2)412 inline void xor_reg(const rgbaint_t& color2) 413 { 414 m_value = vec_xor(m_value, color2.m_value); 415 } 416 xor_imm(const s32 value)417 inline void xor_imm(const s32 value) 418 { 419 const VECS32 temp = { value, value, value, value }; 420 m_value = vec_xor(m_value, temp); 421 } 422 xor_imm_rgba(const s32 a,const s32 r,const s32 g,const s32 b)423 inline void xor_imm_rgba(const s32 a, const s32 r, const s32 g, const s32 b) 424 { 425 #ifdef __LITTLE_ENDIAN__ 426 const VECS32 temp = { b, g, r, a }; 427 #else 428 const VECS32 temp = { a, r, g, b }; 429 #endif 430 m_value = vec_xor(m_value, temp); 431 } 432 clamp_and_clear(const u32 sign)433 inline void clamp_and_clear(const u32 sign) 434 { 435 const VECS32 vzero = { 0, 0, 0, 0 }; 436 VECS32 vsign = { s32(sign), s32(sign), s32(sign), s32(sign) }; 437 m_value = vec_and(m_value, vec_cmpeq(vec_and(m_value, vsign), vzero)); 438 vsign = vec_nor(vec_sra(vsign, vec_splat_u32(1)), vzero); 439 const VECS32 mask = VECS32(vec_cmpgt(m_value, vsign)); 440 m_value = vec_or(vec_and(vsign, mask), vec_and(m_value, vec_nor(mask, vzero))); 441 } 442 clamp_to_uint8()443 inline void clamp_to_uint8() 444 { 445 const VECU32 zero = { 0, 0, 0, 0 }; 446 m_value = VECS32(vec_packs(m_value, m_value)); 447 m_value = VECS32(vec_packsu(VECS16(m_value), VECS16(m_value))); 448 #ifdef __LITTLE_ENDIAN__ 449 m_value = VECS32(vec_mergeh(VECU8(m_value), VECU8(zero))); 450 m_value = VECS32(vec_mergeh(VECS16(m_value), VECS16(zero))); 451 #else 452 m_value = VECS32(vec_mergeh(VECU8(zero), VECU8(m_value))); 453 m_value = VECS32(vec_mergeh(VECS16(zero), VECS16(m_value))); 454 #endif 455 } 456 sign_extend(const u32 compare,const u32 sign)457 inline void sign_extend(const u32 compare, const u32 sign) 458 { 459 const VECS32 compare_vec = { s32(compare), s32(compare), s32(compare), s32(compare) }; 460 const VECS32 compare_mask = VECS32(vec_cmpeq(vec_and(m_value, compare_vec), compare_vec)); 461 const VECS32 sign_vec = { s32(sign), s32(sign), s32(sign), s32(sign) }; 462 m_value = vec_or(m_value, vec_and(sign_vec, compare_mask)); 463 } 464 min(const s32 value)465 inline void min(const s32 value) 466 { 467 const VECS32 temp = { value, value, value, value }; 468 m_value = vec_min(m_value, temp); 469 } 470 max(const s32 value)471 inline void max(const s32 value) 472 { 473 const VECS32 temp = { value, value, value, value }; 474 m_value = vec_max(m_value, temp); 475 } 476 477 void blend(const rgbaint_t& other, u8 factor); 478 479 void scale_and_clamp(const rgbaint_t& scale); 480 void scale_imm_and_clamp(const s32 scale); 481 scale_add_and_clamp(const rgbaint_t & scale,const rgbaint_t & other)482 void scale_add_and_clamp(const rgbaint_t& scale, const rgbaint_t& other) 483 { 484 mul(scale); 485 sra_imm(8); 486 add(other); 487 clamp_to_uint8(); 488 } 489 scale2_add_and_clamp(const rgbaint_t & scale,const rgbaint_t & other,const rgbaint_t & scale2)490 void scale2_add_and_clamp(const rgbaint_t& scale, const rgbaint_t& other, const rgbaint_t& scale2) 491 { 492 rgbaint_t color2(other); 493 color2.mul(scale2); 494 495 mul(scale); 496 add(color2); 497 sra_imm(8); 498 clamp_to_uint8(); 499 } 500 cmpeq(const rgbaint_t & value)501 inline void cmpeq(const rgbaint_t& value) 502 { 503 m_value = VECS32(vec_cmpeq(m_value, value.m_value)); 504 } 505 cmpeq_imm(const s32 value)506 inline void cmpeq_imm(const s32 value) 507 { 508 const VECS32 temp = { value, value, value, value }; 509 m_value = VECS32(vec_cmpeq(m_value, temp)); 510 } 511 cmpeq_imm_rgba(const s32 a,const s32 r,const s32 g,const s32 b)512 inline void cmpeq_imm_rgba(const s32 a, const s32 r, const s32 g, const s32 b) 513 { 514 #ifdef __LITTLE_ENDIAN__ 515 const VECS32 temp = { b, g, r, a }; 516 #else 517 const VECS32 temp = { a, r, g, b }; 518 #endif 519 m_value = VECS32(vec_cmpeq(m_value, temp)); 520 } 521 cmpgt(const rgbaint_t & value)522 inline void cmpgt(const rgbaint_t& value) 523 { 524 m_value = VECS32(vec_cmpgt(m_value, value.m_value)); 525 } 526 cmpgt_imm(const s32 value)527 inline void cmpgt_imm(const s32 value) 528 { 529 const VECS32 temp = { value, value, value, value }; 530 m_value = VECS32(vec_cmpgt(m_value, temp)); 531 } 532 cmpgt_imm_rgba(const s32 a,const s32 r,const s32 g,const s32 b)533 inline void cmpgt_imm_rgba(const s32 a, const s32 r, const s32 g, const s32 b) 534 { 535 #ifdef __LITTLE_ENDIAN__ 536 const VECS32 temp = { b, g, r, a }; 537 #else 538 const VECS32 temp = { a, r, g, b }; 539 #endif 540 m_value = VECS32(vec_cmpgt(m_value, temp)); 541 } 542 cmplt(const rgbaint_t & value)543 inline void cmplt(const rgbaint_t& value) 544 { 545 m_value = VECS32(vec_cmplt(m_value, value.m_value)); 546 } 547 cmplt_imm(const s32 value)548 inline void cmplt_imm(const s32 value) 549 { 550 const VECS32 temp = { value, value, value, value }; 551 m_value = VECS32(vec_cmplt(m_value, temp)); 552 } 553 cmplt_imm_rgba(const s32 a,const s32 r,const s32 g,const s32 b)554 inline void cmplt_imm_rgba(const s32 a, const s32 r, const s32 g, const s32 b) 555 { 556 #ifdef __LITTLE_ENDIAN__ 557 const VECS32 temp = { b, g, r, a }; 558 #else 559 const VECS32 temp = { a, r, g, b }; 560 #endif 561 m_value = VECS32(vec_cmplt(m_value, temp)); 562 } 563 564 inline rgbaint_t& operator+=(const rgbaint_t& other) 565 { 566 m_value = vec_add(m_value, other.m_value); 567 return *this; 568 } 569 570 inline rgbaint_t& operator+=(const s32 other) 571 { 572 const VECS32 temp = { other, other, other, other }; 573 m_value = vec_add(m_value, temp); 574 return *this; 575 } 576 577 inline rgbaint_t& operator-=(const rgbaint_t& other) 578 { 579 m_value = vec_sub(m_value, other.m_value); 580 return *this; 581 } 582 583 inline rgbaint_t& operator*=(const rgbaint_t& other) 584 { 585 const VECU32 shift = vec_splat_u32(-16); 586 const VECU32 temp = vec_msum(VECU16(m_value), VECU16(vec_rl(other.m_value, shift)), vec_splat_u32(0)); 587 #ifdef __LITTLE_ENDIAN__ 588 m_value = VECS32(vec_add(vec_sl(temp, shift), vec_mule(VECU16(m_value), VECU16(other.m_value)))); 589 #else 590 m_value = VECS32(vec_add(vec_sl(temp, shift), vec_mulo(VECU16(m_value), VECU16(other.m_value)))); 591 #endif 592 return *this; 593 } 594 595 inline rgbaint_t& operator*=(const s32 other) 596 { 597 const VECS32 value = { other, other, other, other }; 598 const VECU32 shift = vec_splat_u32(-16); 599 const VECU32 temp = vec_msum(VECU16(m_value), VECU16(vec_rl(value, shift)), vec_splat_u32(0)); 600 #ifdef __LITTLE_ENDIAN__ 601 m_value = VECS32(vec_add(vec_sl(temp, shift), vec_mule(VECU16(m_value), VECU16(value)))); 602 #else 603 m_value = VECS32(vec_add(vec_sl(temp, shift), vec_mulo(VECU16(m_value), VECU16(value)))); 604 #endif 605 return *this; 606 } 607 608 inline rgbaint_t& operator>>=(const s32 shift) 609 { 610 const VECU32 temp = { u32(shift), u32(shift), u32(shift), u32(shift) }; 611 m_value = vec_sra(m_value, temp); 612 return *this; 613 } 614 merge_alpha16(const rgbaint_t & alpha)615 inline void merge_alpha16(const rgbaint_t& alpha) 616 { 617 m_value = vec_perm(m_value, alpha.m_value, alpha_perm); 618 } 619 merge_alpha(const rgbaint_t & alpha)620 inline void merge_alpha(const rgbaint_t& alpha) 621 { 622 m_value = vec_perm(m_value, alpha.m_value, alpha_perm); 623 } 624 bilinear_filter(const u32 & rgb00,const u32 & rgb01,const u32 & rgb10,const u32 & rgb11,u8 u,u8 v)625 static u32 bilinear_filter(const u32 &rgb00, const u32 &rgb01, const u32 &rgb10, const u32 &rgb11, u8 u, u8 v) 626 { 627 const VECS32 zero = vec_splat_s32(0); 628 629 // put each packed value into first element of a vector register 630 #ifdef __LITTLE_ENDIAN__ 631 VECS32 color00 = *reinterpret_cast<const VECS32 *>(&rgb00); 632 VECS32 color01 = *reinterpret_cast<const VECS32 *>(&rgb01); 633 VECS32 color10 = *reinterpret_cast<const VECS32 *>(&rgb10); 634 VECS32 color11 = *reinterpret_cast<const VECS32 *>(&rgb11); 635 #else 636 VECS32 color00 = vec_perm(VECS32(vec_lde(0, &rgb00)), zero, vec_lvsl(0, &rgb00)); 637 VECS32 color01 = vec_perm(VECS32(vec_lde(0, &rgb01)), zero, vec_lvsl(0, &rgb01)); 638 VECS32 color10 = vec_perm(VECS32(vec_lde(0, &rgb10)), zero, vec_lvsl(0, &rgb10)); 639 VECS32 color11 = vec_perm(VECS32(vec_lde(0, &rgb11)), zero, vec_lvsl(0, &rgb11)); 640 #endif 641 642 // interleave color01/color00 and color10/color11 at the byte level then zero-extend 643 color01 = VECS32(vec_mergeh(VECU8(color01), VECU8(color00))); 644 color11 = VECS32(vec_mergeh(VECU8(color11), VECU8(color10))); 645 #ifdef __LITTLE_ENDIAN__ 646 color01 = VECS32(vec_mergeh(VECU8(color01), VECU8(zero))); 647 color11 = VECS32(vec_mergeh(VECU8(color11), VECU8(zero))); 648 #else 649 color01 = VECS32(vec_mergeh(VECU8(zero), VECU8(color01))); 650 color11 = VECS32(vec_mergeh(VECU8(zero), VECU8(color11))); 651 #endif 652 653 color01 = vec_msum(VECS16(color01), scale_table[u], zero); 654 color11 = vec_msum(VECS16(color11), scale_table[u], zero); 655 color01 = vec_sl(color01, vec_splat_u32(15)); 656 color11 = vec_sr(color11, vec_splat_u32(1)); 657 color01 = VECS32(vec_max(VECS16(color01), VECS16(color11))); 658 color01 = vec_msum(VECS16(color01), scale_table[v], zero); 659 color01 = vec_sr(color01, vec_splat_u32(15)); 660 color01 = VECS32(vec_packs(color01, color01)); 661 color01 = VECS32(vec_packsu(VECS16(color01), VECS16(color01))); 662 663 u32 result; 664 vec_ste(VECU32(color01), 0, &result); 665 return result; 666 } 667 bilinear_filter_rgbaint(const u32 & rgb00,const u32 & rgb01,const u32 & rgb10,const u32 & rgb11,u8 u,u8 v)668 void bilinear_filter_rgbaint(const u32 &rgb00, const u32 &rgb01, const u32 &rgb10, const u32 &rgb11, u8 u, u8 v) 669 { 670 const VECS32 zero = vec_splat_s32(0); 671 672 // put each packed value into first element of a vector register 673 #ifdef __LITTLE_ENDIAN__ 674 VECS32 color00 = *reinterpret_cast<const VECS32 *>(&rgb00); 675 VECS32 color01 = *reinterpret_cast<const VECS32 *>(&rgb01); 676 VECS32 color10 = *reinterpret_cast<const VECS32 *>(&rgb10); 677 VECS32 color11 = *reinterpret_cast<const VECS32 *>(&rgb11); 678 #else 679 VECS32 color00 = vec_perm(VECS32(vec_lde(0, &rgb00)), zero, vec_lvsl(0, &rgb00)); 680 VECS32 color01 = vec_perm(VECS32(vec_lde(0, &rgb01)), zero, vec_lvsl(0, &rgb01)); 681 VECS32 color10 = vec_perm(VECS32(vec_lde(0, &rgb10)), zero, vec_lvsl(0, &rgb10)); 682 VECS32 color11 = vec_perm(VECS32(vec_lde(0, &rgb11)), zero, vec_lvsl(0, &rgb11)); 683 #endif 684 685 // interleave color01/color00 and color10/color11 at the byte level then zero-extend 686 color01 = VECS32(vec_mergeh(VECU8(color01), VECU8(color00))); 687 color11 = VECS32(vec_mergeh(VECU8(color11), VECU8(color10))); 688 #ifdef __LITTLE_ENDIAN__ 689 color01 = VECS32(vec_mergeh(VECU8(color01), VECU8(zero))); 690 color11 = VECS32(vec_mergeh(VECU8(color11), VECU8(zero))); 691 #else 692 color01 = VECS32(vec_mergeh(VECU8(zero), VECU8(color01))); 693 color11 = VECS32(vec_mergeh(VECU8(zero), VECU8(color11))); 694 #endif 695 696 color01 = vec_msum(VECS16(color01), scale_table[u], zero); 697 color11 = vec_msum(VECS16(color11), scale_table[u], zero); 698 color01 = vec_sl(color01, vec_splat_u32(15)); 699 color11 = vec_sr(color11, vec_splat_u32(1)); 700 color01 = VECS32(vec_max(VECS16(color01), VECS16(color11))); 701 color01 = vec_msum(VECS16(color01), scale_table[v], zero); 702 m_value = vec_sr(color01, vec_splat_u32(15)); 703 } 704 705 protected: 706 VECS32 m_value; 707 708 static const VECU8 alpha_perm; 709 static const VECU8 red_perm; 710 static const VECU8 green_perm; 711 static const VECU8 blue_perm; 712 static const VECS16 scale_table[256]; 713 }; 714 715 716 717 // altivec.h somehow redefines "bool" in a bad way. really. 718 #ifdef vector 719 #undef vector 720 #endif 721 #ifdef bool 722 #undef bool 723 #endif 724 #ifdef pixel 725 #undef pixel 726 #endif 727 728 #endif // MAME_EMU_VIDEO_RGBVMX_H 729