1/**************************************************************************** 2** 3** Copyright (C) 2013 Imagination Technologies Limited, www.imgtec.com 4** Contact: https://www.qt.io/licensing/ 5** 6** This file is part of the QtGui module of the Qt Toolkit. 7** 8** $QT_BEGIN_LICENSE:LGPL$ 9** Commercial License Usage 10** Licensees holding valid commercial Qt licenses may use this file in 11** accordance with the commercial license agreement provided with the 12** Software or, alternatively, in accordance with the terms contained in 13** a written agreement between you and The Qt Company. For licensing terms 14** and conditions see https://www.qt.io/terms-conditions. For further 15** information use the contact form at https://www.qt.io/contact-us. 16** 17** GNU Lesser General Public License Usage 18** Alternatively, this file may be used under the terms of the GNU Lesser 19** General Public License version 3 as published by the Free Software 20** Foundation and appearing in the file LICENSE.LGPL3 included in the 21** packaging of this file. Please review the following information to 22** ensure the GNU Lesser General Public License version 3 requirements 23** will be met: https://www.gnu.org/licenses/lgpl-3.0.html. 24** 25** GNU General Public License Usage 26** Alternatively, this file may be used under the terms of the GNU 27** General Public License version 2.0 or (at your option) the GNU General 28** Public license version 3 or any later version approved by the KDE Free 29** Qt Foundation. The licenses are as published by the Free Software 30** Foundation and appearing in the file LICENSE.GPL2 and LICENSE.GPL3 31** included in the packaging of this file. Please review the following 32** information to ensure the GNU General Public License requirements will 33** be met: https://www.gnu.org/licenses/gpl-2.0.html and 34** https://www.gnu.org/licenses/gpl-3.0.html. 
**
** $QT_END_LICENSE$
**
****************************************************************************/

#include "qt_mips_asm_dsp_p.h"

/*
 * General notes for this file:
 *  - Branch delay slots are filled by hand; the instruction following a
 *    branch/jump is indented by one extra space to mark it as the delay slot.
 *  - 8388736 (0x800080) is the per-halfword rounding term used by the
 *    "x * a / 255" approximation: (v + (v >> 8) + 0x80) >> 8.
 *  - Routines with the _x2 suffix handle two pixels per iteration and count
 *    the length down by 2 until it is exactly zero, so the caller has to
 *    pass an even length.
 *  - Stack frames are kept a multiple of 8 bytes so that sp stays doubleword
 *    aligned (o32 ABI requirement).
 */

LEAF_MIPS_DSP(destfetchARGB32_asm_mips_dsp)
/*
 * Multiplies the R, G and B channel of every source pixel by the pixel's
 * alpha (alpha itself is copied) and stores the result to the buffer.
 *
 * a0 - buffer address (dst)
 * a1 - data address (src)
 * a2 - length
 *
 * Returns v0 = original buffer address.
 */

    beqz            a2, 2f
     move           v0, a0              /* return the address of the buffer;
                                         * executed on both paths (delay slot) */
    andi            t1, a2, 0x1
    li              t7, 8388736         /* t7 = 0x800080 */
    beqz            t1, 1f
     nop

    /* odd length: handle a single pixel first */
    lw              t8, 0(a1)
    addiu           a2, a2, -1
    srl             t6, t8, 24          /* t6 = alpha */

    preceu.ph.qbra  t0, t8
    mul             t1, t0, t6
    preceu.ph.qbla  t4, t8
    mul             t5, t4, t6

    preceu.ph.qbla  t2, t1
    addq.ph         t3, t1, t2
    addq.ph         t3, t3, t7
    preceu.ph.qbla  t1, t3              /* t1 holds R & B blended with alpha
                                         * | 0 | dRab | 0 | dBab | */
    preceu.ph.qbla  t2, t5
    addq.ph         t3, t2, t5
    addq.ph         t4, t3, t7
    preceu.ph.qbla  t2, t4              /* t2 holds A & G blended with alpha
                                         * | 0 | dAab | 0 | dGab | */
    andi            t2, t2, 255         /* keep only dGab */

    sll             t0, t6, 24
    sll             t3, t2, 8
    or              t4, t0, t3
    or              t0, t1, t4
    sw              t0, 0(a0)
    addiu           a0, a0, 4
    addiu           a1, a1, 4
    beqz            a2, 2f              /* there was only one member */
     nop
1:
    lw              t0, 0(a1)           /* t0 = src1 */
    lw              t1, 4(a1)           /* t1 = src2 */
    precrq.qb.ph    t4, t0, t1          /* t4 = a1 G1 a2 G2 */
    preceu.ph.qbra  t3, t4              /* t3 = 0 G1 0 G2 */
    preceu.ph.qbla  t2, t4              /* t2 = | 0 | a1 | 0 | a2 | */
    srl             t5, t2, 8
    or              t8, t2, t5          /* t8 = 0 a1 a1 a2 */
    muleu_s.ph.qbr  t5, t8, t3

    addiu           a2, a2, -2
    addiu           a1, a1, 8
    precrq.ph.w     t9, t0, t1
    preceu.ph.qbra  t9, t9

    preceu.ph.qbla  t6, t5
    addq.ph         t5, t5, t6
    addq.ph         t2, t5, t7
    muleu_s.ph.qbr  t6, t8, t9
    sll             t3, t1, 16
    packrl.ph       t3, t0, t3
    preceu.ph.qbra  t3, t3
    muleu_s.ph.qbr  t8, t8, t3
    preceu.ph.qbla  t3, t6
    addq.ph         t3, t6, t3
    addq.ph         t3, t3, t7
    preceu.ph.qbla  t5, t8
    addq.ph         t5, t8, t5
    addq.ph         t5, t5, t7

    precrq.ph.w     t0, t4, t3          /* t0 = | 0 | a1 | 0 | dR1 | */
    precrq.ph.w     t1, t2, t5          /* t1 = | 0 | dG1 | 0 | dB1 | */
    precrq.qb.ph    t6, t0, t1          /* t6 = | a1 | dR1 | dG1 | dB1 | */
    sll             t3, t3, 16
    sll             t5, t5, 16
    packrl.ph       t0, t4, t3
    packrl.ph       t1, t2, t5
    precrq.qb.ph    t8, t0, t1          /* t8 = | a2 | dR2 | dG2 | dB2 | */
    sw              t6, 0(a0)
    sw              t8, 4(a0)
    bnez            a2, 1b
     addiu          a0, a0, 8
2:
    jr              ra
     nop

END(destfetchARGB32_asm_mips_dsp)

LEAF_MIPS_DSP(qt_memfill32_asm_mips_dsp)
/*
 * Stores the 32-bit value `count` times starting at dst.
 *
 * a0 - destination address (dst)
 * a1 - value
 * a2 - count
 */

    beqz            a2, 5f
     nop
    li              t8, 8
    andi            t0, a2, 0x7         /* t0 = count % 8 (leading words) */
    beqzl           t0, 2f              /* count is multiple of 8 (8, 16, 24, ....) */
     addiu          a2, a2, -8          /* branch-likely: only done when taken */
    subu            a2, a2, t0
1:
    /* store the count % 8 leading words one at a time */
    sw              a1, 0(a0)
    addiu           t0, t0, -1
    bnez            t0, 1b
     addiu          a0, a0, 4
    bgeu            a2, t8, 2f
     addiu          a2, a2, -8          /* reserve the final group of 8 for label 4 */
    b               5f
     nop
2:
    beqz            a2, 4f
     nop
3:
    pref            30, 32(a0)
    addiu           a2, a2, -8
    sw              a1, 0(a0)
    sw              a1, 4(a0)
    sw              a1, 8(a0)
    sw              a1, 12(a0)
    addiu           a0, a0, 32
    sw              a1, -16(a0)
    sw              a1, -12(a0)
    sw              a1, -8(a0)
    bnez            a2, 3b
     sw             a1, -4(a0)
4:
    /* final group of 8 words */
    sw              a1, 0(a0)
    sw              a1, 4(a0)
    sw              a1, 8(a0)
    sw              a1, 12(a0)
    addiu           a0, a0, 32
    sw              a1, -16(a0)
    sw              a1, -12(a0)
    sw              a1, -8(a0)
    sw              a1, -4(a0)
5:
    jr              ra
     nop

END(qt_memfill32_asm_mips_dsp)

LEAF_MIPS_DSP(comp_func_SourceOver_asm_mips_dsp)
/*
 * dest[i] = s + BYTE_MUL(dest[i], ~qAlpha(s)), one pixel per iteration,
 * where s = src[i] (const_alpha == 255) or BYTE_MUL(src[i], const_alpha).
 *
 * a0 - uint *dest
 * a1 - const uint *src
 * a2 - int length
 * a3 - uint const_alpha
 */

    beqz            a2, 5f
     nop
    li              t8, 0xff
    li              t7, 8388736         /* t7 = 0x800080 */
    bne             a3, t8, 4f
     nop

/* part where const_alpha = 255 */
    b               2f
     nop
1:
    /* src[i] == 0: leave dest[i] untouched */
    addiu           a0, a0, 4
    addiu           a2, a2, -1
    beqz            a2, 5f
     nop
2:
    lw              t0, 0(a1)           /* t0 = s = src[i] */
    addiu           a1, a1, 4
    nor             t1, t0, zero
    srl             t1, t1, 24          /* t1 = ~qAlpha(s) */
    bnez            t1, 3f
     nop
    sw              t0, 0(a0)           /* opaque source: dst[i] = src[i] */
    addiu           a2, a2, -1
    bnez            a2, 2b
     addiu          a0, a0, 4
    b               5f
     nop
3:
    beqz            t0, 1b
     nop

    lw              t4, 0(a0)
    replv.ph        t6, t1
    muleu_s.ph.qbl  t2, t4, t6
    muleu_s.ph.qbr  t3, t4, t6
    addiu           a2, a2, -1
    preceu.ph.qbla  t4, t2
    addq.ph         t4, t2, t4
    addq.ph         t4, t4, t7
    preceu.ph.qbla  t5, t3
    addq.ph         t5, t5, t3
    addq.ph         t5, t5, t7
    precrq.qb.ph    t8, t4, t5          /* t8 = | dsA | dsR | dsG | dsB | */
    addu            t8, t0, t8          /* dst[i] =
                                         * s + BYTE_MUL(dst[i],~qAlpha(s)) */
    sw              t8, 0(a0)
    bnez            a2, 2b
     addiu          a0, a0, 4
    b               5f
     nop

/* part where const_alpha != 255 */
4:
    lw              t0, 0(a0)           /* t0 - dst[i] "1" */
    lw              t1, 0(a1)           /* t1 - src[i] "2" */
    addiu           a1, a1, 4
    addiu           a2, a2, -1
    replv.ph        t6, a3              /* t6 = const_alpha in both halfwords */
    muleu_s.ph.qbl  t2, t1, t6
    muleu_s.ph.qbr  t3, t1, t6
    preceu.ph.qbla  t4, t2
    addq.ph         t4, t2, t4
    addq.ph         t4, t4, t7
    preceu.ph.qbla  t5, t3
    addq.ph         t5, t5, t3
    addq.ph         t5, t5, t7
    precrq.qb.ph    t8, t4, t5          /* t8 = | dsA | dsR | dsG | dsB | */

    nor             t6, t8, zero
    srl             t6, t6, 24
    replv.ph        t6, t6

    muleu_s.ph.qbl  t2, t0, t6
    muleu_s.ph.qbr  t3, t0, t6
    preceu.ph.qbla  t4, t2
    addq.ph         t4, t2, t4
    addq.ph         t4, t4, t7
    preceu.ph.qbla  t5, t3
    addq.ph         t5, t5, t3
    addq.ph         t5, t5, t7
    precrq.qb.ph    t6, t4, t5          /* t6 = | ddA | ddR | ddG | ddB | */

    addu            t0, t8, t6
    sw              t0, 0(a0)
    bnez            a2, 4b
     addiu          a0, a0, 4
5:
    jr              ra
     nop

END(comp_func_SourceOver_asm_mips_dsp)

LEAF_MIPS_DSPR2(qt_destStoreARGB32_asm_mips_dsp)
/*
 * Divides the colour channels of each buffer pixel by its alpha
 * (using the 16.16 reciprocal 0xff0000 / alpha) and stores the result.
 * Pixels with alpha 255 are copied, pixels with alpha 0 are stored as 0.
 *
 * a0 - uint * data
 * a1 - const uint *buffer
 * a2 - int length
 *
 * The blue product is kept in $ac2 rather than $ac0: the plain `mul`
 * used for the red channel leaves HI/LO (= $ac0) architecturally
 * UNPREDICTABLE, so $ac0 must not be live across it.
 */

    blez            a2, 6f
     move           v1, zero            /* v1 = i = 0 */
    li              t0, 255
    lui             a3, 0xff            /* a3 = 0x00ff0000 */
    b               2f
     lui            t2, 0xff00          /* t2 = 0xff000000 (alpha mask) */
1:
    /* alpha == 0: store a zero pixel */
    addiu           v1, v1, 1
    sw              zero, 0(a0)
    addiu           a1, a1, 4
    beq             v1, a2, 6f
     addiu          a0, a0, 4
2:
    lw              v0, 0(a1)
    srl             t3, v0, 0x18        /* t3 = alpha */
    beql            t3, t0, 5f          /* alpha == 255: store pixel unchanged */
     addiu          v1, v1, 1
    beqz            t3, 1b

     srl            t1, v0, 0x8         /* delay slot; harmless when taken */
    andi            t1, t1, 0xff        /* t1 = G */

    teq             t3, zero, 0x7       /* trap on division by zero */
    div             zero, a3, t3        /* LO = 0xff0000 / alpha */
    move            t8, t3
    andi            t6, v0, 0xff        /* t6 = B */

    srl             t3, v0, 0x10
    andi            t3, t3, 0xff        /* t3 = R */

    and             t5, v0, t2          /* t5 = alpha in place */
    mflo            t4                  /* t4 = reciprocal */

    mult            $ac2, t4, t6        /* $ac2 = B * reciprocal */
    mult            $ac1, t1, t4        /* $ac1 = G * reciprocal */
    mul             t4, t3, t4          /* t4   = R * reciprocal */

    sltiu           t8, t8, 2
    beqz            t8, 3f
     nop
    mflo            t6, $ac2
    mflo            t1, $ac1
    sra             t6, t6, 0x10
    sra             t1, t1, 0x8
    b               4f
     nop
3:
    extr.w          t6, $ac2, 0x10
    extr.w          t1, $ac1, 0x8
4:
    and             v0, t4, a3
    or              v0, v0, t6
    or              v0, v0, t5
    andi            t1, t1, 0xff00
    or              v0, v0, t1
    addiu           v1, v1, 1
5:
    sw              v0, 0(a0)
    addiu           a1, a1, 4
    bne             v1, a2, 2b
     addiu          a0, a0, 4
6:
    jr              ra
     nop

END(qt_destStoreARGB32_asm_mips_dsp)

LEAF_MIPS_DSP(comp_func_solid_Source_dsp_asm_x2)
/*
 * dest[i] = color + BYTE_MUL(dest[i], ialpha), two pixels per iteration.
 *
 * a0 - uint *dest
 * a1 - int length (even)
 * a2 - uint color
 * a3 - uint ialpha
 */

    beqz            a1, 2f
     nop
    replv.ph        a3, a3
    li              t9, 8388736         /* t9 = 0x800080 */
1:
    lw              t0, 0(a0)
    lw              t1, 4(a0)
    or              t2, t0, t1          /* if both dest are zero, no computation needed */
    beqz            t2, 12f
     addiu          a1, -2

    BYTE_MUL_x2     t0, t1, t6, t7, a3, a3, t9, t2, t3, t4, t5, 0
11:
    addu            t2, a2, t6
    addu            t3, a2, t7
    sw              t2, 0(a0)
    sw              t3, 4(a0)
    bnez            a1, 1b
     addiu          a0, 8
    b               2f
     nop                                /* explicit delay slot: do not run label 12 code */
12:
    addu            t2, a2, t0
    addu            t3, a2, t1
    sw              t2, 0(a0)
    sw              t3, 4(a0)
    bnez            a1, 1b
     addiu          a0, 8
2:
    jr              ra
     nop

END(comp_func_solid_Source_dsp_asm_x2)

LEAF_MIPS_DSP(comp_func_solid_DestinationOver_dsp_asm_x2)
/*
 * dest[i] = dest[i] + BYTE_MUL(color, qAlpha(~dest[i])), two pixels per
 * iteration.
 *
 * a0 - uint *dest
 * a1 - int length (even)
 * a2 - uint color
 *
 * NOTE(review): s0/s1 are saved and restored but are not named in this
 * routine; presumably a leftover - confirm BYTE_MUL_x2 does not use them.
 */

    addiu           sp, sp, -8
    sw              s0, 0(sp)
    sw              s1, 4(sp)
    beqz            a1, 2f
     nop
    beqz            a2, 2f
     nop
    li              t9, 8388736         /* t9 = 0x800080 */

1:
    lw              t0, 0(a0)
    lw              t1, 4(a0)
    not             t2, t0
    not             t3, t1
    srl             t4, t2, 24
    srl             t5, t3, 24
    or              t2, t4, t5          /* if both qAlpha(~dest) are zero (both
                                         * dest opaque), no computation needed */
    beqz            t2, 11f
     addiu          a1, -2
    replv.ph        t2, t4
    replv.ph        t3, t5

    BYTE_MUL_x2     a2, a2, t8, a3, t2, t3, t9, t4, t5, t6, t7

    addu            t0, t0, t8
    addu            t1, t1, a3
11:
    sw              t0, 0(a0)
    sw              t1, 4(a0)
    bnez            a1, 1b
     addiu          a0, 8

2:
    lw              s0, 0(sp)
    lw              s1, 4(sp)
    addiu           sp, sp, 8
    jr              ra
     nop

END(comp_func_solid_DestinationOver_dsp_asm_x2)

LEAF_MIPS_DSP(comp_func_DestinationOver_dsp_asm_x2)
/*
 * dest[i] = dest[i] + BYTE_MUL(s, qAlpha(~dest[i])), two pixels per
 * iteration; s is src[i], pre-multiplied by const_alpha when it is != 255.
 *
 * a0 - uint *dest
 * a1 - uint *src
 * a2 - int length (even)
 * a3 - uint const_alpha
 */

    .set noat
    addiu           sp, sp, -8
    sw              s0, 0(sp)
    sw              s1, 4(sp)
    beqz            a2, 3f
     nop
    li              t9, 8388736         /* t9 = 0x800080 */
    li              t0, 0xff
    beq             a3, t0, 2f
     nop

/* part where const_alpha != 255 */
1:
    replv.ph        a3, a3
11:
    lw              t0, 0(a1)           /* src_1 */
    lw              t1, 4(a1)           /* src_2 */
    addiu           a2, -2

    BYTE_MUL_x2     t0, t1, t8, AT, a3, a3, t9, t4, t5, t6, t7, 0
                                        /* t8 = s1 */
                                        /* AT = s2 */
    lw              t0, 0(a0)           /* dest_1 */
    lw              t1, 4(a0)           /* dest_2 */
    addiu           a1, 8
    not             t2, t0
    not             t3, t1
    srl             t4, t2, 24
    srl             t5, t3, 24
    replv.ph        t2, t4              /* qAlpha(~d) 1 */
    replv.ph        t3, t5              /* qAlpha(~d) 2 */

    BYTE_MUL_x2     t8, AT, s0, s1, t2, t3, t9, t4, t5, t6, t7

    addu            t0, t0, s0
    addu            t1, t1, s1
    sw              t0, 0(a0)
    sw              t1, 4(a0)
    bnez            a2, 11b
     addiu          a0, 8
    b               3f
     nop

/* part where const_alpha = 255 */
2:
    lw              t0, 0(a0)           /* dest 1 */
    lw              t1, 4(a0)           /* dest 2 */
    lw              s0, 0(a1)           /* src 1 */
    lw              s1, 4(a1)           /* src 2 */
    not             t2, t0
    not             t3, t1
    srl             t4, t2, 24
    srl             t5, t3, 24
    replv.ph        t2, t4
    replv.ph        t3, t5
    addiu           a1, 8
    addiu           a2, -2

    BYTE_MUL_x2     s0, s1, t8, AT, t2, t3, t9, t4, t5, t6, t7

    addu            t0, t0, t8
    addu            t1, t1, AT
    sw              t0, 0(a0)
    sw              t1, 4(a0)
    bnez            a2, 2b
     addiu          a0, 8

3:
    lw              s0, 0(sp)
    lw              s1, 4(sp)
    addiu           sp, sp, 8
    jr              ra
     nop
    .set at

END(comp_func_DestinationOver_dsp_asm_x2)

LEAF_MIPS_DSP(comp_func_solid_SourceIn_dsp_asm_x2)
/*
 * a0 - uint *dest
 * a1 - int length (even)
 * a2 - uint color
 * a3 - uint const_alpha
 */

    .set noat
    addiu           sp, sp, -16         /* 12 bytes used; 16 keeps sp 8-byte aligned */
    sw              s0, 0(sp)
    sw              s1, 4(sp)
    sw              s2, 8(sp)
    beqz            a1, 3f
     nop
    li              t9, 8388736         /* t9 = 0x800080 (rounding_factor) */
    lui             t8, 0xff00
    li              t0, 0xff
    beq             a3, t0, 2f
     ori            t8, t8, 0xff00      /* t8 = 0xff00ff00 (andi_factor) */

/* part where const_alpha != 255 */
1:
    replv.ph        t0, a3
    li              t5, 0xff
    BYTE_MUL        a2, a2, t0, t9, t1, t2, t3, t4 /* a2 = color ( = BYTE_MUL(color, const_alpha)); */
    subu            t1, t5, a3          /* t1 = cia = 255 - const_alpha */
11:
    lw              t2, 0(a0)           /* t2 = d */
    lw              s0, 4(a0)
    addiu           a1, -2
    srl             t3, t2, 24          /* t3 = qAlpha(d) */
    srl             s2, s0, 24

    INTERPOLATE_PIXEL_255 a2, t3, t2, t1, AT, t9, t8, t4, t5, t6, t7
    INTERPOLATE_PIXEL_255 a2, s2, s0, t1, s1, t9, t8, t4, t5, t6, t7

    sw              AT, 0(a0)
    sw              s1, 4(a0)
    bnez            a1, 11b
     addiu          a0, 8
    b               3f
     nop

/* part where const_alpha = 255 */
2:
    lw              t0, 0(a0)           /* dest 1 */
    lw              t1, 4(a0)           /* dest 2 */
    srl             t4, t0, 24
    srl             t5, t1, 24
    replv.ph        t2, t4
    replv.ph        t3, t5
    addiu           a1, -2

    BYTE_MUL_x2     a2, a2, t8, AT, t2, t3, t9, t4, t5, t6, t7

    sw              t8, 0(a0)
    sw              AT, 4(a0)
    bnez            a1, 2b
     addiu          a0, 8

3:
    lw              s0, 0(sp)
    lw              s1, 4(sp)
    lw              s2, 8(sp)
    addiu           sp, sp, 16
    jr              ra
     nop
    .set at

END(comp_func_solid_SourceIn_dsp_asm_x2)

LEAF_MIPS_DSP(comp_func_SourceIn_dsp_asm_x2)
/*
 * a0 - uint *dest
 * a1 - const uint *src
 * a2 - int length (even)
 * a3 - uint const_alpha
 */

    .set noat
    addiu           sp, sp, -16
    sw              s0, 0(sp)
    sw              s1, 4(sp)
    sw              s2, 8(sp)
    sw              s3, 12(sp)
    beqz            a2, 3f
     nop
    li              t9, 8388736         /* t9 = 0x800080 (rounding_factor) */
    lui             t8, 0xff00
    li              t0, 0xff
    beq             a3, t0, 2f
     ori            t8, t8, 0xff00      /* t8 = 0xff00ff00 (andi_factor) */

/* part where const_alpha != 255 */
1:
    li              t5, 0xff
    subu            t7, t5, a3          /* t7 = cia = 255 - const_alpha */
    replv.ph        a3, a3
11:
    lw              t0, 0(a1)           /* t0 = src 1 */
    lw              t1, 4(a1)           /* t1 = src 2 */
    addiu           a2, -2

    BYTE_MUL_x2     t0, t1, AT, s0, a3, a3, t9, t3, t4, t5, t6, 0

    lw              t0, 0(a0)           /* t0 = dest 1 */
    lw              t1, 4(a0)           /* t1 = dest 2 */
    addiu           a1, 8

    srl             t2, t0, 24          /* t2 = qAlpha(d) 1 */
    srl             t3, t1, 24          /* t3 = qAlpha(d) 2 */

    INTERPOLATE_PIXEL_255 AT, t2, t0, t7, s1, t9, t8, t4, t5, t6, s3
    INTERPOLATE_PIXEL_255 s0, t3, t1, t7, s2, t9, t8, t4, t5, t6, s3

    sw              s1, 0(a0)
    sw              s2, 4(a0)
    bnez            a2, 11b
     addiu          a0, 8
    b               3f
     nop

/* part where const_alpha = 255 */
2:
    lw              t2, 0(a0)           /* dest 1 */
    lw              t3, 4(a0)           /* dest 2 */
    lw              t0, 0(a1)           /* src 1 */
    lw              t1, 4(a1)           /* src 2 */
    srl             t4, t2, 24
    srl             t5, t3, 24
    replv.ph        t2, t4
    replv.ph        t3, t5
    addiu           a2, -2

    BYTE_MUL_x2     t0, t1, t8, AT, t2, t3, t9, t4, t5, t6, t7

    addiu           a1, 8
    sw              t8, 0(a0)
    sw              AT, 4(a0)
    bnez            a2, 2b
     addiu          a0, 8

3:
    lw              s0, 0(sp)
    lw              s1, 4(sp)
    lw              s2, 8(sp)
    lw              s3, 12(sp)
    addiu           sp, sp, 16
    jr              ra
     nop
    .set at

END(comp_func_SourceIn_dsp_asm_x2)

LEAF_MIPS_DSP(comp_func_solid_DestinationIn_dsp_asm_x2)
/*
 * dest[i] = BYTE_MUL(dest[i], a), two pixels per iteration.
 *
 * a0 - uint *dest
 * a1 - int length (even)
 * a2 - uint a
 */

    .set noat
    beqz            a1, 2f
     nop
    li              t9, 8388736         /* t9 = 0x800080 (rounding_factor) */
    replv.ph        a2, a2
1:
    lw              t0, 0(a0)
    lw              t1, 4(a0)
    addiu           a1, -2

    BYTE_MUL_x2     t0, t1, t8, AT, a2, a2, t9, t4, t5, t6, t7, 0

    sw              t8, 0(a0)
    sw              AT, 4(a0)
    bnez            a1, 1b
     addiu          a0, 8
2:
    jr              ra
     nop
    .set at

END(comp_func_solid_DestinationIn_dsp_asm_x2)

LEAF_MIPS_DSP(comp_func_DestinationIn_dsp_asm_x2)
/*
 * a0 - uint *dest
 * a1 - const uint *src
 * a2 - int length (even)
 * a3 - uint const_alpha
 */

    addiu           sp, sp, -8
    sw              s0, 0(sp)
    sw              s1, 4(sp)
    beqz            a2, 3f
     nop
    li              t9, 8388736         /* t9 = 0x800080 (rounding_factor) */
    li              t0, 0xff
    beq             a3, t0, 2f
     nop

/* part where const_alpha != 255 */
1:
    li              t5, 0xff
    subu            t8, t5, a3          /* t8 = cia = 255 - const_alpha */
    replv.ph        a3, a3
11:
    lw              t0, 0(a1)           /* t0 = src 1 */
    lw              t1, 4(a1)           /* t1 = src 2 */
    addiu           a2, -2
    srl             t0, t0, 24
    srl             t1, t1, 24

    BYTE_MUL_x2     t0, t1, s1, t7, a3, a3, t9, t3, t4, t5, t6, 0

    lw              t0, 0(a0)           /* t0 = dest 1 */
    lw              t1, 4(a0)           /* t1 = dest 2 */
    addu            s1, s1, t8          /* a 1 */
    addu            t7, t7, t8          /* a 2 */
    replv.ph        t2, s1
    replv.ph        t3, t7

    BYTE_MUL_x2     t0, t1, s1, t7, t2, t3, t9, t4, t5, t6, s0

    addiu           a1, 8
    sw              s1, 0(a0)
    sw              t7, 4(a0)
    bnez            a2, 11b
     addiu          a0, 8
    b               3f
     nop

/* part where const_alpha = 255 */
2:
    lw              t2, 0(a1)           /* src 1 */
    lw              t3, 4(a1)           /* src 2 */
    lw              t0, 0(a0)           /* dest 1 */
    lw              t1, 4(a0)           /* dest 2 */
    srl             t4, t2, 24
    srl             t5, t3, 24
    replv.ph        t2, t4              /* t2 = qAlpha(src 1) */
    replv.ph        t3, t5              /* t3 = qAlpha(src 2) */
    addiu           a2, -2

    BYTE_MUL_x2     t0, t1, t8, s1, t2, t3, t9, t4, t5, t6, t7

    addiu           a1, 8
    sw              t8, 0(a0)
    sw              s1, 4(a0)
    bnez            a2, 2b
     addiu          a0, 8

3:
    lw              s0, 0(sp)
    lw              s1, 4(sp)
    addiu           sp, sp, 8
    jr              ra
     nop

END(comp_func_DestinationIn_dsp_asm_x2)

LEAF_MIPS_DSP(comp_func_DestinationOut_dsp_asm_x2)
/*
 * a0 - uint *dest
 * a1 - const uint *src
 * a2 - int length (even)
 * a3 - uint const_alpha
 */

    .set noat
    addiu           sp, sp, -8          /* 4 bytes used; 8 keeps sp 8-byte aligned */
    sw              s0, 0(sp)
    beqz            a2, 3f
     nop
    li              t9, 8388736         /* t9 = 0x800080 (rounding_factor) */
    li              t0, 0xff
    beq             a3, t0, 2f
     nop

/* part where const_alpha != 255 */
1:
    li              t5, 0xff
    subu            t8, t5, a3          /* t8 = cia = 255 - const_alpha */
    replv.ph        a3, a3
11:
    lw              t0, 0(a1)           /* t0 = src 1 */
    lw              t1, 4(a1)           /* t1 = src 2 */
    not             t0, t0
    not             t1, t1
    addiu           a2, -2
    srl             t0, t0, 24
    srl             t1, t1, 24

    BYTE_MUL_x2     t0, t1, AT, t7, a3, a3, t9, t3, t4, t5, t6, 0

    lw              t0, 0(a0)           /* t0 = dest 1 */
    lw              t1, 4(a0)           /* t1 = dest 2 */
    addu            AT, AT, t8          /* a 1 */
    addu            t7, t7, t8          /* a 2 */
    replv.ph        t2, AT
    replv.ph        t3, t7

    BYTE_MUL_x2     t0, t1, AT, t7, t2, t3, t9, t4, t5, t6, s0

    addiu           a1, 8
    sw              AT, 0(a0)
    sw              t7, 4(a0)
    bnez            a2, 11b
     addiu          a0, 8
    b               3f
     nop

/* part where const_alpha = 255 */
2:
    lw              t2, 0(a1)           /* src 1 */
    lw              t3, 4(a1)           /* src 2 */
    not             t2, t2
    not             t3, t3
    lw              t0, 0(a0)           /* dest 1 */
    lw              t1, 4(a0)           /* dest 2 */
    srl             t4, t2, 24
    srl             t5, t3, 24
    replv.ph        t2, t4              /* t2 = qAlpha(~src 1) */
    replv.ph        t3, t5              /* t3 = qAlpha(~src 2) */
    addiu           a2, -2

    BYTE_MUL_x2     t0, t1, t8, AT, t2, t3, t9, t4, t5, t6, t7

    addiu           a1, 8
    sw              t8, 0(a0)
    sw              AT, 4(a0)
    bnez            a2, 2b
     addiu          a0, 8

3:
    lw              s0, 0(sp)
    addiu           sp, sp, 8
    jr              ra
     nop
    .set at

END(comp_func_DestinationOut_dsp_asm_x2)

LEAF_MIPS_DSP(comp_func_solid_SourceAtop_dsp_asm_x2)
/*
 * a0 - uint *dest
 * a1 - int length (even)
 * a2 - uint color
 * a3 - uint sia
 */

    .set noat
    addiu           sp, sp, -8          /* 4 bytes used; 8 keeps sp 8-byte aligned */
    sw              s0, 0(sp)
    beqz            a1, 2f
     nop
    li              t9, 8388736         /* t9 = 0x800080 (rounding_factor) */
    lui             t8, 0xff00
    ori             t8, t8, 0xff00      /* t8 = 0xff00ff00 (andi_factor) */
1:
    lw              t0, 0(a0)           /* t0 = dest 1 */
    lw              t1, 4(a0)           /* t1 = dest 2 */
    addiu           a1, -2
    srl             t2, t0, 24          /* t2 = qAlpha(dest 1) */
    srl             t3, t1, 24          /* t3 = qAlpha(dest 2) */

    INTERPOLATE_PIXEL_255 a2, t2, t0, a3, AT, t9, t8, t4, t5, t6, t7
    INTERPOLATE_PIXEL_255 a2, t3, t1, a3, s0, t9, t8, t4, t5, t6, t7

    sw              AT, 0(a0)
    sw              s0, 4(a0)
    bnez            a1, 1b
     addiu          a0, 8
2:
    lw              s0, 0(sp)
    addiu           sp, sp, 8
    jr              ra
     nop
    .set at

END(comp_func_solid_SourceAtop_dsp_asm_x2)

LEAF_MIPS_DSP(comp_func_SourceAtop_dsp_asm_x2)
/*
 * a0 - uint *dest
 * a1 - const uint *src
 * a2 - int length (even)
 * a3 - uint const_alpha
 */

    .set noat
    addiu           sp, sp, -24         /* 20 bytes used; 24 keeps sp 8-byte aligned */
    sw              s0, 0(sp)
    sw              s1, 4(sp)
    sw              s2, 8(sp)
    sw              s3, 12(sp)
    sw              s4, 16(sp)
    beqz            a2, 3f
     nop
    li              t9, 8388736         /* t9 = 0x800080 (rounding_factor) */
    lui             t8, 0xff00
    li              t0, 0xff
    beq             a3, t0, 2f
     ori            t8, t8, 0xff00      /* t8 = 0xff00ff00 (andi_factor) */

/* part where const_alpha != 255 */
1:
    replv.ph        a3, a3
11:
    lw              AT, 0(a1)           /* src 1 */
    lw              s0, 4(a1)           /* src 2 */

    BYTE_MUL_x2     AT, s0, t0, t1, a3, a3, t9, t3, t4, t5, t6, 0
                                        /* t0 = s */

    lw              t2, 0(a0)           /* t2 = dest 1 */
    lw              t3, 4(a0)           /* t3 = dest 2 */

    srl             t4, t2, 24          /* t4 = qAlpha(dest 1) */
    srl             t5, t3, 24
    not             t6, t0
    not             t7, t1
    srl             t6, t6, 24          /* t6 = qAlpha(~s) */
    srl             t7, t7, 24
    addiu           a2, -2

    INTERPOLATE_PIXEL_255 t0, t4, t2, t6, AT, t9, t8, s1, s2, s3, s4
    INTERPOLATE_PIXEL_255 t1, t5, t3, t7, s0, t9, t8, s1, s2, s3, s4

    addiu           a1, 8
    sw              AT, 0(a0)
    sw              s0, 4(a0)
    bnez            a2, 11b
     addiu          a0, 8
    b               3f
     nop

/* part where const_alpha = 255 */
2:
    lw              t2, 0(a0)           /* dest 1 */
    lw              t3, 4(a0)           /* dest 2 */
    lw              t0, 0(a1)           /* src 1 */
    lw              t1, 4(a1)           /* src 2 */
    srl             t4, t2, 24
    srl             t5, t3, 24
    not             t6, t0
    not             t7, t1
    srl             t6, t6, 24
    srl             t7, t7, 24
    addiu           a2, -2

    INTERPOLATE_PIXEL_255 t0, t4, t2, t6, AT, t9, t8, s1, s2, s3, s4
    INTERPOLATE_PIXEL_255 t1, t5, t3, t7, s0, t9, t8, s1, s2, s3, s4

    addiu           a1, 8
    sw              AT, 0(a0)
    sw              s0, 4(a0)
    bnez            a2, 2b
     addiu          a0, 8

3:
    lw              s0, 0(sp)
    lw              s1, 4(sp)
    lw              s2, 8(sp)
    lw              s3, 12(sp)
    lw              s4, 16(sp)
    addiu           sp, sp, 24
    jr              ra
     nop
    .set at

END(comp_func_SourceAtop_dsp_asm_x2)

LEAF_MIPS_DSP(comp_func_solid_DestinationAtop_dsp_asm_x2)
/*
 * a0 - uint *dest
 * a1 - int length (even)
 * a2 - uint color
 * a3 - uint a
 */

    .set noat
    addiu           sp, sp, -8          /* 4 bytes used; 8 keeps sp 8-byte aligned */
    sw              s0, 0(sp)
    beqz            a1, 2f
     nop
    li              t9, 8388736         /* t9 = 0x800080 (rounding_factor) */
    lui             t8, 0xff00
    ori             t8, t8, 0xff00      /* t8 = 0xff00ff00 (andi_factor) */
1:
    lw              t0, 0(a0)           /* t0 = dest 1 */
    lw              t1, 4(a0)           /* t1 = dest 2 */
    addiu           a1, -2
    not             t2, t0
    not             t3, t1
    srl             t2, t2, 24          /* t2 = qAlpha(~(dest 1)) */
    srl             t3, t3, 24          /* t3 = qAlpha(~(dest 2)) */

    INTERPOLATE_PIXEL_255 t0, a3, a2, t2, AT, t9, t8, t4, t5, t6, t7
    INTERPOLATE_PIXEL_255 t1, a3, a2, t3, s0, t9, t8, t4, t5, t6, t7

    sw              AT, 0(a0)
    sw              s0, 4(a0)
    bnez            a1, 1b
     addiu          a0, 8
2:
    lw              s0, 0(sp)
    addiu           sp, sp, 8
    jr              ra
     nop
    .set at

END(comp_func_solid_DestinationAtop_dsp_asm_x2)

LEAF_MIPS_DSP(comp_func_DestinationAtop_dsp_asm_x2)
/*
 * a0 - uint *dest
 * a1 - const uint *src
 * a2 - int length (even)
 * a3 - uint const_alpha
 */

    .set noat
    addiu           sp, sp, -24
    sw              s0, 0(sp)
    sw              s1, 4(sp)
    sw              s2, 8(sp)
    sw              s3, 12(sp)
    sw              s4, 16(sp)
    sw              s5, 20(sp)
    beqz            a2, 3f
     nop
    li              t9, 8388736         /* t9 = 0x800080 (rounding_factor) */
    lui             t8, 0xff00
    li              t0, 0xff
    beq             a3, t0, 2f
     ori            t8, t8, 0xff00      /* t8 = 0xff00ff00 (andi_factor) */

/* part where const_alpha != 255 */
1:
    li              s5, 0xff
    subu            s5, s5, a3          /* s5 = cia = 255 - const_alpha */
    replv.ph        a3, a3
11:
    lw              AT, 0(a1)           /* src 1 */
    lw              s0, 4(a1)           /* src 2 */

    BYTE_MUL_x2     AT, s0, t0, t1, a3, a3, t9, t3, t4, t5, t6, 0
                                        /* t0 = s */

    lw              t2, 0(a0)           /* t2 = dest 1 */
    lw              t3, 4(a0)           /* t3 = dest 2 */

    not             t4, t2
    not             t5, t3
    srl             t4, t4, 24          /* t4 = qAlpha(~(dest 1)) */
    srl             t5, t5, 24
    srl             t6, t0, 24
    srl             t7, t1, 24
    addu            t6, t6, s5          /* t6 = a = qAlpha(s1) + cia */
    addu            t7, t7, s5
    addiu           a2, -2

    INTERPOLATE_PIXEL_255 t2, t6, t0, t4, AT, t9, t8, s1, s2, s3, s4
    INTERPOLATE_PIXEL_255 t3, t7, t1, t5, s0, t9, t8, s1, s2, s3, s4

    addiu           a1, 8
    sw              AT, 0(a0)
    sw              s0, 4(a0)
    bnez            a2, 11b
     addiu          a0, 8
    b               3f
     nop

/* part where const_alpha = 255 */
2:
    lw              t2, 0(a0)           /* d1 */
    lw              t3, 4(a0)           /* d2 */
    lw              t0, 0(a1)           /* s1 */
    lw              t1, 4(a1)           /* s2 */
    srl             t4, t0, 24          /* t4 = qAlpha(s1) */
    srl             t5, t1, 24
    not             t6, t2
    not             t7, t3
    srl             t6, t6, 24          /* qAlpha(~d1) */
    srl             t7, t7, 24
    addiu           a2, -2

    INTERPOLATE_PIXEL_255 t2, t4, t0, t6, AT, t9, t8, s1, s2, s3, s4
    INTERPOLATE_PIXEL_255 t3, t5, t1, t7, s0, t9, t8, s1, s2, s3, s4

    addiu           a1, 8
    sw              AT, 0(a0)
    sw              s0, 4(a0)
    bnez            a2, 2b
     addiu          a0, 8

3:
    lw              s0, 0(sp)
    lw              s1, 4(sp)
    lw              s2, 8(sp)
    lw              s3, 12(sp)
    lw              s4, 16(sp)
    lw              s5, 20(sp)
    addiu           sp, sp, 24
    jr              ra
     nop
    .set at

END(comp_func_DestinationAtop_dsp_asm_x2)

LEAF_MIPS_DSP(comp_func_solid_XOR_dsp_asm_x2)
/*
 * a0 - uint *dest
 * a1 - int length (even)
 * a2 - uint color
 * a3 - uint sia
 */

    .set noat
    addiu           sp, sp, -8          /* 4 bytes used; 8 keeps sp 8-byte aligned */
    sw              s0, 0(sp)
    beqz            a1, 2f
     nop
    li              t9, 8388736         /* t9 = 0x800080 (rounding_factor) */
    lui             t8, 0xff00
    ori             t8, t8, 0xff00      /* t8 = 0xff00ff00 (andi_factor) */
1:
    lw              t0, 0(a0)           /* t0 = dest 1 */
    lw              t1, 4(a0)           /* t1 = dest 2 */
    addiu           a1, -2
    not             t2, t0
    not             t3, t1
    srl             t2, t2, 24          /* t2 = qAlpha(~(dest 1)) */
    srl             t3, t3, 24          /* t3 = qAlpha(~(dest 2)) */

    INTERPOLATE_PIXEL_255 a2, t2, t0, a3, AT, t9, t8, t4, t5, t6, t7
    INTERPOLATE_PIXEL_255 a2, t3, t1, a3, s0, t9, t8, t4, t5, t6, t7

    sw              AT, 0(a0)
    sw              s0, 4(a0)
    bnez            a1, 1b
     addiu          a0, 8
2:
    lw              s0, 0(sp)
    addiu           sp, sp, 8
    jr              ra
     nop
    .set at

END(comp_func_solid_XOR_dsp_asm_x2)

LEAF_MIPS_DSP(comp_func_XOR_dsp_asm_x2)
/*
 * a0 - uint *dest
 * a1 - const uint *src
 * a2 - int length (even)
 * a3 - uint const_alpha
 */

    .set noat
    addiu           sp, sp, -24         /* 20 bytes used; 24 keeps sp 8-byte aligned */
    sw              s0, 0(sp)
    sw              s1, 4(sp)
    sw              s2, 8(sp)
    sw              s3, 12(sp)
    sw              s4, 16(sp)
    beqz            a2, 3f
     nop
    li              t9, 8388736         /* t9 = 0x800080 (rounding_factor) */
    lui             t8, 0xff00
    li              t0, 0xff
    beq             a3, t0, 2f
     ori            t8, t8, 0xff00      /* t8 = 0xff00ff00 (andi_factor) */

/* part where const_alpha != 255 */
1:
    replv.ph        a3, a3
11:
    lw              AT, 0(a1)           /* src 1 */
    lw              s0, 4(a1)           /* src 2 */

    BYTE_MUL_x2     AT, s0, t0, t1, a3, a3, t9, t3, t4, t5, t6, 0
                                        /* t0 = s1 */
                                        /* t1 = s2 */

    lw              t2, 0(a0)           /* t2 = dest 1 */
    lw              t3, 4(a0)           /* t3 = dest 2 */

    not             t4, t2
    not             t5, t3
    srl             t4, t4, 24          /* t4 = qAlpha(~(dest 1)) */
    srl             t5, t5, 24
    not             t6, t0
    not             t7, t1
    srl             t6, t6, 24          /* t6 = qAlpha(~s) */
    srl             t7, t7, 24
    addiu           a2, -2

    INTERPOLATE_PIXEL_255 t0, t4, t2, t6, AT, t9, t8, s1, s2, s3, s4
    INTERPOLATE_PIXEL_255 t1, t5, t3, t7, s0, t9, t8, s1, s2, s3, s4

    addiu           a1, 8
    sw              AT, 0(a0)
    sw              s0, 4(a0)
    bnez            a2, 11b
     addiu          a0, 8
    b               3f
     nop

/* part where const_alpha = 255 */
2:
    lw              t2, 0(a0)           /* d1 */
    lw              t3, 4(a0)           /* d2 */
    lw              t0, 0(a1)           /* s1 */
    lw              t1, 4(a1)           /* s2 */
    not             t4, t0
    not             t5, t1
    srl             t4, t4, 24          /* t4 = qAlpha(~s1) */
    srl             t5, t5, 24
    not             t6, t2
    not             t7, t3
    srl             t6, t6, 24          /* qAlpha(~d1) */
    srl             t7, t7, 24
    addiu           a2, -2

    INTERPOLATE_PIXEL_255 t0, t6, t2, t4, AT, t9, t8, s1, s2, s3, s4
    INTERPOLATE_PIXEL_255 t1, t7, t3, t5, s0, t9, t8, s1, s2, s3, s4

    addiu           a1, 8
    sw              AT, 0(a0)
    sw              s0, 4(a0)
    bnez            a2, 2b
     addiu          a0, 8

3:
    lw              s0, 0(sp)
    lw              s1, 4(sp)
    lw              s2, 8(sp)
    lw              s3, 12(sp)
    lw              s4, 16(sp)
    addiu           sp, sp, 24
    jr              ra
     nop
    .set at

END(comp_func_XOR_dsp_asm_x2)

LEAF_MIPS_DSP(comp_func_solid_SourceOut_dsp_asm_x2)
/*
 * a0 - uint *dest
 * a1 - int length (even)
 * a2 - uint color
 * a3 - uint const_alpha
 */

    .set noat
    addiu           sp, sp, -16         /* 12 bytes used; 16 keeps sp 8-byte aligned */
    sw              s0, 0(sp)
    sw              s1, 4(sp)
    sw              s2, 8(sp)
    beqz            a1, 3f
     nop
    li              t9, 8388736         /* t9 = 0x800080 (rounding_factor) */
    lui             t8, 0xff00
    li              t0, 0xff
    beq             a3, t0, 2f
     ori            t8, t8, 0xff00      /* t8 = 0xff00ff00 (andi_factor) */

/* part where const_alpha != 255 */
1:
    replv.ph        t0, a3
    li              t5, 0xff
    BYTE_MUL        a2, a2, t0, t9, t1, t2, t3, t4 /* a2 = color ( = BYTE_MUL(color, const_alpha)); */
    subu            t1, t5, a3          /* t1 = cia = 255 - const_alpha */
11:
    lw              t2, 0(a0)           /* t2 = d1 */
    lw              s0, 4(a0)           /* s0 = d2 */
    addiu           a1, -2
    not             t3, t2
    not             s2, s0
    srl             t3, t3, 24          /* t3 = qAlpha(~d1) */
    srl             s2, s2, 24          /* s2 = qAlpha(~d2) */

    INTERPOLATE_PIXEL_255 a2, t3, t2, t1, AT, t9, t8, t4, t5, t6, t7
    INTERPOLATE_PIXEL_255 a2, s2, s0, t1, s1, t9, t8, t4, t5, t6, t7

    sw              AT, 0(a0)
    sw              s1, 4(a0)
    bnez            a1, 11b
     addiu          a0, 8
    b               3f
     nop

/* part where const_alpha = 255 */
2:
    lw              t0, 0(a0)           /* dest 1 */
    lw              t1, 4(a0)           /* dest 2 */
    not             t4, t0
    not             t5, t1
    srl             t4, t4, 24
    srl             t5, t5, 24
    replv.ph        t2, t4
    replv.ph        t3, t5
    addiu           a1, -2

    BYTE_MUL_x2     a2, a2, t8, AT, t2, t3, t9, t4, t5, t6, t7

    sw              t8, 0(a0)
    sw              AT, 4(a0)
    bnez            a1, 2b
     addiu          a0, 8

3:
    lw              s0, 0(sp)
    lw              s1, 4(sp)
    lw              s2, 8(sp)
    addiu           sp, sp, 16
    jr              ra
     nop
    .set at

END(comp_func_solid_SourceOut_dsp_asm_x2)

LEAF_MIPS_DSP(comp_func_SourceOut_dsp_asm_x2)
/*
 * a0 - uint *dest
 * a1 - const uint *src
 * a2 - int length (even)
 * a3 - uint const_alpha
 */

    .set noat
    addiu           sp, sp, -16
    sw              s0, 0(sp)
    sw              s1, 4(sp)
    sw              s2, 8(sp)
    sw              s3, 12(sp)
    beqz            a2, 3f
     nop
    li              t9, 8388736         /* t9 = 0x800080 (rounding_factor) */
    lui             t8, 0xff00
    li              t0, 0xff
    beq             a3, t0, 2f
     ori            t8, t8, 0xff00      /* t8 = 0xff00ff00 (andi_factor) */

/* part where const_alpha != 255 */
1:
    li              t5, 0xff
    subu            t7, t5, a3          /* t7 = cia = 255 - const_alpha */
    replv.ph        a3, a3
11:
    lw              t0, 0(a1)           /* t0 = src 1 */
    lw              t1, 4(a1)           /* t1 = src 2 */
    addiu           a2, -2

    BYTE_MUL_x2     t0, t1, AT, s0, a3, a3, t9, t3, t4, t5, t6, 0

    lw              t0, 0(a0)           /* t0 = dest 1 */
    lw              t1, 4(a0)           /* t1 = dest 2 */
    addiu           a1, 8

    not             t2, t0
    not             t3, t1
    srl             t2, t2, 24          /* t2 = qAlpha(~d1) */
    srl             t3, t3, 24          /* t3 = qAlpha(~d2) */

    INTERPOLATE_PIXEL_255 AT, t2, t0, t7, s1, t9, t8, t4, t5, t6, s3
    INTERPOLATE_PIXEL_255 s0, t3, t1, t7, s2, t9, t8, t4, t5, t6, s3

    sw              s1, 0(a0)
    sw              s2, 4(a0)
    bnez            a2, 11b
     addiu          a0, 8
    b               3f
     nop

/* part where const_alpha = 255 */
2:
    lw              t2, 0(a0)           /* dest 1 */
    lw              t3, 4(a0)           /* dest 2 */
    lw              t0, 0(a1)           /* src 1 */
    lw              t1, 4(a1)           /* src 2 */
    not             t4, t2
    not             t5, t3
    srl             t4, t4, 24          /* qAlpha(~d1) */
    srl             t5, t5, 24          /* qAlpha(~d2) */
    replv.ph        t2, t4
    replv.ph        t3, t5
    addiu           a2, -2

    BYTE_MUL_x2     t0, t1, t8, AT, t2, t3, t9, t4, t5, t6, t7

    addiu           a1, 8
    sw              t8, 0(a0)
    sw              AT, 4(a0)
    bnez            a2, 2b
     addiu          a0, 8

3:
    lw              s0, 0(sp)
    lw              s1, 4(sp)
    lw              s2, 8(sp)
    lw              s3, 12(sp)
    addiu           sp, sp, 16
    jr              ra
     nop
    .set at

END(comp_func_SourceOut_dsp_asm_x2)

LEAF_MIPS_DSP(comp_func_Source_dsp_asm_x2)
/*
 * a0 - uint *dest
 * a1 - const uint *src
 * a2 - int length (even)
 * a3 - uint const_alpha
 */

    .set noat
    addiu           sp, sp, -8
    sw              s0, 0(sp)
    sw              s1, 4(sp)
    beqz            a2, 2f
     nop
    li              t9, 8388736         /* t9 = 0x800080 (rounding_factor) */
    lui             t8, 0xff00
    ori             t8, t8, 0xff00      /* t8 = 0xff00ff00 (andi_factor) */
    li              t7, 0xff
    subu            t7, t7, a3          /* t7 = ialpha */
1:
    lw              t0, 0(a0)           /* t0 = dest 1 */
    lw              t1, 4(a0)           /* t1 = dest 2 */
    lw              t2, 0(a1)           /* t2 = src 1 */
    lw              t3, 4(a1)           /* t3 = src 2 */
    addiu           a2, -2
    addiu           a1, 8

    INTERPOLATE_PIXEL_255 t2, a3, t0, t7, AT, t9, t8, t4, t5, t6, s1
    INTERPOLATE_PIXEL_255 t3, a3, t1, t7, s0, t9, t8, t4, t5, t6, s1

    sw              AT, 0(a0)
    sw              s0, 4(a0)
    bnez            a2, 1b
     addiu          a0, 8
2:
    lw              s0, 0(sp)
    lw              s1, 4(sp)
    addiu           sp, sp, 8
    jr              ra
     nop
    .set at

END(comp_func_Source_dsp_asm_x2)

LEAF_MIPS_DSP(qt_blend_argb32_on_argb32_mips_dsp_asm_x2)
/*
 * s = BYTE_MUL(src[i], const_alpha);
 * dest[i] = s + BYTE_MUL(dest[i], qAlpha(~s)), two pixels per iteration.
 *
 * a0 - uint *dest
 * a1 - const uint *src
 * a2 - int length (even)
 * a3 - uint const_alpha
 */

    .set noat
    addiu           sp, sp, -16         /* 12 bytes used; 16 keeps sp 8-byte aligned */
    sw              s0, 0(sp)
    sw              s1, 4(sp)
    sw              s2, 8(sp)
    beqz            a2, 2f
     nop
    replv.ph        a3, a3
    li              t9, 8388736         /* t9 = 0x800080 (rounding_factor) */

1:
    lw              t0, 0(a1)           /* t0 = src 1 */
    lw              t1, 4(a1)           /* t1 = src 2 */
    addiu           a2, -2

    BYTE_MUL_x2     t0, t1, AT, t7, a3, a3, t9, t3, t4, t5, t6, 0

    lw              t0, 0(a0)           /* t0 = dest 1 */
    lw              t1, 4(a0)           /* t1 = dest 2 */
    not             s1, AT
    not             s2, t7
    srl             s1, s1, 24          /* s1 = qAlpha(~s1) */
    srl             s2, s2, 24          /* s2 = qAlpha(~s2) */
    replv.ph        s1, s1
    replv.ph        s2, s2

    BYTE_MUL_x2     t0, t1, t2, t3, s1, s2, t9, t4, t5, t6, s0

    addiu           a1, 8
    addu            AT, AT, t2
    addu            t7, t7, t3
    sw              AT, 0(a0)
    sw              t7, 4(a0)
    bnez            a2, 1b
     addiu          a0, 8

2:
    lw              s0, 0(sp)
    lw              s1, 4(sp)
    lw              s2, 8(sp)
    addiu           sp, sp, 16
    jr              ra
     nop
    .set at

END(qt_blend_argb32_on_argb32_mips_dsp_asm_x2)

LEAF_MIPS_DSP(qt_blend_argb32_on_argb32_const_alpha_256_mips_dsp_asm)
/*
 * dest[i] = s + BYTE_MUL(dest[i], ~qAlpha(s)) with s = src[i], one pixel
 * per iteration. Opaque source pixels are copied, zero source pixels
 * leave dest untouched.
 *
 * a0 - uint *dest
 * a1 - const uint *src
 * a2 - int length
 */

    beqz            a2, 5f
     nop
    li              t7, 8388736         /* t7 = 0x800080 */
    b               2f
     nop
1:
    /* src[i] == 0: skip this pixel */
    addiu           a0, a0, 4
    addiu           a2, a2, -1
    beqz            a2, 5f
     nop
2:
    lw              t0, 0(a1)           /* t0 = s = src[i] */
    addiu           a1, a1, 4
    nor             t1, t0, zero
    srl             t1, t1, 24          /* t1 = ~qAlpha(s) */
    bnez            t1, 3f
     nop
    sw              t0, 0(a0)           /* dst[i] = src[i] */
    addiu           a2, a2, -1
    bnez            a2, 2b
     addiu          a0, a0, 4
    b               5f
     nop
3:
    beqz            t0, 1b
     replv.ph       t6, t1              /* | 0 | qAlpha(~s) | 0 | qAlpha(~s) | */

    lw              t4, 0(a0)
    addiu           a2, a2, -1
    beqz            t4, 31f             /* dest is zero: result is just s */
     move           t8, zero

    BYTE_MUL        t4, t8, t6, t7, t1, t2, t3, t4
31:
    addu            t8, t0, t8          /* dst[i] =
                                         * s + BYTE_MUL(dst[i],~qAlpha(s)) */
    sw              t8, 0(a0)
    bnez            a2, 2b
     addiu          a0, a0, 4
    b               5f
     nop
5:
    jr              ra
     nop

END(qt_blend_argb32_on_argb32_const_alpha_256_mips_dsp_asm)


#if defined(__MIPSEL) && __MIPSEL
# define PACK(r, s, t)  packrl.ph r, s, t
# define SWHI(r, o, b)  swl r, o + 1 (b)
# define SWLO(r, o, b)  swr r, o + 0 (b)
# define LDHI(r, o, b)  lwl r, o + 1 (b)
# define LDLO(r, o, b)  lwr r, o + 2 (b)
#else
# define PACK(r, s, t)  packrl.ph r, t, s
# define SWHI(r, o, b)  swr r, o + 1 (b)
# define SWLO(r, o, b)  swl r, o + 0 (b)
# define LDHI(r, o, b)  lwr r, o + 1 (b)
# define LDLO(r, o, b)  lwl r, o + 2 (b)
#endif

LEAF_MIPS_DSP(qt_blend_rgb16_on_rgb16_const_alpha_256_mips_dsp_asm)
/*
 * a0 - dst (*r5g6b5)
 * a1 - src (const *r5g6b5)
 * a2 - len (unsigned int)
 *
 * Register usage:
 *  t0-3 - Scratch registers
 *  t4   - Number of iterations to do in unrolled loops
 *  t5-7 - Auxiliary scratch registers.
 *
 * Check if base addresses of src/dst are aligned, cases:
 *  a) Both aligned.
 *  b) Both unaligned:
 *      1. Copy a halfword
 *      2. Use aligned case.
1634 * c) dst aligned, src unaligned: 1635 * 1. Read a word from dst, halfword from src. 1636 * 2. Continue reading words from both. 1637 * d) dst unaligned, src aligned: 1638 * 1. Read a word from src, halfword from dst. 1639 * 2. Continue reading words from both. 1640 */ 1641 1642 beqz a2, 0f /* if (a2:len == 0): return */ 1643 andi t0, a0, 0x3 /* t0 = a0:dst % 4 */ 1644 andi t1, a1, 0x3 /* t1 = a1:dst % 4 */ 1645 or t2, t0, t1 /* t1 = t0 | t1 */ 1646 1647 beqz t2, 4f /* both aligned */ 1648 nop 1649 beqz t0, 3f /* dst aligned, src unaligned */ 1650 nop 1651 beqz t1, 2f /* src aligned, dst unaligned */ 1652 nop 1653 1654 /* 1655 * Both src/dst are unaligned: read 1 halfword from each, 1656 * the fall-off to continue with word-aligned copy. 1657 */ 1658 lhu t0, 0 (a1) /* t0 <- ((uint16_t*) src)[0] */ 1659 addiu a1, a1, 2 /* src++ */ 1660 addiu a2, a2,-1 /* len-- */ 1661 sh t0, 0 (a0) /* t1 -> ((uint16_t*) dst)[0] */ 1662 addiu a0, a0, 2 /* dst++ */ 1663 1664 /* 1665 * Both src/dst pointers are word-aligned, process eight 1666 * items at a time in an unrolled loop. 
1667 */ 16684: beqz a2, 0f /* if (len == 0): return */ 1669 srl t4, a2, 3 /* t4 = len / 8 */ 1670 1671 beqz t4, 5f /* if (t4 == 0): tail */ 1672 andi a2, a2, 0x07 /* len = len % 8 */ 1673 16741: lw t0, 0 (a1) 1675 lw t1, 4 (a1) 1676 lw t2, 8 (a1) 1677 lw t3, 12 (a1) 1678 1679 addiu t4, t4, -1 /* t4-- */ 1680 addiu a1, a1, 16 /* src += 8 */ 1681 1682 sw t0, 0 (a0) 1683 sw t1, 4 (a0) 1684 sw t2, 8 (a0) 1685 sw t3, 12 (a0) 1686 1687 bnez t4, 1b 1688 addiu a0, a0, 16 /* dst += 8 */ 1689 1690 b 5f 1691 nop 1692 1693 1694 /* 1695 * dst pointer is unaligned 1696 */ 16972: beqz a2, 0f /* if (len == 0): return */ 1698 srl t4, a2, 3 /* t4 = len / 8 */ 1699 beqz t4, 5f /* if (t4 == 0): tail */ 1700 andi a2, a2, 0x07 /* len = len % 8 */ 1701 17021: lw t0, 0 (a1) 1703 lw t1, 4 (a1) 1704 lw t2, 8 (a1) 1705 lw t3, 12 (a1) 1706 1707 addiu t4, t4, -1 /* t4-- */ 1708 addiu a1, a1, 16 /* src += 8 */ 1709 1710 SWLO (t0, 0, a0) 1711 PACK (t5, t1, t0) 1712 PACK (t6, t2, t1) 1713 PACK (t7, t3, t2) 1714 SWHI (t3, 14, a0) 1715 sw t5, 2 (a0) 1716 sw t6, 6 (a0) 1717 sw t7, 10 (a0) 1718 1719 bnez t4, 1b 1720 addiu a0, a0, 16 /* dst += 8 */ 1721 1722 b 5f 1723 nop 1724 1725 /* 1726 * src pointer is unaligned 1727 */ 17283: beqz a2, 0f /* if (len == 0): return */ 1729 srl t4, a2, 3 /* t4 = len / 8 */ 1730 beqz t4, 5f /* if (t4 == 0): tail */ 1731 andi a2, a2, 0x07 /* len = len % 8 */ 1732 17331: LDHI (t0, 0, a1) 1734 lw t1, 2 (a1) 1735 lw t2, 6 (a1) 1736 lw t3, 10 (a1) 1737 LDLO (t5, 12, a1) 1738 1739 addiu t4, t4, -1 /* t4-- */ 1740 addiu a1, a1, 16 /* src += 8 */ 1741 1742 PACK (t0, t1, t0) 1743 PACK (t6, t2, t1) 1744 PACK (t7, t3, t2) 1745 sw t0, 0 (a0) 1746 PACK (t0, t5, t3) 1747 sw t6, 4 (a0) 1748 sw t7, 8 (a0) 1749 sw t0, 12 (a0) 1750 1751 bnez t4, 1b 1752 addiu a0, a0, 16 /* dst += 8 */ 1753 1754 17555: /* Process remaining items (a2:len < 4), one at a time */ 1756 beqz a2, 0f 1757 nop 1758 17591: lhu t0, 0 (a1) /* t0 <- ((uint16_t*) src)[0] */ 1760 addiu a2, a2,-1 /* len-- */ 1761 addiu 
a1, a1, 2 /* src++ */ 1762 sh t0, 0 (a0) /* to -> ((uint16_t*) dst)[0] */ 1763 bnez a2, 1b /* if (len != 0): loop */ 1764 addiu a0, a0, 2 /* dst++ */ 1765 17660: jr ra 1767 nop 1768 1769END(qt_blend_rgb16_on_rgb16_const_alpha_256_mips_dsp_asm) 1770 1771 1772#undef LDHI 1773#undef LDLO 1774#undef PACK 1775#undef SWHI 1776#undef SWLO 1777 1778 1779LEAF_MIPS_DSP(qt_blend_rgb16_on_rgb16_mips_dsp_asm) 1780/* 1781 * a0 - dst (*r5g6b5) 1782 * a1 - src (const *r5g6b5) 1783 * a2 - len (unsigned int) - batch length 1784 * a3 - alpha (int) 1785 */ 1786 1787 beqz a2, 2f 1788 li t9, 255 1789 sll t8, a3, 8 1790 subu a3, t8, a3 1791 srl a3, a3, 8 1792 subu t9, t9, a3 1793 addiu a3, a3, 1 1794 srl t4, a3, 2 1795 addiu t9, t9, 1 1796 srl t5, t9, 2 17971: 1798 lhu t0, 0(a1) 1799 lhu t1, 0(a0) 1800 addiu a2, a2, -1 1801 andi t2, t0, 0x07e0 1802 andi t0, t0, 0xf81f 1803 mul t2, t2, a3 1804 mul t0, t0, t4 1805 andi t3, t1, 0x07e0 1806 andi t1, t1, 0xf81f 1807 mul t3, t3, t9 1808 mul t1, t1, t5 1809 addiu a1, a1, 2 1810 srl t2, t2, 8 1811 srl t0, t0, 6 1812 andi t2, t2, 0x07e0 1813 andi t0, t0, 0xf81f 1814 or t0, t0, t2 1815 srl t3, t3, 8 1816 srl t1, t1, 6 1817 andi t3, t3, 0x07e0 1818 andi t1, t1, 0xf81f 1819 or t1, t1, t3 1820 addu t0, t0, t1 1821 sh t0, 0(a0) 1822 bgtz a2, 1b 1823 addiu a0, a0, 2 18242: 1825 jr ra 1826 nop 1827 1828END(qt_blend_rgb16_on_rgb16_mips_dsp_asm) 1829 1830 1831LEAF_MIPS_DSP(fetchUntransformed_888_asm_mips_dsp) 1832/* 1833 * a0 - dst address (address of 32-bit aRGB value) 1834 * a1 - src address 1835 * a2 - length 1836 */ 1837 1838 beqz a2, 4f 1839 lui t8, 0xff00 1840 andi t0, a2, 0x1 1841 beqz t0, 1f 1842 nop 1843/* case for one pixel */ 1844 lbu t1, 0(a1) 1845 lbu v1, 2(a1) 1846 lbu t0, 1(a1) 1847 addiu a1, a1, 3 1848 addiu a2, a2, -1 1849 sll t1, t1, 0x10 1850 or v1, v1, t8 1851 sll t0, t0, 0x8 1852 or v1, v1, t1 1853 or v1, v1, t0 1854 sw v1, 0(a0) 1855 addiu a0, a0, 4 1856 1857 beqz a2, 4f /* only one pixel is present (length = 1) */ 1858 nop 18591: 
1860 andi t0, a1, 0x1 1861 beqz t0, 3f 1862 nop 18632: 1864 lbu t0, 0(a1) /* t0 = | 0 | 0 | 0 | R1 | */ 1865 lhu t1, 1(a1) /* t1 = | 0 | 0 | B1 | G1 | */ 1866 addiu a1, a1, 3 1867 lhu t2, 0(a1) /* t2 = | 0 | 0 | G2 | R2 | */ 1868 lbu t3, 2(a1) /* t3 = | 0 | 0 | 0 | B2 | */ 1869 1870 sll t0, t0, 16 1871 or t0, t0, t8 /* t0 = | ff | R1 | 0 | 0 | */ 1872 shll.ph t4, t1, 8 /* t4 = | 0 | 0 | G1 | 0 | */ 1873 srl t5, t1, 8 1874 or t4, t4, t5 /* t4 = | 0 | 0 | G1 | B1 | */ 1875 or t0, t0, t4 /* t0 = | ff | R1 | G1 | B1 | */ 1876 1877 shll.ph t4, t2, 8 /* t4 = | 0 | 0 | R2 | 0 | */ 1878 srl t5, t2, 8 /* t5 = | 0 | 0 | 0 | G2 | */ 1879 or t4, t4, t5 1880 sll t4, t4, 8 /* t4 = | 0 | R2 | G2 | 0 | */ 1881 or t5, t3, t8 1882 or t2, t4, t5 /* t2 = | ff | R2 | G2 | B2 | */ 1883 1884 sw t0, 0(a0) 1885 addiu a1, a1, 3 1886 sw t2, 4(a0) 1887 addiu a2, a2, -2 1888 bnez a2, 2b 1889 addiu a0, a0, 8 1890 b 4f 1891 nop 18923: 1893 lhu t0, 0(a1) /* t0 = | 0 | 0 | G1 | R1 | */ 1894 lbu t1, 2(a1) /* t1 = | 0 | 0 | 0 | B1 | */ 1895 addiu a1, a1, 3 1896 lbu t2, 0(a1) /* t2 = | 0 | 0 | 0 | R2 | */ 1897 lhu t3, 1(a1) /* t3 = | 0 | 0 | B2 | G2 | */ 1898 1899 srl t4, t0, 8 /* t4 = | 0 | 0 | 0 | G1 | */ 1900 shll.ph t5, t0, 8 /* t5 = | 0 | 0 | R1 | 0 | */ 1901 or t0, t4, t5 1902 sll t6, t0, 8 /* t6 = | 0 | R1 | G1 | 0 | */ 1903 or t4, t1, t8 /* t4 = | ff | 0 | 0 | B1 | */ 1904 or t0, t6, t4 1905 1906 sll t2, t2, 16 1907 srl t4, t3, 8 1908 shll.ph t5, t3, 8 1909 or t3, t4, t5 1910 or t2, t2, t3 1911 or t2, t2, t8 1912 1913 sw t0, 0(a0) 1914 addiu a1, a1, 3 1915 sw t2, 4(a0) 1916 addiu a2, a2, -2 1917 bnez a2, 3b 1918 addiu a0, a0, 8 19194: 1920 jr ra 1921 nop 1922 1923END(fetchUntransformed_888_asm_mips_dsp) 1924 1925 1926LEAF_MIPS_DSP(fetchUntransformed_444_asm_mips_dsp) 1927/* 1928 * a0 - dst address (address of 32-bit aRGB value) 1929 * a1 - src address 1930 * a2 - length 1931 */ 1932 1933 lui t8, 0xff00 1934 li t4, 0x1 1935 1936 beqz a2, 5f 1937 move v0, a0 /* just return the address of buffer 
1938 * for storing returning values */ 1939 andi t0, a2, 0x1 1940 beqz t0, 2f /* there is more then one pixel 1941 * (check src memory alignment (word)) */ 1942 nop 19431: 1944 lhu v0, 0(a1) 1945 addiu a1, a1, 2 1946 addiu a2, a2, -1 1947 andi t0, v0, 0xf00 1948 andi v1, v0, 0xf 1949 andi v0, v0, 0xf0 1950 sra t3, t0, 0x4 1951 sra t1, v0, 0x4 1952 sra t0, t0, 0x8 1953 sll t2, v1, 0x4 1954 or t0, t0, t3 1955 or v0, t1, v0 1956 lui t1, 0xff00 1957 or v1, t2, v1 1958 sll t0, t0, 0x10 1959 or v1, v1, t1 1960 sll v0, v0, 0x8 1961 or v1, v1, t0 1962 or v0, v1, v0 1963 sw v0, 0(a0) 1964 addiu a0, a0, 4 1965 beqz a2, 5f /* no more pixels for processing */ 1966 nop 1967 beq a2, t4, 4f /* only one more pixel remained */ 1968 nop 1969/* check if src memory address is word aligned */ 19702: 1971 andi t0, a1, 0x3 1972 beqz t0, 3f /* memory is word aligned */ 1973 andi a3, a2, 0x1 /* set the a3 register as the comparation 1974 * for ending the unrolled loop 1975 * (1 if odd, 0 if even) */ 1976 b 1b /* not word aligned, 1977 * go another turn with 1978 * just one pixel processing */ 1979 nop 19803: 1981 lw t0, 0(a1) 1982 addiu a2, a2, -2 1983 preceu.ph.qbr t1, t0 /* t1 = | 0 | aR1 | 0 | G1B1 | */ 1984 preceu.ph.qbl t2, t0 /* t1 = | 0 | aR2 | 0 | G2B2 | */ 1985 shll.qb t3, t1, 4 /* t3 = | 0 | R1 0 | 0 | B1 0 | */ 1986 srl t4, t3, 4 1987 or t0, t3, t4 /* t0 = | 0 | R1R1 | 0 | B1B1 | */ 1988 andi t3, t1, 0xf0 1989 sll t3, t3, 8 1990 srl t4, t3, 4 1991 or t1, t3, t4 1992 or t0, t0, t1 /* t0 = | 0 | R1R1 | G1G1 | B1B1 | */ 1993 or t0, t0, t8 /* t0 = | ff | R1R1 | G1G1 | B1B1 | */ 1994 1995 shll.qb t3, t2, 4 /* t3 = | 0 | R1 0 | 0 | B1 0 | */ 1996 srl t4, t3, 4 1997 or t7, t3, t4 /* t0 = | 0 | R1R1 | 0 | B1B1 | */ 1998 andi t3, t2, 0xf0 1999 sll t3, t3, 8 2000 srl t4, t3, 4 2001 or t1, t3, t4 2002 or t2, t7, t1 /* t0 = | 0 | R1R1 | G1G1 | B1B1 | */ 2003 or t2, t2, t8 /* t0 = | ff | R1R1 | G1G1 | B1B1 | */ 2004 2005 sw t0, 0(a0) 2006 addiu a1, a1, 4 2007 sw t2, 4(a0) 2008 bne a2, a3, 3b 
2009 addiu a0, a0, 8 2010 beqz a2, 5f /* no more pixels for processing */ 2011 nop 20124: 2013/* one more pixel remained (after loop unrolling process finished) */ 2014 lhu v0, 0(a1) 2015 addiu a1, a1, 2 2016 addiu a2, a2, -1 2017 andi t0, v0, 0xf00 2018 andi v1, v0, 0xf 2019 andi v0, v0, 0xf0 2020 sra t3, t0, 0x4 2021 sra t1, v0, 0x4 2022 sra t0, t0, 0x8 2023 sll t2, v1, 0x4 2024 or t0, t0, t3 2025 or v0, t1, v0 2026 lui t1, 0xff00 2027 or v1, t2, v1 2028 sll t0, t0, 0x10 2029 or v1, v1, t1 2030 sll v0, v0, 0x8 2031 or v1, v1, t0 2032 or v0, v1, v0 2033 sw v0, 0(a0) 2034 addiu a0, a0, 4 20355: 2036 jr ra 2037 nop 2038 2039END(fetchUntransformed_444_asm_mips_dsp) 2040 2041 2042LEAF_MIPS_DSP(fetchUntransformed_argb8565_premultiplied_asm_mips_dsp) 2043/* 2044 * a0 - dst address 2045 * a1 - src address 2046 * a2 - length 2047 */ 2048 2049 beqz a2, 2f 2050 nop 2051 20521: 2053 ulh t1, 0(a1) 2054 lbu t2, 2(a1) 2055 addiu a2, a2, -1 2056 wsbh t1, t1 2057 sll t0, t1, 8 /* t0 = 00000000rrrrrggggggbbbbb00000000 */ 2058 ins t0, t1, 3, 16 /* t0 = 00000000rrrrrrrrrrggggggbbbbb000 */ 2059 ins t0, t1, 5, 11 /* t0 = 00000000rrrrrrrrggggggbbbbbbb000 */ 2060 srl t4, t1, 9 /* t4 = 0000000000000000000000000rrrrrgg */ 2061 replv.qb t3, t2 2062 ins t0, t4, 8, 2 /* t0 = 00000000rrrrrrrrggggggggbbbbb000 */ 2063 ins t0, t1, 3, 5 /* t0 = 00000000rrrrrrrrggggggggbbbbb000 */ 2064 srl t4, t1, 2 /* t4 = 000000000000000000rrrrrggggggbbb */ 2065 ins t0, t4, 0, 3 /* t0 = 00000000rrrrrrrrggggggggbbbbbbbb */ 2066 ins t0, t2, 24, 8 /* t0 =aaaaaaaarrrrrrrrggggggggbbbbbbbb */ 2067 cmpu.lt.qb t3, t0 2068 pick.qb t0, t3, t0 2069 addiu a1, a1, 3 2070 sw t0, 0(a0) 2071 bgtz a2, 1b 2072 addiu a0, a0, 4 20732: 2074 jr ra 2075 nop 2076 2077END(fetchUntransformed_argb8565_premultiplied_asm_mips_dsp) 2078