/*
 * Copyright (c) 2012
 * MIPS Technologies, Inc., California.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the MIPS Technologies, Inc., nor the names of its
 *    contributors may be used to endorse or promote products derived from
 *    this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE MIPS TECHNOLOGIES, INC. ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE MIPS TECHNOLOGIES, INC. BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * Author: Nemanja Lukic (nemanja.lukic@rt-rk.com)
 */

#ifndef PIXMAN_MIPS_DSPR2_ASM_H
#define PIXMAN_MIPS_DSPR2_ASM_H

/* Symbolic names for the MIPS o32 general-purpose registers. */
#define zero $0
#define AT $1
#define v0 $2
#define v1 $3
#define a0 $4
#define a1 $5
#define a2 $6
#define a3 $7
#define t0 $8
#define t1 $9
#define t2 $10
#define t3 $11
#define t4 $12
#define t5 $13
#define t6 $14
#define t7 $15
#define s0 $16
#define s1 $17
#define s2 $18
#define s3 $19
#define s4 $20
#define s5 $21
#define s6 $22
#define s7 $23
#define t8 $24
#define t9 $25
#define k0 $26
#define k1 $27
#define gp $28
#define sp $29
#define fp $30
#define s8 $30
#define ra $31

/*
 * LEAF_MIPS32R2 - declare leaf routine for MIPS32r2
 */
#define LEAF_MIPS32R2(symbol)                           \
                .globl  symbol;                         \
                .align  2;                              \
                .hidden symbol;                         \
                .type   symbol, @function;              \
                .ent    symbol, 0;                      \
symbol:         .frame  sp, 0, ra;                      \
                .set    push;                           \
                .set    arch=mips32r2;                  \
                .set    noreorder;                      \
                .set    noat;

/*
 * LEAF_MIPS_DSPR2 - declare leaf routine for MIPS DSPr2
 */
#define LEAF_MIPS_DSPR2(symbol)                         \
LEAF_MIPS32R2(symbol)                                   \
                .set    dspr2;

/*
 * END - mark end of function
 */
#define END(function)                                   \
                .set    pop;                            \
                .end    function;                       \
                .size   function,.-function

/*
 * Checks if stack offset is big enough for storing/restoring regs_num
 * number of register to/from stack. Stack offset must be greater than
 * or equal to the number of bytes needed for storing registers (regs_num*4).
 * Since MIPS ABI allows usage of first 16 bytes of stack frame (this is
 * preserved for input arguments of the functions, already stored in a0-a3),
 * stack size can be further optimized by utilizing this space.
 */
.macro CHECK_STACK_OFFSET regs_num, stack_offset
.if \stack_offset < \regs_num * 4 - 16
.error "Stack offset too small."
.endif
.endm

/*
 * Saves set of registers on stack. Maximum number of registers that
 * can be saved on stack is limited to 14 (a0-a3, v0-v1 and s0-s7).
 * Stack offset is number of bytes that are subtracted from stack pointer
 * (sp) before registers are pushed in order to provide enough space on stack
 * (offset must be multiple of 4, and must be big enough, as described by
 * CHECK_STACK_OFFSET macro). This macro is intended to be used in
 * combination with RESTORE_REGS_FROM_STACK macro. Example:
 *  SAVE_REGS_ON_STACK      4, v0, v1, s0, s1
 *  RESTORE_REGS_FROM_STACK 4, v0, v1, s0, s1
 */
.macro SAVE_REGS_ON_STACK stack_offset = 0, r1, \
                          r2  = 0, r3  = 0, r4  = 0, \
                          r5  = 0, r6  = 0, r7  = 0, \
                          r8  = 0, r9  = 0, r10 = 0, \
                          r11 = 0, r12 = 0, r13 = 0, \
                          r14 = 0
    .if (\stack_offset < 0) || (\stack_offset - (\stack_offset / 4) * 4)
    .error "Stack offset must be positive and multiple of 4."
    .endif
    .if \stack_offset != 0
    addiu           sp, sp, -\stack_offset
    .endif
    sw              \r1, 0(sp)
    .if \r2 != 0
    sw              \r2, 4(sp)
    .endif
    .if \r3 != 0
    sw              \r3, 8(sp)
    .endif
    .if \r4 != 0
    sw              \r4, 12(sp)
    .endif
    .if \r5 != 0
    CHECK_STACK_OFFSET 5, \stack_offset
    sw              \r5, 16(sp)
    .endif
    .if \r6 != 0
    CHECK_STACK_OFFSET 6, \stack_offset
    sw              \r6, 20(sp)
    .endif
    .if \r7 != 0
    CHECK_STACK_OFFSET 7, \stack_offset
    sw              \r7, 24(sp)
    .endif
    .if \r8 != 0
    CHECK_STACK_OFFSET 8, \stack_offset
    sw              \r8, 28(sp)
    .endif
    .if \r9 != 0
    CHECK_STACK_OFFSET 9, \stack_offset
    sw              \r9, 32(sp)
    .endif
    .if \r10 != 0
    CHECK_STACK_OFFSET 10, \stack_offset
    sw              \r10, 36(sp)
    .endif
    .if \r11 != 0
    CHECK_STACK_OFFSET 11, \stack_offset
    sw              \r11, 40(sp)
    .endif
    .if \r12 != 0
    CHECK_STACK_OFFSET 12, \stack_offset
    sw              \r12, 44(sp)
    .endif
    .if \r13 != 0
    CHECK_STACK_OFFSET 13, \stack_offset
    sw              \r13, 48(sp)
    .endif
    .if \r14 != 0
    CHECK_STACK_OFFSET 14, \stack_offset
    sw              \r14, 52(sp)
    .endif
.endm

/*
 * Restores set of registers from stack. Maximum number of registers that
 * can be restored from stack is limited to 14 (a0-a3, v0-v1 and s0-s7).
 * Stack offset is number of bytes that are added to stack pointer (sp)
 * after registers are restored (offset must be multiple of 4, and must
 * be big enough, as described by CHECK_STACK_OFFSET macro). This macro is
 * intended to be used in combination with SAVE_REGS_ON_STACK macro.
 * Example:
 *  SAVE_REGS_ON_STACK      4, v0, v1, s0, s1
 *  RESTORE_REGS_FROM_STACK 4, v0, v1, s0, s1
 */
.macro RESTORE_REGS_FROM_STACK stack_offset = 0, r1, \
                               r2  = 0, r3  = 0, r4  = 0, \
                               r5  = 0, r6  = 0, r7  = 0, \
                               r8  = 0, r9  = 0, r10 = 0, \
                               r11 = 0, r12 = 0, r13 = 0, \
                               r14 = 0
    .if (\stack_offset < 0) || (\stack_offset - (\stack_offset/4)*4)
    .error "Stack offset must be positive and multiple of 4."
    .endif
    lw              \r1, 0(sp)
    .if \r2 != 0
    lw              \r2, 4(sp)
    .endif
    .if \r3 != 0
    lw              \r3, 8(sp)
    .endif
    .if \r4 != 0
    lw              \r4, 12(sp)
    .endif
    .if \r5 != 0
    CHECK_STACK_OFFSET 5, \stack_offset
    lw              \r5, 16(sp)
    .endif
    .if \r6 != 0
    CHECK_STACK_OFFSET 6, \stack_offset
    lw              \r6, 20(sp)
    .endif
    .if \r7 != 0
    CHECK_STACK_OFFSET 7, \stack_offset
    lw              \r7, 24(sp)
    .endif
    .if \r8 != 0
    CHECK_STACK_OFFSET 8, \stack_offset
    lw              \r8, 28(sp)
    .endif
    .if \r9 != 0
    CHECK_STACK_OFFSET 9, \stack_offset
    lw              \r9, 32(sp)
    .endif
    .if \r10 != 0
    CHECK_STACK_OFFSET 10, \stack_offset
    lw              \r10, 36(sp)
    .endif
    .if \r11 != 0
    CHECK_STACK_OFFSET 11, \stack_offset
    lw              \r11, 40(sp)
    .endif
    .if \r12 != 0
    CHECK_STACK_OFFSET 12, \stack_offset
    lw              \r12, 44(sp)
    .endif
    .if \r13 != 0
    CHECK_STACK_OFFSET 13, \stack_offset
    lw              \r13, 48(sp)
    .endif
    .if \r14 != 0
    CHECK_STACK_OFFSET 14, \stack_offset
    lw              \r14, 52(sp)
    .endif
    .if \stack_offset != 0
    addiu           sp, sp, \stack_offset
    .endif
.endm

/*
 * Conversion of single r5g6b5 pixel (in_565) to single a8r8g8b8 pixel
 * returned in (out_8888) register. Requires two temporary registers
 * (scratch1 and scratch2).
 */
.macro CONVERT_1x0565_TO_1x8888 in_565,   \
                                out_8888, \
                                scratch1, scratch2
    lui     \out_8888, 0xff00
    sll     \scratch1, \in_565,   0x3
    andi    \scratch2, \scratch1, 0xff
    ext     \scratch1, \in_565,   0x2, 0x3
    or      \scratch1, \scratch2, \scratch1
    or      \out_8888, \out_8888, \scratch1

    sll     \scratch1, \in_565,   0x5
    andi    \scratch1, \scratch1, 0xfc00
    srl     \scratch2, \in_565,   0x1
    andi    \scratch2, \scratch2, 0x300
    or      \scratch2, \scratch1, \scratch2
    or      \out_8888, \out_8888, \scratch2

    andi    \scratch1, \in_565,   0xf800
    srl     \scratch2, \scratch1, 0x5
    andi    \scratch2, \scratch2, 0xff00
    or      \scratch1, \scratch1, \scratch2
    sll     \scratch1, \scratch1, 0x8
    or      \out_8888, \out_8888, \scratch1
.endm

/*
 * Conversion of two r5g6b5 pixels (in1_565 and in2_565) to two a8r8g8b8 pixels
 * returned in (out1_8888 and out2_8888) registers. Requires four scratch
 * registers (scratch1 ... scratch4). It also requires maskG and maskB for
 * color component extractions. These masks must have following values:
 *     li       maskG, 0x07e007e0
 *     li       maskB, 0x001F001F
 */
.macro CONVERT_2x0565_TO_2x8888 in1_565, in2_565,     \
                                out1_8888, out2_8888, \
                                maskG, maskB,         \
                                scratch1, scratch2, scratch3, scratch4
    sll               \scratch1,  \in1_565,   16
    or                \scratch1,  \scratch1,  \in2_565
    lui               \out2_8888, 0xff00
    ori               \out2_8888, \out2_8888, 0xff00
    shrl.ph           \scratch2,  \scratch1,  11
    and               \scratch3,  \scratch1,  \maskG
    shra.ph           \scratch4,  \scratch2,  2
    shll.ph           \scratch2,  \scratch2,  3
    shll.ph           \scratch3,  \scratch3,  5
    or                \scratch2,  \scratch2,  \scratch4
    shrl.qb           \scratch4,  \scratch3,  6
    or                \out2_8888, \out2_8888, \scratch2
    or                \scratch3,  \scratch3,  \scratch4
    and               \scratch1,  \scratch1,  \maskB
    shll.ph           \scratch2,  \scratch1,  3
    shra.ph           \scratch4,  \scratch1,  2
    or                \scratch2,  \scratch2,  \scratch4
    or                \scratch3,  \scratch2,  \scratch3
    precrq.ph.w       \out1_8888, \out2_8888, \scratch3
    precr_sra.ph.w    \out2_8888, \scratch3,  0
.endm

/*
 * Conversion of single a8r8g8b8 pixel (in_8888) to single r5g6b5 pixel
 * returned in (out_565) register. Requires two temporary registers
 * (scratch1 and scratch2).
 */
.macro CONVERT_1x8888_TO_1x0565 in_8888, \
                                out_565, \
                                scratch1, scratch2
    ext     \out_565,  \in_8888,  0x3, 0x5
    srl     \scratch1, \in_8888,  0x5
    andi    \scratch1, \scratch1, 0x07e0
    srl     \scratch2, \in_8888,  0x8
    andi    \scratch2, \scratch2, 0xf800
    or      \out_565,  \out_565,  \scratch1
    or      \out_565,  \out_565,  \scratch2
.endm

/*
 * Conversion of two a8r8g8b8 pixels (in1_8888 and in2_8888) to two r5g6b5
 * pixels returned in (out1_565 and out2_565) registers. Requires two temporary
 * registers (scratch1 and scratch2). It also requires maskR, maskG and maskB
 * for color component extractions. These masks must have following values:
 *     li       maskR, 0xf800f800
 *     li       maskG, 0x07e007e0
 *     li       maskB, 0x001F001F
 * Value of input register in2_8888 is lost.
 */
.macro CONVERT_2x8888_TO_2x0565 in1_8888, in2_8888,  \
                                out1_565, out2_565,  \
                                maskR, maskG, maskB, \
                                scratch1, scratch2
    precr.qb.ph    \scratch1, \in2_8888, \in1_8888
    precrq.qb.ph   \in2_8888, \in2_8888, \in1_8888
    and            \out1_565, \scratch1, \maskR
    shrl.ph        \scratch1, \scratch1, 3
    shll.ph        \in2_8888, \in2_8888, 3
    and            \scratch1, \scratch1, \maskB
    or             \out1_565, \out1_565, \scratch1
    and            \in2_8888, \in2_8888, \maskG
    or             \out1_565, \out1_565, \in2_8888
    srl            \out2_565, \out1_565, 16
.endm

/*
 * Multiply pixel (a8) with single pixel (a8r8g8b8). It requires maskLSR needed
 * for rounding process. maskLSR must have following value:
 *     li       maskLSR, 0x00ff00ff
 */
.macro MIPS_UN8x4_MUL_UN8 s_8888,  \
                          m_8,     \
                          d_8888,  \
                          maskLSR, \
                          scratch1, scratch2, scratch3
    replv.ph          \m_8,      \m_8                 /*   0 | M | 0 | M */
    muleu_s.ph.qbl    \scratch1, \s_8888,   \m_8      /*  A*M | R*M */
    muleu_s.ph.qbr    \scratch2, \s_8888,   \m_8      /*  G*M | B*M */
    shra_r.ph         \scratch3, \scratch1, 8
    shra_r.ph         \d_8888,   \scratch2, 8
    and               \scratch3, \scratch3, \maskLSR  /*   0 |A*M| 0 |R*M */
    and               \d_8888,   \d_8888,   \maskLSR  /*   0 |G*M| 0 |B*M */
    addq.ph           \scratch1, \scratch1, \scratch3 /* A*M+A*M | R*M+R*M */
    addq.ph           \scratch2, \scratch2, \d_8888   /* G*M+G*M | B*M+B*M */
    shra_r.ph         \scratch1, \scratch1, 8
    shra_r.ph         \scratch2, \scratch2, 8
    precr.qb.ph       \d_8888,   \scratch1, \scratch2
.endm

/*
 * Multiply two pixels (a8) with two pixels (a8r8g8b8). It requires maskLSR
 * needed for rounding process. maskLSR must have following value:
 *     li       maskLSR, 0x00ff00ff
 */
.macro MIPS_2xUN8x4_MUL_2xUN8 s1_8888, \
                              s2_8888, \
                              m1_8,    \
                              m2_8,    \
                              d1_8888, \
                              d2_8888, \
                              maskLSR, \
                              scratch1, scratch2, scratch3, \
                              scratch4, scratch5, scratch6
    replv.ph          \m1_8,     \m1_8                /*  0 | M1 | 0 | M1 */
    replv.ph          \m2_8,     \m2_8                /*  0 | M2 | 0 | M2 */
    muleu_s.ph.qbl    \scratch1, \s1_8888,  \m1_8     /*  A1*M1 | R1*M1 */
    muleu_s.ph.qbr    \scratch2, \s1_8888,  \m1_8     /*  G1*M1 | B1*M1 */
    muleu_s.ph.qbl    \scratch3, \s2_8888,  \m2_8     /*  A2*M2 | R2*M2 */
    muleu_s.ph.qbr    \scratch4, \s2_8888,  \m2_8     /*  G2*M2 | B2*M2 */
    shra_r.ph         \scratch5, \scratch1, 8
    shra_r.ph         \d1_8888,  \scratch2, 8
    shra_r.ph         \scratch6, \scratch3, 8
    shra_r.ph         \d2_8888,  \scratch4, 8
    and               \scratch5, \scratch5, \maskLSR  /*  0 |A1*M1| 0 |R1*M1 */
    and               \d1_8888,  \d1_8888,  \maskLSR  /*  0 |G1*M1| 0 |B1*M1 */
    and               \scratch6, \scratch6, \maskLSR  /*  0 |A2*M2| 0 |R2*M2 */
    and               \d2_8888,  \d2_8888,  \maskLSR  /*  0 |G2*M2| 0 |B2*M2 */
    addq.ph           \scratch1, \scratch1, \scratch5
    addq.ph           \scratch2, \scratch2, \d1_8888
    addq.ph           \scratch3, \scratch3, \scratch6
    addq.ph           \scratch4, \scratch4, \d2_8888
    shra_r.ph         \scratch1, \scratch1, 8
    shra_r.ph         \scratch2, \scratch2, 8
    shra_r.ph         \scratch3, \scratch3, 8
    shra_r.ph         \scratch4, \scratch4, 8
    precr.qb.ph       \d1_8888,  \scratch1, \scratch2
    precr.qb.ph       \d2_8888,  \scratch3, \scratch4
.endm

/*
 * Multiply pixel (a8r8g8b8) with single pixel (a8r8g8b8). It requires maskLSR
 * needed for rounding process. maskLSR must have following value:
 *     li       maskLSR, 0x00ff00ff
 */
.macro MIPS_UN8x4_MUL_UN8x4 s_8888,  \
                            m_8888,  \
                            d_8888,  \
                            maskLSR, \
                            scratch1, scratch2, scratch3, scratch4
    preceu.ph.qbl     \scratch1, \m_8888              /*  0 | A | 0 | R */
    preceu.ph.qbr     \scratch2, \m_8888              /*  0 | G | 0 | B */
    muleu_s.ph.qbl    \scratch3, \s_8888,   \scratch1 /*  A*A | R*R */
    muleu_s.ph.qbr    \scratch4, \s_8888,   \scratch2 /*  G*G | B*B */
    shra_r.ph         \scratch1, \scratch3, 8
    shra_r.ph         \scratch2, \scratch4, 8
    and               \scratch1, \scratch1, \maskLSR  /*  0 |A*A| 0 |R*R */
    and               \scratch2, \scratch2, \maskLSR  /*  0 |G*G| 0 |B*B */
    addq.ph           \scratch1, \scratch1, \scratch3
    addq.ph           \scratch2, \scratch2, \scratch4
    shra_r.ph         \scratch1, \scratch1, 8
    shra_r.ph         \scratch2, \scratch2, 8
    precr.qb.ph       \d_8888,   \scratch1, \scratch2
.endm

/*
 * Multiply two pixels (a8r8g8b8) with two pixels (a8r8g8b8). It requires
 * maskLSR needed for rounding process. maskLSR must have following value:
 *     li       maskLSR, 0x00ff00ff
 */
.macro MIPS_2xUN8x4_MUL_2xUN8x4 s1_8888, \
                                s2_8888, \
                                m1_8888, \
                                m2_8888, \
                                d1_8888, \
                                d2_8888, \
                                maskLSR, \
                                scratch1, scratch2, scratch3, \
                                scratch4, scratch5, scratch6
    preceu.ph.qbl     \scratch1, \m1_8888             /*  0 | A | 0 | R */
    preceu.ph.qbr     \scratch2, \m1_8888             /*  0 | G | 0 | B */
    preceu.ph.qbl     \scratch3, \m2_8888             /*  0 | A | 0 | R */
    preceu.ph.qbr     \scratch4, \m2_8888             /*  0 | G | 0 | B */
    muleu_s.ph.qbl    \scratch5, \s1_8888,  \scratch1 /*  A*A | R*R */
    muleu_s.ph.qbr    \scratch6, \s1_8888,  \scratch2 /*  G*G | B*B */
    muleu_s.ph.qbl    \scratch1, \s2_8888,  \scratch3 /*  A*A | R*R */
    muleu_s.ph.qbr    \scratch2, \s2_8888,  \scratch4 /*  G*G | B*B */
    shra_r.ph         \scratch3, \scratch5, 8
    shra_r.ph         \scratch4, \scratch6, 8
    shra_r.ph         \d1_8888,  \scratch1, 8
    shra_r.ph         \d2_8888,  \scratch2, 8
    and               \scratch3, \scratch3, \maskLSR  /*  0 |A*A| 0 |R*R */
    and               \scratch4, \scratch4, \maskLSR  /*  0 |G*G| 0 |B*B */
    and               \d1_8888,  \d1_8888,  \maskLSR  /*  0 |A*A| 0 |R*R */
    and               \d2_8888,  \d2_8888,  \maskLSR  /*  0 |G*G| 0 |B*B */
    addq.ph           \scratch3, \scratch3, \scratch5
    addq.ph           \scratch4, \scratch4, \scratch6
    addq.ph           \d1_8888,  \d1_8888,  \scratch1
    addq.ph           \d2_8888,  \d2_8888,  \scratch2
    shra_r.ph         \scratch3, \scratch3, 8
    shra_r.ph         \scratch4, \scratch4, 8
    shra_r.ph         \scratch5, \d1_8888,  8
    shra_r.ph         \scratch6, \d2_8888,  8
    precr.qb.ph       \d1_8888,  \scratch3, \scratch4
    precr.qb.ph       \d2_8888,  \scratch5, \scratch6
.endm

/*
 * OVER operation on single a8r8g8b8 source pixel (s_8888) and single a8r8g8b8
 * destination pixel (d_8888) using a8 mask (m_8). It also requires maskLSR
 * needed for rounding process. maskLSR must have following value:
 *     li       maskLSR, 0x00ff00ff
 */
.macro OVER_8888_8_8888 s_8888,   \
                        m_8,      \
                        d_8888,   \
                        out_8888, \
                        maskLSR,  \
                        scratch1, scratch2, scratch3, scratch4
    MIPS_UN8x4_MUL_UN8 \s_8888,   \m_8,     \
                       \scratch1, \maskLSR, \
                       \scratch2, \scratch3, \scratch4

    /* scratch2 = 255 - alpha(s * m) */
    not               \scratch2, \scratch1
    srl               \scratch2, \scratch2, 24

    MIPS_UN8x4_MUL_UN8 \d_8888,   \scratch2, \
                       \d_8888,   \maskLSR,  \
                       \scratch3, \scratch4, \out_8888

    addu_s.qb         \out_8888, \d_8888,   \scratch1
.endm

/*
 * OVER operation on two a8r8g8b8 source pixels (s1_8888 and s2_8888) and two
 * a8r8g8b8 destination pixels (d1_8888 and d2_8888) using a8 masks (m1_8 and
 * m2_8). It also requires maskLSR needed for rounding process. maskLSR must
 * have following value:
 *     li       maskLSR, 0x00ff00ff
 */
.macro OVER_2x8888_2x8_2x8888 s1_8888,   \
                              s2_8888,   \
                              m1_8,      \
                              m2_8,      \
                              d1_8888,   \
                              d2_8888,   \
                              out1_8888, \
                              out2_8888, \
                              maskLSR,   \
                              scratch1, scratch2, scratch3, \
                              scratch4, scratch5, scratch6
    MIPS_2xUN8x4_MUL_2xUN8 \s1_8888,  \s2_8888,  \
                           \m1_8,     \m2_8,     \
                           \scratch1, \scratch2, \
                           \maskLSR,             \
                           \scratch3, \scratch4, \out1_8888, \
                           \out2_8888, \scratch5, \scratch6

    /* scratch3/scratch4 = 255 - alpha(s * m) for each pixel */
    not               \scratch3, \scratch1
    srl               \scratch3, \scratch3, 24
    not               \scratch4, \scratch2
    srl               \scratch4, \scratch4, 24

    MIPS_2xUN8x4_MUL_2xUN8 \d1_8888,  \d2_8888,  \
                           \scratch3, \scratch4, \
                           \d1_8888,  \d2_8888,  \
                           \maskLSR,             \
                           \scratch5, \scratch6, \out1_8888, \
                           \out2_8888, \scratch3, \scratch4

    addu_s.qb         \out1_8888, \d1_8888, \scratch1
    addu_s.qb         \out2_8888, \d2_8888, \scratch2
.endm

/*
 * OVER operation on single a8r8g8b8 source pixel (s_8888) and single a8r8g8b8
 * destination pixel (d_8888). It also requires maskLSR needed for rounding
 * process. maskLSR must have following value:
 *     li       maskLSR, 0x00ff00ff
 */
.macro OVER_8888_8888 s_8888,   \
                      d_8888,   \
                      out_8888, \
                      maskLSR,  \
                      scratch1, scratch2, scratch3, scratch4
    /* scratch1 = 255 - alpha(s) */
    not               \scratch1, \s_8888
    srl               \scratch1, \scratch1, 24

    MIPS_UN8x4_MUL_UN8 \d_8888,   \scratch1, \
                       \out_8888, \maskLSR,  \
                       \scratch2, \scratch3, \scratch4

    addu_s.qb         \out_8888, \out_8888, \s_8888
.endm

/*
 * OVER operation on two a8r8g8b8 source pixels (s1_8888 and s2_8888) and two
 * a8r8g8b8 destination pixels (d1_8888 and d2_8888). It also requires maskLSR
 * needed for rounding process. maskLSR must have following value:
 *     li       maskLSR, 0x00ff00ff
 */
.macro OVER_2x8888_2x8888 s1_8888,   \
                          s2_8888,   \
                          d1_8888,   \
                          d2_8888,   \
                          out1_8888, \
                          out2_8888, \
                          maskLSR,   \
                          scratch1, scratch2, scratch3, \
                          scratch4, scratch5, scratch6
    /* scratch1/scratch2 = 255 - alpha(s) for each pixel */
    not               \scratch1, \s1_8888
    srl               \scratch1, \scratch1, 24
    not               \scratch2, \s2_8888
    srl               \scratch2, \scratch2, 24
    MIPS_2xUN8x4_MUL_2xUN8 \d1_8888,   \d2_8888,   \
                           \scratch1,  \scratch2,  \
                           \out1_8888, \out2_8888, \
                           \maskLSR,               \
                           \scratch3, \scratch4, \scratch5, \
                           \scratch6, \d1_8888, \d2_8888

    addu_s.qb         \out1_8888, \out1_8888, \s1_8888
    addu_s.qb         \out2_8888, \out2_8888, \s2_8888
.endm

/*
 * Multiplies single a8r8g8b8 pixel (s_8888) with single a8 mask (m_8) and adds
 * (with saturation) single a8r8g8b8 pixel (d_8888):
 *     out_8888 = saturate(s_8888 * m_8 + d_8888)
 * Requires maskLSR as for MIPS_UN8x4_MUL_UN8:
 *     li       maskLSR, 0x00ff00ff
 */
.macro MIPS_UN8x4_MUL_UN8_ADD_UN8x4 s_8888,   \
                                    m_8,      \
                                    d_8888,   \
                                    out_8888, \
                                    maskLSR,  \
                                    scratch1, scratch2, scratch3
    MIPS_UN8x4_MUL_UN8 \s_8888,   \m_8,     \
                       \out_8888, \maskLSR, \
                       \scratch1, \scratch2, \scratch3

    addu_s.qb         \out_8888, \out_8888, \d_8888
.endm

/*
 * Multiplies two a8r8g8b8 pixels (s1_8888, s2_8888) with two a8 masks
 * (m1_8, m2_8) and adds (with saturation) two a8r8g8b8 pixels
 * (d1_8888, d2_8888). Requires maskLSR as for MIPS_2xUN8x4_MUL_2xUN8:
 *     li       maskLSR, 0x00ff00ff
 */
.macro MIPS_2xUN8x4_MUL_2xUN8_ADD_2xUN8x4 s1_8888,   \
                                          s2_8888,   \
                                          m1_8,      \
                                          m2_8,      \
                                          d1_8888,   \
                                          d2_8888,   \
                                          out1_8888, \
                                          out2_8888, \
                                          maskLSR,   \
                                          scratch1, scratch2, scratch3, \
                                          scratch4, scratch5, scratch6
    MIPS_2xUN8x4_MUL_2xUN8 \s1_8888,   \s2_8888,   \
                           \m1_8,      \m2_8,      \
                           \out1_8888, \out2_8888, \
                           \maskLSR,               \
                           \scratch1, \scratch2, \scratch3, \
                           \scratch4, \scratch5, \scratch6

    addu_s.qb         \out1_8888, \out1_8888, \d1_8888
    addu_s.qb         \out2_8888, \out2_8888, \d2_8888
.endm

/*
 * Bilinear interpolation of four a8r8g8b8 pixels: top-left (tl), top-right
 * (tr), bottom-left (bl) and bottom-right (br), with weights wt1, wt2, wb1,
 * wb2. Each of the four channels is accumulated independently in DSP
 * accumulators $ac0-$ac3 as tl*wt1 + tr*wt2 + bl*wb1 + br*wb2, and the
 * interpolated pixel is assembled from bits 23:16 of each accumulator's low
 * word (i.e. the weighted sum shifted right by 16 — presumably the weights
 * sum to 65536; confirm against callers). Result is returned in tl; the
 * alpha, red, green and blue registers are used as temporaries and clobbered,
 * as are scratch1, scratch2 and accumulators $ac0-$ac3.
 */
.macro BILINEAR_INTERPOLATE_SINGLE_PIXEL tl, tr, bl, br,         \
                                         scratch1, scratch2,     \
                                         alpha, red, green, blue, \
                                         wt1, wt2, wb1, wb2
    /* blue channel (bits 7:0 of each input pixel) -> $ac0 */
    andi              \scratch1, \tl, 0xff
    andi              \scratch2, \tr, 0xff
    andi              \alpha,    \bl, 0xff
    andi              \red,      \br, 0xff

    multu             $ac0,      \wt1, \scratch1
    maddu             $ac0,      \wt2, \scratch2
    maddu             $ac0,      \wb1, \alpha
    maddu             $ac0,      \wb2, \red

    /* green channel (bits 15:8) -> $ac1 */
    ext               \scratch1, \tl,  8, 8
    ext               \scratch2, \tr,  8, 8
    ext               \alpha,    \bl,  8, 8
    ext               \red,      \br,  8, 8

    multu             $ac1,      \wt1, \scratch1
    maddu             $ac1,      \wt2, \scratch2
    maddu             $ac1,      \wb1, \alpha
    maddu             $ac1,      \wb2, \red

    /* red channel (bits 23:16) -> $ac2 */
    ext               \scratch1, \tl,  16, 8
    ext               \scratch2, \tr,  16, 8
    ext               \alpha,    \bl,  16, 8
    ext               \red,      \br,  16, 8

    mflo              \blue,     $ac0

    multu             $ac2,      \wt1, \scratch1
    maddu             $ac2,      \wt2, \scratch2
    maddu             $ac2,      \wb1, \alpha
    maddu             $ac2,      \wb2, \red

    /* alpha channel (bits 31:24) -> $ac3 */
    ext               \scratch1, \tl,  24, 8
    ext               \scratch2, \tr,  24, 8
    ext               \alpha,    \bl,  24, 8
    ext               \red,      \br,  24, 8

    mflo              \green,    $ac1

    multu             $ac3,      \wt1, \scratch1
    maddu             $ac3,      \wt2, \scratch2
    maddu             $ac3,      \wb1, \alpha
    maddu             $ac3,      \wb2, \red

    mflo              \red,      $ac2
    mflo              \alpha,    $ac3

    /* pack byte 2 of each accumulated channel into the output pixel */
    precr.qb.ph       \alpha,    \alpha, \red
    precr.qb.ph       \scratch1, \green, \blue
    precrq.qb.ph      \tl,       \alpha, \scratch1
.endm

#endif /* PIXMAN_MIPS_DSPR2_ASM_H */