; PowerPC optimized drawing methods for Goom
; © 2003 Guillaume Borios
; This Source Code is released under the terms of the General Public License

; Change log :
; 30 May 2003 : File creation

; Section definition : We use a read only code section for the whole file
.section __TEXT,__text,regular,pure_instructions


; --------------------------------------------------------------------------------------
; Single 32b pixel drawing macros
; Usage :
;   DRAWMETHOD_XXXX_MACRO *pixelIN, *pixelOUT, COLOR, WR1, WR2, WR3, WR4
;   Only the work registers (WR) can be touched by the macros
;
; Macro arguments are positional : $0 = pixelIN, $1 = pixelOUT, $2 = COLOR,
; $3..$6 = WR1..WR4. Not every macro needs every work register :
;   OVRW                   : none
;   B_OR, BAND, BXOR, BNOT : WR1
;   HALF                   : WR1..WR3
;   PLUS, DFLT             : WR1..WR4
; PLUS and HALF use andi. and therefore also overwrite CR0.
;
; Available methods :
;   DRAWMETHOD_DFLT_MACRO : Default drawing method (Actually PLUS)
;   DRAWMETHOD_PLUS_MACRO : RVB Saturated per channel addition (SLOWEST)
;   DRAWMETHOD_HALF_MACRO : 50% Transparency color drawing
;   DRAWMETHOD_OVRW_MACRO : Direct COLOR drawing (FASTEST)
;   DRAWMETHOD_B_OR_MACRO : Bitwise OR
;   DRAWMETHOD_BAND_MACRO : Bitwise AND
;   DRAWMETHOD_BXOR_MACRO : Bitwise XOR
;   DRAWMETHOD_BNOT_MACRO : Bitwise NOT
; --------------------------------------------------------------------------------------

; *pixelOUT = COLOR
.macro DRAWMETHOD_OVRW_MACRO
    stw     $2,0($1)            ; *$1 <- $2
.endmacro

; *pixelOUT = *pixelIN | COLOR
.macro DRAWMETHOD_B_OR_MACRO
    lwz     $3,0($0)            ; $3 <- *$0
    or      $3,$3,$2            ; $3 <- $3 | $2
    stw     $3,0($1)            ; *$1 <- $3
.endmacro

; *pixelOUT = *pixelIN & COLOR
.macro DRAWMETHOD_BAND_MACRO
    lwz     $3,0($0)            ; $3 <- *$0
    and     $3,$3,$2            ; $3 <- $3 & $2
    stw     $3,0($1)            ; *$1 <- $3
.endmacro

; *pixelOUT = *pixelIN ^ COLOR
.macro DRAWMETHOD_BXOR_MACRO
    lwz     $3,0($0)            ; $3 <- *$0
    xor     $3,$3,$2            ; $3 <- $3 ^ $2
    stw     $3,0($1)            ; *$1 <- $3
.endmacro

; *pixelOUT = ~*pixelIN (COLOR is not read)
.macro DRAWMETHOD_BNOT_MACRO
    lwz     $3,0($0)            ; $3 <- *$0
    nand    $3,$3,$3            ; $3 <- ~$3
    stw     $3,0($1)            ; *$1 <- $3
.endmacro

; Per channel saturated addition of *pixelIN and COLOR on the three low bytes.
; The 0x0000FF00 channel is summed on its own; the 0x00FF0000 and 0x000000FF
; channels are summed together in one register. For each channel the carry bit
; of the sum is moved to the sign bit and smeared back with an algebraic shift
; to build an "all ones" patch that is ORed over the overflowed channel.
.macro DRAWMETHOD_PLUS_MACRO
    lwz     $4,0($0)            ; $4 <- *$0
    andi.   $3,$4,0xFF00        ; $3 <- $4 & 0x0000FF00
    andi.   $5,$2,0xFF00        ; $5 <- $2 & 0x0000FF00
    add     $3,$3,$5            ; $3 <- $3 + $5 (carry lands in bit 15)
    rlwinm  $5,$3,15,0,0        ; $5 <- carry (bit 15) moved to the sign bit
    srawi   $5,$5,23            ; $5 <- $5 >> 23 (algebraic for sign extension)
    or      $3,$3,$5            ; $3 <- $3 | $5 (channel forced to FF on overflow)
    lis     $5,0xFF             ; $5 <- 0x00FF00FF
    addi    $5,$5,0xFF
    and     $4,$4,$5            ; $4 <- $4 & $5
    and     $6,$2,$5            ; $6 <- $2 & $5
    add     $4,$4,$6            ; $4 <- $4 + $6 (carries land in bits 7 and 23)
    rlwinm  $6,$4,7,0,0         ; $6 <- carry (bit 7) moved to the sign bit
    srawi   $6,$6,15            ; $6 <- $6 >> 15 (algebraic for sign extension)
    rlwinm  $5,$4,23,0,0        ; $5 <- carry (bit 23) moved to the sign bit
    srawi   $5,$5,31            ; $5 <- $5 >> 31 (algebraic for sign extension)
    rlwimi  $6,$5,0,24,31       ; $6[24..31] <- $5[24..31]
    or      $4,$4,$6            ; $4 <- $4 | $6
    rlwimi  $4,$3,0,16,23       ; $4[16..23] <- $3[16..23]
    stw     $4,0($1)            ; *$1 <- $4
.endmacro

; Per channel average of *pixelIN and COLOR on the three low bytes.
; Same channel split as the PLUS macro; the sums are halved with a one bit
; right shift, and the stray bit shifted down into the middle channel is
; overwritten when that channel is inserted.
.macro DRAWMETHOD_HALF_MACRO
    lwz     $4,0($0)            ; $4 <- *$0
    andi.   $3,$4,0xFF00        ; $3 <- $4 & 0x0000FF00
    andi.   $5,$2,0xFF00        ; $5 <- $2 & 0x0000FF00
    add     $3,$3,$5            ; $3 <- $3 + $5
    lis     $5,0xFF             ; $5 <- 0x00FF00FF
    addi    $5,$5,0xFF
    and     $4,$4,$5            ; $4 <- $4 & $5
    and     $5,$2,$5            ; $5 <- $2 & $5
    add     $4,$4,$5            ; $4 <- $4 + $5
    srwi    $4,$4,1             ; $4 <- $4 >> 1
    rlwimi  $4,$3,31,16,23      ; $4[16..23] <- $3[15..22]
    stw     $4,0($1)            ; *$1 <- $4
.endmacro

; Default method : forwards every argument to the PLUS macro.
.macro DRAWMETHOD_DFLT_MACRO
    DRAWMETHOD_PLUS_MACRO $0,$1,$2,$3,$4,$5,$6
.endmacro

; --------------------------------------------------------------------------------------



; **************************************************************************************
; void DRAWMETHOD_PLUS_PPC(unsigned int * buf, unsigned int _col);
; void DRAWMETHOD_PLUS_2_PPC(unsigned * in, unsigned int * out, unsigned int _col);
;
; _2 variant : r3 = in, r4 = out, r5 = _col, work registers r6-r9
; in place   : r3 = buf, r4 = _col,          work registers r5-r7 and r9
; **************************************************************************************
.globl _DRAWMETHOD_PLUS_2_PPC
.align 3
_DRAWMETHOD_PLUS_2_PPC:
    DRAWMETHOD_PLUS_MACRO r3,r4,r5,r6,r7,r8,r9
    blr                         ; return

.globl _DRAWMETHOD_PLUS_PPC
.align 3
_DRAWMETHOD_PLUS_PPC:
    DRAWMETHOD_PLUS_MACRO r3,r3,r4,r5,r6,r7,r9
    blr                         ; return


; **************************************************************************************
; void DRAWMETHOD_HALF_PPC(unsigned int * buf, unsigned int _col);
; void DRAWMETHOD_HALF_2_PPC(unsigned * in, unsigned int * out, unsigned int _col);
;
; _2 variant : r3 = in, r4 = out, r5 = _col, work registers r6-r8
; in place   : r3 = buf, r4 = _col,          work registers r5-r7
; **************************************************************************************
.globl _DRAWMETHOD_HALF_2_PPC
.align 3
_DRAWMETHOD_HALF_2_PPC:
    DRAWMETHOD_HALF_MACRO r3,r4,r5,r6,r7,r8
    blr                         ; return

.globl _DRAWMETHOD_HALF_PPC
.align 3
_DRAWMETHOD_HALF_PPC:
    DRAWMETHOD_HALF_MACRO r3,r3,r4,r5,r6,r7
    blr                         ; return


; **************************************************************************************
; void DRAW_LINE_PPC(unsigned int *data, int x1, int y1, int x2, int y2, unsigned int col,
;                    unsigned int screenx, unsigned int screeny)
; **************************************************************************************
.globl _DRAW_LINE_PPC
.align 3
_DRAW_LINE_PPC:
    ; NOT IMPLEMENTED YET : returns without drawing anything
    blr                         ; return


; **************************************************************************************
; void _ppc_brightness(Pixel * src, Pixel * dest, unsigned int size, unsigned int coeff)
;
; Three entry points share this prototype :
;   _ppc_brightness_G4      : AltiVec, with a dst data stream prefetch
;   _ppc_brightness_G5      : AltiVec, without the prefetch
;   _ppc_brightness_generic : integer only
;
; In   : r3 = src, r4 = dest, r5 = size (in pixels), r6 = coeff
; Each of the three low bytes of a pixel becomes min((byte * coeff) >> 8, 0xFF).
; The top byte is written as 0.
;
; AltiVec versions :
;   - process 4 pixels per iteration; the size % 4 trailing pixels are left untouched
;   - src and dest are accessed with lvx / stvx, which ignore the low 4 address bits,
;     so both are expected to be 16 byte aligned
;   - only the low 16 bits of coeff take part in the multiplication (vmulouh)
;   - coeff is passed to the vector unit through the vectortmpwork static buffer.
;     NOTE(review): that buffer is shared by every caller, so these two routines do
;     not look safe to run concurrently from several threads - confirm with callers.
; **************************************************************************************


.const
.align 4
; Row 0 is all zeroes and is skipped by the code, which starts loading at +16.
; Rows 1-4 are vperm control vectors : index 0x10 picks byte 0 of the second
; source (the null vector), indexes 0x00-0x0F pick one byte of the pixel vector.
; Row n therefore widens the 4 bytes of pixel n-1 into four 32b words.
vectorZERO:
    .long 0,0,0,0
    .long 0x10101000, 0x10101001, 0x10101002, 0x10101003
    .long 0x10101004, 0x10101005, 0x10101006, 0x10101007
    .long 0x10101008, 0x10101009, 0x1010100A, 0x1010100B
    .long 0x1010100C, 0x1010100D, 0x1010100E, 0x1010100F


.section __TEXT,__text,regular,pure_instructions

.globl _ppc_brightness_G4
.align 3
_ppc_brightness_G4:

; PowerPC Altivec code
    srwi    r5,r5,2             ; r5 <- size / 4 : number of 4 pixel vectors
    cmplwi  cr0,r5,0
    beqlr                       ; no full vector : return now, a zero CTR would
                                ; make bdnz wrap and loop 2^32 times
    mtctr   r5

; vrsave : add v0, v1 and v4-v13 to the registers already declared live.
; The previous value is kept in r11 and restored before returning.
    mfspr   r11,256
    oris    r12,r11,0xCFFC
    mtspr   256,r12

; Position independent code : r10 <- address of the local base label
    mflr    r0
    bcl     20,31,"L00000000001$pb"
"L00000000001$pb":
    mflr    r10
    mtlr    r0

    addis   r9,r10,ha16(vectorZERO-"L00000000001$pb")
    addi    r9,r9,lo16(vectorZERO-"L00000000001$pb")

    vxor    v0,v0,v0            ; V0 = NULL vector

; v10-v13 <- the four permute control vectors
    addi    r9,r9,16
    lvx     v10,0,r9
    addi    r9,r9,16
    lvx     v11,0,r9
    addi    r9,r9,16
    lvx     v12,0,r9
    addi    r9,r9,16
    lvx     v13,0,r9

; v8 <- coeff in every word, v9 <- 8 (shift count) in every word
    addis   r9,r10,ha16(vectortmpwork-"L00000000001$pb")
    addi    r9,r9,lo16(vectortmpwork-"L00000000001$pb")
    stw     r6,0(r9)
    li      r6,8
    stw     r6,4(r9)
    lvx     v9,0,r9
    vspltw  v8,v9,0
    vspltw  v9,v9,1

; elt counter : r9 = byte offset of the current vector
    li      r9,0
    lis     r7,0x0F01           ; control word for the dst prefetch below
    b       L7
.align 4
L7:
    lvx     v1,r9,r3            ; v1 <- 4 source pixels

    vperm   v4,v1,v0,v10        ; v4-v7 <- bytes of pixels 0-3 widened to words
    ;*********************
    add     r10,r9,r3           ; r10 <- address of the current source vector
    ;*********************
    vperm   v5,v1,v0,v11
    vperm   v6,v1,v0,v12
    vperm   v7,v1,v0,v13

    vmulouh v4,v4,v8            ; byte * coeff
    ;*********************
    dst     r10,r7,3            ; data stream touch from the current address
    ;*********************
    vmulouh v5,v5,v8
    vmulouh v6,v6,v8
    vmulouh v7,v7,v8
    vsrw    v4,v4,v9            ; >> 8
    vsrw    v5,v5,v9
    vsrw    v6,v6,v9
    vsrw    v7,v7,v9

    vpkuwus v4,v4,v5            ; saturating pack : words -> halfwords -> bytes
    vpkuwus v6,v6,v7
    vpkuhus v1,v4,v6

    stvx    v1,r9,r4            ; store 4 destination pixels
    addi    r9,r9,16

    bdnz    L7

    mtspr   256,r11             ; restore vrsave
    blr


.globl _ppc_brightness_G5
.align 3
_ppc_brightness_G5:

; PowerPC Altivec G5 code
    srwi    r5,r5,2             ; r5 <- size / 4 : number of 4 pixel vectors
    cmplwi  cr0,r5,0
    beqlr                       ; no full vector : return now, a zero CTR would
                                ; make bdnz wrap and loop 2^32 times
    mtctr   r5

; vrsave : add v0, v1 and v4-v13 to the registers already declared live.
; The previous value is kept in r11 and restored before returning.
    mfspr   r11,256
    oris    r12,r11,0xCFFC
    mtspr   256,r12

; Position independent code : r10 <- address of the local base label
    mflr    r0
    bcl     20,31,"L00000000002$pb"
"L00000000002$pb":
    mflr    r10
    mtlr    r0

    addis   r9,r10,ha16(vectorZERO-"L00000000002$pb")
    addi    r9,r9,lo16(vectorZERO-"L00000000002$pb")

    vxor    v0,v0,v0            ; V0 = NULL vector

; v10-v13 <- the four permute control vectors
    addi    r9,r9,16
    lvx     v10,0,r9
    addi    r9,r9,16
    lvx     v11,0,r9
    addi    r9,r9,16
    lvx     v12,0,r9
    addi    r9,r9,16
    lvx     v13,0,r9

; v8 <- coeff in every word, v9 <- 8 (shift count) in every word
    addis   r9,r10,ha16(vectortmpwork-"L00000000002$pb")
    addi    r9,r9,lo16(vectortmpwork-"L00000000002$pb")
    stw     r6,0(r9)
    li      r6,8
    stw     r6,4(r9)
    lvx     v9,0,r9
    vspltw  v8,v9,0
    vspltw  v9,v9,1

; elt counter : r9 = byte offset of the current vector
    li      r9,0
    b       L6
.align 4
L6:
    lvx     v1,r9,r3            ; v1 <- 4 source pixels

    vperm   v4,v1,v0,v10        ; v4-v7 <- bytes of pixels 0-3 widened to words
    vperm   v5,v1,v0,v11
    vperm   v6,v1,v0,v12
    vperm   v7,v1,v0,v13

    vmulouh v4,v4,v8            ; byte * coeff
    vmulouh v5,v5,v8
    vmulouh v6,v6,v8
    vmulouh v7,v7,v8
    vsrw    v4,v4,v9            ; >> 8
    vsrw    v5,v5,v9
    vsrw    v6,v6,v9
    vsrw    v7,v7,v9

    vpkuwus v4,v4,v5            ; saturating pack : words -> halfwords -> bytes
    vpkuwus v6,v6,v7
    vpkuhus v1,v4,v6

    stvx    v1,r9,r4            ; store 4 destination pixels
    addi    r9,r9,16

    bdnz    L6

    mtspr   256,r11             ; restore vrsave
    blr


.globl _ppc_brightness_generic
.align 3
_ppc_brightness_generic:
    cmplwi  cr0,r5,0
    beqlr                       ; size == 0 : return now, a zero CTR would
                                ; make bdnz wrap and loop 2^32 times
    subi    r3,r3,4             ; bias both pointers for the pre-incrementing
    subi    r4,r4,4             ; lwzu / stwu accesses
    mtctr   r5                  ; one iteration per pixel
    b       L1
.align 4
L1:
    lwzu    r7,4(r3)            ; r7 <- next source pixel

    rlwinm  r8,r7,16,24,31      ; r8  <- (r7 >> 16) & 0xFF
    rlwinm  r9,r7,24,24,31      ; r9  <- (r7 >> 8) & 0xFF
    mullw   r8,r8,r6
    rlwinm  r10,r7,0,24,31      ; r10 <- r7 & 0xFF
    mullw   r9,r9,r6
    srwi    r8,r8,8
    mullw   r10,r10,r6
    srwi    r9,r9,8

    rlwinm. r11,r8,0,0,23       ; any bit above the low byte ?
    beq     L2
    li      r8,0xFF             ; yes : saturate
L2:
    srwi    r10,r10,8
    rlwinm. r11,r9,0,0,23
    beq     L3
    li      r9,0xFF
L3:
    rlwinm  r7,r8,16,8,15       ; r7 <- r8 << 16, every other bit cleared
    rlwinm. r11,r10,0,0,23
    beq     L4
    li      r10,0xFF
L4:
    rlwimi  r7,r9,8,16,23       ; insert r9 in the 0x0000FF00 channel
    rlwimi  r7,r10,0,24,31      ; insert r10 in the 0x000000FF channel

    stwu    r7,4(r4)            ; store the destination pixel
    bdnz    L1

    blr



.static_data
.align 4
; 16 byte scratch vector used by the AltiVec brightness routines to move
; coeff and the shift count from integer registers to a vector register.
vectortmpwork:
    .long 0,0,0,0
