1 /* RetroArch - A frontend for libretro. 2 * Copyright (C) 2014-2016 - Ali Bouhlel 3 * 4 * RetroArch is free software: you can redistribute it and/or modify it under the terms 5 * of the GNU General Public License as published by the Free Software Found- 6 * ation, either version 3 of the License, or (at your option) any later version. 7 * 8 * RetroArch is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; 9 * without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR 10 * PURPOSE. See the GNU General Public License for more details. 11 * 12 * You should have received a copy of the GNU General Public License along with RetroArch. 13 * If not, see <http://www.gnu.org/licenses/>. 14 */ 15 16 #ifndef GX2_SHADER_INL_H 17 #define GX2_SHADER_INL_H 18 19 #include <retro_endianness.h> 20 21 #ifdef MSB_FIRST 22 #define to_QWORD(w0, w1) (((u64)(w0) << 32ull) | (w1)) 23 #define to_LE(x) (__builtin_bswap32(x)) 24 #else 25 #define to_QWORD(w0, w1) (((u64)(w1) << 32ull) | (w0)) 26 #define to_LE(x) (x) 27 #endif 28 29 /* CF */ 30 #define CF_DWORD0(addr) to_LE(addr) 31 32 #define CF_DWORD1(popCount, cfConst, cond, count, callCount, inst) \ 33 to_LE(popCount | (cfConst << 3) | (cond << 8) | (count << 10) | (callCount << 13) | (inst << 23) | (1 << 31)) 34 35 #define CF_ALU_WORD0(addr, kcacheBank0, kcacheBank1, kcacheMode0) \ 36 to_LE(addr | (kcacheBank0 << 22) | (kcacheBank1 << 26) | (kcacheMode0 << 30)) 37 #define CF_ALU_WORD1(kcacheMode1, kcacheAddr0, kcacheAddr1, count, altConst, inst) \ 38 to_LE(kcacheMode1 | (kcacheAddr0 << 2) | (kcacheAddr1 << 10) | (count << 18) | (altConst << 25) | (inst << 26) | (1 << 31)) 39 40 #define CF_EXP_WORD0(dstReg_and_type, srcReg, srcRel, indexGpr, elemSize)\ 41 to_LE(dstReg_and_type | (srcReg << 15) | (srcRel << 22) | (indexGpr << 23) | (elemSize << 30)) 42 43 #define CF_EXP_WORD1(srcSelX, srcSelY, srcSelZ, srcSelW, validPixelMode, inst) \ 44 to_LE(srcSelX | (srcSelY << 3) | (srcSelZ << 6) | (srcSelW 
<< 9) | (validPixelMode << 22) | (inst << 23) | (1 << 31)) 45 46 #define CF_ALLOC_EXPORT_WORD0(arrayBase, type, dstReg, dstRel, indexGpr, elemSize) \ 47 to_LE(arrayBase | (type << 13) | (dstReg << 15) | (dstRel << 22) | (indexGpr << 23) | (elemSize << 30)) 48 49 #define CF_ALLOC_EXPORT_WORD1_BUF(arraySize, writeMask, inst) \ 50 to_LE(arraySize | (writeMask << 12) | (inst << 23) | (1 << 31)) 51 52 #define ALU_SRC_KCACHE0_BASE 0x80 53 #define ALU_SRC_KCACHE1_BASE 0xA0 54 #define CF_KCACHE_BANK_LOCK_1 0x1 55 #define CB1 0x1 56 #define CB2 0x2 57 #define _0_15 CF_KCACHE_BANK_LOCK_1 58 59 #define KC0(x) (x + ALU_SRC_KCACHE0_BASE) 60 #define KC1(x) (x + ALU_SRC_KCACHE1_BASE) 61 62 #define NO_BARRIER & ~to_QWORD(0,to_LE(1 << 31)) 63 #define END_OF_PROGRAM | to_QWORD(0,to_LE(1 << 21)) 64 #define VALID_PIX | to_QWORD(0,to_LE(1 << 22)) 65 #define WHOLE_QUAD_MODE | to_QWORD(0,to_LE(1 << 30)) 66 #define BURSTCNT(x) | to_QWORD(0,to_LE(x << 17)) 67 #define WRITE(x) (x >> 2) 68 #define ARRAY_SIZE(x) x 69 #define ELEM_SIZE(x) x 70 #define KCACHE0(bank, mode) | to_QWORD(CF_ALU_WORD0(0, bank, 0, mode), 0) 71 #define KCACHE1(bank, mode) | to_QWORD(CF_ALU_WORD0(0, 0, bank, 0), CF_ALU_WORD1(mode,0, 0, 0, 0, 0)) 72 73 #define DEACTIVATE 1 74 #define UPDATE_EXEC_MASK(mode) | to_QWORD(0, to_LE(mode << 2)) 75 #define UPDATE_PRED | to_QWORD(0, to_LE(1ull << 3)) 76 #define CLAMP | to_QWORD(0, to_LE(1ull << 31)) 77 #define ALU_LAST | to_QWORD(to_LE(1ull << 31), 0) 78 79 /* ALU */ 80 81 #define ALU_WORD0(src0Sel, src0Rel, src0Chan, src0Neg, src1Sel, src1Rel, src1Chan, src1Neg, indexMode, predSel) \ 82 to_LE(src0Sel | ((src0Rel) << 9) | ((src0Chan) << 10) | ((src0Neg) << 12) | ((src1Sel) << 13) | ((src1Rel) << 22) \ 83 | ((src1Chan) << 23) | ((src1Neg) << 25) | ((indexMode) << 26) | ((predSel) << 29)) 84 85 #define ALU_WORD1_OP2(src0Abs, src1Abs, updateExecuteMask, updatePred, writeMask, omod, inst, encoding, bankSwizzle, dstGpr, dstRel, dstChan, clamp) \ 86 to_LE(src0Abs | (src1Abs << 1) | 
(updateExecuteMask << 2) | (updatePred << 3) | (writeMask << 4) | (omod << 5) | (inst << 7) | \ 87 (encoding << 15) | (bankSwizzle << 18) | ((dstGpr&0x7F) << 21) | (dstRel << 28) | ((dstChan&0x3) << 29) | (clamp << 31)) 88 89 #define ALU_WORD1_OP3(src2Sel, src2Rel, src2Chan, src2Neg, inst, bankSwizzle, dstGpr, dstRel, dstChan, clamp) \ 90 to_LE(src2Sel | (src2Rel << 9) | (src2Chan << 10) | (src2Neg << 12) | (inst << 13) | \ 91 (bankSwizzle << 18) | ((dstGpr&0x7F) << 21) | (dstRel << 28) | ((dstChan&0x3) << 29) | (clamp << 31)) 92 93 /* TEX */ 94 #define TEX_WORD0(inst, bcFracMode, fetchWholeQuad, resourceID, srcReg, srcRel, altConst) \ 95 to_LE(inst | (bcFracMode << 5) | (fetchWholeQuad << 7) | (resourceID << 8) | (srcReg << 16) | (srcRel << 23) | (altConst << 24)) 96 97 #define TEX_WORD1(dstReg, dstRel, dstSelX, dstSelY, dstSelZ, dstSelW, lodBias, coordTypeX, coordTypeY, coordTypeZ, coordTypeW) \ 98 to_LE(dstReg | (dstRel << 7) | (dstSelX << 9) | (dstSelY << 12) | (dstSelZ << 15) | (dstSelW << 18) | \ 99 (lodBias << 21) | (coordTypeX << 28) | (coordTypeY << 29) | (coordTypeZ << 30) | (coordTypeW << 31)) 100 101 #define TEX_WORD2(offsetX, offsetY, offsetZ, samplerID, srcSelX, srcSelY, srcSelZ, srcSelW) \ 102 to_LE(offsetX | (offsetY << 5) | (offsetZ << 10) | (samplerID << 15) | (srcSelX << 20) | (srcSelY << 23) | (srcSelZ << 26) | (srcSelW << 29)) 103 104 #define VTX_WORD0(inst, type, buffer_id, srcReg, srcSelX, mega) \ 105 to_LE(inst | (type << 5) | (buffer_id << 8) | (srcReg << 16) | (srcSelX << 24) | (mega << 26)) 106 107 #define VTX_WORD1(dstReg, dstSelX, dstSelY, dstSelZ, dstSelW) \ 108 to_LE(dstReg | (dstSelX << 9) | (dstSelY << 12) | (dstSelZ << 15) | (dstSelW << 18) | (1 << 21)) 109 110 #define VTX_WORD2(offset, ismega) \ 111 to_LE(offset| (ismega << 19)) 112 113 #define _x 0 114 #define _y 1 115 #define _z 2 116 #define _w 3 117 #define _0 4 118 #define _1 5 119 #define _m 7 /*mask*/ 120 121 #define _xyzw 0b1111 122 #define _xy__ 0b0011 123 124 #define 
GX2_COMP_SEL(c0, c1, c2, c3) (((c0) << 24) | ((c1) << 16) | ((c2) << 8) | (c3)) 125 126 #define ALU_LITERAL(v) to_QWORD(to_LE(v), 0) 127 #define ALU_LITERAL2(v0,v1) to_QWORD(to_LE(v0), to_LE(v1)) 128 #define ALU_LITERAL3(v0,v1,v2) ALU_LITERAL2(v0,v1),ALU_LITERAL(v2) 129 #define ALU_LITERAL4(v0,v1,v2,v3) ALU_LITERAL2(v0,v1),ALU_LITERAL2(v2,v3) 130 #define ALU_LITERAL5(v0,v1,v2,v3,v5) ALU_LITERAL4(v0,v1,v2,v3),ALU_LITERAL(v4) 131 132 /* SRCx_SEL special constants */ 133 #define ALU_SRC_1_DBL_L 0xF4 134 #define ALU_SRC_1_DBL_M 0xF5 135 #define ALU_SRC_0_5_DBL_L 0xF6 136 #define ALU_SRC_0_5_DBL_M 0xF7 137 #define ALU_SRC_0 0xF8 138 #define ALU_SRC_1 0xF9 139 #define ALU_SRC_1_INT 0xFA 140 #define ALU_SRC_M_1_INT 0xFB 141 #define ALU_SRC_0_5 0xFC 142 #define ALU_SRC_LITERAL 0xFD 143 #define ALU_SRC_PV 0xFE 144 #define ALU_SRC_PS 0xFF 145 146 #define _NEG | (1 << 12) 147 #define _ABS | (1 << 13) 148 149 #define ALU_OMOD_OFF 0x0 150 #define ALU_OMOD_M2 0x1 151 #define ALU_OMOD_M4 0x2 152 #define ALU_OMOD_D2 0x3 153 154 #define ALU_VEC_012 0x0 155 #define ALU_VEC_021 0x1 156 #define ALU_VEC_120 0x2 157 #define ALU_VEC_102 0x3 158 #define ALU_VEC_201 0x4 159 #define ALU_VEC_210 0x5 160 #define VEC_012 | to_QWORD(0, to_LE(ALU_VEC_012 << 18)) 161 #define VEC_021 | to_QWORD(0, to_LE(ALU_VEC_021 << 18)) 162 #define VEC_120 | to_QWORD(0, to_LE(ALU_VEC_120 << 18)) 163 #define VEC_102 | to_QWORD(0, to_LE(ALU_VEC_102 << 18)) 164 #define VEC_201 | to_QWORD(0, to_LE(ALU_VEC_201 << 18)) 165 #define VEC_210 | to_QWORD(0, to_LE(ALU_VEC_210 << 18)) 166 167 #define VALID_PIX | to_QWORD(0,to_LE(1 << 22)) 168 169 #define ALU_SCL_210 0x0 170 #define ALU_SCL_122 0x1 171 #define ALU_SCL_212 0x2 172 #define ALU_SCL_221 0x3 173 174 #define SCL_210 | to_QWORD(0, to_LE(ALU_SCL_210 << 18)) 175 #define SCL_122 | to_QWORD(0, to_LE(ALU_SCL_122 << 18)) 176 #define SCL_212 | to_QWORD(0, to_LE(ALU_SCL_212 << 18)) 177 #define SCL_221 | to_QWORD(0, to_LE(ALU_SCL_221 << 18)) 178 179 #define FETCH_TYPE(x) x 
/* MINI(x) yields x - 1; MEGA(x) additionally sets bit 31, which VTX_FETCH
 * extracts again with `mega >> 31`. */
#define MINI(x) ((x) - 1)
#define MEGA(x) (MINI(x) | 0x80000000)
#define OFFSET(x) (x)

#define VERTEX_DATA     0
#define INSTANCE_DATA   1
#define NO_INDEX_OFFSET 2

/* CF defines */
#define CF_COND_ACTIVE   0x0
#define CF_COND_FALSE    0x1
#define CF_COND_BOOL     0x2
#define CF_COND_NOT_BOOL 0x3

/* TEX defines */
#define TEX_UNNORMALIZED 0x0
#define TEX_NORMALIZED   0x1

/* instructions */
/* CF */
#define CF_INST_TEX         0x01
#define CF_INST_VTX         0x02
#define CF_INST_JUMP        0x0A
#define CF_INST_ELSE        0x0D
#define CF_INST_CALL_FS     0x13
#define CF_INST_EMIT_VERTEX 0x15
#define CF_INST_MEM_RING    0x26

/* These three are passed to CF_ALU_WORD1, whose inst field sits at a
 * different bit offset than CF_DWORD1's, so 0x0A here does not collide with
 * CF_INST_JUMP. */
#define CF_INST_ALU             0x08
#define CF_INST_ALU_PUSH_BEFORE 0x09
#define CF_INST_ALU_POP_AFTER   0x0A
/* ALU */
#define OP2_INST_ADD            0x0
#define OP2_INST_MUL            0x1
#define OP2_INST_MUL_IEEE       0x2
#define OP2_INST_MIN            0x04
#define OP2_INST_MAX            0x03
#define OP2_INST_MAX_DX10       0x05
#define OP2_INST_FRACT          0x10
#define OP2_INST_SETGT          0x09
#define OP2_INST_SETE_DX10      0x0C
#define OP2_INST_SETGT_DX10     0x0D
#define OP2_INST_FLOOR          0x14
#define OP2_INST_MOV            0x19
#define OP2_INST_PRED_SETGT     0x21
#define OP2_INST_PRED_SETE_INT  0x42
#define OP2_INST_DOT4           0x50
#define OP2_INST_DOT4_IEEE      0x51
#define OP2_INST_RECIP_IEEE     0x66
#define OP2_INST_RECIPSQRT_IEEE 0x69
#define OP2_INST_SQRT_IEEE      0x6A
#define OP2_INST_SIN            0x6E
#define OP2_INST_COS            0x6F

#define OP3_INST_MULADD   0x10
#define OP3_INST_CNDGT    0x19
#define OP3_INST_CNDE_INT 0x1C
/* EXP */
#define CF_INST_EXP      0x27
#define CF_INST_EXP_DONE 0x28

/* TEX */
#define TEX_INST_GET_GRADIENTS_H 0x07
#define TEX_INST_GET_GRADIENTS_V 0x08
#define TEX_INST_SAMPLE          0x10

/* VTX */
#define VTX_INST_FETCH 0x0

/* EXPORT_TYPE */
#define EXPORT_TYPE_PIXEL 0x0
#define EXPORT_TYPE_POS   0x1
#define EXPORT_TYPE_PARAM 0x2

#define EXPORT_ARRAY_BASE_POS(id)   (0x3C + (id)) /* [0, 3] */
#define EXPORT_ARRAY_BASE_PARAM(id) (id)          /* [0, 31] */
#define EXPORT_ARRAY_BASE_PIX(id)   (id)

/* exports */
/* Array base OR'ed with the export type at bit 13. The whole expansion is
 * parenthesized so the result can be combined with any operator. */
#define POS(id)   (EXPORT_ARRAY_BASE_POS(id)   | (EXPORT_TYPE_POS << 13))
#define PARAM(id) (EXPORT_ARRAY_BASE_PARAM(id) | (EXPORT_TYPE_PARAM << 13))
#define PIX(id)   (EXPORT_ARRAY_BASE_PIX(id)   | (EXPORT_TYPE_PIXEL << 13))
#define POS0   POS(0)
#define PARAM0 PARAM(0)
#define PARAM1 PARAM(1)
#define PIX0   PIX(0)

/* registers */
#define __ (0x80) /* invalid register (write mask off) */
#define _R(x) (x)
#define _R0  _R(0x0)
#define _R1  _R(0x1)
#define _R2  _R(0x2)
#define _R3  _R(0x3)
#define _R4  _R(0x4)
#define _R5  _R(0x5)
#define _R6  _R(0x6)
#define _R7  _R(0x7)
#define _R8  _R(0x8)
#define _R9  _R(0x9)
#define _R10 _R(0xA)
#define _R11 _R(0xB)
#define _R12 _R(0xC)
#define _R13 _R(0xD)
#define _R14 _R(0xE)
#define _R15 _R(0xF)

#define _R120 _R(0x78)
#define _R121 _R(0x79)
#define _R122 _R(0x7A)
#define _R123 _R(0x7B)
#define _R124 _R(0x7C)
#define _R125 _R(0x7D)
#define _R126 _R(0x7E)
#define _R127 _R(0x7F)

/* texture */
#define _t(x) (x)
#define _t0 _t(0x0)

/* sampler */
#define _s(x) (x)
#define _s0 _s(0x0)

#define _b(x) (x)

/* CF instruction builders: each yields one QWORD made of CF_DWORD0(addr)
 * and CF_DWORD1(...). TEX and VTX encode the count as cnt - 1. */
#define CALL_FS to_QWORD(CF_DWORD0(0), CF_DWORD1(0,0,0,0,0,CF_INST_CALL_FS))

#define TEX(addr, cnt) to_QWORD(CF_DWORD0(addr), CF_DWORD1(0x0, 0x0, CF_COND_ACTIVE, ((cnt) - 1), 0x0, CF_INST_TEX))
#define VTX(addr, cnt) to_QWORD(CF_DWORD0(addr), CF_DWORD1(0x0, 0x0, CF_COND_ACTIVE, ((cnt) - 1), 0x0, CF_INST_VTX))
#define JUMP(popCount, addr) to_QWORD(CF_DWORD0(addr), CF_DWORD1(popCount, 0x0, CF_COND_ACTIVE, 0x0, 0x0, CF_INST_JUMP))
#define ELSE(popCount, addr) to_QWORD(CF_DWORD0(addr), CF_DWORD1(popCount, 0x0, CF_COND_ACTIVE, 0x0, 0x0, CF_INST_ELSE))

/* ALU clause CF builders: one QWORD from CF_ALU_WORD0/CF_ALU_WORD1 with all
 * kcache fields zero (add them with the KCACHE0/KCACHE1 suffix modifiers).
 * The count is encoded as cnt - 1. */
#define ALU(addr, cnt) to_QWORD(CF_ALU_WORD0(addr, 0x0, 0x0, 0x0), CF_ALU_WORD1(0x0, 0x0, 0x0, ((cnt) - 1), 0x0, CF_INST_ALU))
#define ALU_PUSH_BEFORE(addr, cnt) to_QWORD(CF_ALU_WORD0(addr, 0x0, 0x0, 0x0), CF_ALU_WORD1(0x0, 0x0, 0x0, ((cnt) - 1), 0x0, CF_INST_ALU_PUSH_BEFORE))
#define ALU_POP_AFTER(addr, cnt) to_QWORD(CF_ALU_WORD0(addr, 0x0, 0x0, 0x0), CF_ALU_WORD1(0x0, 0x0, 0x0, ((cnt) - 1), 0x0, CF_INST_ALU_POP_AFTER))

/* Export builders; dstReg_and_type is a combined array-base/type value such
 * as the one produced by POS()/PARAM()/PIX(). */
#define EXP_DONE(dstReg_and_type, srcReg, srcSelX, srcSelY, srcSelZ, srcSelW) to_QWORD(CF_EXP_WORD0(dstReg_and_type, srcReg, 0x0, 0x0, 0x0), \
   CF_EXP_WORD1(srcSelX, srcSelY, srcSelZ, srcSelW, 0x0, CF_INST_EXP_DONE))

#define EXP(dstReg_and_type, srcReg, srcSelX, srcSelY, srcSelZ, srcSelW) to_QWORD(CF_EXP_WORD0(dstReg_and_type, srcReg, 0x0, 0x0, 0x0), \
   CF_EXP_WORD1(srcSelX, srcSelY, srcSelZ, srcSelW, 0x0, CF_INST_EXP))

#define MEM_RING(arrayBase, dstReg, writeMask, arraySize, elemSize) \
   to_QWORD(CF_ALLOC_EXPORT_WORD0(arrayBase, 0x00, dstReg, 0x00, 0x00, elemSize), \
            CF_ALLOC_EXPORT_WORD1_BUF(arraySize, writeMask, CF_INST_MEM_RING))

#define EMIT_VERTEX to_QWORD(0, CF_DWORD1(0, 0, 0, 0, 0, CF_INST_EMIT_VERTEX))

/* Two-operand ALU instruction.
 *  - The low 13 bits of each source selector go into word 0 (so a `_NEG`
 *    suffix, bit 12, lands in that word), and bit 13 and up (`_ABS`) become
 *    the srcNAbs fields of word 1.
 *  - writeMask is derived from dstGpr: it is 0 when bit 7 of dstGpr is set
 *    (i.e. dstGpr is `__`) and 1 otherwise. */
#define ALU_OP2(inst, dstGpr, dstChan, src0Sel, src0Chan, src1Sel, src1Chan, omod) \
   to_QWORD(ALU_WORD0(((src0Sel) & ((1 << 13) - 1)), 0x0, src0Chan, 0x0, ((src1Sel) & ((1 << 13) - 1)), 0x0, src1Chan, 0x0, 0x0, 0x0), \
            ALU_WORD1_OP2(((src0Sel) >> 13), ((src1Sel) >> 13), 0x0, 0x0, ((((dstGpr) & __) >> 7) ^ 0x1), omod, inst, 0x0, 0x0, dstGpr, 0x0, dstChan, 0x0))

/* Three-operand ALU instruction; selectors are forwarded unmodified. */
#define ALU_OP3(inst, dstGpr, dstChan, src0Sel, src0Chan, src1Sel, src1Chan, src2Sel, src2Chan) \
   to_QWORD(ALU_WORD0(src0Sel, 0x0, src0Chan, 0x0, src1Sel, 0x0, src1Chan, 0x0, 0x0, 0x0), \
            ALU_WORD1_OP3(src2Sel, 0x0, src2Chan, 0x0, inst, 0x0, dstGpr, 0x0, dstChan, 0x0))

/* Convenience wrappers around ALU_OP2. Suffixes _x2/_x4/_D2 select
 * ALU_OMOD_M2/ALU_OMOD_M4/ALU_OMOD_D2; single-source forms pass ALU_SRC_0
 * as the second source. */
#define ALU_ADD(dstGpr, dstChan, src0Sel, src0Chan, src1Sel, src1Chan) \
   ALU_OP2(OP2_INST_ADD, dstGpr, dstChan, src0Sel, src0Chan, src1Sel, src1Chan, ALU_OMOD_OFF)

#define ALU_ADD_x2(dstGpr, dstChan, src0Sel, src0Chan, src1Sel, src1Chan) \
   ALU_OP2(OP2_INST_ADD, dstGpr, dstChan, src0Sel, src0Chan, src1Sel, src1Chan, ALU_OMOD_M2)

#define ALU_ADD_D2(dstGpr, dstChan, src0Sel, src0Chan, src1Sel, src1Chan) \
   ALU_OP2(OP2_INST_ADD, dstGpr, dstChan, src0Sel, src0Chan, src1Sel, src1Chan, ALU_OMOD_D2)

#define ALU_MUL(dstGpr, dstChan, src0Sel, src0Chan, src1Sel, src1Chan) \
   ALU_OP2(OP2_INST_MUL, dstGpr, dstChan, src0Sel, src0Chan, src1Sel, src1Chan, ALU_OMOD_OFF)

#define ALU_MUL_x2(dstGpr, dstChan, src0Sel, src0Chan, src1Sel, src1Chan) \
   ALU_OP2(OP2_INST_MUL, dstGpr, dstChan, src0Sel, src0Chan, src1Sel, src1Chan, ALU_OMOD_M2)

#define ALU_MUL_x4(dstGpr, dstChan, src0Sel, src0Chan, src1Sel, src1Chan) \
   ALU_OP2(OP2_INST_MUL, dstGpr, dstChan, src0Sel, src0Chan, src1Sel, src1Chan, ALU_OMOD_M4)

#define ALU_MUL_IEEE(dstGpr, dstChan, src0Sel, src0Chan, src1Sel, src1Chan) \
   ALU_OP2(OP2_INST_MUL_IEEE, dstGpr, dstChan, src0Sel, src0Chan, src1Sel, src1Chan, ALU_OMOD_OFF)

#define ALU_MUL_IEEE_x2(dstGpr, dstChan, src0Sel, src0Chan, src1Sel, src1Chan) \
   ALU_OP2(OP2_INST_MUL_IEEE, dstGpr, dstChan, src0Sel, src0Chan, src1Sel, src1Chan, ALU_OMOD_M2)

#define ALU_MUL_IEEE_x4(dstGpr, dstChan, src0Sel, src0Chan, src1Sel, src1Chan) \
   ALU_OP2(OP2_INST_MUL_IEEE, dstGpr, dstChan, src0Sel, src0Chan, src1Sel, src1Chan, ALU_OMOD_M4)

#define ALU_FRACT(dstGpr, dstChan, src0Sel, src0Chan) \
   ALU_OP2(OP2_INST_FRACT, dstGpr, dstChan, src0Sel, src0Chan, ALU_SRC_0, 0x0, ALU_OMOD_OFF)

#define ALU_FLOOR(dstGpr, dstChan, src0Sel, src0Chan) \
   ALU_OP2(OP2_INST_FLOOR, dstGpr, dstChan, src0Sel, src0Chan, ALU_SRC_0, 0x0, ALU_OMOD_OFF)

#define ALU_SQRT_IEEE(dstGpr, dstChan, src0Sel, src0Chan) \
   ALU_OP2(OP2_INST_SQRT_IEEE, dstGpr, dstChan, src0Sel, src0Chan, ALU_SRC_0, 0x0, ALU_OMOD_OFF)

#define ALU_SQRT_IEEE_D2(dstGpr, dstChan, src0Sel, src0Chan) \
   ALU_OP2(OP2_INST_SQRT_IEEE, dstGpr, dstChan, src0Sel, src0Chan, ALU_SRC_0, 0x0, ALU_OMOD_D2)

#define ALU_MOV(dstGpr, dstChan, src0Sel, src0Chan) \
   ALU_OP2(OP2_INST_MOV, dstGpr, dstChan, src0Sel, src0Chan, ALU_SRC_0, 0x0, ALU_OMOD_OFF)

#define ALU_MOV_D2(dstGpr, dstChan, src0Sel, src0Chan) \
   ALU_OP2(OP2_INST_MOV, dstGpr, dstChan, src0Sel, src0Chan, ALU_SRC_0, 0x0, ALU_OMOD_D2)

#define ALU_MOV_x2(dstGpr, dstChan, src0Sel, src0Chan) \
   ALU_OP2(OP2_INST_MOV, dstGpr, dstChan, src0Sel, src0Chan, ALU_SRC_0, 0x0, ALU_OMOD_M2)

#define ALU_MOV_x4(dstGpr, dstChan, src0Sel, src0Chan) \
   ALU_OP2(OP2_INST_MOV, dstGpr, dstChan, src0Sel, src0Chan, ALU_SRC_0, 0x0, ALU_OMOD_M4)

#define ALU_DOT4_IEEE(dstGpr, dstChan, src0Sel, src0Chan, src1Sel, src1Chan) \
   ALU_OP2(OP2_INST_DOT4_IEEE, dstGpr, dstChan, src0Sel, src0Chan, src1Sel, src1Chan, ALU_OMOD_OFF)

#define ALU_DOT4(dstGpr, dstChan, src0Sel, src0Chan, src1Sel, src1Chan) \
   ALU_OP2(OP2_INST_DOT4, dstGpr, dstChan, src0Sel, src0Chan, src1Sel, src1Chan, ALU_OMOD_OFF)

#define ALU_PRED_SETGT(dstGpr, dstChan, src0Sel, src0Chan, src1Sel, src1Chan) \
   ALU_OP2(OP2_INST_PRED_SETGT, dstGpr, dstChan, src0Sel, src0Chan, src1Sel, src1Chan, ALU_OMOD_OFF)

#define ALU_SETE_DX10(dstGpr, dstChan, src0Sel, src0Chan, src1Sel, src1Chan) \
   ALU_OP2(OP2_INST_SETE_DX10, dstGpr, dstChan, src0Sel, src0Chan, src1Sel, src1Chan, ALU_OMOD_OFF)

#define ALU_SETGT_DX10(dstGpr, dstChan, src0Sel, src0Chan, src1Sel, src1Chan) \
   ALU_OP2(OP2_INST_SETGT_DX10, dstGpr, dstChan, src0Sel, src0Chan, src1Sel, src1Chan, ALU_OMOD_OFF)

#define ALU_SETGT(dstGpr, dstChan, src0Sel, src0Chan, src1Sel, src1Chan) \
   ALU_OP2(OP2_INST_SETGT, dstGpr, dstChan, src0Sel, src0Chan, src1Sel, src1Chan, ALU_OMOD_OFF)

#define ALU_PRED_SETE_INT(dstGpr, dstChan, src0Sel, src0Chan, src1Sel, src1Chan) \
   ALU_OP2(OP2_INST_PRED_SETE_INT, dstGpr, dstChan, src0Sel, src0Chan, src1Sel, src1Chan, ALU_OMOD_OFF)

#define ALU_MIN(dstGpr, dstChan, src0Sel, src0Chan, src1Sel, src1Chan) \
   ALU_OP2(OP2_INST_MIN, dstGpr, dstChan, src0Sel, src0Chan, src1Sel, src1Chan, ALU_OMOD_OFF)

#define ALU_MAX(dstGpr, dstChan, src0Sel, src0Chan, src1Sel, src1Chan) \
   ALU_OP2(OP2_INST_MAX, dstGpr, dstChan, src0Sel, src0Chan, src1Sel, src1Chan, ALU_OMOD_OFF)

#define ALU_MAX_DX10(dstGpr, dstChan, src0Sel, src0Chan, src1Sel, src1Chan) \
   ALU_OP2(OP2_INST_MAX_DX10, dstGpr, dstChan, src0Sel, src0Chan, src1Sel, src1Chan, ALU_OMOD_OFF)

#define ALU_RECIP_IEEE(dstGpr, dstChan, src0Sel, src0Chan) \
   ALU_OP2(OP2_INST_RECIP_IEEE, dstGpr, dstChan, src0Sel, src0Chan, ALU_SRC_0, 0x0, ALU_OMOD_OFF)

#define ALU_RECIPSQRT_IEEE(dstGpr, dstChan, src0Sel, src0Chan) \
   ALU_OP2(OP2_INST_RECIPSQRT_IEEE, dstGpr, dstChan, src0Sel, src0Chan, ALU_SRC_0, 0x0, ALU_OMOD_OFF)

#define ALU_SIN(dstGpr, dstChan, src0Sel, src0Chan) \
   ALU_OP2(OP2_INST_SIN, dstGpr, dstChan, src0Sel, src0Chan, ALU_SRC_0, 0x0, ALU_OMOD_OFF)

#define ALU_COS(dstGpr, dstChan, src0Sel, src0Chan) \
   ALU_OP2(OP2_INST_COS, dstGpr, dstChan, src0Sel, src0Chan, ALU_SRC_0, 0x0, ALU_OMOD_OFF)

#define ALU_COS_D2(dstGpr, dstChan, src0Sel, src0Chan) \
   ALU_OP2(OP2_INST_COS, dstGpr, dstChan, src0Sel, src0Chan, ALU_SRC_0, 0x0, ALU_OMOD_D2)

/* Convenience wrappers around ALU_OP3. */
#define ALU_MULADD(dstGpr, dstChan, src0Sel, src0Chan, src1Sel, src1Chan, src2Sel, src2Chan) \
   ALU_OP3(OP3_INST_MULADD, dstGpr, dstChan, src0Sel, src0Chan, src1Sel, src1Chan, src2Sel, src2Chan)

#define ALU_CNDGT(dstGpr, dstChan, src0Sel, src0Chan, src1Sel, src1Chan, src2Sel, src2Chan) \
   ALU_OP3(OP3_INST_CNDGT, dstGpr, dstChan, src0Sel, src0Chan, src1Sel, src1Chan, src2Sel, src2Chan)

#define ALU_CNDE_INT(dstGpr, dstChan, src0Sel, src0Chan, src1Sel, src1Chan, src2Sel, src2Chan) \
   ALU_OP3(OP3_INST_CNDE_INT, dstGpr, dstChan, src0Sel, src0Chan, src1Sel, src1Chan, src2Sel, src2Chan)

/* Texture instructions: each expands to TWO comma-separated QWORDs
 * (word0/word1, then word2/zero padding).
 * NOTE(review): the srcSelX..srcSelW parameters are accepted but not used;
 * the source swizzle is hard-coded (_x, _y, _0, _x for TEX_SAMPLE and
 * _x, _y, _z, _x for the gradient fetches). Left as-is because changing it
 * would alter the bytecode emitted for existing callers - confirm intent. */
#define TEX_SAMPLE(dstReg, dstSelX, dstSelY, dstSelZ, dstSelW, srcReg, srcSelX, srcSelY, srcSelZ, srcSelW, resourceID, samplerID)\
   to_QWORD(TEX_WORD0(TEX_INST_SAMPLE, 0x0, 0x0, resourceID, srcReg, 0x0, 0x0), \
            TEX_WORD1(dstReg, 0x0, dstSelX, dstSelY, dstSelZ, dstSelW, 0x0, TEX_NORMALIZED, TEX_NORMALIZED, TEX_NORMALIZED, TEX_NORMALIZED)), \
   to_QWORD(TEX_WORD2(0x0, 0x0, 0x0, samplerID, _x, _y, _0, _x), 0x00000000)

#define TEX_GET_GRADIENTS_H(dstReg, dstSelX, dstSelY, dstSelZ, dstSelW, srcReg, srcSelX, srcSelY, srcSelZ, srcSelW, resourceID, samplerID)\
   to_QWORD(TEX_WORD0(TEX_INST_GET_GRADIENTS_H, 0x0, 0x0, resourceID, srcReg, 0x0, 0x0), \
            TEX_WORD1(dstReg, 0x0, dstSelX, dstSelY, dstSelZ, dstSelW, 0x0, TEX_NORMALIZED, TEX_NORMALIZED, TEX_NORMALIZED, TEX_NORMALIZED)), \
   to_QWORD(TEX_WORD2(0x0, 0x0, 0x0, samplerID, _x, _y, _z, _x), 0x00000000)

#define TEX_GET_GRADIENTS_V(dstReg, dstSelX, dstSelY, dstSelZ, dstSelW, srcReg, srcSelX, srcSelY, srcSelZ, srcSelW, resourceID, samplerID)\
   to_QWORD(TEX_WORD0(TEX_INST_GET_GRADIENTS_V, 0x0, 0x0, resourceID, srcReg, 0x0, 0x0), \
            TEX_WORD1(dstReg, 0x0, dstSelX, dstSelY, dstSelZ, dstSelW, 0x0, TEX_NORMALIZED, TEX_NORMALIZED, TEX_NORMALIZED, TEX_NORMALIZED)), \
   to_QWORD(TEX_WORD2(0x0, 0x0, 0x0, samplerID, _x, _y, _z, _x), 0x00000000)

/* Vertex fetch: expands to TWO comma-separated QWORDs. Bit 31 of `mega`
 * (set by MEGA()) is forwarded as the ismega flag of word 2. */
#define VTX_FETCH(dstReg, dstSelX, dstSelY, dstSelZ, dstSelW, srcReg, srcSelX, buffer_id, type, mega, offset) \
   to_QWORD(VTX_WORD0(VTX_INST_FETCH, type, buffer_id, srcReg, srcSelX, mega), VTX_WORD1(dstReg, dstSelX, dstSelY, dstSelZ, dstSelW)) , \
   to_QWORD(VTX_WORD2(offset, ((mega) >> 31)), 0x00000000)

/* Initializer repeaters: _xN(v) expands to v repeated N times, separated by
 * commas. */
#define _x2(v)  v, v
#define _x4(v)  _x2(v), _x2(v)
#define _x8(v)  _x4(v), _x4(v)
#define _x16(v) _x8(v), _x8(v)

#define _x9(v)  _x8(v), v
#define _x30(v) _x16(v), _x8(v), _x4(v),_x2(v)
#define _x31(v) _x30(v), v

#endif /* GX2_SHADER_INL_H */