1 /*  RetroArch - A frontend for libretro.
2  *  Copyright (C) 2014-2016 - Ali Bouhlel
3  *
4  *  RetroArch is free software: you can redistribute it and/or modify it under the terms
5  *  of the GNU General Public License as published by the Free Software Found-
6  *  ation, either version 3 of the License, or (at your option) any later version.
7  *
8  *  RetroArch is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
9  *  without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
10  *  PURPOSE.  See the GNU General Public License for more details.
11  *
12  *  You should have received a copy of the GNU General Public License along with RetroArch.
13  *  If not, see <http://www.gnu.org/licenses/>.
14  */
15 
16 #ifndef GX2_SHADER_INL_H
17 #define GX2_SHADER_INL_H
18 
19 #include <retro_endianness.h>
20 
/*
 * to_QWORD(w0, w1): packs two 32-bit words into one 64-bit value so that w0
 * is the first word in memory and w1 the second, for either host byte order
 * (selected by MSB_FIRST).
 * to_LE(x): byte-swaps a 32-bit word when MSB_FIRST is defined, and is the
 * identity otherwise.
 *
 * The word that lands in the low half is masked to 32 bits. Without the mask
 * a signed operand with bit 31 set would be sign-extended by the implicit
 * conversion to 64 bits and overwrite the other word with ones.
 */
#ifdef MSB_FIRST
#define to_QWORD(w0, w1) (((u64)(w0) << 32) | ((u64)(w1) & 0xFFFFFFFFull))
#define to_LE(x) (__builtin_bswap32(x))
#else
#define to_QWORD(w0, w1) (((u64)(w1) << 32) | ((u64)(w0) & 0xFFFFFFFFull))
#define to_LE(x) (x)
#endif
28 
/* CF */

/*
 * Builders for the 32-bit halves of control-flow (CF) instruction words.
 * Each macro ORs its arguments in at fixed bit offsets and passes the result
 * through to_LE().
 *
 * Every argument is parenthesized so a compound expression such as "a | b"
 * is shifted as a whole. Bits 30 and 31 are produced with unsigned
 * arithmetic, because shifting a signed value into the sign bit is undefined.
 * All second-word builders here set bit 31 unconditionally (the bit that the
 * NO_BARRIER modifier clears).
 * NOTE(review): field names presumably follow the AMD R6xx/R7xx ISA
 * microcode formats - confirm against that document.
 */
#define CF_DWORD0(addr) to_LE(addr)

#define CF_DWORD1(popCount, cfConst, cond, count, callCount, inst) \
   to_LE((popCount) | ((cfConst) << 3) | ((cond) << 8) | ((count) << 10) | \
         ((callCount) << 13) | ((inst) << 23) | (1u << 31))

#define CF_ALU_WORD0(addr, kcacheBank0, kcacheBank1, kcacheMode0) \
   to_LE((addr) | ((kcacheBank0) << 22) | ((kcacheBank1) << 26) | ((unsigned)(kcacheMode0) << 30))
#define CF_ALU_WORD1(kcacheMode1, kcacheAddr0, kcacheAddr1, count, altConst, inst) \
   to_LE((kcacheMode1) | ((kcacheAddr0) << 2) | ((kcacheAddr1) << 10) | ((count) << 18) | \
         ((altConst) << 25) | ((inst) << 26) | (1u << 31))

#define CF_EXP_WORD0(dstReg_and_type, srcReg, srcRel, indexGpr, elemSize) \
   to_LE((dstReg_and_type) | ((srcReg) << 15) | ((srcRel) << 22) | ((indexGpr) << 23) | \
         ((unsigned)(elemSize) << 30))

#define CF_EXP_WORD1(srcSelX, srcSelY, srcSelZ, srcSelW, validPixelMode, inst) \
   to_LE((srcSelX) | ((srcSelY) << 3) | ((srcSelZ) << 6) | ((srcSelW) << 9) | \
         ((validPixelMode) << 22) | ((inst) << 23) | (1u << 31))

#define CF_ALLOC_EXPORT_WORD0(arrayBase, type, dstReg, dstRel, indexGpr, elemSize) \
   to_LE((arrayBase) | ((type) << 13) | ((dstReg) << 15) | ((dstRel) << 22) | \
         ((indexGpr) << 23) | ((unsigned)(elemSize) << 30))

#define CF_ALLOC_EXPORT_WORD1_BUF(arraySize, writeMask, inst) \
   to_LE((arraySize) | ((writeMask) << 12) | ((inst) << 23) | (1u << 31))
51 
/*
 * Constant-cache (kcache) helpers.
 * KC0(x) / KC1(x) add x to the respective ALU_SRC_KCACHEn_BASE value.
 * CB1/CB2 and _0_15 are shorthands intended for the KCACHE0()/KCACHE1()
 * modifiers (bank number and lock mode respectively).
 */
#define ALU_SRC_KCACHE0_BASE  0x80
#define ALU_SRC_KCACHE1_BASE  0xA0
#define CF_KCACHE_BANK_LOCK_1 0x1
#define CB1                   0x1
#define CB2                   0x2
#define _0_15                 CF_KCACHE_BANK_LOCK_1

/* The argument is parenthesized so that e.g. KC0(a & b) offsets (a & b). */
#define KC0(x) ((x) + ALU_SRC_KCACHE0_BASE)
#define KC1(x) ((x) + ALU_SRC_KCACHE1_BASE)
61 
/*
 * Postfix modifiers. Each expands to "<operator> <64-bit mask>" and is meant
 * to be written directly after an instruction macro, e.g.
 *    TEX(addr, cnt) VALID_PIX
 * which ORs (or, for NO_BARRIER, AND-NOTs) extra bits into the instruction.
 * to_QWORD(0, x) targets the second 32-bit word, to_QWORD(x, 0) the first.
 *
 * Arguments are parenthesized so compound expressions are shifted as a
 * whole. VALID_PIX is left exactly as written because this header defines it
 * a second time further down and the two definitions must stay identical.
 */
#define NO_BARRIER      & ~to_QWORD(0,to_LE(1u << 31))
#define END_OF_PROGRAM  | to_QWORD(0,to_LE(1 << 21))
#define VALID_PIX       | to_QWORD(0,to_LE(1 << 22))
#define WHOLE_QUAD_MODE | to_QWORD(0,to_LE(1 << 30))
#define BURSTCNT(x)     | to_QWORD(0,to_LE((x) << 17))
#define WRITE(x)        ((x) >> 2)
#define ARRAY_SIZE(x)   (x)
#define ELEM_SIZE(x)    (x)
#define KCACHE0(bank, mode) | to_QWORD(CF_ALU_WORD0(0, (bank), 0, (mode)), 0)
/* NOTE(review): CF_ALU_WORD1 always sets bit 31, so KCACHE1 also ORs that bit in. */
#define KCACHE1(bank, mode) | to_QWORD(CF_ALU_WORD0(0, 0, (bank), 0), CF_ALU_WORD1((mode), 0, 0, 0, 0, 0))

#define DEACTIVATE               1
#define UPDATE_EXEC_MASK(mode)   | to_QWORD(0, to_LE((mode) << 2))
#define UPDATE_PRED              | to_QWORD(0, to_LE(1ull << 3))
#define CLAMP                    | to_QWORD(0, to_LE(1ull << 31))
#define ALU_LAST                 | to_QWORD(to_LE(1ull << 31), 0)
78 
/* ALU */

/*
 * Builders for the two 32-bit halves of an ALU instruction.
 *   ALU_WORD0      - source 0 / source 1 selectors and modifiers.
 *   ALU_WORD1_OP2  - second word of a two-source instruction.
 *   ALU_WORD1_OP3  - second word of a three-source instruction (carries the
 *                    third source in place of the OP2-only fields).
 * dstGpr is masked to 7 bits and dstChan to 2 bits before being placed.
 *
 * All arguments are parenthesized, and clamp is shifted as unsigned because
 * a signed 1 << 31 is undefined.
 */
#define ALU_WORD0(src0Sel, src0Rel, src0Chan, src0Neg, src1Sel, src1Rel, src1Chan, src1Neg, indexMode, predSel) \
   to_LE((src0Sel) | ((src0Rel) << 9) | ((src0Chan) << 10) | ((src0Neg) << 12) | \
         ((src1Sel) << 13) | ((src1Rel) << 22) | ((src1Chan) << 23) | ((src1Neg) << 25) | \
         ((indexMode) << 26) | ((predSel) << 29))

#define ALU_WORD1_OP2(src0Abs, src1Abs, updateExecuteMask, updatePred, writeMask, omod, inst, encoding, bankSwizzle, dstGpr, dstRel, dstChan, clamp) \
   to_LE((src0Abs) | ((src1Abs) << 1) | ((updateExecuteMask) << 2) | ((updatePred) << 3) | \
         ((writeMask) << 4) | ((omod) << 5) | ((inst) << 7) | ((encoding) << 15) | \
         ((bankSwizzle) << 18) | (((dstGpr) & 0x7F) << 21) | ((dstRel) << 28) | \
         (((dstChan) & 0x3) << 29) | ((unsigned)(clamp) << 31))

#define ALU_WORD1_OP3(src2Sel, src2Rel, src2Chan, src2Neg, inst, bankSwizzle, dstGpr, dstRel, dstChan, clamp) \
   to_LE((src2Sel) | ((src2Rel) << 9) | ((src2Chan) << 10) | ((src2Neg) << 12) | ((inst) << 13) | \
         ((bankSwizzle) << 18) | (((dstGpr) & 0x7F) << 21) | ((dstRel) << 28) | \
         (((dstChan) & 0x3) << 29) | ((unsigned)(clamp) << 31))
92 
/* TEX */

/*
 * Builders for the three 32-bit words of a texture fetch instruction.
 * Arguments are parenthesized; fields that reach bit 31 are shifted as
 * unsigned because overflowing a signed left shift is undefined.
 */
#define TEX_WORD0(inst, bcFracMode, fetchWholeQuad, resourceID, srcReg, srcRel, altConst) \
   to_LE((inst) | ((bcFracMode) << 5) | ((fetchWholeQuad) << 7) | ((resourceID) << 8) | \
         ((srcReg) << 16) | ((srcRel) << 23) | ((altConst) << 24))

#define TEX_WORD1(dstReg, dstRel, dstSelX, dstSelY, dstSelZ, dstSelW, lodBias, coordTypeX, coordTypeY, coordTypeZ, coordTypeW) \
   to_LE((dstReg) | ((dstRel) << 7) | ((dstSelX) << 9) | ((dstSelY) << 12) | ((dstSelZ) << 15) | \
         ((dstSelW) << 18) | ((lodBias) << 21) | ((coordTypeX) << 28) | ((coordTypeY) << 29) | \
         ((coordTypeZ) << 30) | ((unsigned)(coordTypeW) << 31))

#define TEX_WORD2(offsetX, offsetY, offsetZ, samplerID, srcSelX, srcSelY, srcSelZ, srcSelW) \
   to_LE((offsetX) | ((offsetY) << 5) | ((offsetZ) << 10) | ((samplerID) << 15) | \
         ((srcSelX) << 20) | ((srcSelY) << 23) | ((srcSelZ) << 26) | ((unsigned)(srcSelW) << 29))

/* VTX */

/*
 * Builders for the three 32-bit words of a vertex fetch instruction.
 * VTX_WORD1 always sets bit 21. The mega argument is shifted as unsigned;
 * any of its bits above bit 5 are shifted out of the 32-bit word.
 */
#define VTX_WORD0(inst, type, buffer_id, srcReg, srcSelX, mega) \
   to_LE((inst) | ((type) << 5) | ((buffer_id) << 8) | ((srcReg) << 16) | ((srcSelX) << 24) | \
         ((unsigned)(mega) << 26))

#define VTX_WORD1(dstReg, dstSelX, dstSelY, dstSelZ, dstSelW) \
   to_LE((dstReg) | ((dstSelX) << 9) | ((dstSelY) << 12) | ((dstSelZ) << 15) | ((dstSelW) << 18) | (1 << 21))

#define VTX_WORD2(offset, ismega) \
   to_LE((offset) | ((ismega) << 19))
112 
/*
 * Component selectors used for swizzles: the four channels, the constants
 * 0 and 1, and "masked".
 */
#define _x 0
#define _y 1
#define _z 2
#define _w 3
#define _0 4
#define _1 5
#define _m 7 /*mask*/

/*
 * Channel write masks, one bit per channel with x in bit 0. Written in hex:
 * "0b" binary literals are a compiler extension before C23.
 */
#define _xyzw 0xF
#define _xy__ 0x3

/* Packs four component selectors into one word, c0 in the top byte. */
#define GX2_COMP_SEL(c0, c1, c2, c3) (((c0) << 24) | ((c1) << 16) | ((c2) << 8) | (c3))
125 
/*
 * Literal constants that follow an ALU instruction group. Two 32-bit
 * literals share one 64-bit slot; an odd count pads the last slot with zero.
 * ALU_LITERAL3/4/5 expand to several comma-separated 64-bit values.
 *
 * ALU_LITERAL5 previously named its last parameter v5 while expanding v4,
 * so any use of it failed to compile.
 */
#define ALU_LITERAL(v)  to_QWORD(to_LE(v), 0)
#define ALU_LITERAL2(v0,v1)  to_QWORD(to_LE(v0), to_LE(v1))
#define ALU_LITERAL3(v0,v1,v2)  ALU_LITERAL2(v0,v1),ALU_LITERAL(v2)
#define ALU_LITERAL4(v0,v1,v2,v3)  ALU_LITERAL2(v0,v1),ALU_LITERAL2(v2,v3)
#define ALU_LITERAL5(v0,v1,v2,v3,v4)  ALU_LITERAL4(v0,v1,v2,v3),ALU_LITERAL(v4)
131 
/*
 * SRCx_SEL special constants: selector values 0xF4..0xFF that name a fixed
 * operand instead of a register.
 */
#define ALU_SRC_1_DBL_L   0xF4
#define ALU_SRC_1_DBL_M   0xF5
#define ALU_SRC_0_5_DBL_L 0xF6
#define ALU_SRC_0_5_DBL_M 0xF7
#define ALU_SRC_0         0xF8
#define ALU_SRC_1         0xF9
#define ALU_SRC_1_INT     0xFA
#define ALU_SRC_M_1_INT   0xFB
#define ALU_SRC_0_5       0xFC
#define ALU_SRC_LITERAL   0xFD
#define ALU_SRC_PV        0xFE
#define ALU_SRC_PS        0xFF

/*
 * Postfix source modifiers, written after a selector ("_R1 _NEG").
 * They OR a flag into bit 12 / bit 13 of the selector value: ALU_OP2 keeps
 * bit 12 in the selector field (where it lines up with the srcNeg bit of
 * ALU_WORD0) and turns bit 13 into the srcAbs flag.
 */
#define _NEG | (1 << 12)
#define _ABS | (1 << 13)
/* Output modifier values for the omod field of ALU_WORD1_OP2. */
#define ALU_OMOD_OFF          0x0
#define ALU_OMOD_M2           0x1
#define ALU_OMOD_M4           0x2
#define ALU_OMOD_D2           0x3

/*
 * Bank swizzle values, plus postfix modifiers (VEC_xxx / SCL_xxx) that OR
 * the value into bits 18 and up of the second instruction word.
 */
#define ALU_VEC_012           0x0
#define ALU_VEC_021           0x1
#define ALU_VEC_120           0x2
#define ALU_VEC_102           0x3
#define ALU_VEC_201           0x4
#define ALU_VEC_210           0x5
#define VEC_012               | to_QWORD(0, to_LE(ALU_VEC_012 << 18))
#define VEC_021               | to_QWORD(0, to_LE(ALU_VEC_021 << 18))
#define VEC_120               | to_QWORD(0, to_LE(ALU_VEC_120 << 18))
#define VEC_102               | to_QWORD(0, to_LE(ALU_VEC_102 << 18))
#define VEC_201               | to_QWORD(0, to_LE(ALU_VEC_201 << 18))
#define VEC_210               | to_QWORD(0, to_LE(ALU_VEC_210 << 18))

/*
 * VALID_PIX is already defined with the CF modifiers earlier in this header.
 * The copy here is guarded so the two definitions cannot drift apart and
 * trigger a macro-redefinition diagnostic.
 */
#ifndef VALID_PIX
#define VALID_PIX       | to_QWORD(0,to_LE(1 << 22))
#endif

#define ALU_SCL_210           0x0
#define ALU_SCL_122           0x1
#define ALU_SCL_212           0x2
#define ALU_SCL_221           0x3

#define SCL_210               | to_QWORD(0, to_LE(ALU_SCL_210 << 18))
#define SCL_122               | to_QWORD(0, to_LE(ALU_SCL_122 << 18))
#define SCL_212               | to_QWORD(0, to_LE(ALU_SCL_212 << 18))
#define SCL_221               | to_QWORD(0, to_LE(ALU_SCL_221 << 18))
178 
/*
 * Argument helpers for VTX_FETCH.
 *   FETCH_TYPE(x), OFFSET(x) - readability wrappers that yield x unchanged.
 *   MINI(x)                  - x - 1.
 *   MEGA(x)                  - MINI(x) with bit 31 set as a marker;
 *                              VTX_FETCH reads the marker back via (mega >> 31).
 * Pass-through arguments are parenthesized so the result stays a single
 * operand wherever it is pasted.
 */
#define FETCH_TYPE(x) (x)
#define MINI(x) ((x) - 1)
#define MEGA(x) (MINI(x) | 0x80000000u)
#define OFFSET(x) (x)

/* Values for the fetch type argument. */
#define VERTEX_DATA     0
#define INSTANCE_DATA   1
#define NO_INDEX_OFFSET 2
187 
/* CF defines: values for the cond argument of CF_DWORD1 */
#define CF_COND_ACTIVE   0x0
#define CF_COND_FALSE    0x1
#define CF_COND_BOOL     0x2
#define CF_COND_NOT_BOOL 0x3

/* TEX defines: values for the coordType arguments of TEX_WORD1 */
#define TEX_UNNORMALIZED 0x0
#define TEX_NORMALIZED   0x1
197 
/*
 * Instruction opcodes, listed in ascending numeric order within each group.
 */

/* CF: opcodes for the inst argument of CF_DWORD1 */
#define CF_INST_TEX              0x01
#define CF_INST_VTX              0x02
#define CF_INST_JUMP             0x0A
#define CF_INST_ELSE             0x0D
#define CF_INST_CALL_FS          0x13
#define CF_INST_EMIT_VERTEX      0x15
#define CF_INST_MEM_RING         0x26

/* EXP: opcodes for the inst argument of CF_EXP_WORD1 */
#define CF_INST_EXP              0x27
#define CF_INST_EXP_DONE         0x28

/*
 * CF ALU clauses: opcodes for the inst argument of CF_ALU_WORD1. That field
 * sits at a different bit offset than the CF_DWORD1 one, so the value 0x0A
 * shared with CF_INST_JUMP does not collide.
 */
#define CF_INST_ALU              0x08
#define CF_INST_ALU_PUSH_BEFORE  0x09
#define CF_INST_ALU_POP_AFTER    0x0A

/* ALU, two-source encoding (ALU_WORD1_OP2) */
#define OP2_INST_ADD             0x00
#define OP2_INST_MUL             0x01
#define OP2_INST_MUL_IEEE        0x02
#define OP2_INST_MAX             0x03
#define OP2_INST_MIN             0x04
#define OP2_INST_MAX_DX10        0x05
#define OP2_INST_SETGT           0x09
#define OP2_INST_SETE_DX10       0x0C
#define OP2_INST_SETGT_DX10      0x0D
#define OP2_INST_FRACT           0x10
#define OP2_INST_FLOOR           0x14
#define OP2_INST_MOV             0x19
#define OP2_INST_PRED_SETGT      0x21
#define OP2_INST_PRED_SETE_INT   0x42
#define OP2_INST_DOT4            0x50
#define OP2_INST_DOT4_IEEE       0x51
#define OP2_INST_RECIP_IEEE      0x66
#define OP2_INST_RECIPSQRT_IEEE  0x69
#define OP2_INST_SQRT_IEEE       0x6A
#define OP2_INST_SIN             0x6E
#define OP2_INST_COS             0x6F

/* ALU, three-source encoding (ALU_WORD1_OP3) */
#define OP3_INST_MULADD          0x10
#define OP3_INST_CNDGT           0x19
#define OP3_INST_CNDE_INT        0x1C

/* TEX */
#define TEX_INST_GET_GRADIENTS_H 0x07
#define TEX_INST_GET_GRADIENTS_V 0x08
#define TEX_INST_SAMPLE          0x10

/* VTX */
#define VTX_INST_FETCH           0x00
248 
/* EXPORT_TYPE */
#define EXPORT_TYPE_PIXEL  0x0
#define EXPORT_TYPE_POS    0x1
#define EXPORT_TYPE_PARAM  0x2

#define EXPORT_ARRAY_BASE_POS(id)      (0x3C + (id))   /* [0, 3] */
#define EXPORT_ARRAY_BASE_PARAM(id)    (id)            /* [0, 31] */
#define EXPORT_ARRAY_BASE_PIX(id)      (id)

/*
 * exports: combined "array base | export type << 13" value, matching the
 * dstReg_and_type argument of EXP() / EXP_DONE(). The whole expansion is
 * parenthesized so it behaves as one operand in any surrounding expression.
 */
#define POS(id)   (EXPORT_ARRAY_BASE_POS(id)   | (EXPORT_TYPE_POS   << 13))
#define PARAM(id) (EXPORT_ARRAY_BASE_PARAM(id) | (EXPORT_TYPE_PARAM << 13))
#define PIX(id)   (EXPORT_ARRAY_BASE_PIX(id)   | (EXPORT_TYPE_PIXEL << 13))
#define POS0   POS(0)
#define PARAM0 PARAM(0)
#define PARAM1 PARAM(1)
#define PIX0   PIX(0)
266 
/*
 * registers
 * NOTE(review): "__" and the "_R..." names fall in the identifier space C
 * reserves for the implementation; they are kept because callers use them.
 */
#define __     (0x80) /* invalid register (write mask off) */
#define _R(x)  (x)
#define _R0    _R(0)
#define _R1    _R(1)
#define _R2    _R(2)
#define _R3    _R(3)
#define _R4    _R(4)
#define _R5    _R(5)
#define _R6    _R(6)
#define _R7    _R(7)
#define _R8    _R(8)
#define _R9    _R(9)
#define _R10   _R(10)
#define _R11   _R(11)
#define _R12   _R(12)
#define _R13   _R(13)
#define _R14   _R(14)
#define _R15   _R(15)

#define _R120  _R(120)
#define _R121  _R(121)
#define _R122  _R(122)
#define _R123  _R(123)
#define _R124  _R(124)
#define _R125  _R(125)
#define _R126  _R(126)
#define _R127  _R(127)

/* texture */
#define _t(x)  (x)
#define _t0    _t(0)

/* sampler */
#define _s(x)  (x)
#define _s0    _s(0)

/* buffer index (pass-through) */
#define _b(x)  (x)
305 
/*
 * Control-flow instructions. Each macro yields one 64-bit value built from a
 * first-word and a second-word encoder.
 *
 * For TEX, VTX and the ALU clause macros, cnt is the number of instructions
 * in the clause; the encoded field holds cnt - 1. Arguments are parenthesized
 * before use so compound expressions (e.g. "a & b") are evaluated as a whole
 * before the subtraction or any shift in the word encoders.
 */
#define CALL_FS to_QWORD(CF_DWORD0(0), CF_DWORD1(0,0,0,0,0,CF_INST_CALL_FS))

#define TEX(addr, cnt) to_QWORD(CF_DWORD0(addr), CF_DWORD1(0x0, 0x0, CF_COND_ACTIVE, ((cnt) - 1), 0x0, CF_INST_TEX))
#define VTX(addr, cnt) to_QWORD(CF_DWORD0(addr), CF_DWORD1(0x0, 0x0, CF_COND_ACTIVE, ((cnt) - 1), 0x0, CF_INST_VTX))
#define JUMP(popCount, addr) to_QWORD(CF_DWORD0(addr), CF_DWORD1((popCount), 0x0, CF_COND_ACTIVE, 0x0, 0x0, CF_INST_JUMP))
#define ELSE(popCount, addr) to_QWORD(CF_DWORD0(addr), CF_DWORD1((popCount), 0x0, CF_COND_ACTIVE, 0x0, 0x0, CF_INST_ELSE))

#define ALU(addr, cnt) to_QWORD(CF_ALU_WORD0((addr), 0x0, 0x0, 0x0), CF_ALU_WORD1(0x0, 0x0, 0x0, ((cnt) - 1), 0x0, CF_INST_ALU))
#define ALU_PUSH_BEFORE(addr, cnt) to_QWORD(CF_ALU_WORD0((addr), 0x0, 0x0, 0x0), CF_ALU_WORD1(0x0, 0x0, 0x0, ((cnt) - 1), 0x0, CF_INST_ALU_PUSH_BEFORE))
#define ALU_POP_AFTER(addr, cnt) to_QWORD(CF_ALU_WORD0((addr), 0x0, 0x0, 0x0), CF_ALU_WORD1(0x0, 0x0, 0x0, ((cnt) - 1), 0x0, CF_INST_ALU_POP_AFTER))

/* Exports: dstReg_and_type is a combined value such as POS0 / PARAM0 / PIX0. */
#define EXP_DONE(dstReg_and_type, srcReg, srcSelX, srcSelY, srcSelZ, srcSelW) \
   to_QWORD(CF_EXP_WORD0((dstReg_and_type), (srcReg), 0x0, 0x0, 0x0), \
   CF_EXP_WORD1((srcSelX), (srcSelY), (srcSelZ), (srcSelW), 0x0, CF_INST_EXP_DONE))

#define EXP(dstReg_and_type, srcReg, srcSelX, srcSelY, srcSelZ, srcSelW) \
   to_QWORD(CF_EXP_WORD0((dstReg_and_type), (srcReg), 0x0, 0x0, 0x0), \
   CF_EXP_WORD1((srcSelX), (srcSelY), (srcSelZ), (srcSelW), 0x0, CF_INST_EXP))

#define MEM_RING(arrayBase, dstReg, writeMask, arraySize, elemSize) \
   to_QWORD(CF_ALLOC_EXPORT_WORD0((arrayBase), 0x00, (dstReg), 0x00, 0x00, (elemSize)), \
   CF_ALLOC_EXPORT_WORD1_BUF((arraySize), (writeMask), CF_INST_MEM_RING))

#define EMIT_VERTEX to_QWORD(0, CF_DWORD1(0, 0, 0, 0, 0, CF_INST_EMIT_VERTEX))
328 
/*
 * ALU_OP2 / ALU_OP3: assemble one 64-bit ALU instruction from ALU_WORD0 and
 * ALU_WORD1_OP2 / ALU_WORD1_OP3. Rel, neg, index mode, pred select, bank
 * swizzle and clamp fields are all passed as zero.
 *
 * ALU_OP2 splits each source selector: the low 13 bits go to ALU_WORD0
 * (bit 12 is where _NEG puts its flag) and everything from bit 13 upward
 * becomes the src0Abs / src1Abs argument (where _ABS puts its flag). The
 * write mask is derived from dstGpr: 0 when bit 7 is set (the "__"
 * register), 1 otherwise.
 *
 * ALU_OP3 forwards its selectors unchanged.
 */
#define ALU_OP2(inst, dstGpr, dstChan, src0Sel, src0Chan, src1Sel, src1Chan, omod) \
   to_QWORD( \
      ALU_WORD0(((src0Sel) & 0x1FFF), 0x0, src0Chan, 0x0, \
                ((src1Sel) & 0x1FFF), 0x0, src1Chan, 0x0, \
                0x0, 0x0), \
      ALU_WORD1_OP2(((src0Sel) >> 13), ((src1Sel) >> 13), \
                    0x0, 0x0, \
                    (((dstGpr&__) >> 7) ^ 0x1), \
                    omod, inst, 0x0, 0x0, \
                    dstGpr, 0x0, dstChan, 0x0))

#define ALU_OP3(inst, dstGpr, dstChan, src0Sel, src0Chan, src1Sel, src1Chan, src2Sel, src2Chan) \
   to_QWORD( \
      ALU_WORD0(src0Sel, 0x0, src0Chan, 0x0, \
                src1Sel, 0x0, src1Chan, 0x0, \
                0x0, 0x0), \
      ALU_WORD1_OP3(src2Sel, 0x0, src2Chan, 0x0, \
                    inst, 0x0, \
                    dstGpr, 0x0, dstChan, 0x0))
336 
/*
 * ADD / MUL / MUL_IEEE, each with optional output-modifier variants:
 *   _x2 -> ALU_OMOD_M2,  _x4 -> ALU_OMOD_M4,  _D2 -> ALU_OMOD_D2.
 * Arguments: destination register and channel, then register/selector and
 * channel of source 0 and source 1.
 */

/* ADD */
#define ALU_ADD(dst, dc, s0, c0, s1, c1)         ALU_OP2(OP2_INST_ADD, dst, dc, s0, c0, s1, c1, ALU_OMOD_OFF)
#define ALU_ADD_x2(dst, dc, s0, c0, s1, c1)      ALU_OP2(OP2_INST_ADD, dst, dc, s0, c0, s1, c1, ALU_OMOD_M2)
#define ALU_ADD_D2(dst, dc, s0, c0, s1, c1)      ALU_OP2(OP2_INST_ADD, dst, dc, s0, c0, s1, c1, ALU_OMOD_D2)

/* MUL */
#define ALU_MUL(dst, dc, s0, c0, s1, c1)         ALU_OP2(OP2_INST_MUL, dst, dc, s0, c0, s1, c1, ALU_OMOD_OFF)
#define ALU_MUL_x2(dst, dc, s0, c0, s1, c1)      ALU_OP2(OP2_INST_MUL, dst, dc, s0, c0, s1, c1, ALU_OMOD_M2)
#define ALU_MUL_x4(dst, dc, s0, c0, s1, c1)      ALU_OP2(OP2_INST_MUL, dst, dc, s0, c0, s1, c1, ALU_OMOD_M4)

/* MUL_IEEE */
#define ALU_MUL_IEEE(dst, dc, s0, c0, s1, c1)    ALU_OP2(OP2_INST_MUL_IEEE, dst, dc, s0, c0, s1, c1, ALU_OMOD_OFF)
#define ALU_MUL_IEEE_x2(dst, dc, s0, c0, s1, c1) ALU_OP2(OP2_INST_MUL_IEEE, dst, dc, s0, c0, s1, c1, ALU_OMOD_M2)
#define ALU_MUL_IEEE_x4(dst, dc, s0, c0, s1, c1) ALU_OP2(OP2_INST_MUL_IEEE, dst, dc, s0, c0, s1, c1, ALU_OMOD_M4)
363 
/*
 * Remaining two-word ALU instructions.
 *
 * Single-source operations still use the OP2 encoding; their unused second
 * operand is always ALU_SRC_0, channel 0. ALU_OP2_UNARY captures that once.
 * Suffixes select the output modifier: _x2 -> ALU_OMOD_M2, _x4 -> ALU_OMOD_M4,
 * _D2 -> ALU_OMOD_D2; no suffix -> ALU_OMOD_OFF.
 */
#define ALU_OP2_UNARY(inst, dst, dc, s0, c0, omod) \
   ALU_OP2(inst, dst, dc, s0, c0, ALU_SRC_0, 0x0, omod)

/* single-source */
#define ALU_FRACT(dst, dc, s0, c0)          ALU_OP2_UNARY(OP2_INST_FRACT, dst, dc, s0, c0, ALU_OMOD_OFF)
#define ALU_FLOOR(dst, dc, s0, c0)          ALU_OP2_UNARY(OP2_INST_FLOOR, dst, dc, s0, c0, ALU_OMOD_OFF)
#define ALU_SQRT_IEEE(dst, dc, s0, c0)      ALU_OP2_UNARY(OP2_INST_SQRT_IEEE, dst, dc, s0, c0, ALU_OMOD_OFF)
#define ALU_SQRT_IEEE_D2(dst, dc, s0, c0)   ALU_OP2_UNARY(OP2_INST_SQRT_IEEE, dst, dc, s0, c0, ALU_OMOD_D2)
#define ALU_MOV(dst, dc, s0, c0)            ALU_OP2_UNARY(OP2_INST_MOV, dst, dc, s0, c0, ALU_OMOD_OFF)
#define ALU_MOV_D2(dst, dc, s0, c0)         ALU_OP2_UNARY(OP2_INST_MOV, dst, dc, s0, c0, ALU_OMOD_D2)
#define ALU_MOV_x2(dst, dc, s0, c0)         ALU_OP2_UNARY(OP2_INST_MOV, dst, dc, s0, c0, ALU_OMOD_M2)
#define ALU_MOV_x4(dst, dc, s0, c0)         ALU_OP2_UNARY(OP2_INST_MOV, dst, dc, s0, c0, ALU_OMOD_M4)
#define ALU_RECIP_IEEE(dst, dc, s0, c0)     ALU_OP2_UNARY(OP2_INST_RECIP_IEEE, dst, dc, s0, c0, ALU_OMOD_OFF)
#define ALU_RECIPSQRT_IEEE(dst, dc, s0, c0) ALU_OP2_UNARY(OP2_INST_RECIPSQRT_IEEE, dst, dc, s0, c0, ALU_OMOD_OFF)
#define ALU_SIN(dst, dc, s0, c0)            ALU_OP2_UNARY(OP2_INST_SIN, dst, dc, s0, c0, ALU_OMOD_OFF)
#define ALU_COS(dst, dc, s0, c0)            ALU_OP2_UNARY(OP2_INST_COS, dst, dc, s0, c0, ALU_OMOD_OFF)
#define ALU_COS_D2(dst, dc, s0, c0)         ALU_OP2_UNARY(OP2_INST_COS, dst, dc, s0, c0, ALU_OMOD_D2)

/* two-source */
#define ALU_DOT4_IEEE(dst, dc, s0, c0, s1, c1)     ALU_OP2(OP2_INST_DOT4_IEEE, dst, dc, s0, c0, s1, c1, ALU_OMOD_OFF)
#define ALU_DOT4(dst, dc, s0, c0, s1, c1)          ALU_OP2(OP2_INST_DOT4, dst, dc, s0, c0, s1, c1, ALU_OMOD_OFF)
#define ALU_PRED_SETGT(dst, dc, s0, c0, s1, c1)    ALU_OP2(OP2_INST_PRED_SETGT, dst, dc, s0, c0, s1, c1, ALU_OMOD_OFF)
#define ALU_SETE_DX10(dst, dc, s0, c0, s1, c1)     ALU_OP2(OP2_INST_SETE_DX10, dst, dc, s0, c0, s1, c1, ALU_OMOD_OFF)
#define ALU_SETGT_DX10(dst, dc, s0, c0, s1, c1)    ALU_OP2(OP2_INST_SETGT_DX10, dst, dc, s0, c0, s1, c1, ALU_OMOD_OFF)
#define ALU_SETGT(dst, dc, s0, c0, s1, c1)         ALU_OP2(OP2_INST_SETGT, dst, dc, s0, c0, s1, c1, ALU_OMOD_OFF)
#define ALU_PRED_SETE_INT(dst, dc, s0, c0, s1, c1) ALU_OP2(OP2_INST_PRED_SETE_INT, dst, dc, s0, c0, s1, c1, ALU_OMOD_OFF)
#define ALU_MIN(dst, dc, s0, c0, s1, c1)           ALU_OP2(OP2_INST_MIN, dst, dc, s0, c0, s1, c1, ALU_OMOD_OFF)
#define ALU_MAX(dst, dc, s0, c0, s1, c1)           ALU_OP2(OP2_INST_MAX, dst, dc, s0, c0, s1, c1, ALU_OMOD_OFF)
#define ALU_MAX_DX10(dst, dc, s0, c0, s1, c1)      ALU_OP2(OP2_INST_MAX_DX10, dst, dc, s0, c0, s1, c1, ALU_OMOD_OFF)
432 
/*
 * Three-source ALU instructions (OP3 encoding).
 * Arguments: destination register and channel, then register/selector and
 * channel for each of the three sources.
 */
#define ALU_MULADD(dst, dc, s0, c0, s1, c1, s2, c2) \
   ALU_OP3(OP3_INST_MULADD, dst, dc, s0, c0, s1, c1, s2, c2)

#define ALU_CNDGT(dst, dc, s0, c0, s1, c1, s2, c2) \
   ALU_OP3(OP3_INST_CNDGT, dst, dc, s0, c0, s1, c1, s2, c2)

#define ALU_CNDE_INT(dst, dc, s0, c0, s1, c1, s2, c2) \
   ALU_OP3(OP3_INST_CNDE_INT, dst, dc, s0, c0, s1, c1, s2, c2)
441 
/*
 * Texture fetch instructions. Each macro expands to TWO comma-separated
 * 64-bit values: words 0 and 1, then word 2 padded with a zero word.
 * All four coordinate types are TEX_NORMALIZED; offsets and LOD bias are 0.
 *
 * NOTE(review): the srcSelX/Y/Z/W arguments are accepted but never used. The
 * source swizzle is fixed to (_x, _y, _0, _x) for TEX_SAMPLE and to
 * (_x, _y, _z, _x) for the two gradient fetches. Existing callers may rely
 * on this, so confirm before wiring the arguments through.
 */
#define TEX_SAMPLE(dstReg, dstSelX, dstSelY, dstSelZ, dstSelW, srcReg, srcSelX, srcSelY, srcSelZ, srcSelW, resourceID, samplerID) \
   to_QWORD( \
      TEX_WORD0(TEX_INST_SAMPLE, 0x0, 0x0, resourceID, srcReg, 0x0, 0x0), \
      TEX_WORD1(dstReg, 0x0, dstSelX, dstSelY, dstSelZ, dstSelW, 0x0, \
                TEX_NORMALIZED, TEX_NORMALIZED, TEX_NORMALIZED, TEX_NORMALIZED)), \
   to_QWORD( \
      TEX_WORD2(0x0, 0x0, 0x0, samplerID, _x, _y, _0, _x), \
      0x00000000)

#define TEX_GET_GRADIENTS_H(dstReg, dstSelX, dstSelY, dstSelZ, dstSelW, srcReg, srcSelX, srcSelY, srcSelZ, srcSelW, resourceID, samplerID) \
   to_QWORD( \
      TEX_WORD0(TEX_INST_GET_GRADIENTS_H, 0x0, 0x0, resourceID, srcReg, 0x0, 0x0), \
      TEX_WORD1(dstReg, 0x0, dstSelX, dstSelY, dstSelZ, dstSelW, 0x0, \
                TEX_NORMALIZED, TEX_NORMALIZED, TEX_NORMALIZED, TEX_NORMALIZED)), \
   to_QWORD( \
      TEX_WORD2(0x0, 0x0, 0x0, samplerID, _x, _y, _z, _x), \
      0x00000000)

#define TEX_GET_GRADIENTS_V(dstReg, dstSelX, dstSelY, dstSelZ, dstSelW, srcReg, srcSelX, srcSelY, srcSelZ, srcSelW, resourceID, samplerID) \
   to_QWORD( \
      TEX_WORD0(TEX_INST_GET_GRADIENTS_V, 0x0, 0x0, resourceID, srcReg, 0x0, 0x0), \
      TEX_WORD1(dstReg, 0x0, dstSelX, dstSelY, dstSelZ, dstSelW, 0x0, \
                TEX_NORMALIZED, TEX_NORMALIZED, TEX_NORMALIZED, TEX_NORMALIZED)), \
   to_QWORD( \
      TEX_WORD2(0x0, 0x0, 0x0, samplerID, _x, _y, _z, _x), \
      0x00000000)
456 
/*
 * Vertex fetch instruction. Expands to TWO comma-separated 64-bit values:
 * words 0 and 1, then word 2 padded with a zero word.
 * mega goes into VTX_WORD0 and its bit 31 (set by MEGA()) is passed to
 * VTX_WORD2 as the ismega flag.
 */
#define VTX_FETCH(dstReg, dstSelX, dstSelY, dstSelZ, dstSelW, srcReg, srcSelX, buffer_id, type, mega, offset) \
   to_QWORD( \
      VTX_WORD0(VTX_INST_FETCH, type, buffer_id, srcReg, srcSelX, mega), \
      VTX_WORD1(dstReg, dstSelX, dstSelY, dstSelZ, dstSelW)), \
   to_QWORD( \
      VTX_WORD2(offset, (mega >> 31)), \
      0x00000000)
460 
/*
 * Repetition helpers for array initializers: _xN(v) expands to v written N
 * times, separated by commas. v is pasted textually, so it is evaluated once
 * per copy.
 */

/* powers of two */
#define _x2(v)  v, v
#define _x4(v)  _x2(v), _x2(v)
#define _x8(v)  _x4(v), _x4(v)
#define _x16(v) _x8(v), _x8(v)

/* other counts, composed from the powers of two */
#define _x9(v)  _x8(v), v
#define _x30(v) _x16(v), _x8(v), _x4(v), _x2(v)
#define _x31(v) _x16(v), _x8(v), _x4(v), _x2(v), v
469 
470 #endif /* GX2_SHADER_INL_H */
471