1 /*
2 * Copyright 2008 Advanced Micro Devices, Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 * SOFTWARE.
22 *
23 * Author: Alex Deucher <alexander.deucher@amd.com>
24 *
25 */
26
27 #ifdef HAVE_CONFIG_H
28 #include "config.h"
29 #endif
30
31 #include "xf86.h"
32
33 #include "radeon.h"
34 #include "r600_shader.h"
35 #include "r600_reg.h"
36
37 /* solid vs --------------------------------------- */
/*
 * Build the vertex shader used for solid fills.
 *
 * Stores the program into shader[] one 32-bit word at a time and returns the
 * number of words stored (12 here), so the caller must provide at least that
 * much room.  ChipSet is not referenced directly by this body.
 *
 * Program layout (slot = two words, matching the ADDR() operands):
 *   0    CF   VTX clause at slot 4, one fetch
 *   1    CF   EXPORT_DONE of GPR 1 to POS0
 *   2    CF   EXPORT_DONE of GPR 0 to PARAM 0, END_OF_PROGRAM
 *   3    --   zero padding
 *   4/5  VTX  fetch of FMT_32_32_FLOAT data into GPR 1 as .xy01
 */
int R600_solid_vs(RADEONChipFamily ChipSet, uint32_t *shader)
{
    int i = 0;

    /*
     * Chosen here rather than inside the VTX_DWORD2() argument list:
     * VTX_DWORD2 is presumably a function-like macro, and a preprocessor
     * directive inside a macro invocation is undefined behaviour in ISO C.
     */
#if X_BYTE_ORDER == X_BIG_ENDIAN
    const int fetch_swap = SQ_ENDIAN_8IN32;
#else
    const int fetch_swap = SQ_ENDIAN_NONE;
#endif

    /* 0 - vertex fetch clause */
    shader[i++] = CF_DWORD0(ADDR(4));
    shader[i++] = CF_DWORD1(POP_COUNT(0), CF_CONST(0), COND(SQ_CF_COND_ACTIVE),
                            I_COUNT(1), CALL_COUNT(0), END_OF_PROGRAM(0),
                            VALID_PIXEL_MODE(0), CF_INST(SQ_CF_INST_VTX),
                            WHOLE_QUAD_MODE(0), BARRIER(1));

    /* 1 - position export */
    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_POS0), TYPE(SQ_EXPORT_POS),
                                          RW_GPR(1), RW_REL(ABSOLUTE),
                                          INDEX_GPR(0), ELEM_SIZE(0));
    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X), SRC_SEL_Y(SQ_SEL_Y),
                                               SRC_SEL_Z(SQ_SEL_Z), SRC_SEL_W(SQ_SEL_W),
                                               R6xx_ELEM_LOOP(0), BURST_COUNT(1),
                                               END_OF_PROGRAM(0), VALID_PIXEL_MODE(0),
                                               CF_INST(SQ_CF_INST_EXPORT_DONE),
                                               WHOLE_QUAD_MODE(0), BARRIER(1));

    /* 2 - always export a param whether it's used or not */
    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(0), TYPE(SQ_EXPORT_PARAM),
                                          RW_GPR(0), RW_REL(ABSOLUTE),
                                          INDEX_GPR(0), ELEM_SIZE(0));
    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X), SRC_SEL_Y(SQ_SEL_Y),
                                               SRC_SEL_Z(SQ_SEL_Z), SRC_SEL_W(SQ_SEL_W),
                                               R6xx_ELEM_LOOP(0), BURST_COUNT(0),
                                               END_OF_PROGRAM(1), VALID_PIXEL_MODE(0),
                                               CF_INST(SQ_CF_INST_EXPORT_DONE),
                                               WHOLE_QUAD_MODE(0), BARRIER(0));

    /* 3 - padding; the VTX clause above points at slot 4 */
    shader[i++] = 0x00000000;
    shader[i++] = 0x00000000;

    /* 4/5 - the single vertex fetch */
    shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
                             FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
                             FETCH_WHOLE_QUAD(0), BUFFER_ID(0),
                             SRC_GPR(0), SRC_REL(ABSOLUTE),
                             SRC_SEL_X(SQ_SEL_X), MEGA_FETCH_COUNT(8));
    shader[i++] = VTX_DWORD1_GPR(DST_GPR(1), DST_REL(0),
                                 DST_SEL_X(SQ_SEL_X), DST_SEL_Y(SQ_SEL_Y),
                                 DST_SEL_Z(SQ_SEL_0), DST_SEL_W(SQ_SEL_1),
                                 USE_CONST_FIELDS(0),
                                 DATA_FORMAT(FMT_32_32_FLOAT),
                                 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED),
                                 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
                                 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
    shader[i++] = VTX_DWORD2(OFFSET(0), ENDIAN_SWAP(fetch_swap),
                             CONST_BUF_NO_STRIDE(0), MEGA_FETCH(1));
    shader[i++] = VTX_DWORD_PAD;

    return i;
}
125
126 /* solid ps --------------------------------------- */
/*
 * Build the pixel shader used for solid fills.
 *
 * Stores 12 words into shader[] and returns that count; the caller must
 * provide at least that much room.  ChipSet is forwarded to
 * ALU_DWORD1_OP2() for every ALU instruction.
 *
 * Program layout (slot = two words, matching the ADDR() operand):
 *   0    CF   ALU clause at slot 2, four instructions
 *   1    CF   EXPORT_DONE of GPR 0 to PIXEL_MRT0, END_OF_PROGRAM
 *   2-5  ALU  one clamped MOV per lane (x, y, z, w) from constant-file
 *             entry 0 into GPR 0; only the w instruction sets LAST
 */
int R600_solid_ps(RADEONChipFamily ChipSet, uint32_t *shader)
{
    /* Lane order fixes the order in which the MOVs are emitted. */
    const int lanes[4] = { ELEM_X, ELEM_Y, ELEM_Z, ELEM_W };
    int i = 0;
    int lane;

    /* 0 - ALU clause */
    shader[i++] = CF_ALU_DWORD0(ADDR(2), KCACHE_BANK0(0), KCACHE_BANK1(0),
                                KCACHE_MODE0(SQ_CF_KCACHE_NOP));
    shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
                                KCACHE_ADDR0(0), KCACHE_ADDR1(0),
                                I_COUNT(4), USES_WATERFALL(0),
                                CF_INST(SQ_CF_INST_ALU),
                                WHOLE_QUAD_MODE(0), BARRIER(1));

    /* 1 - colour export */
    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_PIXEL_MRT0), TYPE(SQ_EXPORT_PIXEL),
                                          RW_GPR(0), RW_REL(ABSOLUTE),
                                          INDEX_GPR(0), ELEM_SIZE(1));
    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X), SRC_SEL_Y(SQ_SEL_Y),
                                               SRC_SEL_Z(SQ_SEL_Z), SRC_SEL_W(SQ_SEL_W),
                                               R6xx_ELEM_LOOP(0), BURST_COUNT(1),
                                               END_OF_PROGRAM(1), VALID_PIXEL_MODE(0),
                                               CF_INST(SQ_CF_INST_EXPORT_DONE),
                                               WHOLE_QUAD_MODE(0), BARRIER(1));

    /* 2-5 - the four MOVs differ only in the lane and the LAST flag */
    for (lane = 0; lane < 4; lane++) {
        const int lane_elem = lanes[lane];
        const int is_last = (lane == 3) ? 1 : 0;

        shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_CFILE_BASE + 0),
                                 SRC0_REL(ABSOLUTE),
                                 SRC0_ELEM(lane_elem),
                                 SRC0_NEG(0),
                                 SRC1_SEL(ALU_SRC_GPR_BASE + 0),
                                 SRC1_REL(ABSOLUTE),
                                 SRC1_ELEM(lane_elem),
                                 SRC1_NEG(0),
                                 INDEX_MODE(SQ_INDEX_AR_X),
                                 PRED_SEL(SQ_PRED_SEL_OFF),
                                 LAST(is_last));
        shader[i++] = ALU_DWORD1_OP2(ChipSet,
                                     SRC0_ABS(0),
                                     SRC1_ABS(0),
                                     UPDATE_EXECUTE_MASK(0),
                                     UPDATE_PRED(0),
                                     WRITE_MASK(1),
                                     FOG_MERGE(0),
                                     OMOD(SQ_ALU_OMOD_OFF),
                                     ALU_INST(SQ_OP2_INST_MOV),
                                     BANK_SWIZZLE(SQ_ALU_VEC_012),
                                     DST_GPR(0),
                                     DST_REL(ABSOLUTE),
                                     DST_ELEM(lane_elem),
                                     CLAMP(1));
    }

    return i;
}
270
271 /* copy vs --------------------------------------- */
/*
 * Build the vertex shader used for copies.
 *
 * Stores 16 words into shader[] and returns that count; the caller must
 * provide at least that much room.  ChipSet is not referenced directly by
 * this body.
 *
 * Program layout (slot = two words, matching the ADDR() operand):
 *   0    CF   VTX clause at slot 4, two fetches
 *   1    CF   EXPORT_DONE of GPR 1 to POS0
 *   2    CF   EXPORT_DONE of GPR 0 to PARAM 0, END_OF_PROGRAM
 *   3    --   zero padding
 *   4/5  VTX  fetch at offset 0 into GPR 1 as .xy01 (MEGA_FETCH set)
 *   6/7  VTX  fetch at offset 8 into GPR 0 as .xy01
 */
int R600_copy_vs(RADEONChipFamily ChipSet, uint32_t *shader)
{
    int i = 0;

    /*
     * Chosen here rather than inside the VTX_DWORD2() argument lists:
     * VTX_DWORD2 is presumably a function-like macro, and a preprocessor
     * directive inside a macro invocation is undefined behaviour in ISO C.
     */
#if X_BYTE_ORDER == X_BIG_ENDIAN
    const int fetch_swap = SQ_ENDIAN_8IN32;
#else
    const int fetch_swap = SQ_ENDIAN_NONE;
#endif

    /* 0 - vertex fetch clause */
    shader[i++] = CF_DWORD0(ADDR(4));
    shader[i++] = CF_DWORD1(POP_COUNT(0), CF_CONST(0), COND(SQ_CF_COND_ACTIVE),
                            I_COUNT(2), CALL_COUNT(0), END_OF_PROGRAM(0),
                            VALID_PIXEL_MODE(0), CF_INST(SQ_CF_INST_VTX),
                            WHOLE_QUAD_MODE(0), BARRIER(1));

    /* 1 - position export */
    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_POS0), TYPE(SQ_EXPORT_POS),
                                          RW_GPR(1), RW_REL(ABSOLUTE),
                                          INDEX_GPR(0), ELEM_SIZE(0));
    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X), SRC_SEL_Y(SQ_SEL_Y),
                                               SRC_SEL_Z(SQ_SEL_Z), SRC_SEL_W(SQ_SEL_W),
                                               R6xx_ELEM_LOOP(0), BURST_COUNT(0),
                                               END_OF_PROGRAM(0), VALID_PIXEL_MODE(0),
                                               CF_INST(SQ_CF_INST_EXPORT_DONE),
                                               WHOLE_QUAD_MODE(0), BARRIER(1));

    /* 2 - parameter export, ends the program */
    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(0), TYPE(SQ_EXPORT_PARAM),
                                          RW_GPR(0), RW_REL(ABSOLUTE),
                                          INDEX_GPR(0), ELEM_SIZE(0));
    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X), SRC_SEL_Y(SQ_SEL_Y),
                                               SRC_SEL_Z(SQ_SEL_Z), SRC_SEL_W(SQ_SEL_W),
                                               R6xx_ELEM_LOOP(0), BURST_COUNT(0),
                                               END_OF_PROGRAM(1), VALID_PIXEL_MODE(0),
                                               CF_INST(SQ_CF_INST_EXPORT_DONE),
                                               WHOLE_QUAD_MODE(0), BARRIER(0));

    /* 3 - padding; the VTX clause above points at slot 4 */
    shader[i++] = 0x00000000;
    shader[i++] = 0x00000000;

    /* 4/5 - first fetch: offset 0 -> GPR 1 */
    shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
                             FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
                             FETCH_WHOLE_QUAD(0), BUFFER_ID(0),
                             SRC_GPR(0), SRC_REL(ABSOLUTE),
                             SRC_SEL_X(SQ_SEL_X), MEGA_FETCH_COUNT(16));
    shader[i++] = VTX_DWORD1_GPR(DST_GPR(1), DST_REL(0),
                                 DST_SEL_X(SQ_SEL_X), DST_SEL_Y(SQ_SEL_Y),
                                 DST_SEL_Z(SQ_SEL_0), DST_SEL_W(SQ_SEL_1),
                                 USE_CONST_FIELDS(0),
                                 DATA_FORMAT(FMT_32_32_FLOAT),
                                 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED),
                                 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
                                 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
    shader[i++] = VTX_DWORD2(OFFSET(0), ENDIAN_SWAP(fetch_swap),
                             CONST_BUF_NO_STRIDE(0), MEGA_FETCH(1));
    shader[i++] = VTX_DWORD_PAD;

    /* 6/7 - second fetch: offset 8 -> GPR 0 */
    shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
                             FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
                             FETCH_WHOLE_QUAD(0), BUFFER_ID(0),
                             SRC_GPR(0), SRC_REL(ABSOLUTE),
                             SRC_SEL_X(SQ_SEL_X), MEGA_FETCH_COUNT(8));
    shader[i++] = VTX_DWORD1_GPR(DST_GPR(0), DST_REL(0),
                                 DST_SEL_X(SQ_SEL_X), DST_SEL_Y(SQ_SEL_Y),
                                 DST_SEL_Z(SQ_SEL_0), DST_SEL_W(SQ_SEL_1),
                                 USE_CONST_FIELDS(0),
                                 DATA_FORMAT(FMT_32_32_FLOAT),
                                 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED),
                                 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
                                 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
    shader[i++] = VTX_DWORD2(OFFSET(8), ENDIAN_SWAP(fetch_swap),
                             CONST_BUF_NO_STRIDE(0), MEGA_FETCH(0));
    shader[i++] = VTX_DWORD_PAD;

    return i;
}
388
389 /* copy ps --------------------------------------- */
/*
 * Build the pixel shader used for copies.
 *
 * Stores 8 words into shader[] and returns that count; the caller must
 * provide at least that much room.  ChipSet is not referenced directly by
 * this body.
 *
 * Program layout (slot = two words, matching the ADDR() operand):
 *   0    CF   TEX clause at slot 2, one instruction
 *   1    CF   EXPORT_DONE of GPR 0 to PIXEL_MRT0, END_OF_PROGRAM
 *   2/3  TEX  SAMPLE of resource 0 / sampler 0 into GPR 0 (.xyzw), using
 *             GPR 0 as the coordinate with all COORD_TYPE fields set to
 *             TEX_UNNORMALIZED
 */
int R600_copy_ps(RADEONChipFamily ChipSet, uint32_t *shader)
{
    int i = 0;

    /* CF INST 0 - texture clause */
    shader[i++] = CF_DWORD0(ADDR(2));
    shader[i++] = CF_DWORD1(POP_COUNT(0), CF_CONST(0), COND(SQ_CF_COND_ACTIVE),
                            I_COUNT(1), CALL_COUNT(0), END_OF_PROGRAM(0),
                            VALID_PIXEL_MODE(0), CF_INST(SQ_CF_INST_TEX),
                            WHOLE_QUAD_MODE(0), BARRIER(1));

    /* CF INST 1 - colour export, ends the program */
    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_PIXEL_MRT0), TYPE(SQ_EXPORT_PIXEL),
                                          RW_GPR(0), RW_REL(ABSOLUTE),
                                          INDEX_GPR(0), ELEM_SIZE(1));
    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X), SRC_SEL_Y(SQ_SEL_Y),
                                               SRC_SEL_Z(SQ_SEL_Z), SRC_SEL_W(SQ_SEL_W),
                                               R6xx_ELEM_LOOP(0), BURST_COUNT(1),
                                               END_OF_PROGRAM(1), VALID_PIXEL_MODE(0),
                                               CF_INST(SQ_CF_INST_EXPORT_DONE),
                                               WHOLE_QUAD_MODE(0), BARRIER(1));

    /* TEX INST 0 */
    shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE), BC_FRAC_MODE(0),
                             FETCH_WHOLE_QUAD(0), RESOURCE_ID(0),
                             SRC_GPR(0), SRC_REL(ABSOLUTE),
                             R7xx_ALT_CONST(0));
    shader[i++] = TEX_DWORD1(DST_GPR(0), DST_REL(ABSOLUTE),
                             DST_SEL_X(SQ_SEL_X),   /* R */
                             DST_SEL_Y(SQ_SEL_Y),   /* G */
                             DST_SEL_Z(SQ_SEL_Z),   /* B */
                             DST_SEL_W(SQ_SEL_W),   /* A */
                             LOD_BIAS(0),
                             COORD_TYPE_X(TEX_UNNORMALIZED),
                             COORD_TYPE_Y(TEX_UNNORMALIZED),
                             COORD_TYPE_Z(TEX_UNNORMALIZED),
                             COORD_TYPE_W(TEX_UNNORMALIZED));
    shader[i++] = TEX_DWORD2(OFFSET_X(0), OFFSET_Y(0), OFFSET_Z(0),
                             SAMPLER_ID(0),
                             SRC_SEL_X(SQ_SEL_X), SRC_SEL_Y(SQ_SEL_Y),
                             SRC_SEL_Z(SQ_SEL_0), SRC_SEL_W(SQ_SEL_1));
    shader[i++] = TEX_DWORD_PAD;

    return i;
}
455
/*
 * ; xv vertex shader
 * 00 VTX: ADDR(6) CNT(2)
 *       0  VFETCH R1.xy01, R0.x, fc0  MEGA(16) FORMAT(32_32_FLOAT)
 *          FORMAT_COMP(SIGNED)
 *       1  VFETCH R0.xy01, R0.x, fc0  MINI(8) OFFSET(8) FORMAT(32_32_FLOAT)
 *          FORMAT_COMP(SIGNED)
 * 01 ALU: ADDR(4) CNT(2)
 *       2  x: MUL R0.x, R0.x, C0.x
 *       3  y: MUL R0.y, R0.y, C0.y
 * 02 EXP_DONE: POS0, R1
 * 03 EXP_DONE: PARAM0, R0  NO_BARRIER
 * END_OF_PROGRAM
 */
/*
 * Build the Xv vertex shader.
 *
 * Stores 20 words into shader[] and returns that count; the caller must
 * provide at least that much room.  ChipSet is forwarded to
 * ALU_DWORD1_OP2() for both ALU instructions.
 *
 * Word layout (slot = two words, matching the ADDR() operands):
 *   slots 0-3  four CF instructions (VTX clause, ALU clause, two exports)
 *   slots 4-5  two MULs scaling GPR 0 .x/.y by constant-file entry 0 .x/.y
 *   slots 6-9  two vertex fetches (offset 0 -> GPR 1, offset 8 -> GPR 0)
 */
int R600_xv_vs(RADEONChipFamily ChipSet, uint32_t *shader)
{
    int i = 0;

    /*
     * Chosen here rather than inside the VTX_DWORD2() argument lists:
     * VTX_DWORD2 is presumably a function-like macro, and a preprocessor
     * directive inside a macro invocation is undefined behaviour in ISO C.
     */
#if X_BYTE_ORDER == X_BIG_ENDIAN
    const int fetch_swap = SQ_ENDIAN_8IN32;
#else
    const int fetch_swap = SQ_ENDIAN_NONE;
#endif

    /* 0 - vertex fetch clause at slot 6 */
    shader[i++] = CF_DWORD0(ADDR(6));
    shader[i++] = CF_DWORD1(POP_COUNT(0), CF_CONST(0), COND(SQ_CF_COND_ACTIVE),
                            I_COUNT(2), CALL_COUNT(0), END_OF_PROGRAM(0),
                            VALID_PIXEL_MODE(0), CF_INST(SQ_CF_INST_VTX),
                            WHOLE_QUAD_MODE(0), BARRIER(1));

    /* 1 - ALU clause at slot 4 */
    shader[i++] = CF_ALU_DWORD0(ADDR(4), KCACHE_BANK0(0), KCACHE_BANK1(0),
                                KCACHE_MODE0(SQ_CF_KCACHE_NOP));
    shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
                                KCACHE_ADDR0(0), KCACHE_ADDR1(0),
                                I_COUNT(2), USES_WATERFALL(0),
                                CF_INST(SQ_CF_INST_ALU),
                                WHOLE_QUAD_MODE(0), BARRIER(1));

    /* 2 - position export from GPR 1 */
    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_POS0), TYPE(SQ_EXPORT_POS),
                                          RW_GPR(1), RW_REL(ABSOLUTE),
                                          INDEX_GPR(0), ELEM_SIZE(3));
    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X), SRC_SEL_Y(SQ_SEL_Y),
                                               SRC_SEL_Z(SQ_SEL_Z), SRC_SEL_W(SQ_SEL_W),
                                               R6xx_ELEM_LOOP(0), BURST_COUNT(1),
                                               END_OF_PROGRAM(0), VALID_PIXEL_MODE(0),
                                               CF_INST(SQ_CF_INST_EXPORT_DONE),
                                               WHOLE_QUAD_MODE(0), BARRIER(1));

    /* 3 - parameter export from GPR 0, ends the program */
    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(0), TYPE(SQ_EXPORT_PARAM),
                                          RW_GPR(0), RW_REL(ABSOLUTE),
                                          INDEX_GPR(0), ELEM_SIZE(3));
    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X), SRC_SEL_Y(SQ_SEL_Y),
                                               SRC_SEL_Z(SQ_SEL_Z), SRC_SEL_W(SQ_SEL_W),
                                               R6xx_ELEM_LOOP(0), BURST_COUNT(1),
                                               END_OF_PROGRAM(1), VALID_PIXEL_MODE(0),
                                               CF_INST(SQ_CF_INST_EXPORT_DONE),
                                               WHOLE_QUAD_MODE(0), BARRIER(0));

    /*
     * 4 - r0.x = r0.x * c0.x
     * (original note: "texX / w", so c0.x presumably holds 1/w)
     */
    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), SRC0_REL(ABSOLUTE),
                             SRC0_ELEM(ELEM_X), SRC0_NEG(0),
                             SRC1_SEL(ALU_SRC_CFILE_BASE + 0), SRC1_REL(ABSOLUTE),
                             SRC1_ELEM(ELEM_X), SRC1_NEG(0),
                             INDEX_MODE(SQ_INDEX_AR_X),
                             PRED_SEL(SQ_PRED_SEL_OFF),
                             LAST(0));
    shader[i++] = ALU_DWORD1_OP2(ChipSet,
                                 SRC0_ABS(0), SRC1_ABS(0),
                                 UPDATE_EXECUTE_MASK(0), UPDATE_PRED(0),
                                 WRITE_MASK(1), FOG_MERGE(0),
                                 OMOD(SQ_ALU_OMOD_OFF),
                                 ALU_INST(SQ_OP2_INST_MUL),
                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
                                 DST_GPR(0), DST_REL(ABSOLUTE),
                                 DST_ELEM(ELEM_X),
                                 CLAMP(0));

    /*
     * 5 - r0.y = r0.y * c0.y, closes the instruction group
     * (original note: "texY / h", so c0.y presumably holds 1/h)
     */
    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), SRC0_REL(ABSOLUTE),
                             SRC0_ELEM(ELEM_Y), SRC0_NEG(0),
                             SRC1_SEL(ALU_SRC_CFILE_BASE + 0), SRC1_REL(ABSOLUTE),
                             SRC1_ELEM(ELEM_Y), SRC1_NEG(0),
                             INDEX_MODE(SQ_INDEX_AR_X),
                             PRED_SEL(SQ_PRED_SEL_OFF),
                             LAST(1));
    shader[i++] = ALU_DWORD1_OP2(ChipSet,
                                 SRC0_ABS(0), SRC1_ABS(0),
                                 UPDATE_EXECUTE_MASK(0), UPDATE_PRED(0),
                                 WRITE_MASK(1), FOG_MERGE(0),
                                 OMOD(SQ_ALU_OMOD_OFF),
                                 ALU_INST(SQ_OP2_INST_MUL),
                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
                                 DST_GPR(0), DST_REL(ABSOLUTE),
                                 DST_ELEM(ELEM_Y),
                                 CLAMP(0));

    /* 6/7 - first fetch: offset 0 -> GPR 1 */
    shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
                             FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
                             FETCH_WHOLE_QUAD(0), BUFFER_ID(0),
                             SRC_GPR(0), SRC_REL(ABSOLUTE),
                             SRC_SEL_X(SQ_SEL_X), MEGA_FETCH_COUNT(16));
    shader[i++] = VTX_DWORD1_GPR(DST_GPR(1), DST_REL(ABSOLUTE),
                                 DST_SEL_X(SQ_SEL_X), DST_SEL_Y(SQ_SEL_Y),
                                 DST_SEL_Z(SQ_SEL_0), DST_SEL_W(SQ_SEL_1),
                                 USE_CONST_FIELDS(0),
                                 DATA_FORMAT(FMT_32_32_FLOAT),
                                 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED),
                                 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
                                 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
    shader[i++] = VTX_DWORD2(OFFSET(0), ENDIAN_SWAP(fetch_swap),
                             CONST_BUF_NO_STRIDE(0), MEGA_FETCH(1));
    shader[i++] = VTX_DWORD_PAD;

    /* 8/9 - second fetch: offset 8 -> GPR 0 */
    shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
                             FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
                             FETCH_WHOLE_QUAD(0), BUFFER_ID(0),
                             SRC_GPR(0), SRC_REL(ABSOLUTE),
                             SRC_SEL_X(SQ_SEL_X), MEGA_FETCH_COUNT(8));
    shader[i++] = VTX_DWORD1_GPR(DST_GPR(0), DST_REL(ABSOLUTE),
                                 DST_SEL_X(SQ_SEL_X), DST_SEL_Y(SQ_SEL_Y),
                                 DST_SEL_Z(SQ_SEL_0), DST_SEL_W(SQ_SEL_1),
                                 USE_CONST_FIELDS(0),
                                 DATA_FORMAT(FMT_32_32_FLOAT),
                                 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED),
                                 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
                                 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
    shader[i++] = VTX_DWORD2(OFFSET(8), ENDIAN_SWAP(fetch_swap),
                             CONST_BUF_NO_STRIDE(0), MEGA_FETCH(0));
    shader[i++] = VTX_DWORD_PAD;

    return i;
}
651
R600_xv_ps(RADEONChipFamily ChipSet,uint32_t * shader)652 int R600_xv_ps(RADEONChipFamily ChipSet, uint32_t* shader)
653 {
654 int i = 0;
655
656 /* 0 */
657 shader[i++] = CF_DWORD0(ADDR(16));
658 shader[i++] = CF_DWORD1(POP_COUNT(0),
659 CF_CONST(0),
660 COND(SQ_CF_COND_BOOL),
661 I_COUNT(0),
662 CALL_COUNT(0),
663 END_OF_PROGRAM(0),
664 VALID_PIXEL_MODE(0),
665 CF_INST(SQ_CF_INST_CALL),
666 WHOLE_QUAD_MODE(0),
667 BARRIER(0));
668 /* 1 */
669 shader[i++] = CF_DWORD0(ADDR(24));
670 shader[i++] = CF_DWORD1(POP_COUNT(0),
671 CF_CONST(0),
672 COND(SQ_CF_COND_NOT_BOOL),
673 I_COUNT(0),
674 CALL_COUNT(0),
675 END_OF_PROGRAM(0),
676 VALID_PIXEL_MODE(0),
677 CF_INST(SQ_CF_INST_CALL),
678 WHOLE_QUAD_MODE(0),
679 BARRIER(0));
680 /* 2 */
681 shader[i++] = CF_ALU_DWORD0(ADDR(4),
682 KCACHE_BANK0(0),
683 KCACHE_BANK1(0),
684 KCACHE_MODE0(SQ_CF_KCACHE_NOP));
685 shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
686 KCACHE_ADDR0(0),
687 KCACHE_ADDR1(0),
688 I_COUNT(12),
689 USES_WATERFALL(0),
690 CF_INST(SQ_CF_INST_ALU),
691 WHOLE_QUAD_MODE(0),
692 BARRIER(1));
693 /* 3 */
694 shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_PIXEL_MRT0),
695 TYPE(SQ_EXPORT_PIXEL),
696 RW_GPR(2),
697 RW_REL(ABSOLUTE),
698 INDEX_GPR(0),
699 ELEM_SIZE(3));
700 shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
701 SRC_SEL_Y(SQ_SEL_Y),
702 SRC_SEL_Z(SQ_SEL_Z),
703 SRC_SEL_W(SQ_SEL_W),
704 R6xx_ELEM_LOOP(0),
705 BURST_COUNT(1),
706 END_OF_PROGRAM(1),
707 VALID_PIXEL_MODE(0),
708 CF_INST(SQ_CF_INST_EXPORT_DONE),
709 WHOLE_QUAD_MODE(0),
710 BARRIER(1));
711 /* 4,5,6,7 */
712 /* r2.x = MAD(c0.w, r1.x, c0.x) */
713 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_CFILE_BASE + 0),
714 SRC0_REL(ABSOLUTE),
715 SRC0_ELEM(ELEM_W),
716 SRC0_NEG(0),
717 SRC1_SEL(ALU_SRC_GPR_BASE + 1),
718 SRC1_REL(ABSOLUTE),
719 SRC1_ELEM(ELEM_X),
720 SRC1_NEG(0),
721 INDEX_MODE(SQ_INDEX_LOOP),
722 PRED_SEL(SQ_PRED_SEL_OFF),
723 LAST(0));
724 shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(ALU_SRC_CFILE_BASE + 0),
725 SRC2_REL(ABSOLUTE),
726 SRC2_ELEM(ELEM_X),
727 SRC2_NEG(0),
728 ALU_INST(SQ_OP3_INST_MULADD),
729 BANK_SWIZZLE(SQ_ALU_VEC_012),
730 DST_GPR(2),
731 DST_REL(ABSOLUTE),
732 DST_ELEM(ELEM_X),
733 CLAMP(0));
734 /* r2.y = MAD(c0.w, r1.x, c0.y) */
735 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_CFILE_BASE + 0),
736 SRC0_REL(ABSOLUTE),
737 SRC0_ELEM(ELEM_W),
738 SRC0_NEG(0),
739 SRC1_SEL(ALU_SRC_GPR_BASE + 1),
740 SRC1_REL(ABSOLUTE),
741 SRC1_ELEM(ELEM_X),
742 SRC1_NEG(0),
743 INDEX_MODE(SQ_INDEX_LOOP),
744 PRED_SEL(SQ_PRED_SEL_OFF),
745 LAST(0));
746 shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(ALU_SRC_CFILE_BASE + 0),
747 SRC2_REL(ABSOLUTE),
748 SRC2_ELEM(ELEM_Y),
749 SRC2_NEG(0),
750 ALU_INST(SQ_OP3_INST_MULADD),
751 BANK_SWIZZLE(SQ_ALU_VEC_012),
752 DST_GPR(2),
753 DST_REL(ABSOLUTE),
754 DST_ELEM(ELEM_Y),
755 CLAMP(0));
756 /* r2.z = MAD(c0.w, r1.x, c0.z) */
757 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_CFILE_BASE + 0),
758 SRC0_REL(ABSOLUTE),
759 SRC0_ELEM(ELEM_W),
760 SRC0_NEG(0),
761 SRC1_SEL(ALU_SRC_GPR_BASE + 1),
762 SRC1_REL(ABSOLUTE),
763 SRC1_ELEM(ELEM_X),
764 SRC1_NEG(0),
765 INDEX_MODE(SQ_INDEX_LOOP),
766 PRED_SEL(SQ_PRED_SEL_OFF),
767 LAST(0));
768 shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(ALU_SRC_CFILE_BASE + 0),
769 SRC2_REL(ABSOLUTE),
770 SRC2_ELEM(ELEM_Z),
771 SRC2_NEG(0),
772 ALU_INST(SQ_OP3_INST_MULADD),
773 BANK_SWIZZLE(SQ_ALU_VEC_012),
774 DST_GPR(2),
775 DST_REL(ABSOLUTE),
776 DST_ELEM(ELEM_Z),
777 CLAMP(0));
778 /* r2.w = MAD(0, 0, 1) */
779 shader[i++] = ALU_DWORD0(SRC0_SEL(SQ_ALU_SRC_0),
780 SRC0_REL(ABSOLUTE),
781 SRC0_ELEM(ELEM_X),
782 SRC0_NEG(0),
783 SRC1_SEL(SQ_ALU_SRC_0),
784 SRC1_REL(ABSOLUTE),
785 SRC1_ELEM(ELEM_X),
786 SRC1_NEG(0),
787 INDEX_MODE(SQ_INDEX_LOOP),
788 PRED_SEL(SQ_PRED_SEL_OFF),
789 LAST(1));
790 shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_1),
791 SRC2_REL(ABSOLUTE),
792 SRC2_ELEM(ELEM_X),
793 SRC2_NEG(0),
794 ALU_INST(SQ_OP3_INST_MULADD),
795 BANK_SWIZZLE(SQ_ALU_VEC_012),
796 DST_GPR(2),
797 DST_REL(ABSOLUTE),
798 DST_ELEM(ELEM_W),
799 CLAMP(0));
800
801 /* 8,9,10,11 */
802 /* r2.x = MAD(c1.x, r1.y, pv.x) */
803 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_CFILE_BASE + 1),
804 SRC0_REL(ABSOLUTE),
805 SRC0_ELEM(ELEM_X),
806 SRC0_NEG(0),
807 SRC1_SEL(ALU_SRC_GPR_BASE + 1),
808 SRC1_REL(ABSOLUTE),
809 SRC1_ELEM(ELEM_Y),
810 SRC1_NEG(0),
811 INDEX_MODE(SQ_INDEX_LOOP),
812 PRED_SEL(SQ_PRED_SEL_OFF),
813 LAST(0));
814 shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_PV),
815 SRC2_REL(ABSOLUTE),
816 SRC2_ELEM(ELEM_X),
817 SRC2_NEG(0),
818 ALU_INST(SQ_OP3_INST_MULADD),
819 BANK_SWIZZLE(SQ_ALU_VEC_012),
820 DST_GPR(2),
821 DST_REL(ABSOLUTE),
822 DST_ELEM(ELEM_X),
823 CLAMP(0));
824 /* r2.y = MAD(c1.y, r1.y, pv.y) */
825 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_CFILE_BASE + 1),
826 SRC0_REL(ABSOLUTE),
827 SRC0_ELEM(ELEM_Y),
828 SRC0_NEG(0),
829 SRC1_SEL(ALU_SRC_GPR_BASE + 1),
830 SRC1_REL(ABSOLUTE),
831 SRC1_ELEM(ELEM_Y),
832 SRC1_NEG(0),
833 INDEX_MODE(SQ_INDEX_LOOP),
834 PRED_SEL(SQ_PRED_SEL_OFF),
835 LAST(0));
836 shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_PV),
837 SRC2_REL(ABSOLUTE),
838 SRC2_ELEM(ELEM_Y),
839 SRC2_NEG(0),
840 ALU_INST(SQ_OP3_INST_MULADD),
841 BANK_SWIZZLE(SQ_ALU_VEC_012),
842 DST_GPR(2),
843 DST_REL(ABSOLUTE),
844 DST_ELEM(ELEM_Y),
845 CLAMP(0));
846 /* r2.z = MAD(c1.z, r1.y, pv.z) */
847 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_CFILE_BASE + 1),
848 SRC0_REL(ABSOLUTE),
849 SRC0_ELEM(ELEM_Z),
850 SRC0_NEG(0),
851 SRC1_SEL(ALU_SRC_GPR_BASE + 1),
852 SRC1_REL(ABSOLUTE),
853 SRC1_ELEM(ELEM_Y),
854 SRC1_NEG(0),
855 INDEX_MODE(SQ_INDEX_LOOP),
856 PRED_SEL(SQ_PRED_SEL_OFF),
857 LAST(0));
858 shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_PV),
859 SRC2_REL(ABSOLUTE),
860 SRC2_ELEM(ELEM_Z),
861 SRC2_NEG(0),
862 ALU_INST(SQ_OP3_INST_MULADD),
863 BANK_SWIZZLE(SQ_ALU_VEC_012),
864 DST_GPR(2),
865 DST_REL(ABSOLUTE),
866 DST_ELEM(ELEM_Z),
867 CLAMP(0));
868 /* r2.w = MAD(0, 0, 1) */
869 shader[i++] = ALU_DWORD0(SRC0_SEL(SQ_ALU_SRC_0),
870 SRC0_REL(ABSOLUTE),
871 SRC0_ELEM(ELEM_X),
872 SRC0_NEG(0),
873 SRC1_SEL(SQ_ALU_SRC_0),
874 SRC1_REL(ABSOLUTE),
875 SRC1_ELEM(ELEM_X),
876 SRC1_NEG(0),
877 INDEX_MODE(SQ_INDEX_LOOP),
878 PRED_SEL(SQ_PRED_SEL_OFF),
879 LAST(1));
880 shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_1),
881 SRC2_REL(ABSOLUTE),
882 SRC2_ELEM(ELEM_W),
883 SRC2_NEG(0),
884 ALU_INST(SQ_OP3_INST_MULADD),
885 BANK_SWIZZLE(SQ_ALU_VEC_012),
886 DST_GPR(2),
887 DST_REL(ABSOLUTE),
888 DST_ELEM(ELEM_W),
889 CLAMP(0));
890 /* 12,13,14,15 */
891 /* r2.x = MAD(c2.x, r1.z, pv.x) */
892 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_CFILE_BASE + 2),
893 SRC0_REL(ABSOLUTE),
894 SRC0_ELEM(ELEM_X),
895 SRC0_NEG(0),
896 SRC1_SEL(ALU_SRC_GPR_BASE + 1),
897 SRC1_REL(ABSOLUTE),
898 SRC1_ELEM(ELEM_Z),
899 SRC1_NEG(0),
900 INDEX_MODE(SQ_INDEX_LOOP),
901 PRED_SEL(SQ_PRED_SEL_OFF),
902 LAST(0));
903 shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_PV),
904 SRC2_REL(ABSOLUTE),
905 SRC2_ELEM(ELEM_X),
906 SRC2_NEG(0),
907 ALU_INST(SQ_OP3_INST_MULADD),
908 BANK_SWIZZLE(SQ_ALU_VEC_012),
909 DST_GPR(2),
910 DST_REL(ABSOLUTE),
911 DST_ELEM(ELEM_X),
912 CLAMP(1));
913 /* r2.y = MAD(c2.y, r1.z, pv.y) */
914 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_CFILE_BASE + 2),
915 SRC0_REL(ABSOLUTE),
916 SRC0_ELEM(ELEM_Y),
917 SRC0_NEG(0),
918 SRC1_SEL(ALU_SRC_GPR_BASE + 1),
919 SRC1_REL(ABSOLUTE),
920 SRC1_ELEM(ELEM_Z),
921 SRC1_NEG(0),
922 INDEX_MODE(SQ_INDEX_LOOP),
923 PRED_SEL(SQ_PRED_SEL_OFF),
924 LAST(0));
925 shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_PV),
926 SRC2_REL(ABSOLUTE),
927 SRC2_ELEM(ELEM_Y),
928 SRC2_NEG(0),
929 ALU_INST(SQ_OP3_INST_MULADD),
930 BANK_SWIZZLE(SQ_ALU_VEC_012),
931 DST_GPR(2),
932 DST_REL(ABSOLUTE),
933 DST_ELEM(ELEM_Y),
934 CLAMP(1));
935 /* r2.z = MAD(c2.z, r1.z, pv.z) */
936 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_CFILE_BASE + 2),
937 SRC0_REL(ABSOLUTE),
938 SRC0_ELEM(ELEM_Z),
939 SRC0_NEG(0),
940 SRC1_SEL(ALU_SRC_GPR_BASE + 1),
941 SRC1_REL(ABSOLUTE),
942 SRC1_ELEM(ELEM_Z),
943 SRC1_NEG(0),
944 INDEX_MODE(SQ_INDEX_LOOP),
945 PRED_SEL(SQ_PRED_SEL_OFF),
946 LAST(0));
947 shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_PV),
948 SRC2_REL(ABSOLUTE),
949 SRC2_ELEM(ELEM_Z),
950 SRC2_NEG(0),
951 ALU_INST(SQ_OP3_INST_MULADD),
952 BANK_SWIZZLE(SQ_ALU_VEC_012),
953 DST_GPR(2),
954 DST_REL(ABSOLUTE),
955 DST_ELEM(ELEM_Z),
956 CLAMP(1));
957 /* r2.w = MAD(0, 0, 1) */
958 shader[i++] = ALU_DWORD0(SRC0_SEL(SQ_ALU_SRC_0),
959 SRC0_REL(ABSOLUTE),
960 SRC0_ELEM(ELEM_X),
961 SRC0_NEG(0),
962 SRC1_SEL(SQ_ALU_SRC_0),
963 SRC1_REL(ABSOLUTE),
964 SRC1_ELEM(ELEM_X),
965 SRC1_NEG(0),
966 INDEX_MODE(SQ_INDEX_LOOP),
967 PRED_SEL(SQ_PRED_SEL_OFF),
968 LAST(1));
969 shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_1),
970 SRC2_REL(ABSOLUTE),
971 SRC2_ELEM(ELEM_X),
972 SRC2_NEG(0),
973 ALU_INST(SQ_OP3_INST_MULADD),
974 BANK_SWIZZLE(SQ_ALU_VEC_012),
975 DST_GPR(2),
976 DST_REL(ABSOLUTE),
977 DST_ELEM(ELEM_W),
978 CLAMP(1));
979
980 /* 16 */
981 shader[i++] = CF_DWORD0(ADDR(18));
982 shader[i++] = CF_DWORD1(POP_COUNT(0),
983 CF_CONST(0),
984 COND(SQ_CF_COND_ACTIVE),
985 I_COUNT(3),
986 CALL_COUNT(0),
987 END_OF_PROGRAM(0),
988 VALID_PIXEL_MODE(0),
989 CF_INST(SQ_CF_INST_TEX),
990 WHOLE_QUAD_MODE(0),
991 BARRIER(1));
992 /* 17 */
993 shader[i++] = CF_DWORD0(ADDR(0));
994 shader[i++] = CF_DWORD1(POP_COUNT(0),
995 CF_CONST(0),
996 COND(SQ_CF_COND_ACTIVE),
997 I_COUNT(0),
998 CALL_COUNT(0),
999 END_OF_PROGRAM(0),
1000 VALID_PIXEL_MODE(0),
1001 CF_INST(SQ_CF_INST_RETURN),
1002 WHOLE_QUAD_MODE(0),
1003 BARRIER(1));
1004 /* 18/19 */
1005 shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
1006 BC_FRAC_MODE(0),
1007 FETCH_WHOLE_QUAD(0),
1008 RESOURCE_ID(0),
1009 SRC_GPR(0),
1010 SRC_REL(ABSOLUTE),
1011 R7xx_ALT_CONST(0));
1012 shader[i++] = TEX_DWORD1(DST_GPR(1),
1013 DST_REL(ABSOLUTE),
1014 DST_SEL_X(SQ_SEL_X),
1015 DST_SEL_Y(SQ_SEL_MASK),
1016 DST_SEL_Z(SQ_SEL_MASK),
1017 DST_SEL_W(SQ_SEL_1),
1018 LOD_BIAS(0),
1019 COORD_TYPE_X(TEX_NORMALIZED),
1020 COORD_TYPE_Y(TEX_NORMALIZED),
1021 COORD_TYPE_Z(TEX_NORMALIZED),
1022 COORD_TYPE_W(TEX_NORMALIZED));
1023 shader[i++] = TEX_DWORD2(OFFSET_X(0),
1024 OFFSET_Y(0),
1025 OFFSET_Z(0),
1026 SAMPLER_ID(0),
1027 SRC_SEL_X(SQ_SEL_X),
1028 SRC_SEL_Y(SQ_SEL_Y),
1029 SRC_SEL_Z(SQ_SEL_0),
1030 SRC_SEL_W(SQ_SEL_1));
1031 shader[i++] = TEX_DWORD_PAD;
1032 /* 20/21 */
1033 shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
1034 BC_FRAC_MODE(0),
1035 FETCH_WHOLE_QUAD(0),
1036 RESOURCE_ID(1),
1037 SRC_GPR(0),
1038 SRC_REL(ABSOLUTE),
1039 R7xx_ALT_CONST(0));
1040 shader[i++] = TEX_DWORD1(DST_GPR(1),
1041 DST_REL(ABSOLUTE),
1042 DST_SEL_X(SQ_SEL_MASK),
1043 DST_SEL_Y(SQ_SEL_MASK),
1044 DST_SEL_Z(SQ_SEL_X),
1045 DST_SEL_W(SQ_SEL_MASK),
1046 LOD_BIAS(0),
1047 COORD_TYPE_X(TEX_NORMALIZED),
1048 COORD_TYPE_Y(TEX_NORMALIZED),
1049 COORD_TYPE_Z(TEX_NORMALIZED),
1050 COORD_TYPE_W(TEX_NORMALIZED));
1051 shader[i++] = TEX_DWORD2(OFFSET_X(0),
1052 OFFSET_Y(0),
1053 OFFSET_Z(0),
1054 SAMPLER_ID(1),
1055 SRC_SEL_X(SQ_SEL_X),
1056 SRC_SEL_Y(SQ_SEL_Y),
1057 SRC_SEL_Z(SQ_SEL_0),
1058 SRC_SEL_W(SQ_SEL_1));
1059 shader[i++] = TEX_DWORD_PAD;
1060 /* 22/23 */
1061 shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
1062 BC_FRAC_MODE(0),
1063 FETCH_WHOLE_QUAD(0),
1064 RESOURCE_ID(2),
1065 SRC_GPR(0),
1066 SRC_REL(ABSOLUTE),
1067 R7xx_ALT_CONST(0));
1068 shader[i++] = TEX_DWORD1(DST_GPR(1),
1069 DST_REL(ABSOLUTE),
1070 DST_SEL_X(SQ_SEL_MASK),
1071 DST_SEL_Y(SQ_SEL_X),
1072 DST_SEL_Z(SQ_SEL_MASK),
1073 DST_SEL_W(SQ_SEL_MASK),
1074 LOD_BIAS(0),
1075 COORD_TYPE_X(TEX_NORMALIZED),
1076 COORD_TYPE_Y(TEX_NORMALIZED),
1077 COORD_TYPE_Z(TEX_NORMALIZED),
1078 COORD_TYPE_W(TEX_NORMALIZED));
1079 shader[i++] = TEX_DWORD2(OFFSET_X(0),
1080 OFFSET_Y(0),
1081 OFFSET_Z(0),
1082 SAMPLER_ID(2),
1083 SRC_SEL_X(SQ_SEL_X),
1084 SRC_SEL_Y(SQ_SEL_Y),
1085 SRC_SEL_Z(SQ_SEL_0),
1086 SRC_SEL_W(SQ_SEL_1));
1087 shader[i++] = TEX_DWORD_PAD;
1088 /* 24 */
1089 shader[i++] = CF_DWORD0(ADDR(26));
1090 shader[i++] = CF_DWORD1(POP_COUNT(0),
1091 CF_CONST(0),
1092 COND(SQ_CF_COND_ACTIVE),
1093 I_COUNT(1),
1094 CALL_COUNT(0),
1095 END_OF_PROGRAM(0),
1096 VALID_PIXEL_MODE(0),
1097 CF_INST(SQ_CF_INST_TEX),
1098 WHOLE_QUAD_MODE(0),
1099 BARRIER(1));
1100 /* 25 */
1101 shader[i++] = CF_DWORD0(ADDR(0));
1102 shader[i++] = CF_DWORD1(POP_COUNT(0),
1103 CF_CONST(0),
1104 COND(SQ_CF_COND_ACTIVE),
1105 I_COUNT(0),
1106 CALL_COUNT(0),
1107 END_OF_PROGRAM(0),
1108 VALID_PIXEL_MODE(0),
1109 CF_INST(SQ_CF_INST_RETURN),
1110 WHOLE_QUAD_MODE(0),
1111 BARRIER(1));
1112 /* 26/27 */
1113 shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
1114 BC_FRAC_MODE(0),
1115 FETCH_WHOLE_QUAD(0),
1116 RESOURCE_ID(0),
1117 SRC_GPR(0),
1118 SRC_REL(ABSOLUTE),
1119 R7xx_ALT_CONST(0));
1120 shader[i++] = TEX_DWORD1(DST_GPR(1),
1121 DST_REL(ABSOLUTE),
1122 DST_SEL_X(SQ_SEL_X),
1123 DST_SEL_Y(SQ_SEL_Y),
1124 DST_SEL_Z(SQ_SEL_Z),
1125 DST_SEL_W(SQ_SEL_1),
1126 LOD_BIAS(0),
1127 COORD_TYPE_X(TEX_NORMALIZED),
1128 COORD_TYPE_Y(TEX_NORMALIZED),
1129 COORD_TYPE_Z(TEX_NORMALIZED),
1130 COORD_TYPE_W(TEX_NORMALIZED));
1131 shader[i++] = TEX_DWORD2(OFFSET_X(0),
1132 OFFSET_Y(0),
1133 OFFSET_Z(0),
1134 SAMPLER_ID(0),
1135 SRC_SEL_X(SQ_SEL_X),
1136 SRC_SEL_Y(SQ_SEL_Y),
1137 SRC_SEL_Z(SQ_SEL_0),
1138 SRC_SEL_W(SQ_SEL_1));
1139 shader[i++] = TEX_DWORD_PAD;
1140
1141 return i;
1142 }
1143
1144 /* comp vs --------------------------------------- */
R600_comp_vs(RADEONChipFamily ChipSet,uint32_t * shader)1145 int R600_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader)
1146 {
1147 int i = 0;
1148
1149 /* 0 */
1150 shader[i++] = CF_DWORD0(ADDR(3));
1151 shader[i++] = CF_DWORD1(POP_COUNT(0),
1152 CF_CONST(0),
1153 COND(SQ_CF_COND_BOOL),
1154 I_COUNT(0),
1155 CALL_COUNT(0),
1156 END_OF_PROGRAM(0),
1157 VALID_PIXEL_MODE(0),
1158 CF_INST(SQ_CF_INST_CALL),
1159 WHOLE_QUAD_MODE(0),
1160 BARRIER(0));
1161 /* 1 */
1162 shader[i++] = CF_DWORD0(ADDR(9));
1163 shader[i++] = CF_DWORD1(POP_COUNT(0),
1164 CF_CONST(0),
1165 COND(SQ_CF_COND_NOT_BOOL),
1166 I_COUNT(0),
1167 CALL_COUNT(0),
1168 END_OF_PROGRAM(0),
1169 VALID_PIXEL_MODE(0),
1170 CF_INST(SQ_CF_INST_CALL),
1171 WHOLE_QUAD_MODE(0),
1172 BARRIER(0));
1173 /* 2 */
1174 shader[i++] = CF_DWORD0(ADDR(0));
1175 shader[i++] = CF_DWORD1(POP_COUNT(0),
1176 CF_CONST(0),
1177 COND(SQ_CF_COND_ACTIVE),
1178 I_COUNT(0),
1179 CALL_COUNT(0),
1180 END_OF_PROGRAM(1),
1181 VALID_PIXEL_MODE(0),
1182 CF_INST(SQ_CF_INST_NOP),
1183 WHOLE_QUAD_MODE(0),
1184 BARRIER(1));
1185 /* 3 - mask sub */
1186 shader[i++] = CF_DWORD0(ADDR(44));
1187 shader[i++] = CF_DWORD1(POP_COUNT(0),
1188 CF_CONST(0),
1189 COND(SQ_CF_COND_ACTIVE),
1190 I_COUNT(3),
1191 CALL_COUNT(0),
1192 END_OF_PROGRAM(0),
1193 VALID_PIXEL_MODE(0),
1194 CF_INST(SQ_CF_INST_VTX),
1195 WHOLE_QUAD_MODE(0),
1196 BARRIER(1));
1197
1198 /* 4 - ALU */
1199 shader[i++] = CF_ALU_DWORD0(ADDR(14),
1200 KCACHE_BANK0(0),
1201 KCACHE_BANK1(0),
1202 KCACHE_MODE0(SQ_CF_KCACHE_NOP));
1203 shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
1204 KCACHE_ADDR0(0),
1205 KCACHE_ADDR1(0),
1206 I_COUNT(20),
1207 USES_WATERFALL(0),
1208 CF_INST(SQ_CF_INST_ALU),
1209 WHOLE_QUAD_MODE(0),
1210 BARRIER(1));
1211
1212 /* 5 - dst */
1213 shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_POS0),
1214 TYPE(SQ_EXPORT_POS),
1215 RW_GPR(2),
1216 RW_REL(ABSOLUTE),
1217 INDEX_GPR(0),
1218 ELEM_SIZE(0));
1219 shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
1220 SRC_SEL_Y(SQ_SEL_Y),
1221 SRC_SEL_Z(SQ_SEL_0),
1222 SRC_SEL_W(SQ_SEL_1),
1223 R6xx_ELEM_LOOP(0),
1224 BURST_COUNT(1),
1225 END_OF_PROGRAM(0),
1226 VALID_PIXEL_MODE(0),
1227 CF_INST(SQ_CF_INST_EXPORT_DONE),
1228 WHOLE_QUAD_MODE(0),
1229 BARRIER(1));
1230 /* 6 - src */
1231 shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(0),
1232 TYPE(SQ_EXPORT_PARAM),
1233 RW_GPR(1),
1234 RW_REL(ABSOLUTE),
1235 INDEX_GPR(0),
1236 ELEM_SIZE(0));
1237 shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
1238 SRC_SEL_Y(SQ_SEL_Y),
1239 SRC_SEL_Z(SQ_SEL_0),
1240 SRC_SEL_W(SQ_SEL_1),
1241 R6xx_ELEM_LOOP(0),
1242 BURST_COUNT(1),
1243 END_OF_PROGRAM(0),
1244 VALID_PIXEL_MODE(0),
1245 CF_INST(SQ_CF_INST_EXPORT),
1246 WHOLE_QUAD_MODE(0),
1247 BARRIER(0));
1248 /* 7 - mask */
1249 shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(1),
1250 TYPE(SQ_EXPORT_PARAM),
1251 RW_GPR(0),
1252 RW_REL(ABSOLUTE),
1253 INDEX_GPR(0),
1254 ELEM_SIZE(0));
1255 shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
1256 SRC_SEL_Y(SQ_SEL_Y),
1257 SRC_SEL_Z(SQ_SEL_0),
1258 SRC_SEL_W(SQ_SEL_1),
1259 R6xx_ELEM_LOOP(0),
1260 BURST_COUNT(1),
1261 END_OF_PROGRAM(0),
1262 VALID_PIXEL_MODE(0),
1263 CF_INST(SQ_CF_INST_EXPORT_DONE),
1264 WHOLE_QUAD_MODE(0),
1265 BARRIER(0));
1266 /* 8 */
1267 shader[i++] = CF_DWORD0(ADDR(0));
1268 shader[i++] = CF_DWORD1(POP_COUNT(0),
1269 CF_CONST(0),
1270 COND(SQ_CF_COND_ACTIVE),
1271 I_COUNT(0),
1272 CALL_COUNT(0),
1273 END_OF_PROGRAM(0),
1274 VALID_PIXEL_MODE(0),
1275 CF_INST(SQ_CF_INST_RETURN),
1276 WHOLE_QUAD_MODE(0),
1277 BARRIER(1));
1278 /* 9 - non-mask sub */
1279 shader[i++] = CF_DWORD0(ADDR(50));
1280 shader[i++] = CF_DWORD1(POP_COUNT(0),
1281 CF_CONST(0),
1282 COND(SQ_CF_COND_ACTIVE),
1283 I_COUNT(2),
1284 CALL_COUNT(0),
1285 END_OF_PROGRAM(0),
1286 VALID_PIXEL_MODE(0),
1287 CF_INST(SQ_CF_INST_VTX),
1288 WHOLE_QUAD_MODE(0),
1289 BARRIER(1));
1290
1291 /* 10 - ALU */
1292 shader[i++] = CF_ALU_DWORD0(ADDR(34),
1293 KCACHE_BANK0(0),
1294 KCACHE_BANK1(0),
1295 KCACHE_MODE0(SQ_CF_KCACHE_NOP));
1296 shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
1297 KCACHE_ADDR0(0),
1298 KCACHE_ADDR1(0),
1299 I_COUNT(10),
1300 USES_WATERFALL(0),
1301 CF_INST(SQ_CF_INST_ALU),
1302 WHOLE_QUAD_MODE(0),
1303 BARRIER(1));
1304
1305 /* 11 - dst */
1306 shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_POS0),
1307 TYPE(SQ_EXPORT_POS),
1308 RW_GPR(1),
1309 RW_REL(ABSOLUTE),
1310 INDEX_GPR(0),
1311 ELEM_SIZE(0));
1312 shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
1313 SRC_SEL_Y(SQ_SEL_Y),
1314 SRC_SEL_Z(SQ_SEL_0),
1315 SRC_SEL_W(SQ_SEL_1),
1316 R6xx_ELEM_LOOP(0),
1317 BURST_COUNT(0),
1318 END_OF_PROGRAM(0),
1319 VALID_PIXEL_MODE(0),
1320 CF_INST(SQ_CF_INST_EXPORT_DONE),
1321 WHOLE_QUAD_MODE(0),
1322 BARRIER(1));
1323 /* 12 - src */
1324 shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(0),
1325 TYPE(SQ_EXPORT_PARAM),
1326 RW_GPR(0),
1327 RW_REL(ABSOLUTE),
1328 INDEX_GPR(0),
1329 ELEM_SIZE(0));
1330 shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
1331 SRC_SEL_Y(SQ_SEL_Y),
1332 SRC_SEL_Z(SQ_SEL_0),
1333 SRC_SEL_W(SQ_SEL_1),
1334 R6xx_ELEM_LOOP(0),
1335 BURST_COUNT(0),
1336 END_OF_PROGRAM(0),
1337 VALID_PIXEL_MODE(0),
1338 CF_INST(SQ_CF_INST_EXPORT_DONE),
1339 WHOLE_QUAD_MODE(0),
1340 BARRIER(0));
1341 /* 13 */
1342 shader[i++] = CF_DWORD0(ADDR(0));
1343 shader[i++] = CF_DWORD1(POP_COUNT(0),
1344 CF_CONST(0),
1345 COND(SQ_CF_COND_ACTIVE),
1346 I_COUNT(0),
1347 CALL_COUNT(0),
1348 END_OF_PROGRAM(0),
1349 VALID_PIXEL_MODE(0),
1350 CF_INST(SQ_CF_INST_RETURN),
1351 WHOLE_QUAD_MODE(0),
1352 BARRIER(1));
1353
1354
1355 /* 14 srcX.x DOT4 - mask */
1356 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1),
1357 SRC0_REL(ABSOLUTE),
1358 SRC0_ELEM(ELEM_X),
1359 SRC0_NEG(0),
1360 SRC1_SEL(ALU_SRC_CFILE_BASE + 0),
1361 SRC1_REL(ABSOLUTE),
1362 SRC1_ELEM(ELEM_X),
1363 SRC1_NEG(0),
1364 INDEX_MODE(SQ_INDEX_LOOP),
1365 PRED_SEL(SQ_PRED_SEL_OFF),
1366 LAST(0));
1367 shader[i++] = ALU_DWORD1_OP2(ChipSet,
1368 SRC0_ABS(0),
1369 SRC1_ABS(0),
1370 UPDATE_EXECUTE_MASK(0),
1371 UPDATE_PRED(0),
1372 WRITE_MASK(1),
1373 FOG_MERGE(0),
1374 OMOD(SQ_ALU_OMOD_OFF),
1375 ALU_INST(SQ_OP2_INST_DOT4),
1376 BANK_SWIZZLE(SQ_ALU_VEC_012),
1377 DST_GPR(3),
1378 DST_REL(ABSOLUTE),
1379 DST_ELEM(ELEM_X),
1380 CLAMP(0));
1381
1382 /* 15 srcX.y DOT4 - mask */
1383 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1),
1384 SRC0_REL(ABSOLUTE),
1385 SRC0_ELEM(ELEM_Y),
1386 SRC0_NEG(0),
1387 SRC1_SEL(ALU_SRC_CFILE_BASE + 0),
1388 SRC1_REL(ABSOLUTE),
1389 SRC1_ELEM(ELEM_Y),
1390 SRC1_NEG(0),
1391 INDEX_MODE(SQ_INDEX_LOOP),
1392 PRED_SEL(SQ_PRED_SEL_OFF),
1393 LAST(0));
1394 shader[i++] = ALU_DWORD1_OP2(ChipSet,
1395 SRC0_ABS(0),
1396 SRC1_ABS(0),
1397 UPDATE_EXECUTE_MASK(0),
1398 UPDATE_PRED(0),
1399 WRITE_MASK(0),
1400 FOG_MERGE(0),
1401 OMOD(SQ_ALU_OMOD_OFF),
1402 ALU_INST(SQ_OP2_INST_DOT4),
1403 BANK_SWIZZLE(SQ_ALU_VEC_012),
1404 DST_GPR(3),
1405 DST_REL(ABSOLUTE),
1406 DST_ELEM(ELEM_Y),
1407 CLAMP(0));
1408
1409 /* 16 srcX.z DOT4 - mask */
1410 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1),
1411 SRC0_REL(ABSOLUTE),
1412 SRC0_ELEM(ELEM_Z),
1413 SRC0_NEG(0),
1414 SRC1_SEL(ALU_SRC_CFILE_BASE + 0),
1415 SRC1_REL(ABSOLUTE),
1416 SRC1_ELEM(ELEM_Z),
1417 SRC1_NEG(0),
1418 INDEX_MODE(SQ_INDEX_LOOP),
1419 PRED_SEL(SQ_PRED_SEL_OFF),
1420 LAST(0));
1421 shader[i++] = ALU_DWORD1_OP2(ChipSet,
1422 SRC0_ABS(0),
1423 SRC1_ABS(0),
1424 UPDATE_EXECUTE_MASK(0),
1425 UPDATE_PRED(0),
1426 WRITE_MASK(0),
1427 FOG_MERGE(0),
1428 OMOD(SQ_ALU_OMOD_OFF),
1429 ALU_INST(SQ_OP2_INST_DOT4),
1430 BANK_SWIZZLE(SQ_ALU_VEC_012),
1431 DST_GPR(3),
1432 DST_REL(ABSOLUTE),
1433 DST_ELEM(ELEM_Z),
1434 CLAMP(0));
1435
1436 /* 17 srcX.w DOT4 - mask */
1437 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1),
1438 SRC0_REL(ABSOLUTE),
1439 SRC0_ELEM(ELEM_W),
1440 SRC0_NEG(0),
1441 SRC1_SEL(ALU_SRC_CFILE_BASE + 0),
1442 SRC1_REL(ABSOLUTE),
1443 SRC1_ELEM(ELEM_W),
1444 SRC1_NEG(0),
1445 INDEX_MODE(SQ_INDEX_LOOP),
1446 PRED_SEL(SQ_PRED_SEL_OFF),
1447 LAST(1));
1448 shader[i++] = ALU_DWORD1_OP2(ChipSet,
1449 SRC0_ABS(0),
1450 SRC1_ABS(0),
1451 UPDATE_EXECUTE_MASK(0),
1452 UPDATE_PRED(0),
1453 WRITE_MASK(0),
1454 FOG_MERGE(0),
1455 OMOD(SQ_ALU_OMOD_OFF),
1456 ALU_INST(SQ_OP2_INST_DOT4),
1457 BANK_SWIZZLE(SQ_ALU_VEC_012),
1458 DST_GPR(3),
1459 DST_REL(ABSOLUTE),
1460 DST_ELEM(ELEM_W),
1461 CLAMP(0));
1462
1463 /* 18 srcY.x DOT4 - mask */
1464 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1),
1465 SRC0_REL(ABSOLUTE),
1466 SRC0_ELEM(ELEM_X),
1467 SRC0_NEG(0),
1468 SRC1_SEL(ALU_SRC_CFILE_BASE + 1),
1469 SRC1_REL(ABSOLUTE),
1470 SRC1_ELEM(ELEM_X),
1471 SRC1_NEG(0),
1472 INDEX_MODE(SQ_INDEX_LOOP),
1473 PRED_SEL(SQ_PRED_SEL_OFF),
1474 LAST(0));
1475 shader[i++] = ALU_DWORD1_OP2(ChipSet,
1476 SRC0_ABS(0),
1477 SRC1_ABS(0),
1478 UPDATE_EXECUTE_MASK(0),
1479 UPDATE_PRED(0),
1480 WRITE_MASK(0),
1481 FOG_MERGE(0),
1482 OMOD(SQ_ALU_OMOD_OFF),
1483 ALU_INST(SQ_OP2_INST_DOT4),
1484 BANK_SWIZZLE(SQ_ALU_VEC_012),
1485 DST_GPR(3),
1486 DST_REL(ABSOLUTE),
1487 DST_ELEM(ELEM_X),
1488 CLAMP(0));
1489
1490 /* 19 srcY.y DOT4 - mask */
1491 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1),
1492 SRC0_REL(ABSOLUTE),
1493 SRC0_ELEM(ELEM_Y),
1494 SRC0_NEG(0),
1495 SRC1_SEL(ALU_SRC_CFILE_BASE + 1),
1496 SRC1_REL(ABSOLUTE),
1497 SRC1_ELEM(ELEM_Y),
1498 SRC1_NEG(0),
1499 INDEX_MODE(SQ_INDEX_LOOP),
1500 PRED_SEL(SQ_PRED_SEL_OFF),
1501 LAST(0));
1502 shader[i++] = ALU_DWORD1_OP2(ChipSet,
1503 SRC0_ABS(0),
1504 SRC1_ABS(0),
1505 UPDATE_EXECUTE_MASK(0),
1506 UPDATE_PRED(0),
1507 WRITE_MASK(1),
1508 FOG_MERGE(0),
1509 OMOD(SQ_ALU_OMOD_OFF),
1510 ALU_INST(SQ_OP2_INST_DOT4),
1511 BANK_SWIZZLE(SQ_ALU_VEC_012),
1512 DST_GPR(3),
1513 DST_REL(ABSOLUTE),
1514 DST_ELEM(ELEM_Y),
1515 CLAMP(0));
1516
1517 /* 20 srcY.z DOT4 - mask */
1518 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1),
1519 SRC0_REL(ABSOLUTE),
1520 SRC0_ELEM(ELEM_Z),
1521 SRC0_NEG(0),
1522 SRC1_SEL(ALU_SRC_CFILE_BASE + 1),
1523 SRC1_REL(ABSOLUTE),
1524 SRC1_ELEM(ELEM_Z),
1525 SRC1_NEG(0),
1526 INDEX_MODE(SQ_INDEX_LOOP),
1527 PRED_SEL(SQ_PRED_SEL_OFF),
1528 LAST(0));
1529 shader[i++] = ALU_DWORD1_OP2(ChipSet,
1530 SRC0_ABS(0),
1531 SRC1_ABS(0),
1532 UPDATE_EXECUTE_MASK(0),
1533 UPDATE_PRED(0),
1534 WRITE_MASK(0),
1535 FOG_MERGE(0),
1536 OMOD(SQ_ALU_OMOD_OFF),
1537 ALU_INST(SQ_OP2_INST_DOT4),
1538 BANK_SWIZZLE(SQ_ALU_VEC_012),
1539 DST_GPR(3),
1540 DST_REL(ABSOLUTE),
1541 DST_ELEM(ELEM_Z),
1542 CLAMP(0));
1543
1544 /* 21 srcY.w DOT4 - mask */
1545 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1),
1546 SRC0_REL(ABSOLUTE),
1547 SRC0_ELEM(ELEM_W),
1548 SRC0_NEG(0),
1549 SRC1_SEL(ALU_SRC_CFILE_BASE + 1),
1550 SRC1_REL(ABSOLUTE),
1551 SRC1_ELEM(ELEM_W),
1552 SRC1_NEG(0),
1553 INDEX_MODE(SQ_INDEX_LOOP),
1554 PRED_SEL(SQ_PRED_SEL_OFF),
1555 LAST(1));
1556 shader[i++] = ALU_DWORD1_OP2(ChipSet,
1557 SRC0_ABS(0),
1558 SRC1_ABS(0),
1559 UPDATE_EXECUTE_MASK(0),
1560 UPDATE_PRED(0),
1561 WRITE_MASK(0),
1562 FOG_MERGE(0),
1563 OMOD(SQ_ALU_OMOD_OFF),
1564 ALU_INST(SQ_OP2_INST_DOT4),
1565 BANK_SWIZZLE(SQ_ALU_VEC_012),
1566 DST_GPR(3),
1567 DST_REL(ABSOLUTE),
1568 DST_ELEM(ELEM_W),
1569 CLAMP(0));
1570
1571 /* 22 maskX.x DOT4 - mask */
1572 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
1573 SRC0_REL(ABSOLUTE),
1574 SRC0_ELEM(ELEM_X),
1575 SRC0_NEG(0),
1576 SRC1_SEL(ALU_SRC_CFILE_BASE + 2),
1577 SRC1_REL(ABSOLUTE),
1578 SRC1_ELEM(ELEM_X),
1579 SRC1_NEG(0),
1580 INDEX_MODE(SQ_INDEX_LOOP),
1581 PRED_SEL(SQ_PRED_SEL_OFF),
1582 LAST(0));
1583 shader[i++] = ALU_DWORD1_OP2(ChipSet,
1584 SRC0_ABS(0),
1585 SRC1_ABS(0),
1586 UPDATE_EXECUTE_MASK(0),
1587 UPDATE_PRED(0),
1588 WRITE_MASK(1),
1589 FOG_MERGE(0),
1590 OMOD(SQ_ALU_OMOD_OFF),
1591 ALU_INST(SQ_OP2_INST_DOT4),
1592 BANK_SWIZZLE(SQ_ALU_VEC_012),
1593 DST_GPR(4),
1594 DST_REL(ABSOLUTE),
1595 DST_ELEM(ELEM_X),
1596 CLAMP(0));
1597
1598 /* 23 maskX.y DOT4 - mask */
1599 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
1600 SRC0_REL(ABSOLUTE),
1601 SRC0_ELEM(ELEM_Y),
1602 SRC0_NEG(0),
1603 SRC1_SEL(ALU_SRC_CFILE_BASE + 2),
1604 SRC1_REL(ABSOLUTE),
1605 SRC1_ELEM(ELEM_Y),
1606 SRC1_NEG(0),
1607 INDEX_MODE(SQ_INDEX_LOOP),
1608 PRED_SEL(SQ_PRED_SEL_OFF),
1609 LAST(0));
1610 shader[i++] = ALU_DWORD1_OP2(ChipSet,
1611 SRC0_ABS(0),
1612 SRC1_ABS(0),
1613 UPDATE_EXECUTE_MASK(0),
1614 UPDATE_PRED(0),
1615 WRITE_MASK(0),
1616 FOG_MERGE(0),
1617 OMOD(SQ_ALU_OMOD_OFF),
1618 ALU_INST(SQ_OP2_INST_DOT4),
1619 BANK_SWIZZLE(SQ_ALU_VEC_012),
1620 DST_GPR(4),
1621 DST_REL(ABSOLUTE),
1622 DST_ELEM(ELEM_Y),
1623 CLAMP(0));
1624
1625 /* 24 maskX.z DOT4 - mask */
1626 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
1627 SRC0_REL(ABSOLUTE),
1628 SRC0_ELEM(ELEM_Z),
1629 SRC0_NEG(0),
1630 SRC1_SEL(ALU_SRC_CFILE_BASE + 2),
1631 SRC1_REL(ABSOLUTE),
1632 SRC1_ELEM(ELEM_Z),
1633 SRC1_NEG(0),
1634 INDEX_MODE(SQ_INDEX_LOOP),
1635 PRED_SEL(SQ_PRED_SEL_OFF),
1636 LAST(0));
1637 shader[i++] = ALU_DWORD1_OP2(ChipSet,
1638 SRC0_ABS(0),
1639 SRC1_ABS(0),
1640 UPDATE_EXECUTE_MASK(0),
1641 UPDATE_PRED(0),
1642 WRITE_MASK(0),
1643 FOG_MERGE(0),
1644 OMOD(SQ_ALU_OMOD_OFF),
1645 ALU_INST(SQ_OP2_INST_DOT4),
1646 BANK_SWIZZLE(SQ_ALU_VEC_012),
1647 DST_GPR(4),
1648 DST_REL(ABSOLUTE),
1649 DST_ELEM(ELEM_Z),
1650 CLAMP(0));
1651
1652 /* 25 maskX.w DOT4 - mask */
1653 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
1654 SRC0_REL(ABSOLUTE),
1655 SRC0_ELEM(ELEM_W),
1656 SRC0_NEG(0),
1657 SRC1_SEL(ALU_SRC_CFILE_BASE + 2),
1658 SRC1_REL(ABSOLUTE),
1659 SRC1_ELEM(ELEM_W),
1660 SRC1_NEG(0),
1661 INDEX_MODE(SQ_INDEX_LOOP),
1662 PRED_SEL(SQ_PRED_SEL_OFF),
1663 LAST(1));
1664 shader[i++] = ALU_DWORD1_OP2(ChipSet,
1665 SRC0_ABS(0),
1666 SRC1_ABS(0),
1667 UPDATE_EXECUTE_MASK(0),
1668 UPDATE_PRED(0),
1669 WRITE_MASK(0),
1670 FOG_MERGE(0),
1671 OMOD(SQ_ALU_OMOD_OFF),
1672 ALU_INST(SQ_OP2_INST_DOT4),
1673 BANK_SWIZZLE(SQ_ALU_VEC_012),
1674 DST_GPR(4),
1675 DST_REL(ABSOLUTE),
1676 DST_ELEM(ELEM_W),
1677 CLAMP(0));
1678
1679 /* 26 maskY.x DOT4 - mask */
1680 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
1681 SRC0_REL(ABSOLUTE),
1682 SRC0_ELEM(ELEM_X),
1683 SRC0_NEG(0),
1684 SRC1_SEL(ALU_SRC_CFILE_BASE + 3),
1685 SRC1_REL(ABSOLUTE),
1686 SRC1_ELEM(ELEM_X),
1687 SRC1_NEG(0),
1688 INDEX_MODE(SQ_INDEX_LOOP),
1689 PRED_SEL(SQ_PRED_SEL_OFF),
1690 LAST(0));
1691 shader[i++] = ALU_DWORD1_OP2(ChipSet,
1692 SRC0_ABS(0),
1693 SRC1_ABS(0),
1694 UPDATE_EXECUTE_MASK(0),
1695 UPDATE_PRED(0),
1696 WRITE_MASK(0),
1697 FOG_MERGE(0),
1698 OMOD(SQ_ALU_OMOD_OFF),
1699 ALU_INST(SQ_OP2_INST_DOT4),
1700 BANK_SWIZZLE(SQ_ALU_VEC_012),
1701 DST_GPR(4),
1702 DST_REL(ABSOLUTE),
1703 DST_ELEM(ELEM_X),
1704 CLAMP(0));
1705
1706 /* 27 maskY.y DOT4 - mask */
1707 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
1708 SRC0_REL(ABSOLUTE),
1709 SRC0_ELEM(ELEM_Y),
1710 SRC0_NEG(0),
1711 SRC1_SEL(ALU_SRC_CFILE_BASE + 3),
1712 SRC1_REL(ABSOLUTE),
1713 SRC1_ELEM(ELEM_Y),
1714 SRC1_NEG(0),
1715 INDEX_MODE(SQ_INDEX_LOOP),
1716 PRED_SEL(SQ_PRED_SEL_OFF),
1717 LAST(0));
1718 shader[i++] = ALU_DWORD1_OP2(ChipSet,
1719 SRC0_ABS(0),
1720 SRC1_ABS(0),
1721 UPDATE_EXECUTE_MASK(0),
1722 UPDATE_PRED(0),
1723 WRITE_MASK(1),
1724 FOG_MERGE(0),
1725 OMOD(SQ_ALU_OMOD_OFF),
1726 ALU_INST(SQ_OP2_INST_DOT4),
1727 BANK_SWIZZLE(SQ_ALU_VEC_012),
1728 DST_GPR(4),
1729 DST_REL(ABSOLUTE),
1730 DST_ELEM(ELEM_Y),
1731 CLAMP(0));
1732
1733 /* 28 maskY.z DOT4 - mask */
1734 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
1735 SRC0_REL(ABSOLUTE),
1736 SRC0_ELEM(ELEM_Z),
1737 SRC0_NEG(0),
1738 SRC1_SEL(ALU_SRC_CFILE_BASE + 3),
1739 SRC1_REL(ABSOLUTE),
1740 SRC1_ELEM(ELEM_Z),
1741 SRC1_NEG(0),
1742 INDEX_MODE(SQ_INDEX_LOOP),
1743 PRED_SEL(SQ_PRED_SEL_OFF),
1744 LAST(0));
1745 shader[i++] = ALU_DWORD1_OP2(ChipSet,
1746 SRC0_ABS(0),
1747 SRC1_ABS(0),
1748 UPDATE_EXECUTE_MASK(0),
1749 UPDATE_PRED(0),
1750 WRITE_MASK(0),
1751 FOG_MERGE(0),
1752 OMOD(SQ_ALU_OMOD_OFF),
1753 ALU_INST(SQ_OP2_INST_DOT4),
1754 BANK_SWIZZLE(SQ_ALU_VEC_012),
1755 DST_GPR(4),
1756 DST_REL(ABSOLUTE),
1757 DST_ELEM(ELEM_Z),
1758 CLAMP(0));
1759
1760 /* 29 maskY.w DOT4 - mask */
1761 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
1762 SRC0_REL(ABSOLUTE),
1763 SRC0_ELEM(ELEM_W),
1764 SRC0_NEG(0),
1765 SRC1_SEL(ALU_SRC_CFILE_BASE + 3),
1766 SRC1_REL(ABSOLUTE),
1767 SRC1_ELEM(ELEM_W),
1768 SRC1_NEG(0),
1769 INDEX_MODE(SQ_INDEX_LOOP),
1770 PRED_SEL(SQ_PRED_SEL_OFF),
1771 LAST(1));
1772 shader[i++] = ALU_DWORD1_OP2(ChipSet,
1773 SRC0_ABS(0),
1774 SRC1_ABS(0),
1775 UPDATE_EXECUTE_MASK(0),
1776 UPDATE_PRED(0),
1777 WRITE_MASK(0),
1778 FOG_MERGE(0),
1779 OMOD(SQ_ALU_OMOD_OFF),
1780 ALU_INST(SQ_OP2_INST_DOT4),
1781 BANK_SWIZZLE(SQ_ALU_VEC_012),
1782 DST_GPR(4),
1783 DST_REL(ABSOLUTE),
1784 DST_ELEM(ELEM_W),
1785 CLAMP(0));
1786
1787 /* 30 srcX / w */
1788 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 3),
1789 SRC0_REL(ABSOLUTE),
1790 SRC0_ELEM(ELEM_X),
1791 SRC0_NEG(0),
1792 SRC1_SEL(ALU_SRC_CFILE_BASE + 0),
1793 SRC1_REL(ABSOLUTE),
1794 SRC1_ELEM(ELEM_W),
1795 SRC1_NEG(0),
1796 INDEX_MODE(SQ_INDEX_AR_X),
1797 PRED_SEL(SQ_PRED_SEL_OFF),
1798 LAST(1));
1799 shader[i++] = ALU_DWORD1_OP2(ChipSet,
1800 SRC0_ABS(0),
1801 SRC1_ABS(0),
1802 UPDATE_EXECUTE_MASK(0),
1803 UPDATE_PRED(0),
1804 WRITE_MASK(1),
1805 FOG_MERGE(0),
1806 OMOD(SQ_ALU_OMOD_OFF),
1807 ALU_INST(SQ_OP2_INST_MUL),
1808 BANK_SWIZZLE(SQ_ALU_VEC_012),
1809 DST_GPR(1),
1810 DST_REL(ABSOLUTE),
1811 DST_ELEM(ELEM_X),
1812 CLAMP(0));
1813
1814 /* 31 srcY / h */
1815 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 3),
1816 SRC0_REL(ABSOLUTE),
1817 SRC0_ELEM(ELEM_Y),
1818 SRC0_NEG(0),
1819 SRC1_SEL(ALU_SRC_CFILE_BASE + 1),
1820 SRC1_REL(ABSOLUTE),
1821 SRC1_ELEM(ELEM_W),
1822 SRC1_NEG(0),
1823 INDEX_MODE(SQ_INDEX_AR_X),
1824 PRED_SEL(SQ_PRED_SEL_OFF),
1825 LAST(1));
1826 shader[i++] = ALU_DWORD1_OP2(ChipSet,
1827 SRC0_ABS(0),
1828 SRC1_ABS(0),
1829 UPDATE_EXECUTE_MASK(0),
1830 UPDATE_PRED(0),
1831 WRITE_MASK(1),
1832 FOG_MERGE(0),
1833 OMOD(SQ_ALU_OMOD_OFF),
1834 ALU_INST(SQ_OP2_INST_MUL),
1835 BANK_SWIZZLE(SQ_ALU_VEC_012),
1836 DST_GPR(1),
1837 DST_REL(ABSOLUTE),
1838 DST_ELEM(ELEM_Y),
1839 CLAMP(0));
1840
1841 /* 32 maskX / w */
1842 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 4),
1843 SRC0_REL(ABSOLUTE),
1844 SRC0_ELEM(ELEM_X),
1845 SRC0_NEG(0),
1846 SRC1_SEL(ALU_SRC_CFILE_BASE + 2),
1847 SRC1_REL(ABSOLUTE),
1848 SRC1_ELEM(ELEM_W),
1849 SRC1_NEG(0),
1850 INDEX_MODE(SQ_INDEX_AR_X),
1851 PRED_SEL(SQ_PRED_SEL_OFF),
1852 LAST(1));
1853 shader[i++] = ALU_DWORD1_OP2(ChipSet,
1854 SRC0_ABS(0),
1855 SRC1_ABS(0),
1856 UPDATE_EXECUTE_MASK(0),
1857 UPDATE_PRED(0),
1858 WRITE_MASK(1),
1859 FOG_MERGE(0),
1860 OMOD(SQ_ALU_OMOD_OFF),
1861 ALU_INST(SQ_OP2_INST_MUL),
1862 BANK_SWIZZLE(SQ_ALU_VEC_012),
1863 DST_GPR(0),
1864 DST_REL(ABSOLUTE),
1865 DST_ELEM(ELEM_X),
1866 CLAMP(0));
1867
1868 /* 33 maskY / h */
1869 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 4),
1870 SRC0_REL(ABSOLUTE),
1871 SRC0_ELEM(ELEM_Y),
1872 SRC0_NEG(0),
1873 SRC1_SEL(ALU_SRC_CFILE_BASE + 3),
1874 SRC1_REL(ABSOLUTE),
1875 SRC1_ELEM(ELEM_W),
1876 SRC1_NEG(0),
1877 INDEX_MODE(SQ_INDEX_AR_X),
1878 PRED_SEL(SQ_PRED_SEL_OFF),
1879 LAST(1));
1880 shader[i++] = ALU_DWORD1_OP2(ChipSet,
1881 SRC0_ABS(0),
1882 SRC1_ABS(0),
1883 UPDATE_EXECUTE_MASK(0),
1884 UPDATE_PRED(0),
1885 WRITE_MASK(1),
1886 FOG_MERGE(0),
1887 OMOD(SQ_ALU_OMOD_OFF),
1888 ALU_INST(SQ_OP2_INST_MUL),
1889 BANK_SWIZZLE(SQ_ALU_VEC_012),
1890 DST_GPR(0),
1891 DST_REL(ABSOLUTE),
1892 DST_ELEM(ELEM_Y),
1893 CLAMP(0));
1894
1895 /* 34 srcX.x DOT4 - non-mask */
1896 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
1897 SRC0_REL(ABSOLUTE),
1898 SRC0_ELEM(ELEM_X),
1899 SRC0_NEG(0),
1900 SRC1_SEL(ALU_SRC_CFILE_BASE + 0),
1901 SRC1_REL(ABSOLUTE),
1902 SRC1_ELEM(ELEM_X),
1903 SRC1_NEG(0),
1904 INDEX_MODE(SQ_INDEX_LOOP),
1905 PRED_SEL(SQ_PRED_SEL_OFF),
1906 LAST(0));
1907 shader[i++] = ALU_DWORD1_OP2(ChipSet,
1908 SRC0_ABS(0),
1909 SRC1_ABS(0),
1910 UPDATE_EXECUTE_MASK(0),
1911 UPDATE_PRED(0),
1912 WRITE_MASK(1),
1913 FOG_MERGE(0),
1914 OMOD(SQ_ALU_OMOD_OFF),
1915 ALU_INST(SQ_OP2_INST_DOT4),
1916 BANK_SWIZZLE(SQ_ALU_VEC_012),
1917 DST_GPR(2),
1918 DST_REL(ABSOLUTE),
1919 DST_ELEM(ELEM_X),
1920 CLAMP(0));
1921
1922 /* 35 srcX.y DOT4 - non-mask */
1923 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
1924 SRC0_REL(ABSOLUTE),
1925 SRC0_ELEM(ELEM_Y),
1926 SRC0_NEG(0),
1927 SRC1_SEL(ALU_SRC_CFILE_BASE + 0),
1928 SRC1_REL(ABSOLUTE),
1929 SRC1_ELEM(ELEM_Y),
1930 SRC1_NEG(0),
1931 INDEX_MODE(SQ_INDEX_LOOP),
1932 PRED_SEL(SQ_PRED_SEL_OFF),
1933 LAST(0));
1934 shader[i++] = ALU_DWORD1_OP2(ChipSet,
1935 SRC0_ABS(0),
1936 SRC1_ABS(0),
1937 UPDATE_EXECUTE_MASK(0),
1938 UPDATE_PRED(0),
1939 WRITE_MASK(0),
1940 FOG_MERGE(0),
1941 OMOD(SQ_ALU_OMOD_OFF),
1942 ALU_INST(SQ_OP2_INST_DOT4),
1943 BANK_SWIZZLE(SQ_ALU_VEC_012),
1944 DST_GPR(2),
1945 DST_REL(ABSOLUTE),
1946 DST_ELEM(ELEM_Y),
1947 CLAMP(0));
1948
1949 /* 36 srcX.z DOT4 - non-mask */
1950 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
1951 SRC0_REL(ABSOLUTE),
1952 SRC0_ELEM(ELEM_Z),
1953 SRC0_NEG(0),
1954 SRC1_SEL(ALU_SRC_CFILE_BASE + 0),
1955 SRC1_REL(ABSOLUTE),
1956 SRC1_ELEM(ELEM_Z),
1957 SRC1_NEG(0),
1958 INDEX_MODE(SQ_INDEX_LOOP),
1959 PRED_SEL(SQ_PRED_SEL_OFF),
1960 LAST(0));
1961 shader[i++] = ALU_DWORD1_OP2(ChipSet,
1962 SRC0_ABS(0),
1963 SRC1_ABS(0),
1964 UPDATE_EXECUTE_MASK(0),
1965 UPDATE_PRED(0),
1966 WRITE_MASK(0),
1967 FOG_MERGE(0),
1968 OMOD(SQ_ALU_OMOD_OFF),
1969 ALU_INST(SQ_OP2_INST_DOT4),
1970 BANK_SWIZZLE(SQ_ALU_VEC_012),
1971 DST_GPR(2),
1972 DST_REL(ABSOLUTE),
1973 DST_ELEM(ELEM_Z),
1974 CLAMP(0));
1975
1976 /* 37 srcX.w DOT4 - non-mask */
1977 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
1978 SRC0_REL(ABSOLUTE),
1979 SRC0_ELEM(ELEM_W),
1980 SRC0_NEG(0),
1981 SRC1_SEL(ALU_SRC_CFILE_BASE + 0),
1982 SRC1_REL(ABSOLUTE),
1983 SRC1_ELEM(ELEM_W),
1984 SRC1_NEG(0),
1985 INDEX_MODE(SQ_INDEX_LOOP),
1986 PRED_SEL(SQ_PRED_SEL_OFF),
1987 LAST(1));
1988 shader[i++] = ALU_DWORD1_OP2(ChipSet,
1989 SRC0_ABS(0),
1990 SRC1_ABS(0),
1991 UPDATE_EXECUTE_MASK(0),
1992 UPDATE_PRED(0),
1993 WRITE_MASK(0),
1994 FOG_MERGE(0),
1995 OMOD(SQ_ALU_OMOD_OFF),
1996 ALU_INST(SQ_OP2_INST_DOT4),
1997 BANK_SWIZZLE(SQ_ALU_VEC_012),
1998 DST_GPR(2),
1999 DST_REL(ABSOLUTE),
2000 DST_ELEM(ELEM_W),
2001 CLAMP(0));
2002
2003 /* 38 srcY.x DOT4 - non-mask */
2004 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
2005 SRC0_REL(ABSOLUTE),
2006 SRC0_ELEM(ELEM_X),
2007 SRC0_NEG(0),
2008 SRC1_SEL(ALU_SRC_CFILE_BASE + 1),
2009 SRC1_REL(ABSOLUTE),
2010 SRC1_ELEM(ELEM_X),
2011 SRC1_NEG(0),
2012 INDEX_MODE(SQ_INDEX_LOOP),
2013 PRED_SEL(SQ_PRED_SEL_OFF),
2014 LAST(0));
2015 shader[i++] = ALU_DWORD1_OP2(ChipSet,
2016 SRC0_ABS(0),
2017 SRC1_ABS(0),
2018 UPDATE_EXECUTE_MASK(0),
2019 UPDATE_PRED(0),
2020 WRITE_MASK(0),
2021 FOG_MERGE(0),
2022 OMOD(SQ_ALU_OMOD_OFF),
2023 ALU_INST(SQ_OP2_INST_DOT4),
2024 BANK_SWIZZLE(SQ_ALU_VEC_012),
2025 DST_GPR(2),
2026 DST_REL(ABSOLUTE),
2027 DST_ELEM(ELEM_X),
2028 CLAMP(0));
2029
2030 /* 39 srcY.y DOT4 - non-mask */
2031 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
2032 SRC0_REL(ABSOLUTE),
2033 SRC0_ELEM(ELEM_Y),
2034 SRC0_NEG(0),
2035 SRC1_SEL(ALU_SRC_CFILE_BASE + 1),
2036 SRC1_REL(ABSOLUTE),
2037 SRC1_ELEM(ELEM_Y),
2038 SRC1_NEG(0),
2039 INDEX_MODE(SQ_INDEX_LOOP),
2040 PRED_SEL(SQ_PRED_SEL_OFF),
2041 LAST(0));
2042 shader[i++] = ALU_DWORD1_OP2(ChipSet,
2043 SRC0_ABS(0),
2044 SRC1_ABS(0),
2045 UPDATE_EXECUTE_MASK(0),
2046 UPDATE_PRED(0),
2047 WRITE_MASK(1),
2048 FOG_MERGE(0),
2049 OMOD(SQ_ALU_OMOD_OFF),
2050 ALU_INST(SQ_OP2_INST_DOT4),
2051 BANK_SWIZZLE(SQ_ALU_VEC_012),
2052 DST_GPR(2),
2053 DST_REL(ABSOLUTE),
2054 DST_ELEM(ELEM_Y),
2055 CLAMP(0));
2056
2057 /* 40 srcY.z DOT4 - non-mask */
2058 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
2059 SRC0_REL(ABSOLUTE),
2060 SRC0_ELEM(ELEM_Z),
2061 SRC0_NEG(0),
2062 SRC1_SEL(ALU_SRC_CFILE_BASE + 1),
2063 SRC1_REL(ABSOLUTE),
2064 SRC1_ELEM(ELEM_Z),
2065 SRC1_NEG(0),
2066 INDEX_MODE(SQ_INDEX_LOOP),
2067 PRED_SEL(SQ_PRED_SEL_OFF),
2068 LAST(0));
2069 shader[i++] = ALU_DWORD1_OP2(ChipSet,
2070 SRC0_ABS(0),
2071 SRC1_ABS(0),
2072 UPDATE_EXECUTE_MASK(0),
2073 UPDATE_PRED(0),
2074 WRITE_MASK(0),
2075 FOG_MERGE(0),
2076 OMOD(SQ_ALU_OMOD_OFF),
2077 ALU_INST(SQ_OP2_INST_DOT4),
2078 BANK_SWIZZLE(SQ_ALU_VEC_012),
2079 DST_GPR(2),
2080 DST_REL(ABSOLUTE),
2081 DST_ELEM(ELEM_Z),
2082 CLAMP(0));
2083
2084 /* 41 srcY.w DOT4 - non-mask */
2085 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
2086 SRC0_REL(ABSOLUTE),
2087 SRC0_ELEM(ELEM_W),
2088 SRC0_NEG(0),
2089 SRC1_SEL(ALU_SRC_CFILE_BASE + 1),
2090 SRC1_REL(ABSOLUTE),
2091 SRC1_ELEM(ELEM_W),
2092 SRC1_NEG(0),
2093 INDEX_MODE(SQ_INDEX_LOOP),
2094 PRED_SEL(SQ_PRED_SEL_OFF),
2095 LAST(1));
2096 shader[i++] = ALU_DWORD1_OP2(ChipSet,
2097 SRC0_ABS(0),
2098 SRC1_ABS(0),
2099 UPDATE_EXECUTE_MASK(0),
2100 UPDATE_PRED(0),
2101 WRITE_MASK(0),
2102 FOG_MERGE(0),
2103 OMOD(SQ_ALU_OMOD_OFF),
2104 ALU_INST(SQ_OP2_INST_DOT4),
2105 BANK_SWIZZLE(SQ_ALU_VEC_012),
2106 DST_GPR(2),
2107 DST_REL(ABSOLUTE),
2108 DST_ELEM(ELEM_W),
2109 CLAMP(0));
2110
2111 /* 42 srcX / w */
2112 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 2),
2113 SRC0_REL(ABSOLUTE),
2114 SRC0_ELEM(ELEM_X),
2115 SRC0_NEG(0),
2116 SRC1_SEL(ALU_SRC_CFILE_BASE + 0),
2117 SRC1_REL(ABSOLUTE),
2118 SRC1_ELEM(ELEM_W),
2119 SRC1_NEG(0),
2120 INDEX_MODE(SQ_INDEX_AR_X),
2121 PRED_SEL(SQ_PRED_SEL_OFF),
2122 LAST(1));
2123 shader[i++] = ALU_DWORD1_OP2(ChipSet,
2124 SRC0_ABS(0),
2125 SRC1_ABS(0),
2126 UPDATE_EXECUTE_MASK(0),
2127 UPDATE_PRED(0),
2128 WRITE_MASK(1),
2129 FOG_MERGE(0),
2130 OMOD(SQ_ALU_OMOD_OFF),
2131 ALU_INST(SQ_OP2_INST_MUL),
2132 BANK_SWIZZLE(SQ_ALU_VEC_012),
2133 DST_GPR(0),
2134 DST_REL(ABSOLUTE),
2135 DST_ELEM(ELEM_X),
2136 CLAMP(0));
2137
2138 /* 43 srcY / h */
2139 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 2),
2140 SRC0_REL(ABSOLUTE),
2141 SRC0_ELEM(ELEM_Y),
2142 SRC0_NEG(0),
2143 SRC1_SEL(ALU_SRC_CFILE_BASE + 1),
2144 SRC1_REL(ABSOLUTE),
2145 SRC1_ELEM(ELEM_W),
2146 SRC1_NEG(0),
2147 INDEX_MODE(SQ_INDEX_AR_X),
2148 PRED_SEL(SQ_PRED_SEL_OFF),
2149 LAST(1));
2150 shader[i++] = ALU_DWORD1_OP2(ChipSet,
2151 SRC0_ABS(0),
2152 SRC1_ABS(0),
2153 UPDATE_EXECUTE_MASK(0),
2154 UPDATE_PRED(0),
2155 WRITE_MASK(1),
2156 FOG_MERGE(0),
2157 OMOD(SQ_ALU_OMOD_OFF),
2158 ALU_INST(SQ_OP2_INST_MUL),
2159 BANK_SWIZZLE(SQ_ALU_VEC_012),
2160 DST_GPR(0),
2161 DST_REL(ABSOLUTE),
2162 DST_ELEM(ELEM_Y),
2163 CLAMP(0));
2164
2165 /* 44/45 - dst - mask */
2166 shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
2167 FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
2168 FETCH_WHOLE_QUAD(0),
2169 BUFFER_ID(0),
2170 SRC_GPR(0),
2171 SRC_REL(ABSOLUTE),
2172 SRC_SEL_X(SQ_SEL_X),
2173 MEGA_FETCH_COUNT(24));
2174 shader[i++] = VTX_DWORD1_GPR(DST_GPR(2),
2175 DST_REL(0),
2176 DST_SEL_X(SQ_SEL_X),
2177 DST_SEL_Y(SQ_SEL_Y),
2178 DST_SEL_Z(SQ_SEL_0),
2179 DST_SEL_W(SQ_SEL_1),
2180 USE_CONST_FIELDS(0),
2181 DATA_FORMAT(FMT_32_32_FLOAT),
2182 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED),
2183 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
2184 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
2185 shader[i++] = VTX_DWORD2(OFFSET(0),
2186 #if X_BYTE_ORDER == X_BIG_ENDIAN
2187 ENDIAN_SWAP(SQ_ENDIAN_8IN32),
2188 #else
2189 ENDIAN_SWAP(SQ_ENDIAN_NONE),
2190 #endif
2191 CONST_BUF_NO_STRIDE(0),
2192 MEGA_FETCH(1));
2193 shader[i++] = VTX_DWORD_PAD;
2194 /* 46/47 - src */
2195 shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
2196 FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
2197 FETCH_WHOLE_QUAD(0),
2198 BUFFER_ID(0),
2199 SRC_GPR(0),
2200 SRC_REL(ABSOLUTE),
2201 SRC_SEL_X(SQ_SEL_X),
2202 MEGA_FETCH_COUNT(8));
2203 shader[i++] = VTX_DWORD1_GPR(DST_GPR(1),
2204 DST_REL(0),
2205 DST_SEL_X(SQ_SEL_X),
2206 DST_SEL_Y(SQ_SEL_Y),
2207 DST_SEL_Z(SQ_SEL_1),
2208 DST_SEL_W(SQ_SEL_0),
2209 USE_CONST_FIELDS(0),
2210 DATA_FORMAT(FMT_32_32_FLOAT),
2211 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED),
2212 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
2213 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
2214 shader[i++] = VTX_DWORD2(OFFSET(8),
2215 #if X_BYTE_ORDER == X_BIG_ENDIAN
2216 ENDIAN_SWAP(SQ_ENDIAN_8IN32),
2217 #else
2218 ENDIAN_SWAP(SQ_ENDIAN_NONE),
2219 #endif
2220 CONST_BUF_NO_STRIDE(0),
2221 MEGA_FETCH(0));
2222 shader[i++] = VTX_DWORD_PAD;
2223 /* 48/49 - mask */
2224 shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
2225 FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
2226 FETCH_WHOLE_QUAD(0),
2227 BUFFER_ID(0),
2228 SRC_GPR(0),
2229 SRC_REL(ABSOLUTE),
2230 SRC_SEL_X(SQ_SEL_X),
2231 MEGA_FETCH_COUNT(8));
2232 shader[i++] = VTX_DWORD1_GPR(DST_GPR(0),
2233 DST_REL(0),
2234 DST_SEL_X(SQ_SEL_X),
2235 DST_SEL_Y(SQ_SEL_Y),
2236 DST_SEL_Z(SQ_SEL_1),
2237 DST_SEL_W(SQ_SEL_0),
2238 USE_CONST_FIELDS(0),
2239 DATA_FORMAT(FMT_32_32_FLOAT),
2240 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED),
2241 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
2242 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
2243 shader[i++] = VTX_DWORD2(OFFSET(16),
2244 #if X_BYTE_ORDER == X_BIG_ENDIAN
2245 ENDIAN_SWAP(SQ_ENDIAN_8IN32),
2246 #else
2247 ENDIAN_SWAP(SQ_ENDIAN_NONE),
2248 #endif
2249 CONST_BUF_NO_STRIDE(0),
2250 MEGA_FETCH(0));
2251 shader[i++] = VTX_DWORD_PAD;
2252
2253 /* 50/51 - dst - non-mask */
2254 shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
2255 FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
2256 FETCH_WHOLE_QUAD(0),
2257 BUFFER_ID(0),
2258 SRC_GPR(0),
2259 SRC_REL(ABSOLUTE),
2260 SRC_SEL_X(SQ_SEL_X),
2261 MEGA_FETCH_COUNT(16));
2262 shader[i++] = VTX_DWORD1_GPR(DST_GPR(1),
2263 DST_REL(0),
2264 DST_SEL_X(SQ_SEL_X),
2265 DST_SEL_Y(SQ_SEL_Y),
2266 DST_SEL_Z(SQ_SEL_0),
2267 DST_SEL_W(SQ_SEL_1),
2268 USE_CONST_FIELDS(0),
2269 DATA_FORMAT(FMT_32_32_FLOAT),
2270 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED),
2271 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
2272 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
2273 shader[i++] = VTX_DWORD2(OFFSET(0),
2274 #if X_BYTE_ORDER == X_BIG_ENDIAN
2275 ENDIAN_SWAP(SQ_ENDIAN_8IN32),
2276 #else
2277 ENDIAN_SWAP(SQ_ENDIAN_NONE),
2278 #endif
2279 CONST_BUF_NO_STRIDE(0),
2280 MEGA_FETCH(1));
2281 shader[i++] = VTX_DWORD_PAD;
2282 /* 52/53 - src */
2283 shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
2284 FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
2285 FETCH_WHOLE_QUAD(0),
2286 BUFFER_ID(0),
2287 SRC_GPR(0),
2288 SRC_REL(ABSOLUTE),
2289 SRC_SEL_X(SQ_SEL_X),
2290 MEGA_FETCH_COUNT(8));
2291 shader[i++] = VTX_DWORD1_GPR(DST_GPR(0),
2292 DST_REL(0),
2293 DST_SEL_X(SQ_SEL_X),
2294 DST_SEL_Y(SQ_SEL_Y),
2295 DST_SEL_Z(SQ_SEL_1),
2296 DST_SEL_W(SQ_SEL_0),
2297 USE_CONST_FIELDS(0),
2298 DATA_FORMAT(FMT_32_32_FLOAT),
2299 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED),
2300 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
2301 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
2302 shader[i++] = VTX_DWORD2(OFFSET(8),
2303 #if X_BYTE_ORDER == X_BIG_ENDIAN
2304 ENDIAN_SWAP(SQ_ENDIAN_8IN32),
2305 #else
2306 ENDIAN_SWAP(SQ_ENDIAN_NONE),
2307 #endif
2308 CONST_BUF_NO_STRIDE(0),
2309 MEGA_FETCH(0));
2310 shader[i++] = VTX_DWORD_PAD;
2311
2312 return i;
2313 }
2314
2315 /* comp ps --------------------------------------- */
R600_comp_ps(RADEONChipFamily ChipSet,uint32_t * shader)2316 int R600_comp_ps(RADEONChipFamily ChipSet, uint32_t* shader)
2317 {
2318 int i = 0;
2319
2320 /* 0 */
2321 /* call fetch-mask if boolean1 == true */
2322 shader[i++] = CF_DWORD0(ADDR(10));
2323 shader[i++] = CF_DWORD1(POP_COUNT(0),
2324 CF_CONST(1),
2325 COND(SQ_CF_COND_BOOL),
2326 I_COUNT(0),
2327 CALL_COUNT(0),
2328 END_OF_PROGRAM(0),
2329 VALID_PIXEL_MODE(0),
2330 CF_INST(SQ_CF_INST_CALL),
2331 WHOLE_QUAD_MODE(0),
2332 BARRIER(0));
2333 /* 1 */
2334 /* call read-constant-mask if boolean1 == false */
2335 shader[i++] = CF_DWORD0(ADDR(12));
2336 shader[i++] = CF_DWORD1(POP_COUNT(0),
2337 CF_CONST(1),
2338 COND(SQ_CF_COND_NOT_BOOL),
2339 I_COUNT(0),
2340 CALL_COUNT(0),
2341 END_OF_PROGRAM(0),
2342 VALID_PIXEL_MODE(0),
2343 CF_INST(SQ_CF_INST_CALL),
2344 WHOLE_QUAD_MODE(0),
2345 BARRIER(0));
2346 /* 2 */
2347 /* call fetch-src if boolean0 == true */
2348 shader[i++] = CF_DWORD0(ADDR(6));
2349 shader[i++] = CF_DWORD1(POP_COUNT(0),
2350 CF_CONST(0),
2351 COND(SQ_CF_COND_BOOL),
2352 I_COUNT(0),
2353 CALL_COUNT(0),
2354 END_OF_PROGRAM(0),
2355 VALID_PIXEL_MODE(0),
2356 CF_INST(SQ_CF_INST_CALL),
2357 WHOLE_QUAD_MODE(0),
2358 BARRIER(0));
2359
2360 /* 3 */
2361 /* call read-constant-src if boolean0 == false */
2362 shader[i++] = CF_DWORD0(ADDR(8));
2363 shader[i++] = CF_DWORD1(POP_COUNT(0),
2364 CF_CONST(0),
2365 COND(SQ_CF_COND_NOT_BOOL),
2366 I_COUNT(0),
2367 CALL_COUNT(0),
2368 END_OF_PROGRAM(0),
2369 VALID_PIXEL_MODE(0),
2370 CF_INST(SQ_CF_INST_CALL),
2371 WHOLE_QUAD_MODE(0),
2372 BARRIER(0));
2373
2374 /* 4 */
2375 /* src IN mask (GPR0 := GPR1 .* GPR0) */
2376 shader[i++] = CF_ALU_DWORD0(ADDR(14),
2377 KCACHE_BANK0(0),
2378 KCACHE_BANK1(0),
2379 KCACHE_MODE0(SQ_CF_KCACHE_NOP));
2380 shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
2381 KCACHE_ADDR0(0),
2382 KCACHE_ADDR1(0),
2383 I_COUNT(4),
2384 USES_WATERFALL(0),
2385 CF_INST(SQ_CF_INST_ALU),
2386 WHOLE_QUAD_MODE(0),
2387 BARRIER(1));
2388
2389 /* 5 */
2390 /* export pixel data */
2391 shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_PIXEL_MRT0),
2392 TYPE(SQ_EXPORT_PIXEL),
2393 RW_GPR(0),
2394 RW_REL(ABSOLUTE),
2395 INDEX_GPR(0),
2396 ELEM_SIZE(1));
2397 shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
2398 SRC_SEL_Y(SQ_SEL_Y),
2399 SRC_SEL_Z(SQ_SEL_Z),
2400 SRC_SEL_W(SQ_SEL_W),
2401 R6xx_ELEM_LOOP(0),
2402 BURST_COUNT(1),
2403 END_OF_PROGRAM(1),
2404 VALID_PIXEL_MODE(0),
2405 CF_INST(SQ_CF_INST_EXPORT_DONE),
2406 WHOLE_QUAD_MODE(0),
2407 BARRIER(1));
2408 /* subroutine fetch src */
2409 /* 6 */
2410 /* fetch src into GPR0*/
2411 shader[i++] = CF_DWORD0(ADDR(26));
2412 shader[i++] = CF_DWORD1(POP_COUNT(0),
2413 CF_CONST(0),
2414 COND(SQ_CF_COND_ACTIVE),
2415 I_COUNT(1),
2416 CALL_COUNT(0),
2417 END_OF_PROGRAM(0),
2418 VALID_PIXEL_MODE(0),
2419 CF_INST(SQ_CF_INST_TEX),
2420 WHOLE_QUAD_MODE(0),
2421 BARRIER(1));
2422
2423 /* 7 */
2424 /* return */
2425 shader[i++] = CF_DWORD0(ADDR(0));
2426 shader[i++] = CF_DWORD1(POP_COUNT(0),
2427 CF_CONST(0),
2428 COND(SQ_CF_COND_ACTIVE),
2429 I_COUNT(0),
2430 CALL_COUNT(0),
2431 END_OF_PROGRAM(0),
2432 VALID_PIXEL_MODE(0),
2433 CF_INST(SQ_CF_INST_RETURN),
2434 WHOLE_QUAD_MODE(0),
2435 BARRIER(1));
2436
2437 /* subroutine read-constant-src*/
2438 /* 8 */
2439 /* read constants into GPR0 */
2440 shader[i++] = CF_ALU_DWORD0(ADDR(18),
2441 KCACHE_BANK0(0),
2442 KCACHE_BANK1(0),
2443 KCACHE_MODE0(SQ_CF_KCACHE_NOP));
2444 shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
2445 KCACHE_ADDR0(0),
2446 KCACHE_ADDR1(0),
2447 I_COUNT(4),
2448 USES_WATERFALL(0),
2449 CF_INST(SQ_CF_INST_ALU),
2450 WHOLE_QUAD_MODE(0),
2451 BARRIER(1));
2452 /* 9 */
2453 /* return */
2454 shader[i++] = CF_DWORD0(ADDR(0));
2455 shader[i++] = CF_DWORD1(POP_COUNT(0),
2456 CF_CONST(0),
2457 COND(SQ_CF_COND_ACTIVE),
2458 I_COUNT(0),
2459 CALL_COUNT(0),
2460 END_OF_PROGRAM(0),
2461 VALID_PIXEL_MODE(0),
2462 CF_INST(SQ_CF_INST_RETURN),
2463 WHOLE_QUAD_MODE(0),
2464 BARRIER(1));
2465
2466 /* subroutine fetch mask */
2467 /* 10 */
2468 /* fetch mask into GPR1*/
2469 shader[i++] = CF_DWORD0(ADDR(28));
2470 shader[i++] = CF_DWORD1(POP_COUNT(0),
2471 CF_CONST(0),
2472 COND(SQ_CF_COND_ACTIVE),
2473 I_COUNT(1),
2474 CALL_COUNT(0),
2475 END_OF_PROGRAM(0),
2476 VALID_PIXEL_MODE(0),
2477 CF_INST(SQ_CF_INST_TEX),
2478 WHOLE_QUAD_MODE(0),
2479 BARRIER(1));
2480
2481 /* 11 */
2482 /* return */
2483 shader[i++] = CF_DWORD0(ADDR(0));
2484 shader[i++] = CF_DWORD1(POP_COUNT(0),
2485 CF_CONST(0),
2486 COND(SQ_CF_COND_ACTIVE),
2487 I_COUNT(0),
2488 CALL_COUNT(0),
2489 END_OF_PROGRAM(0),
2490 VALID_PIXEL_MODE(0),
2491 CF_INST(SQ_CF_INST_RETURN),
2492 WHOLE_QUAD_MODE(0),
2493 BARRIER(1));
2494
2495 /* subroutine read-constant-mask*/
2496 /* 12 */
2497 /* read constants into GPR1 */
2498 shader[i++] = CF_ALU_DWORD0(ADDR(22),
2499 KCACHE_BANK0(0),
2500 KCACHE_BANK1(0),
2501 KCACHE_MODE0(SQ_CF_KCACHE_NOP));
2502 shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
2503 KCACHE_ADDR0(0),
2504 KCACHE_ADDR1(0),
2505 I_COUNT(4),
2506 USES_WATERFALL(0),
2507 CF_INST(SQ_CF_INST_ALU),
2508 WHOLE_QUAD_MODE(0),
2509 BARRIER(1));
2510 /* 13 */
2511 /* return */
2512 shader[i++] = CF_DWORD0(ADDR(0));
2513 shader[i++] = CF_DWORD1(POP_COUNT(0),
2514 CF_CONST(0),
2515 COND(SQ_CF_COND_ACTIVE),
2516 I_COUNT(0),
2517 CALL_COUNT(0),
2518 END_OF_PROGRAM(0),
2519 VALID_PIXEL_MODE(0),
2520 CF_INST(SQ_CF_INST_RETURN),
2521 WHOLE_QUAD_MODE(0),
2522 BARRIER(1));
2523 /* ALU clauses */
2524
2525 /* 14 - alu 0 */
2526 /* MUL gpr[0].x gpr[1].x gpr[0].x */
2527 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1),
2528 SRC0_REL(ABSOLUTE),
2529 SRC0_ELEM(ELEM_X),
2530 SRC0_NEG(0),
2531 SRC1_SEL(ALU_SRC_GPR_BASE + 0),
2532 SRC1_REL(ABSOLUTE),
2533 SRC1_ELEM(ELEM_X),
2534 SRC1_NEG(0),
2535 INDEX_MODE(SQ_INDEX_LOOP),
2536 PRED_SEL(SQ_PRED_SEL_OFF),
2537 LAST(0));
2538 shader[i++] = ALU_DWORD1_OP2(ChipSet,
2539 SRC0_ABS(0),
2540 SRC1_ABS(0),
2541 UPDATE_EXECUTE_MASK(0),
2542 UPDATE_PRED(0),
2543 WRITE_MASK(1),
2544 FOG_MERGE(0),
2545 OMOD(SQ_ALU_OMOD_OFF),
2546 ALU_INST(SQ_OP2_INST_MUL),
2547 BANK_SWIZZLE(SQ_ALU_VEC_012),
2548 DST_GPR(0),
2549 DST_REL(ABSOLUTE),
2550 DST_ELEM(ELEM_X),
2551 CLAMP(1));
2552 /* 15 - alu 1 */
2553 /* MUL gpr[0].y gpr[1].y gpr[0].y */
2554 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1),
2555 SRC0_REL(ABSOLUTE),
2556 SRC0_ELEM(ELEM_Y),
2557 SRC0_NEG(0),
2558 SRC1_SEL(ALU_SRC_GPR_BASE + 0),
2559 SRC1_REL(ABSOLUTE),
2560 SRC1_ELEM(ELEM_Y),
2561 SRC1_NEG(0),
2562 INDEX_MODE(SQ_INDEX_LOOP),
2563 PRED_SEL(SQ_PRED_SEL_OFF),
2564 LAST(0));
2565 shader[i++] = ALU_DWORD1_OP2(ChipSet,
2566 SRC0_ABS(0),
2567 SRC1_ABS(0),
2568 UPDATE_EXECUTE_MASK(0),
2569 UPDATE_PRED(0),
2570 WRITE_MASK(1),
2571 FOG_MERGE(0),
2572 OMOD(SQ_ALU_OMOD_OFF),
2573 ALU_INST(SQ_OP2_INST_MUL),
2574 BANK_SWIZZLE(SQ_ALU_VEC_012),
2575 DST_GPR(0),
2576 DST_REL(ABSOLUTE),
2577 DST_ELEM(ELEM_Y),
2578 CLAMP(1));
2579 /* 16 - alu 2 */
2580 /* MUL gpr[0].z gpr[1].z gpr[0].z */
2581 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1),
2582 SRC0_REL(ABSOLUTE),
2583 SRC0_ELEM(ELEM_Z),
2584 SRC0_NEG(0),
2585 SRC1_SEL(ALU_SRC_GPR_BASE + 0),
2586 SRC1_REL(ABSOLUTE),
2587 SRC1_ELEM(ELEM_Z),
2588 SRC1_NEG(0),
2589 INDEX_MODE(SQ_INDEX_LOOP),
2590 PRED_SEL(SQ_PRED_SEL_OFF),
2591 LAST(0));
2592 shader[i++] = ALU_DWORD1_OP2(ChipSet,
2593 SRC0_ABS(0),
2594 SRC1_ABS(0),
2595 UPDATE_EXECUTE_MASK(0),
2596 UPDATE_PRED(0),
2597 WRITE_MASK(1),
2598 FOG_MERGE(0),
2599 OMOD(SQ_ALU_OMOD_OFF),
2600 ALU_INST(SQ_OP2_INST_MUL),
2601 BANK_SWIZZLE(SQ_ALU_VEC_012),
2602 DST_GPR(0),
2603 DST_REL(ABSOLUTE),
2604 DST_ELEM(ELEM_Z),
2605 CLAMP(1));
2606 /* 17 - alu 3 */
2607 /* MUL gpr[0].w gpr[1].w gpr[0].w */
2608 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1),
2609 SRC0_REL(ABSOLUTE),
2610 SRC0_ELEM(ELEM_W),
2611 SRC0_NEG(0),
2612 SRC1_SEL(ALU_SRC_GPR_BASE + 0),
2613 SRC1_REL(ABSOLUTE),
2614 SRC1_ELEM(ELEM_W),
2615 SRC1_NEG(0),
2616 INDEX_MODE(SQ_INDEX_LOOP),
2617 PRED_SEL(SQ_PRED_SEL_OFF),
2618 LAST(1));
2619 shader[i++] = ALU_DWORD1_OP2(ChipSet,
2620 SRC0_ABS(0),
2621 SRC1_ABS(0),
2622 UPDATE_EXECUTE_MASK(0),
2623 UPDATE_PRED(0),
2624 WRITE_MASK(1),
2625 FOG_MERGE(0),
2626 OMOD(SQ_ALU_OMOD_OFF),
2627 ALU_INST(SQ_OP2_INST_MUL),
2628 BANK_SWIZZLE(SQ_ALU_VEC_012),
2629 DST_GPR(0),
2630 DST_REL(ABSOLUTE),
2631 DST_ELEM(ELEM_W),
2632 CLAMP(1));
2633
2634 /* 18 */
2635 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_CFILE_BASE + 0),
2636 SRC0_REL(ABSOLUTE),
2637 SRC0_ELEM(ELEM_X),
2638 SRC0_NEG(0),
2639 SRC1_SEL(ALU_SRC_GPR_BASE + 0),
2640 SRC1_REL(ABSOLUTE),
2641 SRC1_ELEM(ELEM_X),
2642 SRC1_NEG(0),
2643 INDEX_MODE(SQ_INDEX_AR_X),
2644 PRED_SEL(SQ_PRED_SEL_OFF),
2645 LAST(0));
2646 shader[i++] = ALU_DWORD1_OP2(ChipSet,
2647 SRC0_ABS(0),
2648 SRC1_ABS(0),
2649 UPDATE_EXECUTE_MASK(0),
2650 UPDATE_PRED(0),
2651 WRITE_MASK(1),
2652 FOG_MERGE(0),
2653 OMOD(SQ_ALU_OMOD_OFF),
2654 ALU_INST(SQ_OP2_INST_MOV),
2655 BANK_SWIZZLE(SQ_ALU_VEC_012),
2656 DST_GPR(0),
2657 DST_REL(ABSOLUTE),
2658 DST_ELEM(ELEM_X),
2659 CLAMP(1));
2660 /* 19 */
2661 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_CFILE_BASE + 0),
2662 SRC0_REL(ABSOLUTE),
2663 SRC0_ELEM(ELEM_Y),
2664 SRC0_NEG(0),
2665 SRC1_SEL(ALU_SRC_GPR_BASE + 0),
2666 SRC1_REL(ABSOLUTE),
2667 SRC1_ELEM(ELEM_Y),
2668 SRC1_NEG(0),
2669 INDEX_MODE(SQ_INDEX_AR_X),
2670 PRED_SEL(SQ_PRED_SEL_OFF),
2671 LAST(0));
2672 shader[i++] = ALU_DWORD1_OP2(ChipSet,
2673 SRC0_ABS(0),
2674 SRC1_ABS(0),
2675 UPDATE_EXECUTE_MASK(0),
2676 UPDATE_PRED(0),
2677 WRITE_MASK(1),
2678 FOG_MERGE(0),
2679 OMOD(SQ_ALU_OMOD_OFF),
2680 ALU_INST(SQ_OP2_INST_MOV),
2681 BANK_SWIZZLE(SQ_ALU_VEC_012),
2682 DST_GPR(0),
2683 DST_REL(ABSOLUTE),
2684 DST_ELEM(ELEM_Y),
2685 CLAMP(1));
2686 /* 20 */
2687 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_CFILE_BASE + 0),
2688 SRC0_REL(ABSOLUTE),
2689 SRC0_ELEM(ELEM_Z),
2690 SRC0_NEG(0),
2691 SRC1_SEL(ALU_SRC_GPR_BASE + 0),
2692 SRC1_REL(ABSOLUTE),
2693 SRC1_ELEM(ELEM_Z),
2694 SRC1_NEG(0),
2695 INDEX_MODE(SQ_INDEX_AR_X),
2696 PRED_SEL(SQ_PRED_SEL_OFF),
2697 LAST(0));
2698 shader[i++] = ALU_DWORD1_OP2(ChipSet,
2699 SRC0_ABS(0),
2700 SRC1_ABS(0),
2701 UPDATE_EXECUTE_MASK(0),
2702 UPDATE_PRED(0),
2703 WRITE_MASK(1),
2704 FOG_MERGE(0),
2705 OMOD(SQ_ALU_OMOD_OFF),
2706 ALU_INST(SQ_OP2_INST_MOV),
2707 BANK_SWIZZLE(SQ_ALU_VEC_012),
2708 DST_GPR(0),
2709 DST_REL(ABSOLUTE),
2710 DST_ELEM(ELEM_Z),
2711 CLAMP(1));
2712 /* 21 */
2713 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_CFILE_BASE + 0),
2714 SRC0_REL(ABSOLUTE),
2715 SRC0_ELEM(ELEM_W),
2716 SRC0_NEG(0),
2717 SRC1_SEL(ALU_SRC_GPR_BASE + 0),
2718 SRC1_REL(ABSOLUTE),
2719 SRC1_ELEM(ELEM_W),
2720 SRC1_NEG(0),
2721 INDEX_MODE(SQ_INDEX_AR_X),
2722 PRED_SEL(SQ_PRED_SEL_OFF),
2723 LAST(1));
2724 shader[i++] = ALU_DWORD1_OP2(ChipSet,
2725 SRC0_ABS(0),
2726 SRC1_ABS(0),
2727 UPDATE_EXECUTE_MASK(0),
2728 UPDATE_PRED(0),
2729 WRITE_MASK(1),
2730 FOG_MERGE(0),
2731 OMOD(SQ_ALU_OMOD_OFF),
2732 ALU_INST(SQ_OP2_INST_MOV),
2733 BANK_SWIZZLE(SQ_ALU_VEC_012),
2734 DST_GPR(0),
2735 DST_REL(ABSOLUTE),
2736 DST_ELEM(ELEM_W),
2737 CLAMP(1));
2738
2739 /* 22 */
2740 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_CFILE_BASE + 1),
2741 SRC0_REL(ABSOLUTE),
2742 SRC0_ELEM(ELEM_X),
2743 SRC0_NEG(0),
2744 SRC1_SEL(ALU_SRC_GPR_BASE + 0),
2745 SRC1_REL(ABSOLUTE),
2746 SRC1_ELEM(ELEM_X),
2747 SRC1_NEG(0),
2748 INDEX_MODE(SQ_INDEX_AR_X),
2749 PRED_SEL(SQ_PRED_SEL_OFF),
2750 LAST(0));
2751 shader[i++] = ALU_DWORD1_OP2(ChipSet,
2752 SRC0_ABS(0),
2753 SRC1_ABS(0),
2754 UPDATE_EXECUTE_MASK(0),
2755 UPDATE_PRED(0),
2756 WRITE_MASK(1),
2757 FOG_MERGE(0),
2758 OMOD(SQ_ALU_OMOD_OFF),
2759 ALU_INST(SQ_OP2_INST_MOV),
2760 BANK_SWIZZLE(SQ_ALU_VEC_012),
2761 DST_GPR(1),
2762 DST_REL(ABSOLUTE),
2763 DST_ELEM(ELEM_X),
2764 CLAMP(1));
2765 /* 23 */
2766 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_CFILE_BASE + 1),
2767 SRC0_REL(ABSOLUTE),
2768 SRC0_ELEM(ELEM_Y),
2769 SRC0_NEG(0),
2770 SRC1_SEL(ALU_SRC_GPR_BASE + 0),
2771 SRC1_REL(ABSOLUTE),
2772 SRC1_ELEM(ELEM_Y),
2773 SRC1_NEG(0),
2774 INDEX_MODE(SQ_INDEX_AR_X),
2775 PRED_SEL(SQ_PRED_SEL_OFF),
2776 LAST(0));
2777 shader[i++] = ALU_DWORD1_OP2(ChipSet,
2778 SRC0_ABS(0),
2779 SRC1_ABS(0),
2780 UPDATE_EXECUTE_MASK(0),
2781 UPDATE_PRED(0),
2782 WRITE_MASK(1),
2783 FOG_MERGE(0),
2784 OMOD(SQ_ALU_OMOD_OFF),
2785 ALU_INST(SQ_OP2_INST_MOV),
2786 BANK_SWIZZLE(SQ_ALU_VEC_012),
2787 DST_GPR(1),
2788 DST_REL(ABSOLUTE),
2789 DST_ELEM(ELEM_Y),
2790 CLAMP(1));
2791 /* 24 */
2792 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_CFILE_BASE + 1),
2793 SRC0_REL(ABSOLUTE),
2794 SRC0_ELEM(ELEM_Z),
2795 SRC0_NEG(0),
2796 SRC1_SEL(ALU_SRC_GPR_BASE + 0),
2797 SRC1_REL(ABSOLUTE),
2798 SRC1_ELEM(ELEM_Z),
2799 SRC1_NEG(0),
2800 INDEX_MODE(SQ_INDEX_AR_X),
2801 PRED_SEL(SQ_PRED_SEL_OFF),
2802 LAST(0));
2803 shader[i++] = ALU_DWORD1_OP2(ChipSet,
2804 SRC0_ABS(0),
2805 SRC1_ABS(0),
2806 UPDATE_EXECUTE_MASK(0),
2807 UPDATE_PRED(0),
2808 WRITE_MASK(1),
2809 FOG_MERGE(0),
2810 OMOD(SQ_ALU_OMOD_OFF),
2811 ALU_INST(SQ_OP2_INST_MOV),
2812 BANK_SWIZZLE(SQ_ALU_VEC_012),
2813 DST_GPR(1),
2814 DST_REL(ABSOLUTE),
2815 DST_ELEM(ELEM_Z),
2816 CLAMP(1));
2817 /* 25 */
2818 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_CFILE_BASE + 1),
2819 SRC0_REL(ABSOLUTE),
2820 SRC0_ELEM(ELEM_W),
2821 SRC0_NEG(0),
2822 SRC1_SEL(ALU_SRC_GPR_BASE + 0),
2823 SRC1_REL(ABSOLUTE),
2824 SRC1_ELEM(ELEM_W),
2825 SRC1_NEG(0),
2826 INDEX_MODE(SQ_INDEX_AR_X),
2827 PRED_SEL(SQ_PRED_SEL_OFF),
2828 LAST(1));
2829 shader[i++] = ALU_DWORD1_OP2(ChipSet,
2830 SRC0_ABS(0),
2831 SRC1_ABS(0),
2832 UPDATE_EXECUTE_MASK(0),
2833 UPDATE_PRED(0),
2834 WRITE_MASK(1),
2835 FOG_MERGE(0),
2836 OMOD(SQ_ALU_OMOD_OFF),
2837 ALU_INST(SQ_OP2_INST_MOV),
2838 BANK_SWIZZLE(SQ_ALU_VEC_012),
2839 DST_GPR(1),
2840 DST_REL(ABSOLUTE),
2841 DST_ELEM(ELEM_W),
2842 CLAMP(1));
2843
2844 /* 26/27 - src */
2845 shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
2846 BC_FRAC_MODE(0),
2847 FETCH_WHOLE_QUAD(0),
2848 RESOURCE_ID(0),
2849 SRC_GPR(0),
2850 SRC_REL(ABSOLUTE),
2851 R7xx_ALT_CONST(0));
2852 shader[i++] = TEX_DWORD1(DST_GPR(0),
2853 DST_REL(ABSOLUTE),
2854 DST_SEL_X(SQ_SEL_X),
2855 DST_SEL_Y(SQ_SEL_Y),
2856 DST_SEL_Z(SQ_SEL_Z),
2857 DST_SEL_W(SQ_SEL_W),
2858 LOD_BIAS(0),
2859 COORD_TYPE_X(TEX_NORMALIZED),
2860 COORD_TYPE_Y(TEX_NORMALIZED),
2861 COORD_TYPE_Z(TEX_NORMALIZED),
2862 COORD_TYPE_W(TEX_NORMALIZED));
2863 shader[i++] = TEX_DWORD2(OFFSET_X(0),
2864 OFFSET_Y(0),
2865 OFFSET_Z(0),
2866 SAMPLER_ID(0),
2867 SRC_SEL_X(SQ_SEL_X),
2868 SRC_SEL_Y(SQ_SEL_Y),
2869 SRC_SEL_Z(SQ_SEL_0),
2870 SRC_SEL_W(SQ_SEL_1));
2871 shader[i++] = TEX_DWORD_PAD;
2872 /* 28/29 - mask */
2873 shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
2874 BC_FRAC_MODE(0),
2875 FETCH_WHOLE_QUAD(0),
2876 RESOURCE_ID(1),
2877 SRC_GPR(1),
2878 SRC_REL(ABSOLUTE),
2879 R7xx_ALT_CONST(0));
2880 shader[i++] = TEX_DWORD1(DST_GPR(1),
2881 DST_REL(ABSOLUTE),
2882 DST_SEL_X(SQ_SEL_X),
2883 DST_SEL_Y(SQ_SEL_Y),
2884 DST_SEL_Z(SQ_SEL_Z),
2885 DST_SEL_W(SQ_SEL_W),
2886 LOD_BIAS(0),
2887 COORD_TYPE_X(TEX_NORMALIZED),
2888 COORD_TYPE_Y(TEX_NORMALIZED),
2889 COORD_TYPE_Z(TEX_NORMALIZED),
2890 COORD_TYPE_W(TEX_NORMALIZED));
2891 shader[i++] = TEX_DWORD2(OFFSET_X(0),
2892 OFFSET_Y(0),
2893 OFFSET_Z(0),
2894 SAMPLER_ID(1),
2895 SRC_SEL_X(SQ_SEL_X),
2896 SRC_SEL_Y(SQ_SEL_Y),
2897 SRC_SEL_Z(SQ_SEL_0),
2898 SRC_SEL_W(SQ_SEL_1));
2899 shader[i++] = TEX_DWORD_PAD;
2900
2901 return i;
2902 }
2903