1 /*
2  * Copyright 2008 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21  * SOFTWARE.
22  *
23  * Author: Alex Deucher <alexander.deucher@amd.com>
24  *
25  */
26 
27 #ifdef HAVE_CONFIG_H
28 #include "config.h"
29 #endif
30 
31 #include "xf86.h"
32 
33 #include "radeon.h"
34 #include "r600_shader.h"
35 #include "r600_reg.h"
36 
37 /* solid vs --------------------------------------- */
/*
 * Emit the solid-fill vertex shader into shader[].
 *
 * ChipSet is not referenced by this shader.
 * shader must have room for the 12 dwords stored here.
 * Returns the number of dwords written.
 *
 * Layout (the slot numbers in the comments below count pairs of dwords):
 *   0    CF VTX clause pointing at slot 4, one fetch
 *   1    EXPORT_DONE of GPR 1 to POS0
 *   2    EXPORT_DONE of GPR 0 to PARAM 0, END_OF_PROGRAM
 *   3    padding
 *   4/5  vertex fetch (32_32_FLOAT) into GPR 1 with swizzle x, y, 0, 1
 */
int R600_solid_vs(RADEONChipFamily ChipSet, uint32_t* shader)
{
    int i = 0;
    /*
     * Pick the fetch endian swap before it is used: a preprocessor
     * directive inside the argument list of a macro invocation is
     * undefined behavior in ISO C, so the #if must not live inside
     * VTX_DWORD2(...).
     */
#if X_BYTE_ORDER == X_BIG_ENDIAN
    const uint32_t vtx_endian_swap = SQ_ENDIAN_8IN32;
#else
    const uint32_t vtx_endian_swap = SQ_ENDIAN_NONE;
#endif

    /* 0 */
    shader[i++] = CF_DWORD0(ADDR(4));
    shader[i++] = CF_DWORD1(POP_COUNT(0),
                            CF_CONST(0),
                            COND(SQ_CF_COND_ACTIVE),
                            I_COUNT(1),
                            CALL_COUNT(0),
                            END_OF_PROGRAM(0),
                            VALID_PIXEL_MODE(0),
                            CF_INST(SQ_CF_INST_VTX),
                            WHOLE_QUAD_MODE(0),
                            BARRIER(1));
    /* 1 */
    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_POS0),
                                          TYPE(SQ_EXPORT_POS),
                                          RW_GPR(1),
                                          RW_REL(ABSOLUTE),
                                          INDEX_GPR(0),
                                          ELEM_SIZE(0));
    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
                                               SRC_SEL_Y(SQ_SEL_Y),
                                               SRC_SEL_Z(SQ_SEL_Z),
                                               SRC_SEL_W(SQ_SEL_W),
                                               R6xx_ELEM_LOOP(0),
                                               BURST_COUNT(1),
                                               END_OF_PROGRAM(0),
                                               VALID_PIXEL_MODE(0),
                                               CF_INST(SQ_CF_INST_EXPORT_DONE),
                                               WHOLE_QUAD_MODE(0),
                                               BARRIER(1));
    /* 2 - always export a param whether it's used or not */
    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(0),
                                          TYPE(SQ_EXPORT_PARAM),
                                          RW_GPR(0),
                                          RW_REL(ABSOLUTE),
                                          INDEX_GPR(0),
                                          ELEM_SIZE(0));
    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
                                               SRC_SEL_Y(SQ_SEL_Y),
                                               SRC_SEL_Z(SQ_SEL_Z),
                                               SRC_SEL_W(SQ_SEL_W),
                                               R6xx_ELEM_LOOP(0),
                                               BURST_COUNT(0),
                                               END_OF_PROGRAM(1),
                                               VALID_PIXEL_MODE(0),
                                               CF_INST(SQ_CF_INST_EXPORT_DONE),
                                               WHOLE_QUAD_MODE(0),
                                               BARRIER(0));
    /* 3 - padding */
    shader[i++] = 0x00000000;
    shader[i++] = 0x00000000;
    /* 4/5 */
    shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
                             FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
                             FETCH_WHOLE_QUAD(0),
                             BUFFER_ID(0),
                             SRC_GPR(0),
                             SRC_REL(ABSOLUTE),
                             SRC_SEL_X(SQ_SEL_X),
                             MEGA_FETCH_COUNT(8));
    shader[i++] = VTX_DWORD1_GPR(DST_GPR(1),
                                 DST_REL(0),
                                 DST_SEL_X(SQ_SEL_X),
                                 DST_SEL_Y(SQ_SEL_Y),
                                 DST_SEL_Z(SQ_SEL_0),
                                 DST_SEL_W(SQ_SEL_1),
                                 USE_CONST_FIELDS(0),
                                 DATA_FORMAT(FMT_32_32_FLOAT),
                                 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED),
                                 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
                                 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
    shader[i++] = VTX_DWORD2(OFFSET(0),
                             ENDIAN_SWAP(vtx_endian_swap),
                             CONST_BUF_NO_STRIDE(0),
                             MEGA_FETCH(1));
    shader[i++] = VTX_DWORD_PAD;

    return i;
}
125 
126 /* solid ps --------------------------------------- */
/*
 * Emit the solid-fill pixel shader into shader[].
 *
 * ChipSet is forwarded to ALU_DWORD1_OP2 for each ALU instruction.
 * shader must have room for the 12 dwords stored here.
 * Returns the number of dwords written.
 *
 * Layout (slot numbers count pairs of dwords):
 *   0     CF ALU clause pointing at slot 2, four instructions
 *   1     EXPORT_DONE of GPR 0 to pixel MRT0, END_OF_PROGRAM
 *   2..5  one clamped MOV per channel from constant-file entry 0 into GPR 0
 */
int R600_solid_ps(RADEONChipFamily ChipSet, uint32_t* shader)
{
    /* Channels in emission order; the final one closes the ALU group. */
    const int channel[4] = { ELEM_X, ELEM_Y, ELEM_Z, ELEM_W };
    uint32_t *out = shader;
    int c;

    /* 0 */
    *out++ = CF_ALU_DWORD0(ADDR(2),
                           KCACHE_BANK0(0),
                           KCACHE_BANK1(0),
                           KCACHE_MODE0(SQ_CF_KCACHE_NOP));
    *out++ = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
                           KCACHE_ADDR0(0),
                           KCACHE_ADDR1(0),
                           I_COUNT(4),
                           USES_WATERFALL(0),
                           CF_INST(SQ_CF_INST_ALU),
                           WHOLE_QUAD_MODE(0),
                           BARRIER(1));
    /* 1 */
    *out++ = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_PIXEL_MRT0),
                                     TYPE(SQ_EXPORT_PIXEL),
                                     RW_GPR(0),
                                     RW_REL(ABSOLUTE),
                                     INDEX_GPR(0),
                                     ELEM_SIZE(1));
    *out++ = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
                                          SRC_SEL_Y(SQ_SEL_Y),
                                          SRC_SEL_Z(SQ_SEL_Z),
                                          SRC_SEL_W(SQ_SEL_W),
                                          R6xx_ELEM_LOOP(0),
                                          BURST_COUNT(1),
                                          END_OF_PROGRAM(1),
                                          VALID_PIXEL_MODE(0),
                                          CF_INST(SQ_CF_INST_EXPORT_DONE),
                                          WHOLE_QUAD_MODE(0),
                                          BARRIER(1));

    /* 2, 3, 4, 5 - the same MOV for x, y, z and w; only w sets LAST */
    for (c = 0; c < 4; c++) {
        *out++ = ALU_DWORD0(SRC0_SEL(ALU_SRC_CFILE_BASE + 0),
                            SRC0_REL(ABSOLUTE),
                            SRC0_ELEM(channel[c]),
                            SRC0_NEG(0),
                            SRC1_SEL(ALU_SRC_GPR_BASE + 0),
                            SRC1_REL(ABSOLUTE),
                            SRC1_ELEM(channel[c]),
                            SRC1_NEG(0),
                            INDEX_MODE(SQ_INDEX_AR_X),
                            PRED_SEL(SQ_PRED_SEL_OFF),
                            LAST(c == 3 ? 1 : 0));
        *out++ = ALU_DWORD1_OP2(ChipSet,
                                SRC0_ABS(0),
                                SRC1_ABS(0),
                                UPDATE_EXECUTE_MASK(0),
                                UPDATE_PRED(0),
                                WRITE_MASK(1),
                                FOG_MERGE(0),
                                OMOD(SQ_ALU_OMOD_OFF),
                                ALU_INST(SQ_OP2_INST_MOV),
                                BANK_SWIZZLE(SQ_ALU_VEC_012),
                                DST_GPR(0),
                                DST_REL(ABSOLUTE),
                                DST_ELEM(channel[c]),
                                CLAMP(1));
    }

    return (int)(out - shader);
}
270 
271 /* copy vs --------------------------------------- */
/*
 * Emit the copy vertex shader into shader[].
 *
 * ChipSet is not referenced by this shader.
 * shader must have room for the 16 dwords stored here.
 * Returns the number of dwords written.
 *
 * Layout (slot numbers count pairs of dwords):
 *   0    CF VTX clause pointing at slot 4, two fetches
 *   1    EXPORT_DONE of GPR 1 to POS0
 *   2    EXPORT_DONE of GPR 0 to PARAM 0, END_OF_PROGRAM
 *   3    padding
 *   4/5  fetch at offset 0 into GPR 1 (mega fetch of 16 bytes)
 *   6/7  fetch at offset 8 into GPR 0
 */
int R600_copy_vs(RADEONChipFamily ChipSet, uint32_t* shader)
{
    int i = 0;
    /*
     * Pick the fetch endian swap before it is used: a preprocessor
     * directive inside the argument list of a macro invocation is
     * undefined behavior in ISO C, so the #if must not live inside
     * VTX_DWORD2(...).
     */
#if X_BYTE_ORDER == X_BIG_ENDIAN
    const uint32_t vtx_endian_swap = SQ_ENDIAN_8IN32;
#else
    const uint32_t vtx_endian_swap = SQ_ENDIAN_NONE;
#endif

    /* 0 */
    shader[i++] = CF_DWORD0(ADDR(4));
    shader[i++] = CF_DWORD1(POP_COUNT(0),
                            CF_CONST(0),
                            COND(SQ_CF_COND_ACTIVE),
                            I_COUNT(2),
                            CALL_COUNT(0),
                            END_OF_PROGRAM(0),
                            VALID_PIXEL_MODE(0),
                            CF_INST(SQ_CF_INST_VTX),
                            WHOLE_QUAD_MODE(0),
                            BARRIER(1));
    /* 1 */
    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_POS0),
                                          TYPE(SQ_EXPORT_POS),
                                          RW_GPR(1),
                                          RW_REL(ABSOLUTE),
                                          INDEX_GPR(0),
                                          ELEM_SIZE(0));
    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
                                               SRC_SEL_Y(SQ_SEL_Y),
                                               SRC_SEL_Z(SQ_SEL_Z),
                                               SRC_SEL_W(SQ_SEL_W),
                                               R6xx_ELEM_LOOP(0),
                                               BURST_COUNT(0),
                                               END_OF_PROGRAM(0),
                                               VALID_PIXEL_MODE(0),
                                               CF_INST(SQ_CF_INST_EXPORT_DONE),
                                               WHOLE_QUAD_MODE(0),
                                               BARRIER(1));
    /* 2 */
    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(0),
                                          TYPE(SQ_EXPORT_PARAM),
                                          RW_GPR(0),
                                          RW_REL(ABSOLUTE),
                                          INDEX_GPR(0),
                                          ELEM_SIZE(0));
    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
                                               SRC_SEL_Y(SQ_SEL_Y),
                                               SRC_SEL_Z(SQ_SEL_Z),
                                               SRC_SEL_W(SQ_SEL_W),
                                               R6xx_ELEM_LOOP(0),
                                               BURST_COUNT(0),
                                               END_OF_PROGRAM(1),
                                               VALID_PIXEL_MODE(0),
                                               CF_INST(SQ_CF_INST_EXPORT_DONE),
                                               WHOLE_QUAD_MODE(0),
                                               BARRIER(0));
    /* 3 - padding */
    shader[i++] = 0x00000000;
    shader[i++] = 0x00000000;
    /* 4/5 */
    shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
                             FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
                             FETCH_WHOLE_QUAD(0),
                             BUFFER_ID(0),
                             SRC_GPR(0),
                             SRC_REL(ABSOLUTE),
                             SRC_SEL_X(SQ_SEL_X),
                             MEGA_FETCH_COUNT(16));
    shader[i++] = VTX_DWORD1_GPR(DST_GPR(1),
                                 DST_REL(0),
                                 DST_SEL_X(SQ_SEL_X),
                                 DST_SEL_Y(SQ_SEL_Y),
                                 DST_SEL_Z(SQ_SEL_0),
                                 DST_SEL_W(SQ_SEL_1),
                                 USE_CONST_FIELDS(0),
                                 DATA_FORMAT(FMT_32_32_FLOAT),
                                 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED),
                                 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
                                 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
    shader[i++] = VTX_DWORD2(OFFSET(0),
                             ENDIAN_SWAP(vtx_endian_swap),
                             CONST_BUF_NO_STRIDE(0),
                             MEGA_FETCH(1));
    shader[i++] = VTX_DWORD_PAD;
    /* 6/7 */
    shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
                             FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
                             FETCH_WHOLE_QUAD(0),
                             BUFFER_ID(0),
                             SRC_GPR(0),
                             SRC_REL(ABSOLUTE),
                             SRC_SEL_X(SQ_SEL_X),
                             MEGA_FETCH_COUNT(8));
    shader[i++] = VTX_DWORD1_GPR(DST_GPR(0),
                                 DST_REL(0),
                                 DST_SEL_X(SQ_SEL_X),
                                 DST_SEL_Y(SQ_SEL_Y),
                                 DST_SEL_Z(SQ_SEL_0),
                                 DST_SEL_W(SQ_SEL_1),
                                 USE_CONST_FIELDS(0),
                                 DATA_FORMAT(FMT_32_32_FLOAT),
                                 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED),
                                 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
                                 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
    shader[i++] = VTX_DWORD2(OFFSET(8),
                             ENDIAN_SWAP(vtx_endian_swap),
                             CONST_BUF_NO_STRIDE(0),
                             MEGA_FETCH(0));
    shader[i++] = VTX_DWORD_PAD;

    return i;
}
388 
389 /* copy ps --------------------------------------- */
/*
 * Emit the copy pixel shader into shader[].
 *
 * ChipSet is not referenced by this shader.
 * shader must have room for the 8 dwords stored here.
 * Returns the number of dwords written.
 *
 * Layout (slot numbers count pairs of dwords):
 *   0    CF TEX clause pointing at slot 2, one instruction
 *   1    EXPORT_DONE of GPR 0 to pixel MRT0, END_OF_PROGRAM
 *   2/3  SAMPLE from resource 0 / sampler 0 into GPR 0
 */
int R600_copy_ps(RADEONChipFamily ChipSet, uint32_t* shader)
{
    uint32_t *out = shader;

    /* CF 0: texture clause */
    *out++ = CF_DWORD0(ADDR(2));
    *out++ = CF_DWORD1(POP_COUNT(0),
                       CF_CONST(0),
                       COND(SQ_CF_COND_ACTIVE),
                       I_COUNT(1),
                       CALL_COUNT(0),
                       END_OF_PROGRAM(0),
                       VALID_PIXEL_MODE(0),
                       CF_INST(SQ_CF_INST_TEX),
                       WHOLE_QUAD_MODE(0),
                       BARRIER(1));

    /* CF 1: pixel export, ends the program */
    *out++ = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_PIXEL_MRT0),
                                     TYPE(SQ_EXPORT_PIXEL),
                                     RW_GPR(0),
                                     RW_REL(ABSOLUTE),
                                     INDEX_GPR(0),
                                     ELEM_SIZE(1));
    *out++ = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
                                          SRC_SEL_Y(SQ_SEL_Y),
                                          SRC_SEL_Z(SQ_SEL_Z),
                                          SRC_SEL_W(SQ_SEL_W),
                                          R6xx_ELEM_LOOP(0),
                                          BURST_COUNT(1),
                                          END_OF_PROGRAM(1),
                                          VALID_PIXEL_MODE(0),
                                          CF_INST(SQ_CF_INST_EXPORT_DONE),
                                          WHOLE_QUAD_MODE(0),
                                          BARRIER(1));

    /* TEX 0: sample using GPR 0 as coordinates, result back into GPR 0 */
    *out++ = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
                        BC_FRAC_MODE(0),
                        FETCH_WHOLE_QUAD(0),
                        RESOURCE_ID(0),
                        SRC_GPR(0),
                        SRC_REL(ABSOLUTE),
                        R7xx_ALT_CONST(0));
    *out++ = TEX_DWORD1(DST_GPR(0),
                        DST_REL(ABSOLUTE),
                        DST_SEL_X(SQ_SEL_X), /* R */
                        DST_SEL_Y(SQ_SEL_Y), /* G */
                        DST_SEL_Z(SQ_SEL_Z), /* B */
                        DST_SEL_W(SQ_SEL_W), /* A */
                        LOD_BIAS(0),
                        COORD_TYPE_X(TEX_UNNORMALIZED),
                        COORD_TYPE_Y(TEX_UNNORMALIZED),
                        COORD_TYPE_Z(TEX_UNNORMALIZED),
                        COORD_TYPE_W(TEX_UNNORMALIZED));
    *out++ = TEX_DWORD2(OFFSET_X(0),
                        OFFSET_Y(0),
                        OFFSET_Z(0),
                        SAMPLER_ID(0),
                        SRC_SEL_X(SQ_SEL_X),
                        SRC_SEL_Y(SQ_SEL_Y),
                        SRC_SEL_Z(SQ_SEL_0),
                        SRC_SEL_W(SQ_SEL_1));
    *out++ = TEX_DWORD_PAD;

    return (int)(out - shader);
}
455 
/*
 * ; xv vertex shader
 * 00 VTX: ADDR(6) CNT(2)
 *       0  VFETCH R1.xy01, R0.x, fc0  MEGA(16) FORMAT(32_32_FLOAT)
 *          FORMAT_COMP(SIGNED)
 *       1  VFETCH R0.xy01, R0.x, fc0  MINI(8) OFFSET(8) FORMAT(32_32_FLOAT)
 *          FORMAT_COMP(SIGNED)
 * 01 ALU: ADDR(4) CNT(2)
 *       2  x: MUL R0.x, R0.x, C0.x
 *          y: MUL R0.y, R0.y, C0.y
 * 02 EXP_DONE: POS0, R1
 * 03 EXP_DONE: PARAM0, R0  NO_BARRIER
 * END_OF_PROGRAM
 */
/*
 * Emit the Xv vertex shader into shader[].
 *
 * ChipSet is forwarded to ALU_DWORD1_OP2 for the two ALU instructions.
 * shader must have room for the 20 dwords stored here.
 * Returns the number of dwords written.
 *
 * Layout (slot numbers count pairs of dwords):
 *   0    CF VTX clause pointing at slot 6, two fetches
 *   1    CF ALU clause pointing at slot 4, two instructions
 *   2    EXPORT_DONE of GPR 1 to POS0
 *   3    EXPORT_DONE of GPR 0 to PARAM 0, END_OF_PROGRAM
 *   4    R0.x = R0.x * C0.x
 *   5    R0.y = R0.y * C0.y
 *   6/7  fetch at offset 0 into GPR 1 (mega fetch of 16 bytes)
 *   8/9  fetch at offset 8 into GPR 0
 */
int R600_xv_vs(RADEONChipFamily ChipSet, uint32_t* shader)
{
    int i = 0;
    /*
     * Pick the fetch endian swap before it is used: a preprocessor
     * directive inside the argument list of a macro invocation is
     * undefined behavior in ISO C, so the #if must not live inside
     * VTX_DWORD2(...).
     */
#if X_BYTE_ORDER == X_BIG_ENDIAN
    const uint32_t vtx_endian_swap = SQ_ENDIAN_8IN32;
#else
    const uint32_t vtx_endian_swap = SQ_ENDIAN_NONE;
#endif

    /* 0 */
    shader[i++] = CF_DWORD0(ADDR(6));
    shader[i++] = CF_DWORD1(POP_COUNT(0),
                            CF_CONST(0),
                            COND(SQ_CF_COND_ACTIVE),
                            I_COUNT(2),
                            CALL_COUNT(0),
                            END_OF_PROGRAM(0),
                            VALID_PIXEL_MODE(0),
                            CF_INST(SQ_CF_INST_VTX),
                            WHOLE_QUAD_MODE(0),
                            BARRIER(1));

    /* 1 - ALU */
    shader[i++] = CF_ALU_DWORD0(ADDR(4),
                                KCACHE_BANK0(0),
                                KCACHE_BANK1(0),
                                KCACHE_MODE0(SQ_CF_KCACHE_NOP));
    shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
                                KCACHE_ADDR0(0),
                                KCACHE_ADDR1(0),
                                I_COUNT(2),
                                USES_WATERFALL(0),
                                CF_INST(SQ_CF_INST_ALU),
                                WHOLE_QUAD_MODE(0),
                                BARRIER(1));

    /* 2 */
    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_POS0),
                                          TYPE(SQ_EXPORT_POS),
                                          RW_GPR(1),
                                          RW_REL(ABSOLUTE),
                                          INDEX_GPR(0),
                                          ELEM_SIZE(3));
    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
                                               SRC_SEL_Y(SQ_SEL_Y),
                                               SRC_SEL_Z(SQ_SEL_Z),
                                               SRC_SEL_W(SQ_SEL_W),
                                               R6xx_ELEM_LOOP(0),
                                               BURST_COUNT(1),
                                               END_OF_PROGRAM(0),
                                               VALID_PIXEL_MODE(0),
                                               CF_INST(SQ_CF_INST_EXPORT_DONE),
                                               WHOLE_QUAD_MODE(0),
                                               BARRIER(1));
    /* 3 */
    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(0),
                                          TYPE(SQ_EXPORT_PARAM),
                                          RW_GPR(0),
                                          RW_REL(ABSOLUTE),
                                          INDEX_GPR(0),
                                          ELEM_SIZE(3));
    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
                                               SRC_SEL_Y(SQ_SEL_Y),
                                               SRC_SEL_Z(SQ_SEL_Z),
                                               SRC_SEL_W(SQ_SEL_W),
                                               R6xx_ELEM_LOOP(0),
                                               BURST_COUNT(1),
                                               END_OF_PROGRAM(1),
                                               VALID_PIXEL_MODE(0),
                                               CF_INST(SQ_CF_INST_EXPORT_DONE),
                                               WHOLE_QUAD_MODE(0),
                                               BARRIER(0));

    /*
     * 4 texX / w
     * Implemented as a MUL by constant c0.x; presumably the caller uploads
     * 1/w there - TODO confirm against the constant setup.
     */
    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
                             SRC0_REL(ABSOLUTE),
                             SRC0_ELEM(ELEM_X),
                             SRC0_NEG(0),
                             SRC1_SEL(ALU_SRC_CFILE_BASE + 0),
                             SRC1_REL(ABSOLUTE),
                             SRC1_ELEM(ELEM_X),
                             SRC1_NEG(0),
                             INDEX_MODE(SQ_INDEX_AR_X),
                             PRED_SEL(SQ_PRED_SEL_OFF),
                             LAST(0));
    shader[i++] = ALU_DWORD1_OP2(ChipSet,
                                 SRC0_ABS(0),
                                 SRC1_ABS(0),
                                 UPDATE_EXECUTE_MASK(0),
                                 UPDATE_PRED(0),
                                 WRITE_MASK(1),
                                 FOG_MERGE(0),
                                 OMOD(SQ_ALU_OMOD_OFF),
                                 ALU_INST(SQ_OP2_INST_MUL),
                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
                                 DST_GPR(0),
                                 DST_REL(ABSOLUTE),
                                 DST_ELEM(ELEM_X),
                                 CLAMP(0));

    /*
     * 5 texY / h
     * Implemented as a MUL by constant c0.y; presumably the caller uploads
     * 1/h there - TODO confirm against the constant setup.
     */
    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
                             SRC0_REL(ABSOLUTE),
                             SRC0_ELEM(ELEM_Y),
                             SRC0_NEG(0),
                             SRC1_SEL(ALU_SRC_CFILE_BASE + 0),
                             SRC1_REL(ABSOLUTE),
                             SRC1_ELEM(ELEM_Y),
                             SRC1_NEG(0),
                             INDEX_MODE(SQ_INDEX_AR_X),
                             PRED_SEL(SQ_PRED_SEL_OFF),
                             LAST(1));
    shader[i++] = ALU_DWORD1_OP2(ChipSet,
                                 SRC0_ABS(0),
                                 SRC1_ABS(0),
                                 UPDATE_EXECUTE_MASK(0),
                                 UPDATE_PRED(0),
                                 WRITE_MASK(1),
                                 FOG_MERGE(0),
                                 OMOD(SQ_ALU_OMOD_OFF),
                                 ALU_INST(SQ_OP2_INST_MUL),
                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
                                 DST_GPR(0),
                                 DST_REL(ABSOLUTE),
                                 DST_ELEM(ELEM_Y),
                                 CLAMP(0));

    /* 6/7 */
    shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
                             FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
                             FETCH_WHOLE_QUAD(0),
                             BUFFER_ID(0),
                             SRC_GPR(0),
                             SRC_REL(ABSOLUTE),
                             SRC_SEL_X(SQ_SEL_X),
                             MEGA_FETCH_COUNT(16));
    shader[i++] = VTX_DWORD1_GPR(DST_GPR(1),
                                 DST_REL(ABSOLUTE),
                                 DST_SEL_X(SQ_SEL_X),
                                 DST_SEL_Y(SQ_SEL_Y),
                                 DST_SEL_Z(SQ_SEL_0),
                                 DST_SEL_W(SQ_SEL_1),
                                 USE_CONST_FIELDS(0),
                                 DATA_FORMAT(FMT_32_32_FLOAT),
                                 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED),
                                 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
                                 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
    shader[i++] = VTX_DWORD2(OFFSET(0),
                             ENDIAN_SWAP(vtx_endian_swap),
                             CONST_BUF_NO_STRIDE(0),
                             MEGA_FETCH(1));
    shader[i++] = VTX_DWORD_PAD;
    /* 8/9 */
    shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
                             FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
                             FETCH_WHOLE_QUAD(0),
                             BUFFER_ID(0),
                             SRC_GPR(0),
                             SRC_REL(ABSOLUTE),
                             SRC_SEL_X(SQ_SEL_X),
                             MEGA_FETCH_COUNT(8));
    shader[i++] = VTX_DWORD1_GPR(DST_GPR(0),
                                 DST_REL(ABSOLUTE),
                                 DST_SEL_X(SQ_SEL_X),
                                 DST_SEL_Y(SQ_SEL_Y),
                                 DST_SEL_Z(SQ_SEL_0),
                                 DST_SEL_W(SQ_SEL_1),
                                 USE_CONST_FIELDS(0),
                                 DATA_FORMAT(FMT_32_32_FLOAT),
                                 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED),
                                 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
                                 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
    shader[i++] = VTX_DWORD2(OFFSET(8),
                             ENDIAN_SWAP(vtx_endian_swap),
                             CONST_BUF_NO_STRIDE(0),
                             MEGA_FETCH(0));
    shader[i++] = VTX_DWORD_PAD;

    return i;
}
651 
R600_xv_ps(RADEONChipFamily ChipSet,uint32_t * shader)652 int R600_xv_ps(RADEONChipFamily ChipSet, uint32_t* shader)
653 {
654     int i = 0;
655 
656     /* 0 */
657     shader[i++] = CF_DWORD0(ADDR(16));
658     shader[i++] = CF_DWORD1(POP_COUNT(0),
659                             CF_CONST(0),
660                             COND(SQ_CF_COND_BOOL),
661                             I_COUNT(0),
662                             CALL_COUNT(0),
663                             END_OF_PROGRAM(0),
664                             VALID_PIXEL_MODE(0),
665                             CF_INST(SQ_CF_INST_CALL),
666                             WHOLE_QUAD_MODE(0),
667                             BARRIER(0));
668     /* 1 */
669     shader[i++] = CF_DWORD0(ADDR(24));
670     shader[i++] = CF_DWORD1(POP_COUNT(0),
671                             CF_CONST(0),
672                             COND(SQ_CF_COND_NOT_BOOL),
673                             I_COUNT(0),
674                             CALL_COUNT(0),
675                             END_OF_PROGRAM(0),
676                             VALID_PIXEL_MODE(0),
677                             CF_INST(SQ_CF_INST_CALL),
678                             WHOLE_QUAD_MODE(0),
679                             BARRIER(0));
680     /* 2 */
681     shader[i++] = CF_ALU_DWORD0(ADDR(4),
682                                 KCACHE_BANK0(0),
683                                 KCACHE_BANK1(0),
684                                 KCACHE_MODE0(SQ_CF_KCACHE_NOP));
685     shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
686                                 KCACHE_ADDR0(0),
687                                 KCACHE_ADDR1(0),
688                                 I_COUNT(12),
689                                 USES_WATERFALL(0),
690                                 CF_INST(SQ_CF_INST_ALU),
691                                 WHOLE_QUAD_MODE(0),
692                                 BARRIER(1));
693     /* 3 */
694     shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_PIXEL_MRT0),
695                                           TYPE(SQ_EXPORT_PIXEL),
696                                           RW_GPR(2),
697                                           RW_REL(ABSOLUTE),
698                                           INDEX_GPR(0),
699                                           ELEM_SIZE(3));
700     shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
701                                                SRC_SEL_Y(SQ_SEL_Y),
702                                                SRC_SEL_Z(SQ_SEL_Z),
703                                                SRC_SEL_W(SQ_SEL_W),
704                                                R6xx_ELEM_LOOP(0),
705                                                BURST_COUNT(1),
706                                                END_OF_PROGRAM(1),
707                                                VALID_PIXEL_MODE(0),
708                                                CF_INST(SQ_CF_INST_EXPORT_DONE),
709                                                WHOLE_QUAD_MODE(0),
710                                                BARRIER(1));
711     /* 4,5,6,7 */
712     /* r2.x = MAD(c0.w, r1.x, c0.x) */
713     shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_CFILE_BASE + 0),
714                              SRC0_REL(ABSOLUTE),
715                              SRC0_ELEM(ELEM_W),
716                              SRC0_NEG(0),
717                              SRC1_SEL(ALU_SRC_GPR_BASE + 1),
718                              SRC1_REL(ABSOLUTE),
719                              SRC1_ELEM(ELEM_X),
720                              SRC1_NEG(0),
721                              INDEX_MODE(SQ_INDEX_LOOP),
722                              PRED_SEL(SQ_PRED_SEL_OFF),
723                              LAST(0));
724     shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(ALU_SRC_CFILE_BASE + 0),
725                                  SRC2_REL(ABSOLUTE),
726                                  SRC2_ELEM(ELEM_X),
727                                  SRC2_NEG(0),
728                                  ALU_INST(SQ_OP3_INST_MULADD),
729                                  BANK_SWIZZLE(SQ_ALU_VEC_012),
730                                  DST_GPR(2),
731                                  DST_REL(ABSOLUTE),
732                                  DST_ELEM(ELEM_X),
733                                  CLAMP(0));
734     /* r2.y = MAD(c0.w, r1.x, c0.y) */
735     shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_CFILE_BASE + 0),
736                              SRC0_REL(ABSOLUTE),
737                              SRC0_ELEM(ELEM_W),
738                              SRC0_NEG(0),
739                              SRC1_SEL(ALU_SRC_GPR_BASE + 1),
740                              SRC1_REL(ABSOLUTE),
741                              SRC1_ELEM(ELEM_X),
742                              SRC1_NEG(0),
743                              INDEX_MODE(SQ_INDEX_LOOP),
744                              PRED_SEL(SQ_PRED_SEL_OFF),
745                              LAST(0));
746     shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(ALU_SRC_CFILE_BASE + 0),
747                                  SRC2_REL(ABSOLUTE),
748                                  SRC2_ELEM(ELEM_Y),
749                                  SRC2_NEG(0),
750                                  ALU_INST(SQ_OP3_INST_MULADD),
751                                  BANK_SWIZZLE(SQ_ALU_VEC_012),
752                                  DST_GPR(2),
753                                  DST_REL(ABSOLUTE),
754                                  DST_ELEM(ELEM_Y),
755                                  CLAMP(0));
756     /* r2.z = MAD(c0.w, r1.x, c0.z) */
757     shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_CFILE_BASE + 0),
758                              SRC0_REL(ABSOLUTE),
759                              SRC0_ELEM(ELEM_W),
760                              SRC0_NEG(0),
761                              SRC1_SEL(ALU_SRC_GPR_BASE + 1),
762                              SRC1_REL(ABSOLUTE),
763                              SRC1_ELEM(ELEM_X),
764                              SRC1_NEG(0),
765                              INDEX_MODE(SQ_INDEX_LOOP),
766                              PRED_SEL(SQ_PRED_SEL_OFF),
767                              LAST(0));
768     shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(ALU_SRC_CFILE_BASE + 0),
769                                  SRC2_REL(ABSOLUTE),
770                                  SRC2_ELEM(ELEM_Z),
771                                  SRC2_NEG(0),
772                                  ALU_INST(SQ_OP3_INST_MULADD),
773                                  BANK_SWIZZLE(SQ_ALU_VEC_012),
774                                  DST_GPR(2),
775                                  DST_REL(ABSOLUTE),
776                                  DST_ELEM(ELEM_Z),
777                                  CLAMP(0));
778     /* r2.w = MAD(0, 0, 1) */
779     shader[i++] = ALU_DWORD0(SRC0_SEL(SQ_ALU_SRC_0),
780                              SRC0_REL(ABSOLUTE),
781                              SRC0_ELEM(ELEM_X),
782                              SRC0_NEG(0),
783                              SRC1_SEL(SQ_ALU_SRC_0),
784                              SRC1_REL(ABSOLUTE),
785                              SRC1_ELEM(ELEM_X),
786                              SRC1_NEG(0),
787                              INDEX_MODE(SQ_INDEX_LOOP),
788                              PRED_SEL(SQ_PRED_SEL_OFF),
789                              LAST(1));
790     shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_1),
791                                  SRC2_REL(ABSOLUTE),
792                                  SRC2_ELEM(ELEM_X),
793                                  SRC2_NEG(0),
794                                  ALU_INST(SQ_OP3_INST_MULADD),
795                                  BANK_SWIZZLE(SQ_ALU_VEC_012),
796                                  DST_GPR(2),
797                                  DST_REL(ABSOLUTE),
798                                  DST_ELEM(ELEM_W),
799                                  CLAMP(0));
800 
801     /* 8,9,10,11 */
802     /* r2.x = MAD(c1.x, r1.y, pv.x) */
803     shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_CFILE_BASE + 1),
804                              SRC0_REL(ABSOLUTE),
805                              SRC0_ELEM(ELEM_X),
806                              SRC0_NEG(0),
807                              SRC1_SEL(ALU_SRC_GPR_BASE + 1),
808                              SRC1_REL(ABSOLUTE),
809                              SRC1_ELEM(ELEM_Y),
810                              SRC1_NEG(0),
811                              INDEX_MODE(SQ_INDEX_LOOP),
812                              PRED_SEL(SQ_PRED_SEL_OFF),
813                              LAST(0));
814     shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_PV),
815                                  SRC2_REL(ABSOLUTE),
816                                  SRC2_ELEM(ELEM_X),
817                                  SRC2_NEG(0),
818                                  ALU_INST(SQ_OP3_INST_MULADD),
819                                  BANK_SWIZZLE(SQ_ALU_VEC_012),
820                                  DST_GPR(2),
821                                  DST_REL(ABSOLUTE),
822                                  DST_ELEM(ELEM_X),
823                                  CLAMP(0));
824     /* r2.y = MAD(c1.y, r1.y, pv.y) */
825     shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_CFILE_BASE + 1),
826                              SRC0_REL(ABSOLUTE),
827                              SRC0_ELEM(ELEM_Y),
828                              SRC0_NEG(0),
829                              SRC1_SEL(ALU_SRC_GPR_BASE + 1),
830                              SRC1_REL(ABSOLUTE),
831                              SRC1_ELEM(ELEM_Y),
832                              SRC1_NEG(0),
833                              INDEX_MODE(SQ_INDEX_LOOP),
834                              PRED_SEL(SQ_PRED_SEL_OFF),
835                              LAST(0));
836     shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_PV),
837                                  SRC2_REL(ABSOLUTE),
838                                  SRC2_ELEM(ELEM_Y),
839                                  SRC2_NEG(0),
840                                  ALU_INST(SQ_OP3_INST_MULADD),
841                                  BANK_SWIZZLE(SQ_ALU_VEC_012),
842                                  DST_GPR(2),
843                                  DST_REL(ABSOLUTE),
844                                  DST_ELEM(ELEM_Y),
845                                  CLAMP(0));
846     /* r2.z = MAD(c1.z, r1.y, pv.z) */
847     shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_CFILE_BASE + 1),
848                              SRC0_REL(ABSOLUTE),
849                              SRC0_ELEM(ELEM_Z),
850                              SRC0_NEG(0),
851                              SRC1_SEL(ALU_SRC_GPR_BASE + 1),
852                              SRC1_REL(ABSOLUTE),
853                              SRC1_ELEM(ELEM_Y),
854                              SRC1_NEG(0),
855                              INDEX_MODE(SQ_INDEX_LOOP),
856                              PRED_SEL(SQ_PRED_SEL_OFF),
857                              LAST(0));
858     shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_PV),
859                                  SRC2_REL(ABSOLUTE),
860                                  SRC2_ELEM(ELEM_Z),
861                                  SRC2_NEG(0),
862                                  ALU_INST(SQ_OP3_INST_MULADD),
863                                  BANK_SWIZZLE(SQ_ALU_VEC_012),
864                                  DST_GPR(2),
865                                  DST_REL(ABSOLUTE),
866                                  DST_ELEM(ELEM_Z),
867                                  CLAMP(0));
868     /* r2.w = MAD(0, 0, 1) */
869     shader[i++] = ALU_DWORD0(SRC0_SEL(SQ_ALU_SRC_0),
870                              SRC0_REL(ABSOLUTE),
871                              SRC0_ELEM(ELEM_X),
872                              SRC0_NEG(0),
873                              SRC1_SEL(SQ_ALU_SRC_0),
874                              SRC1_REL(ABSOLUTE),
875                              SRC1_ELEM(ELEM_X),
876                              SRC1_NEG(0),
877                              INDEX_MODE(SQ_INDEX_LOOP),
878                              PRED_SEL(SQ_PRED_SEL_OFF),
879                              LAST(1));
880     shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_1),
881                                  SRC2_REL(ABSOLUTE),
882                                  SRC2_ELEM(ELEM_W),
883                                  SRC2_NEG(0),
884                                  ALU_INST(SQ_OP3_INST_MULADD),
885                                  BANK_SWIZZLE(SQ_ALU_VEC_012),
886                                  DST_GPR(2),
887                                  DST_REL(ABSOLUTE),
888                                  DST_ELEM(ELEM_W),
889                                  CLAMP(0));
890     /* 12,13,14,15 */
891     /* r2.x = MAD(c2.x, r1.z, pv.x) */
892     shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_CFILE_BASE + 2),
893                              SRC0_REL(ABSOLUTE),
894                              SRC0_ELEM(ELEM_X),
895                              SRC0_NEG(0),
896                              SRC1_SEL(ALU_SRC_GPR_BASE + 1),
897                              SRC1_REL(ABSOLUTE),
898                              SRC1_ELEM(ELEM_Z),
899                              SRC1_NEG(0),
900                              INDEX_MODE(SQ_INDEX_LOOP),
901                              PRED_SEL(SQ_PRED_SEL_OFF),
902                              LAST(0));
903     shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_PV),
904                                  SRC2_REL(ABSOLUTE),
905                                  SRC2_ELEM(ELEM_X),
906                                  SRC2_NEG(0),
907                                  ALU_INST(SQ_OP3_INST_MULADD),
908                                  BANK_SWIZZLE(SQ_ALU_VEC_012),
909                                  DST_GPR(2),
910                                  DST_REL(ABSOLUTE),
911                                  DST_ELEM(ELEM_X),
912                                  CLAMP(1));
913     /* r2.y = MAD(c2.y, r1.z, pv.y) */
914     shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_CFILE_BASE + 2),
915                              SRC0_REL(ABSOLUTE),
916                              SRC0_ELEM(ELEM_Y),
917                              SRC0_NEG(0),
918                              SRC1_SEL(ALU_SRC_GPR_BASE + 1),
919                              SRC1_REL(ABSOLUTE),
920                              SRC1_ELEM(ELEM_Z),
921                              SRC1_NEG(0),
922                              INDEX_MODE(SQ_INDEX_LOOP),
923                              PRED_SEL(SQ_PRED_SEL_OFF),
924                              LAST(0));
925     shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_PV),
926                                  SRC2_REL(ABSOLUTE),
927                                  SRC2_ELEM(ELEM_Y),
928                                  SRC2_NEG(0),
929                                  ALU_INST(SQ_OP3_INST_MULADD),
930                                  BANK_SWIZZLE(SQ_ALU_VEC_012),
931                                  DST_GPR(2),
932                                  DST_REL(ABSOLUTE),
933                                  DST_ELEM(ELEM_Y),
934                                  CLAMP(1));
935     /* r2.z = MAD(c2.z, r1.z, pv.z) */
936     shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_CFILE_BASE + 2),
937                              SRC0_REL(ABSOLUTE),
938                              SRC0_ELEM(ELEM_Z),
939                              SRC0_NEG(0),
940                              SRC1_SEL(ALU_SRC_GPR_BASE + 1),
941                              SRC1_REL(ABSOLUTE),
942                              SRC1_ELEM(ELEM_Z),
943                              SRC1_NEG(0),
944                              INDEX_MODE(SQ_INDEX_LOOP),
945                              PRED_SEL(SQ_PRED_SEL_OFF),
946                              LAST(0));
947     shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_PV),
948                                  SRC2_REL(ABSOLUTE),
949                                  SRC2_ELEM(ELEM_Z),
950                                  SRC2_NEG(0),
951                                  ALU_INST(SQ_OP3_INST_MULADD),
952                                  BANK_SWIZZLE(SQ_ALU_VEC_012),
953                                  DST_GPR(2),
954                                  DST_REL(ABSOLUTE),
955                                  DST_ELEM(ELEM_Z),
956                                  CLAMP(1));
957     /* r2.w = MAD(0, 0, 1) */
958     shader[i++] = ALU_DWORD0(SRC0_SEL(SQ_ALU_SRC_0),
959                              SRC0_REL(ABSOLUTE),
960                              SRC0_ELEM(ELEM_X),
961                              SRC0_NEG(0),
962                              SRC1_SEL(SQ_ALU_SRC_0),
963                              SRC1_REL(ABSOLUTE),
964                              SRC1_ELEM(ELEM_X),
965                              SRC1_NEG(0),
966                              INDEX_MODE(SQ_INDEX_LOOP),
967                              PRED_SEL(SQ_PRED_SEL_OFF),
968                              LAST(1));
969     shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_1),
970                                  SRC2_REL(ABSOLUTE),
971                                  SRC2_ELEM(ELEM_X),
972                                  SRC2_NEG(0),
973                                  ALU_INST(SQ_OP3_INST_MULADD),
974                                  BANK_SWIZZLE(SQ_ALU_VEC_012),
975                                  DST_GPR(2),
976                                  DST_REL(ABSOLUTE),
977                                  DST_ELEM(ELEM_W),
978                                  CLAMP(1));
979 
980     /* 16 */
981     shader[i++] = CF_DWORD0(ADDR(18));
982     shader[i++] = CF_DWORD1(POP_COUNT(0),
983                             CF_CONST(0),
984                             COND(SQ_CF_COND_ACTIVE),
985                             I_COUNT(3),
986                             CALL_COUNT(0),
987                             END_OF_PROGRAM(0),
988                             VALID_PIXEL_MODE(0),
989                             CF_INST(SQ_CF_INST_TEX),
990                             WHOLE_QUAD_MODE(0),
991                             BARRIER(1));
992     /* 17 */
993     shader[i++] = CF_DWORD0(ADDR(0));
994     shader[i++] = CF_DWORD1(POP_COUNT(0),
995 			    CF_CONST(0),
996 			    COND(SQ_CF_COND_ACTIVE),
997 			    I_COUNT(0),
998 			    CALL_COUNT(0),
999 			    END_OF_PROGRAM(0),
1000 			    VALID_PIXEL_MODE(0),
1001 			    CF_INST(SQ_CF_INST_RETURN),
1002 			    WHOLE_QUAD_MODE(0),
1003 			    BARRIER(1));
1004     /* 18/19 */
1005     shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
1006                              BC_FRAC_MODE(0),
1007                              FETCH_WHOLE_QUAD(0),
1008                              RESOURCE_ID(0),
1009                              SRC_GPR(0),
1010                              SRC_REL(ABSOLUTE),
1011                              R7xx_ALT_CONST(0));
1012     shader[i++] = TEX_DWORD1(DST_GPR(1),
1013                              DST_REL(ABSOLUTE),
1014                              DST_SEL_X(SQ_SEL_X),
1015                              DST_SEL_Y(SQ_SEL_MASK),
1016                              DST_SEL_Z(SQ_SEL_MASK),
1017                              DST_SEL_W(SQ_SEL_1),
1018                              LOD_BIAS(0),
1019                              COORD_TYPE_X(TEX_NORMALIZED),
1020                              COORD_TYPE_Y(TEX_NORMALIZED),
1021                              COORD_TYPE_Z(TEX_NORMALIZED),
1022                              COORD_TYPE_W(TEX_NORMALIZED));
1023     shader[i++] = TEX_DWORD2(OFFSET_X(0),
1024                              OFFSET_Y(0),
1025                              OFFSET_Z(0),
1026                              SAMPLER_ID(0),
1027                              SRC_SEL_X(SQ_SEL_X),
1028                              SRC_SEL_Y(SQ_SEL_Y),
1029                              SRC_SEL_Z(SQ_SEL_0),
1030                              SRC_SEL_W(SQ_SEL_1));
1031     shader[i++] = TEX_DWORD_PAD;
1032     /* 20/21 */
1033     shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
1034                              BC_FRAC_MODE(0),
1035                              FETCH_WHOLE_QUAD(0),
1036                              RESOURCE_ID(1),
1037                              SRC_GPR(0),
1038                              SRC_REL(ABSOLUTE),
1039                              R7xx_ALT_CONST(0));
1040     shader[i++] = TEX_DWORD1(DST_GPR(1),
1041                              DST_REL(ABSOLUTE),
1042                              DST_SEL_X(SQ_SEL_MASK),
1043                              DST_SEL_Y(SQ_SEL_MASK),
1044                              DST_SEL_Z(SQ_SEL_X),
1045                              DST_SEL_W(SQ_SEL_MASK),
1046                              LOD_BIAS(0),
1047                              COORD_TYPE_X(TEX_NORMALIZED),
1048                              COORD_TYPE_Y(TEX_NORMALIZED),
1049                              COORD_TYPE_Z(TEX_NORMALIZED),
1050                              COORD_TYPE_W(TEX_NORMALIZED));
1051     shader[i++] = TEX_DWORD2(OFFSET_X(0),
1052                              OFFSET_Y(0),
1053                              OFFSET_Z(0),
1054                              SAMPLER_ID(1),
1055                              SRC_SEL_X(SQ_SEL_X),
1056                              SRC_SEL_Y(SQ_SEL_Y),
1057                              SRC_SEL_Z(SQ_SEL_0),
1058                              SRC_SEL_W(SQ_SEL_1));
1059     shader[i++] = TEX_DWORD_PAD;
1060     /* 22/23 */
1061     shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
1062                              BC_FRAC_MODE(0),
1063                              FETCH_WHOLE_QUAD(0),
1064                              RESOURCE_ID(2),
1065                              SRC_GPR(0),
1066                              SRC_REL(ABSOLUTE),
1067                              R7xx_ALT_CONST(0));
1068     shader[i++] = TEX_DWORD1(DST_GPR(1),
1069                              DST_REL(ABSOLUTE),
1070                              DST_SEL_X(SQ_SEL_MASK),
1071                              DST_SEL_Y(SQ_SEL_X),
1072                              DST_SEL_Z(SQ_SEL_MASK),
1073                              DST_SEL_W(SQ_SEL_MASK),
1074                              LOD_BIAS(0),
1075                              COORD_TYPE_X(TEX_NORMALIZED),
1076                              COORD_TYPE_Y(TEX_NORMALIZED),
1077                              COORD_TYPE_Z(TEX_NORMALIZED),
1078                              COORD_TYPE_W(TEX_NORMALIZED));
1079     shader[i++] = TEX_DWORD2(OFFSET_X(0),
1080                              OFFSET_Y(0),
1081                              OFFSET_Z(0),
1082                              SAMPLER_ID(2),
1083                              SRC_SEL_X(SQ_SEL_X),
1084                              SRC_SEL_Y(SQ_SEL_Y),
1085                              SRC_SEL_Z(SQ_SEL_0),
1086                              SRC_SEL_W(SQ_SEL_1));
1087     shader[i++] = TEX_DWORD_PAD;
1088     /* 24 */
1089     shader[i++] = CF_DWORD0(ADDR(26));
1090     shader[i++] = CF_DWORD1(POP_COUNT(0),
1091                             CF_CONST(0),
1092                             COND(SQ_CF_COND_ACTIVE),
1093                             I_COUNT(1),
1094                             CALL_COUNT(0),
1095                             END_OF_PROGRAM(0),
1096                             VALID_PIXEL_MODE(0),
1097                             CF_INST(SQ_CF_INST_TEX),
1098                             WHOLE_QUAD_MODE(0),
1099                             BARRIER(1));
1100     /* 25 */
1101     shader[i++] = CF_DWORD0(ADDR(0));
1102     shader[i++] = CF_DWORD1(POP_COUNT(0),
1103 			    CF_CONST(0),
1104 			    COND(SQ_CF_COND_ACTIVE),
1105 			    I_COUNT(0),
1106 			    CALL_COUNT(0),
1107 			    END_OF_PROGRAM(0),
1108 			    VALID_PIXEL_MODE(0),
1109 			    CF_INST(SQ_CF_INST_RETURN),
1110 			    WHOLE_QUAD_MODE(0),
1111 			    BARRIER(1));
1112     /* 26/27 */
1113     shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
1114                              BC_FRAC_MODE(0),
1115                              FETCH_WHOLE_QUAD(0),
1116                              RESOURCE_ID(0),
1117                              SRC_GPR(0),
1118                              SRC_REL(ABSOLUTE),
1119                              R7xx_ALT_CONST(0));
1120     shader[i++] = TEX_DWORD1(DST_GPR(1),
1121                              DST_REL(ABSOLUTE),
1122                              DST_SEL_X(SQ_SEL_X),
1123                              DST_SEL_Y(SQ_SEL_Y),
1124                              DST_SEL_Z(SQ_SEL_Z),
1125                              DST_SEL_W(SQ_SEL_1),
1126                              LOD_BIAS(0),
1127                              COORD_TYPE_X(TEX_NORMALIZED),
1128                              COORD_TYPE_Y(TEX_NORMALIZED),
1129                              COORD_TYPE_Z(TEX_NORMALIZED),
1130                              COORD_TYPE_W(TEX_NORMALIZED));
1131     shader[i++] = TEX_DWORD2(OFFSET_X(0),
1132                              OFFSET_Y(0),
1133                              OFFSET_Z(0),
1134                              SAMPLER_ID(0),
1135                              SRC_SEL_X(SQ_SEL_X),
1136                              SRC_SEL_Y(SQ_SEL_Y),
1137                              SRC_SEL_Z(SQ_SEL_0),
1138                              SRC_SEL_W(SQ_SEL_1));
1139     shader[i++] = TEX_DWORD_PAD;
1140 
1141     return i;
1142 }
1143 
1144 /* comp vs --------------------------------------- */
R600_comp_vs(RADEONChipFamily ChipSet,uint32_t * shader)1145 int R600_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader)
1146 {
1147     int i = 0;
1148 
1149     /* 0 */
1150     shader[i++] = CF_DWORD0(ADDR(3));
1151     shader[i++] = CF_DWORD1(POP_COUNT(0),
1152                             CF_CONST(0),
1153                             COND(SQ_CF_COND_BOOL),
1154                             I_COUNT(0),
1155                             CALL_COUNT(0),
1156                             END_OF_PROGRAM(0),
1157                             VALID_PIXEL_MODE(0),
1158                             CF_INST(SQ_CF_INST_CALL),
1159                             WHOLE_QUAD_MODE(0),
1160                             BARRIER(0));
1161     /* 1 */
1162     shader[i++] = CF_DWORD0(ADDR(9));
1163     shader[i++] = CF_DWORD1(POP_COUNT(0),
1164                             CF_CONST(0),
1165                             COND(SQ_CF_COND_NOT_BOOL),
1166                             I_COUNT(0),
1167                             CALL_COUNT(0),
1168                             END_OF_PROGRAM(0),
1169                             VALID_PIXEL_MODE(0),
1170                             CF_INST(SQ_CF_INST_CALL),
1171                             WHOLE_QUAD_MODE(0),
1172                             BARRIER(0));
1173     /* 2 */
1174     shader[i++] = CF_DWORD0(ADDR(0));
1175     shader[i++] = CF_DWORD1(POP_COUNT(0),
1176                             CF_CONST(0),
1177                             COND(SQ_CF_COND_ACTIVE),
1178                             I_COUNT(0),
1179                             CALL_COUNT(0),
1180                             END_OF_PROGRAM(1),
1181                             VALID_PIXEL_MODE(0),
1182                             CF_INST(SQ_CF_INST_NOP),
1183                             WHOLE_QUAD_MODE(0),
1184                             BARRIER(1));
1185     /* 3 - mask sub */
1186     shader[i++] = CF_DWORD0(ADDR(44));
1187     shader[i++] = CF_DWORD1(POP_COUNT(0),
1188 			    CF_CONST(0),
1189 			    COND(SQ_CF_COND_ACTIVE),
1190 			    I_COUNT(3),
1191 			    CALL_COUNT(0),
1192 			    END_OF_PROGRAM(0),
1193 			    VALID_PIXEL_MODE(0),
1194 			    CF_INST(SQ_CF_INST_VTX),
1195 			    WHOLE_QUAD_MODE(0),
1196 			    BARRIER(1));
1197 
1198     /* 4 - ALU */
1199     shader[i++] = CF_ALU_DWORD0(ADDR(14),
1200 				KCACHE_BANK0(0),
1201 				KCACHE_BANK1(0),
1202 				KCACHE_MODE0(SQ_CF_KCACHE_NOP));
1203     shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
1204 				KCACHE_ADDR0(0),
1205 				KCACHE_ADDR1(0),
1206 				I_COUNT(20),
1207 				USES_WATERFALL(0),
1208 				CF_INST(SQ_CF_INST_ALU),
1209 				WHOLE_QUAD_MODE(0),
1210 				BARRIER(1));
1211 
1212     /* 5 - dst */
1213     shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_POS0),
1214 					  TYPE(SQ_EXPORT_POS),
1215 					  RW_GPR(2),
1216 					  RW_REL(ABSOLUTE),
1217 					  INDEX_GPR(0),
1218 					  ELEM_SIZE(0));
1219     shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
1220 					       SRC_SEL_Y(SQ_SEL_Y),
1221 					       SRC_SEL_Z(SQ_SEL_0),
1222 					       SRC_SEL_W(SQ_SEL_1),
1223 					       R6xx_ELEM_LOOP(0),
1224 					       BURST_COUNT(1),
1225 					       END_OF_PROGRAM(0),
1226 					       VALID_PIXEL_MODE(0),
1227 					       CF_INST(SQ_CF_INST_EXPORT_DONE),
1228 					       WHOLE_QUAD_MODE(0),
1229 					       BARRIER(1));
1230     /* 6 - src */
1231     shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(0),
1232 					  TYPE(SQ_EXPORT_PARAM),
1233 					  RW_GPR(1),
1234 					  RW_REL(ABSOLUTE),
1235 					  INDEX_GPR(0),
1236 					  ELEM_SIZE(0));
1237     shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
1238 					       SRC_SEL_Y(SQ_SEL_Y),
1239 					       SRC_SEL_Z(SQ_SEL_0),
1240 					       SRC_SEL_W(SQ_SEL_1),
1241 					       R6xx_ELEM_LOOP(0),
1242 					       BURST_COUNT(1),
1243 					       END_OF_PROGRAM(0),
1244 					       VALID_PIXEL_MODE(0),
1245 					       CF_INST(SQ_CF_INST_EXPORT),
1246 					       WHOLE_QUAD_MODE(0),
1247 					       BARRIER(0));
1248     /* 7 - mask */
1249     shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(1),
1250 					  TYPE(SQ_EXPORT_PARAM),
1251 					  RW_GPR(0),
1252 					  RW_REL(ABSOLUTE),
1253 					  INDEX_GPR(0),
1254 					  ELEM_SIZE(0));
1255     shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
1256 					       SRC_SEL_Y(SQ_SEL_Y),
1257 					       SRC_SEL_Z(SQ_SEL_0),
1258 					       SRC_SEL_W(SQ_SEL_1),
1259 					       R6xx_ELEM_LOOP(0),
1260 					       BURST_COUNT(1),
1261 					       END_OF_PROGRAM(0),
1262 					       VALID_PIXEL_MODE(0),
1263 					       CF_INST(SQ_CF_INST_EXPORT_DONE),
1264 					       WHOLE_QUAD_MODE(0),
1265 					       BARRIER(0));
1266     /* 8 */
1267     shader[i++] = CF_DWORD0(ADDR(0));
1268     shader[i++] = CF_DWORD1(POP_COUNT(0),
1269 			    CF_CONST(0),
1270 			    COND(SQ_CF_COND_ACTIVE),
1271 			    I_COUNT(0),
1272 			    CALL_COUNT(0),
1273 			    END_OF_PROGRAM(0),
1274 			    VALID_PIXEL_MODE(0),
1275 			    CF_INST(SQ_CF_INST_RETURN),
1276 			    WHOLE_QUAD_MODE(0),
1277 			    BARRIER(1));
1278     /* 9 - non-mask sub */
1279     shader[i++] = CF_DWORD0(ADDR(50));
1280     shader[i++] = CF_DWORD1(POP_COUNT(0),
1281 			    CF_CONST(0),
1282 			    COND(SQ_CF_COND_ACTIVE),
1283 			    I_COUNT(2),
1284 			    CALL_COUNT(0),
1285 			    END_OF_PROGRAM(0),
1286 			    VALID_PIXEL_MODE(0),
1287 			    CF_INST(SQ_CF_INST_VTX),
1288 			    WHOLE_QUAD_MODE(0),
1289 			    BARRIER(1));
1290 
1291     /* 10 - ALU */
1292     shader[i++] = CF_ALU_DWORD0(ADDR(34),
1293 				KCACHE_BANK0(0),
1294 				KCACHE_BANK1(0),
1295 				KCACHE_MODE0(SQ_CF_KCACHE_NOP));
1296     shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
1297 				KCACHE_ADDR0(0),
1298 				KCACHE_ADDR1(0),
1299 				I_COUNT(10),
1300 				USES_WATERFALL(0),
1301 				CF_INST(SQ_CF_INST_ALU),
1302 				WHOLE_QUAD_MODE(0),
1303 				BARRIER(1));
1304 
1305     /* 11 - dst */
1306     shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_POS0),
1307 					  TYPE(SQ_EXPORT_POS),
1308 					  RW_GPR(1),
1309 					  RW_REL(ABSOLUTE),
1310 					  INDEX_GPR(0),
1311 					  ELEM_SIZE(0));
1312     shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
1313 					       SRC_SEL_Y(SQ_SEL_Y),
1314 					       SRC_SEL_Z(SQ_SEL_0),
1315 					       SRC_SEL_W(SQ_SEL_1),
1316 					       R6xx_ELEM_LOOP(0),
1317 					       BURST_COUNT(0),
1318 					       END_OF_PROGRAM(0),
1319 					       VALID_PIXEL_MODE(0),
1320 					       CF_INST(SQ_CF_INST_EXPORT_DONE),
1321 					       WHOLE_QUAD_MODE(0),
1322 					       BARRIER(1));
1323     /* 12 - src */
1324     shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(0),
1325 					  TYPE(SQ_EXPORT_PARAM),
1326 					  RW_GPR(0),
1327 					  RW_REL(ABSOLUTE),
1328 					  INDEX_GPR(0),
1329 					  ELEM_SIZE(0));
1330     shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
1331 					       SRC_SEL_Y(SQ_SEL_Y),
1332 					       SRC_SEL_Z(SQ_SEL_0),
1333 					       SRC_SEL_W(SQ_SEL_1),
1334 					       R6xx_ELEM_LOOP(0),
1335 					       BURST_COUNT(0),
1336 					       END_OF_PROGRAM(0),
1337 					       VALID_PIXEL_MODE(0),
1338 					       CF_INST(SQ_CF_INST_EXPORT_DONE),
1339 					       WHOLE_QUAD_MODE(0),
1340 					       BARRIER(0));
1341     /* 13 */
1342     shader[i++] = CF_DWORD0(ADDR(0));
1343     shader[i++] = CF_DWORD1(POP_COUNT(0),
1344 			    CF_CONST(0),
1345 			    COND(SQ_CF_COND_ACTIVE),
1346 			    I_COUNT(0),
1347 			    CALL_COUNT(0),
1348 			    END_OF_PROGRAM(0),
1349 			    VALID_PIXEL_MODE(0),
1350 			    CF_INST(SQ_CF_INST_RETURN),
1351 			    WHOLE_QUAD_MODE(0),
1352 			    BARRIER(1));
1353 
1354 
1355     /* 14 srcX.x DOT4 - mask */
1356     shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1),
1357                              SRC0_REL(ABSOLUTE),
1358                              SRC0_ELEM(ELEM_X),
1359                              SRC0_NEG(0),
1360                              SRC1_SEL(ALU_SRC_CFILE_BASE + 0),
1361                              SRC1_REL(ABSOLUTE),
1362                              SRC1_ELEM(ELEM_X),
1363                              SRC1_NEG(0),
1364                              INDEX_MODE(SQ_INDEX_LOOP),
1365                              PRED_SEL(SQ_PRED_SEL_OFF),
1366                              LAST(0));
1367     shader[i++] = ALU_DWORD1_OP2(ChipSet,
1368                                  SRC0_ABS(0),
1369                                  SRC1_ABS(0),
1370                                  UPDATE_EXECUTE_MASK(0),
1371                                  UPDATE_PRED(0),
1372                                  WRITE_MASK(1),
1373                                  FOG_MERGE(0),
1374                                  OMOD(SQ_ALU_OMOD_OFF),
1375                                  ALU_INST(SQ_OP2_INST_DOT4),
1376                                  BANK_SWIZZLE(SQ_ALU_VEC_012),
1377                                  DST_GPR(3),
1378                                  DST_REL(ABSOLUTE),
1379                                  DST_ELEM(ELEM_X),
1380                                  CLAMP(0));
1381 
1382     /* 15 srcX.y DOT4 - mask */
1383     shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1),
1384                              SRC0_REL(ABSOLUTE),
1385                              SRC0_ELEM(ELEM_Y),
1386                              SRC0_NEG(0),
1387                              SRC1_SEL(ALU_SRC_CFILE_BASE + 0),
1388                              SRC1_REL(ABSOLUTE),
1389                              SRC1_ELEM(ELEM_Y),
1390                              SRC1_NEG(0),
1391                              INDEX_MODE(SQ_INDEX_LOOP),
1392                              PRED_SEL(SQ_PRED_SEL_OFF),
1393                              LAST(0));
1394     shader[i++] = ALU_DWORD1_OP2(ChipSet,
1395                                  SRC0_ABS(0),
1396                                  SRC1_ABS(0),
1397                                  UPDATE_EXECUTE_MASK(0),
1398                                  UPDATE_PRED(0),
1399                                  WRITE_MASK(0),
1400                                  FOG_MERGE(0),
1401                                  OMOD(SQ_ALU_OMOD_OFF),
1402                                  ALU_INST(SQ_OP2_INST_DOT4),
1403                                  BANK_SWIZZLE(SQ_ALU_VEC_012),
1404                                  DST_GPR(3),
1405                                  DST_REL(ABSOLUTE),
1406                                  DST_ELEM(ELEM_Y),
1407                                  CLAMP(0));
1408 
1409     /* 16 srcX.z DOT4 - mask */
1410     shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1),
1411                              SRC0_REL(ABSOLUTE),
1412                              SRC0_ELEM(ELEM_Z),
1413                              SRC0_NEG(0),
1414                              SRC1_SEL(ALU_SRC_CFILE_BASE + 0),
1415                              SRC1_REL(ABSOLUTE),
1416                              SRC1_ELEM(ELEM_Z),
1417                              SRC1_NEG(0),
1418                              INDEX_MODE(SQ_INDEX_LOOP),
1419                              PRED_SEL(SQ_PRED_SEL_OFF),
1420                              LAST(0));
1421     shader[i++] = ALU_DWORD1_OP2(ChipSet,
1422                                  SRC0_ABS(0),
1423                                  SRC1_ABS(0),
1424                                  UPDATE_EXECUTE_MASK(0),
1425                                  UPDATE_PRED(0),
1426                                  WRITE_MASK(0),
1427                                  FOG_MERGE(0),
1428                                  OMOD(SQ_ALU_OMOD_OFF),
1429                                  ALU_INST(SQ_OP2_INST_DOT4),
1430                                  BANK_SWIZZLE(SQ_ALU_VEC_012),
1431                                  DST_GPR(3),
1432                                  DST_REL(ABSOLUTE),
1433                                  DST_ELEM(ELEM_Z),
1434                                  CLAMP(0));
1435 
1436     /* 17 srcX.w DOT4 - mask */
1437     shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1),
1438                              SRC0_REL(ABSOLUTE),
1439                              SRC0_ELEM(ELEM_W),
1440                              SRC0_NEG(0),
1441                              SRC1_SEL(ALU_SRC_CFILE_BASE + 0),
1442                              SRC1_REL(ABSOLUTE),
1443                              SRC1_ELEM(ELEM_W),
1444                              SRC1_NEG(0),
1445                              INDEX_MODE(SQ_INDEX_LOOP),
1446                              PRED_SEL(SQ_PRED_SEL_OFF),
1447                              LAST(1));
1448     shader[i++] = ALU_DWORD1_OP2(ChipSet,
1449                                  SRC0_ABS(0),
1450                                  SRC1_ABS(0),
1451                                  UPDATE_EXECUTE_MASK(0),
1452                                  UPDATE_PRED(0),
1453                                  WRITE_MASK(0),
1454                                  FOG_MERGE(0),
1455                                  OMOD(SQ_ALU_OMOD_OFF),
1456                                  ALU_INST(SQ_OP2_INST_DOT4),
1457                                  BANK_SWIZZLE(SQ_ALU_VEC_012),
1458                                  DST_GPR(3),
1459                                  DST_REL(ABSOLUTE),
1460                                  DST_ELEM(ELEM_W),
1461                                  CLAMP(0));
1462 
1463     /* 18 srcY.x DOT4 - mask */
1464     shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1),
1465                              SRC0_REL(ABSOLUTE),
1466                              SRC0_ELEM(ELEM_X),
1467                              SRC0_NEG(0),
1468                              SRC1_SEL(ALU_SRC_CFILE_BASE + 1),
1469                              SRC1_REL(ABSOLUTE),
1470                              SRC1_ELEM(ELEM_X),
1471                              SRC1_NEG(0),
1472                              INDEX_MODE(SQ_INDEX_LOOP),
1473                              PRED_SEL(SQ_PRED_SEL_OFF),
1474                              LAST(0));
1475     shader[i++] = ALU_DWORD1_OP2(ChipSet,
1476                                  SRC0_ABS(0),
1477                                  SRC1_ABS(0),
1478                                  UPDATE_EXECUTE_MASK(0),
1479                                  UPDATE_PRED(0),
1480                                  WRITE_MASK(0),
1481                                  FOG_MERGE(0),
1482                                  OMOD(SQ_ALU_OMOD_OFF),
1483                                  ALU_INST(SQ_OP2_INST_DOT4),
1484                                  BANK_SWIZZLE(SQ_ALU_VEC_012),
1485                                  DST_GPR(3),
1486                                  DST_REL(ABSOLUTE),
1487                                  DST_ELEM(ELEM_X),
1488                                  CLAMP(0));
1489 
1490     /* 19 srcY.y DOT4 - mask */
1491     shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1),
1492                              SRC0_REL(ABSOLUTE),
1493                              SRC0_ELEM(ELEM_Y),
1494                              SRC0_NEG(0),
1495                              SRC1_SEL(ALU_SRC_CFILE_BASE + 1),
1496                              SRC1_REL(ABSOLUTE),
1497                              SRC1_ELEM(ELEM_Y),
1498                              SRC1_NEG(0),
1499                              INDEX_MODE(SQ_INDEX_LOOP),
1500                              PRED_SEL(SQ_PRED_SEL_OFF),
1501                              LAST(0));
1502     shader[i++] = ALU_DWORD1_OP2(ChipSet,
1503                                  SRC0_ABS(0),
1504                                  SRC1_ABS(0),
1505                                  UPDATE_EXECUTE_MASK(0),
1506                                  UPDATE_PRED(0),
1507                                  WRITE_MASK(1),
1508                                  FOG_MERGE(0),
1509                                  OMOD(SQ_ALU_OMOD_OFF),
1510                                  ALU_INST(SQ_OP2_INST_DOT4),
1511                                  BANK_SWIZZLE(SQ_ALU_VEC_012),
1512                                  DST_GPR(3),
1513                                  DST_REL(ABSOLUTE),
1514                                  DST_ELEM(ELEM_Y),
1515                                  CLAMP(0));
1516 
1517     /* 20 srcY.z DOT4 - mask */
1518     shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1),
1519                              SRC0_REL(ABSOLUTE),
1520                              SRC0_ELEM(ELEM_Z),
1521                              SRC0_NEG(0),
1522                              SRC1_SEL(ALU_SRC_CFILE_BASE + 1),
1523                              SRC1_REL(ABSOLUTE),
1524                              SRC1_ELEM(ELEM_Z),
1525                              SRC1_NEG(0),
1526                              INDEX_MODE(SQ_INDEX_LOOP),
1527                              PRED_SEL(SQ_PRED_SEL_OFF),
1528                              LAST(0));
1529     shader[i++] = ALU_DWORD1_OP2(ChipSet,
1530                                  SRC0_ABS(0),
1531                                  SRC1_ABS(0),
1532                                  UPDATE_EXECUTE_MASK(0),
1533                                  UPDATE_PRED(0),
1534                                  WRITE_MASK(0),
1535                                  FOG_MERGE(0),
1536                                  OMOD(SQ_ALU_OMOD_OFF),
1537                                  ALU_INST(SQ_OP2_INST_DOT4),
1538                                  BANK_SWIZZLE(SQ_ALU_VEC_012),
1539                                  DST_GPR(3),
1540                                  DST_REL(ABSOLUTE),
1541                                  DST_ELEM(ELEM_Z),
1542                                  CLAMP(0));
1543 
1544     /* 21 srcY.w DOT4 - mask */
1545     shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1),
1546                              SRC0_REL(ABSOLUTE),
1547                              SRC0_ELEM(ELEM_W),
1548                              SRC0_NEG(0),
1549                              SRC1_SEL(ALU_SRC_CFILE_BASE + 1),
1550                              SRC1_REL(ABSOLUTE),
1551                              SRC1_ELEM(ELEM_W),
1552                              SRC1_NEG(0),
1553                              INDEX_MODE(SQ_INDEX_LOOP),
1554                              PRED_SEL(SQ_PRED_SEL_OFF),
1555                              LAST(1));
1556     shader[i++] = ALU_DWORD1_OP2(ChipSet,
1557                                  SRC0_ABS(0),
1558                                  SRC1_ABS(0),
1559                                  UPDATE_EXECUTE_MASK(0),
1560                                  UPDATE_PRED(0),
1561                                  WRITE_MASK(0),
1562                                  FOG_MERGE(0),
1563                                  OMOD(SQ_ALU_OMOD_OFF),
1564                                  ALU_INST(SQ_OP2_INST_DOT4),
1565                                  BANK_SWIZZLE(SQ_ALU_VEC_012),
1566                                  DST_GPR(3),
1567                                  DST_REL(ABSOLUTE),
1568                                  DST_ELEM(ELEM_W),
1569                                  CLAMP(0));
1570 
1571     /* 22 maskX.x DOT4 - mask */
1572     shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
1573                              SRC0_REL(ABSOLUTE),
1574                              SRC0_ELEM(ELEM_X),
1575                              SRC0_NEG(0),
1576                              SRC1_SEL(ALU_SRC_CFILE_BASE + 2),
1577                              SRC1_REL(ABSOLUTE),
1578                              SRC1_ELEM(ELEM_X),
1579                              SRC1_NEG(0),
1580                              INDEX_MODE(SQ_INDEX_LOOP),
1581                              PRED_SEL(SQ_PRED_SEL_OFF),
1582                              LAST(0));
1583     shader[i++] = ALU_DWORD1_OP2(ChipSet,
1584                                  SRC0_ABS(0),
1585                                  SRC1_ABS(0),
1586                                  UPDATE_EXECUTE_MASK(0),
1587                                  UPDATE_PRED(0),
1588                                  WRITE_MASK(1),
1589                                  FOG_MERGE(0),
1590                                  OMOD(SQ_ALU_OMOD_OFF),
1591                                  ALU_INST(SQ_OP2_INST_DOT4),
1592                                  BANK_SWIZZLE(SQ_ALU_VEC_012),
1593                                  DST_GPR(4),
1594                                  DST_REL(ABSOLUTE),
1595                                  DST_ELEM(ELEM_X),
1596                                  CLAMP(0));
1597 
1598     /* 23 maskX.y DOT4 - mask */
1599     shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
1600                              SRC0_REL(ABSOLUTE),
1601                              SRC0_ELEM(ELEM_Y),
1602                              SRC0_NEG(0),
1603                              SRC1_SEL(ALU_SRC_CFILE_BASE + 2),
1604                              SRC1_REL(ABSOLUTE),
1605                              SRC1_ELEM(ELEM_Y),
1606                              SRC1_NEG(0),
1607                              INDEX_MODE(SQ_INDEX_LOOP),
1608                              PRED_SEL(SQ_PRED_SEL_OFF),
1609                              LAST(0));
1610     shader[i++] = ALU_DWORD1_OP2(ChipSet,
1611                                  SRC0_ABS(0),
1612                                  SRC1_ABS(0),
1613                                  UPDATE_EXECUTE_MASK(0),
1614                                  UPDATE_PRED(0),
1615                                  WRITE_MASK(0),
1616                                  FOG_MERGE(0),
1617                                  OMOD(SQ_ALU_OMOD_OFF),
1618                                  ALU_INST(SQ_OP2_INST_DOT4),
1619                                  BANK_SWIZZLE(SQ_ALU_VEC_012),
1620                                  DST_GPR(4),
1621                                  DST_REL(ABSOLUTE),
1622                                  DST_ELEM(ELEM_Y),
1623                                  CLAMP(0));
1624 
1625     /* 24 maskX.z DOT4 - mask */
1626     shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
1627                              SRC0_REL(ABSOLUTE),
1628                              SRC0_ELEM(ELEM_Z),
1629                              SRC0_NEG(0),
1630                              SRC1_SEL(ALU_SRC_CFILE_BASE + 2),
1631                              SRC1_REL(ABSOLUTE),
1632                              SRC1_ELEM(ELEM_Z),
1633                              SRC1_NEG(0),
1634                              INDEX_MODE(SQ_INDEX_LOOP),
1635                              PRED_SEL(SQ_PRED_SEL_OFF),
1636                              LAST(0));
1637     shader[i++] = ALU_DWORD1_OP2(ChipSet,
1638                                  SRC0_ABS(0),
1639                                  SRC1_ABS(0),
1640                                  UPDATE_EXECUTE_MASK(0),
1641                                  UPDATE_PRED(0),
1642                                  WRITE_MASK(0),
1643                                  FOG_MERGE(0),
1644                                  OMOD(SQ_ALU_OMOD_OFF),
1645                                  ALU_INST(SQ_OP2_INST_DOT4),
1646                                  BANK_SWIZZLE(SQ_ALU_VEC_012),
1647                                  DST_GPR(4),
1648                                  DST_REL(ABSOLUTE),
1649                                  DST_ELEM(ELEM_Z),
1650                                  CLAMP(0));
1651 
1652     /* 25 maskX.w DOT4 - mask */
1653     shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
1654                              SRC0_REL(ABSOLUTE),
1655                              SRC0_ELEM(ELEM_W),
1656                              SRC0_NEG(0),
1657                              SRC1_SEL(ALU_SRC_CFILE_BASE + 2),
1658                              SRC1_REL(ABSOLUTE),
1659                              SRC1_ELEM(ELEM_W),
1660                              SRC1_NEG(0),
1661                              INDEX_MODE(SQ_INDEX_LOOP),
1662                              PRED_SEL(SQ_PRED_SEL_OFF),
1663                              LAST(1));
1664     shader[i++] = ALU_DWORD1_OP2(ChipSet,
1665                                  SRC0_ABS(0),
1666                                  SRC1_ABS(0),
1667                                  UPDATE_EXECUTE_MASK(0),
1668                                  UPDATE_PRED(0),
1669                                  WRITE_MASK(0),
1670                                  FOG_MERGE(0),
1671                                  OMOD(SQ_ALU_OMOD_OFF),
1672                                  ALU_INST(SQ_OP2_INST_DOT4),
1673                                  BANK_SWIZZLE(SQ_ALU_VEC_012),
1674                                  DST_GPR(4),
1675                                  DST_REL(ABSOLUTE),
1676                                  DST_ELEM(ELEM_W),
1677                                  CLAMP(0));
1678 
1679     /* 26 maskY.x DOT4 - mask */
1680     shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
1681                              SRC0_REL(ABSOLUTE),
1682                              SRC0_ELEM(ELEM_X),
1683                              SRC0_NEG(0),
1684                              SRC1_SEL(ALU_SRC_CFILE_BASE + 3),
1685                              SRC1_REL(ABSOLUTE),
1686                              SRC1_ELEM(ELEM_X),
1687                              SRC1_NEG(0),
1688                              INDEX_MODE(SQ_INDEX_LOOP),
1689                              PRED_SEL(SQ_PRED_SEL_OFF),
1690                              LAST(0));
1691     shader[i++] = ALU_DWORD1_OP2(ChipSet,
1692                                  SRC0_ABS(0),
1693                                  SRC1_ABS(0),
1694                                  UPDATE_EXECUTE_MASK(0),
1695                                  UPDATE_PRED(0),
1696                                  WRITE_MASK(0),
1697                                  FOG_MERGE(0),
1698                                  OMOD(SQ_ALU_OMOD_OFF),
1699                                  ALU_INST(SQ_OP2_INST_DOT4),
1700                                  BANK_SWIZZLE(SQ_ALU_VEC_012),
1701                                  DST_GPR(4),
1702                                  DST_REL(ABSOLUTE),
1703                                  DST_ELEM(ELEM_X),
1704                                  CLAMP(0));
1705 
1706     /* 27 maskY.y DOT4 - mask */
1707     shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
1708                              SRC0_REL(ABSOLUTE),
1709                              SRC0_ELEM(ELEM_Y),
1710                              SRC0_NEG(0),
1711                              SRC1_SEL(ALU_SRC_CFILE_BASE + 3),
1712                              SRC1_REL(ABSOLUTE),
1713                              SRC1_ELEM(ELEM_Y),
1714                              SRC1_NEG(0),
1715                              INDEX_MODE(SQ_INDEX_LOOP),
1716                              PRED_SEL(SQ_PRED_SEL_OFF),
1717                              LAST(0));
1718     shader[i++] = ALU_DWORD1_OP2(ChipSet,
1719                                  SRC0_ABS(0),
1720                                  SRC1_ABS(0),
1721                                  UPDATE_EXECUTE_MASK(0),
1722                                  UPDATE_PRED(0),
1723                                  WRITE_MASK(1),
1724                                  FOG_MERGE(0),
1725                                  OMOD(SQ_ALU_OMOD_OFF),
1726                                  ALU_INST(SQ_OP2_INST_DOT4),
1727                                  BANK_SWIZZLE(SQ_ALU_VEC_012),
1728                                  DST_GPR(4),
1729                                  DST_REL(ABSOLUTE),
1730                                  DST_ELEM(ELEM_Y),
1731                                  CLAMP(0));
1732 
1733     /* 28 maskY.z DOT4 - mask */
1734     shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
1735                              SRC0_REL(ABSOLUTE),
1736                              SRC0_ELEM(ELEM_Z),
1737                              SRC0_NEG(0),
1738                              SRC1_SEL(ALU_SRC_CFILE_BASE + 3),
1739                              SRC1_REL(ABSOLUTE),
1740                              SRC1_ELEM(ELEM_Z),
1741                              SRC1_NEG(0),
1742                              INDEX_MODE(SQ_INDEX_LOOP),
1743                              PRED_SEL(SQ_PRED_SEL_OFF),
1744                              LAST(0));
1745     shader[i++] = ALU_DWORD1_OP2(ChipSet,
1746                                  SRC0_ABS(0),
1747                                  SRC1_ABS(0),
1748                                  UPDATE_EXECUTE_MASK(0),
1749                                  UPDATE_PRED(0),
1750                                  WRITE_MASK(0),
1751                                  FOG_MERGE(0),
1752                                  OMOD(SQ_ALU_OMOD_OFF),
1753                                  ALU_INST(SQ_OP2_INST_DOT4),
1754                                  BANK_SWIZZLE(SQ_ALU_VEC_012),
1755                                  DST_GPR(4),
1756                                  DST_REL(ABSOLUTE),
1757                                  DST_ELEM(ELEM_Z),
1758                                  CLAMP(0));
1759 
1760     /* 29 maskY.w DOT4 - mask */
1761     shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
1762                              SRC0_REL(ABSOLUTE),
1763                              SRC0_ELEM(ELEM_W),
1764                              SRC0_NEG(0),
1765                              SRC1_SEL(ALU_SRC_CFILE_BASE + 3),
1766                              SRC1_REL(ABSOLUTE),
1767                              SRC1_ELEM(ELEM_W),
1768                              SRC1_NEG(0),
1769                              INDEX_MODE(SQ_INDEX_LOOP),
1770                              PRED_SEL(SQ_PRED_SEL_OFF),
1771                              LAST(1));
1772     shader[i++] = ALU_DWORD1_OP2(ChipSet,
1773                                  SRC0_ABS(0),
1774                                  SRC1_ABS(0),
1775                                  UPDATE_EXECUTE_MASK(0),
1776                                  UPDATE_PRED(0),
1777                                  WRITE_MASK(0),
1778                                  FOG_MERGE(0),
1779                                  OMOD(SQ_ALU_OMOD_OFF),
1780                                  ALU_INST(SQ_OP2_INST_DOT4),
1781                                  BANK_SWIZZLE(SQ_ALU_VEC_012),
1782                                  DST_GPR(4),
1783                                  DST_REL(ABSOLUTE),
1784                                  DST_ELEM(ELEM_W),
1785                                  CLAMP(0));
1786 
1787     /* 30 srcX / w: MUL GPR3.x by CFILE[0].w into GPR1.x (constant presumably holds 1/w) */
1788     shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 3),
1789                              SRC0_REL(ABSOLUTE),
1790                              SRC0_ELEM(ELEM_X),
1791                              SRC0_NEG(0),
1792                              SRC1_SEL(ALU_SRC_CFILE_BASE + 0),
1793                              SRC1_REL(ABSOLUTE),
1794                              SRC1_ELEM(ELEM_W),
1795                              SRC1_NEG(0),
1796                              INDEX_MODE(SQ_INDEX_AR_X),
1797                              PRED_SEL(SQ_PRED_SEL_OFF),
1798                              LAST(1));
1799     shader[i++] = ALU_DWORD1_OP2(ChipSet,
1800                                  SRC0_ABS(0),
1801                                  SRC1_ABS(0),
1802                                  UPDATE_EXECUTE_MASK(0),
1803                                  UPDATE_PRED(0),
1804                                  WRITE_MASK(1),
1805                                  FOG_MERGE(0),
1806                                  OMOD(SQ_ALU_OMOD_OFF),
1807                                  ALU_INST(SQ_OP2_INST_MUL),
1808                                  BANK_SWIZZLE(SQ_ALU_VEC_012),
1809                                  DST_GPR(1),
1810                                  DST_REL(ABSOLUTE),
1811                                  DST_ELEM(ELEM_X),
1812                                  CLAMP(0));
1813 
1814     /* 31 srcY / h: MUL GPR3.y by CFILE[1].w into GPR1.y (constant presumably holds 1/h) */
1815     shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 3),
1816                              SRC0_REL(ABSOLUTE),
1817                              SRC0_ELEM(ELEM_Y),
1818                              SRC0_NEG(0),
1819                              SRC1_SEL(ALU_SRC_CFILE_BASE + 1),
1820                              SRC1_REL(ABSOLUTE),
1821                              SRC1_ELEM(ELEM_W),
1822                              SRC1_NEG(0),
1823                              INDEX_MODE(SQ_INDEX_AR_X),
1824                              PRED_SEL(SQ_PRED_SEL_OFF),
1825                              LAST(1));
1826     shader[i++] = ALU_DWORD1_OP2(ChipSet,
1827                                  SRC0_ABS(0),
1828                                  SRC1_ABS(0),
1829                                  UPDATE_EXECUTE_MASK(0),
1830                                  UPDATE_PRED(0),
1831                                  WRITE_MASK(1),
1832                                  FOG_MERGE(0),
1833                                  OMOD(SQ_ALU_OMOD_OFF),
1834                                  ALU_INST(SQ_OP2_INST_MUL),
1835                                  BANK_SWIZZLE(SQ_ALU_VEC_012),
1836                                  DST_GPR(1),
1837                                  DST_REL(ABSOLUTE),
1838                                  DST_ELEM(ELEM_Y),
1839                                  CLAMP(0));
1840 
1841     /* 32 maskX / w: MUL GPR4.x by CFILE[2].w into GPR0.x (constant presumably holds 1/w) */
1842     shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 4),
1843                              SRC0_REL(ABSOLUTE),
1844                              SRC0_ELEM(ELEM_X),
1845                              SRC0_NEG(0),
1846                              SRC1_SEL(ALU_SRC_CFILE_BASE + 2),
1847                              SRC1_REL(ABSOLUTE),
1848                              SRC1_ELEM(ELEM_W),
1849                              SRC1_NEG(0),
1850                              INDEX_MODE(SQ_INDEX_AR_X),
1851                              PRED_SEL(SQ_PRED_SEL_OFF),
1852                              LAST(1));
1853     shader[i++] = ALU_DWORD1_OP2(ChipSet,
1854                                  SRC0_ABS(0),
1855                                  SRC1_ABS(0),
1856                                  UPDATE_EXECUTE_MASK(0),
1857                                  UPDATE_PRED(0),
1858                                  WRITE_MASK(1),
1859                                  FOG_MERGE(0),
1860                                  OMOD(SQ_ALU_OMOD_OFF),
1861                                  ALU_INST(SQ_OP2_INST_MUL),
1862                                  BANK_SWIZZLE(SQ_ALU_VEC_012),
1863                                  DST_GPR(0),
1864                                  DST_REL(ABSOLUTE),
1865                                  DST_ELEM(ELEM_X),
1866                                  CLAMP(0));
1867 
1868     /* 33 maskY / h: MUL GPR4.y by CFILE[3].w into GPR0.y (constant presumably holds 1/h) */
1869     shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 4),
1870                              SRC0_REL(ABSOLUTE),
1871                              SRC0_ELEM(ELEM_Y),
1872                              SRC0_NEG(0),
1873                              SRC1_SEL(ALU_SRC_CFILE_BASE + 3),
1874                              SRC1_REL(ABSOLUTE),
1875                              SRC1_ELEM(ELEM_W),
1876                              SRC1_NEG(0),
1877                              INDEX_MODE(SQ_INDEX_AR_X),
1878                              PRED_SEL(SQ_PRED_SEL_OFF),
1879                              LAST(1));
1880     shader[i++] = ALU_DWORD1_OP2(ChipSet,
1881                                  SRC0_ABS(0),
1882                                  SRC1_ABS(0),
1883                                  UPDATE_EXECUTE_MASK(0),
1884                                  UPDATE_PRED(0),
1885                                  WRITE_MASK(1),
1886                                  FOG_MERGE(0),
1887                                  OMOD(SQ_ALU_OMOD_OFF),
1888                                  ALU_INST(SQ_OP2_INST_MUL),
1889                                  BANK_SWIZZLE(SQ_ALU_VEC_012),
1890                                  DST_GPR(0),
1891                                  DST_REL(ABSOLUTE),
1892                                  DST_ELEM(ELEM_Y),
1893                                  CLAMP(0));
1894 
1895     /* 34 srcX.x DOT4 - non-mask */
1896     shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
1897                              SRC0_REL(ABSOLUTE),
1898                              SRC0_ELEM(ELEM_X),
1899                              SRC0_NEG(0),
1900                              SRC1_SEL(ALU_SRC_CFILE_BASE + 0),
1901                              SRC1_REL(ABSOLUTE),
1902                              SRC1_ELEM(ELEM_X),
1903                              SRC1_NEG(0),
1904                              INDEX_MODE(SQ_INDEX_LOOP),
1905                              PRED_SEL(SQ_PRED_SEL_OFF),
1906                              LAST(0));
1907     shader[i++] = ALU_DWORD1_OP2(ChipSet,
1908                                  SRC0_ABS(0),
1909                                  SRC1_ABS(0),
1910                                  UPDATE_EXECUTE_MASK(0),
1911                                  UPDATE_PRED(0),
1912                                  WRITE_MASK(1),
1913                                  FOG_MERGE(0),
1914                                  OMOD(SQ_ALU_OMOD_OFF),
1915                                  ALU_INST(SQ_OP2_INST_DOT4),
1916                                  BANK_SWIZZLE(SQ_ALU_VEC_012),
1917                                  DST_GPR(2),
1918                                  DST_REL(ABSOLUTE),
1919                                  DST_ELEM(ELEM_X),
1920                                  CLAMP(0));
1921 
1922     /* 35 srcX.y DOT4 - non-mask */
1923     shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
1924                              SRC0_REL(ABSOLUTE),
1925                              SRC0_ELEM(ELEM_Y),
1926                              SRC0_NEG(0),
1927                              SRC1_SEL(ALU_SRC_CFILE_BASE + 0),
1928                              SRC1_REL(ABSOLUTE),
1929                              SRC1_ELEM(ELEM_Y),
1930                              SRC1_NEG(0),
1931                              INDEX_MODE(SQ_INDEX_LOOP),
1932                              PRED_SEL(SQ_PRED_SEL_OFF),
1933                              LAST(0));
1934     shader[i++] = ALU_DWORD1_OP2(ChipSet,
1935                                  SRC0_ABS(0),
1936                                  SRC1_ABS(0),
1937                                  UPDATE_EXECUTE_MASK(0),
1938                                  UPDATE_PRED(0),
1939                                  WRITE_MASK(0),
1940                                  FOG_MERGE(0),
1941                                  OMOD(SQ_ALU_OMOD_OFF),
1942                                  ALU_INST(SQ_OP2_INST_DOT4),
1943                                  BANK_SWIZZLE(SQ_ALU_VEC_012),
1944                                  DST_GPR(2),
1945                                  DST_REL(ABSOLUTE),
1946                                  DST_ELEM(ELEM_Y),
1947                                  CLAMP(0));
1948 
1949     /* 36 srcX.z DOT4 - non-mask */
1950     shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
1951                              SRC0_REL(ABSOLUTE),
1952                              SRC0_ELEM(ELEM_Z),
1953                              SRC0_NEG(0),
1954                              SRC1_SEL(ALU_SRC_CFILE_BASE + 0),
1955                              SRC1_REL(ABSOLUTE),
1956                              SRC1_ELEM(ELEM_Z),
1957                              SRC1_NEG(0),
1958                              INDEX_MODE(SQ_INDEX_LOOP),
1959                              PRED_SEL(SQ_PRED_SEL_OFF),
1960                              LAST(0));
1961     shader[i++] = ALU_DWORD1_OP2(ChipSet,
1962                                  SRC0_ABS(0),
1963                                  SRC1_ABS(0),
1964                                  UPDATE_EXECUTE_MASK(0),
1965                                  UPDATE_PRED(0),
1966                                  WRITE_MASK(0),
1967                                  FOG_MERGE(0),
1968                                  OMOD(SQ_ALU_OMOD_OFF),
1969                                  ALU_INST(SQ_OP2_INST_DOT4),
1970                                  BANK_SWIZZLE(SQ_ALU_VEC_012),
1971                                  DST_GPR(2),
1972                                  DST_REL(ABSOLUTE),
1973                                  DST_ELEM(ELEM_Z),
1974                                  CLAMP(0));
1975 
1976     /* 37 srcX.w DOT4 - non-mask */
1977     shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
1978                              SRC0_REL(ABSOLUTE),
1979                              SRC0_ELEM(ELEM_W),
1980                              SRC0_NEG(0),
1981                              SRC1_SEL(ALU_SRC_CFILE_BASE + 0),
1982                              SRC1_REL(ABSOLUTE),
1983                              SRC1_ELEM(ELEM_W),
1984                              SRC1_NEG(0),
1985                              INDEX_MODE(SQ_INDEX_LOOP),
1986                              PRED_SEL(SQ_PRED_SEL_OFF),
1987                              LAST(1));
1988     shader[i++] = ALU_DWORD1_OP2(ChipSet,
1989                                  SRC0_ABS(0),
1990                                  SRC1_ABS(0),
1991                                  UPDATE_EXECUTE_MASK(0),
1992                                  UPDATE_PRED(0),
1993                                  WRITE_MASK(0),
1994                                  FOG_MERGE(0),
1995                                  OMOD(SQ_ALU_OMOD_OFF),
1996                                  ALU_INST(SQ_OP2_INST_DOT4),
1997                                  BANK_SWIZZLE(SQ_ALU_VEC_012),
1998                                  DST_GPR(2),
1999                                  DST_REL(ABSOLUTE),
2000                                  DST_ELEM(ELEM_W),
2001                                  CLAMP(0));
2002 
2003     /* 38 srcY.x DOT4 - non-mask */
2004     shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
2005                              SRC0_REL(ABSOLUTE),
2006                              SRC0_ELEM(ELEM_X),
2007                              SRC0_NEG(0),
2008                              SRC1_SEL(ALU_SRC_CFILE_BASE + 1),
2009                              SRC1_REL(ABSOLUTE),
2010                              SRC1_ELEM(ELEM_X),
2011                              SRC1_NEG(0),
2012                              INDEX_MODE(SQ_INDEX_LOOP),
2013                              PRED_SEL(SQ_PRED_SEL_OFF),
2014                              LAST(0));
2015     shader[i++] = ALU_DWORD1_OP2(ChipSet,
2016                                  SRC0_ABS(0),
2017                                  SRC1_ABS(0),
2018                                  UPDATE_EXECUTE_MASK(0),
2019                                  UPDATE_PRED(0),
2020                                  WRITE_MASK(0),
2021                                  FOG_MERGE(0),
2022                                  OMOD(SQ_ALU_OMOD_OFF),
2023                                  ALU_INST(SQ_OP2_INST_DOT4),
2024                                  BANK_SWIZZLE(SQ_ALU_VEC_012),
2025                                  DST_GPR(2),
2026                                  DST_REL(ABSOLUTE),
2027                                  DST_ELEM(ELEM_X),
2028                                  CLAMP(0));
2029 
2030     /* 39 srcY.y DOT4 - non-mask */
2031     shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
2032                              SRC0_REL(ABSOLUTE),
2033                              SRC0_ELEM(ELEM_Y),
2034                              SRC0_NEG(0),
2035                              SRC1_SEL(ALU_SRC_CFILE_BASE + 1),
2036                              SRC1_REL(ABSOLUTE),
2037                              SRC1_ELEM(ELEM_Y),
2038                              SRC1_NEG(0),
2039                              INDEX_MODE(SQ_INDEX_LOOP),
2040                              PRED_SEL(SQ_PRED_SEL_OFF),
2041                              LAST(0));
2042     shader[i++] = ALU_DWORD1_OP2(ChipSet,
2043                                  SRC0_ABS(0),
2044                                  SRC1_ABS(0),
2045                                  UPDATE_EXECUTE_MASK(0),
2046                                  UPDATE_PRED(0),
2047                                  WRITE_MASK(1),
2048                                  FOG_MERGE(0),
2049                                  OMOD(SQ_ALU_OMOD_OFF),
2050                                  ALU_INST(SQ_OP2_INST_DOT4),
2051                                  BANK_SWIZZLE(SQ_ALU_VEC_012),
2052                                  DST_GPR(2),
2053                                  DST_REL(ABSOLUTE),
2054                                  DST_ELEM(ELEM_Y),
2055                                  CLAMP(0));
2056 
2057     /* 40 srcY.z DOT4 - non-mask */
2058     shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
2059                              SRC0_REL(ABSOLUTE),
2060                              SRC0_ELEM(ELEM_Z),
2061                              SRC0_NEG(0),
2062                              SRC1_SEL(ALU_SRC_CFILE_BASE + 1),
2063                              SRC1_REL(ABSOLUTE),
2064                              SRC1_ELEM(ELEM_Z),
2065                              SRC1_NEG(0),
2066                              INDEX_MODE(SQ_INDEX_LOOP),
2067                              PRED_SEL(SQ_PRED_SEL_OFF),
2068                              LAST(0));
2069     shader[i++] = ALU_DWORD1_OP2(ChipSet,
2070                                  SRC0_ABS(0),
2071                                  SRC1_ABS(0),
2072                                  UPDATE_EXECUTE_MASK(0),
2073                                  UPDATE_PRED(0),
2074                                  WRITE_MASK(0),
2075                                  FOG_MERGE(0),
2076                                  OMOD(SQ_ALU_OMOD_OFF),
2077                                  ALU_INST(SQ_OP2_INST_DOT4),
2078                                  BANK_SWIZZLE(SQ_ALU_VEC_012),
2079                                  DST_GPR(2),
2080                                  DST_REL(ABSOLUTE),
2081                                  DST_ELEM(ELEM_Z),
2082                                  CLAMP(0));
2083 
2084     /* 41 srcY.w DOT4 - non-mask */
2085     shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
2086                              SRC0_REL(ABSOLUTE),
2087                              SRC0_ELEM(ELEM_W),
2088                              SRC0_NEG(0),
2089                              SRC1_SEL(ALU_SRC_CFILE_BASE + 1),
2090                              SRC1_REL(ABSOLUTE),
2091                              SRC1_ELEM(ELEM_W),
2092                              SRC1_NEG(0),
2093                              INDEX_MODE(SQ_INDEX_LOOP),
2094                              PRED_SEL(SQ_PRED_SEL_OFF),
2095                              LAST(1));
2096     shader[i++] = ALU_DWORD1_OP2(ChipSet,
2097                                  SRC0_ABS(0),
2098                                  SRC1_ABS(0),
2099                                  UPDATE_EXECUTE_MASK(0),
2100                                  UPDATE_PRED(0),
2101                                  WRITE_MASK(0),
2102                                  FOG_MERGE(0),
2103                                  OMOD(SQ_ALU_OMOD_OFF),
2104                                  ALU_INST(SQ_OP2_INST_DOT4),
2105                                  BANK_SWIZZLE(SQ_ALU_VEC_012),
2106                                  DST_GPR(2),
2107                                  DST_REL(ABSOLUTE),
2108                                  DST_ELEM(ELEM_W),
2109                                  CLAMP(0));
2110 
2111     /* 42 srcX / w */
2112     shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 2),
2113                              SRC0_REL(ABSOLUTE),
2114                              SRC0_ELEM(ELEM_X),
2115                              SRC0_NEG(0),
2116                              SRC1_SEL(ALU_SRC_CFILE_BASE + 0),
2117                              SRC1_REL(ABSOLUTE),
2118                              SRC1_ELEM(ELEM_W),
2119                              SRC1_NEG(0),
2120                              INDEX_MODE(SQ_INDEX_AR_X),
2121                              PRED_SEL(SQ_PRED_SEL_OFF),
2122                              LAST(1));
2123     shader[i++] = ALU_DWORD1_OP2(ChipSet,
2124                                  SRC0_ABS(0),
2125                                  SRC1_ABS(0),
2126                                  UPDATE_EXECUTE_MASK(0),
2127                                  UPDATE_PRED(0),
2128                                  WRITE_MASK(1),
2129                                  FOG_MERGE(0),
2130                                  OMOD(SQ_ALU_OMOD_OFF),
2131                                  ALU_INST(SQ_OP2_INST_MUL),
2132                                  BANK_SWIZZLE(SQ_ALU_VEC_012),
2133                                  DST_GPR(0),
2134                                  DST_REL(ABSOLUTE),
2135                                  DST_ELEM(ELEM_X),
2136                                  CLAMP(0));
2137 
2138     /* 43 srcY / h */
2139     shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 2),
2140                              SRC0_REL(ABSOLUTE),
2141                              SRC0_ELEM(ELEM_Y),
2142                              SRC0_NEG(0),
2143                              SRC1_SEL(ALU_SRC_CFILE_BASE + 1),
2144                              SRC1_REL(ABSOLUTE),
2145                              SRC1_ELEM(ELEM_W),
2146                              SRC1_NEG(0),
2147                              INDEX_MODE(SQ_INDEX_AR_X),
2148                              PRED_SEL(SQ_PRED_SEL_OFF),
2149                              LAST(1));
2150     shader[i++] = ALU_DWORD1_OP2(ChipSet,
2151                                  SRC0_ABS(0),
2152                                  SRC1_ABS(0),
2153                                  UPDATE_EXECUTE_MASK(0),
2154                                  UPDATE_PRED(0),
2155                                  WRITE_MASK(1),
2156                                  FOG_MERGE(0),
2157                                  OMOD(SQ_ALU_OMOD_OFF),
2158                                  ALU_INST(SQ_OP2_INST_MUL),
2159                                  BANK_SWIZZLE(SQ_ALU_VEC_012),
2160                                  DST_GPR(0),
2161                                  DST_REL(ABSOLUTE),
2162                                  DST_ELEM(ELEM_Y),
2163                                  CLAMP(0));
2164 
2165     /* 44/45 - dst - mask */
2166     shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
2167 			     FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
2168 			     FETCH_WHOLE_QUAD(0),
2169 			     BUFFER_ID(0),
2170 			     SRC_GPR(0),
2171 			     SRC_REL(ABSOLUTE),
2172 			     SRC_SEL_X(SQ_SEL_X),
2173 			     MEGA_FETCH_COUNT(24));
2174     shader[i++] = VTX_DWORD1_GPR(DST_GPR(2),
2175 				 DST_REL(0),
2176 				 DST_SEL_X(SQ_SEL_X),
2177 				 DST_SEL_Y(SQ_SEL_Y),
2178 				 DST_SEL_Z(SQ_SEL_0),
2179 				 DST_SEL_W(SQ_SEL_1),
2180 				 USE_CONST_FIELDS(0),
2181 				 DATA_FORMAT(FMT_32_32_FLOAT),
2182 				 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED),
2183 				 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
2184 				 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
2185     shader[i++] = VTX_DWORD2(OFFSET(0),
2186 #if X_BYTE_ORDER == X_BIG_ENDIAN
2187                              ENDIAN_SWAP(SQ_ENDIAN_8IN32),
2188 #else
2189                              ENDIAN_SWAP(SQ_ENDIAN_NONE),
2190 #endif
2191 			     CONST_BUF_NO_STRIDE(0),
2192 			     MEGA_FETCH(1));
2193     shader[i++] = VTX_DWORD_PAD;
2194     /* 46/47 - src */
2195     shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
2196 			     FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
2197 			     FETCH_WHOLE_QUAD(0),
2198 			     BUFFER_ID(0),
2199 			     SRC_GPR(0),
2200 			     SRC_REL(ABSOLUTE),
2201 			     SRC_SEL_X(SQ_SEL_X),
2202 			     MEGA_FETCH_COUNT(8));
2203     shader[i++] = VTX_DWORD1_GPR(DST_GPR(1),
2204 				 DST_REL(0),
2205 				 DST_SEL_X(SQ_SEL_X),
2206 				 DST_SEL_Y(SQ_SEL_Y),
2207 				 DST_SEL_Z(SQ_SEL_1),
2208 				 DST_SEL_W(SQ_SEL_0),
2209 				 USE_CONST_FIELDS(0),
2210 				 DATA_FORMAT(FMT_32_32_FLOAT),
2211 				 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED),
2212 				 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
2213 				 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
2214     shader[i++] = VTX_DWORD2(OFFSET(8),
2215 #if X_BYTE_ORDER == X_BIG_ENDIAN
2216                              ENDIAN_SWAP(SQ_ENDIAN_8IN32),
2217 #else
2218                              ENDIAN_SWAP(SQ_ENDIAN_NONE),
2219 #endif
2220 			     CONST_BUF_NO_STRIDE(0),
2221 			     MEGA_FETCH(0));
2222     shader[i++] = VTX_DWORD_PAD;
2223     /* 48/49 - mask */
2224     shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
2225 			     FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
2226 			     FETCH_WHOLE_QUAD(0),
2227 			     BUFFER_ID(0),
2228 			     SRC_GPR(0),
2229 			     SRC_REL(ABSOLUTE),
2230 			     SRC_SEL_X(SQ_SEL_X),
2231 			     MEGA_FETCH_COUNT(8));
2232     shader[i++] = VTX_DWORD1_GPR(DST_GPR(0),
2233 				 DST_REL(0),
2234 				 DST_SEL_X(SQ_SEL_X),
2235 				 DST_SEL_Y(SQ_SEL_Y),
2236 				 DST_SEL_Z(SQ_SEL_1),
2237 				 DST_SEL_W(SQ_SEL_0),
2238 				 USE_CONST_FIELDS(0),
2239 				 DATA_FORMAT(FMT_32_32_FLOAT),
2240 				 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED),
2241 				 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
2242 				 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
2243     shader[i++] = VTX_DWORD2(OFFSET(16),
2244 #if X_BYTE_ORDER == X_BIG_ENDIAN
2245                              ENDIAN_SWAP(SQ_ENDIAN_8IN32),
2246 #else
2247                              ENDIAN_SWAP(SQ_ENDIAN_NONE),
2248 #endif
2249 			     CONST_BUF_NO_STRIDE(0),
2250 			     MEGA_FETCH(0));
2251     shader[i++] = VTX_DWORD_PAD;
2252 
2253     /* 50/51 - dst - non-mask */
2254     shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
2255 			     FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
2256 			     FETCH_WHOLE_QUAD(0),
2257 			     BUFFER_ID(0),
2258 			     SRC_GPR(0),
2259 			     SRC_REL(ABSOLUTE),
2260 			     SRC_SEL_X(SQ_SEL_X),
2261 			     MEGA_FETCH_COUNT(16));
2262     shader[i++] = VTX_DWORD1_GPR(DST_GPR(1),
2263 				 DST_REL(0),
2264 				 DST_SEL_X(SQ_SEL_X),
2265 				 DST_SEL_Y(SQ_SEL_Y),
2266 				 DST_SEL_Z(SQ_SEL_0),
2267 				 DST_SEL_W(SQ_SEL_1),
2268 				 USE_CONST_FIELDS(0),
2269 				 DATA_FORMAT(FMT_32_32_FLOAT),
2270 				 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED),
2271 				 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
2272 				 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
2273     shader[i++] = VTX_DWORD2(OFFSET(0),
2274 #if X_BYTE_ORDER == X_BIG_ENDIAN
2275                              ENDIAN_SWAP(SQ_ENDIAN_8IN32),
2276 #else
2277                              ENDIAN_SWAP(SQ_ENDIAN_NONE),
2278 #endif
2279 			     CONST_BUF_NO_STRIDE(0),
2280 			     MEGA_FETCH(1));
2281     shader[i++] = VTX_DWORD_PAD;
2282     /* 52/53 - src */
2283     shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
2284 			     FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
2285 			     FETCH_WHOLE_QUAD(0),
2286 			     BUFFER_ID(0),
2287 			     SRC_GPR(0),
2288 			     SRC_REL(ABSOLUTE),
2289 			     SRC_SEL_X(SQ_SEL_X),
2290 			     MEGA_FETCH_COUNT(8));
2291     shader[i++] = VTX_DWORD1_GPR(DST_GPR(0),
2292 				 DST_REL(0),
2293 				 DST_SEL_X(SQ_SEL_X),
2294 				 DST_SEL_Y(SQ_SEL_Y),
2295 				 DST_SEL_Z(SQ_SEL_1),
2296 				 DST_SEL_W(SQ_SEL_0),
2297 				 USE_CONST_FIELDS(0),
2298 				 DATA_FORMAT(FMT_32_32_FLOAT),
2299 				 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED),
2300 				 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
2301 				 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
2302     shader[i++] = VTX_DWORD2(OFFSET(8),
2303 #if X_BYTE_ORDER == X_BIG_ENDIAN
2304                              ENDIAN_SWAP(SQ_ENDIAN_8IN32),
2305 #else
2306                              ENDIAN_SWAP(SQ_ENDIAN_NONE),
2307 #endif
2308 			     CONST_BUF_NO_STRIDE(0),
2309 			     MEGA_FETCH(0));
2310     shader[i++] = VTX_DWORD_PAD;
2311 
2312     return i;
2313 }
2314 
2315 /* comp ps --------------------------------------- */
/*
 * Emit the composite pixel shader into shader[].
 *
 * ChipSet is only forwarded to ALU_DWORD1_OP2() (presumably it selects the
 * chip-specific ALU word layout -- confirm against the macro definition).
 * shader receives 60 dwords: 14 control-flow entries (2 dwords each),
 * 12 ALU instructions (2 dwords each) and 2 texture fetches (4 dwords
 * each, padding included).
 *
 * Returns the number of dwords written.
 *
 * The numbers in the comments below are the slot addresses used by the
 * ADDR() fields; in this listing one slot is two dwords.
 */
int R600_comp_ps(RADEONChipFamily ChipSet, uint32_t* shader)
{
    uint32_t *cur = shader;

    /* ---- control flow: main program ---- */

    /* 0: call fetch-mask (slot 10) if boolean1 == true */
    *cur++ = CF_DWORD0(ADDR(10));
    *cur++ = CF_DWORD1(POP_COUNT(0), CF_CONST(1), COND(SQ_CF_COND_BOOL),
        I_COUNT(0), CALL_COUNT(0), END_OF_PROGRAM(0), VALID_PIXEL_MODE(0),
        CF_INST(SQ_CF_INST_CALL), WHOLE_QUAD_MODE(0), BARRIER(0));

    /* 1: call read-constant-mask (slot 12) if boolean1 == false */
    *cur++ = CF_DWORD0(ADDR(12));
    *cur++ = CF_DWORD1(POP_COUNT(0), CF_CONST(1), COND(SQ_CF_COND_NOT_BOOL),
        I_COUNT(0), CALL_COUNT(0), END_OF_PROGRAM(0), VALID_PIXEL_MODE(0),
        CF_INST(SQ_CF_INST_CALL), WHOLE_QUAD_MODE(0), BARRIER(0));

    /* 2: call fetch-src (slot 6) if boolean0 == true */
    *cur++ = CF_DWORD0(ADDR(6));
    *cur++ = CF_DWORD1(POP_COUNT(0), CF_CONST(0), COND(SQ_CF_COND_BOOL),
        I_COUNT(0), CALL_COUNT(0), END_OF_PROGRAM(0), VALID_PIXEL_MODE(0),
        CF_INST(SQ_CF_INST_CALL), WHOLE_QUAD_MODE(0), BARRIER(0));

    /* 3: call read-constant-src (slot 8) if boolean0 == false */
    *cur++ = CF_DWORD0(ADDR(8));
    *cur++ = CF_DWORD1(POP_COUNT(0), CF_CONST(0), COND(SQ_CF_COND_NOT_BOOL),
        I_COUNT(0), CALL_COUNT(0), END_OF_PROGRAM(0), VALID_PIXEL_MODE(0),
        CF_INST(SQ_CF_INST_CALL), WHOLE_QUAD_MODE(0), BARRIER(0));

    /* 4: src IN mask (GPR0 := GPR1 .* GPR0), ALU clause at slot 14 */
    *cur++ = CF_ALU_DWORD0(ADDR(14), KCACHE_BANK0(0), KCACHE_BANK1(0),
        KCACHE_MODE0(SQ_CF_KCACHE_NOP));
    *cur++ = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
        KCACHE_ADDR0(0), KCACHE_ADDR1(0), I_COUNT(4), USES_WATERFALL(0),
        CF_INST(SQ_CF_INST_ALU), WHOLE_QUAD_MODE(0), BARRIER(1));

    /* 5: export pixel data from GPR0; this entry ends the program */
    *cur++ = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_PIXEL_MRT0),
        TYPE(SQ_EXPORT_PIXEL), RW_GPR(0), RW_REL(ABSOLUTE),
        INDEX_GPR(0), ELEM_SIZE(1));
    *cur++ = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(
        SRC_SEL_X(SQ_SEL_X), SRC_SEL_Y(SQ_SEL_Y),
        SRC_SEL_Z(SQ_SEL_Z), SRC_SEL_W(SQ_SEL_W),
        R6xx_ELEM_LOOP(0), BURST_COUNT(1), END_OF_PROGRAM(1),
        VALID_PIXEL_MODE(0), CF_INST(SQ_CF_INST_EXPORT_DONE),
        WHOLE_QUAD_MODE(0), BARRIER(1));

    /* ---- subroutine: fetch src ---- */

    /* 6: fetch src into GPR0, texture clause at slot 26 */
    *cur++ = CF_DWORD0(ADDR(26));
    *cur++ = CF_DWORD1(POP_COUNT(0), CF_CONST(0), COND(SQ_CF_COND_ACTIVE),
        I_COUNT(1), CALL_COUNT(0), END_OF_PROGRAM(0), VALID_PIXEL_MODE(0),
        CF_INST(SQ_CF_INST_TEX), WHOLE_QUAD_MODE(0), BARRIER(1));

    /* 7: return */
    *cur++ = CF_DWORD0(ADDR(0));
    *cur++ = CF_DWORD1(POP_COUNT(0), CF_CONST(0), COND(SQ_CF_COND_ACTIVE),
        I_COUNT(0), CALL_COUNT(0), END_OF_PROGRAM(0), VALID_PIXEL_MODE(0),
        CF_INST(SQ_CF_INST_RETURN), WHOLE_QUAD_MODE(0), BARRIER(1));

    /* ---- subroutine: read-constant-src ---- */

    /* 8: read constants into GPR0, ALU clause at slot 18 */
    *cur++ = CF_ALU_DWORD0(ADDR(18), KCACHE_BANK0(0), KCACHE_BANK1(0),
        KCACHE_MODE0(SQ_CF_KCACHE_NOP));
    *cur++ = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
        KCACHE_ADDR0(0), KCACHE_ADDR1(0), I_COUNT(4), USES_WATERFALL(0),
        CF_INST(SQ_CF_INST_ALU), WHOLE_QUAD_MODE(0), BARRIER(1));

    /* 9: return */
    *cur++ = CF_DWORD0(ADDR(0));
    *cur++ = CF_DWORD1(POP_COUNT(0), CF_CONST(0), COND(SQ_CF_COND_ACTIVE),
        I_COUNT(0), CALL_COUNT(0), END_OF_PROGRAM(0), VALID_PIXEL_MODE(0),
        CF_INST(SQ_CF_INST_RETURN), WHOLE_QUAD_MODE(0), BARRIER(1));

    /* ---- subroutine: fetch mask ---- */

    /* 10: fetch mask into GPR1, texture clause at slot 28 */
    *cur++ = CF_DWORD0(ADDR(28));
    *cur++ = CF_DWORD1(POP_COUNT(0), CF_CONST(0), COND(SQ_CF_COND_ACTIVE),
        I_COUNT(1), CALL_COUNT(0), END_OF_PROGRAM(0), VALID_PIXEL_MODE(0),
        CF_INST(SQ_CF_INST_TEX), WHOLE_QUAD_MODE(0), BARRIER(1));

    /* 11: return */
    *cur++ = CF_DWORD0(ADDR(0));
    *cur++ = CF_DWORD1(POP_COUNT(0), CF_CONST(0), COND(SQ_CF_COND_ACTIVE),
        I_COUNT(0), CALL_COUNT(0), END_OF_PROGRAM(0), VALID_PIXEL_MODE(0),
        CF_INST(SQ_CF_INST_RETURN), WHOLE_QUAD_MODE(0), BARRIER(1));

    /* ---- subroutine: read-constant-mask ---- */

    /* 12: read constants into GPR1, ALU clause at slot 22 */
    *cur++ = CF_ALU_DWORD0(ADDR(22), KCACHE_BANK0(0), KCACHE_BANK1(0),
        KCACHE_MODE0(SQ_CF_KCACHE_NOP));
    *cur++ = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
        KCACHE_ADDR0(0), KCACHE_ADDR1(0), I_COUNT(4), USES_WATERFALL(0),
        CF_INST(SQ_CF_INST_ALU), WHOLE_QUAD_MODE(0), BARRIER(1));

    /* 13: return */
    *cur++ = CF_DWORD0(ADDR(0));
    *cur++ = CF_DWORD1(POP_COUNT(0), CF_CONST(0), COND(SQ_CF_COND_ACTIVE),
        I_COUNT(0), CALL_COUNT(0), END_OF_PROGRAM(0), VALID_PIXEL_MODE(0),
        CF_INST(SQ_CF_INST_RETURN), WHOLE_QUAD_MODE(0), BARRIER(1));

    /* ---- ALU clause for slot 4: per-channel src * mask ---- */

    /* 14 - alu 0: MUL gpr[0].x gpr[1].x gpr[0].x */
    *cur++ = ALU_DWORD0(
        SRC0_SEL(ALU_SRC_GPR_BASE + 1), SRC0_REL(ABSOLUTE),
        SRC0_ELEM(ELEM_X), SRC0_NEG(0),
        SRC1_SEL(ALU_SRC_GPR_BASE + 0), SRC1_REL(ABSOLUTE),
        SRC1_ELEM(ELEM_X), SRC1_NEG(0),
        INDEX_MODE(SQ_INDEX_LOOP), PRED_SEL(SQ_PRED_SEL_OFF), LAST(0));
    *cur++ = ALU_DWORD1_OP2(ChipSet,
        SRC0_ABS(0), SRC1_ABS(0), UPDATE_EXECUTE_MASK(0), UPDATE_PRED(0),
        WRITE_MASK(1), FOG_MERGE(0), OMOD(SQ_ALU_OMOD_OFF),
        ALU_INST(SQ_OP2_INST_MUL), BANK_SWIZZLE(SQ_ALU_VEC_012),
        DST_GPR(0), DST_REL(ABSOLUTE), DST_ELEM(ELEM_X), CLAMP(1));

    /* 15 - alu 1: MUL gpr[0].y gpr[1].y gpr[0].y */
    *cur++ = ALU_DWORD0(
        SRC0_SEL(ALU_SRC_GPR_BASE + 1), SRC0_REL(ABSOLUTE),
        SRC0_ELEM(ELEM_Y), SRC0_NEG(0),
        SRC1_SEL(ALU_SRC_GPR_BASE + 0), SRC1_REL(ABSOLUTE),
        SRC1_ELEM(ELEM_Y), SRC1_NEG(0),
        INDEX_MODE(SQ_INDEX_LOOP), PRED_SEL(SQ_PRED_SEL_OFF), LAST(0));
    *cur++ = ALU_DWORD1_OP2(ChipSet,
        SRC0_ABS(0), SRC1_ABS(0), UPDATE_EXECUTE_MASK(0), UPDATE_PRED(0),
        WRITE_MASK(1), FOG_MERGE(0), OMOD(SQ_ALU_OMOD_OFF),
        ALU_INST(SQ_OP2_INST_MUL), BANK_SWIZZLE(SQ_ALU_VEC_012),
        DST_GPR(0), DST_REL(ABSOLUTE), DST_ELEM(ELEM_Y), CLAMP(1));

    /* 16 - alu 2: MUL gpr[0].z gpr[1].z gpr[0].z */
    *cur++ = ALU_DWORD0(
        SRC0_SEL(ALU_SRC_GPR_BASE + 1), SRC0_REL(ABSOLUTE),
        SRC0_ELEM(ELEM_Z), SRC0_NEG(0),
        SRC1_SEL(ALU_SRC_GPR_BASE + 0), SRC1_REL(ABSOLUTE),
        SRC1_ELEM(ELEM_Z), SRC1_NEG(0),
        INDEX_MODE(SQ_INDEX_LOOP), PRED_SEL(SQ_PRED_SEL_OFF), LAST(0));
    *cur++ = ALU_DWORD1_OP2(ChipSet,
        SRC0_ABS(0), SRC1_ABS(0), UPDATE_EXECUTE_MASK(0), UPDATE_PRED(0),
        WRITE_MASK(1), FOG_MERGE(0), OMOD(SQ_ALU_OMOD_OFF),
        ALU_INST(SQ_OP2_INST_MUL), BANK_SWIZZLE(SQ_ALU_VEC_012),
        DST_GPR(0), DST_REL(ABSOLUTE), DST_ELEM(ELEM_Z), CLAMP(1));

    /* 17 - alu 3: MUL gpr[0].w gpr[1].w gpr[0].w (LAST set) */
    *cur++ = ALU_DWORD0(
        SRC0_SEL(ALU_SRC_GPR_BASE + 1), SRC0_REL(ABSOLUTE),
        SRC0_ELEM(ELEM_W), SRC0_NEG(0),
        SRC1_SEL(ALU_SRC_GPR_BASE + 0), SRC1_REL(ABSOLUTE),
        SRC1_ELEM(ELEM_W), SRC1_NEG(0),
        INDEX_MODE(SQ_INDEX_LOOP), PRED_SEL(SQ_PRED_SEL_OFF), LAST(1));
    *cur++ = ALU_DWORD1_OP2(ChipSet,
        SRC0_ABS(0), SRC1_ABS(0), UPDATE_EXECUTE_MASK(0), UPDATE_PRED(0),
        WRITE_MASK(1), FOG_MERGE(0), OMOD(SQ_ALU_OMOD_OFF),
        ALU_INST(SQ_OP2_INST_MUL), BANK_SWIZZLE(SQ_ALU_VEC_012),
        DST_GPR(0), DST_REL(ABSOLUTE), DST_ELEM(ELEM_W), CLAMP(1));

    /* ---- ALU clause for slot 8: MOV from cfile[0] into gpr[0] ---- */

    /* 18: .x */
    *cur++ = ALU_DWORD0(
        SRC0_SEL(ALU_SRC_CFILE_BASE + 0), SRC0_REL(ABSOLUTE),
        SRC0_ELEM(ELEM_X), SRC0_NEG(0),
        SRC1_SEL(ALU_SRC_GPR_BASE + 0), SRC1_REL(ABSOLUTE),
        SRC1_ELEM(ELEM_X), SRC1_NEG(0),
        INDEX_MODE(SQ_INDEX_AR_X), PRED_SEL(SQ_PRED_SEL_OFF), LAST(0));
    *cur++ = ALU_DWORD1_OP2(ChipSet,
        SRC0_ABS(0), SRC1_ABS(0), UPDATE_EXECUTE_MASK(0), UPDATE_PRED(0),
        WRITE_MASK(1), FOG_MERGE(0), OMOD(SQ_ALU_OMOD_OFF),
        ALU_INST(SQ_OP2_INST_MOV), BANK_SWIZZLE(SQ_ALU_VEC_012),
        DST_GPR(0), DST_REL(ABSOLUTE), DST_ELEM(ELEM_X), CLAMP(1));

    /* 19: .y */
    *cur++ = ALU_DWORD0(
        SRC0_SEL(ALU_SRC_CFILE_BASE + 0), SRC0_REL(ABSOLUTE),
        SRC0_ELEM(ELEM_Y), SRC0_NEG(0),
        SRC1_SEL(ALU_SRC_GPR_BASE + 0), SRC1_REL(ABSOLUTE),
        SRC1_ELEM(ELEM_Y), SRC1_NEG(0),
        INDEX_MODE(SQ_INDEX_AR_X), PRED_SEL(SQ_PRED_SEL_OFF), LAST(0));
    *cur++ = ALU_DWORD1_OP2(ChipSet,
        SRC0_ABS(0), SRC1_ABS(0), UPDATE_EXECUTE_MASK(0), UPDATE_PRED(0),
        WRITE_MASK(1), FOG_MERGE(0), OMOD(SQ_ALU_OMOD_OFF),
        ALU_INST(SQ_OP2_INST_MOV), BANK_SWIZZLE(SQ_ALU_VEC_012),
        DST_GPR(0), DST_REL(ABSOLUTE), DST_ELEM(ELEM_Y), CLAMP(1));

    /* 20: .z */
    *cur++ = ALU_DWORD0(
        SRC0_SEL(ALU_SRC_CFILE_BASE + 0), SRC0_REL(ABSOLUTE),
        SRC0_ELEM(ELEM_Z), SRC0_NEG(0),
        SRC1_SEL(ALU_SRC_GPR_BASE + 0), SRC1_REL(ABSOLUTE),
        SRC1_ELEM(ELEM_Z), SRC1_NEG(0),
        INDEX_MODE(SQ_INDEX_AR_X), PRED_SEL(SQ_PRED_SEL_OFF), LAST(0));
    *cur++ = ALU_DWORD1_OP2(ChipSet,
        SRC0_ABS(0), SRC1_ABS(0), UPDATE_EXECUTE_MASK(0), UPDATE_PRED(0),
        WRITE_MASK(1), FOG_MERGE(0), OMOD(SQ_ALU_OMOD_OFF),
        ALU_INST(SQ_OP2_INST_MOV), BANK_SWIZZLE(SQ_ALU_VEC_012),
        DST_GPR(0), DST_REL(ABSOLUTE), DST_ELEM(ELEM_Z), CLAMP(1));

    /* 21: .w (LAST set) */
    *cur++ = ALU_DWORD0(
        SRC0_SEL(ALU_SRC_CFILE_BASE + 0), SRC0_REL(ABSOLUTE),
        SRC0_ELEM(ELEM_W), SRC0_NEG(0),
        SRC1_SEL(ALU_SRC_GPR_BASE + 0), SRC1_REL(ABSOLUTE),
        SRC1_ELEM(ELEM_W), SRC1_NEG(0),
        INDEX_MODE(SQ_INDEX_AR_X), PRED_SEL(SQ_PRED_SEL_OFF), LAST(1));
    *cur++ = ALU_DWORD1_OP2(ChipSet,
        SRC0_ABS(0), SRC1_ABS(0), UPDATE_EXECUTE_MASK(0), UPDATE_PRED(0),
        WRITE_MASK(1), FOG_MERGE(0), OMOD(SQ_ALU_OMOD_OFF),
        ALU_INST(SQ_OP2_INST_MOV), BANK_SWIZZLE(SQ_ALU_VEC_012),
        DST_GPR(0), DST_REL(ABSOLUTE), DST_ELEM(ELEM_W), CLAMP(1));

    /* ---- ALU clause for slot 12: MOV from cfile[1] into gpr[1] ---- */

    /* 22: .x */
    *cur++ = ALU_DWORD0(
        SRC0_SEL(ALU_SRC_CFILE_BASE + 1), SRC0_REL(ABSOLUTE),
        SRC0_ELEM(ELEM_X), SRC0_NEG(0),
        SRC1_SEL(ALU_SRC_GPR_BASE + 0), SRC1_REL(ABSOLUTE),
        SRC1_ELEM(ELEM_X), SRC1_NEG(0),
        INDEX_MODE(SQ_INDEX_AR_X), PRED_SEL(SQ_PRED_SEL_OFF), LAST(0));
    *cur++ = ALU_DWORD1_OP2(ChipSet,
        SRC0_ABS(0), SRC1_ABS(0), UPDATE_EXECUTE_MASK(0), UPDATE_PRED(0),
        WRITE_MASK(1), FOG_MERGE(0), OMOD(SQ_ALU_OMOD_OFF),
        ALU_INST(SQ_OP2_INST_MOV), BANK_SWIZZLE(SQ_ALU_VEC_012),
        DST_GPR(1), DST_REL(ABSOLUTE), DST_ELEM(ELEM_X), CLAMP(1));

    /* 23: .y */
    *cur++ = ALU_DWORD0(
        SRC0_SEL(ALU_SRC_CFILE_BASE + 1), SRC0_REL(ABSOLUTE),
        SRC0_ELEM(ELEM_Y), SRC0_NEG(0),
        SRC1_SEL(ALU_SRC_GPR_BASE + 0), SRC1_REL(ABSOLUTE),
        SRC1_ELEM(ELEM_Y), SRC1_NEG(0),
        INDEX_MODE(SQ_INDEX_AR_X), PRED_SEL(SQ_PRED_SEL_OFF), LAST(0));
    *cur++ = ALU_DWORD1_OP2(ChipSet,
        SRC0_ABS(0), SRC1_ABS(0), UPDATE_EXECUTE_MASK(0), UPDATE_PRED(0),
        WRITE_MASK(1), FOG_MERGE(0), OMOD(SQ_ALU_OMOD_OFF),
        ALU_INST(SQ_OP2_INST_MOV), BANK_SWIZZLE(SQ_ALU_VEC_012),
        DST_GPR(1), DST_REL(ABSOLUTE), DST_ELEM(ELEM_Y), CLAMP(1));

    /* 24: .z */
    *cur++ = ALU_DWORD0(
        SRC0_SEL(ALU_SRC_CFILE_BASE + 1), SRC0_REL(ABSOLUTE),
        SRC0_ELEM(ELEM_Z), SRC0_NEG(0),
        SRC1_SEL(ALU_SRC_GPR_BASE + 0), SRC1_REL(ABSOLUTE),
        SRC1_ELEM(ELEM_Z), SRC1_NEG(0),
        INDEX_MODE(SQ_INDEX_AR_X), PRED_SEL(SQ_PRED_SEL_OFF), LAST(0));
    *cur++ = ALU_DWORD1_OP2(ChipSet,
        SRC0_ABS(0), SRC1_ABS(0), UPDATE_EXECUTE_MASK(0), UPDATE_PRED(0),
        WRITE_MASK(1), FOG_MERGE(0), OMOD(SQ_ALU_OMOD_OFF),
        ALU_INST(SQ_OP2_INST_MOV), BANK_SWIZZLE(SQ_ALU_VEC_012),
        DST_GPR(1), DST_REL(ABSOLUTE), DST_ELEM(ELEM_Z), CLAMP(1));

    /* 25: .w (LAST set) */
    *cur++ = ALU_DWORD0(
        SRC0_SEL(ALU_SRC_CFILE_BASE + 1), SRC0_REL(ABSOLUTE),
        SRC0_ELEM(ELEM_W), SRC0_NEG(0),
        SRC1_SEL(ALU_SRC_GPR_BASE + 0), SRC1_REL(ABSOLUTE),
        SRC1_ELEM(ELEM_W), SRC1_NEG(0),
        INDEX_MODE(SQ_INDEX_AR_X), PRED_SEL(SQ_PRED_SEL_OFF), LAST(1));
    *cur++ = ALU_DWORD1_OP2(ChipSet,
        SRC0_ABS(0), SRC1_ABS(0), UPDATE_EXECUTE_MASK(0), UPDATE_PRED(0),
        WRITE_MASK(1), FOG_MERGE(0), OMOD(SQ_ALU_OMOD_OFF),
        ALU_INST(SQ_OP2_INST_MOV), BANK_SWIZZLE(SQ_ALU_VEC_012),
        DST_GPR(1), DST_REL(ABSOLUTE), DST_ELEM(ELEM_W), CLAMP(1));

    /* ---- texture clauses (three dwords plus one pad dword each) ---- */

    /* 26/27 - src: sample resource 0 / sampler 0, coords in GPR0 -> GPR0 */
    *cur++ = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE), BC_FRAC_MODE(0),
        FETCH_WHOLE_QUAD(0), RESOURCE_ID(0), SRC_GPR(0), SRC_REL(ABSOLUTE),
        R7xx_ALT_CONST(0));
    *cur++ = TEX_DWORD1(DST_GPR(0), DST_REL(ABSOLUTE),
        DST_SEL_X(SQ_SEL_X), DST_SEL_Y(SQ_SEL_Y),
        DST_SEL_Z(SQ_SEL_Z), DST_SEL_W(SQ_SEL_W),
        LOD_BIAS(0),
        COORD_TYPE_X(TEX_NORMALIZED), COORD_TYPE_Y(TEX_NORMALIZED),
        COORD_TYPE_Z(TEX_NORMALIZED), COORD_TYPE_W(TEX_NORMALIZED));
    *cur++ = TEX_DWORD2(OFFSET_X(0), OFFSET_Y(0), OFFSET_Z(0),
        SAMPLER_ID(0),
        SRC_SEL_X(SQ_SEL_X), SRC_SEL_Y(SQ_SEL_Y),
        SRC_SEL_Z(SQ_SEL_0), SRC_SEL_W(SQ_SEL_1));
    *cur++ = TEX_DWORD_PAD;

    /* 28/29 - mask: sample resource 1 / sampler 1, coords in GPR1 -> GPR1 */
    *cur++ = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE), BC_FRAC_MODE(0),
        FETCH_WHOLE_QUAD(0), RESOURCE_ID(1), SRC_GPR(1), SRC_REL(ABSOLUTE),
        R7xx_ALT_CONST(0));
    *cur++ = TEX_DWORD1(DST_GPR(1), DST_REL(ABSOLUTE),
        DST_SEL_X(SQ_SEL_X), DST_SEL_Y(SQ_SEL_Y),
        DST_SEL_Z(SQ_SEL_Z), DST_SEL_W(SQ_SEL_W),
        LOD_BIAS(0),
        COORD_TYPE_X(TEX_NORMALIZED), COORD_TYPE_Y(TEX_NORMALIZED),
        COORD_TYPE_Z(TEX_NORMALIZED), COORD_TYPE_W(TEX_NORMALIZED));
    *cur++ = TEX_DWORD2(OFFSET_X(0), OFFSET_Y(0), OFFSET_Z(0),
        SAMPLER_ID(1),
        SRC_SEL_X(SQ_SEL_X), SRC_SEL_Y(SQ_SEL_Y),
        SRC_SEL_Z(SQ_SEL_0), SRC_SEL_W(SQ_SEL_1));
    *cur++ = TEX_DWORD_PAD;

    /* number of dwords emitted */
    return (int)(cur - shader);
}
2903