/*
 * Copyright 2010 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * Author: Alex Deucher <alexander.deucher@amd.com>
 *
 */

#ifdef HAVE_CONFIG_H
#include "config.h"
#endif

#include "xf86.h"

#include "evergreen_shader.h"
#include "evergreen_reg.h"

36 /* solid vs --------------------------------------- */
evergreen_solid_vs(RADEONChipFamily ChipSet,uint32_t * shader)37 int evergreen_solid_vs(RADEONChipFamily ChipSet, uint32_t* shader)
38 {
39     int i = 0;
40 
41     /* 0 */
42     shader[i++] = CF_DWORD0(ADDR(4),
43 			    JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
44     shader[i++] = CF_DWORD1(POP_COUNT(0),
45 			    CF_CONST(0),
46 			    COND(SQ_CF_COND_ACTIVE),
47 			    I_COUNT(1),
48 			    VALID_PIXEL_MODE(0),
49 			    END_OF_PROGRAM(0),
50 			    CF_INST(SQ_CF_INST_VC),
51 			    WHOLE_QUAD_MODE(0),
52 			    BARRIER(1));
53     /* 1 */
54     shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_POS0),
55 					  TYPE(SQ_EXPORT_POS),
56 					  RW_GPR(1),
57 					  RW_REL(ABSOLUTE),
58 					  INDEX_GPR(0),
59 					  ELEM_SIZE(0));
60     shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
61 					       SRC_SEL_Y(SQ_SEL_Y),
62 					       SRC_SEL_Z(SQ_SEL_Z),
63 					       SRC_SEL_W(SQ_SEL_W),
64 					       BURST_COUNT(1),
65 					       VALID_PIXEL_MODE(0),
66 					       END_OF_PROGRAM(0),
67 					       CF_INST(SQ_CF_INST_EXPORT_DONE),
68 					       MARK(0),
69 					       BARRIER(1));
70     /* 2 - always export a param whether it's used or not */
71     shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(0),
72 					  TYPE(SQ_EXPORT_PARAM),
73 					  RW_GPR(0),
74 					  RW_REL(ABSOLUTE),
75 					  INDEX_GPR(0),
76 					  ELEM_SIZE(0));
77     shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
78 					       SRC_SEL_Y(SQ_SEL_Y),
79 					       SRC_SEL_Z(SQ_SEL_Z),
80 					       SRC_SEL_W(SQ_SEL_W),
81 					       BURST_COUNT(0),
82 					       VALID_PIXEL_MODE(0),
83 					       END_OF_PROGRAM(1),
84 					       CF_INST(SQ_CF_INST_EXPORT_DONE),
85 					       MARK(0),
86 					       BARRIER(0));
87     /* 3 - padding */
88     shader[i++] = 0x00000000;
89     shader[i++] = 0x00000000;
90     /* 4/5 */
91     shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
92 			     FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
93 			     FETCH_WHOLE_QUAD(0),
94 			     BUFFER_ID(0),
95 			     SRC_GPR(0),
96 			     SRC_REL(ABSOLUTE),
97 			     SRC_SEL_X(SQ_SEL_X),
98 			     MEGA_FETCH_COUNT(8));
99     shader[i++] = VTX_DWORD1_GPR(DST_GPR(1),
100 				 DST_REL(0),
101 				 DST_SEL_X(SQ_SEL_X),
102 				 DST_SEL_Y(SQ_SEL_Y),
103 				 DST_SEL_Z(SQ_SEL_0),
104 				 DST_SEL_W(SQ_SEL_1),
105 				 USE_CONST_FIELDS(0),
106 				 DATA_FORMAT(FMT_32_32_FLOAT),
107 				 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED),
108 				 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
109 				 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
110     shader[i++] = VTX_DWORD2(OFFSET(0),
111 #if X_BYTE_ORDER == X_BIG_ENDIAN
112 			     ENDIAN_SWAP(SQ_ENDIAN_8IN32),
113 #else
114 			     ENDIAN_SWAP(SQ_ENDIAN_NONE),
115 #endif
116 			     CONST_BUF_NO_STRIDE(0),
117 			     MEGA_FETCH(1),
118 			     ALT_CONST(0),
119 			     BUFFER_INDEX_MODE(SQ_CF_INDEX_NONE));
120     shader[i++] = VTX_DWORD_PAD;
121 
122     return i;
123 }
124 
125 /* solid ps --------------------------------------- */
evergreen_solid_ps(RADEONChipFamily ChipSet,uint32_t * shader)126 int evergreen_solid_ps(RADEONChipFamily ChipSet, uint32_t* shader)
127 {
128     int i = 0;
129 
130     /* 0 */
131     shader[i++] = CF_ALU_DWORD0(ADDR(2),
132 				KCACHE_BANK0(0),
133 				KCACHE_BANK1(0),
134 				KCACHE_MODE0(SQ_CF_KCACHE_LOCK_1));
135     shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
136 				KCACHE_ADDR0(0),
137 				KCACHE_ADDR1(0),
138 				I_COUNT(4),
139 				ALT_CONST(0),
140 				CF_INST(SQ_CF_INST_ALU),
141 				WHOLE_QUAD_MODE(0),
142 				BARRIER(1));
143     /* 1 */
144     shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_PIXEL_MRT0),
145 					  TYPE(SQ_EXPORT_PIXEL),
146 					  RW_GPR(0),
147 					  RW_REL(ABSOLUTE),
148 					  INDEX_GPR(0),
149 					  ELEM_SIZE(1));
150     shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
151 					       SRC_SEL_Y(SQ_SEL_Y),
152 					       SRC_SEL_Z(SQ_SEL_Z),
153 					       SRC_SEL_W(SQ_SEL_W),
154 					       BURST_COUNT(1),
155 					       VALID_PIXEL_MODE(0),
156 					       END_OF_PROGRAM(1),
157 					       CF_INST(SQ_CF_INST_EXPORT_DONE),
158 					       MARK(0),
159 					       BARRIER(1));
160 
161     /* 2 */
162     shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 0),
163 			     SRC0_REL(ABSOLUTE),
164 			     SRC0_ELEM(ELEM_X),
165 			     SRC0_NEG(0),
166 			     SRC1_SEL(ALU_SRC_GPR_BASE + 0),
167 			     SRC1_REL(ABSOLUTE),
168 			     SRC1_ELEM(ELEM_X),
169 			     SRC1_NEG(0),
170 			     INDEX_MODE(SQ_INDEX_AR_X),
171 			     PRED_SEL(SQ_PRED_SEL_OFF),
172 			     LAST(0));
173     shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
174 				 SRC1_ABS(0),
175 				 UPDATE_EXECUTE_MASK(0),
176 				 UPDATE_PRED(0),
177 				 WRITE_MASK(1),
178 				 OMOD(SQ_ALU_OMOD_OFF),
179 				 ALU_INST(SQ_OP2_INST_MOV),
180 				 BANK_SWIZZLE(SQ_ALU_VEC_012),
181 				 DST_GPR(0),
182 				 DST_REL(ABSOLUTE),
183 				 DST_ELEM(ELEM_X),
184 				 CLAMP(1));
185     /* 3 */
186     shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 0),
187 			     SRC0_REL(ABSOLUTE),
188 			     SRC0_ELEM(ELEM_Y),
189 			     SRC0_NEG(0),
190 			     SRC1_SEL(ALU_SRC_GPR_BASE + 0),
191 			     SRC1_REL(ABSOLUTE),
192 			     SRC1_ELEM(ELEM_Y),
193 			     SRC1_NEG(0),
194 			     INDEX_MODE(SQ_INDEX_AR_X),
195 			     PRED_SEL(SQ_PRED_SEL_OFF),
196 			     LAST(0));
197     shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
198 				 SRC1_ABS(0),
199 				 UPDATE_EXECUTE_MASK(0),
200 				 UPDATE_PRED(0),
201 				 WRITE_MASK(1),
202 				 OMOD(SQ_ALU_OMOD_OFF),
203 				 ALU_INST(SQ_OP2_INST_MOV),
204 				 BANK_SWIZZLE(SQ_ALU_VEC_012),
205 				 DST_GPR(0),
206 				 DST_REL(ABSOLUTE),
207 				 DST_ELEM(ELEM_Y),
208 				 CLAMP(1));
209     /* 4 */
210     shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 0),
211 			     SRC0_REL(ABSOLUTE),
212 			     SRC0_ELEM(ELEM_Z),
213 			     SRC0_NEG(0),
214 			     SRC1_SEL(ALU_SRC_GPR_BASE + 0),
215 			     SRC1_REL(ABSOLUTE),
216 			     SRC1_ELEM(ELEM_Z),
217 			     SRC1_NEG(0),
218 			     INDEX_MODE(SQ_INDEX_AR_X),
219 			     PRED_SEL(SQ_PRED_SEL_OFF),
220 			     LAST(0));
221     shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
222 				 SRC1_ABS(0),
223 				 UPDATE_EXECUTE_MASK(0),
224 				 UPDATE_PRED(0),
225 				 WRITE_MASK(1),
226 				 OMOD(SQ_ALU_OMOD_OFF),
227 				 ALU_INST(SQ_OP2_INST_MOV),
228 				 BANK_SWIZZLE(SQ_ALU_VEC_012),
229 				 DST_GPR(0),
230 				 DST_REL(ABSOLUTE),
231 				 DST_ELEM(ELEM_Z),
232 				 CLAMP(1));
233     /* 5 */
234     shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 0),
235 			     SRC0_REL(ABSOLUTE),
236 			     SRC0_ELEM(ELEM_W),
237 			     SRC0_NEG(0),
238 			     SRC1_SEL(ALU_SRC_GPR_BASE + 0),
239 			     SRC1_REL(ABSOLUTE),
240 			     SRC1_ELEM(ELEM_W),
241 			     SRC1_NEG(0),
242 			     INDEX_MODE(SQ_INDEX_AR_X),
243 			     PRED_SEL(SQ_PRED_SEL_OFF),
244 			     LAST(1));
245     shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
246 				 SRC1_ABS(0),
247 				 UPDATE_EXECUTE_MASK(0),
248 				 UPDATE_PRED(0),
249 				 WRITE_MASK(1),
250 				 OMOD(SQ_ALU_OMOD_OFF),
251 				 ALU_INST(SQ_OP2_INST_MOV),
252 				 BANK_SWIZZLE(SQ_ALU_VEC_012),
253 				 DST_GPR(0),
254 				 DST_REL(ABSOLUTE),
255 				 DST_ELEM(ELEM_W),
256 				 CLAMP(1));
257 
258     return i;
259 }
260 
261 /* copy vs --------------------------------------- */
evergreen_copy_vs(RADEONChipFamily ChipSet,uint32_t * shader)262 int evergreen_copy_vs(RADEONChipFamily ChipSet, uint32_t* shader)
263 {
264     int i = 0;
265 
266     /* 0 */
267     shader[i++] = CF_DWORD0(ADDR(4),
268 			    JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
269     shader[i++] = CF_DWORD1(POP_COUNT(0),
270 			    CF_CONST(0),
271 			    COND(SQ_CF_COND_ACTIVE),
272 			    I_COUNT(2),
273 			    VALID_PIXEL_MODE(0),
274 			    END_OF_PROGRAM(0),
275 			    CF_INST(SQ_CF_INST_VC),
276 			    WHOLE_QUAD_MODE(0),
277 			    BARRIER(1));
278     /* 1 */
279     shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_POS0),
280 					  TYPE(SQ_EXPORT_POS),
281 					  RW_GPR(1),
282 					  RW_REL(ABSOLUTE),
283 					  INDEX_GPR(0),
284 					  ELEM_SIZE(0));
285     shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
286 					       SRC_SEL_Y(SQ_SEL_Y),
287 					       SRC_SEL_Z(SQ_SEL_Z),
288 					       SRC_SEL_W(SQ_SEL_W),
289 					       BURST_COUNT(0),
290 					       VALID_PIXEL_MODE(0),
291 					       END_OF_PROGRAM(0),
292 					       CF_INST(SQ_CF_INST_EXPORT_DONE),
293 					       MARK(0),
294 					       BARRIER(1));
295     /* 2 */
296     shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(0),
297 					  TYPE(SQ_EXPORT_PARAM),
298 					  RW_GPR(0),
299 					  RW_REL(ABSOLUTE),
300 					  INDEX_GPR(0),
301 					  ELEM_SIZE(0));
302     shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
303 					       SRC_SEL_Y(SQ_SEL_Y),
304 					       SRC_SEL_Z(SQ_SEL_Z),
305 					       SRC_SEL_W(SQ_SEL_W),
306 					       BURST_COUNT(0),
307 					       VALID_PIXEL_MODE(0),
308 					       END_OF_PROGRAM(1),
309 					       CF_INST(SQ_CF_INST_EXPORT_DONE),
310 					       MARK(0),
311 					       BARRIER(0));
312     /* 3 */
313     shader[i++] = 0x00000000;
314     shader[i++] = 0x00000000;
315     /* 4/5 */
316     shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
317 			     FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
318 			     FETCH_WHOLE_QUAD(0),
319 			     BUFFER_ID(0),
320 			     SRC_GPR(0),
321 			     SRC_REL(ABSOLUTE),
322 			     SRC_SEL_X(SQ_SEL_X),
323 			     MEGA_FETCH_COUNT(16));
324     shader[i++] = VTX_DWORD1_GPR(DST_GPR(1),
325 				 DST_REL(0),
326 				 DST_SEL_X(SQ_SEL_X),
327 				 DST_SEL_Y(SQ_SEL_Y),
328 				 DST_SEL_Z(SQ_SEL_0),
329 				 DST_SEL_W(SQ_SEL_1),
330 				 USE_CONST_FIELDS(0),
331 				 DATA_FORMAT(FMT_32_32_FLOAT),
332 				 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED),
333 				 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
334 				 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
335     shader[i++] = VTX_DWORD2(OFFSET(0),
336 #if X_BYTE_ORDER == X_BIG_ENDIAN
337                              ENDIAN_SWAP(SQ_ENDIAN_8IN32),
338 #else
339                              ENDIAN_SWAP(SQ_ENDIAN_NONE),
340 #endif
341 			     CONST_BUF_NO_STRIDE(0),
342 			     MEGA_FETCH(1),
343 			     ALT_CONST(0),
344 			     BUFFER_INDEX_MODE(SQ_CF_INDEX_NONE));
345     shader[i++] = VTX_DWORD_PAD;
346     /* 6/7 */
347     shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
348 			     FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
349 			     FETCH_WHOLE_QUAD(0),
350 			     BUFFER_ID(0),
351 			     SRC_GPR(0),
352 			     SRC_REL(ABSOLUTE),
353 			     SRC_SEL_X(SQ_SEL_X),
354 			     MEGA_FETCH_COUNT(8));
355     shader[i++] = VTX_DWORD1_GPR(DST_GPR(0),
356 				 DST_REL(0),
357 				 DST_SEL_X(SQ_SEL_X),
358 				 DST_SEL_Y(SQ_SEL_Y),
359 				 DST_SEL_Z(SQ_SEL_0),
360 				 DST_SEL_W(SQ_SEL_1),
361 				 USE_CONST_FIELDS(0),
362 				 DATA_FORMAT(FMT_32_32_FLOAT),
363 				 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED),
364 				 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
365 				 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
366     shader[i++] = VTX_DWORD2(OFFSET(8),
367 #if X_BYTE_ORDER == X_BIG_ENDIAN
368                              ENDIAN_SWAP(SQ_ENDIAN_8IN32),
369 #else
370                              ENDIAN_SWAP(SQ_ENDIAN_NONE),
371 #endif
372 			     CONST_BUF_NO_STRIDE(0),
373 			     MEGA_FETCH(0),
374 			     ALT_CONST(0),
375 			     BUFFER_INDEX_MODE(SQ_CF_INDEX_NONE));
376     shader[i++] = VTX_DWORD_PAD;
377 
378     return i;
379 }
380 
381 /* copy ps --------------------------------------- */
evergreen_copy_ps(RADEONChipFamily ChipSet,uint32_t * shader)382 int evergreen_copy_ps(RADEONChipFamily ChipSet, uint32_t* shader)
383 {
384     int i = 0;
385 
386     /* CF INST 0 */
387     shader[i++] = CF_ALU_DWORD0(ADDR(3),
388 				KCACHE_BANK0(0),
389 				KCACHE_BANK1(0),
390 				KCACHE_MODE0(SQ_CF_KCACHE_NOP));
391     shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
392 				KCACHE_ADDR0(0),
393 				KCACHE_ADDR1(0),
394 				I_COUNT(4),
395 				ALT_CONST(0),
396 				CF_INST(SQ_CF_INST_ALU),
397 				WHOLE_QUAD_MODE(0),
398 				BARRIER(1));
399     /* CF INST 1 */
400     shader[i++] = CF_DWORD0(ADDR(8),
401 			    JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
402     shader[i++] = CF_DWORD1(POP_COUNT(0),
403 			    CF_CONST(0),
404 			    COND(SQ_CF_COND_ACTIVE),
405 			    I_COUNT(1),
406 			    VALID_PIXEL_MODE(0),
407 			    END_OF_PROGRAM(0),
408 			    CF_INST(SQ_CF_INST_TC),
409 			    WHOLE_QUAD_MODE(0),
410 			    BARRIER(1));
411     /* CF INST 2 */
412     shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_PIXEL_MRT0),
413 					  TYPE(SQ_EXPORT_PIXEL),
414 					  RW_GPR(0),
415 					  RW_REL(ABSOLUTE),
416 					  INDEX_GPR(0),
417 					  ELEM_SIZE(1));
418     shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
419 					       SRC_SEL_Y(SQ_SEL_Y),
420 					       SRC_SEL_Z(SQ_SEL_Z),
421 					       SRC_SEL_W(SQ_SEL_W),
422 					       BURST_COUNT(1),
423 					       VALID_PIXEL_MODE(0),
424 					       END_OF_PROGRAM(1),
425 					       CF_INST(SQ_CF_INST_EXPORT_DONE),
426 					       MARK(0),
427 					       BARRIER(1));
428 
429     /* 3 interpolate tex coords */
430     shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
431 			     SRC0_REL(ABSOLUTE),
432 			     SRC0_ELEM(ELEM_Y),
433 			     SRC0_NEG(0),
434 			     SRC1_SEL(ALU_SRC_PARAM_BASE + 0),
435 			     SRC1_REL(ABSOLUTE),
436 			     SRC1_ELEM(ELEM_X),
437 			     SRC1_NEG(0),
438 			     INDEX_MODE(SQ_INDEX_AR_X),
439 			     PRED_SEL(SQ_PRED_SEL_OFF),
440 			     LAST(0));
441     shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
442 				 SRC1_ABS(0),
443 				 UPDATE_EXECUTE_MASK(0),
444 				 UPDATE_PRED(0),
445 				 WRITE_MASK(1),
446 				 OMOD(SQ_ALU_OMOD_OFF),
447 				 ALU_INST(SQ_OP2_INST_INTERP_XY),
448 				 BANK_SWIZZLE(SQ_ALU_VEC_210),
449 				 DST_GPR(0),
450 				 DST_REL(ABSOLUTE),
451 				 DST_ELEM(ELEM_X),
452 				 CLAMP(0));
453     /* 4 */
454     shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
455 			     SRC0_REL(ABSOLUTE),
456 			     SRC0_ELEM(ELEM_X),
457 			     SRC0_NEG(0),
458 			     SRC1_SEL(ALU_SRC_PARAM_BASE + 0),
459 			     SRC1_REL(ABSOLUTE),
460 			     SRC1_ELEM(ELEM_X),
461 			     SRC1_NEG(0),
462 			     INDEX_MODE(SQ_INDEX_AR_X),
463 			     PRED_SEL(SQ_PRED_SEL_OFF),
464 			     LAST(0));
465     shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
466 				 SRC1_ABS(0),
467 				 UPDATE_EXECUTE_MASK(0),
468 				 UPDATE_PRED(0),
469 				 WRITE_MASK(1),
470 				 OMOD(SQ_ALU_OMOD_OFF),
471 				 ALU_INST(SQ_OP2_INST_INTERP_XY),
472 				 BANK_SWIZZLE(SQ_ALU_VEC_210),
473 				 DST_GPR(0),
474 				 DST_REL(ABSOLUTE),
475 				 DST_ELEM(ELEM_Y),
476 				 CLAMP(0));
477     /* 5 */
478     shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
479 			     SRC0_REL(ABSOLUTE),
480 			     SRC0_ELEM(ELEM_Y),
481 			     SRC0_NEG(0),
482 			     SRC1_SEL(ALU_SRC_PARAM_BASE + 0),
483 			     SRC1_REL(ABSOLUTE),
484 			     SRC1_ELEM(ELEM_X),
485 			     SRC1_NEG(0),
486 			     INDEX_MODE(SQ_INDEX_AR_X),
487 			     PRED_SEL(SQ_PRED_SEL_OFF),
488 			     LAST(0));
489     shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
490 				 SRC1_ABS(0),
491 				 UPDATE_EXECUTE_MASK(0),
492 				 UPDATE_PRED(0),
493 				 WRITE_MASK(0),
494 				 OMOD(SQ_ALU_OMOD_OFF),
495 				 ALU_INST(SQ_OP2_INST_INTERP_XY),
496 				 BANK_SWIZZLE(SQ_ALU_VEC_210),
497 				 DST_GPR(0),
498 				 DST_REL(ABSOLUTE),
499 				 DST_ELEM(ELEM_Z),
500 				 CLAMP(0));
501     /* 6 */
502     shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
503 			     SRC0_REL(ABSOLUTE),
504 			     SRC0_ELEM(ELEM_X),
505 			     SRC0_NEG(0),
506 			     SRC1_SEL(ALU_SRC_PARAM_BASE + 0),
507 			     SRC1_REL(ABSOLUTE),
508 			     SRC1_ELEM(ELEM_X),
509 			     SRC1_NEG(0),
510 			     INDEX_MODE(SQ_INDEX_AR_X),
511 			     PRED_SEL(SQ_PRED_SEL_OFF),
512 			     LAST(1));
513     shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
514 				 SRC1_ABS(0),
515 				 UPDATE_EXECUTE_MASK(0),
516 				 UPDATE_PRED(0),
517 				 WRITE_MASK(0),
518 				 OMOD(SQ_ALU_OMOD_OFF),
519 				 ALU_INST(SQ_OP2_INST_INTERP_XY),
520 				 BANK_SWIZZLE(SQ_ALU_VEC_210),
521 				 DST_GPR(0),
522 				 DST_REL(ABSOLUTE),
523 				 DST_ELEM(ELEM_W),
524 				 CLAMP(0));
525 
526     /* 7 */
527     shader[i++] = 0x00000000;
528     shader[i++] = 0x00000000;
529 
530     /* 8/9 TEX INST 0 */
531     shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
532 			     INST_MOD(0),
533 			     FETCH_WHOLE_QUAD(0),
534 			     RESOURCE_ID(0),
535 			     SRC_GPR(0),
536 			     SRC_REL(ABSOLUTE),
537 			     ALT_CONST(0),
538 			     RESOURCE_INDEX_MODE(SQ_CF_INDEX_NONE),
539 			     SAMPLER_INDEX_MODE(SQ_CF_INDEX_NONE));
540     shader[i++] = TEX_DWORD1(DST_GPR(0),
541 			     DST_REL(ABSOLUTE),
542 			     DST_SEL_X(SQ_SEL_X), /* R */
543 			     DST_SEL_Y(SQ_SEL_Y), /* G */
544 			     DST_SEL_Z(SQ_SEL_Z), /* B */
545 			     DST_SEL_W(SQ_SEL_W), /* A */
546 			     LOD_BIAS(0),
547 			     COORD_TYPE_X(TEX_UNNORMALIZED),
548 			     COORD_TYPE_Y(TEX_UNNORMALIZED),
549 			     COORD_TYPE_Z(TEX_UNNORMALIZED),
550 			     COORD_TYPE_W(TEX_UNNORMALIZED));
551     shader[i++] = TEX_DWORD2(OFFSET_X(0),
552 			     OFFSET_Y(0),
553 			     OFFSET_Z(0),
554 			     SAMPLER_ID(0),
555 			     SRC_SEL_X(SQ_SEL_X),
556 			     SRC_SEL_Y(SQ_SEL_Y),
557 			     SRC_SEL_Z(SQ_SEL_0),
558 			     SRC_SEL_W(SQ_SEL_1));
559     shader[i++] = TEX_DWORD_PAD;
560 
561     return i;
562 }
563 
/*
 * Endian swap mode for vertex fetches.  Selected once here because a
 * preprocessor conditional must not appear inside a macro argument list.
 */
#ifndef EVERGREEN_VTX_ENDIAN_SWAP
#if X_BYTE_ORDER == X_BIG_ENDIAN
#define EVERGREEN_VTX_ENDIAN_SWAP SQ_ENDIAN_8IN32
#else
#define EVERGREEN_VTX_ENDIAN_SWAP SQ_ENDIAN_NONE
#endif
#endif

/*
 * Write the Xv vertex shader into @shader.
 *
 * Layout (slot numbers as used in the comments below; one slot is two
 * dwords):
 *   0    vertex-fetch clause (SQ_CF_INST_VC) of two fetches at slot 6
 *   1    ALU clause of two instructions at slot 4, with kcache bank 0
 *        locked (SQ_CF_KCACHE_LOCK_1)
 *   2    position export from GPR 1
 *   3    parameter export from GPR 0, flagged END_OF_PROGRAM
 *   4    GPR0.x = GPR0.x * kcache0[0].x
 *   5    GPR0.y = GPR0.y * kcache0[0].y
 *   6/7  FMT_32_32_FLOAT fetch at offset 0 into GPR 1
 *   8/9  FMT_32_32_FLOAT fetch at offset 8 into GPR 0
 *
 * ChipSet: not referenced by this function.
 * shader:  destination buffer; the caller must provide room for the
 *          20 dwords written here.
 *
 * Returns the number of dwords written.
 */
int evergreen_xv_vs(RADEONChipFamily ChipSet, uint32_t *shader)
{
    int i = 0;

    /* 0 */
    shader[i++] = CF_DWORD0(ADDR(6),
                            JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
    shader[i++] = CF_DWORD1(POP_COUNT(0),
                            CF_CONST(0),
                            COND(SQ_CF_COND_ACTIVE),
                            I_COUNT(2),
                            VALID_PIXEL_MODE(0),
                            END_OF_PROGRAM(0),
                            CF_INST(SQ_CF_INST_VC),
                            WHOLE_QUAD_MODE(0),
                            BARRIER(1));

    /* 1 - ALU */
    shader[i++] = CF_ALU_DWORD0(ADDR(4),
                                KCACHE_BANK0(0),
                                KCACHE_BANK1(0),
                                KCACHE_MODE0(SQ_CF_KCACHE_LOCK_1));
    shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
                                KCACHE_ADDR0(0),
                                KCACHE_ADDR1(0),
                                I_COUNT(2),
                                ALT_CONST(0),
                                CF_INST(SQ_CF_INST_ALU),
                                WHOLE_QUAD_MODE(0),
                                BARRIER(1));

    /* 2 */
    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_POS0),
                                          TYPE(SQ_EXPORT_POS),
                                          RW_GPR(1),
                                          RW_REL(ABSOLUTE),
                                          INDEX_GPR(0),
                                          ELEM_SIZE(3));
    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
                                               SRC_SEL_Y(SQ_SEL_Y),
                                               SRC_SEL_Z(SQ_SEL_Z),
                                               SRC_SEL_W(SQ_SEL_W),
                                               BURST_COUNT(1),
                                               VALID_PIXEL_MODE(0),
                                               END_OF_PROGRAM(0),
                                               CF_INST(SQ_CF_INST_EXPORT_DONE),
                                               MARK(0),
                                               BARRIER(1));
    /* 3 */
    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(0),
                                          TYPE(SQ_EXPORT_PARAM),
                                          RW_GPR(0),
                                          RW_REL(ABSOLUTE),
                                          INDEX_GPR(0),
                                          ELEM_SIZE(3));
    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
                                               SRC_SEL_Y(SQ_SEL_Y),
                                               SRC_SEL_Z(SQ_SEL_Z),
                                               SRC_SEL_W(SQ_SEL_W),
                                               BURST_COUNT(1),
                                               VALID_PIXEL_MODE(0),
                                               END_OF_PROGRAM(1),
                                               CF_INST(SQ_CF_INST_EXPORT_DONE),
                                               MARK(0),
                                               BARRIER(0));

    /*
     * 4 texX / w
     * NOTE(review): the instruction is a MUL by kcache0[0].x, so the
     * constant presumably holds 1/w - confirm against the code that
     * uploads the constants.
     */
    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
                             SRC0_REL(ABSOLUTE),
                             SRC0_ELEM(ELEM_X),
                             SRC0_NEG(0),
                             SRC1_SEL(ALU_SRC_KCACHE0_BASE + 0),
                             SRC1_REL(ABSOLUTE),
                             SRC1_ELEM(ELEM_X),
                             SRC1_NEG(0),
                             INDEX_MODE(SQ_INDEX_AR_X),
                             PRED_SEL(SQ_PRED_SEL_OFF),
                             LAST(0));
    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
                                 SRC1_ABS(0),
                                 UPDATE_EXECUTE_MASK(0),
                                 UPDATE_PRED(0),
                                 WRITE_MASK(1),
                                 OMOD(SQ_ALU_OMOD_OFF),
                                 ALU_INST(SQ_OP2_INST_MUL),
                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
                                 DST_GPR(0),
                                 DST_REL(ABSOLUTE),
                                 DST_ELEM(ELEM_X),
                                 CLAMP(0));

    /*
     * 5 texY / h
     * NOTE(review): likewise a MUL by kcache0[0].y, presumably 1/h.
     */
    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
                             SRC0_REL(ABSOLUTE),
                             SRC0_ELEM(ELEM_Y),
                             SRC0_NEG(0),
                             SRC1_SEL(ALU_SRC_KCACHE0_BASE + 0),
                             SRC1_REL(ABSOLUTE),
                             SRC1_ELEM(ELEM_Y),
                             SRC1_NEG(0),
                             INDEX_MODE(SQ_INDEX_AR_X),
                             PRED_SEL(SQ_PRED_SEL_OFF),
                             LAST(1));
    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
                                 SRC1_ABS(0),
                                 UPDATE_EXECUTE_MASK(0),
                                 UPDATE_PRED(0),
                                 WRITE_MASK(1),
                                 OMOD(SQ_ALU_OMOD_OFF),
                                 ALU_INST(SQ_OP2_INST_MUL),
                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
                                 DST_GPR(0),
                                 DST_REL(ABSOLUTE),
                                 DST_ELEM(ELEM_Y),
                                 CLAMP(0));

    /* 6/7 */
    shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
                             FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
                             FETCH_WHOLE_QUAD(0),
                             BUFFER_ID(0),
                             SRC_GPR(0),
                             SRC_REL(ABSOLUTE),
                             SRC_SEL_X(SQ_SEL_X),
                             MEGA_FETCH_COUNT(16));
    shader[i++] = VTX_DWORD1_GPR(DST_GPR(1),
                                 DST_REL(ABSOLUTE),
                                 DST_SEL_X(SQ_SEL_X),
                                 DST_SEL_Y(SQ_SEL_Y),
                                 DST_SEL_Z(SQ_SEL_0),
                                 DST_SEL_W(SQ_SEL_1),
                                 USE_CONST_FIELDS(0),
                                 DATA_FORMAT(FMT_32_32_FLOAT),
                                 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED),
                                 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
                                 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
    shader[i++] = VTX_DWORD2(OFFSET(0),
                             ENDIAN_SWAP(EVERGREEN_VTX_ENDIAN_SWAP),
                             CONST_BUF_NO_STRIDE(0),
                             MEGA_FETCH(1),
                             ALT_CONST(0),
                             BUFFER_INDEX_MODE(SQ_CF_INDEX_NONE));
    shader[i++] = VTX_DWORD_PAD;
    /* 8/9 */
    shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
                             FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
                             FETCH_WHOLE_QUAD(0),
                             BUFFER_ID(0),
                             SRC_GPR(0),
                             SRC_REL(ABSOLUTE),
                             SRC_SEL_X(SQ_SEL_X),
                             MEGA_FETCH_COUNT(8));
    shader[i++] = VTX_DWORD1_GPR(DST_GPR(0),
                                 DST_REL(ABSOLUTE),
                                 DST_SEL_X(SQ_SEL_X),
                                 DST_SEL_Y(SQ_SEL_Y),
                                 DST_SEL_Z(SQ_SEL_0),
                                 DST_SEL_W(SQ_SEL_1),
                                 USE_CONST_FIELDS(0),
                                 DATA_FORMAT(FMT_32_32_FLOAT),
                                 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED),
                                 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
                                 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
    shader[i++] = VTX_DWORD2(OFFSET(8),
                             ENDIAN_SWAP(EVERGREEN_VTX_ENDIAN_SWAP),
                             CONST_BUF_NO_STRIDE(0),
                             MEGA_FETCH(0),
                             ALT_CONST(0),
                             BUFFER_INDEX_MODE(SQ_CF_INDEX_NONE));
    shader[i++] = VTX_DWORD_PAD;

    return i;
}

evergreen_xv_ps(RADEONChipFamily ChipSet,uint32_t * shader)747 int evergreen_xv_ps(RADEONChipFamily ChipSet, uint32_t* shader)
748 {
749     int i = 0;
750 
751     /* 0 */
752     shader[i++] = CF_ALU_DWORD0(ADDR(5),
753 				KCACHE_BANK0(0),
754 				KCACHE_BANK1(0),
755 				KCACHE_MODE0(SQ_CF_KCACHE_NOP));
756     shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
757 				KCACHE_ADDR0(0),
758 				KCACHE_ADDR1(0),
759 				I_COUNT(4),
760 				ALT_CONST(0),
761 				CF_INST(SQ_CF_INST_ALU),
762 				WHOLE_QUAD_MODE(0),
763 				BARRIER(1));
764     /* 1 */
765     shader[i++] = CF_DWORD0(ADDR(21),
766 			    JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
767     shader[i++] = CF_DWORD1(POP_COUNT(0),
768                             CF_CONST(0),
769                             COND(SQ_CF_COND_BOOL),
770                             I_COUNT(0),
771                             VALID_PIXEL_MODE(0),
772                             END_OF_PROGRAM(0),
773                             CF_INST(SQ_CF_INST_CALL),
774                             WHOLE_QUAD_MODE(0),
775                             BARRIER(0));
776     /* 2 */
777     shader[i++] = CF_DWORD0(ADDR(30),
778 			    JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
779     shader[i++] = CF_DWORD1(POP_COUNT(0),
780                             CF_CONST(0),
781                             COND(SQ_CF_COND_NOT_BOOL),
782                             I_COUNT(0),
783                             VALID_PIXEL_MODE(0),
784                             END_OF_PROGRAM(0),
785                             CF_INST(SQ_CF_INST_CALL),
786                             WHOLE_QUAD_MODE(0),
787                             BARRIER(0));
788     /* 3 */
789     shader[i++] = CF_ALU_DWORD0(ADDR(9),
790                                 KCACHE_BANK0(0),
791                                 KCACHE_BANK1(0),
792                                 KCACHE_MODE0(SQ_CF_KCACHE_LOCK_1));
793     shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
794                                 KCACHE_ADDR0(0),
795                                 KCACHE_ADDR1(0),
796                                 I_COUNT(12),
797                                 ALT_CONST(0),
798                                 CF_INST(SQ_CF_INST_ALU),
799                                 WHOLE_QUAD_MODE(0),
800                                 BARRIER(1));
801     /* 4 */
802     shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_PIXEL_MRT0),
803                                           TYPE(SQ_EXPORT_PIXEL),
804                                           RW_GPR(2),
805                                           RW_REL(ABSOLUTE),
806                                           INDEX_GPR(0),
807                                           ELEM_SIZE(3));
808     shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
809                                                SRC_SEL_Y(SQ_SEL_Y),
810                                                SRC_SEL_Z(SQ_SEL_Z),
811                                                SRC_SEL_W(SQ_SEL_W),
812                                                BURST_COUNT(1),
813                                                VALID_PIXEL_MODE(0),
814                                                END_OF_PROGRAM(1),
815                                                CF_INST(SQ_CF_INST_EXPORT_DONE),
816                                                MARK(0),
817                                                BARRIER(1));
818     /* 5 interpolate tex coords */
819     shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
820 			     SRC0_REL(ABSOLUTE),
821 			     SRC0_ELEM(ELEM_Y),
822 			     SRC0_NEG(0),
823 			     SRC1_SEL(ALU_SRC_PARAM_BASE + 0),
824 			     SRC1_REL(ABSOLUTE),
825 			     SRC1_ELEM(ELEM_X),
826 			     SRC1_NEG(0),
827 			     INDEX_MODE(SQ_INDEX_AR_X),
828 			     PRED_SEL(SQ_PRED_SEL_OFF),
829 			     LAST(0));
830     shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
831 				 SRC1_ABS(0),
832 				 UPDATE_EXECUTE_MASK(0),
833 				 UPDATE_PRED(0),
834 				 WRITE_MASK(1),
835 				 OMOD(SQ_ALU_OMOD_OFF),
836 				 ALU_INST(SQ_OP2_INST_INTERP_XY),
837 				 BANK_SWIZZLE(SQ_ALU_VEC_210),
838 				 DST_GPR(0),
839 				 DST_REL(ABSOLUTE),
840 				 DST_ELEM(ELEM_X),
841 				 CLAMP(0));
842     /* 6 */
843     shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
844 			     SRC0_REL(ABSOLUTE),
845 			     SRC0_ELEM(ELEM_X),
846 			     SRC0_NEG(0),
847 			     SRC1_SEL(ALU_SRC_PARAM_BASE + 0),
848 			     SRC1_REL(ABSOLUTE),
849 			     SRC1_ELEM(ELEM_X),
850 			     SRC1_NEG(0),
851 			     INDEX_MODE(SQ_INDEX_AR_X),
852 			     PRED_SEL(SQ_PRED_SEL_OFF),
853 			     LAST(0));
854     shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
855 				 SRC1_ABS(0),
856 				 UPDATE_EXECUTE_MASK(0),
857 				 UPDATE_PRED(0),
858 				 WRITE_MASK(1),
859 				 OMOD(SQ_ALU_OMOD_OFF),
860 				 ALU_INST(SQ_OP2_INST_INTERP_XY),
861 				 BANK_SWIZZLE(SQ_ALU_VEC_210),
862 				 DST_GPR(0),
863 				 DST_REL(ABSOLUTE),
864 				 DST_ELEM(ELEM_Y),
865 				 CLAMP(0));
866     /* 7 */
867     shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
868 			     SRC0_REL(ABSOLUTE),
869 			     SRC0_ELEM(ELEM_Y),
870 			     SRC0_NEG(0),
871 			     SRC1_SEL(ALU_SRC_PARAM_BASE + 0),
872 			     SRC1_REL(ABSOLUTE),
873 			     SRC1_ELEM(ELEM_X),
874 			     SRC1_NEG(0),
875 			     INDEX_MODE(SQ_INDEX_AR_X),
876 			     PRED_SEL(SQ_PRED_SEL_OFF),
877 			     LAST(0));
878     shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
879 				 SRC1_ABS(0),
880 				 UPDATE_EXECUTE_MASK(0),
881 				 UPDATE_PRED(0),
882 				 WRITE_MASK(0),
883 				 OMOD(SQ_ALU_OMOD_OFF),
884 				 ALU_INST(SQ_OP2_INST_INTERP_XY),
885 				 BANK_SWIZZLE(SQ_ALU_VEC_210),
886 				 DST_GPR(0),
887 				 DST_REL(ABSOLUTE),
888 				 DST_ELEM(ELEM_Z),
889 				 CLAMP(0));
890     /* 8 */
891     shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
892 			     SRC0_REL(ABSOLUTE),
893 			     SRC0_ELEM(ELEM_X),
894 			     SRC0_NEG(0),
895 			     SRC1_SEL(ALU_SRC_PARAM_BASE + 0),
896 			     SRC1_REL(ABSOLUTE),
897 			     SRC1_ELEM(ELEM_X),
898 			     SRC1_NEG(0),
899 			     INDEX_MODE(SQ_INDEX_AR_X),
900 			     PRED_SEL(SQ_PRED_SEL_OFF),
901 			     LAST(1));
902     shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
903 				 SRC1_ABS(0),
904 				 UPDATE_EXECUTE_MASK(0),
905 				 UPDATE_PRED(0),
906 				 WRITE_MASK(0),
907 				 OMOD(SQ_ALU_OMOD_OFF),
908 				 ALU_INST(SQ_OP2_INST_INTERP_XY),
909 				 BANK_SWIZZLE(SQ_ALU_VEC_210),
910 				 DST_GPR(0),
911 				 DST_REL(ABSOLUTE),
912 				 DST_ELEM(ELEM_W),
913 				 CLAMP(0));
914 
915     /* 9,10,11,12 */
916     /* r2.x = MAD(c0.w, r1.x, c0.x) */
917     shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 0),
918                              SRC0_REL(ABSOLUTE),
919                              SRC0_ELEM(ELEM_W),
920                              SRC0_NEG(0),
921                              SRC1_SEL(ALU_SRC_GPR_BASE + 1),
922                              SRC1_REL(ABSOLUTE),
923                              SRC1_ELEM(ELEM_X),
924                              SRC1_NEG(0),
925                              INDEX_MODE(SQ_INDEX_LOOP),
926                              PRED_SEL(SQ_PRED_SEL_OFF),
927                              LAST(0));
928     shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(ALU_SRC_KCACHE0_BASE + 0),
929                                  SRC2_REL(ABSOLUTE),
930                                  SRC2_ELEM(ELEM_X),
931                                  SRC2_NEG(0),
932                                  ALU_INST(SQ_OP3_INST_MULADD),
933                                  BANK_SWIZZLE(SQ_ALU_VEC_012),
934                                  DST_GPR(2),
935                                  DST_REL(ABSOLUTE),
936                                  DST_ELEM(ELEM_X),
937                                  CLAMP(0));
938     /* r2.y = MAD(c0.w, r1.x, c0.y) */
939     shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 0),
940                              SRC0_REL(ABSOLUTE),
941                              SRC0_ELEM(ELEM_W),
942                              SRC0_NEG(0),
943                              SRC1_SEL(ALU_SRC_GPR_BASE + 1),
944                              SRC1_REL(ABSOLUTE),
945                              SRC1_ELEM(ELEM_X),
946                              SRC1_NEG(0),
947                              INDEX_MODE(SQ_INDEX_LOOP),
948                              PRED_SEL(SQ_PRED_SEL_OFF),
949                              LAST(0));
950     shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(ALU_SRC_KCACHE0_BASE + 0),
951                                  SRC2_REL(ABSOLUTE),
952                                  SRC2_ELEM(ELEM_Y),
953                                  SRC2_NEG(0),
954                                  ALU_INST(SQ_OP3_INST_MULADD),
955                                  BANK_SWIZZLE(SQ_ALU_VEC_012),
956                                  DST_GPR(2),
957                                  DST_REL(ABSOLUTE),
958                                  DST_ELEM(ELEM_Y),
959                                  CLAMP(0));
960     /* r2.z = MAD(c0.w, r1.x, c0.z) */
961     shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 0),
962                              SRC0_REL(ABSOLUTE),
963                              SRC0_ELEM(ELEM_W),
964                              SRC0_NEG(0),
965                              SRC1_SEL(ALU_SRC_GPR_BASE + 1),
966                              SRC1_REL(ABSOLUTE),
967                              SRC1_ELEM(ELEM_X),
968                              SRC1_NEG(0),
969                              INDEX_MODE(SQ_INDEX_LOOP),
970                              PRED_SEL(SQ_PRED_SEL_OFF),
971                              LAST(0));
972     shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(ALU_SRC_KCACHE0_BASE + 0),
973                                  SRC2_REL(ABSOLUTE),
974                                  SRC2_ELEM(ELEM_Z),
975                                  SRC2_NEG(0),
976                                  ALU_INST(SQ_OP3_INST_MULADD),
977                                  BANK_SWIZZLE(SQ_ALU_VEC_012),
978                                  DST_GPR(2),
979                                  DST_REL(ABSOLUTE),
980                                  DST_ELEM(ELEM_Z),
981                                  CLAMP(0));
982     /* r2.w = MAD(0, 0, 1) */
983     shader[i++] = ALU_DWORD0(SRC0_SEL(SQ_ALU_SRC_0),
984                              SRC0_REL(ABSOLUTE),
985                              SRC0_ELEM(ELEM_X),
986                              SRC0_NEG(0),
987                              SRC1_SEL(SQ_ALU_SRC_0),
988                              SRC1_REL(ABSOLUTE),
989                              SRC1_ELEM(ELEM_X),
990                              SRC1_NEG(0),
991                              INDEX_MODE(SQ_INDEX_LOOP),
992                              PRED_SEL(SQ_PRED_SEL_OFF),
993                              LAST(1));
994     shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_1),
995                                  SRC2_REL(ABSOLUTE),
996                                  SRC2_ELEM(ELEM_X),
997                                  SRC2_NEG(0),
998                                  ALU_INST(SQ_OP3_INST_MULADD),
999                                  BANK_SWIZZLE(SQ_ALU_VEC_012),
1000                                  DST_GPR(2),
1001                                  DST_REL(ABSOLUTE),
1002                                  DST_ELEM(ELEM_W),
1003                                  CLAMP(0));
1004 
1005     /* 13,14,15,16 */
1006     /* r2.x = MAD(c1.x, r1.y, pv.x) */
1007     shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 1),
1008                              SRC0_REL(ABSOLUTE),
1009                              SRC0_ELEM(ELEM_X),
1010                              SRC0_NEG(0),
1011                              SRC1_SEL(ALU_SRC_GPR_BASE + 1),
1012                              SRC1_REL(ABSOLUTE),
1013                              SRC1_ELEM(ELEM_Y),
1014                              SRC1_NEG(0),
1015                              INDEX_MODE(SQ_INDEX_LOOP),
1016                              PRED_SEL(SQ_PRED_SEL_OFF),
1017                              LAST(0));
1018     shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_PV),
1019                                  SRC2_REL(ABSOLUTE),
1020                                  SRC2_ELEM(ELEM_X),
1021                                  SRC2_NEG(0),
1022                                  ALU_INST(SQ_OP3_INST_MULADD),
1023                                  BANK_SWIZZLE(SQ_ALU_VEC_012),
1024                                  DST_GPR(2),
1025                                  DST_REL(ABSOLUTE),
1026                                  DST_ELEM(ELEM_X),
1027                                  CLAMP(0));
1028     /* r2.y = MAD(c1.y, r1.y, pv.y) */
1029     shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 1),
1030                              SRC0_REL(ABSOLUTE),
1031                              SRC0_ELEM(ELEM_Y),
1032                              SRC0_NEG(0),
1033                              SRC1_SEL(ALU_SRC_GPR_BASE + 1),
1034                              SRC1_REL(ABSOLUTE),
1035                              SRC1_ELEM(ELEM_Y),
1036                              SRC1_NEG(0),
1037                              INDEX_MODE(SQ_INDEX_LOOP),
1038                              PRED_SEL(SQ_PRED_SEL_OFF),
1039                              LAST(0));
1040     shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_PV),
1041                                  SRC2_REL(ABSOLUTE),
1042                                  SRC2_ELEM(ELEM_Y),
1043                                  SRC2_NEG(0),
1044                                  ALU_INST(SQ_OP3_INST_MULADD),
1045                                  BANK_SWIZZLE(SQ_ALU_VEC_012),
1046                                  DST_GPR(2),
1047                                  DST_REL(ABSOLUTE),
1048                                  DST_ELEM(ELEM_Y),
1049                                  CLAMP(0));
1050     /* r2.z = MAD(c1.z, r1.y, pv.z) */
1051     shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 1),
1052                              SRC0_REL(ABSOLUTE),
1053                              SRC0_ELEM(ELEM_Z),
1054                              SRC0_NEG(0),
1055                              SRC1_SEL(ALU_SRC_GPR_BASE + 1),
1056                              SRC1_REL(ABSOLUTE),
1057                              SRC1_ELEM(ELEM_Y),
1058                              SRC1_NEG(0),
1059                              INDEX_MODE(SQ_INDEX_LOOP),
1060                              PRED_SEL(SQ_PRED_SEL_OFF),
1061                              LAST(0));
1062     shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_PV),
1063                                  SRC2_REL(ABSOLUTE),
1064                                  SRC2_ELEM(ELEM_Z),
1065                                  SRC2_NEG(0),
1066                                  ALU_INST(SQ_OP3_INST_MULADD),
1067                                  BANK_SWIZZLE(SQ_ALU_VEC_012),
1068                                  DST_GPR(2),
1069                                  DST_REL(ABSOLUTE),
1070                                  DST_ELEM(ELEM_Z),
1071                                  CLAMP(0));
1072     /* r2.w = MAD(0, 0, 1) */
1073     shader[i++] = ALU_DWORD0(SRC0_SEL(SQ_ALU_SRC_0),
1074                              SRC0_REL(ABSOLUTE),
1075                              SRC0_ELEM(ELEM_X),
1076                              SRC0_NEG(0),
1077                              SRC1_SEL(SQ_ALU_SRC_0),
1078                              SRC1_REL(ABSOLUTE),
1079                              SRC1_ELEM(ELEM_X),
1080                              SRC1_NEG(0),
1081                              INDEX_MODE(SQ_INDEX_LOOP),
1082                              PRED_SEL(SQ_PRED_SEL_OFF),
1083                              LAST(1));
1084     shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_1),
1085                                  SRC2_REL(ABSOLUTE),
1086                                  SRC2_ELEM(ELEM_W),
1087                                  SRC2_NEG(0),
1088                                  ALU_INST(SQ_OP3_INST_MULADD),
1089                                  BANK_SWIZZLE(SQ_ALU_VEC_012),
1090                                  DST_GPR(2),
1091                                  DST_REL(ABSOLUTE),
1092                                  DST_ELEM(ELEM_W),
1093                                  CLAMP(0));
1094     /* 17,18,19,20 */
1095     /* r2.x = MAD(c2.x, r1.z, pv.x) */
1096     shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 2),
1097                              SRC0_REL(ABSOLUTE),
1098                              SRC0_ELEM(ELEM_X),
1099                              SRC0_NEG(0),
1100                              SRC1_SEL(ALU_SRC_GPR_BASE + 1),
1101                              SRC1_REL(ABSOLUTE),
1102                              SRC1_ELEM(ELEM_Z),
1103                              SRC1_NEG(0),
1104                              INDEX_MODE(SQ_INDEX_LOOP),
1105                              PRED_SEL(SQ_PRED_SEL_OFF),
1106                              LAST(0));
1107     shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_PV),
1108                                  SRC2_REL(ABSOLUTE),
1109                                  SRC2_ELEM(ELEM_X),
1110                                  SRC2_NEG(0),
1111                                  ALU_INST(SQ_OP3_INST_MULADD),
1112                                  BANK_SWIZZLE(SQ_ALU_VEC_012),
1113                                  DST_GPR(2),
1114                                  DST_REL(ABSOLUTE),
1115                                  DST_ELEM(ELEM_X),
1116                                  CLAMP(1));
1117     /* r2.y = MAD(c2.y, r1.z, pv.y) */
1118     shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 2),
1119                              SRC0_REL(ABSOLUTE),
1120                              SRC0_ELEM(ELEM_Y),
1121                              SRC0_NEG(0),
1122                              SRC1_SEL(ALU_SRC_GPR_BASE + 1),
1123                              SRC1_REL(ABSOLUTE),
1124                              SRC1_ELEM(ELEM_Z),
1125                              SRC1_NEG(0),
1126                              INDEX_MODE(SQ_INDEX_LOOP),
1127                              PRED_SEL(SQ_PRED_SEL_OFF),
1128                              LAST(0));
1129     shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_PV),
1130                                  SRC2_REL(ABSOLUTE),
1131                                  SRC2_ELEM(ELEM_Y),
1132                                  SRC2_NEG(0),
1133                                  ALU_INST(SQ_OP3_INST_MULADD),
1134                                  BANK_SWIZZLE(SQ_ALU_VEC_012),
1135                                  DST_GPR(2),
1136                                  DST_REL(ABSOLUTE),
1137                                  DST_ELEM(ELEM_Y),
1138                                  CLAMP(1));
1139     /* r2.z = MAD(c2.z, r1.z, pv.z) */
1140     shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 2),
1141                              SRC0_REL(ABSOLUTE),
1142                              SRC0_ELEM(ELEM_Z),
1143                              SRC0_NEG(0),
1144                              SRC1_SEL(ALU_SRC_GPR_BASE + 1),
1145                              SRC1_REL(ABSOLUTE),
1146                              SRC1_ELEM(ELEM_Z),
1147                              SRC1_NEG(0),
1148                              INDEX_MODE(SQ_INDEX_LOOP),
1149                              PRED_SEL(SQ_PRED_SEL_OFF),
1150                              LAST(0));
1151     shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_PV),
1152                                  SRC2_REL(ABSOLUTE),
1153                                  SRC2_ELEM(ELEM_Z),
1154                                  SRC2_NEG(0),
1155                                  ALU_INST(SQ_OP3_INST_MULADD),
1156                                  BANK_SWIZZLE(SQ_ALU_VEC_012),
1157                                  DST_GPR(2),
1158                                  DST_REL(ABSOLUTE),
1159                                  DST_ELEM(ELEM_Z),
1160                                  CLAMP(1));
1161     /* r2.w = MAD(0, 0, 1) */
1162     shader[i++] = ALU_DWORD0(SRC0_SEL(SQ_ALU_SRC_0),
1163                              SRC0_REL(ABSOLUTE),
1164                              SRC0_ELEM(ELEM_X),
1165                              SRC0_NEG(0),
1166                              SRC1_SEL(SQ_ALU_SRC_0),
1167                              SRC1_REL(ABSOLUTE),
1168                              SRC1_ELEM(ELEM_X),
1169                              SRC1_NEG(0),
1170                              INDEX_MODE(SQ_INDEX_LOOP),
1171                              PRED_SEL(SQ_PRED_SEL_OFF),
1172                              LAST(1));
1173     shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_1),
1174                                  SRC2_REL(ABSOLUTE),
1175                                  SRC2_ELEM(ELEM_X),
1176                                  SRC2_NEG(0),
1177                                  ALU_INST(SQ_OP3_INST_MULADD),
1178                                  BANK_SWIZZLE(SQ_ALU_VEC_012),
1179                                  DST_GPR(2),
1180                                  DST_REL(ABSOLUTE),
1181                                  DST_ELEM(ELEM_W),
1182                                  CLAMP(1));
1183 
1184     /* 21 */
1185     shader[i++] = CF_DWORD0(ADDR(24),
1186 			    JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
1187     shader[i++] = CF_DWORD1(POP_COUNT(0),
1188                             CF_CONST(0),
1189                             COND(SQ_CF_COND_ACTIVE),
1190                             I_COUNT(3),
1191                             VALID_PIXEL_MODE(0),
1192                             END_OF_PROGRAM(0),
1193                             CF_INST(SQ_CF_INST_TC),
1194                             WHOLE_QUAD_MODE(0),
1195                             BARRIER(1));
1196     /* 22 */
1197     shader[i++] = CF_DWORD0(ADDR(0),
1198 			    JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
1199     shader[i++] = CF_DWORD1(POP_COUNT(0),
1200 			    CF_CONST(0),
1201 			    COND(SQ_CF_COND_ACTIVE),
1202 			    I_COUNT(0),
1203 			    VALID_PIXEL_MODE(0),
1204 			    END_OF_PROGRAM(0),
1205 			    CF_INST(SQ_CF_INST_RETURN),
1206 			    WHOLE_QUAD_MODE(0),
1207 			    BARRIER(1));
1208     /* 23 */
1209     shader[i++] = 0x00000000;
1210     shader[i++] = 0x00000000;
1211     /* 24/25 */
1212     shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
1213                              INST_MOD(0),
1214                              FETCH_WHOLE_QUAD(0),
1215                              RESOURCE_ID(0),
1216                              SRC_GPR(0),
1217                              SRC_REL(ABSOLUTE),
1218                              ALT_CONST(0),
1219 			     RESOURCE_INDEX_MODE(SQ_CF_INDEX_NONE),
1220 			     SAMPLER_INDEX_MODE(SQ_CF_INDEX_NONE));
1221     shader[i++] = TEX_DWORD1(DST_GPR(1),
1222                              DST_REL(ABSOLUTE),
1223                              DST_SEL_X(SQ_SEL_X),
1224                              DST_SEL_Y(SQ_SEL_MASK),
1225                              DST_SEL_Z(SQ_SEL_MASK),
1226                              DST_SEL_W(SQ_SEL_1),
1227                              LOD_BIAS(0),
1228                              COORD_TYPE_X(TEX_NORMALIZED),
1229                              COORD_TYPE_Y(TEX_NORMALIZED),
1230                              COORD_TYPE_Z(TEX_NORMALIZED),
1231                              COORD_TYPE_W(TEX_NORMALIZED));
1232     shader[i++] = TEX_DWORD2(OFFSET_X(0),
1233                              OFFSET_Y(0),
1234                              OFFSET_Z(0),
1235                              SAMPLER_ID(0),
1236                              SRC_SEL_X(SQ_SEL_X),
1237                              SRC_SEL_Y(SQ_SEL_Y),
1238                              SRC_SEL_Z(SQ_SEL_0),
1239                              SRC_SEL_W(SQ_SEL_1));
1240     shader[i++] = TEX_DWORD_PAD;
1241     /* 26/27 */
1242     shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
1243                              INST_MOD(0),
1244                              FETCH_WHOLE_QUAD(0),
1245                              RESOURCE_ID(1),
1246                              SRC_GPR(0),
1247                              SRC_REL(ABSOLUTE),
1248                              ALT_CONST(0),
1249 			     RESOURCE_INDEX_MODE(SQ_CF_INDEX_NONE),
1250 			     SAMPLER_INDEX_MODE(SQ_CF_INDEX_NONE));
1251     shader[i++] = TEX_DWORD1(DST_GPR(1),
1252                              DST_REL(ABSOLUTE),
1253                              DST_SEL_X(SQ_SEL_MASK),
1254                              DST_SEL_Y(SQ_SEL_MASK),
1255                              DST_SEL_Z(SQ_SEL_X),
1256                              DST_SEL_W(SQ_SEL_MASK),
1257                              LOD_BIAS(0),
1258                              COORD_TYPE_X(TEX_NORMALIZED),
1259                              COORD_TYPE_Y(TEX_NORMALIZED),
1260                              COORD_TYPE_Z(TEX_NORMALIZED),
1261                              COORD_TYPE_W(TEX_NORMALIZED));
1262     shader[i++] = TEX_DWORD2(OFFSET_X(0),
1263                              OFFSET_Y(0),
1264                              OFFSET_Z(0),
1265                              SAMPLER_ID(1),
1266                              SRC_SEL_X(SQ_SEL_X),
1267                              SRC_SEL_Y(SQ_SEL_Y),
1268                              SRC_SEL_Z(SQ_SEL_0),
1269                              SRC_SEL_W(SQ_SEL_1));
1270     shader[i++] = TEX_DWORD_PAD;
1271     /* 28/29 */
1272     shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
1273                              INST_MOD(0),
1274                              FETCH_WHOLE_QUAD(0),
1275                              RESOURCE_ID(2),
1276                              SRC_GPR(0),
1277                              SRC_REL(ABSOLUTE),
1278                              ALT_CONST(0),
1279 			     RESOURCE_INDEX_MODE(SQ_CF_INDEX_NONE),
1280 			     SAMPLER_INDEX_MODE(SQ_CF_INDEX_NONE));
1281     shader[i++] = TEX_DWORD1(DST_GPR(1),
1282                              DST_REL(ABSOLUTE),
1283                              DST_SEL_X(SQ_SEL_MASK),
1284                              DST_SEL_Y(SQ_SEL_X),
1285                              DST_SEL_Z(SQ_SEL_MASK),
1286                              DST_SEL_W(SQ_SEL_MASK),
1287                              LOD_BIAS(0),
1288                              COORD_TYPE_X(TEX_NORMALIZED),
1289                              COORD_TYPE_Y(TEX_NORMALIZED),
1290                              COORD_TYPE_Z(TEX_NORMALIZED),
1291                              COORD_TYPE_W(TEX_NORMALIZED));
1292     shader[i++] = TEX_DWORD2(OFFSET_X(0),
1293                              OFFSET_Y(0),
1294                              OFFSET_Z(0),
1295                              SAMPLER_ID(2),
1296                              SRC_SEL_X(SQ_SEL_X),
1297                              SRC_SEL_Y(SQ_SEL_Y),
1298                              SRC_SEL_Z(SQ_SEL_0),
1299                              SRC_SEL_W(SQ_SEL_1));
1300     shader[i++] = TEX_DWORD_PAD;
1301     /* 30 */
1302     shader[i++] = CF_DWORD0(ADDR(32),
1303 			    JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
1304     shader[i++] = CF_DWORD1(POP_COUNT(0),
1305                             CF_CONST(0),
1306                             COND(SQ_CF_COND_ACTIVE),
1307                             I_COUNT(1),
1308                             VALID_PIXEL_MODE(0),
1309                             END_OF_PROGRAM(0),
1310                             CF_INST(SQ_CF_INST_TC),
1311                             WHOLE_QUAD_MODE(0),
1312                             BARRIER(1));
1313     /* 31 */
1314     shader[i++] = CF_DWORD0(ADDR(0),
1315 			    JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
1316     shader[i++] = CF_DWORD1(POP_COUNT(0),
1317 			    CF_CONST(0),
1318 			    COND(SQ_CF_COND_ACTIVE),
1319 			    I_COUNT(0),
1320 			    VALID_PIXEL_MODE(0),
1321 			    END_OF_PROGRAM(0),
1322 			    CF_INST(SQ_CF_INST_RETURN),
1323 			    WHOLE_QUAD_MODE(0),
1324 			    BARRIER(1));
1325     /* 32/33 */
1326     shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
1327                              INST_MOD(0),
1328                              FETCH_WHOLE_QUAD(0),
1329                              RESOURCE_ID(0),
1330                              SRC_GPR(0),
1331                              SRC_REL(ABSOLUTE),
1332                              ALT_CONST(0),
1333                              RESOURCE_INDEX_MODE(SQ_CF_INDEX_NONE),
1334                              SAMPLER_INDEX_MODE(SQ_CF_INDEX_NONE));
1335     shader[i++] = TEX_DWORD1(DST_GPR(1),
1336                              DST_REL(ABSOLUTE),
1337                              DST_SEL_X(SQ_SEL_X),
1338                              DST_SEL_Y(SQ_SEL_Y),
1339                              DST_SEL_Z(SQ_SEL_Z),
1340                              DST_SEL_W(SQ_SEL_1),
1341                              LOD_BIAS(0),
1342                              COORD_TYPE_X(TEX_NORMALIZED),
1343                              COORD_TYPE_Y(TEX_NORMALIZED),
1344                              COORD_TYPE_Z(TEX_NORMALIZED),
1345                              COORD_TYPE_W(TEX_NORMALIZED));
1346     shader[i++] = TEX_DWORD2(OFFSET_X(0),
1347                              OFFSET_Y(0),
1348                              OFFSET_Z(0),
1349                              SAMPLER_ID(0),
1350                              SRC_SEL_X(SQ_SEL_X),
1351                              SRC_SEL_Y(SQ_SEL_Y),
1352                              SRC_SEL_Z(SQ_SEL_0),
1353                              SRC_SEL_W(SQ_SEL_1));
1354     shader[i++] = TEX_DWORD_PAD;
1355 
1356     return i;
1357 }
1358 
1359 /* comp vs --------------------------------------- */
1360 int evergreen_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader)
1361 {
1362     int i = 0;
1363 
1364     /* 0 */
1365     shader[i++] = CF_DWORD0(ADDR(3),
1366 			    JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
1367     shader[i++] = CF_DWORD1(POP_COUNT(0),
1368                             CF_CONST(0),
1369                             COND(SQ_CF_COND_BOOL),
1370                             I_COUNT(0),
1371                             VALID_PIXEL_MODE(0),
1372                             END_OF_PROGRAM(0),
1373                             CF_INST(SQ_CF_INST_CALL),
1374                             WHOLE_QUAD_MODE(0),
1375                             BARRIER(0));
1376     /* 1 */
1377     shader[i++] = CF_DWORD0(ADDR(9),
1378 			    JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
1379     shader[i++] = CF_DWORD1(POP_COUNT(0),
1380                             CF_CONST(0),
1381                             COND(SQ_CF_COND_NOT_BOOL),
1382                             I_COUNT(0),
1383                             VALID_PIXEL_MODE(0),
1384                             END_OF_PROGRAM(0),
1385                             CF_INST(SQ_CF_INST_CALL),
1386                             WHOLE_QUAD_MODE(0),
1387                             BARRIER(0));
1388     /* 2 */
1389     shader[i++] = CF_DWORD0(ADDR(0),
1390                             JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
1391     shader[i++] = CF_DWORD1(POP_COUNT(0),
1392                             CF_CONST(0),
1393                             COND(SQ_CF_COND_ACTIVE),
1394                             I_COUNT(0),
1395                             VALID_PIXEL_MODE(0),
1396                             END_OF_PROGRAM(1),
1397                             CF_INST(SQ_CF_INST_NOP),
1398                             WHOLE_QUAD_MODE(0),
1399                             BARRIER(1));
1400     /* 3 - mask sub */
1401     shader[i++] = CF_DWORD0(ADDR(44),
1402 			    JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
1403     shader[i++] = CF_DWORD1(POP_COUNT(0),
1404 			    CF_CONST(0),
1405 			    COND(SQ_CF_COND_ACTIVE),
1406 			    I_COUNT(3),
1407 			    VALID_PIXEL_MODE(0),
1408 			    END_OF_PROGRAM(0),
1409 			    CF_INST(SQ_CF_INST_VC),
1410 			    WHOLE_QUAD_MODE(0),
1411 			    BARRIER(1));
1412 
1413     /* 4 - ALU */
1414     shader[i++] = CF_ALU_DWORD0(ADDR(14),
1415 				KCACHE_BANK0(0),
1416 				KCACHE_BANK1(0),
1417 				KCACHE_MODE0(SQ_CF_KCACHE_LOCK_1));
1418     shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
1419 				KCACHE_ADDR0(0),
1420 				KCACHE_ADDR1(0),
1421 				I_COUNT(20),
1422 				ALT_CONST(0),
1423 				CF_INST(SQ_CF_INST_ALU),
1424 				WHOLE_QUAD_MODE(0),
1425 				BARRIER(1));
1426 
1427     /* 5 - dst */
1428     shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_POS0),
1429 					  TYPE(SQ_EXPORT_POS),
1430 					  RW_GPR(2),
1431 					  RW_REL(ABSOLUTE),
1432 					  INDEX_GPR(0),
1433 					  ELEM_SIZE(0));
1434     shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
1435 					       SRC_SEL_Y(SQ_SEL_Y),
1436 					       SRC_SEL_Z(SQ_SEL_0),
1437 					       SRC_SEL_W(SQ_SEL_1),
1438 					       BURST_COUNT(1),
1439 					       VALID_PIXEL_MODE(0),
1440 					       END_OF_PROGRAM(0),
1441 					       CF_INST(SQ_CF_INST_EXPORT_DONE),
1442 					       MARK(0),
1443 					       BARRIER(1));
1444     /* 6 - src */
1445     shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(0),
1446 					  TYPE(SQ_EXPORT_PARAM),
1447 					  RW_GPR(1),
1448 					  RW_REL(ABSOLUTE),
1449 					  INDEX_GPR(0),
1450 					  ELEM_SIZE(0));
1451     shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
1452 					       SRC_SEL_Y(SQ_SEL_Y),
1453 					       SRC_SEL_Z(SQ_SEL_0),
1454 					       SRC_SEL_W(SQ_SEL_1),
1455 					       BURST_COUNT(1),
1456 					       VALID_PIXEL_MODE(0),
1457 					       END_OF_PROGRAM(0),
1458 					       CF_INST(SQ_CF_INST_EXPORT),
1459 					       MARK(0),
1460 					       BARRIER(0));
1461     /* 7 - mask */
1462     shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(1),
1463 					  TYPE(SQ_EXPORT_PARAM),
1464 					  RW_GPR(0),
1465 					  RW_REL(ABSOLUTE),
1466 					  INDEX_GPR(0),
1467 					  ELEM_SIZE(0));
1468     shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
1469 					       SRC_SEL_Y(SQ_SEL_Y),
1470 					       SRC_SEL_Z(SQ_SEL_0),
1471 					       SRC_SEL_W(SQ_SEL_1),
1472 					       BURST_COUNT(1),
1473 					       VALID_PIXEL_MODE(0),
1474 					       END_OF_PROGRAM(0),
1475 					       CF_INST(SQ_CF_INST_EXPORT_DONE),
1476 					       WHOLE_QUAD_MODE(0),
1477 					       BARRIER(0));
1478     /* 8 */
1479     shader[i++] = CF_DWORD0(ADDR(0),
1480 			    JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
1481     shader[i++] = CF_DWORD1(POP_COUNT(0),
1482 			    CF_CONST(0),
1483 			    COND(SQ_CF_COND_ACTIVE),
1484 			    I_COUNT(0),
1485 			    VALID_PIXEL_MODE(0),
1486 			    END_OF_PROGRAM(0),
1487 			    CF_INST(SQ_CF_INST_RETURN),
1488 			    WHOLE_QUAD_MODE(0),
1489 			    BARRIER(1));
1490     /* 9 - non-mask sub */
1491     shader[i++] = CF_DWORD0(ADDR(50),
1492 			    JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
1493     shader[i++] = CF_DWORD1(POP_COUNT(0),
1494 			    CF_CONST(0),
1495 			    COND(SQ_CF_COND_ACTIVE),
1496 			    I_COUNT(2),
1497 			    VALID_PIXEL_MODE(0),
1498 			    END_OF_PROGRAM(0),
1499 			    CF_INST(SQ_CF_INST_VC),
1500 			    WHOLE_QUAD_MODE(0),
1501 			    BARRIER(1));
1502 
1503     /* 10 - ALU */
1504     shader[i++] = CF_ALU_DWORD0(ADDR(34),
1505 				KCACHE_BANK0(0),
1506 				KCACHE_BANK1(0),
1507 				KCACHE_MODE0(SQ_CF_KCACHE_LOCK_1));
1508     shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
1509 				KCACHE_ADDR0(0),
1510 				KCACHE_ADDR1(0),
1511 				I_COUNT(10),
1512 				ALT_CONST(0),
1513 				CF_INST(SQ_CF_INST_ALU),
1514 				WHOLE_QUAD_MODE(0),
1515 				BARRIER(1));
1516 
1517     /* 11 - dst */
1518     shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_POS0),
1519 					  TYPE(SQ_EXPORT_POS),
1520 					  RW_GPR(1),
1521 					  RW_REL(ABSOLUTE),
1522 					  INDEX_GPR(0),
1523 					  ELEM_SIZE(0));
1524     shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
1525 					       SRC_SEL_Y(SQ_SEL_Y),
1526 					       SRC_SEL_Z(SQ_SEL_0),
1527 					       SRC_SEL_W(SQ_SEL_1),
1528 					       BURST_COUNT(0),
1529 					       VALID_PIXEL_MODE(0),
1530 					       END_OF_PROGRAM(0),
1531 					       CF_INST(SQ_CF_INST_EXPORT_DONE),
1532 					       MARK(0),
1533 					       BARRIER(1));
1534     /* 12 - src */
1535     shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(0),
1536 					  TYPE(SQ_EXPORT_PARAM),
1537 					  RW_GPR(0),
1538 					  RW_REL(ABSOLUTE),
1539 					  INDEX_GPR(0),
1540 					  ELEM_SIZE(0));
1541     shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
1542 					       SRC_SEL_Y(SQ_SEL_Y),
1543 					       SRC_SEL_Z(SQ_SEL_0),
1544 					       SRC_SEL_W(SQ_SEL_1),
1545 					       BURST_COUNT(0),
1546 					       VALID_PIXEL_MODE(0),
1547 					       END_OF_PROGRAM(0),
1548 					       CF_INST(SQ_CF_INST_EXPORT_DONE),
1549 					       MARK(0),
1550 					       BARRIER(0));
1551     /* 13 */
1552     shader[i++] = CF_DWORD0(ADDR(0),
1553 			    JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
1554     shader[i++] = CF_DWORD1(POP_COUNT(0),
1555 			    CF_CONST(0),
1556 			    COND(SQ_CF_COND_ACTIVE),
1557 			    I_COUNT(0),
1558 			    VALID_PIXEL_MODE(0),
1559 			    END_OF_PROGRAM(0),
1560 			    CF_INST(SQ_CF_INST_RETURN),
1561 			    WHOLE_QUAD_MODE(0),
1562 			    BARRIER(1));
1563 
1564     /* 14 srcX.x DOT4 - mask */
1565     shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1),
1566                              SRC0_REL(ABSOLUTE),
1567                              SRC0_ELEM(ELEM_X),
1568                              SRC0_NEG(0),
1569                              SRC1_SEL(ALU_SRC_KCACHE0_BASE + 0),
1570                              SRC1_REL(ABSOLUTE),
1571                              SRC1_ELEM(ELEM_X),
1572                              SRC1_NEG(0),
1573                              INDEX_MODE(SQ_INDEX_LOOP),
1574                              PRED_SEL(SQ_PRED_SEL_OFF),
1575                              LAST(0));
1576     shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
1577                                  SRC1_ABS(0),
1578                                  UPDATE_EXECUTE_MASK(0),
1579                                  UPDATE_PRED(0),
1580                                  WRITE_MASK(1),
1581                                  OMOD(SQ_ALU_OMOD_OFF),
1582                                  ALU_INST(SQ_OP2_INST_DOT4),
1583                                  BANK_SWIZZLE(SQ_ALU_VEC_012),
1584                                  DST_GPR(3),
1585                                  DST_REL(ABSOLUTE),
1586                                  DST_ELEM(ELEM_X),
1587                                  CLAMP(0));
1588 
1589     /* 15 srcX.y DOT4 - mask */
1590     shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1),
1591                              SRC0_REL(ABSOLUTE),
1592                              SRC0_ELEM(ELEM_Y),
1593                              SRC0_NEG(0),
1594                              SRC1_SEL(ALU_SRC_KCACHE0_BASE + 0),
1595                              SRC1_REL(ABSOLUTE),
1596                              SRC1_ELEM(ELEM_Y),
1597                              SRC1_NEG(0),
1598                              INDEX_MODE(SQ_INDEX_LOOP),
1599                              PRED_SEL(SQ_PRED_SEL_OFF),
1600                              LAST(0));
1601     shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
1602                                  SRC1_ABS(0),
1603                                  UPDATE_EXECUTE_MASK(0),
1604                                  UPDATE_PRED(0),
1605                                  WRITE_MASK(0),
1606                                  OMOD(SQ_ALU_OMOD_OFF),
1607                                  ALU_INST(SQ_OP2_INST_DOT4),
1608                                  BANK_SWIZZLE(SQ_ALU_VEC_012),
1609                                  DST_GPR(3),
1610                                  DST_REL(ABSOLUTE),
1611                                  DST_ELEM(ELEM_Y),
1612                                  CLAMP(0));
1613 
1614     /* 16 srcX.z DOT4 - mask */
1615     shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1),
1616                              SRC0_REL(ABSOLUTE),
1617                              SRC0_ELEM(ELEM_Z),
1618                              SRC0_NEG(0),
1619                              SRC1_SEL(ALU_SRC_KCACHE0_BASE + 0),
1620                              SRC1_REL(ABSOLUTE),
1621                              SRC1_ELEM(ELEM_Z),
1622                              SRC1_NEG(0),
1623                              INDEX_MODE(SQ_INDEX_LOOP),
1624                              PRED_SEL(SQ_PRED_SEL_OFF),
1625                              LAST(0));
1626     shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
1627                                  SRC1_ABS(0),
1628                                  UPDATE_EXECUTE_MASK(0),
1629                                  UPDATE_PRED(0),
1630                                  WRITE_MASK(0),
1631                                  OMOD(SQ_ALU_OMOD_OFF),
1632                                  ALU_INST(SQ_OP2_INST_DOT4),
1633                                  BANK_SWIZZLE(SQ_ALU_VEC_012),
1634                                  DST_GPR(3),
1635                                  DST_REL(ABSOLUTE),
1636                                  DST_ELEM(ELEM_Z),
1637                                  CLAMP(0));
1638 
1639     /* 17 srcX.w DOT4 - mask */
1640     shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1),
1641                              SRC0_REL(ABSOLUTE),
1642                              SRC0_ELEM(ELEM_W),
1643                              SRC0_NEG(0),
1644                              SRC1_SEL(ALU_SRC_KCACHE0_BASE + 0),
1645                              SRC1_REL(ABSOLUTE),
1646                              SRC1_ELEM(ELEM_W),
1647                              SRC1_NEG(0),
1648                              INDEX_MODE(SQ_INDEX_LOOP),
1649                              PRED_SEL(SQ_PRED_SEL_OFF),
1650                              LAST(1));
1651     shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
1652                                  SRC1_ABS(0),
1653                                  UPDATE_EXECUTE_MASK(0),
1654                                  UPDATE_PRED(0),
1655                                  WRITE_MASK(0),
1656                                  OMOD(SQ_ALU_OMOD_OFF),
1657                                  ALU_INST(SQ_OP2_INST_DOT4),
1658                                  BANK_SWIZZLE(SQ_ALU_VEC_012),
1659                                  DST_GPR(3),
1660                                  DST_REL(ABSOLUTE),
1661                                  DST_ELEM(ELEM_W),
1662                                  CLAMP(0));
1663 
1664     /* 18 srcY.x DOT4 - mask */
1665     shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1),
1666                              SRC0_REL(ABSOLUTE),
1667                              SRC0_ELEM(ELEM_X),
1668                              SRC0_NEG(0),
1669                              SRC1_SEL(ALU_SRC_KCACHE0_BASE + 1),
1670                              SRC1_REL(ABSOLUTE),
1671                              SRC1_ELEM(ELEM_X),
1672                              SRC1_NEG(0),
1673                              INDEX_MODE(SQ_INDEX_LOOP),
1674                              PRED_SEL(SQ_PRED_SEL_OFF),
1675                              LAST(0));
1676     shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
1677                                  SRC1_ABS(0),
1678                                  UPDATE_EXECUTE_MASK(0),
1679                                  UPDATE_PRED(0),
1680                                  WRITE_MASK(0),
1681                                  OMOD(SQ_ALU_OMOD_OFF),
1682                                  ALU_INST(SQ_OP2_INST_DOT4),
1683                                  BANK_SWIZZLE(SQ_ALU_VEC_012),
1684                                  DST_GPR(3),
1685                                  DST_REL(ABSOLUTE),
1686                                  DST_ELEM(ELEM_X),
1687                                  CLAMP(0));
1688 
1689     /* 19 srcY.y DOT4 - mask */
1690     shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1),
1691                              SRC0_REL(ABSOLUTE),
1692                              SRC0_ELEM(ELEM_Y),
1693                              SRC0_NEG(0),
1694                              SRC1_SEL(ALU_SRC_KCACHE0_BASE + 1),
1695                              SRC1_REL(ABSOLUTE),
1696                              SRC1_ELEM(ELEM_Y),
1697                              SRC1_NEG(0),
1698                              INDEX_MODE(SQ_INDEX_LOOP),
1699                              PRED_SEL(SQ_PRED_SEL_OFF),
1700                              LAST(0));
1701     shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
1702                                  SRC1_ABS(0),
1703                                  UPDATE_EXECUTE_MASK(0),
1704                                  UPDATE_PRED(0),
1705                                  WRITE_MASK(1),
1706                                  OMOD(SQ_ALU_OMOD_OFF),
1707                                  ALU_INST(SQ_OP2_INST_DOT4),
1708                                  BANK_SWIZZLE(SQ_ALU_VEC_012),
1709                                  DST_GPR(3),
1710                                  DST_REL(ABSOLUTE),
1711                                  DST_ELEM(ELEM_Y),
1712                                  CLAMP(0));
1713 
1714     /* 20 srcY.z DOT4 - mask */
1715     shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1),
1716                              SRC0_REL(ABSOLUTE),
1717                              SRC0_ELEM(ELEM_Z),
1718                              SRC0_NEG(0),
1719                              SRC1_SEL(ALU_SRC_KCACHE0_BASE + 1),
1720                              SRC1_REL(ABSOLUTE),
1721                              SRC1_ELEM(ELEM_Z),
1722                              SRC1_NEG(0),
1723                              INDEX_MODE(SQ_INDEX_LOOP),
1724                              PRED_SEL(SQ_PRED_SEL_OFF),
1725                              LAST(0));
1726     shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
1727                                  SRC1_ABS(0),
1728                                  UPDATE_EXECUTE_MASK(0),
1729                                  UPDATE_PRED(0),
1730                                  WRITE_MASK(0),
1731                                  OMOD(SQ_ALU_OMOD_OFF),
1732                                  ALU_INST(SQ_OP2_INST_DOT4),
1733                                  BANK_SWIZZLE(SQ_ALU_VEC_012),
1734                                  DST_GPR(3),
1735                                  DST_REL(ABSOLUTE),
1736                                  DST_ELEM(ELEM_Z),
1737                                  CLAMP(0));
1738 
1739     /* 21 srcY.w DOT4 - mask */
1740     shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1),
1741                              SRC0_REL(ABSOLUTE),
1742                              SRC0_ELEM(ELEM_W),
1743                              SRC0_NEG(0),
1744                              SRC1_SEL(ALU_SRC_KCACHE0_BASE + 1),
1745                              SRC1_REL(ABSOLUTE),
1746                              SRC1_ELEM(ELEM_W),
1747                              SRC1_NEG(0),
1748                              INDEX_MODE(SQ_INDEX_LOOP),
1749                              PRED_SEL(SQ_PRED_SEL_OFF),
1750                              LAST(1));
1751     shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
1752                                  SRC1_ABS(0),
1753                                  UPDATE_EXECUTE_MASK(0),
1754                                  UPDATE_PRED(0),
1755                                  WRITE_MASK(0),
1756                                  OMOD(SQ_ALU_OMOD_OFF),
1757                                  ALU_INST(SQ_OP2_INST_DOT4),
1758                                  BANK_SWIZZLE(SQ_ALU_VEC_012),
1759                                  DST_GPR(3),
1760                                  DST_REL(ABSOLUTE),
1761                                  DST_ELEM(ELEM_W),
1762                                  CLAMP(0));
1763 
1764     /* 22 maskX.x DOT4 - mask */
1765     shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
1766                              SRC0_REL(ABSOLUTE),
1767                              SRC0_ELEM(ELEM_X),
1768                              SRC0_NEG(0),
1769                              SRC1_SEL(ALU_SRC_KCACHE0_BASE + 2),
1770                              SRC1_REL(ABSOLUTE),
1771                              SRC1_ELEM(ELEM_X),
1772                              SRC1_NEG(0),
1773                              INDEX_MODE(SQ_INDEX_LOOP),
1774                              PRED_SEL(SQ_PRED_SEL_OFF),
1775                              LAST(0));
1776     shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
1777                                  SRC1_ABS(0),
1778                                  UPDATE_EXECUTE_MASK(0),
1779                                  UPDATE_PRED(0),
1780                                  WRITE_MASK(1),
1781                                  OMOD(SQ_ALU_OMOD_OFF),
1782                                  ALU_INST(SQ_OP2_INST_DOT4),
1783                                  BANK_SWIZZLE(SQ_ALU_VEC_012),
1784                                  DST_GPR(4),
1785                                  DST_REL(ABSOLUTE),
1786                                  DST_ELEM(ELEM_X),
1787                                  CLAMP(0));
1788 
1789     /* 23 maskX.y DOT4 - mask */
1790     shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
1791                              SRC0_REL(ABSOLUTE),
1792                              SRC0_ELEM(ELEM_Y),
1793                              SRC0_NEG(0),
1794                              SRC1_SEL(ALU_SRC_KCACHE0_BASE + 2),
1795                              SRC1_REL(ABSOLUTE),
1796                              SRC1_ELEM(ELEM_Y),
1797                              SRC1_NEG(0),
1798                              INDEX_MODE(SQ_INDEX_LOOP),
1799                              PRED_SEL(SQ_PRED_SEL_OFF),
1800                              LAST(0));
1801     shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
1802                                  SRC1_ABS(0),
1803                                  UPDATE_EXECUTE_MASK(0),
1804                                  UPDATE_PRED(0),
1805                                  WRITE_MASK(0),
1806                                  OMOD(SQ_ALU_OMOD_OFF),
1807                                  ALU_INST(SQ_OP2_INST_DOT4),
1808                                  BANK_SWIZZLE(SQ_ALU_VEC_012),
1809                                  DST_GPR(4),
1810                                  DST_REL(ABSOLUTE),
1811                                  DST_ELEM(ELEM_Y),
1812                                  CLAMP(0));
1813 
1814     /* 24 maskX.z DOT4 - mask */
1815     shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
1816                              SRC0_REL(ABSOLUTE),
1817                              SRC0_ELEM(ELEM_Z),
1818                              SRC0_NEG(0),
1819                              SRC1_SEL(ALU_SRC_KCACHE0_BASE + 2),
1820                              SRC1_REL(ABSOLUTE),
1821                              SRC1_ELEM(ELEM_Z),
1822                              SRC1_NEG(0),
1823                              INDEX_MODE(SQ_INDEX_LOOP),
1824                              PRED_SEL(SQ_PRED_SEL_OFF),
1825                              LAST(0));
1826     shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
1827                                  SRC1_ABS(0),
1828                                  UPDATE_EXECUTE_MASK(0),
1829                                  UPDATE_PRED(0),
1830                                  WRITE_MASK(0),
1831                                  OMOD(SQ_ALU_OMOD_OFF),
1832                                  ALU_INST(SQ_OP2_INST_DOT4),
1833                                  BANK_SWIZZLE(SQ_ALU_VEC_012),
1834                                  DST_GPR(4),
1835                                  DST_REL(ABSOLUTE),
1836                                  DST_ELEM(ELEM_Z),
1837                                  CLAMP(0));
1838 
1839     /* 25 maskX.w DOT4 - mask */
1840     shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
1841                              SRC0_REL(ABSOLUTE),
1842                              SRC0_ELEM(ELEM_W),
1843                              SRC0_NEG(0),
1844                              SRC1_SEL(ALU_SRC_KCACHE0_BASE + 2),
1845                              SRC1_REL(ABSOLUTE),
1846                              SRC1_ELEM(ELEM_W),
1847                              SRC1_NEG(0),
1848                              INDEX_MODE(SQ_INDEX_LOOP),
1849                              PRED_SEL(SQ_PRED_SEL_OFF),
1850                              LAST(1));
1851     shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
1852                                  SRC1_ABS(0),
1853                                  UPDATE_EXECUTE_MASK(0),
1854                                  UPDATE_PRED(0),
1855                                  WRITE_MASK(0),
1856                                  OMOD(SQ_ALU_OMOD_OFF),
1857                                  ALU_INST(SQ_OP2_INST_DOT4),
1858                                  BANK_SWIZZLE(SQ_ALU_VEC_012),
1859                                  DST_GPR(4),
1860                                  DST_REL(ABSOLUTE),
1861                                  DST_ELEM(ELEM_W),
1862                                  CLAMP(0));
1863 
1864     /* 26 maskY.x DOT4 - mask */
1865     shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
1866                              SRC0_REL(ABSOLUTE),
1867                              SRC0_ELEM(ELEM_X),
1868                              SRC0_NEG(0),
1869                              SRC1_SEL(ALU_SRC_KCACHE0_BASE + 3),
1870                              SRC1_REL(ABSOLUTE),
1871                              SRC1_ELEM(ELEM_X),
1872                              SRC1_NEG(0),
1873                              INDEX_MODE(SQ_INDEX_LOOP),
1874                              PRED_SEL(SQ_PRED_SEL_OFF),
1875                              LAST(0));
1876     shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
1877                                  SRC1_ABS(0),
1878                                  UPDATE_EXECUTE_MASK(0),
1879                                  UPDATE_PRED(0),
1880                                  WRITE_MASK(0),
1881                                  OMOD(SQ_ALU_OMOD_OFF),
1882                                  ALU_INST(SQ_OP2_INST_DOT4),
1883                                  BANK_SWIZZLE(SQ_ALU_VEC_012),
1884                                  DST_GPR(4),
1885                                  DST_REL(ABSOLUTE),
1886                                  DST_ELEM(ELEM_X),
1887                                  CLAMP(0));
1888 
1889     /* 27 maskY.y DOT4 - mask */
1890     shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
1891                              SRC0_REL(ABSOLUTE),
1892                              SRC0_ELEM(ELEM_Y),
1893                              SRC0_NEG(0),
1894                              SRC1_SEL(ALU_SRC_KCACHE0_BASE + 3),
1895                              SRC1_REL(ABSOLUTE),
1896                              SRC1_ELEM(ELEM_Y),
1897                              SRC1_NEG(0),
1898                              INDEX_MODE(SQ_INDEX_LOOP),
1899                              PRED_SEL(SQ_PRED_SEL_OFF),
1900                              LAST(0));
1901     shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
1902                                  SRC1_ABS(0),
1903                                  UPDATE_EXECUTE_MASK(0),
1904                                  UPDATE_PRED(0),
1905                                  WRITE_MASK(1),
1906                                  OMOD(SQ_ALU_OMOD_OFF),
1907                                  ALU_INST(SQ_OP2_INST_DOT4),
1908                                  BANK_SWIZZLE(SQ_ALU_VEC_012),
1909                                  DST_GPR(4),
1910                                  DST_REL(ABSOLUTE),
1911                                  DST_ELEM(ELEM_Y),
1912                                  CLAMP(0));
1913 
1914     /* 28 maskY.z DOT4 - mask */
1915     shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
1916                              SRC0_REL(ABSOLUTE),
1917                              SRC0_ELEM(ELEM_Z),
1918                              SRC0_NEG(0),
1919                              SRC1_SEL(ALU_SRC_KCACHE0_BASE + 3),
1920                              SRC1_REL(ABSOLUTE),
1921                              SRC1_ELEM(ELEM_Z),
1922                              SRC1_NEG(0),
1923                              INDEX_MODE(SQ_INDEX_LOOP),
1924                              PRED_SEL(SQ_PRED_SEL_OFF),
1925                              LAST(0));
1926     shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
1927                                  SRC1_ABS(0),
1928                                  UPDATE_EXECUTE_MASK(0),
1929                                  UPDATE_PRED(0),
1930                                  WRITE_MASK(0),
1931                                  OMOD(SQ_ALU_OMOD_OFF),
1932                                  ALU_INST(SQ_OP2_INST_DOT4),
1933                                  BANK_SWIZZLE(SQ_ALU_VEC_012),
1934                                  DST_GPR(4),
1935                                  DST_REL(ABSOLUTE),
1936                                  DST_ELEM(ELEM_Z),
1937                                  CLAMP(0));
1938 
1939     /* 29 maskY.w DOT4 - mask */
1940     shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
1941                              SRC0_REL(ABSOLUTE),
1942                              SRC0_ELEM(ELEM_W),
1943                              SRC0_NEG(0),
1944                              SRC1_SEL(ALU_SRC_KCACHE0_BASE + 3),
1945                              SRC1_REL(ABSOLUTE),
1946                              SRC1_ELEM(ELEM_W),
1947                              SRC1_NEG(0),
1948                              INDEX_MODE(SQ_INDEX_LOOP),
1949                              PRED_SEL(SQ_PRED_SEL_OFF),
1950                              LAST(1));
1951     shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
1952                                  SRC1_ABS(0),
1953                                  UPDATE_EXECUTE_MASK(0),
1954                                  UPDATE_PRED(0),
1955                                  WRITE_MASK(0),
1956                                  OMOD(SQ_ALU_OMOD_OFF),
1957                                  ALU_INST(SQ_OP2_INST_DOT4),
1958                                  BANK_SWIZZLE(SQ_ALU_VEC_012),
1959                                  DST_GPR(4),
1960                                  DST_REL(ABSOLUTE),
1961                                  DST_ELEM(ELEM_W),
1962                                  CLAMP(0));
1963 
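    /* 30 srcX / w - encoded as MUL: GPR1.x = GPR3.x * KCACHE0[0].w
     * (the constant presumably holds 1/w - confirm against the const upload) */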
1965     shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 3),
1966                              SRC0_REL(ABSOLUTE),
1967                              SRC0_ELEM(ELEM_X),
1968                              SRC0_NEG(0),
1969                              SRC1_SEL(ALU_SRC_KCACHE0_BASE + 0),
1970                              SRC1_REL(ABSOLUTE),
1971                              SRC1_ELEM(ELEM_W),
1972                              SRC1_NEG(0),
1973                              INDEX_MODE(SQ_INDEX_AR_X),
1974                              PRED_SEL(SQ_PRED_SEL_OFF),
1975                              LAST(1));
1976     shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
1977                                  SRC1_ABS(0),
1978                                  UPDATE_EXECUTE_MASK(0),
1979                                  UPDATE_PRED(0),
1980                                  WRITE_MASK(1),
1981                                  OMOD(SQ_ALU_OMOD_OFF),
1982                                  ALU_INST(SQ_OP2_INST_MUL),
1983                                  BANK_SWIZZLE(SQ_ALU_VEC_012),
1984                                  DST_GPR(1),
1985                                  DST_REL(ABSOLUTE),
1986                                  DST_ELEM(ELEM_X),
1987                                  CLAMP(0));
1988 
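    /* 31 srcY / h - encoded as MUL: GPR1.y = GPR3.y * KCACHE0[1].w
     * (the constant presumably holds 1/h - confirm against the const upload) */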
1990     shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 3),
1991                              SRC0_REL(ABSOLUTE),
1992                              SRC0_ELEM(ELEM_Y),
1993                              SRC0_NEG(0),
1994                              SRC1_SEL(ALU_SRC_KCACHE0_BASE + 1),
1995                              SRC1_REL(ABSOLUTE),
1996                              SRC1_ELEM(ELEM_W),
1997                              SRC1_NEG(0),
1998                              INDEX_MODE(SQ_INDEX_AR_X),
1999                              PRED_SEL(SQ_PRED_SEL_OFF),
2000                              LAST(1));
2001     shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
2002                                  SRC1_ABS(0),
2003                                  UPDATE_EXECUTE_MASK(0),
2004                                  UPDATE_PRED(0),
2005                                  WRITE_MASK(1),
2006                                  OMOD(SQ_ALU_OMOD_OFF),
2007                                  ALU_INST(SQ_OP2_INST_MUL),
2008                                  BANK_SWIZZLE(SQ_ALU_VEC_012),
2009                                  DST_GPR(1),
2010                                  DST_REL(ABSOLUTE),
2011                                  DST_ELEM(ELEM_Y),
2012                                  CLAMP(0));
2013 
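    /* 32 maskX / w - encoded as MUL: GPR0.x = GPR4.x * KCACHE0[2].w
     * (the constant presumably holds 1/w - confirm against the const upload) */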
2015     shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 4),
2016                              SRC0_REL(ABSOLUTE),
2017                              SRC0_ELEM(ELEM_X),
2018                              SRC0_NEG(0),
2019                              SRC1_SEL(ALU_SRC_KCACHE0_BASE + 2),
2020                              SRC1_REL(ABSOLUTE),
2021                              SRC1_ELEM(ELEM_W),
2022                              SRC1_NEG(0),
2023                              INDEX_MODE(SQ_INDEX_AR_X),
2024                              PRED_SEL(SQ_PRED_SEL_OFF),
2025                              LAST(1));
2026     shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
2027                                  SRC1_ABS(0),
2028                                  UPDATE_EXECUTE_MASK(0),
2029                                  UPDATE_PRED(0),
2030                                  WRITE_MASK(1),
2031                                  OMOD(SQ_ALU_OMOD_OFF),
2032                                  ALU_INST(SQ_OP2_INST_MUL),
2033                                  BANK_SWIZZLE(SQ_ALU_VEC_012),
2034                                  DST_GPR(0),
2035                                  DST_REL(ABSOLUTE),
2036                                  DST_ELEM(ELEM_X),
2037                                  CLAMP(0));
2038 
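    /* 33 maskY / h - encoded as MUL: GPR0.y = GPR4.y * KCACHE0[3].w
     * (the constant presumably holds 1/h - confirm against the const upload) */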
2040     shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 4),
2041                              SRC0_REL(ABSOLUTE),
2042                              SRC0_ELEM(ELEM_Y),
2043                              SRC0_NEG(0),
2044                              SRC1_SEL(ALU_SRC_KCACHE0_BASE + 3),
2045                              SRC1_REL(ABSOLUTE),
2046                              SRC1_ELEM(ELEM_W),
2047                              SRC1_NEG(0),
2048                              INDEX_MODE(SQ_INDEX_AR_X),
2049                              PRED_SEL(SQ_PRED_SEL_OFF),
2050                              LAST(1));
2051     shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
2052                                  SRC1_ABS(0),
2053                                  UPDATE_EXECUTE_MASK(0),
2054                                  UPDATE_PRED(0),
2055                                  WRITE_MASK(1),
2056                                  OMOD(SQ_ALU_OMOD_OFF),
2057                                  ALU_INST(SQ_OP2_INST_MUL),
2058                                  BANK_SWIZZLE(SQ_ALU_VEC_012),
2059                                  DST_GPR(0),
2060                                  DST_REL(ABSOLUTE),
2061                                  DST_ELEM(ELEM_Y),
2062                                  CLAMP(0));
2063 
2064     /* 34 srcX.x DOT4 - non-mask */
2065     shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
2066                              SRC0_REL(ABSOLUTE),
2067                              SRC0_ELEM(ELEM_X),
2068                              SRC0_NEG(0),
2069                              SRC1_SEL(ALU_SRC_KCACHE0_BASE + 0),
2070                              SRC1_REL(ABSOLUTE),
2071                              SRC1_ELEM(ELEM_X),
2072                              SRC1_NEG(0),
2073                              INDEX_MODE(SQ_INDEX_LOOP),
2074                              PRED_SEL(SQ_PRED_SEL_OFF),
2075                              LAST(0));
2076     shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
2077                                  SRC1_ABS(0),
2078                                  UPDATE_EXECUTE_MASK(0),
2079                                  UPDATE_PRED(0),
2080                                  WRITE_MASK(1),
2081                                  OMOD(SQ_ALU_OMOD_OFF),
2082                                  ALU_INST(SQ_OP2_INST_DOT4),
2083                                  BANK_SWIZZLE(SQ_ALU_VEC_012),
2084                                  DST_GPR(2),
2085                                  DST_REL(ABSOLUTE),
2086                                  DST_ELEM(ELEM_X),
2087                                  CLAMP(0));
2088 
2089     /* 35 srcX.y DOT4 - non-mask */
2090     shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
2091                              SRC0_REL(ABSOLUTE),
2092                              SRC0_ELEM(ELEM_Y),
2093                              SRC0_NEG(0),
2094                              SRC1_SEL(ALU_SRC_KCACHE0_BASE + 0),
2095                              SRC1_REL(ABSOLUTE),
2096                              SRC1_ELEM(ELEM_Y),
2097                              SRC1_NEG(0),
2098                              INDEX_MODE(SQ_INDEX_LOOP),
2099                              PRED_SEL(SQ_PRED_SEL_OFF),
2100                              LAST(0));
2101     shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
2102                                  SRC1_ABS(0),
2103                                  UPDATE_EXECUTE_MASK(0),
2104                                  UPDATE_PRED(0),
2105                                  WRITE_MASK(0),
2106                                  OMOD(SQ_ALU_OMOD_OFF),
2107                                  ALU_INST(SQ_OP2_INST_DOT4),
2108                                  BANK_SWIZZLE(SQ_ALU_VEC_012),
2109                                  DST_GPR(2),
2110                                  DST_REL(ABSOLUTE),
2111                                  DST_ELEM(ELEM_Y),
2112                                  CLAMP(0));
2113 
2114     /* 36 srcX.z DOT4 - non-mask */
2115     shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
2116                              SRC0_REL(ABSOLUTE),
2117                              SRC0_ELEM(ELEM_Z),
2118                              SRC0_NEG(0),
2119                              SRC1_SEL(ALU_SRC_KCACHE0_BASE + 0),
2120                              SRC1_REL(ABSOLUTE),
2121                              SRC1_ELEM(ELEM_Z),
2122                              SRC1_NEG(0),
2123                              INDEX_MODE(SQ_INDEX_LOOP),
2124                              PRED_SEL(SQ_PRED_SEL_OFF),
2125                              LAST(0));
2126     shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
2127                                  SRC1_ABS(0),
2128                                  UPDATE_EXECUTE_MASK(0),
2129                                  UPDATE_PRED(0),
2130                                  WRITE_MASK(0),
2131                                  OMOD(SQ_ALU_OMOD_OFF),
2132                                  ALU_INST(SQ_OP2_INST_DOT4),
2133                                  BANK_SWIZZLE(SQ_ALU_VEC_012),
2134                                  DST_GPR(2),
2135                                  DST_REL(ABSOLUTE),
2136                                  DST_ELEM(ELEM_Z),
2137                                  CLAMP(0));
2138 
2139     /* 37 srcX.w DOT4 - non-mask */
2140     shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
2141                              SRC0_REL(ABSOLUTE),
2142                              SRC0_ELEM(ELEM_W),
2143                              SRC0_NEG(0),
2144                              SRC1_SEL(ALU_SRC_KCACHE0_BASE + 0),
2145                              SRC1_REL(ABSOLUTE),
2146                              SRC1_ELEM(ELEM_W),
2147                              SRC1_NEG(0),
2148                              INDEX_MODE(SQ_INDEX_LOOP),
2149                              PRED_SEL(SQ_PRED_SEL_OFF),
2150                              LAST(1));
2151     shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
2152                                  SRC1_ABS(0),
2153                                  UPDATE_EXECUTE_MASK(0),
2154                                  UPDATE_PRED(0),
2155                                  WRITE_MASK(0),
2156                                  OMOD(SQ_ALU_OMOD_OFF),
2157                                  ALU_INST(SQ_OP2_INST_DOT4),
2158                                  BANK_SWIZZLE(SQ_ALU_VEC_012),
2159                                  DST_GPR(2),
2160                                  DST_REL(ABSOLUTE),
2161                                  DST_ELEM(ELEM_W),
2162                                  CLAMP(0));
2163 
2164     /* 38 srcY.x DOT4 - non-mask */
2165     shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
2166                              SRC0_REL(ABSOLUTE),
2167                              SRC0_ELEM(ELEM_X),
2168                              SRC0_NEG(0),
2169                              SRC1_SEL(ALU_SRC_KCACHE0_BASE + 1),
2170                              SRC1_REL(ABSOLUTE),
2171                              SRC1_ELEM(ELEM_X),
2172                              SRC1_NEG(0),
2173                              INDEX_MODE(SQ_INDEX_LOOP),
2174                              PRED_SEL(SQ_PRED_SEL_OFF),
2175                              LAST(0));
2176     shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
2177                                  SRC1_ABS(0),
2178                                  UPDATE_EXECUTE_MASK(0),
2179                                  UPDATE_PRED(0),
2180                                  WRITE_MASK(0),
2181                                  OMOD(SQ_ALU_OMOD_OFF),
2182                                  ALU_INST(SQ_OP2_INST_DOT4),
2183                                  BANK_SWIZZLE(SQ_ALU_VEC_012),
2184                                  DST_GPR(2),
2185                                  DST_REL(ABSOLUTE),
2186                                  DST_ELEM(ELEM_X),
2187                                  CLAMP(0));
2188 
2189     /* 39 srcY.y DOT4 - non-mask */
2190     shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
2191                              SRC0_REL(ABSOLUTE),
2192                              SRC0_ELEM(ELEM_Y),
2193                              SRC0_NEG(0),
2194                              SRC1_SEL(ALU_SRC_KCACHE0_BASE + 1),
2195                              SRC1_REL(ABSOLUTE),
2196                              SRC1_ELEM(ELEM_Y),
2197                              SRC1_NEG(0),
2198                              INDEX_MODE(SQ_INDEX_LOOP),
2199                              PRED_SEL(SQ_PRED_SEL_OFF),
2200                              LAST(0));
2201     shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
2202                                  SRC1_ABS(0),
2203                                  UPDATE_EXECUTE_MASK(0),
2204                                  UPDATE_PRED(0),
2205                                  WRITE_MASK(1),
2206                                  OMOD(SQ_ALU_OMOD_OFF),
2207                                  ALU_INST(SQ_OP2_INST_DOT4),
2208                                  BANK_SWIZZLE(SQ_ALU_VEC_012),
2209                                  DST_GPR(2),
2210                                  DST_REL(ABSOLUTE),
2211                                  DST_ELEM(ELEM_Y),
2212                                  CLAMP(0));
2213 
2214     /* 40 srcY.z DOT4 - non-mask */
2215     shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
2216                              SRC0_REL(ABSOLUTE),
2217                              SRC0_ELEM(ELEM_Z),
2218                              SRC0_NEG(0),
2219                              SRC1_SEL(ALU_SRC_KCACHE0_BASE + 1),
2220                              SRC1_REL(ABSOLUTE),
2221                              SRC1_ELEM(ELEM_Z),
2222                              SRC1_NEG(0),
2223                              INDEX_MODE(SQ_INDEX_LOOP),
2224                              PRED_SEL(SQ_PRED_SEL_OFF),
2225                              LAST(0));
2226     shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
2227                                  SRC1_ABS(0),
2228                                  UPDATE_EXECUTE_MASK(0),
2229                                  UPDATE_PRED(0),
2230                                  WRITE_MASK(0),
2231                                  OMOD(SQ_ALU_OMOD_OFF),
2232                                  ALU_INST(SQ_OP2_INST_DOT4),
2233                                  BANK_SWIZZLE(SQ_ALU_VEC_012),
2234                                  DST_GPR(2),
2235                                  DST_REL(ABSOLUTE),
2236                                  DST_ELEM(ELEM_Z),
2237                                  CLAMP(0));
2238 
2239     /* 41 srcY.w DOT4 - non-mask */
2240     shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
2241                              SRC0_REL(ABSOLUTE),
2242                              SRC0_ELEM(ELEM_W),
2243                              SRC0_NEG(0),
2244                              SRC1_SEL(ALU_SRC_KCACHE0_BASE + 1),
2245                              SRC1_REL(ABSOLUTE),
2246                              SRC1_ELEM(ELEM_W),
2247                              SRC1_NEG(0),
2248                              INDEX_MODE(SQ_INDEX_LOOP),
2249                              PRED_SEL(SQ_PRED_SEL_OFF),
2250                              LAST(1));
2251     shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
2252                                  SRC1_ABS(0),
2253                                  UPDATE_EXECUTE_MASK(0),
2254                                  UPDATE_PRED(0),
2255                                  WRITE_MASK(0),
2256                                  OMOD(SQ_ALU_OMOD_OFF),
2257                                  ALU_INST(SQ_OP2_INST_DOT4),
2258                                  BANK_SWIZZLE(SQ_ALU_VEC_012),
2259                                  DST_GPR(2),
2260                                  DST_REL(ABSOLUTE),
2261                                  DST_ELEM(ELEM_W),
2262                                  CLAMP(0));
2263 
2264     /* 42 srcX / w */
2265     shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 2),
2266                              SRC0_REL(ABSOLUTE),
2267                              SRC0_ELEM(ELEM_X),
2268                              SRC0_NEG(0),
2269                              SRC1_SEL(ALU_SRC_KCACHE0_BASE + 0),
2270                              SRC1_REL(ABSOLUTE),
2271                              SRC1_ELEM(ELEM_W),
2272                              SRC1_NEG(0),
2273                              INDEX_MODE(SQ_INDEX_AR_X),
2274                              PRED_SEL(SQ_PRED_SEL_OFF),
2275                              LAST(1));
2276     shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
2277                                  SRC1_ABS(0),
2278                                  UPDATE_EXECUTE_MASK(0),
2279                                  UPDATE_PRED(0),
2280                                  WRITE_MASK(1),
2281                                  OMOD(SQ_ALU_OMOD_OFF),
2282                                  ALU_INST(SQ_OP2_INST_MUL),
2283                                  BANK_SWIZZLE(SQ_ALU_VEC_012),
2284                                  DST_GPR(0),
2285                                  DST_REL(ABSOLUTE),
2286                                  DST_ELEM(ELEM_X),
2287                                  CLAMP(0));
2288 
2289     /* 43 srcY / h */
2290     shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 2),
2291                              SRC0_REL(ABSOLUTE),
2292                              SRC0_ELEM(ELEM_Y),
2293                              SRC0_NEG(0),
2294                              SRC1_SEL(ALU_SRC_KCACHE0_BASE + 1),
2295                              SRC1_REL(ABSOLUTE),
2296                              SRC1_ELEM(ELEM_W),
2297                              SRC1_NEG(0),
2298                              INDEX_MODE(SQ_INDEX_AR_X),
2299                              PRED_SEL(SQ_PRED_SEL_OFF),
2300                              LAST(1));
2301     shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
2302                                  SRC1_ABS(0),
2303                                  UPDATE_EXECUTE_MASK(0),
2304                                  UPDATE_PRED(0),
2305                                  WRITE_MASK(1),
2306                                  OMOD(SQ_ALU_OMOD_OFF),
2307                                  ALU_INST(SQ_OP2_INST_MUL),
2308                                  BANK_SWIZZLE(SQ_ALU_VEC_012),
2309                                  DST_GPR(0),
2310                                  DST_REL(ABSOLUTE),
2311                                  DST_ELEM(ELEM_Y),
2312                                  CLAMP(0));
2313 
2314     /* mask vfetch - 44/45 - dst */
2315     shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
2316 			     FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
2317 			     FETCH_WHOLE_QUAD(0),
2318 			     BUFFER_ID(0),
2319 			     SRC_GPR(0),
2320 			     SRC_REL(ABSOLUTE),
2321 			     SRC_SEL_X(SQ_SEL_X),
2322 			     MEGA_FETCH_COUNT(24));
2323     shader[i++] = VTX_DWORD1_GPR(DST_GPR(2),
2324 				 DST_REL(0),
2325 				 DST_SEL_X(SQ_SEL_X),
2326 				 DST_SEL_Y(SQ_SEL_Y),
2327 				 DST_SEL_Z(SQ_SEL_0),
2328 				 DST_SEL_W(SQ_SEL_1),
2329 				 USE_CONST_FIELDS(0),
2330 				 DATA_FORMAT(FMT_32_32_FLOAT),
2331 				 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED),
2332 				 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
2333 				 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
2334     shader[i++] = VTX_DWORD2(OFFSET(0),
2335 #if X_BYTE_ORDER == X_BIG_ENDIAN
2336                              ENDIAN_SWAP(SQ_ENDIAN_8IN32),
2337 #else
2338                              ENDIAN_SWAP(SQ_ENDIAN_NONE),
2339 #endif
2340 			     CONST_BUF_NO_STRIDE(0),
2341 			     MEGA_FETCH(1),
2342 			     ALT_CONST(0),
2343 			     BUFFER_INDEX_MODE(SQ_CF_INDEX_NONE));
2344     shader[i++] = VTX_DWORD_PAD;
2345     /* 46/47 - src */
2346     shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
2347 			     FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
2348 			     FETCH_WHOLE_QUAD(0),
2349 			     BUFFER_ID(0),
2350 			     SRC_GPR(0),
2351 			     SRC_REL(ABSOLUTE),
2352 			     SRC_SEL_X(SQ_SEL_X),
2353 			     MEGA_FETCH_COUNT(8));
2354     shader[i++] = VTX_DWORD1_GPR(DST_GPR(1),
2355 				 DST_REL(0),
2356 				 DST_SEL_X(SQ_SEL_X),
2357 				 DST_SEL_Y(SQ_SEL_Y),
2358 				 DST_SEL_Z(SQ_SEL_1),
2359 				 DST_SEL_W(SQ_SEL_0),
2360 				 USE_CONST_FIELDS(0),
2361 				 DATA_FORMAT(FMT_32_32_FLOAT),
2362 				 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED),
2363 				 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
2364 				 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
2365     shader[i++] = VTX_DWORD2(OFFSET(8),
2366 #if X_BYTE_ORDER == X_BIG_ENDIAN
2367                              ENDIAN_SWAP(SQ_ENDIAN_8IN32),
2368 #else
2369                              ENDIAN_SWAP(SQ_ENDIAN_NONE),
2370 #endif
2371 			     CONST_BUF_NO_STRIDE(0),
2372 			     MEGA_FETCH(0),
2373 			     ALT_CONST(0),
2374 			     BUFFER_INDEX_MODE(SQ_CF_INDEX_NONE));
2375     shader[i++] = VTX_DWORD_PAD;
2376     /* 48/49 - mask */
2377     shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
2378 			     FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
2379 			     FETCH_WHOLE_QUAD(0),
2380 			     BUFFER_ID(0),
2381 			     SRC_GPR(0),
2382 			     SRC_REL(ABSOLUTE),
2383 			     SRC_SEL_X(SQ_SEL_X),
2384 			     MEGA_FETCH_COUNT(8));
2385     shader[i++] = VTX_DWORD1_GPR(DST_GPR(0),
2386 				 DST_REL(0),
2387 				 DST_SEL_X(SQ_SEL_X),
2388 				 DST_SEL_Y(SQ_SEL_Y),
2389 				 DST_SEL_Z(SQ_SEL_1),
2390 				 DST_SEL_W(SQ_SEL_0),
2391 				 USE_CONST_FIELDS(0),
2392 				 DATA_FORMAT(FMT_32_32_FLOAT),
2393 				 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED),
2394 				 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
2395 				 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
2396     shader[i++] = VTX_DWORD2(OFFSET(16),
2397 #if X_BYTE_ORDER == X_BIG_ENDIAN
2398                              ENDIAN_SWAP(SQ_ENDIAN_8IN32),
2399 #else
2400                              ENDIAN_SWAP(SQ_ENDIAN_NONE),
2401 #endif
2402 			     CONST_BUF_NO_STRIDE(0),
2403 			     MEGA_FETCH(0),
2404 			     ALT_CONST(0),
2405 			     BUFFER_INDEX_MODE(SQ_CF_INDEX_NONE));
2406     shader[i++] = VTX_DWORD_PAD;
2407 
2408     /* no mask vfetch - 50/51 - dst */
2409     shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
2410 			     FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
2411 			     FETCH_WHOLE_QUAD(0),
2412 			     BUFFER_ID(0),
2413 			     SRC_GPR(0),
2414 			     SRC_REL(ABSOLUTE),
2415 			     SRC_SEL_X(SQ_SEL_X),
2416 			     MEGA_FETCH_COUNT(16));
2417     shader[i++] = VTX_DWORD1_GPR(DST_GPR(1),
2418 				 DST_REL(0),
2419 				 DST_SEL_X(SQ_SEL_X),
2420 				 DST_SEL_Y(SQ_SEL_Y),
2421 				 DST_SEL_Z(SQ_SEL_0),
2422 				 DST_SEL_W(SQ_SEL_1),
2423 				 USE_CONST_FIELDS(0),
2424 				 DATA_FORMAT(FMT_32_32_FLOAT),
2425 				 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED),
2426 				 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
2427 				 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
2428     shader[i++] = VTX_DWORD2(OFFSET(0),
2429 #if X_BYTE_ORDER == X_BIG_ENDIAN
2430                              ENDIAN_SWAP(SQ_ENDIAN_8IN32),
2431 #else
2432                              ENDIAN_SWAP(SQ_ENDIAN_NONE),
2433 #endif
2434 			     CONST_BUF_NO_STRIDE(0),
2435 			     MEGA_FETCH(1),
2436 			     ALT_CONST(0),
2437 			     BUFFER_INDEX_MODE(SQ_CF_INDEX_NONE));
2438     shader[i++] = VTX_DWORD_PAD;
2439     /* 52/53 - src */
2440     shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
2441 			     FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
2442 			     FETCH_WHOLE_QUAD(0),
2443 			     BUFFER_ID(0),
2444 			     SRC_GPR(0),
2445 			     SRC_REL(ABSOLUTE),
2446 			     SRC_SEL_X(SQ_SEL_X),
2447 			     MEGA_FETCH_COUNT(8));
2448     shader[i++] = VTX_DWORD1_GPR(DST_GPR(0),
2449 				 DST_REL(0),
2450 				 DST_SEL_X(SQ_SEL_X),
2451 				 DST_SEL_Y(SQ_SEL_Y),
2452 				 DST_SEL_Z(SQ_SEL_1),
2453 				 DST_SEL_W(SQ_SEL_0),
2454 				 USE_CONST_FIELDS(0),
2455 				 DATA_FORMAT(FMT_32_32_FLOAT),
2456 				 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED),
2457 				 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
2458 				 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
2459     shader[i++] = VTX_DWORD2(OFFSET(8),
2460 #if X_BYTE_ORDER == X_BIG_ENDIAN
2461                              ENDIAN_SWAP(SQ_ENDIAN_8IN32),
2462 #else
2463                              ENDIAN_SWAP(SQ_ENDIAN_NONE),
2464 #endif
2465 			     CONST_BUF_NO_STRIDE(0),
2466 			     MEGA_FETCH(0),
2467                              ALT_CONST(0),
2468                              BUFFER_INDEX_MODE(SQ_CF_INDEX_NONE));
2469     shader[i++] = VTX_DWORD_PAD;
2470 
2471     return i;
2472 }
2473 
2474 /* comp ps --------------------------------------- */
evergreen_comp_ps(RADEONChipFamily ChipSet,uint32_t * shader)2475 int evergreen_comp_ps(RADEONChipFamily ChipSet, uint32_t *shader)
2476 {
2477     int i = 0;
2478 
2479     /* 0 */
2480     /* call interp-fetch-mask if boolean1 == true */
2481     shader[i++] = CF_DWORD0(ADDR(11),
2482 			    JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
2483     shader[i++] = CF_DWORD1(POP_COUNT(0),
2484                             CF_CONST(1),
2485                             COND(SQ_CF_COND_BOOL),
2486                             I_COUNT(0),
2487                             VALID_PIXEL_MODE(0),
2488                             END_OF_PROGRAM(0),
2489                             CF_INST(SQ_CF_INST_CALL),
2490                             WHOLE_QUAD_MODE(0),
2491                             BARRIER(0));
2492 
2493     /* 1 */
2494     /* call read-constant-mask if boolean1 == false */
2495     shader[i++] = CF_DWORD0(ADDR(14),
2496 			    JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
2497     shader[i++] = CF_DWORD1(POP_COUNT(0),
2498                             CF_CONST(1),
2499                             COND(SQ_CF_COND_NOT_BOOL),
2500                             I_COUNT(0),
2501                             VALID_PIXEL_MODE(0),
2502                             END_OF_PROGRAM(0),
2503                             CF_INST(SQ_CF_INST_CALL),
2504                             WHOLE_QUAD_MODE(0),
2505                             BARRIER(0));
2506 
2507     /* 2 */
2508     /* call interp-fetch-src if boolean0 == true */
2509     shader[i++] = CF_DWORD0(ADDR(6),
2510 			    JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
2511     shader[i++] = CF_DWORD1(POP_COUNT(0),
2512                             CF_CONST(0),
2513                             COND(SQ_CF_COND_BOOL),
2514                             I_COUNT(0),
2515                             VALID_PIXEL_MODE(0),
2516                             END_OF_PROGRAM(0),
2517                             CF_INST(SQ_CF_INST_CALL),
2518                             WHOLE_QUAD_MODE(0),
2519                             BARRIER(0));
2520 
2521     /* 3 */
2522     /* call read-constant-src if boolean0 == false */
2523     shader[i++] = CF_DWORD0(ADDR(9),
2524 			    JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
2525     shader[i++] = CF_DWORD1(POP_COUNT(0),
2526                             CF_CONST(0),
2527                             COND(SQ_CF_COND_NOT_BOOL),
2528                             I_COUNT(0),
2529                             VALID_PIXEL_MODE(0),
2530                             END_OF_PROGRAM(0),
2531                             CF_INST(SQ_CF_INST_CALL),
2532                             WHOLE_QUAD_MODE(0),
2533                             BARRIER(0));
2534     /* 4 */
    /* src IN mask (GPR0 := GPR0 .* GPR1) */
2536     shader[i++] = CF_ALU_DWORD0(ADDR(16),
2537 				KCACHE_BANK0(0),
2538 				KCACHE_BANK1(0),
2539 				KCACHE_MODE0(SQ_CF_KCACHE_NOP));
2540     shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
2541 				KCACHE_ADDR0(0),
2542 				KCACHE_ADDR1(0),
2543 				I_COUNT(4),
2544 				ALT_CONST(0),
2545 				CF_INST(SQ_CF_INST_ALU),
2546 				WHOLE_QUAD_MODE(0),
2547 				BARRIER(1));
2548 
2549     /* 5 */
2550     /* export pixel data */
2551     shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_PIXEL_MRT0),
2552 					  TYPE(SQ_EXPORT_PIXEL),
2553 					  RW_GPR(0),
2554 					  RW_REL(ABSOLUTE),
2555 					  INDEX_GPR(0),
2556 					  ELEM_SIZE(1));
2557     shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
2558 					       SRC_SEL_Y(SQ_SEL_Y),
2559 					       SRC_SEL_Z(SQ_SEL_Z),
2560 					       SRC_SEL_W(SQ_SEL_W),
2561 					       BURST_COUNT(1),
2562 					       VALID_PIXEL_MODE(0),
2563 					       END_OF_PROGRAM(1),
2564 					       CF_INST(SQ_CF_INST_EXPORT_DONE),
2565 					       MARK(0),
2566 					       BARRIER(1));
2567 
2568     /* subroutine interp-fetch-src */
2569 
2570     /* 6 */
2571     /* interpolate src */
2572     shader[i++] = CF_ALU_DWORD0(ADDR(20),
2573 				KCACHE_BANK0(0),
2574 				KCACHE_BANK1(0),
2575 				KCACHE_MODE0(SQ_CF_KCACHE_NOP));
2576     shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
2577 				KCACHE_ADDR0(0),
2578 				KCACHE_ADDR1(0),
2579 				I_COUNT(4),
2580 				ALT_CONST(0),
2581 				CF_INST(SQ_CF_INST_ALU),
2582 				WHOLE_QUAD_MODE(0),
2583 				BARRIER(1));
2584 
2585     /* 7 */
2586     /* texture fetch src into GPR0 */
2587     shader[i++] = CF_DWORD0(ADDR(24),
2588 			    JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
2589     shader[i++] = CF_DWORD1(POP_COUNT(0),
2590 			    CF_CONST(0),
2591 			    COND(SQ_CF_COND_ACTIVE),
2592 			    I_COUNT(1),
2593 			    VALID_PIXEL_MODE(0),
2594 			    END_OF_PROGRAM(0),
2595 			    CF_INST(SQ_CF_INST_TC),
2596 			    WHOLE_QUAD_MODE(0),
2597 			    BARRIER(1));
2598 
2599     /* 8 */
2600     /* return */
2601     shader[i++] = CF_DWORD0(ADDR(0),
2602 			    JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
2603     shader[i++] = CF_DWORD1(POP_COUNT(0),
2604 			    CF_CONST(0),
2605 			    COND(SQ_CF_COND_ACTIVE),
2606 			    I_COUNT(0),
2607 			    VALID_PIXEL_MODE(0),
2608 			    END_OF_PROGRAM(0),
2609 			    CF_INST(SQ_CF_INST_RETURN),
2610 			    WHOLE_QUAD_MODE(0),
2611 			    BARRIER(0));
2612 
2613     /* subroutine read-constant-src */
2614 
2615     /* 9 */
2616     /* read constants into GPR0 */
2617     shader[i++] = CF_ALU_DWORD0(ADDR(26),
2618 				KCACHE_BANK0(0),
2619 				KCACHE_BANK1(0),
2620 				KCACHE_MODE0(SQ_CF_KCACHE_LOCK_1));
2621     shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
2622 				KCACHE_ADDR0(0),
2623 				KCACHE_ADDR1(0),
2624 				I_COUNT(4),
2625 				ALT_CONST(1),
2626 				CF_INST(SQ_CF_INST_ALU),
2627 				WHOLE_QUAD_MODE(0),
2628 				BARRIER(1));
2629 
2630     /* 10 */
2631     /* return */
2632     shader[i++] = CF_DWORD0(ADDR(0),
2633 			    JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
2634     shader[i++] = CF_DWORD1(POP_COUNT(0),
2635 			    CF_CONST(0),
2636 			    COND(SQ_CF_COND_ACTIVE),
2637 			    I_COUNT(0),
2638 			    VALID_PIXEL_MODE(0),
2639 			    END_OF_PROGRAM(0),
2640 			    CF_INST(SQ_CF_INST_RETURN),
2641 			    WHOLE_QUAD_MODE(0),
2642 			    BARRIER(0));
2643 
2644     /* subroutine interp-fetch-mask */
2645 
2646     /* 11 */
2647     /* interpolate mask */
2648     shader[i++] = CF_ALU_DWORD0(ADDR(30),
2649 				KCACHE_BANK0(0),
2650 				KCACHE_BANK1(0),
2651 				KCACHE_MODE0(SQ_CF_KCACHE_NOP));
2652     shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
2653 				KCACHE_ADDR0(0),
2654 				KCACHE_ADDR1(0),
2655 				I_COUNT(4),
2656 				ALT_CONST(0),
2657 				CF_INST(SQ_CF_INST_ALU),
2658 				WHOLE_QUAD_MODE(0),
2659 				BARRIER(1));
2660 
2661     /* 12 */
2662     /* texture fetch mask into GPR1 */
2663     shader[i++] = CF_DWORD0(ADDR(34),
2664 			    JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
2665     shader[i++] = CF_DWORD1(POP_COUNT(0),
2666 			    CF_CONST(0),
2667 			    COND(SQ_CF_COND_ACTIVE),
2668 			    I_COUNT(1),
2669 			    VALID_PIXEL_MODE(0),
2670 			    END_OF_PROGRAM(0),
2671 			    CF_INST(SQ_CF_INST_TC),
2672 			    WHOLE_QUAD_MODE(0),
2673 			    BARRIER(1));
2674 
2675     /* 13 */
2676     /* return */
2677     shader[i++] = CF_DWORD0(ADDR(0),
2678 			    JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
2679     shader[i++] = CF_DWORD1(POP_COUNT(0),
2680 			    CF_CONST(0),
2681 			    COND(SQ_CF_COND_ACTIVE),
2682 			    I_COUNT(0),
2683 			    VALID_PIXEL_MODE(0),
2684 			    END_OF_PROGRAM(0),
2685 			    CF_INST(SQ_CF_INST_RETURN),
2686 			    WHOLE_QUAD_MODE(0),
2687 			    BARRIER(0));
2688 
    /* subroutine read-constant-mask */
2690 
2691     /* 14 */
2692     /* read constants into GPR1 */
2693     shader[i++] = CF_ALU_DWORD0(ADDR(36),
2694 				KCACHE_BANK0(0),
2695 				KCACHE_BANK1(0),
2696 				KCACHE_MODE0(SQ_CF_KCACHE_LOCK_1));
2697     shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
2698 				KCACHE_ADDR0(0),
2699 				KCACHE_ADDR1(0),
2700 				I_COUNT(4),
2701 				ALT_CONST(1),
2702 				CF_INST(SQ_CF_INST_ALU),
2703 				WHOLE_QUAD_MODE(0),
2704 				BARRIER(1));
2705 
2706     /* 15 */
2707     /* return */
2708     shader[i++] = CF_DWORD0(ADDR(0),
2709 			    JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
2710     shader[i++] = CF_DWORD1(POP_COUNT(0),
2711 			    CF_CONST(0),
2712 			    COND(SQ_CF_COND_ACTIVE),
2713 			    I_COUNT(0),
2714 			    VALID_PIXEL_MODE(0),
2715 			    END_OF_PROGRAM(0),
2716 			    CF_INST(SQ_CF_INST_RETURN),
2717 			    WHOLE_QUAD_MODE(0),
2718 			    BARRIER(0));
2719 
2720     /* ALU clauses */
2721 
2722     /* 16 */
2723     /* MUL gpr[0].x gpr[0].x gpr[1].x */
2724     shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
2725 			     SRC0_REL(ABSOLUTE),
2726 			     SRC0_ELEM(ELEM_X),
2727 			     SRC0_NEG(0),
2728 			     SRC1_SEL(ALU_SRC_GPR_BASE + 1),
2729 			     SRC1_REL(ABSOLUTE),
2730 			     SRC1_ELEM(ELEM_X),
2731 			     SRC1_NEG(0),
2732 			     INDEX_MODE(SQ_INDEX_LOOP),
2733 			     PRED_SEL(SQ_PRED_SEL_OFF),
2734 			     LAST(0));
2735     shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
2736 				 SRC1_ABS(0),
2737 				 UPDATE_EXECUTE_MASK(0),
2738 				 UPDATE_PRED(0),
2739 				 WRITE_MASK(1),
2740 				 OMOD(SQ_ALU_OMOD_OFF),
2741 				 ALU_INST(SQ_OP2_INST_MUL),
2742 				 BANK_SWIZZLE(SQ_ALU_VEC_012),
2743 				 DST_GPR(0),
2744 				 DST_REL(ABSOLUTE),
2745 				 DST_ELEM(ELEM_X),
2746 				 CLAMP(1));
2747 
2748     /* 17 */
2749     /* MUL gpr[0].y gpr[0].y gpr[1].y */
2750     shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
2751 			     SRC0_REL(ABSOLUTE),
2752 			     SRC0_ELEM(ELEM_Y),
2753 			     SRC0_NEG(0),
2754 			     SRC1_SEL(ALU_SRC_GPR_BASE + 1),
2755 			     SRC1_REL(ABSOLUTE),
2756 			     SRC1_ELEM(ELEM_Y),
2757 			     SRC1_NEG(0),
2758 			     INDEX_MODE(SQ_INDEX_LOOP),
2759 			     PRED_SEL(SQ_PRED_SEL_OFF),
2760 			     LAST(0));
2761     shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
2762 				 SRC1_ABS(0),
2763 				 UPDATE_EXECUTE_MASK(0),
2764 				 UPDATE_PRED(0),
2765 				 WRITE_MASK(1),
2766 				 OMOD(SQ_ALU_OMOD_OFF),
2767 				 ALU_INST(SQ_OP2_INST_MUL),
2768 				 BANK_SWIZZLE(SQ_ALU_VEC_012),
2769 				 DST_GPR(0),
2770 				 DST_REL(ABSOLUTE),
2771 				 DST_ELEM(ELEM_Y),
2772 				 CLAMP(1));
2773     /* 18 */
2774     /* MUL gpr[0].z gpr[0].z gpr[1].z */
2775     shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
2776 			     SRC0_REL(ABSOLUTE),
2777 			     SRC0_ELEM(ELEM_Z),
2778 			     SRC0_NEG(0),
2779 			     SRC1_SEL(ALU_SRC_GPR_BASE + 1),
2780 			     SRC1_REL(ABSOLUTE),
2781 			     SRC1_ELEM(ELEM_Z),
2782 			     SRC1_NEG(0),
2783 			     INDEX_MODE(SQ_INDEX_LOOP),
2784 			     PRED_SEL(SQ_PRED_SEL_OFF),
2785 			     LAST(0));
2786     shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
2787 				 SRC1_ABS(0),
2788 				 UPDATE_EXECUTE_MASK(0),
2789 				 UPDATE_PRED(0),
2790 				 WRITE_MASK(1),
2791 				 OMOD(SQ_ALU_OMOD_OFF),
2792 				 ALU_INST(SQ_OP2_INST_MUL),
2793 				 BANK_SWIZZLE(SQ_ALU_VEC_012),
2794 				 DST_GPR(0),
2795 				 DST_REL(ABSOLUTE),
2796 				 DST_ELEM(ELEM_Z),
2797 				 CLAMP(1));
2798     /* 19 */
2799     /* MUL gpr[0].w gpr[0].w gpr[1].w */
2800     shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
2801 			     SRC0_REL(ABSOLUTE),
2802 			     SRC0_ELEM(ELEM_W),
2803 			     SRC0_NEG(0),
2804 			     SRC1_SEL(ALU_SRC_GPR_BASE + 1),
2805 			     SRC1_REL(ABSOLUTE),
2806 			     SRC1_ELEM(ELEM_W),
2807 			     SRC1_NEG(0),
2808 			     INDEX_MODE(SQ_INDEX_LOOP),
2809 			     PRED_SEL(SQ_PRED_SEL_OFF),
2810 			     LAST(1));
2811     shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
2812 				 SRC1_ABS(0),
2813 				 UPDATE_EXECUTE_MASK(0),
2814 				 UPDATE_PRED(0),
2815 				 WRITE_MASK(1),
2816 				 OMOD(SQ_ALU_OMOD_OFF),
2817 				 ALU_INST(SQ_OP2_INST_MUL),
2818 				 BANK_SWIZZLE(SQ_ALU_VEC_012),
2819 				 DST_GPR(0),
2820 				 DST_REL(ABSOLUTE),
2821 				 DST_ELEM(ELEM_W),
2822 				 CLAMP(1));
2823 
2824     /* 20 */
2825     /* INTERP_XY GPR0.x, GPR0.y PARAM0.x */
2826     shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
2827 			     SRC0_REL(ABSOLUTE),
2828 			     SRC0_ELEM(ELEM_Y),
2829 			     SRC0_NEG(0),
2830 			     SRC1_SEL(ALU_SRC_PARAM_BASE + 0),
2831 			     SRC1_REL(ABSOLUTE),
2832 			     SRC1_ELEM(ELEM_X),
2833 			     SRC1_NEG(0),
2834 			     INDEX_MODE(SQ_INDEX_AR_X),
2835 			     PRED_SEL(SQ_PRED_SEL_OFF),
2836 			     LAST(0));
2837     shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
2838 				 SRC1_ABS(0),
2839 				 UPDATE_EXECUTE_MASK(0),
2840 				 UPDATE_PRED(0),
2841 				 WRITE_MASK(1),
2842 				 OMOD(SQ_ALU_OMOD_OFF),
2843 				 ALU_INST(SQ_OP2_INST_INTERP_XY),
2844 				 BANK_SWIZZLE(SQ_ALU_VEC_210),
2845 				 DST_GPR(0),
2846 				 DST_REL(ABSOLUTE),
2847 				 DST_ELEM(ELEM_X),
2848 				 CLAMP(0));
2849     /* 21 */
2850     /* INTERP_XY GPR0.y, GPR0.x PARAM0.x */
2851     shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
2852 			     SRC0_REL(ABSOLUTE),
2853 			     SRC0_ELEM(ELEM_X),
2854 			     SRC0_NEG(0),
2855 			     SRC1_SEL(ALU_SRC_PARAM_BASE + 0),
2856 			     SRC1_REL(ABSOLUTE),
2857 			     SRC1_ELEM(ELEM_X),
2858 			     SRC1_NEG(0),
2859 			     INDEX_MODE(SQ_INDEX_AR_X),
2860 			     PRED_SEL(SQ_PRED_SEL_OFF),
2861 			     LAST(0));
2862     shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
2863 				 SRC1_ABS(0),
2864 				 UPDATE_EXECUTE_MASK(0),
2865 				 UPDATE_PRED(0),
2866 				 WRITE_MASK(1),
2867 				 OMOD(SQ_ALU_OMOD_OFF),
2868 				 ALU_INST(SQ_OP2_INST_INTERP_XY),
2869 				 BANK_SWIZZLE(SQ_ALU_VEC_210),
2870 				 DST_GPR(0),
2871 				 DST_REL(ABSOLUTE),
2872 				 DST_ELEM(ELEM_Y),
2873 				 CLAMP(0));
2874     /* 22 */
2875     /* INTERP_XY GPR0.z, GPR0.y PARAM0.x */
2876     shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
2877 			     SRC0_REL(ABSOLUTE),
2878 			     SRC0_ELEM(ELEM_Y),
2879 			     SRC0_NEG(0),
2880 			     SRC1_SEL(ALU_SRC_PARAM_BASE + 0),
2881 			     SRC1_REL(ABSOLUTE),
2882 			     SRC1_ELEM(ELEM_X),
2883 			     SRC1_NEG(0),
2884 			     INDEX_MODE(SQ_INDEX_AR_X),
2885 			     PRED_SEL(SQ_PRED_SEL_OFF),
2886 			     LAST(0));
2887     shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
2888 				 SRC1_ABS(0),
2889 				 UPDATE_EXECUTE_MASK(0),
2890 				 UPDATE_PRED(0),
2891 				 WRITE_MASK(0),
2892 				 OMOD(SQ_ALU_OMOD_OFF),
2893 				 ALU_INST(SQ_OP2_INST_INTERP_XY),
2894 				 BANK_SWIZZLE(SQ_ALU_VEC_210),
2895 				 DST_GPR(0),
2896 				 DST_REL(ABSOLUTE),
2897 				 DST_ELEM(ELEM_Z),
2898 				 CLAMP(0));
2899 
2900     /* 23 */
2901     /* INTERP_XY GPR0.w, GPR0.x PARAM0.x */
2902     shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
2903 			     SRC0_REL(ABSOLUTE),
2904 			     SRC0_ELEM(ELEM_X),
2905 			     SRC0_NEG(0),
2906 			     SRC1_SEL(ALU_SRC_PARAM_BASE + 0),
2907 			     SRC1_REL(ABSOLUTE),
2908 			     SRC1_ELEM(ELEM_X),
2909 			     SRC1_NEG(0),
2910 			     INDEX_MODE(SQ_INDEX_AR_X),
2911 			     PRED_SEL(SQ_PRED_SEL_OFF),
2912 			     LAST(1));
2913     shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
2914 				 SRC1_ABS(0),
2915 				 UPDATE_EXECUTE_MASK(0),
2916 				 UPDATE_PRED(0),
2917 				 WRITE_MASK(0),
2918 				 OMOD(SQ_ALU_OMOD_OFF),
2919 				 ALU_INST(SQ_OP2_INST_INTERP_XY),
2920 				 BANK_SWIZZLE(SQ_ALU_VEC_210),
2921 				 DST_GPR(0),
2922 				 DST_REL(ABSOLUTE),
2923 				 DST_ELEM(ELEM_W),
2924 				 CLAMP(0));
2925 
2926     /* 24/25 */
2927     /* SAMPLE RID=0 GPR0, GPR0 */
2928     shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
2929 			     INST_MOD(0),
2930 			     FETCH_WHOLE_QUAD(0),
2931 			     RESOURCE_ID(0),
2932 			     SRC_GPR(0),
2933 			     SRC_REL(ABSOLUTE),
2934 			     ALT_CONST(0),
2935 			     RESOURCE_INDEX_MODE(SQ_CF_INDEX_NONE),
2936 			     SAMPLER_INDEX_MODE(SQ_CF_INDEX_NONE));
2937     shader[i++] = TEX_DWORD1(DST_GPR(0),
2938 			     DST_REL(ABSOLUTE),
2939 			     DST_SEL_X(SQ_SEL_X),
2940 			     DST_SEL_Y(SQ_SEL_Y),
2941 			     DST_SEL_Z(SQ_SEL_Z),
2942 			     DST_SEL_W(SQ_SEL_W),
2943 			     LOD_BIAS(0),
2944 			     COORD_TYPE_X(TEX_NORMALIZED),
2945 			     COORD_TYPE_Y(TEX_NORMALIZED),
2946 			     COORD_TYPE_Z(TEX_NORMALIZED),
2947 			     COORD_TYPE_W(TEX_NORMALIZED));
2948     shader[i++] = TEX_DWORD2(OFFSET_X(0),
2949 			     OFFSET_Y(0),
2950 			     OFFSET_Z(0),
2951 			     SAMPLER_ID(0),
2952 			     SRC_SEL_X(SQ_SEL_X),
2953 			     SRC_SEL_Y(SQ_SEL_Y),
2954 			     SRC_SEL_Z(SQ_SEL_0),
2955 			     SRC_SEL_W(SQ_SEL_1));
2956     shader[i++] = TEX_DWORD_PAD;
2957 
2958     /* 26 */
2959     /* MOV GPR0.x, KC4.x */
2960     shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 4),
2961 			     SRC0_REL(ABSOLUTE),
2962 			     SRC0_ELEM(ELEM_X),
2963 			     SRC0_NEG(0),
2964 			     SRC1_SEL(ALU_SRC_GPR_BASE + 0),
2965 			     SRC1_REL(ABSOLUTE),
2966 			     SRC1_ELEM(ELEM_X),
2967 			     SRC1_NEG(0),
2968 			     INDEX_MODE(SQ_INDEX_AR_X),
2969 			     PRED_SEL(SQ_PRED_SEL_OFF),
2970 			     LAST(0));
2971     shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
2972 				 SRC1_ABS(0),
2973 				 UPDATE_EXECUTE_MASK(0),
2974 				 UPDATE_PRED(0),
2975 				 WRITE_MASK(1),
2976 				 OMOD(SQ_ALU_OMOD_OFF),
2977 				 ALU_INST(SQ_OP2_INST_MOV),
2978 				 BANK_SWIZZLE(SQ_ALU_VEC_012),
2979 				 DST_GPR(0),
2980 				 DST_REL(ABSOLUTE),
2981 				 DST_ELEM(ELEM_X),
2982 				 CLAMP(1));
2983 
2984     /* 27 */
2985     /* MOV GPR0.y, KC4.y */
2986     shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 4),
2987 			     SRC0_REL(ABSOLUTE),
2988 			     SRC0_ELEM(ELEM_Y),
2989 			     SRC0_NEG(0),
2990 			     SRC1_SEL(ALU_SRC_GPR_BASE + 0),
2991 			     SRC1_REL(ABSOLUTE),
2992 			     SRC1_ELEM(ELEM_X),
2993 			     SRC1_NEG(0),
2994 			     INDEX_MODE(SQ_INDEX_AR_X),
2995 			     PRED_SEL(SQ_PRED_SEL_OFF),
2996 			     LAST(0));
2997     shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
2998 				 SRC1_ABS(0),
2999 				 UPDATE_EXECUTE_MASK(0),
3000 				 UPDATE_PRED(0),
3001 				 WRITE_MASK(1),
3002 				 OMOD(SQ_ALU_OMOD_OFF),
3003 				 ALU_INST(SQ_OP2_INST_MOV),
3004 				 BANK_SWIZZLE(SQ_ALU_VEC_012),
3005 				 DST_GPR(0),
3006 				 DST_REL(ABSOLUTE),
3007 				 DST_ELEM(ELEM_Y),
3008 				 CLAMP(1));
3009 
3010     /* 28  */
3011     /* MOV GPR0.z, KC4.z */
3012     shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 4),
3013 			     SRC0_REL(ABSOLUTE),
3014 			     SRC0_ELEM(ELEM_Z),
3015 			     SRC0_NEG(0),
3016 			     SRC1_SEL(ALU_SRC_GPR_BASE + 0),
3017 			     SRC1_REL(ABSOLUTE),
3018 			     SRC1_ELEM(ELEM_X),
3019 			     SRC1_NEG(0),
3020 			     INDEX_MODE(SQ_INDEX_AR_X),
3021 			     PRED_SEL(SQ_PRED_SEL_OFF),
3022 			     LAST(0));
3023     shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
3024 				 SRC1_ABS(0),
3025 				 UPDATE_EXECUTE_MASK(0),
3026 				 UPDATE_PRED(0),
3027 				 WRITE_MASK(1),
3028 				 OMOD(SQ_ALU_OMOD_OFF),
3029 				 ALU_INST(SQ_OP2_INST_MOV),
3030 				 BANK_SWIZZLE(SQ_ALU_VEC_012),
3031 				 DST_GPR(0),
3032 				 DST_REL(ABSOLUTE),
3033 				 DST_ELEM(ELEM_Z),
3034 				 CLAMP(1));
3035 
3036     /* 29 */
3037     /* MOV GPR0.w, KC4.w */
3038     shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 4),
3039 			     SRC0_REL(ABSOLUTE),
3040 			     SRC0_ELEM(ELEM_W),
3041 			     SRC0_NEG(0),
3042 			     SRC1_SEL(ALU_SRC_GPR_BASE + 0),
3043 			     SRC1_REL(ABSOLUTE),
3044 			     SRC1_ELEM(ELEM_X),
3045 			     SRC1_NEG(0),
3046 			     INDEX_MODE(SQ_INDEX_AR_X),
3047 			     PRED_SEL(SQ_PRED_SEL_OFF),
3048 			     LAST(1));
3049     shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
3050 				 SRC1_ABS(0),
3051 				 UPDATE_EXECUTE_MASK(0),
3052 				 UPDATE_PRED(0),
3053 				 WRITE_MASK(1),
3054 				 OMOD(SQ_ALU_OMOD_OFF),
3055 				 ALU_INST(SQ_OP2_INST_MOV),
3056 				 BANK_SWIZZLE(SQ_ALU_VEC_012),
3057 				 DST_GPR(0),
3058 				 DST_REL(ABSOLUTE),
3059 				 DST_ELEM(ELEM_W),
3060 				 CLAMP(1));
3061 
3062     /* 30 */
3063     /* INTERP_XY GPR1.x, PARAM1 */
3064     shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
3065 			     SRC0_REL(ABSOLUTE),
3066 			     SRC0_ELEM(ELEM_Y),
3067 			     SRC0_NEG(0),
3068 			     SRC1_SEL(ALU_SRC_PARAM_BASE + 1),
3069 			     SRC1_REL(ABSOLUTE),
3070 			     SRC1_ELEM(ELEM_X),
3071 			     SRC1_NEG(0),
3072 			     INDEX_MODE(SQ_INDEX_AR_X),
3073 			     PRED_SEL(SQ_PRED_SEL_OFF),
3074 			     LAST(0));
3075     shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
3076 				 SRC1_ABS(0),
3077 				 UPDATE_EXECUTE_MASK(0),
3078 				 UPDATE_PRED(0),
3079 				 WRITE_MASK(1),
3080 				 OMOD(SQ_ALU_OMOD_OFF),
3081 				 ALU_INST(SQ_OP2_INST_INTERP_XY),
3082 				 BANK_SWIZZLE(SQ_ALU_VEC_210),
3083 				 DST_GPR(1),
3084 				 DST_REL(ABSOLUTE),
3085 				 DST_ELEM(ELEM_X),
3086 				 CLAMP(0));
3087     /* 31 */
3088     /* INTERP_XY GPR1.y, PARAM1 */
3089     shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
3090 			     SRC0_REL(ABSOLUTE),
3091 			     SRC0_ELEM(ELEM_X),
3092 			     SRC0_NEG(0),
3093 			     SRC1_SEL(ALU_SRC_PARAM_BASE + 1),
3094 			     SRC1_REL(ABSOLUTE),
3095 			     SRC1_ELEM(ELEM_X),
3096 			     SRC1_NEG(0),
3097 			     INDEX_MODE(SQ_INDEX_AR_X),
3098 			     PRED_SEL(SQ_PRED_SEL_OFF),
3099 			     LAST(0));
3100     shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
3101 				 SRC1_ABS(0),
3102 				 UPDATE_EXECUTE_MASK(0),
3103 				 UPDATE_PRED(0),
3104 				 WRITE_MASK(1),
3105 				 OMOD(SQ_ALU_OMOD_OFF),
3106 				 ALU_INST(SQ_OP2_INST_INTERP_XY),
3107 				 BANK_SWIZZLE(SQ_ALU_VEC_210),
3108 				 DST_GPR(1),
3109 				 DST_REL(ABSOLUTE),
3110 				 DST_ELEM(ELEM_Y),
3111 				 CLAMP(0));
3112     /* 32 */
3113     /* INTERP_XY GPR1.z, PARAM1 */
3114     shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
3115 			     SRC0_REL(ABSOLUTE),
3116 			     SRC0_ELEM(ELEM_Y),
3117 			     SRC0_NEG(0),
3118 			     SRC1_SEL(ALU_SRC_PARAM_BASE + 1),
3119 			     SRC1_REL(ABSOLUTE),
3120 			     SRC1_ELEM(ELEM_X),
3121 			     SRC1_NEG(0),
3122 			     INDEX_MODE(SQ_INDEX_AR_X),
3123 			     PRED_SEL(SQ_PRED_SEL_OFF),
3124 			     LAST(0));
3125     shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
3126 				 SRC1_ABS(0),
3127 				 UPDATE_EXECUTE_MASK(0),
3128 				 UPDATE_PRED(0),
3129 				 WRITE_MASK(0),
3130 				 OMOD(SQ_ALU_OMOD_OFF),
3131 				 ALU_INST(SQ_OP2_INST_INTERP_XY),
3132 				 BANK_SWIZZLE(SQ_ALU_VEC_210),
3133 				 DST_GPR(1),
3134 				 DST_REL(ABSOLUTE),
3135 				 DST_ELEM(ELEM_Z),
3136 				 CLAMP(0));
3137     /* 33 */
3138     /* INTERP_XY GPR1.w, PARAM1 */
3139     shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
3140 			     SRC0_REL(ABSOLUTE),
3141 			     SRC0_ELEM(ELEM_X),
3142 			     SRC0_NEG(0),
3143 			     SRC1_SEL(ALU_SRC_PARAM_BASE + 1),
3144 			     SRC1_REL(ABSOLUTE),
3145 			     SRC1_ELEM(ELEM_X),
3146 			     SRC1_NEG(0),
3147 			     INDEX_MODE(SQ_INDEX_AR_X),
3148 			     PRED_SEL(SQ_PRED_SEL_OFF),
3149 			     LAST(1));
3150     shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
3151 				 SRC1_ABS(0),
3152 				 UPDATE_EXECUTE_MASK(0),
3153 				 UPDATE_PRED(0),
3154 				 WRITE_MASK(0),
3155 				 OMOD(SQ_ALU_OMOD_OFF),
3156 				 ALU_INST(SQ_OP2_INST_INTERP_XY),
3157 				 BANK_SWIZZLE(SQ_ALU_VEC_210),
3158 				 DST_GPR(1),
3159 				 DST_REL(ABSOLUTE),
3160 				 DST_ELEM(ELEM_W),
3161 				 CLAMP(0));
3162 
3163     /* 34/35 */
3164     /* SAMPLE RID=1 GPR1, GPR1 */
3165     shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
3166 			     INST_MOD(0),
3167 			     FETCH_WHOLE_QUAD(0),
3168 			     RESOURCE_ID(1),
3169 			     SRC_GPR(1),
3170 			     SRC_REL(ABSOLUTE),
3171 			     ALT_CONST(0),
3172 			     RESOURCE_INDEX_MODE(SQ_CF_INDEX_NONE),
3173 			     SAMPLER_INDEX_MODE(SQ_CF_INDEX_NONE));
3174     shader[i++] = TEX_DWORD1(DST_GPR(1),
3175 			     DST_REL(ABSOLUTE),
3176 			     DST_SEL_X(SQ_SEL_X),
3177 			     DST_SEL_Y(SQ_SEL_Y),
3178 			     DST_SEL_Z(SQ_SEL_Z),
3179 			     DST_SEL_W(SQ_SEL_W),
3180 			     LOD_BIAS(0),
3181 			     COORD_TYPE_X(TEX_NORMALIZED),
3182 			     COORD_TYPE_Y(TEX_NORMALIZED),
3183 			     COORD_TYPE_Z(TEX_NORMALIZED),
3184 			     COORD_TYPE_W(TEX_NORMALIZED));
3185     shader[i++] = TEX_DWORD2(OFFSET_X(0),
3186 			     OFFSET_Y(0),
3187 			     OFFSET_Z(0),
3188 			     SAMPLER_ID(1),
3189 			     SRC_SEL_X(SQ_SEL_X),
3190 			     SRC_SEL_Y(SQ_SEL_Y),
3191 			     SRC_SEL_Z(SQ_SEL_0),
3192 			     SRC_SEL_W(SQ_SEL_1));
3193     shader[i++] = TEX_DWORD_PAD;
3194 
3195     /* 36 */
3196     /* MOV GPR1.x, KC5.x */
3197     shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 5),
3198 			     SRC0_REL(ABSOLUTE),
3199 			     SRC0_ELEM(ELEM_X),
3200 			     SRC0_NEG(0),
3201 			     SRC1_SEL(ALU_SRC_GPR_BASE + 0),
3202 			     SRC1_REL(ABSOLUTE),
3203 			     SRC1_ELEM(ELEM_X),
3204 			     SRC1_NEG(0),
3205 			     INDEX_MODE(SQ_INDEX_AR_X),
3206 			     PRED_SEL(SQ_PRED_SEL_OFF),
3207 			     LAST(0));
3208     shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
3209 				 SRC1_ABS(0),
3210 				 UPDATE_EXECUTE_MASK(0),
3211 				 UPDATE_PRED(0),
3212 				 WRITE_MASK(1),
3213 				 OMOD(SQ_ALU_OMOD_OFF),
3214 				 ALU_INST(SQ_OP2_INST_MOV),
3215 				 BANK_SWIZZLE(SQ_ALU_VEC_012),
3216 				 DST_GPR(1),
3217 				 DST_REL(ABSOLUTE),
3218 				 DST_ELEM(ELEM_X),
3219 				 CLAMP(1));
3220 
3221     /* 37 */
3222     /* MOV GPR1.y, KC5.y */
3223     shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 5),
3224 			     SRC0_REL(ABSOLUTE),
3225 			     SRC0_ELEM(ELEM_Y),
3226 			     SRC0_NEG(0),
3227 			     SRC1_SEL(ALU_SRC_GPR_BASE + 0),
3228 			     SRC1_REL(ABSOLUTE),
3229 			     SRC1_ELEM(ELEM_X),
3230 			     SRC1_NEG(0),
3231 			     INDEX_MODE(SQ_INDEX_AR_X),
3232 			     PRED_SEL(SQ_PRED_SEL_OFF),
3233 			     LAST(0));
3234     shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
3235 				 SRC1_ABS(0),
3236 				 UPDATE_EXECUTE_MASK(0),
3237 				 UPDATE_PRED(0),
3238 				 WRITE_MASK(1),
3239 				 OMOD(SQ_ALU_OMOD_OFF),
3240 				 ALU_INST(SQ_OP2_INST_MOV),
3241 				 BANK_SWIZZLE(SQ_ALU_VEC_012),
3242 				 DST_GPR(1),
3243 				 DST_REL(ABSOLUTE),
3244 				 DST_ELEM(ELEM_Y),
3245 				 CLAMP(1));
3246 
3247     /* 38 */
3248     /* MOV GPR1.z, KC5.z */
3249     shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 5),
3250 			     SRC0_REL(ABSOLUTE),
3251 			     SRC0_ELEM(ELEM_Z),
3252 			     SRC0_NEG(0),
3253 			     SRC1_SEL(ALU_SRC_GPR_BASE + 0),
3254 			     SRC1_REL(ABSOLUTE),
3255 			     SRC1_ELEM(ELEM_X),
3256 			     SRC1_NEG(0),
3257 			     INDEX_MODE(SQ_INDEX_AR_X),
3258 			     PRED_SEL(SQ_PRED_SEL_OFF),
3259 			     LAST(0));
3260     shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
3261 				 SRC1_ABS(0),
3262 				 UPDATE_EXECUTE_MASK(0),
3263 				 UPDATE_PRED(0),
3264 				 WRITE_MASK(1),
3265 				 OMOD(SQ_ALU_OMOD_OFF),
3266 				 ALU_INST(SQ_OP2_INST_MOV),
3267 				 BANK_SWIZZLE(SQ_ALU_VEC_012),
3268 				 DST_GPR(1),
3269 				 DST_REL(ABSOLUTE),
3270 				 DST_ELEM(ELEM_Z),
3271 				 CLAMP(1));
3272 
3273     /* 39 */
3274     /* MOV GPR1.w, KC5.w */
3275     shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 5),
3276 			     SRC0_REL(ABSOLUTE),
3277 			     SRC0_ELEM(ELEM_W),
3278 			     SRC0_NEG(0),
3279 			     SRC1_SEL(ALU_SRC_GPR_BASE + 0),
3280 			     SRC1_REL(ABSOLUTE),
3281 			     SRC1_ELEM(ELEM_X),
3282 			     SRC1_NEG(0),
3283 			     INDEX_MODE(SQ_INDEX_AR_X),
3284 			     PRED_SEL(SQ_PRED_SEL_OFF),
3285 			     LAST(1));
3286     shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
3287 				 SRC1_ABS(0),
3288 				 UPDATE_EXECUTE_MASK(0),
3289 				 UPDATE_PRED(0),
3290 				 WRITE_MASK(1),
3291 				 OMOD(SQ_ALU_OMOD_OFF),
3292 				 ALU_INST(SQ_OP2_INST_MOV),
3293 				 BANK_SWIZZLE(SQ_ALU_VEC_012),
3294 				 DST_GPR(1),
3295 				 DST_REL(ABSOLUTE),
3296 				 DST_ELEM(ELEM_W),
3297 				 CLAMP(1));
3298 
3299     return i;
3300 }
3301