1 /*
2 * Copyright 2010 Advanced Micro Devices, Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 * SOFTWARE.
22 *
23 * Author: Alex Deucher <alexander.deucher@amd.com>
24 *
25 */
26
27 #ifdef HAVE_CONFIG_H
28 #include "config.h"
29 #endif
30
31 #include "xf86.h"
32
33 #include "evergreen_shader.h"
34 #include "evergreen_reg.h"
35
36 /* solid vs --------------------------------------- */
/*
 * Build the solid-fill vertex shader.
 *
 * The encoded program is stored into shader[] as consecutive 32-bit words,
 * starting at shader[0].  The return value is the number of words written
 * (12 for this program).  No bounds checking is performed, so the caller
 * must supply a buffer at least that large.  ChipSet is not referenced.
 *
 * The numbers in the comments below are instruction slots of two dwords
 * each; the ADDR() operands of the CF instructions line up with that
 * numbering (NOTE(review): confirm the unit against the ISA documentation).
 */
int evergreen_solid_vs(RADEONChipFamily ChipSet, uint32_t *shader)
{
    uint32_t *dw = shader;

    /*
     * Select the vertex-fetch endian swap once, up front.  The previous
     * code placed #if/#else/#endif inside the VTX_DWORD2() argument list;
     * if VTX_DWORD2 is a function-like macro (its definition lives in
     * evergreen_shader.h) that is undefined behaviour in ISO C and only
     * works as a compiler extension.
     */
#if X_BYTE_ORDER == X_BIG_ENDIAN
    const uint32_t vtx_endian = SQ_ENDIAN_8IN32;
#else
    const uint32_t vtx_endian = SQ_ENDIAN_NONE;
#endif

    /* 0 - vertex fetch clause (SQ_CF_INST_VC): one fetch, at slot 4 */
    *dw++ = CF_DWORD0(ADDR(4), JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
    *dw++ = CF_DWORD1(
        POP_COUNT(0), CF_CONST(0), COND(SQ_CF_COND_ACTIVE),
        I_COUNT(1), VALID_PIXEL_MODE(0), END_OF_PROGRAM(0),
        CF_INST(SQ_CF_INST_VC), WHOLE_QUAD_MODE(0), BARRIER(1));

    /* 1 - export GPR1 as the position (CF_POS0) */
    *dw++ = CF_ALLOC_IMP_EXP_DWORD0(
        ARRAY_BASE(CF_POS0), TYPE(SQ_EXPORT_POS),
        RW_GPR(1), RW_REL(ABSOLUTE), INDEX_GPR(0), ELEM_SIZE(0));
    *dw++ = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(
        SRC_SEL_X(SQ_SEL_X), SRC_SEL_Y(SQ_SEL_Y),
        SRC_SEL_Z(SQ_SEL_Z), SRC_SEL_W(SQ_SEL_W),
        BURST_COUNT(1), VALID_PIXEL_MODE(0), END_OF_PROGRAM(0),
        CF_INST(SQ_CF_INST_EXPORT_DONE), MARK(0), BARRIER(1));

    /*
     * 2 - always export a param whether it's used or not.
     * This export carries END_OF_PROGRAM(1).
     */
    *dw++ = CF_ALLOC_IMP_EXP_DWORD0(
        ARRAY_BASE(0), TYPE(SQ_EXPORT_PARAM),
        RW_GPR(0), RW_REL(ABSOLUTE), INDEX_GPR(0), ELEM_SIZE(0));
    *dw++ = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(
        SRC_SEL_X(SQ_SEL_X), SRC_SEL_Y(SQ_SEL_Y),
        SRC_SEL_Z(SQ_SEL_Z), SRC_SEL_W(SQ_SEL_W),
        BURST_COUNT(0), VALID_PIXEL_MODE(0), END_OF_PROGRAM(1),
        CF_INST(SQ_CF_INST_EXPORT_DONE), MARK(0), BARRIER(0));

    /* 3 - padding, so that the fetch below starts at slot 4 */
    *dw++ = 0x00000000;
    *dw++ = 0x00000000;

    /* 4/5 - fetch two floats at offset 0 into GPR1 as (x, y, 0, 1) */
    *dw++ = VTX_DWORD0(
        VTX_INST(SQ_VTX_INST_FETCH), FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
        FETCH_WHOLE_QUAD(0), BUFFER_ID(0),
        SRC_GPR(0), SRC_REL(ABSOLUTE), SRC_SEL_X(SQ_SEL_X),
        MEGA_FETCH_COUNT(8));
    *dw++ = VTX_DWORD1_GPR(
        DST_GPR(1), DST_REL(0),
        DST_SEL_X(SQ_SEL_X), DST_SEL_Y(SQ_SEL_Y),
        DST_SEL_Z(SQ_SEL_0), DST_SEL_W(SQ_SEL_1),
        USE_CONST_FIELDS(0), DATA_FORMAT(FMT_32_32_FLOAT),
        NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED),
        FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
        SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
    *dw++ = VTX_DWORD2(
        OFFSET(0), ENDIAN_SWAP(vtx_endian),
        CONST_BUF_NO_STRIDE(0), MEGA_FETCH(1), ALT_CONST(0),
        BUFFER_INDEX_MODE(SQ_CF_INDEX_NONE));
    *dw++ = VTX_DWORD_PAD;

    return (int)(dw - shader);
}
124
125 /* solid ps --------------------------------------- */
/*
 * Build the solid-fill pixel shader.
 *
 * The encoded program is stored into shader[] as consecutive 32-bit words,
 * starting at shader[0]; the number of words written (12 here) is returned.
 * The caller owns the buffer and must make it large enough.  ChipSet is
 * not referenced.
 *
 * Comment numbers are instruction slots of two dwords each.
 */
int evergreen_solid_ps(RADEONChipFamily ChipSet, uint32_t *shader)
{
    uint32_t *dw = shader;

    /* 0 - ALU clause: four instructions at slot 2, kcache 0 locked */
    *dw++ = CF_ALU_DWORD0(
        ADDR(2), KCACHE_BANK0(0), KCACHE_BANK1(0),
        KCACHE_MODE0(SQ_CF_KCACHE_LOCK_1));
    *dw++ = CF_ALU_DWORD1(
        KCACHE_MODE1(SQ_CF_KCACHE_NOP), KCACHE_ADDR0(0), KCACHE_ADDR1(0),
        I_COUNT(4), ALT_CONST(0), CF_INST(SQ_CF_INST_ALU),
        WHOLE_QUAD_MODE(0), BARRIER(1));

    /* 1 - export GPR0 to CF_PIXEL_MRT0; carries END_OF_PROGRAM(1) */
    *dw++ = CF_ALLOC_IMP_EXP_DWORD0(
        ARRAY_BASE(CF_PIXEL_MRT0), TYPE(SQ_EXPORT_PIXEL),
        RW_GPR(0), RW_REL(ABSOLUTE), INDEX_GPR(0), ELEM_SIZE(1));
    *dw++ = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(
        SRC_SEL_X(SQ_SEL_X), SRC_SEL_Y(SQ_SEL_Y),
        SRC_SEL_Z(SQ_SEL_Z), SRC_SEL_W(SQ_SEL_W),
        BURST_COUNT(1), VALID_PIXEL_MODE(0), END_OF_PROGRAM(1),
        CF_INST(SQ_CF_INST_EXPORT_DONE), MARK(0), BARRIER(1));

    /*
     * 2..5 - one MOV per channel: kcache0[0].{x,y,z,w} -> GPR0.{x,y,z,w},
     * each with CLAMP(1).  Only the W instruction sets LAST(1).
     */

    /* 2 - x */
    *dw++ = ALU_DWORD0(
        SRC0_SEL(ALU_SRC_KCACHE0_BASE + 0), SRC0_REL(ABSOLUTE),
        SRC0_ELEM(ELEM_X), SRC0_NEG(0),
        SRC1_SEL(ALU_SRC_GPR_BASE + 0), SRC1_REL(ABSOLUTE),
        SRC1_ELEM(ELEM_X), SRC1_NEG(0),
        INDEX_MODE(SQ_INDEX_AR_X), PRED_SEL(SQ_PRED_SEL_OFF), LAST(0));
    *dw++ = ALU_DWORD1_OP2(
        SRC0_ABS(0), SRC1_ABS(0), UPDATE_EXECUTE_MASK(0), UPDATE_PRED(0),
        WRITE_MASK(1), OMOD(SQ_ALU_OMOD_OFF), ALU_INST(SQ_OP2_INST_MOV),
        BANK_SWIZZLE(SQ_ALU_VEC_012),
        DST_GPR(0), DST_REL(ABSOLUTE), DST_ELEM(ELEM_X), CLAMP(1));

    /* 3 - y */
    *dw++ = ALU_DWORD0(
        SRC0_SEL(ALU_SRC_KCACHE0_BASE + 0), SRC0_REL(ABSOLUTE),
        SRC0_ELEM(ELEM_Y), SRC0_NEG(0),
        SRC1_SEL(ALU_SRC_GPR_BASE + 0), SRC1_REL(ABSOLUTE),
        SRC1_ELEM(ELEM_Y), SRC1_NEG(0),
        INDEX_MODE(SQ_INDEX_AR_X), PRED_SEL(SQ_PRED_SEL_OFF), LAST(0));
    *dw++ = ALU_DWORD1_OP2(
        SRC0_ABS(0), SRC1_ABS(0), UPDATE_EXECUTE_MASK(0), UPDATE_PRED(0),
        WRITE_MASK(1), OMOD(SQ_ALU_OMOD_OFF), ALU_INST(SQ_OP2_INST_MOV),
        BANK_SWIZZLE(SQ_ALU_VEC_012),
        DST_GPR(0), DST_REL(ABSOLUTE), DST_ELEM(ELEM_Y), CLAMP(1));

    /* 4 - z */
    *dw++ = ALU_DWORD0(
        SRC0_SEL(ALU_SRC_KCACHE0_BASE + 0), SRC0_REL(ABSOLUTE),
        SRC0_ELEM(ELEM_Z), SRC0_NEG(0),
        SRC1_SEL(ALU_SRC_GPR_BASE + 0), SRC1_REL(ABSOLUTE),
        SRC1_ELEM(ELEM_Z), SRC1_NEG(0),
        INDEX_MODE(SQ_INDEX_AR_X), PRED_SEL(SQ_PRED_SEL_OFF), LAST(0));
    *dw++ = ALU_DWORD1_OP2(
        SRC0_ABS(0), SRC1_ABS(0), UPDATE_EXECUTE_MASK(0), UPDATE_PRED(0),
        WRITE_MASK(1), OMOD(SQ_ALU_OMOD_OFF), ALU_INST(SQ_OP2_INST_MOV),
        BANK_SWIZZLE(SQ_ALU_VEC_012),
        DST_GPR(0), DST_REL(ABSOLUTE), DST_ELEM(ELEM_Z), CLAMP(1));

    /* 5 - w (last instruction of the group) */
    *dw++ = ALU_DWORD0(
        SRC0_SEL(ALU_SRC_KCACHE0_BASE + 0), SRC0_REL(ABSOLUTE),
        SRC0_ELEM(ELEM_W), SRC0_NEG(0),
        SRC1_SEL(ALU_SRC_GPR_BASE + 0), SRC1_REL(ABSOLUTE),
        SRC1_ELEM(ELEM_W), SRC1_NEG(0),
        INDEX_MODE(SQ_INDEX_AR_X), PRED_SEL(SQ_PRED_SEL_OFF), LAST(1));
    *dw++ = ALU_DWORD1_OP2(
        SRC0_ABS(0), SRC1_ABS(0), UPDATE_EXECUTE_MASK(0), UPDATE_PRED(0),
        WRITE_MASK(1), OMOD(SQ_ALU_OMOD_OFF), ALU_INST(SQ_OP2_INST_MOV),
        BANK_SWIZZLE(SQ_ALU_VEC_012),
        DST_GPR(0), DST_REL(ABSOLUTE), DST_ELEM(ELEM_W), CLAMP(1));

    return (int)(dw - shader);
}
260
261 /* copy vs --------------------------------------- */
/*
 * Build the copy vertex shader.
 *
 * The encoded program is stored into shader[] as consecutive 32-bit words,
 * starting at shader[0].  The return value is the number of words written
 * (16 for this program).  No bounds checking is performed, so the caller
 * must supply a buffer at least that large.  ChipSet is not referenced.
 *
 * The numbers in the comments below are instruction slots of two dwords
 * each; the ADDR() operand of the CF fetch clause lines up with that
 * numbering (NOTE(review): confirm the unit against the ISA documentation).
 */
int evergreen_copy_vs(RADEONChipFamily ChipSet, uint32_t *shader)
{
    uint32_t *dw = shader;

    /*
     * Select the vertex-fetch endian swap once for both fetches.  The
     * previous code placed #if/#else/#endif inside each VTX_DWORD2()
     * argument list; if VTX_DWORD2 is a function-like macro (its
     * definition lives in evergreen_shader.h) that is undefined behaviour
     * in ISO C and only works as a compiler extension.
     */
#if X_BYTE_ORDER == X_BIG_ENDIAN
    const uint32_t vtx_endian = SQ_ENDIAN_8IN32;
#else
    const uint32_t vtx_endian = SQ_ENDIAN_NONE;
#endif

    /* 0 - vertex fetch clause (SQ_CF_INST_VC): two fetches, at slot 4 */
    *dw++ = CF_DWORD0(ADDR(4), JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
    *dw++ = CF_DWORD1(
        POP_COUNT(0), CF_CONST(0), COND(SQ_CF_COND_ACTIVE),
        I_COUNT(2), VALID_PIXEL_MODE(0), END_OF_PROGRAM(0),
        CF_INST(SQ_CF_INST_VC), WHOLE_QUAD_MODE(0), BARRIER(1));

    /* 1 - export GPR1 as the position (CF_POS0) */
    *dw++ = CF_ALLOC_IMP_EXP_DWORD0(
        ARRAY_BASE(CF_POS0), TYPE(SQ_EXPORT_POS),
        RW_GPR(1), RW_REL(ABSOLUTE), INDEX_GPR(0), ELEM_SIZE(0));
    *dw++ = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(
        SRC_SEL_X(SQ_SEL_X), SRC_SEL_Y(SQ_SEL_Y),
        SRC_SEL_Z(SQ_SEL_Z), SRC_SEL_W(SQ_SEL_W),
        BURST_COUNT(0), VALID_PIXEL_MODE(0), END_OF_PROGRAM(0),
        CF_INST(SQ_CF_INST_EXPORT_DONE), MARK(0), BARRIER(1));

    /* 2 - export GPR0 as parameter 0; carries END_OF_PROGRAM(1) */
    *dw++ = CF_ALLOC_IMP_EXP_DWORD0(
        ARRAY_BASE(0), TYPE(SQ_EXPORT_PARAM),
        RW_GPR(0), RW_REL(ABSOLUTE), INDEX_GPR(0), ELEM_SIZE(0));
    *dw++ = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(
        SRC_SEL_X(SQ_SEL_X), SRC_SEL_Y(SQ_SEL_Y),
        SRC_SEL_Z(SQ_SEL_Z), SRC_SEL_W(SQ_SEL_W),
        BURST_COUNT(0), VALID_PIXEL_MODE(0), END_OF_PROGRAM(1),
        CF_INST(SQ_CF_INST_EXPORT_DONE), MARK(0), BARRIER(0));

    /* 3 - two zero dwords, so that the first fetch starts at slot 4 */
    *dw++ = 0x00000000;
    *dw++ = 0x00000000;

    /* 4/5 - fetch two floats at offset 0 into GPR1 as (x, y, 0, 1) */
    *dw++ = VTX_DWORD0(
        VTX_INST(SQ_VTX_INST_FETCH), FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
        FETCH_WHOLE_QUAD(0), BUFFER_ID(0),
        SRC_GPR(0), SRC_REL(ABSOLUTE), SRC_SEL_X(SQ_SEL_X),
        MEGA_FETCH_COUNT(16));
    *dw++ = VTX_DWORD1_GPR(
        DST_GPR(1), DST_REL(0),
        DST_SEL_X(SQ_SEL_X), DST_SEL_Y(SQ_SEL_Y),
        DST_SEL_Z(SQ_SEL_0), DST_SEL_W(SQ_SEL_1),
        USE_CONST_FIELDS(0), DATA_FORMAT(FMT_32_32_FLOAT),
        NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED),
        FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
        SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
    *dw++ = VTX_DWORD2(
        OFFSET(0), ENDIAN_SWAP(vtx_endian),
        CONST_BUF_NO_STRIDE(0), MEGA_FETCH(1), ALT_CONST(0),
        BUFFER_INDEX_MODE(SQ_CF_INDEX_NONE));
    *dw++ = VTX_DWORD_PAD;

    /* 6/7 - fetch two floats at offset 8 into GPR0 as (x, y, 0, 1) */
    *dw++ = VTX_DWORD0(
        VTX_INST(SQ_VTX_INST_FETCH), FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
        FETCH_WHOLE_QUAD(0), BUFFER_ID(0),
        SRC_GPR(0), SRC_REL(ABSOLUTE), SRC_SEL_X(SQ_SEL_X),
        MEGA_FETCH_COUNT(8));
    *dw++ = VTX_DWORD1_GPR(
        DST_GPR(0), DST_REL(0),
        DST_SEL_X(SQ_SEL_X), DST_SEL_Y(SQ_SEL_Y),
        DST_SEL_Z(SQ_SEL_0), DST_SEL_W(SQ_SEL_1),
        USE_CONST_FIELDS(0), DATA_FORMAT(FMT_32_32_FLOAT),
        NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED),
        FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
        SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
    *dw++ = VTX_DWORD2(
        OFFSET(8), ENDIAN_SWAP(vtx_endian),
        CONST_BUF_NO_STRIDE(0), MEGA_FETCH(0), ALT_CONST(0),
        BUFFER_INDEX_MODE(SQ_CF_INDEX_NONE));
    *dw++ = VTX_DWORD_PAD;

    return (int)(dw - shader);
}
380
381 /* copy ps --------------------------------------- */
/*
 * Build the copy pixel shader.
 *
 * The encoded program is stored into shader[] as consecutive 32-bit words,
 * starting at shader[0]; the number of words written (20 here) is returned.
 * The caller owns the buffer and must make it large enough.  ChipSet is
 * not referenced.
 *
 * Comment numbers are instruction slots of two dwords each.
 */
int evergreen_copy_ps(RADEONChipFamily ChipSet, uint32_t *shader)
{
    uint32_t *dw = shader;

    /* CF INST 0 - ALU clause: four instructions at slot 3, no kcache */
    *dw++ = CF_ALU_DWORD0(
        ADDR(3), KCACHE_BANK0(0), KCACHE_BANK1(0),
        KCACHE_MODE0(SQ_CF_KCACHE_NOP));
    *dw++ = CF_ALU_DWORD1(
        KCACHE_MODE1(SQ_CF_KCACHE_NOP), KCACHE_ADDR0(0), KCACHE_ADDR1(0),
        I_COUNT(4), ALT_CONST(0), CF_INST(SQ_CF_INST_ALU),
        WHOLE_QUAD_MODE(0), BARRIER(1));

    /* CF INST 1 - texture clause (SQ_CF_INST_TC): one fetch, at slot 8 */
    *dw++ = CF_DWORD0(ADDR(8), JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
    *dw++ = CF_DWORD1(
        POP_COUNT(0), CF_CONST(0), COND(SQ_CF_COND_ACTIVE),
        I_COUNT(1), VALID_PIXEL_MODE(0), END_OF_PROGRAM(0),
        CF_INST(SQ_CF_INST_TC), WHOLE_QUAD_MODE(0), BARRIER(1));

    /* CF INST 2 - export GPR0 to CF_PIXEL_MRT0; END_OF_PROGRAM(1) */
    *dw++ = CF_ALLOC_IMP_EXP_DWORD0(
        ARRAY_BASE(CF_PIXEL_MRT0), TYPE(SQ_EXPORT_PIXEL),
        RW_GPR(0), RW_REL(ABSOLUTE), INDEX_GPR(0), ELEM_SIZE(1));
    *dw++ = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(
        SRC_SEL_X(SQ_SEL_X), SRC_SEL_Y(SQ_SEL_Y),
        SRC_SEL_Z(SQ_SEL_Z), SRC_SEL_W(SQ_SEL_W),
        BURST_COUNT(1), VALID_PIXEL_MODE(0), END_OF_PROGRAM(1),
        CF_INST(SQ_CF_INST_EXPORT_DONE), MARK(0), BARRIER(1));

    /*
     * 3..6 - interpolate tex coords: four INTERP_XY instructions forming
     * one group (LAST(1) on the fourth).  Only the X and Y destinations
     * have WRITE_MASK(1); the Z and W slots are issued with WRITE_MASK(0).
     */

    /* 3 - dst x, written */
    *dw++ = ALU_DWORD0(
        SRC0_SEL(ALU_SRC_GPR_BASE + 0), SRC0_REL(ABSOLUTE),
        SRC0_ELEM(ELEM_Y), SRC0_NEG(0),
        SRC1_SEL(ALU_SRC_PARAM_BASE + 0), SRC1_REL(ABSOLUTE),
        SRC1_ELEM(ELEM_X), SRC1_NEG(0),
        INDEX_MODE(SQ_INDEX_AR_X), PRED_SEL(SQ_PRED_SEL_OFF), LAST(0));
    *dw++ = ALU_DWORD1_OP2(
        SRC0_ABS(0), SRC1_ABS(0), UPDATE_EXECUTE_MASK(0), UPDATE_PRED(0),
        WRITE_MASK(1), OMOD(SQ_ALU_OMOD_OFF),
        ALU_INST(SQ_OP2_INST_INTERP_XY), BANK_SWIZZLE(SQ_ALU_VEC_210),
        DST_GPR(0), DST_REL(ABSOLUTE), DST_ELEM(ELEM_X), CLAMP(0));

    /* 4 - dst y, written */
    *dw++ = ALU_DWORD0(
        SRC0_SEL(ALU_SRC_GPR_BASE + 0), SRC0_REL(ABSOLUTE),
        SRC0_ELEM(ELEM_X), SRC0_NEG(0),
        SRC1_SEL(ALU_SRC_PARAM_BASE + 0), SRC1_REL(ABSOLUTE),
        SRC1_ELEM(ELEM_X), SRC1_NEG(0),
        INDEX_MODE(SQ_INDEX_AR_X), PRED_SEL(SQ_PRED_SEL_OFF), LAST(0));
    *dw++ = ALU_DWORD1_OP2(
        SRC0_ABS(0), SRC1_ABS(0), UPDATE_EXECUTE_MASK(0), UPDATE_PRED(0),
        WRITE_MASK(1), OMOD(SQ_ALU_OMOD_OFF),
        ALU_INST(SQ_OP2_INST_INTERP_XY), BANK_SWIZZLE(SQ_ALU_VEC_210),
        DST_GPR(0), DST_REL(ABSOLUTE), DST_ELEM(ELEM_Y), CLAMP(0));

    /* 5 - dst z, write masked off */
    *dw++ = ALU_DWORD0(
        SRC0_SEL(ALU_SRC_GPR_BASE + 0), SRC0_REL(ABSOLUTE),
        SRC0_ELEM(ELEM_Y), SRC0_NEG(0),
        SRC1_SEL(ALU_SRC_PARAM_BASE + 0), SRC1_REL(ABSOLUTE),
        SRC1_ELEM(ELEM_X), SRC1_NEG(0),
        INDEX_MODE(SQ_INDEX_AR_X), PRED_SEL(SQ_PRED_SEL_OFF), LAST(0));
    *dw++ = ALU_DWORD1_OP2(
        SRC0_ABS(0), SRC1_ABS(0), UPDATE_EXECUTE_MASK(0), UPDATE_PRED(0),
        WRITE_MASK(0), OMOD(SQ_ALU_OMOD_OFF),
        ALU_INST(SQ_OP2_INST_INTERP_XY), BANK_SWIZZLE(SQ_ALU_VEC_210),
        DST_GPR(0), DST_REL(ABSOLUTE), DST_ELEM(ELEM_Z), CLAMP(0));

    /* 6 - dst w, write masked off; closes the group */
    *dw++ = ALU_DWORD0(
        SRC0_SEL(ALU_SRC_GPR_BASE + 0), SRC0_REL(ABSOLUTE),
        SRC0_ELEM(ELEM_X), SRC0_NEG(0),
        SRC1_SEL(ALU_SRC_PARAM_BASE + 0), SRC1_REL(ABSOLUTE),
        SRC1_ELEM(ELEM_X), SRC1_NEG(0),
        INDEX_MODE(SQ_INDEX_AR_X), PRED_SEL(SQ_PRED_SEL_OFF), LAST(1));
    *dw++ = ALU_DWORD1_OP2(
        SRC0_ABS(0), SRC1_ABS(0), UPDATE_EXECUTE_MASK(0), UPDATE_PRED(0),
        WRITE_MASK(0), OMOD(SQ_ALU_OMOD_OFF),
        ALU_INST(SQ_OP2_INST_INTERP_XY), BANK_SWIZZLE(SQ_ALU_VEC_210),
        DST_GPR(0), DST_REL(ABSOLUTE), DST_ELEM(ELEM_W), CLAMP(0));

    /* 7 - two zero dwords, so that the texture fetch starts at slot 8 */
    *dw++ = 0x00000000;
    *dw++ = 0x00000000;

    /* 8/9 TEX INST 0 - sample resource 0 / sampler 0 at GPR0 into GPR0 */
    *dw++ = TEX_DWORD0(
        TEX_INST(SQ_TEX_INST_SAMPLE), INST_MOD(0), FETCH_WHOLE_QUAD(0),
        RESOURCE_ID(0), SRC_GPR(0), SRC_REL(ABSOLUTE), ALT_CONST(0),
        RESOURCE_INDEX_MODE(SQ_CF_INDEX_NONE),
        SAMPLER_INDEX_MODE(SQ_CF_INDEX_NONE));
    *dw++ = TEX_DWORD1(
        DST_GPR(0), DST_REL(ABSOLUTE),
        DST_SEL_X(SQ_SEL_X), /* R */
        DST_SEL_Y(SQ_SEL_Y), /* G */
        DST_SEL_Z(SQ_SEL_Z), /* B */
        DST_SEL_W(SQ_SEL_W), /* A */
        LOD_BIAS(0),
        COORD_TYPE_X(TEX_UNNORMALIZED), COORD_TYPE_Y(TEX_UNNORMALIZED),
        COORD_TYPE_Z(TEX_UNNORMALIZED), COORD_TYPE_W(TEX_UNNORMALIZED));
    *dw++ = TEX_DWORD2(
        OFFSET_X(0), OFFSET_Y(0), OFFSET_Z(0), SAMPLER_ID(0),
        SRC_SEL_X(SQ_SEL_X), SRC_SEL_Y(SQ_SEL_Y),
        SRC_SEL_Z(SQ_SEL_0), SRC_SEL_W(SQ_SEL_1));
    *dw++ = TEX_DWORD_PAD;

    return (int)(dw - shader);
}
563
/*
 * Build the Xv (video) vertex shader.
 *
 * The encoded program is stored into shader[] as consecutive 32-bit words,
 * starting at shader[0].  The return value is the number of words written
 * (20 for this program).  No bounds checking is performed, so the caller
 * must supply a buffer at least that large.  ChipSet is not referenced.
 *
 * The numbers in the comments below are instruction slots of two dwords
 * each; the ADDR() operands of the CF instructions line up with that
 * numbering (NOTE(review): confirm the unit against the ISA documentation).
 */
int evergreen_xv_vs(RADEONChipFamily ChipSet, uint32_t *shader)
{
    uint32_t *dw = shader;

    /*
     * Select the vertex-fetch endian swap once for both fetches.  The
     * previous code placed #if/#else/#endif inside each VTX_DWORD2()
     * argument list; if VTX_DWORD2 is a function-like macro (its
     * definition lives in evergreen_shader.h) that is undefined behaviour
     * in ISO C and only works as a compiler extension.
     */
#if X_BYTE_ORDER == X_BIG_ENDIAN
    const uint32_t vtx_endian = SQ_ENDIAN_8IN32;
#else
    const uint32_t vtx_endian = SQ_ENDIAN_NONE;
#endif

    /* 0 - vertex fetch clause (SQ_CF_INST_VC): two fetches, at slot 6 */
    *dw++ = CF_DWORD0(ADDR(6), JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
    *dw++ = CF_DWORD1(
        POP_COUNT(0), CF_CONST(0), COND(SQ_CF_COND_ACTIVE),
        I_COUNT(2), VALID_PIXEL_MODE(0), END_OF_PROGRAM(0),
        CF_INST(SQ_CF_INST_VC), WHOLE_QUAD_MODE(0), BARRIER(1));

    /* 1 - ALU clause: two instructions at slot 4, kcache 0 locked */
    *dw++ = CF_ALU_DWORD0(
        ADDR(4), KCACHE_BANK0(0), KCACHE_BANK1(0),
        KCACHE_MODE0(SQ_CF_KCACHE_LOCK_1));
    *dw++ = CF_ALU_DWORD1(
        KCACHE_MODE1(SQ_CF_KCACHE_NOP), KCACHE_ADDR0(0), KCACHE_ADDR1(0),
        I_COUNT(2), ALT_CONST(0), CF_INST(SQ_CF_INST_ALU),
        WHOLE_QUAD_MODE(0), BARRIER(1));

    /* 2 - export GPR1 as the position (CF_POS0) */
    *dw++ = CF_ALLOC_IMP_EXP_DWORD0(
        ARRAY_BASE(CF_POS0), TYPE(SQ_EXPORT_POS),
        RW_GPR(1), RW_REL(ABSOLUTE), INDEX_GPR(0), ELEM_SIZE(3));
    *dw++ = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(
        SRC_SEL_X(SQ_SEL_X), SRC_SEL_Y(SQ_SEL_Y),
        SRC_SEL_Z(SQ_SEL_Z), SRC_SEL_W(SQ_SEL_W),
        BURST_COUNT(1), VALID_PIXEL_MODE(0), END_OF_PROGRAM(0),
        CF_INST(SQ_CF_INST_EXPORT_DONE), MARK(0), BARRIER(1));

    /* 3 - export GPR0 as parameter 0; carries END_OF_PROGRAM(1) */
    *dw++ = CF_ALLOC_IMP_EXP_DWORD0(
        ARRAY_BASE(0), TYPE(SQ_EXPORT_PARAM),
        RW_GPR(0), RW_REL(ABSOLUTE), INDEX_GPR(0), ELEM_SIZE(3));
    *dw++ = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(
        SRC_SEL_X(SQ_SEL_X), SRC_SEL_Y(SQ_SEL_Y),
        SRC_SEL_Z(SQ_SEL_Z), SRC_SEL_W(SQ_SEL_W),
        BURST_COUNT(1), VALID_PIXEL_MODE(0), END_OF_PROGRAM(1),
        CF_INST(SQ_CF_INST_EXPORT_DONE), MARK(0), BARRIER(0));

    /*
     * 4 - texX / w: GPR0.x = GPR0.x * kcache0[0].x.
     * The division is encoded as a MUL, so the constant presumably holds
     * the reciprocal of the width - TODO confirm against the caller.
     */
    *dw++ = ALU_DWORD0(
        SRC0_SEL(ALU_SRC_GPR_BASE + 0), SRC0_REL(ABSOLUTE),
        SRC0_ELEM(ELEM_X), SRC0_NEG(0),
        SRC1_SEL(ALU_SRC_KCACHE0_BASE + 0), SRC1_REL(ABSOLUTE),
        SRC1_ELEM(ELEM_X), SRC1_NEG(0),
        INDEX_MODE(SQ_INDEX_AR_X), PRED_SEL(SQ_PRED_SEL_OFF), LAST(0));
    *dw++ = ALU_DWORD1_OP2(
        SRC0_ABS(0), SRC1_ABS(0), UPDATE_EXECUTE_MASK(0), UPDATE_PRED(0),
        WRITE_MASK(1), OMOD(SQ_ALU_OMOD_OFF), ALU_INST(SQ_OP2_INST_MUL),
        BANK_SWIZZLE(SQ_ALU_VEC_012),
        DST_GPR(0), DST_REL(ABSOLUTE), DST_ELEM(ELEM_X), CLAMP(0));

    /*
     * 5 - texY / h: GPR0.y = GPR0.y * kcache0[0].y (same remark as above);
     * closes the ALU group with LAST(1).
     */
    *dw++ = ALU_DWORD0(
        SRC0_SEL(ALU_SRC_GPR_BASE + 0), SRC0_REL(ABSOLUTE),
        SRC0_ELEM(ELEM_Y), SRC0_NEG(0),
        SRC1_SEL(ALU_SRC_KCACHE0_BASE + 0), SRC1_REL(ABSOLUTE),
        SRC1_ELEM(ELEM_Y), SRC1_NEG(0),
        INDEX_MODE(SQ_INDEX_AR_X), PRED_SEL(SQ_PRED_SEL_OFF), LAST(1));
    *dw++ = ALU_DWORD1_OP2(
        SRC0_ABS(0), SRC1_ABS(0), UPDATE_EXECUTE_MASK(0), UPDATE_PRED(0),
        WRITE_MASK(1), OMOD(SQ_ALU_OMOD_OFF), ALU_INST(SQ_OP2_INST_MUL),
        BANK_SWIZZLE(SQ_ALU_VEC_012),
        DST_GPR(0), DST_REL(ABSOLUTE), DST_ELEM(ELEM_Y), CLAMP(0));

    /* 6/7 - fetch two floats at offset 0 into GPR1 as (x, y, 0, 1) */
    *dw++ = VTX_DWORD0(
        VTX_INST(SQ_VTX_INST_FETCH), FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
        FETCH_WHOLE_QUAD(0), BUFFER_ID(0),
        SRC_GPR(0), SRC_REL(ABSOLUTE), SRC_SEL_X(SQ_SEL_X),
        MEGA_FETCH_COUNT(16));
    *dw++ = VTX_DWORD1_GPR(
        DST_GPR(1), DST_REL(ABSOLUTE),
        DST_SEL_X(SQ_SEL_X), DST_SEL_Y(SQ_SEL_Y),
        DST_SEL_Z(SQ_SEL_0), DST_SEL_W(SQ_SEL_1),
        USE_CONST_FIELDS(0), DATA_FORMAT(FMT_32_32_FLOAT),
        NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED),
        FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
        SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
    *dw++ = VTX_DWORD2(
        OFFSET(0), ENDIAN_SWAP(vtx_endian),
        CONST_BUF_NO_STRIDE(0), MEGA_FETCH(1), ALT_CONST(0),
        BUFFER_INDEX_MODE(SQ_CF_INDEX_NONE));
    *dw++ = VTX_DWORD_PAD;

    /* 8/9 - fetch two floats at offset 8 into GPR0 as (x, y, 0, 1) */
    *dw++ = VTX_DWORD0(
        VTX_INST(SQ_VTX_INST_FETCH), FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
        FETCH_WHOLE_QUAD(0), BUFFER_ID(0),
        SRC_GPR(0), SRC_REL(ABSOLUTE), SRC_SEL_X(SQ_SEL_X),
        MEGA_FETCH_COUNT(8));
    *dw++ = VTX_DWORD1_GPR(
        DST_GPR(0), DST_REL(ABSOLUTE),
        DST_SEL_X(SQ_SEL_X), DST_SEL_Y(SQ_SEL_Y),
        DST_SEL_Z(SQ_SEL_0), DST_SEL_W(SQ_SEL_1),
        USE_CONST_FIELDS(0), DATA_FORMAT(FMT_32_32_FLOAT),
        NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED),
        FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
        SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
    *dw++ = VTX_DWORD2(
        OFFSET(8), ENDIAN_SWAP(vtx_endian),
        CONST_BUF_NO_STRIDE(0), MEGA_FETCH(0), ALT_CONST(0),
        BUFFER_INDEX_MODE(SQ_CF_INDEX_NONE));
    *dw++ = VTX_DWORD_PAD;

    return (int)(dw - shader);
}
746
evergreen_xv_ps(RADEONChipFamily ChipSet,uint32_t * shader)747 int evergreen_xv_ps(RADEONChipFamily ChipSet, uint32_t* shader)
748 {
749 int i = 0;
750
751 /* 0 */
752 shader[i++] = CF_ALU_DWORD0(ADDR(5),
753 KCACHE_BANK0(0),
754 KCACHE_BANK1(0),
755 KCACHE_MODE0(SQ_CF_KCACHE_NOP));
756 shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
757 KCACHE_ADDR0(0),
758 KCACHE_ADDR1(0),
759 I_COUNT(4),
760 ALT_CONST(0),
761 CF_INST(SQ_CF_INST_ALU),
762 WHOLE_QUAD_MODE(0),
763 BARRIER(1));
764 /* 1 */
765 shader[i++] = CF_DWORD0(ADDR(21),
766 JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
767 shader[i++] = CF_DWORD1(POP_COUNT(0),
768 CF_CONST(0),
769 COND(SQ_CF_COND_BOOL),
770 I_COUNT(0),
771 VALID_PIXEL_MODE(0),
772 END_OF_PROGRAM(0),
773 CF_INST(SQ_CF_INST_CALL),
774 WHOLE_QUAD_MODE(0),
775 BARRIER(0));
776 /* 2 */
777 shader[i++] = CF_DWORD0(ADDR(30),
778 JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
779 shader[i++] = CF_DWORD1(POP_COUNT(0),
780 CF_CONST(0),
781 COND(SQ_CF_COND_NOT_BOOL),
782 I_COUNT(0),
783 VALID_PIXEL_MODE(0),
784 END_OF_PROGRAM(0),
785 CF_INST(SQ_CF_INST_CALL),
786 WHOLE_QUAD_MODE(0),
787 BARRIER(0));
788 /* 3 */
789 shader[i++] = CF_ALU_DWORD0(ADDR(9),
790 KCACHE_BANK0(0),
791 KCACHE_BANK1(0),
792 KCACHE_MODE0(SQ_CF_KCACHE_LOCK_1));
793 shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
794 KCACHE_ADDR0(0),
795 KCACHE_ADDR1(0),
796 I_COUNT(12),
797 ALT_CONST(0),
798 CF_INST(SQ_CF_INST_ALU),
799 WHOLE_QUAD_MODE(0),
800 BARRIER(1));
801 /* 4 */
802 shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_PIXEL_MRT0),
803 TYPE(SQ_EXPORT_PIXEL),
804 RW_GPR(2),
805 RW_REL(ABSOLUTE),
806 INDEX_GPR(0),
807 ELEM_SIZE(3));
808 shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
809 SRC_SEL_Y(SQ_SEL_Y),
810 SRC_SEL_Z(SQ_SEL_Z),
811 SRC_SEL_W(SQ_SEL_W),
812 BURST_COUNT(1),
813 VALID_PIXEL_MODE(0),
814 END_OF_PROGRAM(1),
815 CF_INST(SQ_CF_INST_EXPORT_DONE),
816 MARK(0),
817 BARRIER(1));
818 /* 5 interpolate tex coords */
819 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
820 SRC0_REL(ABSOLUTE),
821 SRC0_ELEM(ELEM_Y),
822 SRC0_NEG(0),
823 SRC1_SEL(ALU_SRC_PARAM_BASE + 0),
824 SRC1_REL(ABSOLUTE),
825 SRC1_ELEM(ELEM_X),
826 SRC1_NEG(0),
827 INDEX_MODE(SQ_INDEX_AR_X),
828 PRED_SEL(SQ_PRED_SEL_OFF),
829 LAST(0));
830 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
831 SRC1_ABS(0),
832 UPDATE_EXECUTE_MASK(0),
833 UPDATE_PRED(0),
834 WRITE_MASK(1),
835 OMOD(SQ_ALU_OMOD_OFF),
836 ALU_INST(SQ_OP2_INST_INTERP_XY),
837 BANK_SWIZZLE(SQ_ALU_VEC_210),
838 DST_GPR(0),
839 DST_REL(ABSOLUTE),
840 DST_ELEM(ELEM_X),
841 CLAMP(0));
842 /* 6 */
843 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
844 SRC0_REL(ABSOLUTE),
845 SRC0_ELEM(ELEM_X),
846 SRC0_NEG(0),
847 SRC1_SEL(ALU_SRC_PARAM_BASE + 0),
848 SRC1_REL(ABSOLUTE),
849 SRC1_ELEM(ELEM_X),
850 SRC1_NEG(0),
851 INDEX_MODE(SQ_INDEX_AR_X),
852 PRED_SEL(SQ_PRED_SEL_OFF),
853 LAST(0));
854 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
855 SRC1_ABS(0),
856 UPDATE_EXECUTE_MASK(0),
857 UPDATE_PRED(0),
858 WRITE_MASK(1),
859 OMOD(SQ_ALU_OMOD_OFF),
860 ALU_INST(SQ_OP2_INST_INTERP_XY),
861 BANK_SWIZZLE(SQ_ALU_VEC_210),
862 DST_GPR(0),
863 DST_REL(ABSOLUTE),
864 DST_ELEM(ELEM_Y),
865 CLAMP(0));
866 /* 7 */
867 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
868 SRC0_REL(ABSOLUTE),
869 SRC0_ELEM(ELEM_Y),
870 SRC0_NEG(0),
871 SRC1_SEL(ALU_SRC_PARAM_BASE + 0),
872 SRC1_REL(ABSOLUTE),
873 SRC1_ELEM(ELEM_X),
874 SRC1_NEG(0),
875 INDEX_MODE(SQ_INDEX_AR_X),
876 PRED_SEL(SQ_PRED_SEL_OFF),
877 LAST(0));
878 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
879 SRC1_ABS(0),
880 UPDATE_EXECUTE_MASK(0),
881 UPDATE_PRED(0),
882 WRITE_MASK(0),
883 OMOD(SQ_ALU_OMOD_OFF),
884 ALU_INST(SQ_OP2_INST_INTERP_XY),
885 BANK_SWIZZLE(SQ_ALU_VEC_210),
886 DST_GPR(0),
887 DST_REL(ABSOLUTE),
888 DST_ELEM(ELEM_Z),
889 CLAMP(0));
890 /* 8 */
891 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
892 SRC0_REL(ABSOLUTE),
893 SRC0_ELEM(ELEM_X),
894 SRC0_NEG(0),
895 SRC1_SEL(ALU_SRC_PARAM_BASE + 0),
896 SRC1_REL(ABSOLUTE),
897 SRC1_ELEM(ELEM_X),
898 SRC1_NEG(0),
899 INDEX_MODE(SQ_INDEX_AR_X),
900 PRED_SEL(SQ_PRED_SEL_OFF),
901 LAST(1));
902 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
903 SRC1_ABS(0),
904 UPDATE_EXECUTE_MASK(0),
905 UPDATE_PRED(0),
906 WRITE_MASK(0),
907 OMOD(SQ_ALU_OMOD_OFF),
908 ALU_INST(SQ_OP2_INST_INTERP_XY),
909 BANK_SWIZZLE(SQ_ALU_VEC_210),
910 DST_GPR(0),
911 DST_REL(ABSOLUTE),
912 DST_ELEM(ELEM_W),
913 CLAMP(0));
914
915 /* 9,10,11,12 */
916 /* r2.x = MAD(c0.w, r1.x, c0.x) */
917 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 0),
918 SRC0_REL(ABSOLUTE),
919 SRC0_ELEM(ELEM_W),
920 SRC0_NEG(0),
921 SRC1_SEL(ALU_SRC_GPR_BASE + 1),
922 SRC1_REL(ABSOLUTE),
923 SRC1_ELEM(ELEM_X),
924 SRC1_NEG(0),
925 INDEX_MODE(SQ_INDEX_LOOP),
926 PRED_SEL(SQ_PRED_SEL_OFF),
927 LAST(0));
928 shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(ALU_SRC_KCACHE0_BASE + 0),
929 SRC2_REL(ABSOLUTE),
930 SRC2_ELEM(ELEM_X),
931 SRC2_NEG(0),
932 ALU_INST(SQ_OP3_INST_MULADD),
933 BANK_SWIZZLE(SQ_ALU_VEC_012),
934 DST_GPR(2),
935 DST_REL(ABSOLUTE),
936 DST_ELEM(ELEM_X),
937 CLAMP(0));
938 /* r2.y = MAD(c0.w, r1.x, c0.y) */
939 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 0),
940 SRC0_REL(ABSOLUTE),
941 SRC0_ELEM(ELEM_W),
942 SRC0_NEG(0),
943 SRC1_SEL(ALU_SRC_GPR_BASE + 1),
944 SRC1_REL(ABSOLUTE),
945 SRC1_ELEM(ELEM_X),
946 SRC1_NEG(0),
947 INDEX_MODE(SQ_INDEX_LOOP),
948 PRED_SEL(SQ_PRED_SEL_OFF),
949 LAST(0));
950 shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(ALU_SRC_KCACHE0_BASE + 0),
951 SRC2_REL(ABSOLUTE),
952 SRC2_ELEM(ELEM_Y),
953 SRC2_NEG(0),
954 ALU_INST(SQ_OP3_INST_MULADD),
955 BANK_SWIZZLE(SQ_ALU_VEC_012),
956 DST_GPR(2),
957 DST_REL(ABSOLUTE),
958 DST_ELEM(ELEM_Y),
959 CLAMP(0));
960 /* r2.z = MAD(c0.w, r1.x, c0.z) */
961 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 0),
962 SRC0_REL(ABSOLUTE),
963 SRC0_ELEM(ELEM_W),
964 SRC0_NEG(0),
965 SRC1_SEL(ALU_SRC_GPR_BASE + 1),
966 SRC1_REL(ABSOLUTE),
967 SRC1_ELEM(ELEM_X),
968 SRC1_NEG(0),
969 INDEX_MODE(SQ_INDEX_LOOP),
970 PRED_SEL(SQ_PRED_SEL_OFF),
971 LAST(0));
972 shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(ALU_SRC_KCACHE0_BASE + 0),
973 SRC2_REL(ABSOLUTE),
974 SRC2_ELEM(ELEM_Z),
975 SRC2_NEG(0),
976 ALU_INST(SQ_OP3_INST_MULADD),
977 BANK_SWIZZLE(SQ_ALU_VEC_012),
978 DST_GPR(2),
979 DST_REL(ABSOLUTE),
980 DST_ELEM(ELEM_Z),
981 CLAMP(0));
982 /* r2.w = MAD(0, 0, 1) */
983 shader[i++] = ALU_DWORD0(SRC0_SEL(SQ_ALU_SRC_0),
984 SRC0_REL(ABSOLUTE),
985 SRC0_ELEM(ELEM_X),
986 SRC0_NEG(0),
987 SRC1_SEL(SQ_ALU_SRC_0),
988 SRC1_REL(ABSOLUTE),
989 SRC1_ELEM(ELEM_X),
990 SRC1_NEG(0),
991 INDEX_MODE(SQ_INDEX_LOOP),
992 PRED_SEL(SQ_PRED_SEL_OFF),
993 LAST(1));
994 shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_1),
995 SRC2_REL(ABSOLUTE),
996 SRC2_ELEM(ELEM_X),
997 SRC2_NEG(0),
998 ALU_INST(SQ_OP3_INST_MULADD),
999 BANK_SWIZZLE(SQ_ALU_VEC_012),
1000 DST_GPR(2),
1001 DST_REL(ABSOLUTE),
1002 DST_ELEM(ELEM_W),
1003 CLAMP(0));
1004
1005 /* 13,14,15,16 */
1006 /* r2.x = MAD(c1.x, r1.y, pv.x) */
1007 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 1),
1008 SRC0_REL(ABSOLUTE),
1009 SRC0_ELEM(ELEM_X),
1010 SRC0_NEG(0),
1011 SRC1_SEL(ALU_SRC_GPR_BASE + 1),
1012 SRC1_REL(ABSOLUTE),
1013 SRC1_ELEM(ELEM_Y),
1014 SRC1_NEG(0),
1015 INDEX_MODE(SQ_INDEX_LOOP),
1016 PRED_SEL(SQ_PRED_SEL_OFF),
1017 LAST(0));
1018 shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_PV),
1019 SRC2_REL(ABSOLUTE),
1020 SRC2_ELEM(ELEM_X),
1021 SRC2_NEG(0),
1022 ALU_INST(SQ_OP3_INST_MULADD),
1023 BANK_SWIZZLE(SQ_ALU_VEC_012),
1024 DST_GPR(2),
1025 DST_REL(ABSOLUTE),
1026 DST_ELEM(ELEM_X),
1027 CLAMP(0));
1028 /* r2.y = MAD(c1.y, r1.y, pv.y) */
1029 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 1),
1030 SRC0_REL(ABSOLUTE),
1031 SRC0_ELEM(ELEM_Y),
1032 SRC0_NEG(0),
1033 SRC1_SEL(ALU_SRC_GPR_BASE + 1),
1034 SRC1_REL(ABSOLUTE),
1035 SRC1_ELEM(ELEM_Y),
1036 SRC1_NEG(0),
1037 INDEX_MODE(SQ_INDEX_LOOP),
1038 PRED_SEL(SQ_PRED_SEL_OFF),
1039 LAST(0));
1040 shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_PV),
1041 SRC2_REL(ABSOLUTE),
1042 SRC2_ELEM(ELEM_Y),
1043 SRC2_NEG(0),
1044 ALU_INST(SQ_OP3_INST_MULADD),
1045 BANK_SWIZZLE(SQ_ALU_VEC_012),
1046 DST_GPR(2),
1047 DST_REL(ABSOLUTE),
1048 DST_ELEM(ELEM_Y),
1049 CLAMP(0));
1050 /* r2.z = MAD(c1.z, r1.y, pv.z) */
1051 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 1),
1052 SRC0_REL(ABSOLUTE),
1053 SRC0_ELEM(ELEM_Z),
1054 SRC0_NEG(0),
1055 SRC1_SEL(ALU_SRC_GPR_BASE + 1),
1056 SRC1_REL(ABSOLUTE),
1057 SRC1_ELEM(ELEM_Y),
1058 SRC1_NEG(0),
1059 INDEX_MODE(SQ_INDEX_LOOP),
1060 PRED_SEL(SQ_PRED_SEL_OFF),
1061 LAST(0));
1062 shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_PV),
1063 SRC2_REL(ABSOLUTE),
1064 SRC2_ELEM(ELEM_Z),
1065 SRC2_NEG(0),
1066 ALU_INST(SQ_OP3_INST_MULADD),
1067 BANK_SWIZZLE(SQ_ALU_VEC_012),
1068 DST_GPR(2),
1069 DST_REL(ABSOLUTE),
1070 DST_ELEM(ELEM_Z),
1071 CLAMP(0));
1072 /* r2.w = MAD(0, 0, 1) */
1073 shader[i++] = ALU_DWORD0(SRC0_SEL(SQ_ALU_SRC_0),
1074 SRC0_REL(ABSOLUTE),
1075 SRC0_ELEM(ELEM_X),
1076 SRC0_NEG(0),
1077 SRC1_SEL(SQ_ALU_SRC_0),
1078 SRC1_REL(ABSOLUTE),
1079 SRC1_ELEM(ELEM_X),
1080 SRC1_NEG(0),
1081 INDEX_MODE(SQ_INDEX_LOOP),
1082 PRED_SEL(SQ_PRED_SEL_OFF),
1083 LAST(1));
1084 shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_1),
1085 SRC2_REL(ABSOLUTE),
1086 SRC2_ELEM(ELEM_W),
1087 SRC2_NEG(0),
1088 ALU_INST(SQ_OP3_INST_MULADD),
1089 BANK_SWIZZLE(SQ_ALU_VEC_012),
1090 DST_GPR(2),
1091 DST_REL(ABSOLUTE),
1092 DST_ELEM(ELEM_W),
1093 CLAMP(0));
1094 /* 17,18,19,20 */
1095 /* r2.x = MAD(c2.x, r1.z, pv.x) */
1096 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 2),
1097 SRC0_REL(ABSOLUTE),
1098 SRC0_ELEM(ELEM_X),
1099 SRC0_NEG(0),
1100 SRC1_SEL(ALU_SRC_GPR_BASE + 1),
1101 SRC1_REL(ABSOLUTE),
1102 SRC1_ELEM(ELEM_Z),
1103 SRC1_NEG(0),
1104 INDEX_MODE(SQ_INDEX_LOOP),
1105 PRED_SEL(SQ_PRED_SEL_OFF),
1106 LAST(0));
1107 shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_PV),
1108 SRC2_REL(ABSOLUTE),
1109 SRC2_ELEM(ELEM_X),
1110 SRC2_NEG(0),
1111 ALU_INST(SQ_OP3_INST_MULADD),
1112 BANK_SWIZZLE(SQ_ALU_VEC_012),
1113 DST_GPR(2),
1114 DST_REL(ABSOLUTE),
1115 DST_ELEM(ELEM_X),
1116 CLAMP(1));
1117 /* r2.y = MAD(c2.y, r1.z, pv.y) */
1118 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 2),
1119 SRC0_REL(ABSOLUTE),
1120 SRC0_ELEM(ELEM_Y),
1121 SRC0_NEG(0),
1122 SRC1_SEL(ALU_SRC_GPR_BASE + 1),
1123 SRC1_REL(ABSOLUTE),
1124 SRC1_ELEM(ELEM_Z),
1125 SRC1_NEG(0),
1126 INDEX_MODE(SQ_INDEX_LOOP),
1127 PRED_SEL(SQ_PRED_SEL_OFF),
1128 LAST(0));
1129 shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_PV),
1130 SRC2_REL(ABSOLUTE),
1131 SRC2_ELEM(ELEM_Y),
1132 SRC2_NEG(0),
1133 ALU_INST(SQ_OP3_INST_MULADD),
1134 BANK_SWIZZLE(SQ_ALU_VEC_012),
1135 DST_GPR(2),
1136 DST_REL(ABSOLUTE),
1137 DST_ELEM(ELEM_Y),
1138 CLAMP(1));
1139 /* r2.z = MAD(c2.z, r1.z, pv.z) */
1140 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 2),
1141 SRC0_REL(ABSOLUTE),
1142 SRC0_ELEM(ELEM_Z),
1143 SRC0_NEG(0),
1144 SRC1_SEL(ALU_SRC_GPR_BASE + 1),
1145 SRC1_REL(ABSOLUTE),
1146 SRC1_ELEM(ELEM_Z),
1147 SRC1_NEG(0),
1148 INDEX_MODE(SQ_INDEX_LOOP),
1149 PRED_SEL(SQ_PRED_SEL_OFF),
1150 LAST(0));
1151 shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_PV),
1152 SRC2_REL(ABSOLUTE),
1153 SRC2_ELEM(ELEM_Z),
1154 SRC2_NEG(0),
1155 ALU_INST(SQ_OP3_INST_MULADD),
1156 BANK_SWIZZLE(SQ_ALU_VEC_012),
1157 DST_GPR(2),
1158 DST_REL(ABSOLUTE),
1159 DST_ELEM(ELEM_Z),
1160 CLAMP(1));
1161 /* r2.w = MAD(0, 0, 1) */
1162 shader[i++] = ALU_DWORD0(SRC0_SEL(SQ_ALU_SRC_0),
1163 SRC0_REL(ABSOLUTE),
1164 SRC0_ELEM(ELEM_X),
1165 SRC0_NEG(0),
1166 SRC1_SEL(SQ_ALU_SRC_0),
1167 SRC1_REL(ABSOLUTE),
1168 SRC1_ELEM(ELEM_X),
1169 SRC1_NEG(0),
1170 INDEX_MODE(SQ_INDEX_LOOP),
1171 PRED_SEL(SQ_PRED_SEL_OFF),
1172 LAST(1));
1173 shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_1),
1174 SRC2_REL(ABSOLUTE),
1175 SRC2_ELEM(ELEM_X),
1176 SRC2_NEG(0),
1177 ALU_INST(SQ_OP3_INST_MULADD),
1178 BANK_SWIZZLE(SQ_ALU_VEC_012),
1179 DST_GPR(2),
1180 DST_REL(ABSOLUTE),
1181 DST_ELEM(ELEM_W),
1182 CLAMP(1));
1183
1184 /* 21 */
1185 shader[i++] = CF_DWORD0(ADDR(24),
1186 JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
1187 shader[i++] = CF_DWORD1(POP_COUNT(0),
1188 CF_CONST(0),
1189 COND(SQ_CF_COND_ACTIVE),
1190 I_COUNT(3),
1191 VALID_PIXEL_MODE(0),
1192 END_OF_PROGRAM(0),
1193 CF_INST(SQ_CF_INST_TC),
1194 WHOLE_QUAD_MODE(0),
1195 BARRIER(1));
1196 /* 22 */
1197 shader[i++] = CF_DWORD0(ADDR(0),
1198 JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
1199 shader[i++] = CF_DWORD1(POP_COUNT(0),
1200 CF_CONST(0),
1201 COND(SQ_CF_COND_ACTIVE),
1202 I_COUNT(0),
1203 VALID_PIXEL_MODE(0),
1204 END_OF_PROGRAM(0),
1205 CF_INST(SQ_CF_INST_RETURN),
1206 WHOLE_QUAD_MODE(0),
1207 BARRIER(1));
1208 /* 23 */
1209 shader[i++] = 0x00000000;
1210 shader[i++] = 0x00000000;
1211 /* 24/25 */
1212 shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
1213 INST_MOD(0),
1214 FETCH_WHOLE_QUAD(0),
1215 RESOURCE_ID(0),
1216 SRC_GPR(0),
1217 SRC_REL(ABSOLUTE),
1218 ALT_CONST(0),
1219 RESOURCE_INDEX_MODE(SQ_CF_INDEX_NONE),
1220 SAMPLER_INDEX_MODE(SQ_CF_INDEX_NONE));
1221 shader[i++] = TEX_DWORD1(DST_GPR(1),
1222 DST_REL(ABSOLUTE),
1223 DST_SEL_X(SQ_SEL_X),
1224 DST_SEL_Y(SQ_SEL_MASK),
1225 DST_SEL_Z(SQ_SEL_MASK),
1226 DST_SEL_W(SQ_SEL_1),
1227 LOD_BIAS(0),
1228 COORD_TYPE_X(TEX_NORMALIZED),
1229 COORD_TYPE_Y(TEX_NORMALIZED),
1230 COORD_TYPE_Z(TEX_NORMALIZED),
1231 COORD_TYPE_W(TEX_NORMALIZED));
1232 shader[i++] = TEX_DWORD2(OFFSET_X(0),
1233 OFFSET_Y(0),
1234 OFFSET_Z(0),
1235 SAMPLER_ID(0),
1236 SRC_SEL_X(SQ_SEL_X),
1237 SRC_SEL_Y(SQ_SEL_Y),
1238 SRC_SEL_Z(SQ_SEL_0),
1239 SRC_SEL_W(SQ_SEL_1));
1240 shader[i++] = TEX_DWORD_PAD;
1241 /* 26/27 */
1242 shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
1243 INST_MOD(0),
1244 FETCH_WHOLE_QUAD(0),
1245 RESOURCE_ID(1),
1246 SRC_GPR(0),
1247 SRC_REL(ABSOLUTE),
1248 ALT_CONST(0),
1249 RESOURCE_INDEX_MODE(SQ_CF_INDEX_NONE),
1250 SAMPLER_INDEX_MODE(SQ_CF_INDEX_NONE));
1251 shader[i++] = TEX_DWORD1(DST_GPR(1),
1252 DST_REL(ABSOLUTE),
1253 DST_SEL_X(SQ_SEL_MASK),
1254 DST_SEL_Y(SQ_SEL_MASK),
1255 DST_SEL_Z(SQ_SEL_X),
1256 DST_SEL_W(SQ_SEL_MASK),
1257 LOD_BIAS(0),
1258 COORD_TYPE_X(TEX_NORMALIZED),
1259 COORD_TYPE_Y(TEX_NORMALIZED),
1260 COORD_TYPE_Z(TEX_NORMALIZED),
1261 COORD_TYPE_W(TEX_NORMALIZED));
1262 shader[i++] = TEX_DWORD2(OFFSET_X(0),
1263 OFFSET_Y(0),
1264 OFFSET_Z(0),
1265 SAMPLER_ID(1),
1266 SRC_SEL_X(SQ_SEL_X),
1267 SRC_SEL_Y(SQ_SEL_Y),
1268 SRC_SEL_Z(SQ_SEL_0),
1269 SRC_SEL_W(SQ_SEL_1));
1270 shader[i++] = TEX_DWORD_PAD;
1271 /* 28/29 */
1272 shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
1273 INST_MOD(0),
1274 FETCH_WHOLE_QUAD(0),
1275 RESOURCE_ID(2),
1276 SRC_GPR(0),
1277 SRC_REL(ABSOLUTE),
1278 ALT_CONST(0),
1279 RESOURCE_INDEX_MODE(SQ_CF_INDEX_NONE),
1280 SAMPLER_INDEX_MODE(SQ_CF_INDEX_NONE));
1281 shader[i++] = TEX_DWORD1(DST_GPR(1),
1282 DST_REL(ABSOLUTE),
1283 DST_SEL_X(SQ_SEL_MASK),
1284 DST_SEL_Y(SQ_SEL_X),
1285 DST_SEL_Z(SQ_SEL_MASK),
1286 DST_SEL_W(SQ_SEL_MASK),
1287 LOD_BIAS(0),
1288 COORD_TYPE_X(TEX_NORMALIZED),
1289 COORD_TYPE_Y(TEX_NORMALIZED),
1290 COORD_TYPE_Z(TEX_NORMALIZED),
1291 COORD_TYPE_W(TEX_NORMALIZED));
1292 shader[i++] = TEX_DWORD2(OFFSET_X(0),
1293 OFFSET_Y(0),
1294 OFFSET_Z(0),
1295 SAMPLER_ID(2),
1296 SRC_SEL_X(SQ_SEL_X),
1297 SRC_SEL_Y(SQ_SEL_Y),
1298 SRC_SEL_Z(SQ_SEL_0),
1299 SRC_SEL_W(SQ_SEL_1));
1300 shader[i++] = TEX_DWORD_PAD;
1301 /* 30 */
1302 shader[i++] = CF_DWORD0(ADDR(32),
1303 JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
1304 shader[i++] = CF_DWORD1(POP_COUNT(0),
1305 CF_CONST(0),
1306 COND(SQ_CF_COND_ACTIVE),
1307 I_COUNT(1),
1308 VALID_PIXEL_MODE(0),
1309 END_OF_PROGRAM(0),
1310 CF_INST(SQ_CF_INST_TC),
1311 WHOLE_QUAD_MODE(0),
1312 BARRIER(1));
1313 /* 31 */
1314 shader[i++] = CF_DWORD0(ADDR(0),
1315 JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
1316 shader[i++] = CF_DWORD1(POP_COUNT(0),
1317 CF_CONST(0),
1318 COND(SQ_CF_COND_ACTIVE),
1319 I_COUNT(0),
1320 VALID_PIXEL_MODE(0),
1321 END_OF_PROGRAM(0),
1322 CF_INST(SQ_CF_INST_RETURN),
1323 WHOLE_QUAD_MODE(0),
1324 BARRIER(1));
1325 /* 32/33 */
1326 shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
1327 INST_MOD(0),
1328 FETCH_WHOLE_QUAD(0),
1329 RESOURCE_ID(0),
1330 SRC_GPR(0),
1331 SRC_REL(ABSOLUTE),
1332 ALT_CONST(0),
1333 RESOURCE_INDEX_MODE(SQ_CF_INDEX_NONE),
1334 SAMPLER_INDEX_MODE(SQ_CF_INDEX_NONE));
1335 shader[i++] = TEX_DWORD1(DST_GPR(1),
1336 DST_REL(ABSOLUTE),
1337 DST_SEL_X(SQ_SEL_X),
1338 DST_SEL_Y(SQ_SEL_Y),
1339 DST_SEL_Z(SQ_SEL_Z),
1340 DST_SEL_W(SQ_SEL_1),
1341 LOD_BIAS(0),
1342 COORD_TYPE_X(TEX_NORMALIZED),
1343 COORD_TYPE_Y(TEX_NORMALIZED),
1344 COORD_TYPE_Z(TEX_NORMALIZED),
1345 COORD_TYPE_W(TEX_NORMALIZED));
1346 shader[i++] = TEX_DWORD2(OFFSET_X(0),
1347 OFFSET_Y(0),
1348 OFFSET_Z(0),
1349 SAMPLER_ID(0),
1350 SRC_SEL_X(SQ_SEL_X),
1351 SRC_SEL_Y(SQ_SEL_Y),
1352 SRC_SEL_Z(SQ_SEL_0),
1353 SRC_SEL_W(SQ_SEL_1));
1354 shader[i++] = TEX_DWORD_PAD;
1355
1356 return i;
1357 }
1358
1359 /* comp vs --------------------------------------- */
evergreen_comp_vs(RADEONChipFamily ChipSet,uint32_t * shader)1360 int evergreen_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader)
1361 {
1362 int i = 0;
1363
1364 /* 0 */
1365 shader[i++] = CF_DWORD0(ADDR(3),
1366 JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
1367 shader[i++] = CF_DWORD1(POP_COUNT(0),
1368 CF_CONST(0),
1369 COND(SQ_CF_COND_BOOL),
1370 I_COUNT(0),
1371 VALID_PIXEL_MODE(0),
1372 END_OF_PROGRAM(0),
1373 CF_INST(SQ_CF_INST_CALL),
1374 WHOLE_QUAD_MODE(0),
1375 BARRIER(0));
1376 /* 1 */
1377 shader[i++] = CF_DWORD0(ADDR(9),
1378 JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
1379 shader[i++] = CF_DWORD1(POP_COUNT(0),
1380 CF_CONST(0),
1381 COND(SQ_CF_COND_NOT_BOOL),
1382 I_COUNT(0),
1383 VALID_PIXEL_MODE(0),
1384 END_OF_PROGRAM(0),
1385 CF_INST(SQ_CF_INST_CALL),
1386 WHOLE_QUAD_MODE(0),
1387 BARRIER(0));
1388 /* 2 */
1389 shader[i++] = CF_DWORD0(ADDR(0),
1390 JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
1391 shader[i++] = CF_DWORD1(POP_COUNT(0),
1392 CF_CONST(0),
1393 COND(SQ_CF_COND_ACTIVE),
1394 I_COUNT(0),
1395 VALID_PIXEL_MODE(0),
1396 END_OF_PROGRAM(1),
1397 CF_INST(SQ_CF_INST_NOP),
1398 WHOLE_QUAD_MODE(0),
1399 BARRIER(1));
1400 /* 3 - mask sub */
1401 shader[i++] = CF_DWORD0(ADDR(44),
1402 JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
1403 shader[i++] = CF_DWORD1(POP_COUNT(0),
1404 CF_CONST(0),
1405 COND(SQ_CF_COND_ACTIVE),
1406 I_COUNT(3),
1407 VALID_PIXEL_MODE(0),
1408 END_OF_PROGRAM(0),
1409 CF_INST(SQ_CF_INST_VC),
1410 WHOLE_QUAD_MODE(0),
1411 BARRIER(1));
1412
1413 /* 4 - ALU */
1414 shader[i++] = CF_ALU_DWORD0(ADDR(14),
1415 KCACHE_BANK0(0),
1416 KCACHE_BANK1(0),
1417 KCACHE_MODE0(SQ_CF_KCACHE_LOCK_1));
1418 shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
1419 KCACHE_ADDR0(0),
1420 KCACHE_ADDR1(0),
1421 I_COUNT(20),
1422 ALT_CONST(0),
1423 CF_INST(SQ_CF_INST_ALU),
1424 WHOLE_QUAD_MODE(0),
1425 BARRIER(1));
1426
1427 /* 5 - dst */
1428 shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_POS0),
1429 TYPE(SQ_EXPORT_POS),
1430 RW_GPR(2),
1431 RW_REL(ABSOLUTE),
1432 INDEX_GPR(0),
1433 ELEM_SIZE(0));
1434 shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
1435 SRC_SEL_Y(SQ_SEL_Y),
1436 SRC_SEL_Z(SQ_SEL_0),
1437 SRC_SEL_W(SQ_SEL_1),
1438 BURST_COUNT(1),
1439 VALID_PIXEL_MODE(0),
1440 END_OF_PROGRAM(0),
1441 CF_INST(SQ_CF_INST_EXPORT_DONE),
1442 MARK(0),
1443 BARRIER(1));
1444 /* 6 - src */
1445 shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(0),
1446 TYPE(SQ_EXPORT_PARAM),
1447 RW_GPR(1),
1448 RW_REL(ABSOLUTE),
1449 INDEX_GPR(0),
1450 ELEM_SIZE(0));
1451 shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
1452 SRC_SEL_Y(SQ_SEL_Y),
1453 SRC_SEL_Z(SQ_SEL_0),
1454 SRC_SEL_W(SQ_SEL_1),
1455 BURST_COUNT(1),
1456 VALID_PIXEL_MODE(0),
1457 END_OF_PROGRAM(0),
1458 CF_INST(SQ_CF_INST_EXPORT),
1459 MARK(0),
1460 BARRIER(0));
1461 /* 7 - mask */
1462 shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(1),
1463 TYPE(SQ_EXPORT_PARAM),
1464 RW_GPR(0),
1465 RW_REL(ABSOLUTE),
1466 INDEX_GPR(0),
1467 ELEM_SIZE(0));
1468 shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
1469 SRC_SEL_Y(SQ_SEL_Y),
1470 SRC_SEL_Z(SQ_SEL_0),
1471 SRC_SEL_W(SQ_SEL_1),
1472 BURST_COUNT(1),
1473 VALID_PIXEL_MODE(0),
1474 END_OF_PROGRAM(0),
1475 CF_INST(SQ_CF_INST_EXPORT_DONE),
1476 WHOLE_QUAD_MODE(0),
1477 BARRIER(0));
1478 /* 8 */
1479 shader[i++] = CF_DWORD0(ADDR(0),
1480 JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
1481 shader[i++] = CF_DWORD1(POP_COUNT(0),
1482 CF_CONST(0),
1483 COND(SQ_CF_COND_ACTIVE),
1484 I_COUNT(0),
1485 VALID_PIXEL_MODE(0),
1486 END_OF_PROGRAM(0),
1487 CF_INST(SQ_CF_INST_RETURN),
1488 WHOLE_QUAD_MODE(0),
1489 BARRIER(1));
1490 /* 9 - non-mask sub */
1491 shader[i++] = CF_DWORD0(ADDR(50),
1492 JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
1493 shader[i++] = CF_DWORD1(POP_COUNT(0),
1494 CF_CONST(0),
1495 COND(SQ_CF_COND_ACTIVE),
1496 I_COUNT(2),
1497 VALID_PIXEL_MODE(0),
1498 END_OF_PROGRAM(0),
1499 CF_INST(SQ_CF_INST_VC),
1500 WHOLE_QUAD_MODE(0),
1501 BARRIER(1));
1502
1503 /* 10 - ALU */
1504 shader[i++] = CF_ALU_DWORD0(ADDR(34),
1505 KCACHE_BANK0(0),
1506 KCACHE_BANK1(0),
1507 KCACHE_MODE0(SQ_CF_KCACHE_LOCK_1));
1508 shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
1509 KCACHE_ADDR0(0),
1510 KCACHE_ADDR1(0),
1511 I_COUNT(10),
1512 ALT_CONST(0),
1513 CF_INST(SQ_CF_INST_ALU),
1514 WHOLE_QUAD_MODE(0),
1515 BARRIER(1));
1516
1517 /* 11 - dst */
1518 shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_POS0),
1519 TYPE(SQ_EXPORT_POS),
1520 RW_GPR(1),
1521 RW_REL(ABSOLUTE),
1522 INDEX_GPR(0),
1523 ELEM_SIZE(0));
1524 shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
1525 SRC_SEL_Y(SQ_SEL_Y),
1526 SRC_SEL_Z(SQ_SEL_0),
1527 SRC_SEL_W(SQ_SEL_1),
1528 BURST_COUNT(0),
1529 VALID_PIXEL_MODE(0),
1530 END_OF_PROGRAM(0),
1531 CF_INST(SQ_CF_INST_EXPORT_DONE),
1532 MARK(0),
1533 BARRIER(1));
1534 /* 12 - src */
1535 shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(0),
1536 TYPE(SQ_EXPORT_PARAM),
1537 RW_GPR(0),
1538 RW_REL(ABSOLUTE),
1539 INDEX_GPR(0),
1540 ELEM_SIZE(0));
1541 shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
1542 SRC_SEL_Y(SQ_SEL_Y),
1543 SRC_SEL_Z(SQ_SEL_0),
1544 SRC_SEL_W(SQ_SEL_1),
1545 BURST_COUNT(0),
1546 VALID_PIXEL_MODE(0),
1547 END_OF_PROGRAM(0),
1548 CF_INST(SQ_CF_INST_EXPORT_DONE),
1549 MARK(0),
1550 BARRIER(0));
1551 /* 13 */
1552 shader[i++] = CF_DWORD0(ADDR(0),
1553 JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
1554 shader[i++] = CF_DWORD1(POP_COUNT(0),
1555 CF_CONST(0),
1556 COND(SQ_CF_COND_ACTIVE),
1557 I_COUNT(0),
1558 VALID_PIXEL_MODE(0),
1559 END_OF_PROGRAM(0),
1560 CF_INST(SQ_CF_INST_RETURN),
1561 WHOLE_QUAD_MODE(0),
1562 BARRIER(1));
1563
1564 /* 14 srcX.x DOT4 - mask */
1565 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1),
1566 SRC0_REL(ABSOLUTE),
1567 SRC0_ELEM(ELEM_X),
1568 SRC0_NEG(0),
1569 SRC1_SEL(ALU_SRC_KCACHE0_BASE + 0),
1570 SRC1_REL(ABSOLUTE),
1571 SRC1_ELEM(ELEM_X),
1572 SRC1_NEG(0),
1573 INDEX_MODE(SQ_INDEX_LOOP),
1574 PRED_SEL(SQ_PRED_SEL_OFF),
1575 LAST(0));
1576 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
1577 SRC1_ABS(0),
1578 UPDATE_EXECUTE_MASK(0),
1579 UPDATE_PRED(0),
1580 WRITE_MASK(1),
1581 OMOD(SQ_ALU_OMOD_OFF),
1582 ALU_INST(SQ_OP2_INST_DOT4),
1583 BANK_SWIZZLE(SQ_ALU_VEC_012),
1584 DST_GPR(3),
1585 DST_REL(ABSOLUTE),
1586 DST_ELEM(ELEM_X),
1587 CLAMP(0));
1588
1589 /* 15 srcX.y DOT4 - mask */
1590 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1),
1591 SRC0_REL(ABSOLUTE),
1592 SRC0_ELEM(ELEM_Y),
1593 SRC0_NEG(0),
1594 SRC1_SEL(ALU_SRC_KCACHE0_BASE + 0),
1595 SRC1_REL(ABSOLUTE),
1596 SRC1_ELEM(ELEM_Y),
1597 SRC1_NEG(0),
1598 INDEX_MODE(SQ_INDEX_LOOP),
1599 PRED_SEL(SQ_PRED_SEL_OFF),
1600 LAST(0));
1601 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
1602 SRC1_ABS(0),
1603 UPDATE_EXECUTE_MASK(0),
1604 UPDATE_PRED(0),
1605 WRITE_MASK(0),
1606 OMOD(SQ_ALU_OMOD_OFF),
1607 ALU_INST(SQ_OP2_INST_DOT4),
1608 BANK_SWIZZLE(SQ_ALU_VEC_012),
1609 DST_GPR(3),
1610 DST_REL(ABSOLUTE),
1611 DST_ELEM(ELEM_Y),
1612 CLAMP(0));
1613
1614 /* 16 srcX.z DOT4 - mask */
1615 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1),
1616 SRC0_REL(ABSOLUTE),
1617 SRC0_ELEM(ELEM_Z),
1618 SRC0_NEG(0),
1619 SRC1_SEL(ALU_SRC_KCACHE0_BASE + 0),
1620 SRC1_REL(ABSOLUTE),
1621 SRC1_ELEM(ELEM_Z),
1622 SRC1_NEG(0),
1623 INDEX_MODE(SQ_INDEX_LOOP),
1624 PRED_SEL(SQ_PRED_SEL_OFF),
1625 LAST(0));
1626 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
1627 SRC1_ABS(0),
1628 UPDATE_EXECUTE_MASK(0),
1629 UPDATE_PRED(0),
1630 WRITE_MASK(0),
1631 OMOD(SQ_ALU_OMOD_OFF),
1632 ALU_INST(SQ_OP2_INST_DOT4),
1633 BANK_SWIZZLE(SQ_ALU_VEC_012),
1634 DST_GPR(3),
1635 DST_REL(ABSOLUTE),
1636 DST_ELEM(ELEM_Z),
1637 CLAMP(0));
1638
1639 /* 17 srcX.w DOT4 - mask */
1640 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1),
1641 SRC0_REL(ABSOLUTE),
1642 SRC0_ELEM(ELEM_W),
1643 SRC0_NEG(0),
1644 SRC1_SEL(ALU_SRC_KCACHE0_BASE + 0),
1645 SRC1_REL(ABSOLUTE),
1646 SRC1_ELEM(ELEM_W),
1647 SRC1_NEG(0),
1648 INDEX_MODE(SQ_INDEX_LOOP),
1649 PRED_SEL(SQ_PRED_SEL_OFF),
1650 LAST(1));
1651 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
1652 SRC1_ABS(0),
1653 UPDATE_EXECUTE_MASK(0),
1654 UPDATE_PRED(0),
1655 WRITE_MASK(0),
1656 OMOD(SQ_ALU_OMOD_OFF),
1657 ALU_INST(SQ_OP2_INST_DOT4),
1658 BANK_SWIZZLE(SQ_ALU_VEC_012),
1659 DST_GPR(3),
1660 DST_REL(ABSOLUTE),
1661 DST_ELEM(ELEM_W),
1662 CLAMP(0));
1663
1664 /* 18 srcY.x DOT4 - mask */
1665 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1),
1666 SRC0_REL(ABSOLUTE),
1667 SRC0_ELEM(ELEM_X),
1668 SRC0_NEG(0),
1669 SRC1_SEL(ALU_SRC_KCACHE0_BASE + 1),
1670 SRC1_REL(ABSOLUTE),
1671 SRC1_ELEM(ELEM_X),
1672 SRC1_NEG(0),
1673 INDEX_MODE(SQ_INDEX_LOOP),
1674 PRED_SEL(SQ_PRED_SEL_OFF),
1675 LAST(0));
1676 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
1677 SRC1_ABS(0),
1678 UPDATE_EXECUTE_MASK(0),
1679 UPDATE_PRED(0),
1680 WRITE_MASK(0),
1681 OMOD(SQ_ALU_OMOD_OFF),
1682 ALU_INST(SQ_OP2_INST_DOT4),
1683 BANK_SWIZZLE(SQ_ALU_VEC_012),
1684 DST_GPR(3),
1685 DST_REL(ABSOLUTE),
1686 DST_ELEM(ELEM_X),
1687 CLAMP(0));
1688
1689 /* 19 srcY.y DOT4 - mask */
1690 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1),
1691 SRC0_REL(ABSOLUTE),
1692 SRC0_ELEM(ELEM_Y),
1693 SRC0_NEG(0),
1694 SRC1_SEL(ALU_SRC_KCACHE0_BASE + 1),
1695 SRC1_REL(ABSOLUTE),
1696 SRC1_ELEM(ELEM_Y),
1697 SRC1_NEG(0),
1698 INDEX_MODE(SQ_INDEX_LOOP),
1699 PRED_SEL(SQ_PRED_SEL_OFF),
1700 LAST(0));
1701 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
1702 SRC1_ABS(0),
1703 UPDATE_EXECUTE_MASK(0),
1704 UPDATE_PRED(0),
1705 WRITE_MASK(1),
1706 OMOD(SQ_ALU_OMOD_OFF),
1707 ALU_INST(SQ_OP2_INST_DOT4),
1708 BANK_SWIZZLE(SQ_ALU_VEC_012),
1709 DST_GPR(3),
1710 DST_REL(ABSOLUTE),
1711 DST_ELEM(ELEM_Y),
1712 CLAMP(0));
1713
1714 /* 20 srcY.z DOT4 - mask */
1715 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1),
1716 SRC0_REL(ABSOLUTE),
1717 SRC0_ELEM(ELEM_Z),
1718 SRC0_NEG(0),
1719 SRC1_SEL(ALU_SRC_KCACHE0_BASE + 1),
1720 SRC1_REL(ABSOLUTE),
1721 SRC1_ELEM(ELEM_Z),
1722 SRC1_NEG(0),
1723 INDEX_MODE(SQ_INDEX_LOOP),
1724 PRED_SEL(SQ_PRED_SEL_OFF),
1725 LAST(0));
1726 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
1727 SRC1_ABS(0),
1728 UPDATE_EXECUTE_MASK(0),
1729 UPDATE_PRED(0),
1730 WRITE_MASK(0),
1731 OMOD(SQ_ALU_OMOD_OFF),
1732 ALU_INST(SQ_OP2_INST_DOT4),
1733 BANK_SWIZZLE(SQ_ALU_VEC_012),
1734 DST_GPR(3),
1735 DST_REL(ABSOLUTE),
1736 DST_ELEM(ELEM_Z),
1737 CLAMP(0));
1738
1739 /* 21 srcY.w DOT4 - mask */
1740 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1),
1741 SRC0_REL(ABSOLUTE),
1742 SRC0_ELEM(ELEM_W),
1743 SRC0_NEG(0),
1744 SRC1_SEL(ALU_SRC_KCACHE0_BASE + 1),
1745 SRC1_REL(ABSOLUTE),
1746 SRC1_ELEM(ELEM_W),
1747 SRC1_NEG(0),
1748 INDEX_MODE(SQ_INDEX_LOOP),
1749 PRED_SEL(SQ_PRED_SEL_OFF),
1750 LAST(1));
1751 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
1752 SRC1_ABS(0),
1753 UPDATE_EXECUTE_MASK(0),
1754 UPDATE_PRED(0),
1755 WRITE_MASK(0),
1756 OMOD(SQ_ALU_OMOD_OFF),
1757 ALU_INST(SQ_OP2_INST_DOT4),
1758 BANK_SWIZZLE(SQ_ALU_VEC_012),
1759 DST_GPR(3),
1760 DST_REL(ABSOLUTE),
1761 DST_ELEM(ELEM_W),
1762 CLAMP(0));
1763
1764 /* 22 maskX.x DOT4 - mask */
1765 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
1766 SRC0_REL(ABSOLUTE),
1767 SRC0_ELEM(ELEM_X),
1768 SRC0_NEG(0),
1769 SRC1_SEL(ALU_SRC_KCACHE0_BASE + 2),
1770 SRC1_REL(ABSOLUTE),
1771 SRC1_ELEM(ELEM_X),
1772 SRC1_NEG(0),
1773 INDEX_MODE(SQ_INDEX_LOOP),
1774 PRED_SEL(SQ_PRED_SEL_OFF),
1775 LAST(0));
1776 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
1777 SRC1_ABS(0),
1778 UPDATE_EXECUTE_MASK(0),
1779 UPDATE_PRED(0),
1780 WRITE_MASK(1),
1781 OMOD(SQ_ALU_OMOD_OFF),
1782 ALU_INST(SQ_OP2_INST_DOT4),
1783 BANK_SWIZZLE(SQ_ALU_VEC_012),
1784 DST_GPR(4),
1785 DST_REL(ABSOLUTE),
1786 DST_ELEM(ELEM_X),
1787 CLAMP(0));
1788
1789 /* 23 maskX.y DOT4 - mask */
1790 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
1791 SRC0_REL(ABSOLUTE),
1792 SRC0_ELEM(ELEM_Y),
1793 SRC0_NEG(0),
1794 SRC1_SEL(ALU_SRC_KCACHE0_BASE + 2),
1795 SRC1_REL(ABSOLUTE),
1796 SRC1_ELEM(ELEM_Y),
1797 SRC1_NEG(0),
1798 INDEX_MODE(SQ_INDEX_LOOP),
1799 PRED_SEL(SQ_PRED_SEL_OFF),
1800 LAST(0));
1801 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
1802 SRC1_ABS(0),
1803 UPDATE_EXECUTE_MASK(0),
1804 UPDATE_PRED(0),
1805 WRITE_MASK(0),
1806 OMOD(SQ_ALU_OMOD_OFF),
1807 ALU_INST(SQ_OP2_INST_DOT4),
1808 BANK_SWIZZLE(SQ_ALU_VEC_012),
1809 DST_GPR(4),
1810 DST_REL(ABSOLUTE),
1811 DST_ELEM(ELEM_Y),
1812 CLAMP(0));
1813
1814 /* 24 maskX.z DOT4 - mask */
1815 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
1816 SRC0_REL(ABSOLUTE),
1817 SRC0_ELEM(ELEM_Z),
1818 SRC0_NEG(0),
1819 SRC1_SEL(ALU_SRC_KCACHE0_BASE + 2),
1820 SRC1_REL(ABSOLUTE),
1821 SRC1_ELEM(ELEM_Z),
1822 SRC1_NEG(0),
1823 INDEX_MODE(SQ_INDEX_LOOP),
1824 PRED_SEL(SQ_PRED_SEL_OFF),
1825 LAST(0));
1826 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
1827 SRC1_ABS(0),
1828 UPDATE_EXECUTE_MASK(0),
1829 UPDATE_PRED(0),
1830 WRITE_MASK(0),
1831 OMOD(SQ_ALU_OMOD_OFF),
1832 ALU_INST(SQ_OP2_INST_DOT4),
1833 BANK_SWIZZLE(SQ_ALU_VEC_012),
1834 DST_GPR(4),
1835 DST_REL(ABSOLUTE),
1836 DST_ELEM(ELEM_Z),
1837 CLAMP(0));
1838
1839 /* 25 maskX.w DOT4 - mask */
1840 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
1841 SRC0_REL(ABSOLUTE),
1842 SRC0_ELEM(ELEM_W),
1843 SRC0_NEG(0),
1844 SRC1_SEL(ALU_SRC_KCACHE0_BASE + 2),
1845 SRC1_REL(ABSOLUTE),
1846 SRC1_ELEM(ELEM_W),
1847 SRC1_NEG(0),
1848 INDEX_MODE(SQ_INDEX_LOOP),
1849 PRED_SEL(SQ_PRED_SEL_OFF),
1850 LAST(1));
1851 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
1852 SRC1_ABS(0),
1853 UPDATE_EXECUTE_MASK(0),
1854 UPDATE_PRED(0),
1855 WRITE_MASK(0),
1856 OMOD(SQ_ALU_OMOD_OFF),
1857 ALU_INST(SQ_OP2_INST_DOT4),
1858 BANK_SWIZZLE(SQ_ALU_VEC_012),
1859 DST_GPR(4),
1860 DST_REL(ABSOLUTE),
1861 DST_ELEM(ELEM_W),
1862 CLAMP(0));
1863
1864 /* 26 maskY.x DOT4 - mask */
1865 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
1866 SRC0_REL(ABSOLUTE),
1867 SRC0_ELEM(ELEM_X),
1868 SRC0_NEG(0),
1869 SRC1_SEL(ALU_SRC_KCACHE0_BASE + 3),
1870 SRC1_REL(ABSOLUTE),
1871 SRC1_ELEM(ELEM_X),
1872 SRC1_NEG(0),
1873 INDEX_MODE(SQ_INDEX_LOOP),
1874 PRED_SEL(SQ_PRED_SEL_OFF),
1875 LAST(0));
1876 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
1877 SRC1_ABS(0),
1878 UPDATE_EXECUTE_MASK(0),
1879 UPDATE_PRED(0),
1880 WRITE_MASK(0),
1881 OMOD(SQ_ALU_OMOD_OFF),
1882 ALU_INST(SQ_OP2_INST_DOT4),
1883 BANK_SWIZZLE(SQ_ALU_VEC_012),
1884 DST_GPR(4),
1885 DST_REL(ABSOLUTE),
1886 DST_ELEM(ELEM_X),
1887 CLAMP(0));
1888
1889 /* 27 maskY.y DOT4 - mask */
1890 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
1891 SRC0_REL(ABSOLUTE),
1892 SRC0_ELEM(ELEM_Y),
1893 SRC0_NEG(0),
1894 SRC1_SEL(ALU_SRC_KCACHE0_BASE + 3),
1895 SRC1_REL(ABSOLUTE),
1896 SRC1_ELEM(ELEM_Y),
1897 SRC1_NEG(0),
1898 INDEX_MODE(SQ_INDEX_LOOP),
1899 PRED_SEL(SQ_PRED_SEL_OFF),
1900 LAST(0));
1901 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
1902 SRC1_ABS(0),
1903 UPDATE_EXECUTE_MASK(0),
1904 UPDATE_PRED(0),
1905 WRITE_MASK(1),
1906 OMOD(SQ_ALU_OMOD_OFF),
1907 ALU_INST(SQ_OP2_INST_DOT4),
1908 BANK_SWIZZLE(SQ_ALU_VEC_012),
1909 DST_GPR(4),
1910 DST_REL(ABSOLUTE),
1911 DST_ELEM(ELEM_Y),
1912 CLAMP(0));
1913
1914 /* 28 maskY.z DOT4 - mask */
1915 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
1916 SRC0_REL(ABSOLUTE),
1917 SRC0_ELEM(ELEM_Z),
1918 SRC0_NEG(0),
1919 SRC1_SEL(ALU_SRC_KCACHE0_BASE + 3),
1920 SRC1_REL(ABSOLUTE),
1921 SRC1_ELEM(ELEM_Z),
1922 SRC1_NEG(0),
1923 INDEX_MODE(SQ_INDEX_LOOP),
1924 PRED_SEL(SQ_PRED_SEL_OFF),
1925 LAST(0));
1926 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
1927 SRC1_ABS(0),
1928 UPDATE_EXECUTE_MASK(0),
1929 UPDATE_PRED(0),
1930 WRITE_MASK(0),
1931 OMOD(SQ_ALU_OMOD_OFF),
1932 ALU_INST(SQ_OP2_INST_DOT4),
1933 BANK_SWIZZLE(SQ_ALU_VEC_012),
1934 DST_GPR(4),
1935 DST_REL(ABSOLUTE),
1936 DST_ELEM(ELEM_Z),
1937 CLAMP(0));
1938
1939 /* 29 maskY.w DOT4 - mask */
1940 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
1941 SRC0_REL(ABSOLUTE),
1942 SRC0_ELEM(ELEM_W),
1943 SRC0_NEG(0),
1944 SRC1_SEL(ALU_SRC_KCACHE0_BASE + 3),
1945 SRC1_REL(ABSOLUTE),
1946 SRC1_ELEM(ELEM_W),
1947 SRC1_NEG(0),
1948 INDEX_MODE(SQ_INDEX_LOOP),
1949 PRED_SEL(SQ_PRED_SEL_OFF),
1950 LAST(1));
1951 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
1952 SRC1_ABS(0),
1953 UPDATE_EXECUTE_MASK(0),
1954 UPDATE_PRED(0),
1955 WRITE_MASK(0),
1956 OMOD(SQ_ALU_OMOD_OFF),
1957 ALU_INST(SQ_OP2_INST_DOT4),
1958 BANK_SWIZZLE(SQ_ALU_VEC_012),
1959 DST_GPR(4),
1960 DST_REL(ABSOLUTE),
1961 DST_ELEM(ELEM_W),
1962 CLAMP(0));
1963
1964 /* 30 srcX / w */
1965 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 3),
1966 SRC0_REL(ABSOLUTE),
1967 SRC0_ELEM(ELEM_X),
1968 SRC0_NEG(0),
1969 SRC1_SEL(ALU_SRC_KCACHE0_BASE + 0),
1970 SRC1_REL(ABSOLUTE),
1971 SRC1_ELEM(ELEM_W),
1972 SRC1_NEG(0),
1973 INDEX_MODE(SQ_INDEX_AR_X),
1974 PRED_SEL(SQ_PRED_SEL_OFF),
1975 LAST(1));
1976 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
1977 SRC1_ABS(0),
1978 UPDATE_EXECUTE_MASK(0),
1979 UPDATE_PRED(0),
1980 WRITE_MASK(1),
1981 OMOD(SQ_ALU_OMOD_OFF),
1982 ALU_INST(SQ_OP2_INST_MUL),
1983 BANK_SWIZZLE(SQ_ALU_VEC_012),
1984 DST_GPR(1),
1985 DST_REL(ABSOLUTE),
1986 DST_ELEM(ELEM_X),
1987 CLAMP(0));
1988
1989 /* 31 srcY / h */
1990 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 3),
1991 SRC0_REL(ABSOLUTE),
1992 SRC0_ELEM(ELEM_Y),
1993 SRC0_NEG(0),
1994 SRC1_SEL(ALU_SRC_KCACHE0_BASE + 1),
1995 SRC1_REL(ABSOLUTE),
1996 SRC1_ELEM(ELEM_W),
1997 SRC1_NEG(0),
1998 INDEX_MODE(SQ_INDEX_AR_X),
1999 PRED_SEL(SQ_PRED_SEL_OFF),
2000 LAST(1));
2001 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
2002 SRC1_ABS(0),
2003 UPDATE_EXECUTE_MASK(0),
2004 UPDATE_PRED(0),
2005 WRITE_MASK(1),
2006 OMOD(SQ_ALU_OMOD_OFF),
2007 ALU_INST(SQ_OP2_INST_MUL),
2008 BANK_SWIZZLE(SQ_ALU_VEC_012),
2009 DST_GPR(1),
2010 DST_REL(ABSOLUTE),
2011 DST_ELEM(ELEM_Y),
2012 CLAMP(0));
2013
2014 /* 32 maskX / w */
2015 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 4),
2016 SRC0_REL(ABSOLUTE),
2017 SRC0_ELEM(ELEM_X),
2018 SRC0_NEG(0),
2019 SRC1_SEL(ALU_SRC_KCACHE0_BASE + 2),
2020 SRC1_REL(ABSOLUTE),
2021 SRC1_ELEM(ELEM_W),
2022 SRC1_NEG(0),
2023 INDEX_MODE(SQ_INDEX_AR_X),
2024 PRED_SEL(SQ_PRED_SEL_OFF),
2025 LAST(1));
2026 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
2027 SRC1_ABS(0),
2028 UPDATE_EXECUTE_MASK(0),
2029 UPDATE_PRED(0),
2030 WRITE_MASK(1),
2031 OMOD(SQ_ALU_OMOD_OFF),
2032 ALU_INST(SQ_OP2_INST_MUL),
2033 BANK_SWIZZLE(SQ_ALU_VEC_012),
2034 DST_GPR(0),
2035 DST_REL(ABSOLUTE),
2036 DST_ELEM(ELEM_X),
2037 CLAMP(0));
2038
2039 /* 33 maskY / h */
2040 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 4),
2041 SRC0_REL(ABSOLUTE),
2042 SRC0_ELEM(ELEM_Y),
2043 SRC0_NEG(0),
2044 SRC1_SEL(ALU_SRC_KCACHE0_BASE + 3),
2045 SRC1_REL(ABSOLUTE),
2046 SRC1_ELEM(ELEM_W),
2047 SRC1_NEG(0),
2048 INDEX_MODE(SQ_INDEX_AR_X),
2049 PRED_SEL(SQ_PRED_SEL_OFF),
2050 LAST(1));
2051 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
2052 SRC1_ABS(0),
2053 UPDATE_EXECUTE_MASK(0),
2054 UPDATE_PRED(0),
2055 WRITE_MASK(1),
2056 OMOD(SQ_ALU_OMOD_OFF),
2057 ALU_INST(SQ_OP2_INST_MUL),
2058 BANK_SWIZZLE(SQ_ALU_VEC_012),
2059 DST_GPR(0),
2060 DST_REL(ABSOLUTE),
2061 DST_ELEM(ELEM_Y),
2062 CLAMP(0));
2063
2064 /* 34 srcX.x DOT4 - non-mask */
2065 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
2066 SRC0_REL(ABSOLUTE),
2067 SRC0_ELEM(ELEM_X),
2068 SRC0_NEG(0),
2069 SRC1_SEL(ALU_SRC_KCACHE0_BASE + 0),
2070 SRC1_REL(ABSOLUTE),
2071 SRC1_ELEM(ELEM_X),
2072 SRC1_NEG(0),
2073 INDEX_MODE(SQ_INDEX_LOOP),
2074 PRED_SEL(SQ_PRED_SEL_OFF),
2075 LAST(0));
2076 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
2077 SRC1_ABS(0),
2078 UPDATE_EXECUTE_MASK(0),
2079 UPDATE_PRED(0),
2080 WRITE_MASK(1),
2081 OMOD(SQ_ALU_OMOD_OFF),
2082 ALU_INST(SQ_OP2_INST_DOT4),
2083 BANK_SWIZZLE(SQ_ALU_VEC_012),
2084 DST_GPR(2),
2085 DST_REL(ABSOLUTE),
2086 DST_ELEM(ELEM_X),
2087 CLAMP(0));
2088
2089 /* 35 srcX.y DOT4 - non-mask */
2090 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
2091 SRC0_REL(ABSOLUTE),
2092 SRC0_ELEM(ELEM_Y),
2093 SRC0_NEG(0),
2094 SRC1_SEL(ALU_SRC_KCACHE0_BASE + 0),
2095 SRC1_REL(ABSOLUTE),
2096 SRC1_ELEM(ELEM_Y),
2097 SRC1_NEG(0),
2098 INDEX_MODE(SQ_INDEX_LOOP),
2099 PRED_SEL(SQ_PRED_SEL_OFF),
2100 LAST(0));
2101 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
2102 SRC1_ABS(0),
2103 UPDATE_EXECUTE_MASK(0),
2104 UPDATE_PRED(0),
2105 WRITE_MASK(0),
2106 OMOD(SQ_ALU_OMOD_OFF),
2107 ALU_INST(SQ_OP2_INST_DOT4),
2108 BANK_SWIZZLE(SQ_ALU_VEC_012),
2109 DST_GPR(2),
2110 DST_REL(ABSOLUTE),
2111 DST_ELEM(ELEM_Y),
2112 CLAMP(0));
2113
2114 /* 36 srcX.z DOT4 - non-mask */
2115 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
2116 SRC0_REL(ABSOLUTE),
2117 SRC0_ELEM(ELEM_Z),
2118 SRC0_NEG(0),
2119 SRC1_SEL(ALU_SRC_KCACHE0_BASE + 0),
2120 SRC1_REL(ABSOLUTE),
2121 SRC1_ELEM(ELEM_Z),
2122 SRC1_NEG(0),
2123 INDEX_MODE(SQ_INDEX_LOOP),
2124 PRED_SEL(SQ_PRED_SEL_OFF),
2125 LAST(0));
2126 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
2127 SRC1_ABS(0),
2128 UPDATE_EXECUTE_MASK(0),
2129 UPDATE_PRED(0),
2130 WRITE_MASK(0),
2131 OMOD(SQ_ALU_OMOD_OFF),
2132 ALU_INST(SQ_OP2_INST_DOT4),
2133 BANK_SWIZZLE(SQ_ALU_VEC_012),
2134 DST_GPR(2),
2135 DST_REL(ABSOLUTE),
2136 DST_ELEM(ELEM_Z),
2137 CLAMP(0));
2138
2139 /* 37 srcX.w DOT4 - non-mask */
2140 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
2141 SRC0_REL(ABSOLUTE),
2142 SRC0_ELEM(ELEM_W),
2143 SRC0_NEG(0),
2144 SRC1_SEL(ALU_SRC_KCACHE0_BASE + 0),
2145 SRC1_REL(ABSOLUTE),
2146 SRC1_ELEM(ELEM_W),
2147 SRC1_NEG(0),
2148 INDEX_MODE(SQ_INDEX_LOOP),
2149 PRED_SEL(SQ_PRED_SEL_OFF),
2150 LAST(1));
2151 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
2152 SRC1_ABS(0),
2153 UPDATE_EXECUTE_MASK(0),
2154 UPDATE_PRED(0),
2155 WRITE_MASK(0),
2156 OMOD(SQ_ALU_OMOD_OFF),
2157 ALU_INST(SQ_OP2_INST_DOT4),
2158 BANK_SWIZZLE(SQ_ALU_VEC_012),
2159 DST_GPR(2),
2160 DST_REL(ABSOLUTE),
2161 DST_ELEM(ELEM_W),
2162 CLAMP(0));
2163
2164 /* 38 srcY.x DOT4 - non-mask */
2165 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
2166 SRC0_REL(ABSOLUTE),
2167 SRC0_ELEM(ELEM_X),
2168 SRC0_NEG(0),
2169 SRC1_SEL(ALU_SRC_KCACHE0_BASE + 1),
2170 SRC1_REL(ABSOLUTE),
2171 SRC1_ELEM(ELEM_X),
2172 SRC1_NEG(0),
2173 INDEX_MODE(SQ_INDEX_LOOP),
2174 PRED_SEL(SQ_PRED_SEL_OFF),
2175 LAST(0));
2176 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
2177 SRC1_ABS(0),
2178 UPDATE_EXECUTE_MASK(0),
2179 UPDATE_PRED(0),
2180 WRITE_MASK(0),
2181 OMOD(SQ_ALU_OMOD_OFF),
2182 ALU_INST(SQ_OP2_INST_DOT4),
2183 BANK_SWIZZLE(SQ_ALU_VEC_012),
2184 DST_GPR(2),
2185 DST_REL(ABSOLUTE),
2186 DST_ELEM(ELEM_X),
2187 CLAMP(0));
2188
2189 /* 39 srcY.y DOT4 - non-mask */
2190 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
2191 SRC0_REL(ABSOLUTE),
2192 SRC0_ELEM(ELEM_Y),
2193 SRC0_NEG(0),
2194 SRC1_SEL(ALU_SRC_KCACHE0_BASE + 1),
2195 SRC1_REL(ABSOLUTE),
2196 SRC1_ELEM(ELEM_Y),
2197 SRC1_NEG(0),
2198 INDEX_MODE(SQ_INDEX_LOOP),
2199 PRED_SEL(SQ_PRED_SEL_OFF),
2200 LAST(0));
2201 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
2202 SRC1_ABS(0),
2203 UPDATE_EXECUTE_MASK(0),
2204 UPDATE_PRED(0),
2205 WRITE_MASK(1),
2206 OMOD(SQ_ALU_OMOD_OFF),
2207 ALU_INST(SQ_OP2_INST_DOT4),
2208 BANK_SWIZZLE(SQ_ALU_VEC_012),
2209 DST_GPR(2),
2210 DST_REL(ABSOLUTE),
2211 DST_ELEM(ELEM_Y),
2212 CLAMP(0));
2213
2214 /* 40 srcY.z DOT4 - non-mask */
2215 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
2216 SRC0_REL(ABSOLUTE),
2217 SRC0_ELEM(ELEM_Z),
2218 SRC0_NEG(0),
2219 SRC1_SEL(ALU_SRC_KCACHE0_BASE + 1),
2220 SRC1_REL(ABSOLUTE),
2221 SRC1_ELEM(ELEM_Z),
2222 SRC1_NEG(0),
2223 INDEX_MODE(SQ_INDEX_LOOP),
2224 PRED_SEL(SQ_PRED_SEL_OFF),
2225 LAST(0));
2226 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
2227 SRC1_ABS(0),
2228 UPDATE_EXECUTE_MASK(0),
2229 UPDATE_PRED(0),
2230 WRITE_MASK(0),
2231 OMOD(SQ_ALU_OMOD_OFF),
2232 ALU_INST(SQ_OP2_INST_DOT4),
2233 BANK_SWIZZLE(SQ_ALU_VEC_012),
2234 DST_GPR(2),
2235 DST_REL(ABSOLUTE),
2236 DST_ELEM(ELEM_Z),
2237 CLAMP(0));
2238
2239 /* 41 srcY.w DOT4 - non-mask */
2240 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
2241 SRC0_REL(ABSOLUTE),
2242 SRC0_ELEM(ELEM_W),
2243 SRC0_NEG(0),
2244 SRC1_SEL(ALU_SRC_KCACHE0_BASE + 1),
2245 SRC1_REL(ABSOLUTE),
2246 SRC1_ELEM(ELEM_W),
2247 SRC1_NEG(0),
2248 INDEX_MODE(SQ_INDEX_LOOP),
2249 PRED_SEL(SQ_PRED_SEL_OFF),
2250 LAST(1));
2251 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
2252 SRC1_ABS(0),
2253 UPDATE_EXECUTE_MASK(0),
2254 UPDATE_PRED(0),
2255 WRITE_MASK(0),
2256 OMOD(SQ_ALU_OMOD_OFF),
2257 ALU_INST(SQ_OP2_INST_DOT4),
2258 BANK_SWIZZLE(SQ_ALU_VEC_012),
2259 DST_GPR(2),
2260 DST_REL(ABSOLUTE),
2261 DST_ELEM(ELEM_W),
2262 CLAMP(0));
2263
2264 /* 42 srcX / w */
2265 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 2),
2266 SRC0_REL(ABSOLUTE),
2267 SRC0_ELEM(ELEM_X),
2268 SRC0_NEG(0),
2269 SRC1_SEL(ALU_SRC_KCACHE0_BASE + 0),
2270 SRC1_REL(ABSOLUTE),
2271 SRC1_ELEM(ELEM_W),
2272 SRC1_NEG(0),
2273 INDEX_MODE(SQ_INDEX_AR_X),
2274 PRED_SEL(SQ_PRED_SEL_OFF),
2275 LAST(1));
2276 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
2277 SRC1_ABS(0),
2278 UPDATE_EXECUTE_MASK(0),
2279 UPDATE_PRED(0),
2280 WRITE_MASK(1),
2281 OMOD(SQ_ALU_OMOD_OFF),
2282 ALU_INST(SQ_OP2_INST_MUL),
2283 BANK_SWIZZLE(SQ_ALU_VEC_012),
2284 DST_GPR(0),
2285 DST_REL(ABSOLUTE),
2286 DST_ELEM(ELEM_X),
2287 CLAMP(0));
2288
2289 /* 43 srcY / h */
2290 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 2),
2291 SRC0_REL(ABSOLUTE),
2292 SRC0_ELEM(ELEM_Y),
2293 SRC0_NEG(0),
2294 SRC1_SEL(ALU_SRC_KCACHE0_BASE + 1),
2295 SRC1_REL(ABSOLUTE),
2296 SRC1_ELEM(ELEM_W),
2297 SRC1_NEG(0),
2298 INDEX_MODE(SQ_INDEX_AR_X),
2299 PRED_SEL(SQ_PRED_SEL_OFF),
2300 LAST(1));
2301 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
2302 SRC1_ABS(0),
2303 UPDATE_EXECUTE_MASK(0),
2304 UPDATE_PRED(0),
2305 WRITE_MASK(1),
2306 OMOD(SQ_ALU_OMOD_OFF),
2307 ALU_INST(SQ_OP2_INST_MUL),
2308 BANK_SWIZZLE(SQ_ALU_VEC_012),
2309 DST_GPR(0),
2310 DST_REL(ABSOLUTE),
2311 DST_ELEM(ELEM_Y),
2312 CLAMP(0));
2313
2314 /* mask vfetch - 44/45 - dst */
2315 shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
2316 FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
2317 FETCH_WHOLE_QUAD(0),
2318 BUFFER_ID(0),
2319 SRC_GPR(0),
2320 SRC_REL(ABSOLUTE),
2321 SRC_SEL_X(SQ_SEL_X),
2322 MEGA_FETCH_COUNT(24));
2323 shader[i++] = VTX_DWORD1_GPR(DST_GPR(2),
2324 DST_REL(0),
2325 DST_SEL_X(SQ_SEL_X),
2326 DST_SEL_Y(SQ_SEL_Y),
2327 DST_SEL_Z(SQ_SEL_0),
2328 DST_SEL_W(SQ_SEL_1),
2329 USE_CONST_FIELDS(0),
2330 DATA_FORMAT(FMT_32_32_FLOAT),
2331 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED),
2332 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
2333 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
2334 shader[i++] = VTX_DWORD2(OFFSET(0),
2335 #if X_BYTE_ORDER == X_BIG_ENDIAN
2336 ENDIAN_SWAP(SQ_ENDIAN_8IN32),
2337 #else
2338 ENDIAN_SWAP(SQ_ENDIAN_NONE),
2339 #endif
2340 CONST_BUF_NO_STRIDE(0),
2341 MEGA_FETCH(1),
2342 ALT_CONST(0),
2343 BUFFER_INDEX_MODE(SQ_CF_INDEX_NONE));
2344 shader[i++] = VTX_DWORD_PAD;
2345 /* 46/47 - src */
2346 shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
2347 FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
2348 FETCH_WHOLE_QUAD(0),
2349 BUFFER_ID(0),
2350 SRC_GPR(0),
2351 SRC_REL(ABSOLUTE),
2352 SRC_SEL_X(SQ_SEL_X),
2353 MEGA_FETCH_COUNT(8));
2354 shader[i++] = VTX_DWORD1_GPR(DST_GPR(1),
2355 DST_REL(0),
2356 DST_SEL_X(SQ_SEL_X),
2357 DST_SEL_Y(SQ_SEL_Y),
2358 DST_SEL_Z(SQ_SEL_1),
2359 DST_SEL_W(SQ_SEL_0),
2360 USE_CONST_FIELDS(0),
2361 DATA_FORMAT(FMT_32_32_FLOAT),
2362 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED),
2363 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
2364 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
2365 shader[i++] = VTX_DWORD2(OFFSET(8),
2366 #if X_BYTE_ORDER == X_BIG_ENDIAN
2367 ENDIAN_SWAP(SQ_ENDIAN_8IN32),
2368 #else
2369 ENDIAN_SWAP(SQ_ENDIAN_NONE),
2370 #endif
2371 CONST_BUF_NO_STRIDE(0),
2372 MEGA_FETCH(0),
2373 ALT_CONST(0),
2374 BUFFER_INDEX_MODE(SQ_CF_INDEX_NONE));
2375 shader[i++] = VTX_DWORD_PAD;
2376 /* 48/49 - mask */
2377 shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
2378 FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
2379 FETCH_WHOLE_QUAD(0),
2380 BUFFER_ID(0),
2381 SRC_GPR(0),
2382 SRC_REL(ABSOLUTE),
2383 SRC_SEL_X(SQ_SEL_X),
2384 MEGA_FETCH_COUNT(8));
2385 shader[i++] = VTX_DWORD1_GPR(DST_GPR(0),
2386 DST_REL(0),
2387 DST_SEL_X(SQ_SEL_X),
2388 DST_SEL_Y(SQ_SEL_Y),
2389 DST_SEL_Z(SQ_SEL_1),
2390 DST_SEL_W(SQ_SEL_0),
2391 USE_CONST_FIELDS(0),
2392 DATA_FORMAT(FMT_32_32_FLOAT),
2393 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED),
2394 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
2395 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
2396 shader[i++] = VTX_DWORD2(OFFSET(16),
2397 #if X_BYTE_ORDER == X_BIG_ENDIAN
2398 ENDIAN_SWAP(SQ_ENDIAN_8IN32),
2399 #else
2400 ENDIAN_SWAP(SQ_ENDIAN_NONE),
2401 #endif
2402 CONST_BUF_NO_STRIDE(0),
2403 MEGA_FETCH(0),
2404 ALT_CONST(0),
2405 BUFFER_INDEX_MODE(SQ_CF_INDEX_NONE));
2406 shader[i++] = VTX_DWORD_PAD;
2407
2408 /* no mask vfetch - 50/51 - dst */
2409 shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
2410 FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
2411 FETCH_WHOLE_QUAD(0),
2412 BUFFER_ID(0),
2413 SRC_GPR(0),
2414 SRC_REL(ABSOLUTE),
2415 SRC_SEL_X(SQ_SEL_X),
2416 MEGA_FETCH_COUNT(16));
2417 shader[i++] = VTX_DWORD1_GPR(DST_GPR(1),
2418 DST_REL(0),
2419 DST_SEL_X(SQ_SEL_X),
2420 DST_SEL_Y(SQ_SEL_Y),
2421 DST_SEL_Z(SQ_SEL_0),
2422 DST_SEL_W(SQ_SEL_1),
2423 USE_CONST_FIELDS(0),
2424 DATA_FORMAT(FMT_32_32_FLOAT),
2425 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED),
2426 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
2427 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
2428 shader[i++] = VTX_DWORD2(OFFSET(0),
2429 #if X_BYTE_ORDER == X_BIG_ENDIAN
2430 ENDIAN_SWAP(SQ_ENDIAN_8IN32),
2431 #else
2432 ENDIAN_SWAP(SQ_ENDIAN_NONE),
2433 #endif
2434 CONST_BUF_NO_STRIDE(0),
2435 MEGA_FETCH(1),
2436 ALT_CONST(0),
2437 BUFFER_INDEX_MODE(SQ_CF_INDEX_NONE));
2438 shader[i++] = VTX_DWORD_PAD;
2439 /* 52/53 - src */
2440 shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
2441 FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
2442 FETCH_WHOLE_QUAD(0),
2443 BUFFER_ID(0),
2444 SRC_GPR(0),
2445 SRC_REL(ABSOLUTE),
2446 SRC_SEL_X(SQ_SEL_X),
2447 MEGA_FETCH_COUNT(8));
2448 shader[i++] = VTX_DWORD1_GPR(DST_GPR(0),
2449 DST_REL(0),
2450 DST_SEL_X(SQ_SEL_X),
2451 DST_SEL_Y(SQ_SEL_Y),
2452 DST_SEL_Z(SQ_SEL_1),
2453 DST_SEL_W(SQ_SEL_0),
2454 USE_CONST_FIELDS(0),
2455 DATA_FORMAT(FMT_32_32_FLOAT),
2456 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED),
2457 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
2458 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
2459 shader[i++] = VTX_DWORD2(OFFSET(8),
2460 #if X_BYTE_ORDER == X_BIG_ENDIAN
2461 ENDIAN_SWAP(SQ_ENDIAN_8IN32),
2462 #else
2463 ENDIAN_SWAP(SQ_ENDIAN_NONE),
2464 #endif
2465 CONST_BUF_NO_STRIDE(0),
2466 MEGA_FETCH(0),
2467 ALT_CONST(0),
2468 BUFFER_INDEX_MODE(SQ_CF_INDEX_NONE));
2469 shader[i++] = VTX_DWORD_PAD;
2470
2471 return i;
2472 }
2473
2474 /* comp ps --------------------------------------- */
/*
 * Emit the composite pixel shader program into shader[].
 *
 * The program is written one 32-bit word at a time through shader[i++].
 * The numbered comments below are slot indices, counting two dwords per
 * slot (each texture fetch occupies two slots).  The ADDR() operands of
 * the control-flow (CF) words refer to these slot numbers, so no word may
 * be added, removed or reordered without renumbering every ADDR().
 *
 * Layout:
 *   slots  0-5   main program: four conditional CALLs, the src-IN-mask
 *                ALU clause and the pixel export (END_OF_PROGRAM)
 *   slots  6-15  four subroutines, each terminated by a RETURN
 *   slots 16-39  ALU / TEX instruction bodies referenced by the CF words
 *
 * Register use visible in this function: the src subroutines leave their
 * result in GPR0, the mask subroutines leave theirs in GPR1, the final
 * ALU clause writes GPR0 * GPR1 back to GPR0, and GPR0 is exported.
 *
 * ChipSet: not referenced anywhere in the body.
 * shader:  destination buffer; 80 dwords are stored with no bounds check,
 *          so the caller must provide at least that much room.
 *
 * Returns the number of dwords written (80).
 */
int evergreen_comp_ps(RADEONChipFamily ChipSet, uint32_t *shader)
{
    int i = 0;

    /*
     * NOTE(review): the mask CALLs (slots 0/1) are issued before the src
     * CALLs (slots 2/3).  The mask interpolation (slots 30-33) takes GPR0
     * as SRC0, and both src subroutines overwrite GPR0, so this order
     * looks significant -- confirm before rearranging.
     */

    /* 0 */
    /* call interp-fetch-mask (slot 11) if boolean1 == true */
    shader[i++] = CF_DWORD0(ADDR(11),
            JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
    shader[i++] = CF_DWORD1(POP_COUNT(0),
            CF_CONST(1),
            COND(SQ_CF_COND_BOOL),
            I_COUNT(0),
            VALID_PIXEL_MODE(0),
            END_OF_PROGRAM(0),
            CF_INST(SQ_CF_INST_CALL),
            WHOLE_QUAD_MODE(0),
            BARRIER(0));

    /* 1 */
    /* call read-constant-mask (slot 14) if boolean1 == false */
    shader[i++] = CF_DWORD0(ADDR(14),
            JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
    shader[i++] = CF_DWORD1(POP_COUNT(0),
            CF_CONST(1),
            COND(SQ_CF_COND_NOT_BOOL),
            I_COUNT(0),
            VALID_PIXEL_MODE(0),
            END_OF_PROGRAM(0),
            CF_INST(SQ_CF_INST_CALL),
            WHOLE_QUAD_MODE(0),
            BARRIER(0));

    /* 2 */
    /* call interp-fetch-src (slot 6) if boolean0 == true */
    shader[i++] = CF_DWORD0(ADDR(6),
            JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
    shader[i++] = CF_DWORD1(POP_COUNT(0),
            CF_CONST(0),
            COND(SQ_CF_COND_BOOL),
            I_COUNT(0),
            VALID_PIXEL_MODE(0),
            END_OF_PROGRAM(0),
            CF_INST(SQ_CF_INST_CALL),
            WHOLE_QUAD_MODE(0),
            BARRIER(0));

    /* 3 */
    /* call read-constant-src (slot 9) if boolean0 == false */
    shader[i++] = CF_DWORD0(ADDR(9),
            JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
    shader[i++] = CF_DWORD1(POP_COUNT(0),
            CF_CONST(0),
            COND(SQ_CF_COND_NOT_BOOL),
            I_COUNT(0),
            VALID_PIXEL_MODE(0),
            END_OF_PROGRAM(0),
            CF_INST(SQ_CF_INST_CALL),
            WHOLE_QUAD_MODE(0),
            BARRIER(0));
    /* 4 */
    /* src IN mask (GPR0 := GPR0 .* GPR1), ALU body at slots 16-19 */
    shader[i++] = CF_ALU_DWORD0(ADDR(16),
            KCACHE_BANK0(0),
            KCACHE_BANK1(0),
            KCACHE_MODE0(SQ_CF_KCACHE_NOP));
    shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
            KCACHE_ADDR0(0),
            KCACHE_ADDR1(0),
            I_COUNT(4),
            ALT_CONST(0),
            CF_INST(SQ_CF_INST_ALU),
            WHOLE_QUAD_MODE(0),
            BARRIER(1));

    /* 5 */
    /* export pixel data: GPR0.xyzw to MRT0; this word ends the program */
    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_PIXEL_MRT0),
            TYPE(SQ_EXPORT_PIXEL),
            RW_GPR(0),
            RW_REL(ABSOLUTE),
            INDEX_GPR(0),
            ELEM_SIZE(1));
    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
            SRC_SEL_Y(SQ_SEL_Y),
            SRC_SEL_Z(SQ_SEL_Z),
            SRC_SEL_W(SQ_SEL_W),
            BURST_COUNT(1),
            VALID_PIXEL_MODE(0),
            END_OF_PROGRAM(1),
            CF_INST(SQ_CF_INST_EXPORT_DONE),
            MARK(0),
            BARRIER(1));

    /* subroutine interp-fetch-src */

    /* 6 */
    /* interpolate src, ALU body at slots 20-23 */
    shader[i++] = CF_ALU_DWORD0(ADDR(20),
            KCACHE_BANK0(0),
            KCACHE_BANK1(0),
            KCACHE_MODE0(SQ_CF_KCACHE_NOP));
    shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
            KCACHE_ADDR0(0),
            KCACHE_ADDR1(0),
            I_COUNT(4),
            ALT_CONST(0),
            CF_INST(SQ_CF_INST_ALU),
            WHOLE_QUAD_MODE(0),
            BARRIER(1));

    /* 7 */
    /* texture fetch src into GPR0, TEX body at slots 24/25 */
    shader[i++] = CF_DWORD0(ADDR(24),
            JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
    shader[i++] = CF_DWORD1(POP_COUNT(0),
            CF_CONST(0),
            COND(SQ_CF_COND_ACTIVE),
            I_COUNT(1),
            VALID_PIXEL_MODE(0),
            END_OF_PROGRAM(0),
            CF_INST(SQ_CF_INST_TC),
            WHOLE_QUAD_MODE(0),
            BARRIER(1));

    /* 8 */
    /* return */
    shader[i++] = CF_DWORD0(ADDR(0),
            JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
    shader[i++] = CF_DWORD1(POP_COUNT(0),
            CF_CONST(0),
            COND(SQ_CF_COND_ACTIVE),
            I_COUNT(0),
            VALID_PIXEL_MODE(0),
            END_OF_PROGRAM(0),
            CF_INST(SQ_CF_INST_RETURN),
            WHOLE_QUAD_MODE(0),
            BARRIER(0));

    /* subroutine read-constant-src */

    /* 9 */
    /*
     * read constants into GPR0, ALU body at slots 26-29.
     * Unlike the interpolation clauses this one sets KCACHE_MODE0 to
     * LOCK_1 and ALT_CONST to 1; the body reads ALU_SRC_KCACHE0_BASE + 4.
     */
    shader[i++] = CF_ALU_DWORD0(ADDR(26),
            KCACHE_BANK0(0),
            KCACHE_BANK1(0),
            KCACHE_MODE0(SQ_CF_KCACHE_LOCK_1));
    shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
            KCACHE_ADDR0(0),
            KCACHE_ADDR1(0),
            I_COUNT(4),
            ALT_CONST(1),
            CF_INST(SQ_CF_INST_ALU),
            WHOLE_QUAD_MODE(0),
            BARRIER(1));

    /* 10 */
    /* return */
    shader[i++] = CF_DWORD0(ADDR(0),
            JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
    shader[i++] = CF_DWORD1(POP_COUNT(0),
            CF_CONST(0),
            COND(SQ_CF_COND_ACTIVE),
            I_COUNT(0),
            VALID_PIXEL_MODE(0),
            END_OF_PROGRAM(0),
            CF_INST(SQ_CF_INST_RETURN),
            WHOLE_QUAD_MODE(0),
            BARRIER(0));

    /* subroutine interp-fetch-mask */

    /* 11 */
    /* interpolate mask, ALU body at slots 30-33 */
    shader[i++] = CF_ALU_DWORD0(ADDR(30),
            KCACHE_BANK0(0),
            KCACHE_BANK1(0),
            KCACHE_MODE0(SQ_CF_KCACHE_NOP));
    shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
            KCACHE_ADDR0(0),
            KCACHE_ADDR1(0),
            I_COUNT(4),
            ALT_CONST(0),
            CF_INST(SQ_CF_INST_ALU),
            WHOLE_QUAD_MODE(0),
            BARRIER(1));

    /* 12 */
    /* texture fetch mask into GPR1, TEX body at slots 34/35 */
    shader[i++] = CF_DWORD0(ADDR(34),
            JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
    shader[i++] = CF_DWORD1(POP_COUNT(0),
            CF_CONST(0),
            COND(SQ_CF_COND_ACTIVE),
            I_COUNT(1),
            VALID_PIXEL_MODE(0),
            END_OF_PROGRAM(0),
            CF_INST(SQ_CF_INST_TC),
            WHOLE_QUAD_MODE(0),
            BARRIER(1));

    /* 13 */
    /* return */
    shader[i++] = CF_DWORD0(ADDR(0),
            JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
    shader[i++] = CF_DWORD1(POP_COUNT(0),
            CF_CONST(0),
            COND(SQ_CF_COND_ACTIVE),
            I_COUNT(0),
            VALID_PIXEL_MODE(0),
            END_OF_PROGRAM(0),
            CF_INST(SQ_CF_INST_RETURN),
            WHOLE_QUAD_MODE(0),
            BARRIER(0));

    /* subroutine read-constant-mask */

    /* 14 */
    /*
     * read constants into GPR1, ALU body at slots 36-39.
     * Same clause settings as slot 9 (KCACHE_MODE0 LOCK_1, ALT_CONST 1);
     * the body reads ALU_SRC_KCACHE0_BASE + 5.
     */
    shader[i++] = CF_ALU_DWORD0(ADDR(36),
            KCACHE_BANK0(0),
            KCACHE_BANK1(0),
            KCACHE_MODE0(SQ_CF_KCACHE_LOCK_1));
    shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
            KCACHE_ADDR0(0),
            KCACHE_ADDR1(0),
            I_COUNT(4),
            ALT_CONST(1),
            CF_INST(SQ_CF_INST_ALU),
            WHOLE_QUAD_MODE(0),
            BARRIER(1));

    /* 15 */
    /* return */
    shader[i++] = CF_DWORD0(ADDR(0),
            JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
    shader[i++] = CF_DWORD1(POP_COUNT(0),
            CF_CONST(0),
            COND(SQ_CF_COND_ACTIVE),
            I_COUNT(0),
            VALID_PIXEL_MODE(0),
            END_OF_PROGRAM(0),
            CF_INST(SQ_CF_INST_RETURN),
            WHOLE_QUAD_MODE(0),
            BARRIER(0));

    /* ALU clauses */

    /*
     * Slots 16-19: per-channel src * mask, referenced by CF slot 4.
     * All four results are written to GPR0 with CLAMP(1); LAST(1) is set
     * only on the final (.w) slot.
     */

    /* 16 */
    /* MUL gpr[0].x gpr[0].x gpr[1].x */
    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
            SRC0_REL(ABSOLUTE),
            SRC0_ELEM(ELEM_X),
            SRC0_NEG(0),
            SRC1_SEL(ALU_SRC_GPR_BASE + 1),
            SRC1_REL(ABSOLUTE),
            SRC1_ELEM(ELEM_X),
            SRC1_NEG(0),
            INDEX_MODE(SQ_INDEX_LOOP),
            PRED_SEL(SQ_PRED_SEL_OFF),
            LAST(0));
    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
            SRC1_ABS(0),
            UPDATE_EXECUTE_MASK(0),
            UPDATE_PRED(0),
            WRITE_MASK(1),
            OMOD(SQ_ALU_OMOD_OFF),
            ALU_INST(SQ_OP2_INST_MUL),
            BANK_SWIZZLE(SQ_ALU_VEC_012),
            DST_GPR(0),
            DST_REL(ABSOLUTE),
            DST_ELEM(ELEM_X),
            CLAMP(1));

    /* 17 */
    /* MUL gpr[0].y gpr[0].y gpr[1].y */
    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
            SRC0_REL(ABSOLUTE),
            SRC0_ELEM(ELEM_Y),
            SRC0_NEG(0),
            SRC1_SEL(ALU_SRC_GPR_BASE + 1),
            SRC1_REL(ABSOLUTE),
            SRC1_ELEM(ELEM_Y),
            SRC1_NEG(0),
            INDEX_MODE(SQ_INDEX_LOOP),
            PRED_SEL(SQ_PRED_SEL_OFF),
            LAST(0));
    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
            SRC1_ABS(0),
            UPDATE_EXECUTE_MASK(0),
            UPDATE_PRED(0),
            WRITE_MASK(1),
            OMOD(SQ_ALU_OMOD_OFF),
            ALU_INST(SQ_OP2_INST_MUL),
            BANK_SWIZZLE(SQ_ALU_VEC_012),
            DST_GPR(0),
            DST_REL(ABSOLUTE),
            DST_ELEM(ELEM_Y),
            CLAMP(1));
    /* 18 */
    /* MUL gpr[0].z gpr[0].z gpr[1].z */
    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
            SRC0_REL(ABSOLUTE),
            SRC0_ELEM(ELEM_Z),
            SRC0_NEG(0),
            SRC1_SEL(ALU_SRC_GPR_BASE + 1),
            SRC1_REL(ABSOLUTE),
            SRC1_ELEM(ELEM_Z),
            SRC1_NEG(0),
            INDEX_MODE(SQ_INDEX_LOOP),
            PRED_SEL(SQ_PRED_SEL_OFF),
            LAST(0));
    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
            SRC1_ABS(0),
            UPDATE_EXECUTE_MASK(0),
            UPDATE_PRED(0),
            WRITE_MASK(1),
            OMOD(SQ_ALU_OMOD_OFF),
            ALU_INST(SQ_OP2_INST_MUL),
            BANK_SWIZZLE(SQ_ALU_VEC_012),
            DST_GPR(0),
            DST_REL(ABSOLUTE),
            DST_ELEM(ELEM_Z),
            CLAMP(1));
    /* 19 */
    /* MUL gpr[0].w gpr[0].w gpr[1].w */
    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
            SRC0_REL(ABSOLUTE),
            SRC0_ELEM(ELEM_W),
            SRC0_NEG(0),
            SRC1_SEL(ALU_SRC_GPR_BASE + 1),
            SRC1_REL(ABSOLUTE),
            SRC1_ELEM(ELEM_W),
            SRC1_NEG(0),
            INDEX_MODE(SQ_INDEX_LOOP),
            PRED_SEL(SQ_PRED_SEL_OFF),
            LAST(1));
    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
            SRC1_ABS(0),
            UPDATE_EXECUTE_MASK(0),
            UPDATE_PRED(0),
            WRITE_MASK(1),
            OMOD(SQ_ALU_OMOD_OFF),
            ALU_INST(SQ_OP2_INST_MUL),
            BANK_SWIZZLE(SQ_ALU_VEC_012),
            DST_GPR(0),
            DST_REL(ABSOLUTE),
            DST_ELEM(ELEM_W),
            CLAMP(1));

    /*
     * Slots 20-23: INTERP_XY of PARAM0 into GPR0, referenced by CF slot 6.
     * SRC0 alternates GPR0.y / GPR0.x across the four slots.  Slots 20/21
     * have WRITE_MASK(1); slots 22/23 have WRITE_MASK(0).
     * NOTE(review): GPR0.x/.y presumably hold the interpolation inputs on
     * entry -- nothing in this function writes them beforehand; confirm
     * against the shader setup code.
     */

    /* 20 */
    /* INTERP_XY GPR0.x, GPR0.y PARAM0.x */
    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
            SRC0_REL(ABSOLUTE),
            SRC0_ELEM(ELEM_Y),
            SRC0_NEG(0),
            SRC1_SEL(ALU_SRC_PARAM_BASE + 0),
            SRC1_REL(ABSOLUTE),
            SRC1_ELEM(ELEM_X),
            SRC1_NEG(0),
            INDEX_MODE(SQ_INDEX_AR_X),
            PRED_SEL(SQ_PRED_SEL_OFF),
            LAST(0));
    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
            SRC1_ABS(0),
            UPDATE_EXECUTE_MASK(0),
            UPDATE_PRED(0),
            WRITE_MASK(1),
            OMOD(SQ_ALU_OMOD_OFF),
            ALU_INST(SQ_OP2_INST_INTERP_XY),
            BANK_SWIZZLE(SQ_ALU_VEC_210),
            DST_GPR(0),
            DST_REL(ABSOLUTE),
            DST_ELEM(ELEM_X),
            CLAMP(0));
    /* 21 */
    /* INTERP_XY GPR0.y, GPR0.x PARAM0.x */
    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
            SRC0_REL(ABSOLUTE),
            SRC0_ELEM(ELEM_X),
            SRC0_NEG(0),
            SRC1_SEL(ALU_SRC_PARAM_BASE + 0),
            SRC1_REL(ABSOLUTE),
            SRC1_ELEM(ELEM_X),
            SRC1_NEG(0),
            INDEX_MODE(SQ_INDEX_AR_X),
            PRED_SEL(SQ_PRED_SEL_OFF),
            LAST(0));
    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
            SRC1_ABS(0),
            UPDATE_EXECUTE_MASK(0),
            UPDATE_PRED(0),
            WRITE_MASK(1),
            OMOD(SQ_ALU_OMOD_OFF),
            ALU_INST(SQ_OP2_INST_INTERP_XY),
            BANK_SWIZZLE(SQ_ALU_VEC_210),
            DST_GPR(0),
            DST_REL(ABSOLUTE),
            DST_ELEM(ELEM_Y),
            CLAMP(0));
    /* 22 */
    /* INTERP_XY GPR0.z, GPR0.y PARAM0.x -- WRITE_MASK(0) on this slot */
    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
            SRC0_REL(ABSOLUTE),
            SRC0_ELEM(ELEM_Y),
            SRC0_NEG(0),
            SRC1_SEL(ALU_SRC_PARAM_BASE + 0),
            SRC1_REL(ABSOLUTE),
            SRC1_ELEM(ELEM_X),
            SRC1_NEG(0),
            INDEX_MODE(SQ_INDEX_AR_X),
            PRED_SEL(SQ_PRED_SEL_OFF),
            LAST(0));
    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
            SRC1_ABS(0),
            UPDATE_EXECUTE_MASK(0),
            UPDATE_PRED(0),
            WRITE_MASK(0),
            OMOD(SQ_ALU_OMOD_OFF),
            ALU_INST(SQ_OP2_INST_INTERP_XY),
            BANK_SWIZZLE(SQ_ALU_VEC_210),
            DST_GPR(0),
            DST_REL(ABSOLUTE),
            DST_ELEM(ELEM_Z),
            CLAMP(0));

    /* 23 */
    /* INTERP_XY GPR0.w, GPR0.x PARAM0.x -- WRITE_MASK(0) on this slot */
    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
            SRC0_REL(ABSOLUTE),
            SRC0_ELEM(ELEM_X),
            SRC0_NEG(0),
            SRC1_SEL(ALU_SRC_PARAM_BASE + 0),
            SRC1_REL(ABSOLUTE),
            SRC1_ELEM(ELEM_X),
            SRC1_NEG(0),
            INDEX_MODE(SQ_INDEX_AR_X),
            PRED_SEL(SQ_PRED_SEL_OFF),
            LAST(1));
    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
            SRC1_ABS(0),
            UPDATE_EXECUTE_MASK(0),
            UPDATE_PRED(0),
            WRITE_MASK(0),
            OMOD(SQ_ALU_OMOD_OFF),
            ALU_INST(SQ_OP2_INST_INTERP_XY),
            BANK_SWIZZLE(SQ_ALU_VEC_210),
            DST_GPR(0),
            DST_REL(ABSOLUTE),
            DST_ELEM(ELEM_W),
            CLAMP(0));

    /* 24/25 */
    /*
     * SAMPLE RID=0 GPR0, GPR0 (referenced by CF slot 7).
     * Coordinates are taken from GPR0 as (x, y, 0, 1); resource 0 and
     * sampler 0 are used; three words plus one pad word are emitted.
     */
    shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
            INST_MOD(0),
            FETCH_WHOLE_QUAD(0),
            RESOURCE_ID(0),
            SRC_GPR(0),
            SRC_REL(ABSOLUTE),
            ALT_CONST(0),
            RESOURCE_INDEX_MODE(SQ_CF_INDEX_NONE),
            SAMPLER_INDEX_MODE(SQ_CF_INDEX_NONE));
    shader[i++] = TEX_DWORD1(DST_GPR(0),
            DST_REL(ABSOLUTE),
            DST_SEL_X(SQ_SEL_X),
            DST_SEL_Y(SQ_SEL_Y),
            DST_SEL_Z(SQ_SEL_Z),
            DST_SEL_W(SQ_SEL_W),
            LOD_BIAS(0),
            COORD_TYPE_X(TEX_NORMALIZED),
            COORD_TYPE_Y(TEX_NORMALIZED),
            COORD_TYPE_Z(TEX_NORMALIZED),
            COORD_TYPE_W(TEX_NORMALIZED));
    shader[i++] = TEX_DWORD2(OFFSET_X(0),
            OFFSET_Y(0),
            OFFSET_Z(0),
            SAMPLER_ID(0),
            SRC_SEL_X(SQ_SEL_X),
            SRC_SEL_Y(SQ_SEL_Y),
            SRC_SEL_Z(SQ_SEL_0),
            SRC_SEL_W(SQ_SEL_1));
    shader[i++] = TEX_DWORD_PAD;

    /*
     * Slots 26-29: copy KCACHE0 entry 4 into GPR0 with CLAMP(1),
     * referenced by CF slot 9.  SRC1 is filled in as GPR0.x on every
     * slot although the opcode is MOV.
     */

    /* 26 */
    /* MOV GPR0.x, KC4.x */
    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 4),
            SRC0_REL(ABSOLUTE),
            SRC0_ELEM(ELEM_X),
            SRC0_NEG(0),
            SRC1_SEL(ALU_SRC_GPR_BASE + 0),
            SRC1_REL(ABSOLUTE),
            SRC1_ELEM(ELEM_X),
            SRC1_NEG(0),
            INDEX_MODE(SQ_INDEX_AR_X),
            PRED_SEL(SQ_PRED_SEL_OFF),
            LAST(0));
    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
            SRC1_ABS(0),
            UPDATE_EXECUTE_MASK(0),
            UPDATE_PRED(0),
            WRITE_MASK(1),
            OMOD(SQ_ALU_OMOD_OFF),
            ALU_INST(SQ_OP2_INST_MOV),
            BANK_SWIZZLE(SQ_ALU_VEC_012),
            DST_GPR(0),
            DST_REL(ABSOLUTE),
            DST_ELEM(ELEM_X),
            CLAMP(1));

    /* 27 */
    /* MOV GPR0.y, KC4.y */
    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 4),
            SRC0_REL(ABSOLUTE),
            SRC0_ELEM(ELEM_Y),
            SRC0_NEG(0),
            SRC1_SEL(ALU_SRC_GPR_BASE + 0),
            SRC1_REL(ABSOLUTE),
            SRC1_ELEM(ELEM_X),
            SRC1_NEG(0),
            INDEX_MODE(SQ_INDEX_AR_X),
            PRED_SEL(SQ_PRED_SEL_OFF),
            LAST(0));
    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
            SRC1_ABS(0),
            UPDATE_EXECUTE_MASK(0),
            UPDATE_PRED(0),
            WRITE_MASK(1),
            OMOD(SQ_ALU_OMOD_OFF),
            ALU_INST(SQ_OP2_INST_MOV),
            BANK_SWIZZLE(SQ_ALU_VEC_012),
            DST_GPR(0),
            DST_REL(ABSOLUTE),
            DST_ELEM(ELEM_Y),
            CLAMP(1));

    /* 28 */
    /* MOV GPR0.z, KC4.z */
    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 4),
            SRC0_REL(ABSOLUTE),
            SRC0_ELEM(ELEM_Z),
            SRC0_NEG(0),
            SRC1_SEL(ALU_SRC_GPR_BASE + 0),
            SRC1_REL(ABSOLUTE),
            SRC1_ELEM(ELEM_X),
            SRC1_NEG(0),
            INDEX_MODE(SQ_INDEX_AR_X),
            PRED_SEL(SQ_PRED_SEL_OFF),
            LAST(0));
    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
            SRC1_ABS(0),
            UPDATE_EXECUTE_MASK(0),
            UPDATE_PRED(0),
            WRITE_MASK(1),
            OMOD(SQ_ALU_OMOD_OFF),
            ALU_INST(SQ_OP2_INST_MOV),
            BANK_SWIZZLE(SQ_ALU_VEC_012),
            DST_GPR(0),
            DST_REL(ABSOLUTE),
            DST_ELEM(ELEM_Z),
            CLAMP(1));

    /* 29 */
    /* MOV GPR0.w, KC4.w */
    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 4),
            SRC0_REL(ABSOLUTE),
            SRC0_ELEM(ELEM_W),
            SRC0_NEG(0),
            SRC1_SEL(ALU_SRC_GPR_BASE + 0),
            SRC1_REL(ABSOLUTE),
            SRC1_ELEM(ELEM_X),
            SRC1_NEG(0),
            INDEX_MODE(SQ_INDEX_AR_X),
            PRED_SEL(SQ_PRED_SEL_OFF),
            LAST(1));
    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
            SRC1_ABS(0),
            UPDATE_EXECUTE_MASK(0),
            UPDATE_PRED(0),
            WRITE_MASK(1),
            OMOD(SQ_ALU_OMOD_OFF),
            ALU_INST(SQ_OP2_INST_MOV),
            BANK_SWIZZLE(SQ_ALU_VEC_012),
            DST_GPR(0),
            DST_REL(ABSOLUTE),
            DST_ELEM(ELEM_W),
            CLAMP(1));

    /*
     * Slots 30-33: INTERP_XY of PARAM1 into GPR1, referenced by CF
     * slot 11.  Same pattern as slots 20-23, but note that SRC0 is still
     * GPR0 (.y / .x alternating) while the destination is GPR1.
     */

    /* 30 */
    /* INTERP_XY GPR1.x, GPR0.y PARAM1.x */
    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
            SRC0_REL(ABSOLUTE),
            SRC0_ELEM(ELEM_Y),
            SRC0_NEG(0),
            SRC1_SEL(ALU_SRC_PARAM_BASE + 1),
            SRC1_REL(ABSOLUTE),
            SRC1_ELEM(ELEM_X),
            SRC1_NEG(0),
            INDEX_MODE(SQ_INDEX_AR_X),
            PRED_SEL(SQ_PRED_SEL_OFF),
            LAST(0));
    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
            SRC1_ABS(0),
            UPDATE_EXECUTE_MASK(0),
            UPDATE_PRED(0),
            WRITE_MASK(1),
            OMOD(SQ_ALU_OMOD_OFF),
            ALU_INST(SQ_OP2_INST_INTERP_XY),
            BANK_SWIZZLE(SQ_ALU_VEC_210),
            DST_GPR(1),
            DST_REL(ABSOLUTE),
            DST_ELEM(ELEM_X),
            CLAMP(0));
    /* 31 */
    /* INTERP_XY GPR1.y, GPR0.x PARAM1.x */
    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
            SRC0_REL(ABSOLUTE),
            SRC0_ELEM(ELEM_X),
            SRC0_NEG(0),
            SRC1_SEL(ALU_SRC_PARAM_BASE + 1),
            SRC1_REL(ABSOLUTE),
            SRC1_ELEM(ELEM_X),
            SRC1_NEG(0),
            INDEX_MODE(SQ_INDEX_AR_X),
            PRED_SEL(SQ_PRED_SEL_OFF),
            LAST(0));
    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
            SRC1_ABS(0),
            UPDATE_EXECUTE_MASK(0),
            UPDATE_PRED(0),
            WRITE_MASK(1),
            OMOD(SQ_ALU_OMOD_OFF),
            ALU_INST(SQ_OP2_INST_INTERP_XY),
            BANK_SWIZZLE(SQ_ALU_VEC_210),
            DST_GPR(1),
            DST_REL(ABSOLUTE),
            DST_ELEM(ELEM_Y),
            CLAMP(0));
    /* 32 */
    /* INTERP_XY GPR1.z, GPR0.y PARAM1.x -- WRITE_MASK(0) on this slot */
    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
            SRC0_REL(ABSOLUTE),
            SRC0_ELEM(ELEM_Y),
            SRC0_NEG(0),
            SRC1_SEL(ALU_SRC_PARAM_BASE + 1),
            SRC1_REL(ABSOLUTE),
            SRC1_ELEM(ELEM_X),
            SRC1_NEG(0),
            INDEX_MODE(SQ_INDEX_AR_X),
            PRED_SEL(SQ_PRED_SEL_OFF),
            LAST(0));
    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
            SRC1_ABS(0),
            UPDATE_EXECUTE_MASK(0),
            UPDATE_PRED(0),
            WRITE_MASK(0),
            OMOD(SQ_ALU_OMOD_OFF),
            ALU_INST(SQ_OP2_INST_INTERP_XY),
            BANK_SWIZZLE(SQ_ALU_VEC_210),
            DST_GPR(1),
            DST_REL(ABSOLUTE),
            DST_ELEM(ELEM_Z),
            CLAMP(0));
    /* 33 */
    /* INTERP_XY GPR1.w, GPR0.x PARAM1.x -- WRITE_MASK(0) on this slot */
    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
            SRC0_REL(ABSOLUTE),
            SRC0_ELEM(ELEM_X),
            SRC0_NEG(0),
            SRC1_SEL(ALU_SRC_PARAM_BASE + 1),
            SRC1_REL(ABSOLUTE),
            SRC1_ELEM(ELEM_X),
            SRC1_NEG(0),
            INDEX_MODE(SQ_INDEX_AR_X),
            PRED_SEL(SQ_PRED_SEL_OFF),
            LAST(1));
    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
            SRC1_ABS(0),
            UPDATE_EXECUTE_MASK(0),
            UPDATE_PRED(0),
            WRITE_MASK(0),
            OMOD(SQ_ALU_OMOD_OFF),
            ALU_INST(SQ_OP2_INST_INTERP_XY),
            BANK_SWIZZLE(SQ_ALU_VEC_210),
            DST_GPR(1),
            DST_REL(ABSOLUTE),
            DST_ELEM(ELEM_W),
            CLAMP(0));

    /* 34/35 */
    /*
     * SAMPLE RID=1 GPR1, GPR1 (referenced by CF slot 12).
     * Same encoding as slots 24/25 but with resource 1, sampler 1 and
     * GPR1 as both coordinate source and destination.
     */
    shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
            INST_MOD(0),
            FETCH_WHOLE_QUAD(0),
            RESOURCE_ID(1),
            SRC_GPR(1),
            SRC_REL(ABSOLUTE),
            ALT_CONST(0),
            RESOURCE_INDEX_MODE(SQ_CF_INDEX_NONE),
            SAMPLER_INDEX_MODE(SQ_CF_INDEX_NONE));
    shader[i++] = TEX_DWORD1(DST_GPR(1),
            DST_REL(ABSOLUTE),
            DST_SEL_X(SQ_SEL_X),
            DST_SEL_Y(SQ_SEL_Y),
            DST_SEL_Z(SQ_SEL_Z),
            DST_SEL_W(SQ_SEL_W),
            LOD_BIAS(0),
            COORD_TYPE_X(TEX_NORMALIZED),
            COORD_TYPE_Y(TEX_NORMALIZED),
            COORD_TYPE_Z(TEX_NORMALIZED),
            COORD_TYPE_W(TEX_NORMALIZED));
    shader[i++] = TEX_DWORD2(OFFSET_X(0),
            OFFSET_Y(0),
            OFFSET_Z(0),
            SAMPLER_ID(1),
            SRC_SEL_X(SQ_SEL_X),
            SRC_SEL_Y(SQ_SEL_Y),
            SRC_SEL_Z(SQ_SEL_0),
            SRC_SEL_W(SQ_SEL_1));
    shader[i++] = TEX_DWORD_PAD;

    /*
     * Slots 36-39: copy KCACHE0 entry 5 into GPR1 with CLAMP(1),
     * referenced by CF slot 14.  Mirrors slots 26-29.
     */

    /* 36 */
    /* MOV GPR1.x, KC5.x */
    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 5),
            SRC0_REL(ABSOLUTE),
            SRC0_ELEM(ELEM_X),
            SRC0_NEG(0),
            SRC1_SEL(ALU_SRC_GPR_BASE + 0),
            SRC1_REL(ABSOLUTE),
            SRC1_ELEM(ELEM_X),
            SRC1_NEG(0),
            INDEX_MODE(SQ_INDEX_AR_X),
            PRED_SEL(SQ_PRED_SEL_OFF),
            LAST(0));
    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
            SRC1_ABS(0),
            UPDATE_EXECUTE_MASK(0),
            UPDATE_PRED(0),
            WRITE_MASK(1),
            OMOD(SQ_ALU_OMOD_OFF),
            ALU_INST(SQ_OP2_INST_MOV),
            BANK_SWIZZLE(SQ_ALU_VEC_012),
            DST_GPR(1),
            DST_REL(ABSOLUTE),
            DST_ELEM(ELEM_X),
            CLAMP(1));

    /* 37 */
    /* MOV GPR1.y, KC5.y */
    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 5),
            SRC0_REL(ABSOLUTE),
            SRC0_ELEM(ELEM_Y),
            SRC0_NEG(0),
            SRC1_SEL(ALU_SRC_GPR_BASE + 0),
            SRC1_REL(ABSOLUTE),
            SRC1_ELEM(ELEM_X),
            SRC1_NEG(0),
            INDEX_MODE(SQ_INDEX_AR_X),
            PRED_SEL(SQ_PRED_SEL_OFF),
            LAST(0));
    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
            SRC1_ABS(0),
            UPDATE_EXECUTE_MASK(0),
            UPDATE_PRED(0),
            WRITE_MASK(1),
            OMOD(SQ_ALU_OMOD_OFF),
            ALU_INST(SQ_OP2_INST_MOV),
            BANK_SWIZZLE(SQ_ALU_VEC_012),
            DST_GPR(1),
            DST_REL(ABSOLUTE),
            DST_ELEM(ELEM_Y),
            CLAMP(1));

    /* 38 */
    /* MOV GPR1.z, KC5.z */
    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 5),
            SRC0_REL(ABSOLUTE),
            SRC0_ELEM(ELEM_Z),
            SRC0_NEG(0),
            SRC1_SEL(ALU_SRC_GPR_BASE + 0),
            SRC1_REL(ABSOLUTE),
            SRC1_ELEM(ELEM_X),
            SRC1_NEG(0),
            INDEX_MODE(SQ_INDEX_AR_X),
            PRED_SEL(SQ_PRED_SEL_OFF),
            LAST(0));
    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
            SRC1_ABS(0),
            UPDATE_EXECUTE_MASK(0),
            UPDATE_PRED(0),
            WRITE_MASK(1),
            OMOD(SQ_ALU_OMOD_OFF),
            ALU_INST(SQ_OP2_INST_MOV),
            BANK_SWIZZLE(SQ_ALU_VEC_012),
            DST_GPR(1),
            DST_REL(ABSOLUTE),
            DST_ELEM(ELEM_Z),
            CLAMP(1));

    /* 39 */
    /* MOV GPR1.w, KC5.w */
    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 5),
            SRC0_REL(ABSOLUTE),
            SRC0_ELEM(ELEM_W),
            SRC0_NEG(0),
            SRC1_SEL(ALU_SRC_GPR_BASE + 0),
            SRC1_REL(ABSOLUTE),
            SRC1_ELEM(ELEM_X),
            SRC1_NEG(0),
            INDEX_MODE(SQ_INDEX_AR_X),
            PRED_SEL(SQ_PRED_SEL_OFF),
            LAST(1));
    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
            SRC1_ABS(0),
            UPDATE_EXECUTE_MASK(0),
            UPDATE_PRED(0),
            WRITE_MASK(1),
            OMOD(SQ_ALU_OMOD_OFF),
            ALU_INST(SQ_OP2_INST_MOV),
            BANK_SWIZZLE(SQ_ALU_VEC_012),
            DST_GPR(1),
            DST_REL(ABSOLUTE),
            DST_ELEM(ELEM_W),
            CLAMP(1));

    /* number of dwords emitted */
    return i;
}
3301