1 /*
2  * Copyright 2010 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21  * SOFTWARE.
22  *
23  * Authors: Alex Deucher <alexander.deucher@amd.com>
24  *
25  */
26 #ifdef HAVE_CONFIG_H
27 #include "config.h"
28 #endif
29 
30 #include "xf86.h"
31 
32 #include <errno.h>
33 
34 #include "radeon.h"
35 #include "evergreen_shader.h"
36 #include "radeon_reg.h"
37 #include "evergreen_reg.h"
38 #include "evergreen_state.h"
39 
40 #include "radeon_vbo.h"
41 #include "radeon_exa_shared.h"
42 
43 static const uint32_t EVERGREEN_ROP[16] = {
44     RADEON_ROP3_ZERO, /* GXclear        */
45     RADEON_ROP3_DSa,  /* Gxand          */
46     RADEON_ROP3_SDna, /* GXandReverse   */
47     RADEON_ROP3_S,    /* GXcopy         */
48     RADEON_ROP3_DSna, /* GXandInverted  */
49     RADEON_ROP3_D,    /* GXnoop         */
50     RADEON_ROP3_DSx,  /* GXxor          */
51     RADEON_ROP3_DSo,  /* GXor           */
52     RADEON_ROP3_DSon, /* GXnor          */
53     RADEON_ROP3_DSxn, /* GXequiv        */
54     RADEON_ROP3_Dn,   /* GXinvert       */
55     RADEON_ROP3_SDno, /* GXorReverse    */
56     RADEON_ROP3_Sn,   /* GXcopyInverted */
57     RADEON_ROP3_DSno, /* GXorInverted   */
58     RADEON_ROP3_DSan, /* GXnand         */
59     RADEON_ROP3_ONE,  /* GXset          */
60 };
61 
62 void
evergreen_start_3d(ScrnInfoPtr pScrn)63 evergreen_start_3d(ScrnInfoPtr pScrn)
64 {
65     RADEONInfoPtr info = RADEONPTR(pScrn);
66 
67     BEGIN_BATCH(3);
68     PACK3(IT_CONTEXT_CONTROL, 2);
69     E32(0x80000000);
70     E32(0x80000000);
71     END_BATCH();
72 
73 }
74 
/*
 * Translate a tile split value into its encoded form:
 * 64 -> 0, 128 -> 1, 256 -> 2, 512 -> 3, 1024 -> 4, 2048 -> 5, 4096 -> 6.
 * Any other value is encoded as 4, i.e. the same as 1024.
 */
unsigned eg_tile_split(unsigned tile_split)
{
	static const unsigned known_splits[] = {
		64, 128, 256, 512, 1024, 2048, 4096
	};
	unsigned code;

	/* the encoding of a known value is its position in the table */
	for (code = 0; code < sizeof(known_splits) / sizeof(known_splits[0]); code++) {
		if (known_splits[code] == tile_split)
			return code;
	}

	/* unrecognised values fall back to the 1024 encoding */
	return 4;
}
89 
/*
 * Translate a macro tile aspect value into its encoded form:
 * 1 -> 0, 2 -> 1, 4 -> 2, 8 -> 3.  Any other value is encoded as 0.
 */
static unsigned eg_macro_tile_aspect(unsigned macro_tile_aspect)
{
	if (macro_tile_aspect == 2)
		return 1;
	if (macro_tile_aspect == 4)
		return 2;
	if (macro_tile_aspect == 8)
		return 3;

	/* 1 and every unrecognised value */
	return 0;
}
101 
/*
 * Translate a bank width/height value into its encoded form:
 * 1 -> 0, 2 -> 1, 4 -> 2, 8 -> 3.  Any other value is encoded as 0.
 */
static unsigned eg_bank_wh(unsigned bankwh)
{
	unsigned code;

	/* for 2, 4 and 8 the encoding is the power-of-two exponent */
	for (code = 3; code > 0; code--) {
		if (bankwh == (1u << code))
			return code;
	}

	/* 1 and every unrecognised value */
	return 0;
}
113 
/*
 * Translate a bank count into its encoded form:
 * 2 -> 0, 4 -> 1, 8 -> 2, 16 -> 3.  Any other value is encoded as 0.
 */
static unsigned eg_nbanks(unsigned nbanks)
{
	return (nbanks == 4)  ? 1 :
	       (nbanks == 8)  ? 2 :
	       (nbanks == 16) ? 3 :
	       0; /* 2 and every unrecognised value */
}
125 
126 /*
127  * Setup of functional groups
128  */
129 
130 // asic stack/thread/gpr limits - need to query the drm
131 static void
evergreen_sq_setup(ScrnInfoPtr pScrn,sq_config_t * sq_conf)132 evergreen_sq_setup(ScrnInfoPtr pScrn, sq_config_t *sq_conf)
133 {
134     uint32_t sq_config, sq_gpr_resource_mgmt_1, sq_gpr_resource_mgmt_2, sq_gpr_resource_mgmt_3;
135     uint32_t sq_thread_resource_mgmt, sq_thread_resource_mgmt_2;
136     uint32_t sq_stack_resource_mgmt_1, sq_stack_resource_mgmt_2, sq_stack_resource_mgmt_3;
137     RADEONInfoPtr info = RADEONPTR(pScrn);
138 
139     if ((info->ChipFamily == CHIP_FAMILY_CEDAR) ||
140 	(info->ChipFamily == CHIP_FAMILY_PALM) ||
141 	(info->ChipFamily == CHIP_FAMILY_SUMO) ||
142 	(info->ChipFamily == CHIP_FAMILY_SUMO2) ||
143 	(info->ChipFamily == CHIP_FAMILY_CAICOS))
144 	sq_config = 0;
145     else
146 	sq_config = VC_ENABLE_bit;
147 
148     sq_config |= (EXPORT_SRC_C_bit |
149 		  (sq_conf->cs_prio << CS_PRIO_shift) |
150 		  (sq_conf->ls_prio << LS_PRIO_shift) |
151 		  (sq_conf->hs_prio << HS_PRIO_shift) |
152 		  (sq_conf->ps_prio << PS_PRIO_shift) |
153 		  (sq_conf->vs_prio << VS_PRIO_shift) |
154 		  (sq_conf->gs_prio << GS_PRIO_shift) |
155 		  (sq_conf->es_prio << ES_PRIO_shift));
156 
157     sq_gpr_resource_mgmt_1 = ((sq_conf->num_ps_gprs << NUM_PS_GPRS_shift) |
158 			      (sq_conf->num_vs_gprs << NUM_VS_GPRS_shift) |
159 			      (sq_conf->num_temp_gprs << NUM_CLAUSE_TEMP_GPRS_shift));
160     sq_gpr_resource_mgmt_2 = ((sq_conf->num_gs_gprs << NUM_GS_GPRS_shift) |
161 			      (sq_conf->num_es_gprs << NUM_ES_GPRS_shift));
162     sq_gpr_resource_mgmt_3 = ((sq_conf->num_hs_gprs << NUM_HS_GPRS_shift) |
163 			      (sq_conf->num_ls_gprs << NUM_LS_GPRS_shift));
164 
165     sq_thread_resource_mgmt = ((sq_conf->num_ps_threads << NUM_PS_THREADS_shift) |
166 			       (sq_conf->num_vs_threads << NUM_VS_THREADS_shift) |
167 			       (sq_conf->num_gs_threads << NUM_GS_THREADS_shift) |
168 			       (sq_conf->num_es_threads << NUM_ES_THREADS_shift));
169     sq_thread_resource_mgmt_2 = ((sq_conf->num_hs_threads << NUM_HS_THREADS_shift) |
170 				 (sq_conf->num_ls_threads << NUM_LS_THREADS_shift));
171 
172     sq_stack_resource_mgmt_1 = ((sq_conf->num_ps_stack_entries << NUM_PS_STACK_ENTRIES_shift) |
173 				(sq_conf->num_vs_stack_entries << NUM_VS_STACK_ENTRIES_shift));
174 
175     sq_stack_resource_mgmt_2 = ((sq_conf->num_gs_stack_entries << NUM_GS_STACK_ENTRIES_shift) |
176 				(sq_conf->num_es_stack_entries << NUM_ES_STACK_ENTRIES_shift));
177 
178     sq_stack_resource_mgmt_3 = ((sq_conf->num_hs_stack_entries << NUM_HS_STACK_ENTRIES_shift) |
179 				(sq_conf->num_ls_stack_entries << NUM_LS_STACK_ENTRIES_shift));
180 
181     BEGIN_BATCH(16);
182     /* disable dyn gprs */
183     EREG(SQ_DYN_GPR_CNTL_PS_FLUSH_REQ, 0);
184     PACK0(SQ_CONFIG, 4);
185     E32(sq_config);
186     E32(sq_gpr_resource_mgmt_1);
187     E32(sq_gpr_resource_mgmt_2);
188     E32(sq_gpr_resource_mgmt_3);
189     PACK0(SQ_THREAD_RESOURCE_MGMT, 5);
190     E32(sq_thread_resource_mgmt);
191     E32(sq_thread_resource_mgmt_2);
192     E32(sq_stack_resource_mgmt_1);
193     E32(sq_stack_resource_mgmt_2);
194     E32(sq_stack_resource_mgmt_3);
195     END_BATCH();
196 }
197 
198 /* cayman has some minor differences in CB_COLOR*_INFO and _ATTRIB, but none that
199  * we use here.
200  */
201 void
evergreen_set_render_target(ScrnInfoPtr pScrn,cb_config_t * cb_conf,uint32_t domain)202 evergreen_set_render_target(ScrnInfoPtr pScrn, cb_config_t *cb_conf, uint32_t domain)
203 {
204     uint32_t cb_color_info, cb_color_attrib = 0, cb_color_dim;
205     unsigned pitch, slice, w, h, array_mode, nbanks;
206     uint32_t tile_split, macro_aspect, bankw, bankh;
207     RADEONInfoPtr info = RADEONPTR(pScrn);
208 
209     if (cb_conf->surface) {
210 	switch (cb_conf->surface->level[0].mode) {
211 	case RADEON_SURF_MODE_1D:
212 		array_mode = 2;
213 		break;
214 	case RADEON_SURF_MODE_2D:
215 		array_mode = 4;
216 		break;
217 	default:
218 		array_mode = 0;
219 		break;
220 	}
221 	w = cb_conf->surface->level[0].npix_x;
222 	h = cb_conf->surface->level[0].npix_y;
223 	pitch = (cb_conf->surface->level[0].nblk_x >> 3) - 1;
224 	slice = ((cb_conf->surface->level[0].nblk_x * cb_conf->surface->level[0].nblk_y) / 64) - 1;
225 	tile_split = cb_conf->surface->tile_split;
226 	macro_aspect = cb_conf->surface->mtilea;
227 	bankw = cb_conf->surface->bankw;
228 	bankh = cb_conf->surface->bankh;
229 	tile_split = eg_tile_split(tile_split);
230 	macro_aspect = eg_macro_tile_aspect(macro_aspect);
231 	bankw = eg_bank_wh(bankw);
232 	bankh = eg_bank_wh(bankh);
233     } else {
234 	pitch = (cb_conf->w / 8) - 1;
235 	h = RADEON_ALIGN(cb_conf->h, 8);
236 	slice = ((cb_conf->w * h) / 64) - 1;
237 	array_mode = cb_conf->array_mode;
238 	w = cb_conf->w;
239 	tile_split = 4;
240 	macro_aspect = 0;
241 	bankw = 0;
242 	bankh = 0;
243     }
244     nbanks = info->num_banks;
245     nbanks = eg_nbanks(nbanks);
246 
247     cb_color_attrib |= (tile_split << CB_COLOR0_ATTRIB__TILE_SPLIT_shift)|
248 		       (nbanks << CB_COLOR0_ATTRIB__NUM_BANKS_shift) |
249 		       (bankw << CB_COLOR0_ATTRIB__BANK_WIDTH_shift) |
250 		       (bankh << CB_COLOR0_ATTRIB__BANK_HEIGHT_shift) |
251 		       (macro_aspect << CB_COLOR0_ATTRIB__MACRO_TILE_ASPECT_shift);
252     cb_color_info = ((cb_conf->endian      << ENDIAN_shift)				|
253 		     (cb_conf->format      << CB_COLOR0_INFO__FORMAT_shift)		|
254 		     (array_mode  << CB_COLOR0_INFO__ARRAY_MODE_shift)		|
255 		     (cb_conf->number_type << NUMBER_TYPE_shift)			|
256 		     (cb_conf->comp_swap   << COMP_SWAP_shift)				|
257 		     (cb_conf->source_format << SOURCE_FORMAT_shift)                    |
258 		     (cb_conf->resource_type << RESOURCE_TYPE_shift));
259     if (cb_conf->blend_clamp)
260 	cb_color_info |= BLEND_CLAMP_bit;
261     if (cb_conf->fast_clear)
262 	cb_color_info |= FAST_CLEAR_bit;
263     if (cb_conf->compression)
264 	cb_color_info |= COMPRESSION_bit;
265     if (cb_conf->blend_bypass)
266 	cb_color_info |= BLEND_BYPASS_bit;
267     if (cb_conf->simple_float)
268 	cb_color_info |= SIMPLE_FLOAT_bit;
269     if (cb_conf->round_mode)
270 	cb_color_info |= CB_COLOR0_INFO__ROUND_MODE_bit;
271     if (cb_conf->tile_compact)
272 	cb_color_info |= CB_COLOR0_INFO__TILE_COMPACT_bit;
273     if (cb_conf->rat)
274 	cb_color_info |= RAT_bit;
275 
276     /* bit 4 needs to be set for linear and depth/stencil surfaces */
277     if (cb_conf->non_disp_tiling)
278 	cb_color_attrib |= CB_COLOR0_ATTRIB__NON_DISP_TILING_ORDER_bit;
279 
280     switch (cb_conf->resource_type) {
281     case BUFFER:
282 	/* number of elements in the surface */
283 	cb_color_dim = pitch * slice;
284 	break;
285     default:
286 	/* w/h of the surface */
287 	cb_color_dim = (((w - 1) << WIDTH_MAX_shift) |
288 			((cb_conf->h - 1) << HEIGHT_MAX_shift));
289 	break;
290     }
291 
292     BEGIN_BATCH(3 + 2);
293     EREG(CB_COLOR0_BASE + (0x3c * cb_conf->id), (cb_conf->base >> 8));
294     RELOC_BATCH(cb_conf->bo, 0, domain);
295     END_BATCH();
296 
297     /* Set CMASK & FMASK buffer to the offset of color buffer as
298      * we don't use those this shouldn't cause any issue and we
299      * then have a valid cmd stream
300      */
301     BEGIN_BATCH(3 + 2);
302     EREG(CB_COLOR0_CMASK + (0x3c * cb_conf->id), (0     >> 8));
303     RELOC_BATCH(cb_conf->bo, 0, domain);
304     END_BATCH();
305     BEGIN_BATCH(3 + 2);
306     EREG(CB_COLOR0_FMASK + (0x3c * cb_conf->id), (0     >> 8));
307     RELOC_BATCH(cb_conf->bo, 0, domain);
308     END_BATCH();
309 
310     /* tiling config */
311     BEGIN_BATCH(3 + 2);
312     EREG(CB_COLOR0_ATTRIB + (0x3c * cb_conf->id), cb_color_attrib);
313     RELOC_BATCH(cb_conf->bo, 0, domain);
314     END_BATCH();
315     BEGIN_BATCH(3 + 2);
316     EREG(CB_COLOR0_INFO + (0x3c * cb_conf->id), cb_color_info);
317     RELOC_BATCH(cb_conf->bo, 0, domain);
318     END_BATCH();
319 
320     BEGIN_BATCH(33);
321     EREG(CB_COLOR0_PITCH + (0x3c * cb_conf->id), pitch);
322     EREG(CB_COLOR0_SLICE + (0x3c * cb_conf->id), slice);
323     EREG(CB_COLOR0_VIEW + (0x3c * cb_conf->id), 0);
324     EREG(CB_COLOR0_DIM + (0x3c * cb_conf->id), cb_color_dim);
325     EREG(CB_COLOR0_CMASK_SLICE + (0x3c * cb_conf->id), 0);
326     EREG(CB_COLOR0_FMASK_SLICE + (0x3c * cb_conf->id), 0);
327     PACK0(CB_COLOR0_CLEAR_WORD0 + (0x3c * cb_conf->id), 4);
328     E32(0);
329     E32(0);
330     E32(0);
331     E32(0);
332     EREG(CB_TARGET_MASK,                      (cb_conf->pmask << TARGET0_ENABLE_shift));
333     EREG(CB_COLOR_CONTROL,                    (EVERGREEN_ROP[cb_conf->rop] |
334 					       (CB_NORMAL << CB_COLOR_CONTROL__MODE_shift)));
335     EREG(CB_BLEND0_CONTROL,                   cb_conf->blendcntl);
336     END_BATCH();
337 }
338 
/*
 * Emit the four blend colour registers starting at CB_BLEND_RED.
 * color[] is read as { R, G, B, A }.
 */
void evergreen_set_blend_color(ScrnInfoPtr pScrn, float *color)
{
    RADEONInfoPtr info = RADEONPTR(pScrn);
    int chan;

    BEGIN_BATCH(2 + 4);
    PACK0(CB_BLEND_RED, 4);
    /* R, G, B, A in that order */
    for (chan = 0; chan < 4; chan++) {
        EFLOAT(color[chan]);
    }
    END_BATCH();
}
351 
352 static void
evergreen_cp_set_surface_sync(ScrnInfoPtr pScrn,uint32_t sync_type,uint32_t size,uint64_t mc_addr,struct radeon_bo * bo,uint32_t rdomains,uint32_t wdomain)353 evergreen_cp_set_surface_sync(ScrnInfoPtr pScrn, uint32_t sync_type,
354 			      uint32_t size, uint64_t mc_addr,
355 			      struct radeon_bo *bo, uint32_t rdomains, uint32_t wdomain)
356 {
357     RADEONInfoPtr info = RADEONPTR(pScrn);
358     uint32_t cp_coher_size;
359     if (size == 0xffffffff)
360 	cp_coher_size = 0xffffffff;
361     else
362 	cp_coher_size = ((size + 255) >> 8);
363 
364     BEGIN_BATCH(5 + 2);
365     PACK3(IT_SURFACE_SYNC, 4);
366     E32(sync_type);
367     E32(cp_coher_size);
368     E32((mc_addr >> 8));
369     E32(10); /* poll interval */
370     RELOC_BATCH(bo, rdomains, wdomain);
371     END_BATCH();
372 }
373 
374 /* inserts a wait for vline in the command stream */
evergreen_cp_wait_vline_sync(ScrnInfoPtr pScrn,PixmapPtr pPix,xf86CrtcPtr crtc,int start,int stop)375 void evergreen_cp_wait_vline_sync(ScrnInfoPtr pScrn, PixmapPtr pPix,
376 				  xf86CrtcPtr crtc, int start, int stop)
377 {
378     RADEONInfoPtr  info = RADEONPTR(pScrn);
379     drmmode_crtc_private_ptr drmmode_crtc;
380 
381     if (!crtc)
382         return;
383 
384     drmmode_crtc = crtc->driver_private;
385 
386     if (!crtc->enabled)
387         return;
388 
389     if (pPix != pScrn->pScreen->GetScreenPixmap(pScrn->pScreen))
390         return;
391 
392     start = max(start, crtc->y);
393     stop = min(stop, crtc->y + crtc->mode.VDisplay);
394 
395     if (start >= stop)
396         return;
397 
398     BEGIN_BATCH(11);
399     /* set the VLINE range */
400     EREG(EVERGREEN_VLINE_START_END, /* this is just a marker */
401 	 (start << EVERGREEN_VLINE_START_SHIFT) |
402 	 (stop << EVERGREEN_VLINE_END_SHIFT));
403 
404     /* tell the CP to poll the VLINE state register */
405     PACK3(IT_WAIT_REG_MEM, 6);
406     E32(IT_WAIT_REG | IT_WAIT_EQ);
407     E32(IT_WAIT_ADDR(EVERGREEN_VLINE_STATUS));
408     E32(0);
409     E32(0);                          // Ref value
410     E32(EVERGREEN_VLINE_STAT);    // Mask
411     E32(10);                         // Wait interval
412     /* add crtc reloc */
413     PACK3(IT_NOP, 1);
414     E32(drmmode_crtc->mode_crtc->crtc_id);
415     END_BATCH();
416 }
417 
418 void
evergreen_set_spi(ScrnInfoPtr pScrn,int vs_export_count,int num_interp)419 evergreen_set_spi(ScrnInfoPtr pScrn, int vs_export_count, int num_interp)
420 {
421     RADEONInfoPtr info = RADEONPTR(pScrn);
422 
423     BEGIN_BATCH(8);
424     /* Interpolator setup */
425     EREG(SPI_VS_OUT_CONFIG, (vs_export_count << VS_EXPORT_COUNT_shift));
426     PACK0(SPI_PS_IN_CONTROL_0, 3);
427     E32(((num_interp << NUM_INTERP_shift) |
428 	 LINEAR_GRADIENT_ENA_bit)); // SPI_PS_IN_CONTROL_0
429     E32(0); // SPI_PS_IN_CONTROL_1
430     E32(0); // SPI_INTERP_CONTROL_0
431     END_BATCH();
432 }
433 
434 void
evergreen_fs_setup(ScrnInfoPtr pScrn,shader_config_t * fs_conf,uint32_t domain)435 evergreen_fs_setup(ScrnInfoPtr pScrn, shader_config_t *fs_conf, uint32_t domain)
436 {
437     RADEONInfoPtr info = RADEONPTR(pScrn);
438     uint32_t sq_pgm_resources;
439 
440     sq_pgm_resources = ((fs_conf->num_gprs << NUM_GPRS_shift) |
441 			(fs_conf->stack_size << STACK_SIZE_shift));
442 
443     if (fs_conf->dx10_clamp)
444 	sq_pgm_resources |= DX10_CLAMP_bit;
445 
446     BEGIN_BATCH(3 + 2);
447     EREG(SQ_PGM_START_FS, fs_conf->shader_addr >> 8);
448     RELOC_BATCH(fs_conf->bo, domain, 0);
449     END_BATCH();
450 
451     BEGIN_BATCH(3);
452     EREG(SQ_PGM_RESOURCES_FS, sq_pgm_resources);
453     END_BATCH();
454 }
455 
456 /* cayman has some minor differences in SQ_PGM_RESOUCES_VS and _RESOURCES_2_VS,
457  * but none that we use here.
458  */
459 void
evergreen_vs_setup(ScrnInfoPtr pScrn,shader_config_t * vs_conf,uint32_t domain)460 evergreen_vs_setup(ScrnInfoPtr pScrn, shader_config_t *vs_conf, uint32_t domain)
461 {
462     RADEONInfoPtr info = RADEONPTR(pScrn);
463     uint32_t sq_pgm_resources, sq_pgm_resources_2;
464 
465     sq_pgm_resources = ((vs_conf->num_gprs << NUM_GPRS_shift) |
466 			(vs_conf->stack_size << STACK_SIZE_shift));
467 
468     if (vs_conf->dx10_clamp)
469 	sq_pgm_resources |= DX10_CLAMP_bit;
470     if (vs_conf->uncached_first_inst)
471 	sq_pgm_resources |= UNCACHED_FIRST_INST_bit;
472 
473     sq_pgm_resources_2 = ((vs_conf->single_round << SINGLE_ROUND_shift) |
474 			  (vs_conf->double_round << DOUBLE_ROUND_shift));
475 
476     if (vs_conf->allow_sdi)
477 	sq_pgm_resources_2 |= ALLOW_SINGLE_DENORM_IN_bit;
478     if (vs_conf->allow_sd0)
479 	sq_pgm_resources_2 |= ALLOW_SINGLE_DENORM_OUT_bit;
480     if (vs_conf->allow_ddi)
481 	sq_pgm_resources_2 |= ALLOW_DOUBLE_DENORM_IN_bit;
482     if (vs_conf->allow_ddo)
483 	sq_pgm_resources_2 |= ALLOW_DOUBLE_DENORM_OUT_bit;
484 
485     /* flush SQ cache */
486     evergreen_cp_set_surface_sync(pScrn, SH_ACTION_ENA_bit,
487 				  vs_conf->shader_size, vs_conf->shader_addr,
488 				  vs_conf->bo, domain, 0);
489 
490     BEGIN_BATCH(3 + 2);
491     EREG(SQ_PGM_START_VS, vs_conf->shader_addr >> 8);
492     RELOC_BATCH(vs_conf->bo, domain, 0);
493     END_BATCH();
494 
495     BEGIN_BATCH(4);
496     PACK0(SQ_PGM_RESOURCES_VS, 2);
497     E32(sq_pgm_resources);
498     E32(sq_pgm_resources_2);
499     END_BATCH();
500 }
501 
502 /* cayman has some minor differences in SQ_PGM_RESOUCES_PS and _RESOURCES_2_PS,
503  * but none that we use here.
504  */
505 void
evergreen_ps_setup(ScrnInfoPtr pScrn,shader_config_t * ps_conf,uint32_t domain)506 evergreen_ps_setup(ScrnInfoPtr pScrn, shader_config_t *ps_conf, uint32_t domain)
507 {
508     RADEONInfoPtr info = RADEONPTR(pScrn);
509     uint32_t sq_pgm_resources, sq_pgm_resources_2;
510 
511     sq_pgm_resources = ((ps_conf->num_gprs << NUM_GPRS_shift) |
512 			(ps_conf->stack_size << STACK_SIZE_shift));
513 
514     if (ps_conf->dx10_clamp)
515 	sq_pgm_resources |= DX10_CLAMP_bit;
516     if (ps_conf->uncached_first_inst)
517 	sq_pgm_resources |= UNCACHED_FIRST_INST_bit;
518     if (ps_conf->clamp_consts)
519 	sq_pgm_resources |= CLAMP_CONSTS_bit;
520 
521     sq_pgm_resources_2 = ((ps_conf->single_round << SINGLE_ROUND_shift) |
522 			  (ps_conf->double_round << DOUBLE_ROUND_shift));
523 
524     if (ps_conf->allow_sdi)
525 	sq_pgm_resources_2 |= ALLOW_SINGLE_DENORM_IN_bit;
526     if (ps_conf->allow_sd0)
527 	sq_pgm_resources_2 |= ALLOW_SINGLE_DENORM_OUT_bit;
528     if (ps_conf->allow_ddi)
529 	sq_pgm_resources_2 |= ALLOW_DOUBLE_DENORM_IN_bit;
530     if (ps_conf->allow_ddo)
531 	sq_pgm_resources_2 |= ALLOW_DOUBLE_DENORM_OUT_bit;
532 
533     /* flush SQ cache */
534     evergreen_cp_set_surface_sync(pScrn, SH_ACTION_ENA_bit,
535 				  ps_conf->shader_size, ps_conf->shader_addr,
536 				  ps_conf->bo, domain, 0);
537 
538     BEGIN_BATCH(3 + 2);
539     EREG(SQ_PGM_START_PS, ps_conf->shader_addr >> 8);
540     RELOC_BATCH(ps_conf->bo, domain, 0);
541     END_BATCH();
542 
543     BEGIN_BATCH(5);
544     PACK0(SQ_PGM_RESOURCES_PS, 3);
545     E32(sq_pgm_resources);
546     E32(sq_pgm_resources_2);
547     E32(ps_conf->export_mode);
548     END_BATCH();
549 }
550 
551 void
evergreen_set_alu_consts(ScrnInfoPtr pScrn,const_config_t * const_conf,uint32_t domain)552 evergreen_set_alu_consts(ScrnInfoPtr pScrn, const_config_t *const_conf, uint32_t domain)
553 {
554     RADEONInfoPtr info = RADEONPTR(pScrn);
555     /* size reg is units of 16 consts (4 dwords each) */
556     uint32_t size = const_conf->size_bytes >> 8;
557 
558     if (size == 0)
559 	size = 1;
560 
561 #if X_BYTE_ORDER == X_BIG_ENDIAN
562     {
563 	    uint32_t count = size << 6, *p = const_conf->cpu_ptr;
564 
565 	    while(count--) {
566 		    *p = cpu_to_le32(*p);
567 		    p++;
568 	    }
569     }
570 #endif
571 
572     /* flush SQ cache */
573     evergreen_cp_set_surface_sync(pScrn, SH_ACTION_ENA_bit,
574 				  const_conf->size_bytes, const_conf->const_addr,
575 				  const_conf->bo, domain, 0);
576 
577     switch (const_conf->type) {
578     case SHADER_TYPE_VS:
579 	BEGIN_BATCH(3);
580 	EREG(SQ_ALU_CONST_BUFFER_SIZE_VS_0, size);
581 	END_BATCH();
582 	BEGIN_BATCH(3 + 2);
583 	EREG(SQ_ALU_CONST_CACHE_VS_0, const_conf->const_addr >> 8);
584 	RELOC_BATCH(const_conf->bo, domain, 0);
585 	END_BATCH();
586 	break;
587     case SHADER_TYPE_PS:
588 	BEGIN_BATCH(3);
589 	EREG(SQ_ALU_CONST_BUFFER_SIZE_PS_0, size);
590 	END_BATCH();
591 	BEGIN_BATCH(3 + 2);
592 	EREG(SQ_ALU_CONST_CACHE_PS_0, const_conf->const_addr >> 8);
593 	RELOC_BATCH(const_conf->bo, domain, 0);
594 	END_BATCH();
595 	break;
596     default:
597 	ErrorF("Unsupported const type %d\n", const_conf->type);
598 	break;
599     }
600 
601 }
602 
603 void
evergreen_set_bool_consts(ScrnInfoPtr pScrn,int offset,uint32_t val)604 evergreen_set_bool_consts(ScrnInfoPtr pScrn, int offset, uint32_t val)
605 {
606     RADEONInfoPtr info = RADEONPTR(pScrn);
607     /* bool register order is: ps, vs/es, gs, hs, ls, cs; one register each
608      * 1 bits per bool; 32 bools each for ps, vs/es, gs, hs, ls, cs.
609      */
610     BEGIN_BATCH(3);
611     EREG(SQ_BOOL_CONST + offset * SQ_BOOL_CONST_offset, val);
612     END_BATCH();
613 }
614 
615 /* cayman has some minor differences in SQ_VTX_CONSTANT_WORD2_0 and _WORD3_0,
616  * but none that we use here.
617  */
618 static void
evergreen_set_vtx_resource(ScrnInfoPtr pScrn,vtx_resource_t * res,uint32_t domain)619 evergreen_set_vtx_resource(ScrnInfoPtr pScrn, vtx_resource_t *res, uint32_t domain)
620 {
621     RADEONInfoPtr info = RADEONPTR(pScrn);
622     struct radeon_accel_state *accel_state = info->accel_state;
623     uint32_t sq_vtx_constant_word2, sq_vtx_constant_word3, sq_vtx_constant_word4;
624 
625     sq_vtx_constant_word2 = ((((res->vb_addr) >> 32) & BASE_ADDRESS_HI_mask) |
626 			     ((res->vtx_size_dw << 2) << SQ_VTX_CONSTANT_WORD2_0__STRIDE_shift) |
627 			     (res->format << SQ_VTX_CONSTANT_WORD2_0__DATA_FORMAT_shift) |
628 			     (res->num_format_all << SQ_VTX_CONSTANT_WORD2_0__NUM_FORMAT_ALL_shift) |
629 			     (res->endian << SQ_VTX_CONSTANT_WORD2_0__ENDIAN_SWAP_shift));
630     if (res->clamp_x)
631 	    sq_vtx_constant_word2 |= SQ_VTX_CONSTANT_WORD2_0__CLAMP_X_bit;
632 
633     if (res->format_comp_all)
634 	    sq_vtx_constant_word2 |= SQ_VTX_CONSTANT_WORD2_0__FORMAT_COMP_ALL_bit;
635 
636     if (res->srf_mode_all)
637 	    sq_vtx_constant_word2 |= SQ_VTX_CONSTANT_WORD2_0__SRF_MODE_ALL_bit;
638 
639     sq_vtx_constant_word3 = ((res->dst_sel_x << SQ_VTX_CONSTANT_WORD3_0__DST_SEL_X_shift) |
640 			     (res->dst_sel_y << SQ_VTX_CONSTANT_WORD3_0__DST_SEL_Y_shift) |
641 			     (res->dst_sel_z << SQ_VTX_CONSTANT_WORD3_0__DST_SEL_Z_shift) |
642 			     (res->dst_sel_w << SQ_VTX_CONSTANT_WORD3_0__DST_SEL_W_shift));
643 
644     if (res->uncached)
645 	sq_vtx_constant_word3 |= SQ_VTX_CONSTANT_WORD3_0__UNCACHED_bit;
646 
647     /* XXX ??? */
648     sq_vtx_constant_word4 = 0;
649 
650     /* flush vertex cache */
651     if ((info->ChipFamily == CHIP_FAMILY_CEDAR) ||
652 	(info->ChipFamily == CHIP_FAMILY_PALM) ||
653 	(info->ChipFamily == CHIP_FAMILY_SUMO) ||
654 	(info->ChipFamily == CHIP_FAMILY_SUMO2) ||
655 	(info->ChipFamily == CHIP_FAMILY_CAICOS) ||
656 	(info->ChipFamily == CHIP_FAMILY_CAYMAN) ||
657 	(info->ChipFamily == CHIP_FAMILY_ARUBA))
658 	evergreen_cp_set_surface_sync(pScrn, TC_ACTION_ENA_bit,
659 				      accel_state->vbo.vb_offset, 0,
660 				      res->bo,
661 				      domain, 0);
662     else
663 	evergreen_cp_set_surface_sync(pScrn, VC_ACTION_ENA_bit,
664 				      accel_state->vbo.vb_offset, 0,
665 				      res->bo,
666 				      domain, 0);
667 
668     BEGIN_BATCH(10 + 2);
669     PACK0(SQ_FETCH_RESOURCE + res->id * SQ_FETCH_RESOURCE_offset, 8);
670     E32(res->vb_addr & 0xffffffff);				// 0: BASE_ADDRESS
671     E32((res->vtx_num_entries << 2) - 1);			// 1: SIZE
672     E32(sq_vtx_constant_word2);	// 2: BASE_HI, STRIDE, CLAMP, FORMAT, ENDIAN
673     E32(sq_vtx_constant_word3);		// 3: swizzles
674     E32(sq_vtx_constant_word4);		// 4: num elements
675     E32(0);							// 5: n/a
676     E32(0);							// 6: n/a
677     E32(SQ_TEX_VTX_VALID_BUFFER << SQ_VTX_CONSTANT_WORD7_0__TYPE_shift);	// 7: TYPE
678     RELOC_BATCH(res->bo, domain, 0);
679     END_BATCH();
680 }
681 
682 /* cayman has some minor differences in SQ_TEX_CONSTANT_WORD0_0 and _WORD4_0,
683  * but none that we use here.
684  */
685 void
evergreen_set_tex_resource(ScrnInfoPtr pScrn,tex_resource_t * tex_res,uint32_t domain)686 evergreen_set_tex_resource(ScrnInfoPtr pScrn, tex_resource_t *tex_res, uint32_t domain)
687 {
688     RADEONInfoPtr info = RADEONPTR(pScrn);
689     uint32_t sq_tex_resource_word0, sq_tex_resource_word1, sq_tex_resource_word4;
690     uint32_t sq_tex_resource_word5, sq_tex_resource_word6, sq_tex_resource_word7;
691     uint32_t array_mode, pitch, tile_split, macro_aspect, bankw, bankh, nbanks;
692 
693     if (tex_res->surface) {
694 	switch (tex_res->surface->level[0].mode) {
695 	case RADEON_SURF_MODE_1D:
696 		array_mode = 2;
697 		break;
698 	case RADEON_SURF_MODE_2D:
699 		array_mode = 4;
700 		break;
701 	default:
702 		array_mode = 0;
703 		break;
704 	}
705 	pitch = tex_res->surface->level[0].nblk_x >> 3;
706 	tile_split = tex_res->surface->tile_split;
707 	macro_aspect = tex_res->surface->mtilea;
708 	bankw = tex_res->surface->bankw;
709 	bankh = tex_res->surface->bankh;
710 	tile_split = eg_tile_split(tile_split);
711 	macro_aspect = eg_macro_tile_aspect(macro_aspect);
712 	bankw = eg_bank_wh(bankw);
713 	bankh = eg_bank_wh(bankh);
714     } else {
715 	array_mode = tex_res->array_mode;
716 	pitch = (tex_res->pitch + 7) >> 3;
717 	tile_split = 4;
718 	macro_aspect = 0;
719 	bankw = 0;
720 	bankh = 0;
721     }
722     nbanks = info->num_banks;
723     nbanks = eg_nbanks(nbanks);
724 
725     sq_tex_resource_word0 = (tex_res->dim << DIM_shift);
726 
727     if (tex_res->w)
728 	sq_tex_resource_word0 |= ( ((pitch - 1) << PITCH_shift) |
729 				   ((tex_res->w - 1) << TEX_WIDTH_shift) );
730 
731     if (tex_res->tile_type)
732 	sq_tex_resource_word0 |= SQ_TEX_RESOURCE_WORD0_0__NON_DISP_TILING_ORDER_bit;
733 
734     sq_tex_resource_word1 = (array_mode << SQ_TEX_RESOURCE_WORD1_0__ARRAY_MODE_shift);
735 
736     if (tex_res->h)
737 	sq_tex_resource_word1 |= ((tex_res->h - 1) << TEX_HEIGHT_shift);
738     if (tex_res->depth)
739 	sq_tex_resource_word1 |= ((tex_res->depth - 1) << TEX_DEPTH_shift);
740 
741     sq_tex_resource_word4 = ((tex_res->format_comp_x << FORMAT_COMP_X_shift) |
742 			     (tex_res->format_comp_y << FORMAT_COMP_Y_shift) |
743 			     (tex_res->format_comp_z << FORMAT_COMP_Z_shift) |
744 			     (tex_res->format_comp_w << FORMAT_COMP_W_shift) |
745 			     (tex_res->num_format_all << SQ_TEX_RESOURCE_WORD4_0__NUM_FORMAT_ALL_shift) |
746 			     (tex_res->endian << SQ_TEX_RESOURCE_WORD4_0__ENDIAN_SWAP_shift) |
747 			     (tex_res->dst_sel_x << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift) |
748 			     (tex_res->dst_sel_y << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift) |
749 			     (tex_res->dst_sel_z << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift) |
750 			     (tex_res->dst_sel_w << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift) |
751 			     (tex_res->base_level << BASE_LEVEL_shift));
752 
753     if (tex_res->srf_mode_all)
754 	sq_tex_resource_word4 |= SQ_TEX_RESOURCE_WORD4_0__SRF_MODE_ALL_bit;
755     if (tex_res->force_degamma)
756 	sq_tex_resource_word4 |= SQ_TEX_RESOURCE_WORD4_0__FORCE_DEGAMMA_bit;
757 
758     sq_tex_resource_word5 = ((tex_res->last_level << LAST_LEVEL_shift) |
759 			     (tex_res->base_array << BASE_ARRAY_shift) |
760 			     (tex_res->last_array << LAST_ARRAY_shift));
761 
762     sq_tex_resource_word6 = ((tex_res->min_lod << SQ_TEX_RESOURCE_WORD6_0__MIN_LOD_shift) |
763 			     (tex_res->perf_modulation << PERF_MODULATION_shift) |
764 			     (tile_split << SQ_TEX_RESOURCE_WORD6_0__TILE_SPLIT_shift));
765 
766     if (tex_res->interlaced)
767 	sq_tex_resource_word6 |= INTERLACED_bit;
768 
769     sq_tex_resource_word7 = ((tex_res->format << SQ_TEX_RESOURCE_WORD7_0__DATA_FORMAT_shift) |
770 			     (macro_aspect << SQ_TEX_RESOURCE_WORD7_0__MACRO_TILE_ASPECT_shift) |
771 			     (nbanks << SQ_TEX_RESOURCE_WORD7_0__NUM_BANKS_shift) |
772 			     (bankw << SQ_TEX_RESOURCE_WORD7_0__BANK_WIDTH_shift) |
773 			     (bankh << SQ_TEX_RESOURCE_WORD7_0__BANK_HEIGHT_shift) |
774 			     (SQ_TEX_VTX_VALID_TEXTURE << SQ_TEX_RESOURCE_WORD7_0__TYPE_shift));
775 
776     /* flush texture cache */
777     evergreen_cp_set_surface_sync(pScrn, TC_ACTION_ENA_bit,
778 				  tex_res->size, tex_res->base,
779 				  tex_res->bo, domain, 0);
780 
781     BEGIN_BATCH(10 + 4);
782     PACK0(SQ_FETCH_RESOURCE + tex_res->id * SQ_FETCH_RESOURCE_offset, 8);
783     E32(sq_tex_resource_word0);
784     E32(sq_tex_resource_word1);
785     E32(((tex_res->base) >> 8));
786     E32(((tex_res->mip_base) >> 8));
787     E32(sq_tex_resource_word4);
788     E32(sq_tex_resource_word5);
789     E32(sq_tex_resource_word6);
790     E32(sq_tex_resource_word7);
791     RELOC_BATCH(tex_res->bo, domain, 0);
792     RELOC_BATCH(tex_res->mip_bo, domain, 0);
793     END_BATCH();
794 }
795 
796 /* cayman has some minor differences in SQ_TEX_SAMPLER_WORD0_0,
797  * but none that we use here.
798  */
799 void
evergreen_set_tex_sampler(ScrnInfoPtr pScrn,tex_sampler_t * s)800 evergreen_set_tex_sampler (ScrnInfoPtr pScrn, tex_sampler_t *s)
801 {
802     RADEONInfoPtr info = RADEONPTR(pScrn);
803     uint32_t sq_tex_sampler_word0, sq_tex_sampler_word1, sq_tex_sampler_word2;
804 
805     sq_tex_sampler_word0 = ((s->clamp_x       << SQ_TEX_SAMPLER_WORD0_0__CLAMP_X_shift)		|
806 			    (s->clamp_y       << CLAMP_Y_shift)					|
807 			    (s->clamp_z       << CLAMP_Z_shift)					|
808 			    (s->xy_mag_filter << XY_MAG_FILTER_shift)				|
809 			    (s->xy_min_filter << XY_MIN_FILTER_shift)				|
810 			    (s->z_filter      << Z_FILTER_shift)	|
811 			    (s->mip_filter    << MIP_FILTER_shift)				|
812 			    (s->border_color  << BORDER_COLOR_TYPE_shift)			|
813 			    (s->depth_compare << DEPTH_COMPARE_FUNCTION_shift)			|
814 			    (s->chroma_key    << CHROMA_KEY_shift));
815 
816     sq_tex_sampler_word1 = ((s->min_lod       << SQ_TEX_SAMPLER_WORD1_0__MIN_LOD_shift)		|
817 			    (s->max_lod       << MAX_LOD_shift)					|
818 			    (s->perf_mip      << PERF_MIP_shift)	|
819 			    (s->perf_z        << PERF_Z_shift));
820 
821 
822     sq_tex_sampler_word2 = ((s->lod_bias      << SQ_TEX_SAMPLER_WORD2_0__LOD_BIAS_shift) |
823 			    (s->lod_bias2     << LOD_BIAS_SEC_shift));
824 
825     if (s->mc_coord_truncate)
826 	sq_tex_sampler_word2 |= MC_COORD_TRUNCATE_bit;
827     if (s->force_degamma)
828 	sq_tex_sampler_word2 |= SQ_TEX_SAMPLER_WORD2_0__FORCE_DEGAMMA_bit;
829     if (s->truncate_coord)
830 	sq_tex_sampler_word2 |= TRUNCATE_COORD_bit;
831     if (s->disable_cube_wrap)
832 	sq_tex_sampler_word2 |= SQ_TEX_SAMPLER_WORD2_0__DISABLE_CUBE_WRAP_bit;
833     if (s->type)
834 	sq_tex_sampler_word2 |= SQ_TEX_SAMPLER_WORD2_0__TYPE_bit;
835 
836     BEGIN_BATCH(5);
837     PACK0(SQ_TEX_SAMPLER_WORD + s->id * SQ_TEX_SAMPLER_WORD_offset, 3);
838     E32(sq_tex_sampler_word0);
839     E32(sq_tex_sampler_word1);
840     E32(sq_tex_sampler_word2);
841     END_BATCH();
842 }
843 
844 /* workarounds for hw bugs in eg+ */
845 /* only affects screen/window/generic/vport.  cliprects are not affected */
846 static void
evergreen_fix_scissor_coordinates(ScrnInfoPtr pScrn,int * x1,int * y1,int * x2,int * y2)847 evergreen_fix_scissor_coordinates(ScrnInfoPtr pScrn, int *x1, int *y1, int *x2, int *y2)
848 {
849     RADEONInfoPtr info = RADEONPTR(pScrn);
850 
851     /* all eg+ asics */
852     if (*x2 == 0)
853 	*x1 = 1;
854     if (*y2 == 0)
855 	*y1 = 1;
856 
857     /* cayman/tn only */
858     if (info->ChipFamily >= CHIP_FAMILY_CAYMAN) {
859 	/* cliprects aren't affected so we can use them to clip if we need
860 	 * a true 1x1 clip region
861 	 */
862 	if ((*x2 == 1) && (*y2 == 1))
863 	    *x2 = 2;
864     }
865 }
866 
867 //XXX deal with clip offsets in clip setup
868 void
evergreen_set_screen_scissor(ScrnInfoPtr pScrn,int x1,int y1,int x2,int y2)869 evergreen_set_screen_scissor(ScrnInfoPtr pScrn, int x1, int y1, int x2, int y2)
870 {
871     RADEONInfoPtr info = RADEONPTR(pScrn);
872 
873     evergreen_fix_scissor_coordinates(pScrn, &x1, &y1, &x2, &y2);
874 
875     BEGIN_BATCH(4);
876     PACK0(PA_SC_SCREEN_SCISSOR_TL, 2);
877     E32(((x1 << PA_SC_SCREEN_SCISSOR_TL__TL_X_shift) |
878 	 (y1 << PA_SC_SCREEN_SCISSOR_TL__TL_Y_shift)));
879     E32(((x2 << PA_SC_SCREEN_SCISSOR_BR__BR_X_shift) |
880 	 (y2 << PA_SC_SCREEN_SCISSOR_BR__BR_Y_shift)));
881     END_BATCH();
882 }
883 
884 void
evergreen_set_vport_scissor(ScrnInfoPtr pScrn,int id,int x1,int y1,int x2,int y2)885 evergreen_set_vport_scissor(ScrnInfoPtr pScrn, int id, int x1, int y1, int x2, int y2)
886 {
887     RADEONInfoPtr info = RADEONPTR(pScrn);
888 
889     evergreen_fix_scissor_coordinates(pScrn, &x1, &y1, &x2, &y2);
890 
891     BEGIN_BATCH(4);
892     PACK0(PA_SC_VPORT_SCISSOR_0_TL + id * PA_SC_VPORT_SCISSOR_0_TL_offset, 2);
893     E32(((x1 << PA_SC_VPORT_SCISSOR_0_TL__TL_X_shift) |
894 	 (y1 << PA_SC_VPORT_SCISSOR_0_TL__TL_Y_shift) |
895 	 WINDOW_OFFSET_DISABLE_bit));
896     E32(((x2 << PA_SC_VPORT_SCISSOR_0_BR__BR_X_shift) |
897 	 (y2 << PA_SC_VPORT_SCISSOR_0_BR__BR_Y_shift)));
898     END_BATCH();
899 }
900 
901 void
evergreen_set_generic_scissor(ScrnInfoPtr pScrn,int x1,int y1,int x2,int y2)902 evergreen_set_generic_scissor(ScrnInfoPtr pScrn, int x1, int y1, int x2, int y2)
903 {
904     RADEONInfoPtr info = RADEONPTR(pScrn);
905 
906     evergreen_fix_scissor_coordinates(pScrn, &x1, &y1, &x2, &y2);
907 
908     BEGIN_BATCH(4);
909     PACK0(PA_SC_GENERIC_SCISSOR_TL, 2);
910     E32(((x1 << PA_SC_GENERIC_SCISSOR_TL__TL_X_shift) |
911 	 (y1 << PA_SC_GENERIC_SCISSOR_TL__TL_Y_shift) |
912 	 WINDOW_OFFSET_DISABLE_bit));
913     E32(((x2 << PA_SC_GENERIC_SCISSOR_BR__BR_X_shift) |
914 	 (y2 << PA_SC_GENERIC_SCISSOR_TL__TL_Y_shift)));
915     END_BATCH();
916 }
917 
918 void
evergreen_set_window_scissor(ScrnInfoPtr pScrn,int x1,int y1,int x2,int y2)919 evergreen_set_window_scissor(ScrnInfoPtr pScrn, int x1, int y1, int x2, int y2)
920 {
921     RADEONInfoPtr info = RADEONPTR(pScrn);
922 
923     evergreen_fix_scissor_coordinates(pScrn, &x1, &y1, &x2, &y2);
924 
925     BEGIN_BATCH(4);
926     PACK0(PA_SC_WINDOW_SCISSOR_TL, 2);
927     E32(((x1 << PA_SC_WINDOW_SCISSOR_TL__TL_X_shift) |
928 	 (y1 << PA_SC_WINDOW_SCISSOR_TL__TL_Y_shift) |
929 	 WINDOW_OFFSET_DISABLE_bit));
930     E32(((x2 << PA_SC_WINDOW_SCISSOR_BR__BR_X_shift) |
931 	 (y2 << PA_SC_WINDOW_SCISSOR_BR__BR_Y_shift)));
932     END_BATCH();
933 }
934 
935 void
evergreen_set_clip_rect(ScrnInfoPtr pScrn,int id,int x1,int y1,int x2,int y2)936 evergreen_set_clip_rect(ScrnInfoPtr pScrn, int id, int x1, int y1, int x2, int y2)
937 {
938     RADEONInfoPtr info = RADEONPTR(pScrn);
939 
940     BEGIN_BATCH(4);
941     PACK0(PA_SC_CLIPRECT_0_TL + id * PA_SC_CLIPRECT_0_TL_offset, 2);
942     E32(((x1 << PA_SC_CLIPRECT_0_TL__TL_X_shift) |
943 	 (y1 << PA_SC_CLIPRECT_0_TL__TL_Y_shift)));
944     E32(((x2 << PA_SC_CLIPRECT_0_BR__BR_X_shift) |
945 	 (y2 << PA_SC_CLIPRECT_0_BR__BR_Y_shift)));
946     END_BATCH();
947 }
948 
949 /*
950  * Setup of default state
951  */
952 
953 void
evergreen_set_default_state(ScrnInfoPtr pScrn)954 evergreen_set_default_state(ScrnInfoPtr pScrn)
955 {
956     tex_resource_t tex_res;
957     shader_config_t fs_conf;
958     sq_config_t sq_conf;
959     int i;
960     RADEONInfoPtr info = RADEONPTR(pScrn);
961     struct radeon_accel_state *accel_state = info->accel_state;
962 
963     if (info->ChipFamily >= CHIP_FAMILY_CAYMAN) {
964 	cayman_set_default_state(pScrn);
965 	return;
966     }
967 
968     if (accel_state->XInited3D)
969 	return;
970 
971     memset(&tex_res, 0, sizeof(tex_resource_t));
972     memset(&fs_conf, 0, sizeof(shader_config_t));
973 
974     accel_state->XInited3D = TRUE;
975 
976     evergreen_start_3d(pScrn);
977 
978     /* SQ */
979     sq_conf.ps_prio = 0;
980     sq_conf.vs_prio = 1;
981     sq_conf.gs_prio = 2;
982     sq_conf.es_prio = 3;
983     sq_conf.hs_prio = 0;
984     sq_conf.ls_prio = 0;
985     sq_conf.cs_prio = 0;
986 
987     switch (info->ChipFamily) {
988     case CHIP_FAMILY_CEDAR:
989     default:
990 	sq_conf.num_ps_gprs = 93;
991 	sq_conf.num_vs_gprs = 46;
992 	sq_conf.num_temp_gprs = 4;
993 	sq_conf.num_gs_gprs = 31;
994 	sq_conf.num_es_gprs = 31;
995 	sq_conf.num_hs_gprs = 23;
996 	sq_conf.num_ls_gprs = 23;
997 	sq_conf.num_ps_threads = 96;
998 	sq_conf.num_vs_threads = 16;
999 	sq_conf.num_gs_threads = 16;
1000 	sq_conf.num_es_threads = 16;
1001 	sq_conf.num_hs_threads = 16;
1002 	sq_conf.num_ls_threads = 16;
1003 	sq_conf.num_ps_stack_entries = 42;
1004 	sq_conf.num_vs_stack_entries = 42;
1005 	sq_conf.num_gs_stack_entries = 42;
1006 	sq_conf.num_es_stack_entries = 42;
1007 	sq_conf.num_hs_stack_entries = 42;
1008 	sq_conf.num_ls_stack_entries = 42;
1009 	break;
1010     case CHIP_FAMILY_REDWOOD:
1011 	sq_conf.num_ps_gprs = 93;
1012 	sq_conf.num_vs_gprs = 46;
1013 	sq_conf.num_temp_gprs = 4;
1014 	sq_conf.num_gs_gprs = 31;
1015 	sq_conf.num_es_gprs = 31;
1016 	sq_conf.num_hs_gprs = 23;
1017 	sq_conf.num_ls_gprs = 23;
1018 	sq_conf.num_ps_threads = 128;
1019 	sq_conf.num_vs_threads = 20;
1020 	sq_conf.num_gs_threads = 20;
1021 	sq_conf.num_es_threads = 20;
1022 	sq_conf.num_hs_threads = 20;
1023 	sq_conf.num_ls_threads = 20;
1024 	sq_conf.num_ps_stack_entries = 42;
1025 	sq_conf.num_vs_stack_entries = 42;
1026 	sq_conf.num_gs_stack_entries = 42;
1027 	sq_conf.num_es_stack_entries = 42;
1028 	sq_conf.num_hs_stack_entries = 42;
1029 	sq_conf.num_ls_stack_entries = 42;
1030 	break;
1031     case CHIP_FAMILY_JUNIPER:
1032 	sq_conf.num_ps_gprs = 93;
1033 	sq_conf.num_vs_gprs = 46;
1034 	sq_conf.num_temp_gprs = 4;
1035 	sq_conf.num_gs_gprs = 31;
1036 	sq_conf.num_es_gprs = 31;
1037 	sq_conf.num_hs_gprs = 23;
1038 	sq_conf.num_ls_gprs = 23;
1039 	sq_conf.num_ps_threads = 128;
1040 	sq_conf.num_vs_threads = 20;
1041 	sq_conf.num_gs_threads = 20;
1042 	sq_conf.num_es_threads = 20;
1043 	sq_conf.num_hs_threads = 20;
1044 	sq_conf.num_ls_threads = 20;
1045 	sq_conf.num_ps_stack_entries = 85;
1046 	sq_conf.num_vs_stack_entries = 85;
1047 	sq_conf.num_gs_stack_entries = 85;
1048 	sq_conf.num_es_stack_entries = 85;
1049 	sq_conf.num_hs_stack_entries = 85;
1050 	sq_conf.num_ls_stack_entries = 85;
1051 	break;
1052     case CHIP_FAMILY_CYPRESS:
1053     case CHIP_FAMILY_HEMLOCK:
1054 	sq_conf.num_ps_gprs = 93;
1055 	sq_conf.num_vs_gprs = 46;
1056 	sq_conf.num_temp_gprs = 4;
1057 	sq_conf.num_gs_gprs = 31;
1058 	sq_conf.num_es_gprs = 31;
1059 	sq_conf.num_hs_gprs = 23;
1060 	sq_conf.num_ls_gprs = 23;
1061 	sq_conf.num_ps_threads = 128;
1062 	sq_conf.num_vs_threads = 20;
1063 	sq_conf.num_gs_threads = 20;
1064 	sq_conf.num_es_threads = 20;
1065 	sq_conf.num_hs_threads = 20;
1066 	sq_conf.num_ls_threads = 20;
1067 	sq_conf.num_ps_stack_entries = 85;
1068 	sq_conf.num_vs_stack_entries = 85;
1069 	sq_conf.num_gs_stack_entries = 85;
1070 	sq_conf.num_es_stack_entries = 85;
1071 	sq_conf.num_hs_stack_entries = 85;
1072 	sq_conf.num_ls_stack_entries = 85;
1073 	break;
1074     case CHIP_FAMILY_PALM:
1075 	sq_conf.num_ps_gprs = 93;
1076 	sq_conf.num_vs_gprs = 46;
1077 	sq_conf.num_temp_gprs = 4;
1078 	sq_conf.num_gs_gprs = 31;
1079 	sq_conf.num_es_gprs = 31;
1080 	sq_conf.num_hs_gprs = 23;
1081 	sq_conf.num_ls_gprs = 23;
1082 	sq_conf.num_ps_threads = 96;
1083 	sq_conf.num_vs_threads = 16;
1084 	sq_conf.num_gs_threads = 16;
1085 	sq_conf.num_es_threads = 16;
1086 	sq_conf.num_hs_threads = 16;
1087 	sq_conf.num_ls_threads = 16;
1088 	sq_conf.num_ps_stack_entries = 42;
1089 	sq_conf.num_vs_stack_entries = 42;
1090 	sq_conf.num_gs_stack_entries = 42;
1091 	sq_conf.num_es_stack_entries = 42;
1092 	sq_conf.num_hs_stack_entries = 42;
1093 	sq_conf.num_ls_stack_entries = 42;
1094 	break;
1095     case CHIP_FAMILY_SUMO:
1096 	sq_conf.num_ps_gprs = 93;
1097 	sq_conf.num_vs_gprs = 46;
1098 	sq_conf.num_temp_gprs = 4;
1099 	sq_conf.num_gs_gprs = 31;
1100 	sq_conf.num_es_gprs = 31;
1101 	sq_conf.num_hs_gprs = 23;
1102 	sq_conf.num_ls_gprs = 23;
1103 	sq_conf.num_ps_threads = 96;
1104 	sq_conf.num_vs_threads = 25;
1105 	sq_conf.num_gs_threads = 25;
1106 	sq_conf.num_es_threads = 25;
1107 	sq_conf.num_hs_threads = 25;
1108 	sq_conf.num_ls_threads = 25;
1109 	sq_conf.num_ps_stack_entries = 42;
1110 	sq_conf.num_vs_stack_entries = 42;
1111 	sq_conf.num_gs_stack_entries = 42;
1112 	sq_conf.num_es_stack_entries = 42;
1113 	sq_conf.num_hs_stack_entries = 42;
1114 	sq_conf.num_ls_stack_entries = 42;
1115 	break;
1116     case CHIP_FAMILY_SUMO2:
1117 	sq_conf.num_ps_gprs = 93;
1118 	sq_conf.num_vs_gprs = 46;
1119 	sq_conf.num_temp_gprs = 4;
1120 	sq_conf.num_gs_gprs = 31;
1121 	sq_conf.num_es_gprs = 31;
1122 	sq_conf.num_hs_gprs = 23;
1123 	sq_conf.num_ls_gprs = 23;
1124 	sq_conf.num_ps_threads = 96;
1125 	sq_conf.num_vs_threads = 25;
1126 	sq_conf.num_gs_threads = 25;
1127 	sq_conf.num_es_threads = 25;
1128 	sq_conf.num_hs_threads = 25;
1129 	sq_conf.num_ls_threads = 25;
1130 	sq_conf.num_ps_stack_entries = 85;
1131 	sq_conf.num_vs_stack_entries = 85;
1132 	sq_conf.num_gs_stack_entries = 85;
1133 	sq_conf.num_es_stack_entries = 85;
1134 	sq_conf.num_hs_stack_entries = 85;
1135 	sq_conf.num_ls_stack_entries = 85;
1136 	break;
1137     case CHIP_FAMILY_BARTS:
1138 	sq_conf.num_ps_gprs = 93;
1139 	sq_conf.num_vs_gprs = 46;
1140 	sq_conf.num_temp_gprs = 4;
1141 	sq_conf.num_gs_gprs = 31;
1142 	sq_conf.num_es_gprs = 31;
1143 	sq_conf.num_hs_gprs = 23;
1144 	sq_conf.num_ls_gprs = 23;
1145 	sq_conf.num_ps_threads = 128;
1146 	sq_conf.num_vs_threads = 20;
1147 	sq_conf.num_gs_threads = 20;
1148 	sq_conf.num_es_threads = 20;
1149 	sq_conf.num_hs_threads = 20;
1150 	sq_conf.num_ls_threads = 20;
1151 	sq_conf.num_ps_stack_entries = 85;
1152 	sq_conf.num_vs_stack_entries = 85;
1153 	sq_conf.num_gs_stack_entries = 85;
1154 	sq_conf.num_es_stack_entries = 85;
1155 	sq_conf.num_hs_stack_entries = 85;
1156 	sq_conf.num_ls_stack_entries = 85;
1157 	break;
1158     case CHIP_FAMILY_TURKS:
1159 	sq_conf.num_ps_gprs = 93;
1160 	sq_conf.num_vs_gprs = 46;
1161 	sq_conf.num_temp_gprs = 4;
1162 	sq_conf.num_gs_gprs = 31;
1163 	sq_conf.num_es_gprs = 31;
1164 	sq_conf.num_hs_gprs = 23;
1165 	sq_conf.num_ls_gprs = 23;
1166 	sq_conf.num_ps_threads = 128;
1167 	sq_conf.num_vs_threads = 20;
1168 	sq_conf.num_gs_threads = 20;
1169 	sq_conf.num_es_threads = 20;
1170 	sq_conf.num_hs_threads = 20;
1171 	sq_conf.num_ls_threads = 20;
1172 	sq_conf.num_ps_stack_entries = 42;
1173 	sq_conf.num_vs_stack_entries = 42;
1174 	sq_conf.num_gs_stack_entries = 42;
1175 	sq_conf.num_es_stack_entries = 42;
1176 	sq_conf.num_hs_stack_entries = 42;
1177 	sq_conf.num_ls_stack_entries = 42;
1178 	break;
1179     case CHIP_FAMILY_CAICOS:
1180 	sq_conf.num_ps_gprs = 93;
1181 	sq_conf.num_vs_gprs = 46;
1182 	sq_conf.num_temp_gprs = 4;
1183 	sq_conf.num_gs_gprs = 31;
1184 	sq_conf.num_es_gprs = 31;
1185 	sq_conf.num_hs_gprs = 23;
1186 	sq_conf.num_ls_gprs = 23;
1187 	sq_conf.num_ps_threads = 128;
1188 	sq_conf.num_vs_threads = 10;
1189 	sq_conf.num_gs_threads = 10;
1190 	sq_conf.num_es_threads = 10;
1191 	sq_conf.num_hs_threads = 10;
1192 	sq_conf.num_ls_threads = 10;
1193 	sq_conf.num_ps_stack_entries = 42;
1194 	sq_conf.num_vs_stack_entries = 42;
1195 	sq_conf.num_gs_stack_entries = 42;
1196 	sq_conf.num_es_stack_entries = 42;
1197 	sq_conf.num_hs_stack_entries = 42;
1198 	sq_conf.num_ls_stack_entries = 42;
1199 	break;
1200     }
1201 
1202     evergreen_sq_setup(pScrn, &sq_conf);
1203 
1204     BEGIN_BATCH(27);
1205     EREG(SQ_LDS_ALLOC_PS, 0);
1206     EREG(SQ_LDS_RESOURCE_MGMT, 0x10001000);
1207     EREG(SQ_DYN_GPR_RESOURCE_LIMIT_1, 0);
1208 
1209     PACK0(SQ_ESGS_RING_ITEMSIZE, 6);
1210     E32(0);
1211     E32(0);
1212     E32(0);
1213     E32(0);
1214     E32(0);
1215     E32(0);
1216 
1217     PACK0(SQ_GS_VERT_ITEMSIZE, 4);
1218     E32(0);
1219     E32(0);
1220     E32(0);
1221     E32(0);
1222 
1223     PACK0(SQ_VTX_BASE_VTX_LOC, 2);
1224     E32(0);
1225     E32(0);
1226     END_BATCH();
1227 
1228     /* DB */
1229     BEGIN_BATCH(3 + 2);
1230     EREG(DB_Z_INFO,                           0);
1231     RELOC_BATCH(accel_state->shaders_bo, RADEON_GEM_DOMAIN_VRAM, 0);
1232     END_BATCH();
1233 
1234     BEGIN_BATCH(3 + 2);
1235     EREG(DB_STENCIL_INFO,                     0);
1236     RELOC_BATCH(accel_state->shaders_bo, RADEON_GEM_DOMAIN_VRAM, 0);
1237     END_BATCH();
1238 
1239     BEGIN_BATCH(3 + 2);
1240     EREG(DB_HTILE_DATA_BASE,                    0);
1241     RELOC_BATCH(accel_state->shaders_bo, RADEON_GEM_DOMAIN_VRAM, 0);
1242     END_BATCH();
1243 
1244     BEGIN_BATCH(49);
1245     EREG(DB_DEPTH_CONTROL,                    0);
1246 
1247     PACK0(PA_SC_VPORT_ZMIN_0, 2);
1248     EFLOAT(0.0); // PA_SC_VPORT_ZMIN_0
1249     EFLOAT(1.0); // PA_SC_VPORT_ZMAX_0
1250 
1251     PACK0(DB_RENDER_CONTROL, 5);
1252     E32(STENCIL_COMPRESS_DISABLE_bit | DEPTH_COMPRESS_DISABLE_bit); // DB_RENDER_CONTROL
1253     E32(0); // DB_COUNT_CONTROL
1254     E32(0); // DB_DEPTH_VIEW
1255     E32(0x2a); // DB_RENDER_OVERRIDE
1256     E32(0); // DB_RENDER_OVERRIDE2
1257 
1258     PACK0(DB_STENCIL_CLEAR, 2);
1259     E32(0); // DB_STENCIL_CLEAR
1260     E32(0); // DB_DEPTH_CLEAR
1261 
1262     EREG(DB_ALPHA_TO_MASK,                    ((2 << ALPHA_TO_MASK_OFFSET0_shift)	|
1263 					       (2 << ALPHA_TO_MASK_OFFSET1_shift)	|
1264 					       (2 << ALPHA_TO_MASK_OFFSET2_shift)	|
1265 					       (2 << ALPHA_TO_MASK_OFFSET3_shift)));
1266 
1267     EREG(DB_SHADER_CONTROL, ((EARLY_Z_THEN_LATE_Z << Z_ORDER_shift) |
1268 			     DUAL_EXPORT_ENABLE_bit)); /* Only useful if no depth export */
1269 
1270     // SX
1271     EREG(SX_MISC,               0);
1272 
1273     // CB
1274     PACK0(SX_ALPHA_TEST_CONTROL, 5);
1275     E32(0); // SX_ALPHA_TEST_CONTROL
1276     E32(0x00000000); //CB_BLEND_RED
1277     E32(0x00000000); //CB_BLEND_GREEN
1278     E32(0x00000000); //CB_BLEND_BLUE
1279     E32(0x00000000); //CB_BLEND_ALPHA
1280 
1281     EREG(CB_SHADER_MASK,                      OUTPUT0_ENABLE_mask);
1282 
1283     // SC
1284     EREG(PA_SC_WINDOW_OFFSET,                 ((0 << WINDOW_X_OFFSET_shift) |
1285 					       (0 << WINDOW_Y_OFFSET_shift)));
1286     EREG(PA_SC_CLIPRECT_RULE,                 CLIP_RULE_mask);
1287     EREG(PA_SC_EDGERULE,             0xAAAAAAAA);
1288     EREG(PA_SU_HARDWARE_SCREEN_OFFSET, 0);
1289     END_BATCH();
1290 
1291     /* clip boolean is set to always visible -> doesn't matter */
1292     for (i = 0; i < PA_SC_CLIPRECT_0_TL_num; i++)
1293 	evergreen_set_clip_rect (pScrn, i, 0, 0, 8192, 8192);
1294 
1295     for (i = 0; i < PA_SC_VPORT_SCISSOR_0_TL_num; i++)
1296 	evergreen_set_vport_scissor (pScrn, i, 0, 0, 8192, 8192);
1297 
1298     BEGIN_BATCH(57);
1299     PACK0(PA_SC_MODE_CNTL_0, 2);
1300     E32(0); // PA_SC_MODE_CNTL_0
1301     E32(0); // PA_SC_MODE_CNTL_1
1302 
1303     PACK0(PA_SC_LINE_CNTL, 16);
1304     E32(0); // PA_SC_LINE_CNTL
1305     E32(0); // PA_SC_AA_CONFIG
1306     E32(((X_ROUND_TO_EVEN << PA_SU_VTX_CNTL__ROUND_MODE_shift) |
1307 	 PIX_CENTER_bit)); // PA_SU_VTX_CNTL
1308     EFLOAT(1.0);						// PA_CL_GB_VERT_CLIP_ADJ
1309     EFLOAT(1.0);						// PA_CL_GB_VERT_DISC_ADJ
1310     EFLOAT(1.0);						// PA_CL_GB_HORZ_CLIP_ADJ
1311     EFLOAT(1.0);						// PA_CL_GB_HORZ_DISC_ADJ
1312     E32(0); // PA_SC_AA_SAMPLE_LOCS_0
1313     E32(0);
1314     E32(0);
1315     E32(0);
1316     E32(0);
1317     E32(0);
1318     E32(0);
1319     E32(0); // PA_SC_AA_SAMPLE_LOCS_7
1320     E32(0xFFFFFFFF); // PA_SC_AA_MASK
1321 
1322     // CL
1323     PACK0(PA_CL_CLIP_CNTL, 8);
1324     E32(CLIP_DISABLE_bit); // PA_CL_CLIP_CNTL
1325     E32(FACE_bit); // PA_SU_SC_MODE_CNTL
1326     E32(VTX_XY_FMT_bit); // PA_CL_VTE_CNTL
1327     E32(0); // PA_CL_VS_OUT_CNTL
1328     E32(0); // PA_CL_NANINF_CNTL
1329     E32(0); // PA_SU_LINE_STIPPLE_CNTL
1330     E32(0); // PA_SU_LINE_STIPPLE_SCALE
1331     E32(0); // PA_SU_PRIM_FILTER_CNTL
1332 
1333     // SU
1334     PACK0(PA_SU_POLY_OFFSET_DB_FMT_CNTL, 6);
1335     E32(0);
1336     E32(0);
1337     E32(0);
1338     E32(0);
1339     E32(0);
1340     E32(0);
1341 
1342     /* src = semantic id 0; mask = semantic id 1 */
1343     EREG(SPI_VS_OUT_ID_0, ((0 << SEMANTIC_0_shift) |
1344 			   (1 << SEMANTIC_1_shift)));
1345     PACK0(SPI_PS_INPUT_CNTL_0 + (0 << 2), 2);
1346     /* SPI_PS_INPUT_CNTL_0 maps to GPR[0] - load with semantic id 0 */
1347     E32(((0    << SEMANTIC_shift)	|
1348 	 (0x01 << DEFAULT_VAL_shift)));
1349     /* SPI_PS_INPUT_CNTL_1 maps to GPR[1] - load with semantic id 1 */
1350     E32(((1    << SEMANTIC_shift)	|
1351 	 (0x01 << DEFAULT_VAL_shift)));
1352 
1353     PACK0(SPI_INPUT_Z, 8);
1354     E32(0); // SPI_INPUT_Z
1355     E32(0); // SPI_FOG_CNTL
1356     E32(LINEAR_CENTROID_ENA__X_ON_AT_CENTROID << LINEAR_CENTROID_ENA_shift); // SPI_BARYC_CNTL
1357     E32(0); // SPI_PS_IN_CONTROL_2
1358     E32(0);
1359     E32(0);
1360     E32(0);
1361     E32(0);
1362     END_BATCH();
1363 
1364     // clear FS
1365     fs_conf.bo = accel_state->shaders_bo;
1366     evergreen_fs_setup(pScrn, &fs_conf, RADEON_GEM_DOMAIN_VRAM);
1367 
1368     // VGT
1369     BEGIN_BATCH(46);
1370 
1371     PACK0(VGT_MAX_VTX_INDX, 4);
1372     E32(0xffffff);
1373     E32(0);
1374     E32(0);
1375     E32(0);
1376 
1377     PACK0(VGT_INSTANCE_STEP_RATE_0, 2);
1378     E32(0);
1379     E32(0);
1380 
1381     PACK0(VGT_REUSE_OFF, 2);
1382     E32(0);
1383     E32(0);
1384 
1385     PACK0(PA_SU_POINT_SIZE, 17);
1386     E32(0); // PA_SU_POINT_SIZE
1387     E32(0); // PA_SU_POINT_MINMAX
1388     E32((8 << PA_SU_LINE_CNTL__WIDTH_shift)); /* Line width 1 pixel */ // PA_SU_LINE_CNTL
1389     E32(0); // PA_SC_LINE_STIPPLE
1390     E32(0); // VGT_OUTPUT_PATH_CNTL
1391     E32(0); // VGT_HOS_CNTL
1392     E32(0);
1393     E32(0);
1394     E32(0);
1395     E32(0);
1396     E32(0);
1397     E32(0);
1398     E32(0);
1399     E32(0);
1400     E32(0);
1401     E32(0);
1402     E32(0); // VGT_GS_MODE
1403 
1404     EREG(VGT_PRIMITIVEID_EN,                  0);
1405     EREG(VGT_MULTI_PRIM_IB_RESET_EN,          0);
1406     EREG(VGT_SHADER_STAGES_EN,          0);
1407 
1408     PACK0(VGT_STRMOUT_CONFIG, 2);
1409     E32(0);
1410     E32(0);
1411     END_BATCH();
1412 }
1413 
1414 
1415 /*
1416  * Commands
1417  */
1418 
1419 void
evergreen_draw_auto(ScrnInfoPtr pScrn,draw_config_t * draw_conf)1420 evergreen_draw_auto(ScrnInfoPtr pScrn, draw_config_t *draw_conf)
1421 {
1422     RADEONInfoPtr info = RADEONPTR(pScrn);
1423 
1424     BEGIN_BATCH(10);
1425     EREG(VGT_PRIMITIVE_TYPE, draw_conf->prim_type);
1426     PACK3(IT_INDEX_TYPE, 1);
1427 #if X_BYTE_ORDER == X_BIG_ENDIAN
1428     E32(IT_INDEX_TYPE_SWAP_MODE(ENDIAN_8IN32) | draw_conf->index_type);
1429 #else
1430     E32(draw_conf->index_type);
1431 #endif
1432     PACK3(IT_NUM_INSTANCES, 1);
1433     E32(draw_conf->num_instances);
1434     PACK3(IT_DRAW_INDEX_AUTO, 2);
1435     E32(draw_conf->num_indices);
1436     E32(draw_conf->vgt_draw_initiator);
1437     END_BATCH();
1438 }
1439 
/*
 * Finish the operation currently being built in the vertex buffer.
 *
 * Does nothing when vbo.vb_start_op is -1.  When the vertex buffer offset
 * has not moved since the op started, the IB is discarded and the CS is
 * flushed instead.  Otherwise the vertex resource is bound, a rect-list
 * draw is emitted, the destination surface is synced and the per-op
 * markers are reset.
 *
 * vtx_size is the size of one vertex in bytes (it is divided by 4 to get
 * dwords).
 */
void evergreen_finish_op(ScrnInfoPtr pScrn, int vtx_size)
{
    RADEONInfoPtr info = RADEONPTR(pScrn);
    struct radeon_accel_state *accel_state = info->accel_state;
    vtx_resource_t  vtx_res;
    draw_config_t   draw_conf;

    /* -1 is the value written back at the end of this function */
    if (accel_state->vbo.vb_start_op == -1)
	return;

    /* offset unchanged since the op began: drop the IB and flush */
    if (accel_state->vbo.vb_offset == accel_state->vbo.vb_start_op) {
	radeon_ib_discard(pScrn);
	radeon_cs_flush_indirect(pScrn);
	return;
    }

    CLEAR (vtx_res);
    CLEAR (draw_conf);

    /* Vertex buffer setup: size is what was written during this op */
    accel_state->vbo.vb_size = accel_state->vbo.vb_offset - accel_state->vbo.vb_start_op;

    vtx_res.id              = SQ_FETCH_RESOURCE_vs;
    vtx_res.bo              = accel_state->vbo.vb_bo;
    vtx_res.vb_addr         = accel_state->vbo.vb_start_op;
    vtx_res.vtx_size_dw     = vtx_size / 4;
    vtx_res.vtx_num_entries = accel_state->vbo.vb_size / 4;
    vtx_res.dst_sel_x       = SQ_SEL_X;
    vtx_res.dst_sel_y       = SQ_SEL_Y;
    vtx_res.dst_sel_z       = SQ_SEL_Z;
    vtx_res.dst_sel_w       = SQ_SEL_W;
#if X_BYTE_ORDER == X_BIG_ENDIAN
    vtx_res.endian          = SQ_ENDIAN_8IN32;
#endif
    evergreen_set_vtx_resource(pScrn, &vtx_res, RADEON_GEM_DOMAIN_GTT);

    /* Draw: one index per vertex, i.e. total dwords / dwords per vertex */
    draw_conf.num_indices        = vtx_res.vtx_num_entries / vtx_res.vtx_size_dw;
    draw_conf.num_instances      = 1;
    draw_conf.index_type         = DI_INDEX_SIZE_16_BIT;
    draw_conf.prim_type          = DI_PT_RECTLIST;
    draw_conf.vgt_draw_initiator = DI_SRC_SEL_AUTO_INDEX;

    evergreen_draw_auto(pScrn, &draw_conf);

    /* sync dst surface */
    evergreen_cp_set_surface_sync(pScrn, (CB_ACTION_ENA_bit | CB0_DEST_BASE_ENA_bit),
				  accel_state->dst_size, 0,
				  accel_state->dst_obj.bo, 0, accel_state->dst_obj.domain);

    /* reset the per-op markers */
    accel_state->ib_reset_op = 0;
    accel_state->cbuf.vb_start_op = -1;
    accel_state->vbo.vb_start_op = -1;
}
1494 
1495