1 /*
2 * Copyright 2010 Advanced Micro Devices, Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 * SOFTWARE.
22 *
23 * Authors: Alex Deucher <alexander.deucher@amd.com>
24 *
25 */
26 #ifdef HAVE_CONFIG_H
27 #include "config.h"
28 #endif
29
30 #include "xf86.h"
31
32 #include <errno.h>
33
34 #include "radeon.h"
35 #include "evergreen_shader.h"
36 #include "radeon_reg.h"
37 #include "evergreen_reg.h"
38 #include "evergreen_state.h"
39
40 #include "radeon_vbo.h"
41 #include "radeon_exa_shared.h"
42
43 static const uint32_t EVERGREEN_ROP[16] = {
44 RADEON_ROP3_ZERO, /* GXclear */
45 RADEON_ROP3_DSa, /* Gxand */
46 RADEON_ROP3_SDna, /* GXandReverse */
47 RADEON_ROP3_S, /* GXcopy */
48 RADEON_ROP3_DSna, /* GXandInverted */
49 RADEON_ROP3_D, /* GXnoop */
50 RADEON_ROP3_DSx, /* GXxor */
51 RADEON_ROP3_DSo, /* GXor */
52 RADEON_ROP3_DSon, /* GXnor */
53 RADEON_ROP3_DSxn, /* GXequiv */
54 RADEON_ROP3_Dn, /* GXinvert */
55 RADEON_ROP3_SDno, /* GXorReverse */
56 RADEON_ROP3_Sn, /* GXcopyInverted */
57 RADEON_ROP3_DSno, /* GXorInverted */
58 RADEON_ROP3_DSan, /* GXnand */
59 RADEON_ROP3_ONE, /* GXset */
60 };
61
62 void
evergreen_start_3d(ScrnInfoPtr pScrn)63 evergreen_start_3d(ScrnInfoPtr pScrn)
64 {
65 RADEONInfoPtr info = RADEONPTR(pScrn);
66
67 BEGIN_BATCH(3);
68 PACK3(IT_CONTEXT_CONTROL, 2);
69 E32(0x80000000);
70 E32(0x80000000);
71 END_BATCH();
72
73 }
74
/*
 * Translate a tile-split size into its register field code:
 * 64 -> 0, 128 -> 1, 256 -> 2, 512 -> 3, 1024 -> 4, 2048 -> 5, 4096 -> 6.
 * Any other value yields 4, i.e. it is treated like 1024.
 */
unsigned eg_tile_split(unsigned tile_split)
{
    unsigned code;

    /* the recognised sizes are 64 << code for code = 0..6 */
    for (code = 0; code <= 6; code++) {
        if (tile_split == (64u << code))
            return code;
    }

    return 4;
}
89
/*
 * Translate a macro tile aspect value into its register field code:
 * 1 -> 0, 2 -> 1, 4 -> 2, 8 -> 3.  Any other value yields 0.
 */
static unsigned eg_macro_tile_aspect(unsigned macro_tile_aspect)
{
    if (macro_tile_aspect == 2)
        return 1;
    if (macro_tile_aspect == 4)
        return 2;
    if (macro_tile_aspect == 8)
        return 3;

    /* 1 and every unrecognised value */
    return 0;
}
101
/*
 * Translate a bank width or bank height value into its register field code:
 * 1 -> 0, 2 -> 1, 4 -> 2, 8 -> 3.  Any other value yields 0.
 */
static unsigned eg_bank_wh(unsigned bankwh)
{
    /* indexed by the input value; slots not listed stay 0 */
    static const unsigned char field_code[9] = {
        [1] = 0,
        [2] = 1,
        [4] = 2,
        [8] = 3,
    };

    return (bankwh <= 8) ? field_code[bankwh] : 0;
}
113
/*
 * Translate a bank count into its register field code:
 * 2 -> 0, 4 -> 1, 8 -> 2, 16 -> 3.  Any other value yields 0.
 */
static unsigned eg_nbanks(unsigned nbanks)
{
    switch (nbanks) {
    case 4:
        return 1;
    case 8:
        return 2;
    case 16:
        return 3;
    case 2:
    default:
        return 0;
    }
}
125
126 /*
127 * Setup of functional groups
128 */
129
130 // asic stack/thread/gpr limits - need to query the drm
131 static void
evergreen_sq_setup(ScrnInfoPtr pScrn,sq_config_t * sq_conf)132 evergreen_sq_setup(ScrnInfoPtr pScrn, sq_config_t *sq_conf)
133 {
134 uint32_t sq_config, sq_gpr_resource_mgmt_1, sq_gpr_resource_mgmt_2, sq_gpr_resource_mgmt_3;
135 uint32_t sq_thread_resource_mgmt, sq_thread_resource_mgmt_2;
136 uint32_t sq_stack_resource_mgmt_1, sq_stack_resource_mgmt_2, sq_stack_resource_mgmt_3;
137 RADEONInfoPtr info = RADEONPTR(pScrn);
138
139 if ((info->ChipFamily == CHIP_FAMILY_CEDAR) ||
140 (info->ChipFamily == CHIP_FAMILY_PALM) ||
141 (info->ChipFamily == CHIP_FAMILY_SUMO) ||
142 (info->ChipFamily == CHIP_FAMILY_SUMO2) ||
143 (info->ChipFamily == CHIP_FAMILY_CAICOS))
144 sq_config = 0;
145 else
146 sq_config = VC_ENABLE_bit;
147
148 sq_config |= (EXPORT_SRC_C_bit |
149 (sq_conf->cs_prio << CS_PRIO_shift) |
150 (sq_conf->ls_prio << LS_PRIO_shift) |
151 (sq_conf->hs_prio << HS_PRIO_shift) |
152 (sq_conf->ps_prio << PS_PRIO_shift) |
153 (sq_conf->vs_prio << VS_PRIO_shift) |
154 (sq_conf->gs_prio << GS_PRIO_shift) |
155 (sq_conf->es_prio << ES_PRIO_shift));
156
157 sq_gpr_resource_mgmt_1 = ((sq_conf->num_ps_gprs << NUM_PS_GPRS_shift) |
158 (sq_conf->num_vs_gprs << NUM_VS_GPRS_shift) |
159 (sq_conf->num_temp_gprs << NUM_CLAUSE_TEMP_GPRS_shift));
160 sq_gpr_resource_mgmt_2 = ((sq_conf->num_gs_gprs << NUM_GS_GPRS_shift) |
161 (sq_conf->num_es_gprs << NUM_ES_GPRS_shift));
162 sq_gpr_resource_mgmt_3 = ((sq_conf->num_hs_gprs << NUM_HS_GPRS_shift) |
163 (sq_conf->num_ls_gprs << NUM_LS_GPRS_shift));
164
165 sq_thread_resource_mgmt = ((sq_conf->num_ps_threads << NUM_PS_THREADS_shift) |
166 (sq_conf->num_vs_threads << NUM_VS_THREADS_shift) |
167 (sq_conf->num_gs_threads << NUM_GS_THREADS_shift) |
168 (sq_conf->num_es_threads << NUM_ES_THREADS_shift));
169 sq_thread_resource_mgmt_2 = ((sq_conf->num_hs_threads << NUM_HS_THREADS_shift) |
170 (sq_conf->num_ls_threads << NUM_LS_THREADS_shift));
171
172 sq_stack_resource_mgmt_1 = ((sq_conf->num_ps_stack_entries << NUM_PS_STACK_ENTRIES_shift) |
173 (sq_conf->num_vs_stack_entries << NUM_VS_STACK_ENTRIES_shift));
174
175 sq_stack_resource_mgmt_2 = ((sq_conf->num_gs_stack_entries << NUM_GS_STACK_ENTRIES_shift) |
176 (sq_conf->num_es_stack_entries << NUM_ES_STACK_ENTRIES_shift));
177
178 sq_stack_resource_mgmt_3 = ((sq_conf->num_hs_stack_entries << NUM_HS_STACK_ENTRIES_shift) |
179 (sq_conf->num_ls_stack_entries << NUM_LS_STACK_ENTRIES_shift));
180
181 BEGIN_BATCH(16);
182 /* disable dyn gprs */
183 EREG(SQ_DYN_GPR_CNTL_PS_FLUSH_REQ, 0);
184 PACK0(SQ_CONFIG, 4);
185 E32(sq_config);
186 E32(sq_gpr_resource_mgmt_1);
187 E32(sq_gpr_resource_mgmt_2);
188 E32(sq_gpr_resource_mgmt_3);
189 PACK0(SQ_THREAD_RESOURCE_MGMT, 5);
190 E32(sq_thread_resource_mgmt);
191 E32(sq_thread_resource_mgmt_2);
192 E32(sq_stack_resource_mgmt_1);
193 E32(sq_stack_resource_mgmt_2);
194 E32(sq_stack_resource_mgmt_3);
195 END_BATCH();
196 }
197
198 /* cayman has some minor differences in CB_COLOR*_INFO and _ATTRIB, but none that
199 * we use here.
200 */
201 void
evergreen_set_render_target(ScrnInfoPtr pScrn,cb_config_t * cb_conf,uint32_t domain)202 evergreen_set_render_target(ScrnInfoPtr pScrn, cb_config_t *cb_conf, uint32_t domain)
203 {
204 uint32_t cb_color_info, cb_color_attrib = 0, cb_color_dim;
205 unsigned pitch, slice, w, h, array_mode, nbanks;
206 uint32_t tile_split, macro_aspect, bankw, bankh;
207 RADEONInfoPtr info = RADEONPTR(pScrn);
208
209 if (cb_conf->surface) {
210 switch (cb_conf->surface->level[0].mode) {
211 case RADEON_SURF_MODE_1D:
212 array_mode = 2;
213 break;
214 case RADEON_SURF_MODE_2D:
215 array_mode = 4;
216 break;
217 default:
218 array_mode = 0;
219 break;
220 }
221 w = cb_conf->surface->level[0].npix_x;
222 h = cb_conf->surface->level[0].npix_y;
223 pitch = (cb_conf->surface->level[0].nblk_x >> 3) - 1;
224 slice = ((cb_conf->surface->level[0].nblk_x * cb_conf->surface->level[0].nblk_y) / 64) - 1;
225 tile_split = cb_conf->surface->tile_split;
226 macro_aspect = cb_conf->surface->mtilea;
227 bankw = cb_conf->surface->bankw;
228 bankh = cb_conf->surface->bankh;
229 tile_split = eg_tile_split(tile_split);
230 macro_aspect = eg_macro_tile_aspect(macro_aspect);
231 bankw = eg_bank_wh(bankw);
232 bankh = eg_bank_wh(bankh);
233 } else {
234 pitch = (cb_conf->w / 8) - 1;
235 h = RADEON_ALIGN(cb_conf->h, 8);
236 slice = ((cb_conf->w * h) / 64) - 1;
237 array_mode = cb_conf->array_mode;
238 w = cb_conf->w;
239 tile_split = 4;
240 macro_aspect = 0;
241 bankw = 0;
242 bankh = 0;
243 }
244 nbanks = info->num_banks;
245 nbanks = eg_nbanks(nbanks);
246
247 cb_color_attrib |= (tile_split << CB_COLOR0_ATTRIB__TILE_SPLIT_shift)|
248 (nbanks << CB_COLOR0_ATTRIB__NUM_BANKS_shift) |
249 (bankw << CB_COLOR0_ATTRIB__BANK_WIDTH_shift) |
250 (bankh << CB_COLOR0_ATTRIB__BANK_HEIGHT_shift) |
251 (macro_aspect << CB_COLOR0_ATTRIB__MACRO_TILE_ASPECT_shift);
252 cb_color_info = ((cb_conf->endian << ENDIAN_shift) |
253 (cb_conf->format << CB_COLOR0_INFO__FORMAT_shift) |
254 (array_mode << CB_COLOR0_INFO__ARRAY_MODE_shift) |
255 (cb_conf->number_type << NUMBER_TYPE_shift) |
256 (cb_conf->comp_swap << COMP_SWAP_shift) |
257 (cb_conf->source_format << SOURCE_FORMAT_shift) |
258 (cb_conf->resource_type << RESOURCE_TYPE_shift));
259 if (cb_conf->blend_clamp)
260 cb_color_info |= BLEND_CLAMP_bit;
261 if (cb_conf->fast_clear)
262 cb_color_info |= FAST_CLEAR_bit;
263 if (cb_conf->compression)
264 cb_color_info |= COMPRESSION_bit;
265 if (cb_conf->blend_bypass)
266 cb_color_info |= BLEND_BYPASS_bit;
267 if (cb_conf->simple_float)
268 cb_color_info |= SIMPLE_FLOAT_bit;
269 if (cb_conf->round_mode)
270 cb_color_info |= CB_COLOR0_INFO__ROUND_MODE_bit;
271 if (cb_conf->tile_compact)
272 cb_color_info |= CB_COLOR0_INFO__TILE_COMPACT_bit;
273 if (cb_conf->rat)
274 cb_color_info |= RAT_bit;
275
276 /* bit 4 needs to be set for linear and depth/stencil surfaces */
277 if (cb_conf->non_disp_tiling)
278 cb_color_attrib |= CB_COLOR0_ATTRIB__NON_DISP_TILING_ORDER_bit;
279
280 switch (cb_conf->resource_type) {
281 case BUFFER:
282 /* number of elements in the surface */
283 cb_color_dim = pitch * slice;
284 break;
285 default:
286 /* w/h of the surface */
287 cb_color_dim = (((w - 1) << WIDTH_MAX_shift) |
288 ((cb_conf->h - 1) << HEIGHT_MAX_shift));
289 break;
290 }
291
292 BEGIN_BATCH(3 + 2);
293 EREG(CB_COLOR0_BASE + (0x3c * cb_conf->id), (cb_conf->base >> 8));
294 RELOC_BATCH(cb_conf->bo, 0, domain);
295 END_BATCH();
296
297 /* Set CMASK & FMASK buffer to the offset of color buffer as
298 * we don't use those this shouldn't cause any issue and we
299 * then have a valid cmd stream
300 */
301 BEGIN_BATCH(3 + 2);
302 EREG(CB_COLOR0_CMASK + (0x3c * cb_conf->id), (0 >> 8));
303 RELOC_BATCH(cb_conf->bo, 0, domain);
304 END_BATCH();
305 BEGIN_BATCH(3 + 2);
306 EREG(CB_COLOR0_FMASK + (0x3c * cb_conf->id), (0 >> 8));
307 RELOC_BATCH(cb_conf->bo, 0, domain);
308 END_BATCH();
309
310 /* tiling config */
311 BEGIN_BATCH(3 + 2);
312 EREG(CB_COLOR0_ATTRIB + (0x3c * cb_conf->id), cb_color_attrib);
313 RELOC_BATCH(cb_conf->bo, 0, domain);
314 END_BATCH();
315 BEGIN_BATCH(3 + 2);
316 EREG(CB_COLOR0_INFO + (0x3c * cb_conf->id), cb_color_info);
317 RELOC_BATCH(cb_conf->bo, 0, domain);
318 END_BATCH();
319
320 BEGIN_BATCH(33);
321 EREG(CB_COLOR0_PITCH + (0x3c * cb_conf->id), pitch);
322 EREG(CB_COLOR0_SLICE + (0x3c * cb_conf->id), slice);
323 EREG(CB_COLOR0_VIEW + (0x3c * cb_conf->id), 0);
324 EREG(CB_COLOR0_DIM + (0x3c * cb_conf->id), cb_color_dim);
325 EREG(CB_COLOR0_CMASK_SLICE + (0x3c * cb_conf->id), 0);
326 EREG(CB_COLOR0_FMASK_SLICE + (0x3c * cb_conf->id), 0);
327 PACK0(CB_COLOR0_CLEAR_WORD0 + (0x3c * cb_conf->id), 4);
328 E32(0);
329 E32(0);
330 E32(0);
331 E32(0);
332 EREG(CB_TARGET_MASK, (cb_conf->pmask << TARGET0_ENABLE_shift));
333 EREG(CB_COLOR_CONTROL, (EVERGREEN_ROP[cb_conf->rop] |
334 (CB_NORMAL << CB_COLOR_CONTROL__MODE_shift)));
335 EREG(CB_BLEND0_CONTROL, cb_conf->blendcntl);
336 END_BATCH();
337 }
338
/*
 * Write the four blend colour registers starting at CB_BLEND_RED.
 *
 * pScrn: screen whose command stream receives the packet
 * color: four floats, emitted in the order R, G, B, A
 */
void evergreen_set_blend_color(ScrnInfoPtr pScrn, float *color)
{
    RADEONInfoPtr info = RADEONPTR(pScrn);
    int channel;

    BEGIN_BATCH(2 + 4);
    PACK0(CB_BLEND_RED, 4);
    /* channel 0..3 = R, G, B, A */
    for (channel = 0; channel < 4; channel++) {
        EFLOAT(color[channel]);
    }
    END_BATCH();
}
351
352 static void
evergreen_cp_set_surface_sync(ScrnInfoPtr pScrn,uint32_t sync_type,uint32_t size,uint64_t mc_addr,struct radeon_bo * bo,uint32_t rdomains,uint32_t wdomain)353 evergreen_cp_set_surface_sync(ScrnInfoPtr pScrn, uint32_t sync_type,
354 uint32_t size, uint64_t mc_addr,
355 struct radeon_bo *bo, uint32_t rdomains, uint32_t wdomain)
356 {
357 RADEONInfoPtr info = RADEONPTR(pScrn);
358 uint32_t cp_coher_size;
359 if (size == 0xffffffff)
360 cp_coher_size = 0xffffffff;
361 else
362 cp_coher_size = ((size + 255) >> 8);
363
364 BEGIN_BATCH(5 + 2);
365 PACK3(IT_SURFACE_SYNC, 4);
366 E32(sync_type);
367 E32(cp_coher_size);
368 E32((mc_addr >> 8));
369 E32(10); /* poll interval */
370 RELOC_BATCH(bo, rdomains, wdomain);
371 END_BATCH();
372 }
373
374 /* inserts a wait for vline in the command stream */
evergreen_cp_wait_vline_sync(ScrnInfoPtr pScrn,PixmapPtr pPix,xf86CrtcPtr crtc,int start,int stop)375 void evergreen_cp_wait_vline_sync(ScrnInfoPtr pScrn, PixmapPtr pPix,
376 xf86CrtcPtr crtc, int start, int stop)
377 {
378 RADEONInfoPtr info = RADEONPTR(pScrn);
379 drmmode_crtc_private_ptr drmmode_crtc;
380
381 if (!crtc)
382 return;
383
384 drmmode_crtc = crtc->driver_private;
385
386 if (!crtc->enabled)
387 return;
388
389 if (pPix != pScrn->pScreen->GetScreenPixmap(pScrn->pScreen))
390 return;
391
392 start = max(start, crtc->y);
393 stop = min(stop, crtc->y + crtc->mode.VDisplay);
394
395 if (start >= stop)
396 return;
397
398 BEGIN_BATCH(11);
399 /* set the VLINE range */
400 EREG(EVERGREEN_VLINE_START_END, /* this is just a marker */
401 (start << EVERGREEN_VLINE_START_SHIFT) |
402 (stop << EVERGREEN_VLINE_END_SHIFT));
403
404 /* tell the CP to poll the VLINE state register */
405 PACK3(IT_WAIT_REG_MEM, 6);
406 E32(IT_WAIT_REG | IT_WAIT_EQ);
407 E32(IT_WAIT_ADDR(EVERGREEN_VLINE_STATUS));
408 E32(0);
409 E32(0); // Ref value
410 E32(EVERGREEN_VLINE_STAT); // Mask
411 E32(10); // Wait interval
412 /* add crtc reloc */
413 PACK3(IT_NOP, 1);
414 E32(drmmode_crtc->mode_crtc->crtc_id);
415 END_BATCH();
416 }
417
418 void
evergreen_set_spi(ScrnInfoPtr pScrn,int vs_export_count,int num_interp)419 evergreen_set_spi(ScrnInfoPtr pScrn, int vs_export_count, int num_interp)
420 {
421 RADEONInfoPtr info = RADEONPTR(pScrn);
422
423 BEGIN_BATCH(8);
424 /* Interpolator setup */
425 EREG(SPI_VS_OUT_CONFIG, (vs_export_count << VS_EXPORT_COUNT_shift));
426 PACK0(SPI_PS_IN_CONTROL_0, 3);
427 E32(((num_interp << NUM_INTERP_shift) |
428 LINEAR_GRADIENT_ENA_bit)); // SPI_PS_IN_CONTROL_0
429 E32(0); // SPI_PS_IN_CONTROL_1
430 E32(0); // SPI_INTERP_CONTROL_0
431 END_BATCH();
432 }
433
434 void
evergreen_fs_setup(ScrnInfoPtr pScrn,shader_config_t * fs_conf,uint32_t domain)435 evergreen_fs_setup(ScrnInfoPtr pScrn, shader_config_t *fs_conf, uint32_t domain)
436 {
437 RADEONInfoPtr info = RADEONPTR(pScrn);
438 uint32_t sq_pgm_resources;
439
440 sq_pgm_resources = ((fs_conf->num_gprs << NUM_GPRS_shift) |
441 (fs_conf->stack_size << STACK_SIZE_shift));
442
443 if (fs_conf->dx10_clamp)
444 sq_pgm_resources |= DX10_CLAMP_bit;
445
446 BEGIN_BATCH(3 + 2);
447 EREG(SQ_PGM_START_FS, fs_conf->shader_addr >> 8);
448 RELOC_BATCH(fs_conf->bo, domain, 0);
449 END_BATCH();
450
451 BEGIN_BATCH(3);
452 EREG(SQ_PGM_RESOURCES_FS, sq_pgm_resources);
453 END_BATCH();
454 }
455
456 /* cayman has some minor differences in SQ_PGM_RESOUCES_VS and _RESOURCES_2_VS,
457 * but none that we use here.
458 */
459 void
evergreen_vs_setup(ScrnInfoPtr pScrn,shader_config_t * vs_conf,uint32_t domain)460 evergreen_vs_setup(ScrnInfoPtr pScrn, shader_config_t *vs_conf, uint32_t domain)
461 {
462 RADEONInfoPtr info = RADEONPTR(pScrn);
463 uint32_t sq_pgm_resources, sq_pgm_resources_2;
464
465 sq_pgm_resources = ((vs_conf->num_gprs << NUM_GPRS_shift) |
466 (vs_conf->stack_size << STACK_SIZE_shift));
467
468 if (vs_conf->dx10_clamp)
469 sq_pgm_resources |= DX10_CLAMP_bit;
470 if (vs_conf->uncached_first_inst)
471 sq_pgm_resources |= UNCACHED_FIRST_INST_bit;
472
473 sq_pgm_resources_2 = ((vs_conf->single_round << SINGLE_ROUND_shift) |
474 (vs_conf->double_round << DOUBLE_ROUND_shift));
475
476 if (vs_conf->allow_sdi)
477 sq_pgm_resources_2 |= ALLOW_SINGLE_DENORM_IN_bit;
478 if (vs_conf->allow_sd0)
479 sq_pgm_resources_2 |= ALLOW_SINGLE_DENORM_OUT_bit;
480 if (vs_conf->allow_ddi)
481 sq_pgm_resources_2 |= ALLOW_DOUBLE_DENORM_IN_bit;
482 if (vs_conf->allow_ddo)
483 sq_pgm_resources_2 |= ALLOW_DOUBLE_DENORM_OUT_bit;
484
485 /* flush SQ cache */
486 evergreen_cp_set_surface_sync(pScrn, SH_ACTION_ENA_bit,
487 vs_conf->shader_size, vs_conf->shader_addr,
488 vs_conf->bo, domain, 0);
489
490 BEGIN_BATCH(3 + 2);
491 EREG(SQ_PGM_START_VS, vs_conf->shader_addr >> 8);
492 RELOC_BATCH(vs_conf->bo, domain, 0);
493 END_BATCH();
494
495 BEGIN_BATCH(4);
496 PACK0(SQ_PGM_RESOURCES_VS, 2);
497 E32(sq_pgm_resources);
498 E32(sq_pgm_resources_2);
499 END_BATCH();
500 }
501
502 /* cayman has some minor differences in SQ_PGM_RESOUCES_PS and _RESOURCES_2_PS,
503 * but none that we use here.
504 */
505 void
evergreen_ps_setup(ScrnInfoPtr pScrn,shader_config_t * ps_conf,uint32_t domain)506 evergreen_ps_setup(ScrnInfoPtr pScrn, shader_config_t *ps_conf, uint32_t domain)
507 {
508 RADEONInfoPtr info = RADEONPTR(pScrn);
509 uint32_t sq_pgm_resources, sq_pgm_resources_2;
510
511 sq_pgm_resources = ((ps_conf->num_gprs << NUM_GPRS_shift) |
512 (ps_conf->stack_size << STACK_SIZE_shift));
513
514 if (ps_conf->dx10_clamp)
515 sq_pgm_resources |= DX10_CLAMP_bit;
516 if (ps_conf->uncached_first_inst)
517 sq_pgm_resources |= UNCACHED_FIRST_INST_bit;
518 if (ps_conf->clamp_consts)
519 sq_pgm_resources |= CLAMP_CONSTS_bit;
520
521 sq_pgm_resources_2 = ((ps_conf->single_round << SINGLE_ROUND_shift) |
522 (ps_conf->double_round << DOUBLE_ROUND_shift));
523
524 if (ps_conf->allow_sdi)
525 sq_pgm_resources_2 |= ALLOW_SINGLE_DENORM_IN_bit;
526 if (ps_conf->allow_sd0)
527 sq_pgm_resources_2 |= ALLOW_SINGLE_DENORM_OUT_bit;
528 if (ps_conf->allow_ddi)
529 sq_pgm_resources_2 |= ALLOW_DOUBLE_DENORM_IN_bit;
530 if (ps_conf->allow_ddo)
531 sq_pgm_resources_2 |= ALLOW_DOUBLE_DENORM_OUT_bit;
532
533 /* flush SQ cache */
534 evergreen_cp_set_surface_sync(pScrn, SH_ACTION_ENA_bit,
535 ps_conf->shader_size, ps_conf->shader_addr,
536 ps_conf->bo, domain, 0);
537
538 BEGIN_BATCH(3 + 2);
539 EREG(SQ_PGM_START_PS, ps_conf->shader_addr >> 8);
540 RELOC_BATCH(ps_conf->bo, domain, 0);
541 END_BATCH();
542
543 BEGIN_BATCH(5);
544 PACK0(SQ_PGM_RESOURCES_PS, 3);
545 E32(sq_pgm_resources);
546 E32(sq_pgm_resources_2);
547 E32(ps_conf->export_mode);
548 END_BATCH();
549 }
550
551 void
evergreen_set_alu_consts(ScrnInfoPtr pScrn,const_config_t * const_conf,uint32_t domain)552 evergreen_set_alu_consts(ScrnInfoPtr pScrn, const_config_t *const_conf, uint32_t domain)
553 {
554 RADEONInfoPtr info = RADEONPTR(pScrn);
555 /* size reg is units of 16 consts (4 dwords each) */
556 uint32_t size = const_conf->size_bytes >> 8;
557
558 if (size == 0)
559 size = 1;
560
561 #if X_BYTE_ORDER == X_BIG_ENDIAN
562 {
563 uint32_t count = size << 6, *p = const_conf->cpu_ptr;
564
565 while(count--) {
566 *p = cpu_to_le32(*p);
567 p++;
568 }
569 }
570 #endif
571
572 /* flush SQ cache */
573 evergreen_cp_set_surface_sync(pScrn, SH_ACTION_ENA_bit,
574 const_conf->size_bytes, const_conf->const_addr,
575 const_conf->bo, domain, 0);
576
577 switch (const_conf->type) {
578 case SHADER_TYPE_VS:
579 BEGIN_BATCH(3);
580 EREG(SQ_ALU_CONST_BUFFER_SIZE_VS_0, size);
581 END_BATCH();
582 BEGIN_BATCH(3 + 2);
583 EREG(SQ_ALU_CONST_CACHE_VS_0, const_conf->const_addr >> 8);
584 RELOC_BATCH(const_conf->bo, domain, 0);
585 END_BATCH();
586 break;
587 case SHADER_TYPE_PS:
588 BEGIN_BATCH(3);
589 EREG(SQ_ALU_CONST_BUFFER_SIZE_PS_0, size);
590 END_BATCH();
591 BEGIN_BATCH(3 + 2);
592 EREG(SQ_ALU_CONST_CACHE_PS_0, const_conf->const_addr >> 8);
593 RELOC_BATCH(const_conf->bo, domain, 0);
594 END_BATCH();
595 break;
596 default:
597 ErrorF("Unsupported const type %d\n", const_conf->type);
598 break;
599 }
600
601 }
602
603 void
evergreen_set_bool_consts(ScrnInfoPtr pScrn,int offset,uint32_t val)604 evergreen_set_bool_consts(ScrnInfoPtr pScrn, int offset, uint32_t val)
605 {
606 RADEONInfoPtr info = RADEONPTR(pScrn);
607 /* bool register order is: ps, vs/es, gs, hs, ls, cs; one register each
608 * 1 bits per bool; 32 bools each for ps, vs/es, gs, hs, ls, cs.
609 */
610 BEGIN_BATCH(3);
611 EREG(SQ_BOOL_CONST + offset * SQ_BOOL_CONST_offset, val);
612 END_BATCH();
613 }
614
615 /* cayman has some minor differences in SQ_VTX_CONSTANT_WORD2_0 and _WORD3_0,
616 * but none that we use here.
617 */
618 static void
evergreen_set_vtx_resource(ScrnInfoPtr pScrn,vtx_resource_t * res,uint32_t domain)619 evergreen_set_vtx_resource(ScrnInfoPtr pScrn, vtx_resource_t *res, uint32_t domain)
620 {
621 RADEONInfoPtr info = RADEONPTR(pScrn);
622 struct radeon_accel_state *accel_state = info->accel_state;
623 uint32_t sq_vtx_constant_word2, sq_vtx_constant_word3, sq_vtx_constant_word4;
624
625 sq_vtx_constant_word2 = ((((res->vb_addr) >> 32) & BASE_ADDRESS_HI_mask) |
626 ((res->vtx_size_dw << 2) << SQ_VTX_CONSTANT_WORD2_0__STRIDE_shift) |
627 (res->format << SQ_VTX_CONSTANT_WORD2_0__DATA_FORMAT_shift) |
628 (res->num_format_all << SQ_VTX_CONSTANT_WORD2_0__NUM_FORMAT_ALL_shift) |
629 (res->endian << SQ_VTX_CONSTANT_WORD2_0__ENDIAN_SWAP_shift));
630 if (res->clamp_x)
631 sq_vtx_constant_word2 |= SQ_VTX_CONSTANT_WORD2_0__CLAMP_X_bit;
632
633 if (res->format_comp_all)
634 sq_vtx_constant_word2 |= SQ_VTX_CONSTANT_WORD2_0__FORMAT_COMP_ALL_bit;
635
636 if (res->srf_mode_all)
637 sq_vtx_constant_word2 |= SQ_VTX_CONSTANT_WORD2_0__SRF_MODE_ALL_bit;
638
639 sq_vtx_constant_word3 = ((res->dst_sel_x << SQ_VTX_CONSTANT_WORD3_0__DST_SEL_X_shift) |
640 (res->dst_sel_y << SQ_VTX_CONSTANT_WORD3_0__DST_SEL_Y_shift) |
641 (res->dst_sel_z << SQ_VTX_CONSTANT_WORD3_0__DST_SEL_Z_shift) |
642 (res->dst_sel_w << SQ_VTX_CONSTANT_WORD3_0__DST_SEL_W_shift));
643
644 if (res->uncached)
645 sq_vtx_constant_word3 |= SQ_VTX_CONSTANT_WORD3_0__UNCACHED_bit;
646
647 /* XXX ??? */
648 sq_vtx_constant_word4 = 0;
649
650 /* flush vertex cache */
651 if ((info->ChipFamily == CHIP_FAMILY_CEDAR) ||
652 (info->ChipFamily == CHIP_FAMILY_PALM) ||
653 (info->ChipFamily == CHIP_FAMILY_SUMO) ||
654 (info->ChipFamily == CHIP_FAMILY_SUMO2) ||
655 (info->ChipFamily == CHIP_FAMILY_CAICOS) ||
656 (info->ChipFamily == CHIP_FAMILY_CAYMAN) ||
657 (info->ChipFamily == CHIP_FAMILY_ARUBA))
658 evergreen_cp_set_surface_sync(pScrn, TC_ACTION_ENA_bit,
659 accel_state->vbo.vb_offset, 0,
660 res->bo,
661 domain, 0);
662 else
663 evergreen_cp_set_surface_sync(pScrn, VC_ACTION_ENA_bit,
664 accel_state->vbo.vb_offset, 0,
665 res->bo,
666 domain, 0);
667
668 BEGIN_BATCH(10 + 2);
669 PACK0(SQ_FETCH_RESOURCE + res->id * SQ_FETCH_RESOURCE_offset, 8);
670 E32(res->vb_addr & 0xffffffff); // 0: BASE_ADDRESS
671 E32((res->vtx_num_entries << 2) - 1); // 1: SIZE
672 E32(sq_vtx_constant_word2); // 2: BASE_HI, STRIDE, CLAMP, FORMAT, ENDIAN
673 E32(sq_vtx_constant_word3); // 3: swizzles
674 E32(sq_vtx_constant_word4); // 4: num elements
675 E32(0); // 5: n/a
676 E32(0); // 6: n/a
677 E32(SQ_TEX_VTX_VALID_BUFFER << SQ_VTX_CONSTANT_WORD7_0__TYPE_shift); // 7: TYPE
678 RELOC_BATCH(res->bo, domain, 0);
679 END_BATCH();
680 }
681
682 /* cayman has some minor differences in SQ_TEX_CONSTANT_WORD0_0 and _WORD4_0,
683 * but none that we use here.
684 */
685 void
evergreen_set_tex_resource(ScrnInfoPtr pScrn,tex_resource_t * tex_res,uint32_t domain)686 evergreen_set_tex_resource(ScrnInfoPtr pScrn, tex_resource_t *tex_res, uint32_t domain)
687 {
688 RADEONInfoPtr info = RADEONPTR(pScrn);
689 uint32_t sq_tex_resource_word0, sq_tex_resource_word1, sq_tex_resource_word4;
690 uint32_t sq_tex_resource_word5, sq_tex_resource_word6, sq_tex_resource_word7;
691 uint32_t array_mode, pitch, tile_split, macro_aspect, bankw, bankh, nbanks;
692
693 if (tex_res->surface) {
694 switch (tex_res->surface->level[0].mode) {
695 case RADEON_SURF_MODE_1D:
696 array_mode = 2;
697 break;
698 case RADEON_SURF_MODE_2D:
699 array_mode = 4;
700 break;
701 default:
702 array_mode = 0;
703 break;
704 }
705 pitch = tex_res->surface->level[0].nblk_x >> 3;
706 tile_split = tex_res->surface->tile_split;
707 macro_aspect = tex_res->surface->mtilea;
708 bankw = tex_res->surface->bankw;
709 bankh = tex_res->surface->bankh;
710 tile_split = eg_tile_split(tile_split);
711 macro_aspect = eg_macro_tile_aspect(macro_aspect);
712 bankw = eg_bank_wh(bankw);
713 bankh = eg_bank_wh(bankh);
714 } else {
715 array_mode = tex_res->array_mode;
716 pitch = (tex_res->pitch + 7) >> 3;
717 tile_split = 4;
718 macro_aspect = 0;
719 bankw = 0;
720 bankh = 0;
721 }
722 nbanks = info->num_banks;
723 nbanks = eg_nbanks(nbanks);
724
725 sq_tex_resource_word0 = (tex_res->dim << DIM_shift);
726
727 if (tex_res->w)
728 sq_tex_resource_word0 |= ( ((pitch - 1) << PITCH_shift) |
729 ((tex_res->w - 1) << TEX_WIDTH_shift) );
730
731 if (tex_res->tile_type)
732 sq_tex_resource_word0 |= SQ_TEX_RESOURCE_WORD0_0__NON_DISP_TILING_ORDER_bit;
733
734 sq_tex_resource_word1 = (array_mode << SQ_TEX_RESOURCE_WORD1_0__ARRAY_MODE_shift);
735
736 if (tex_res->h)
737 sq_tex_resource_word1 |= ((tex_res->h - 1) << TEX_HEIGHT_shift);
738 if (tex_res->depth)
739 sq_tex_resource_word1 |= ((tex_res->depth - 1) << TEX_DEPTH_shift);
740
741 sq_tex_resource_word4 = ((tex_res->format_comp_x << FORMAT_COMP_X_shift) |
742 (tex_res->format_comp_y << FORMAT_COMP_Y_shift) |
743 (tex_res->format_comp_z << FORMAT_COMP_Z_shift) |
744 (tex_res->format_comp_w << FORMAT_COMP_W_shift) |
745 (tex_res->num_format_all << SQ_TEX_RESOURCE_WORD4_0__NUM_FORMAT_ALL_shift) |
746 (tex_res->endian << SQ_TEX_RESOURCE_WORD4_0__ENDIAN_SWAP_shift) |
747 (tex_res->dst_sel_x << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift) |
748 (tex_res->dst_sel_y << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift) |
749 (tex_res->dst_sel_z << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift) |
750 (tex_res->dst_sel_w << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift) |
751 (tex_res->base_level << BASE_LEVEL_shift));
752
753 if (tex_res->srf_mode_all)
754 sq_tex_resource_word4 |= SQ_TEX_RESOURCE_WORD4_0__SRF_MODE_ALL_bit;
755 if (tex_res->force_degamma)
756 sq_tex_resource_word4 |= SQ_TEX_RESOURCE_WORD4_0__FORCE_DEGAMMA_bit;
757
758 sq_tex_resource_word5 = ((tex_res->last_level << LAST_LEVEL_shift) |
759 (tex_res->base_array << BASE_ARRAY_shift) |
760 (tex_res->last_array << LAST_ARRAY_shift));
761
762 sq_tex_resource_word6 = ((tex_res->min_lod << SQ_TEX_RESOURCE_WORD6_0__MIN_LOD_shift) |
763 (tex_res->perf_modulation << PERF_MODULATION_shift) |
764 (tile_split << SQ_TEX_RESOURCE_WORD6_0__TILE_SPLIT_shift));
765
766 if (tex_res->interlaced)
767 sq_tex_resource_word6 |= INTERLACED_bit;
768
769 sq_tex_resource_word7 = ((tex_res->format << SQ_TEX_RESOURCE_WORD7_0__DATA_FORMAT_shift) |
770 (macro_aspect << SQ_TEX_RESOURCE_WORD7_0__MACRO_TILE_ASPECT_shift) |
771 (nbanks << SQ_TEX_RESOURCE_WORD7_0__NUM_BANKS_shift) |
772 (bankw << SQ_TEX_RESOURCE_WORD7_0__BANK_WIDTH_shift) |
773 (bankh << SQ_TEX_RESOURCE_WORD7_0__BANK_HEIGHT_shift) |
774 (SQ_TEX_VTX_VALID_TEXTURE << SQ_TEX_RESOURCE_WORD7_0__TYPE_shift));
775
776 /* flush texture cache */
777 evergreen_cp_set_surface_sync(pScrn, TC_ACTION_ENA_bit,
778 tex_res->size, tex_res->base,
779 tex_res->bo, domain, 0);
780
781 BEGIN_BATCH(10 + 4);
782 PACK0(SQ_FETCH_RESOURCE + tex_res->id * SQ_FETCH_RESOURCE_offset, 8);
783 E32(sq_tex_resource_word0);
784 E32(sq_tex_resource_word1);
785 E32(((tex_res->base) >> 8));
786 E32(((tex_res->mip_base) >> 8));
787 E32(sq_tex_resource_word4);
788 E32(sq_tex_resource_word5);
789 E32(sq_tex_resource_word6);
790 E32(sq_tex_resource_word7);
791 RELOC_BATCH(tex_res->bo, domain, 0);
792 RELOC_BATCH(tex_res->mip_bo, domain, 0);
793 END_BATCH();
794 }
795
796 /* cayman has some minor differences in SQ_TEX_SAMPLER_WORD0_0,
797 * but none that we use here.
798 */
799 void
evergreen_set_tex_sampler(ScrnInfoPtr pScrn,tex_sampler_t * s)800 evergreen_set_tex_sampler (ScrnInfoPtr pScrn, tex_sampler_t *s)
801 {
802 RADEONInfoPtr info = RADEONPTR(pScrn);
803 uint32_t sq_tex_sampler_word0, sq_tex_sampler_word1, sq_tex_sampler_word2;
804
805 sq_tex_sampler_word0 = ((s->clamp_x << SQ_TEX_SAMPLER_WORD0_0__CLAMP_X_shift) |
806 (s->clamp_y << CLAMP_Y_shift) |
807 (s->clamp_z << CLAMP_Z_shift) |
808 (s->xy_mag_filter << XY_MAG_FILTER_shift) |
809 (s->xy_min_filter << XY_MIN_FILTER_shift) |
810 (s->z_filter << Z_FILTER_shift) |
811 (s->mip_filter << MIP_FILTER_shift) |
812 (s->border_color << BORDER_COLOR_TYPE_shift) |
813 (s->depth_compare << DEPTH_COMPARE_FUNCTION_shift) |
814 (s->chroma_key << CHROMA_KEY_shift));
815
816 sq_tex_sampler_word1 = ((s->min_lod << SQ_TEX_SAMPLER_WORD1_0__MIN_LOD_shift) |
817 (s->max_lod << MAX_LOD_shift) |
818 (s->perf_mip << PERF_MIP_shift) |
819 (s->perf_z << PERF_Z_shift));
820
821
822 sq_tex_sampler_word2 = ((s->lod_bias << SQ_TEX_SAMPLER_WORD2_0__LOD_BIAS_shift) |
823 (s->lod_bias2 << LOD_BIAS_SEC_shift));
824
825 if (s->mc_coord_truncate)
826 sq_tex_sampler_word2 |= MC_COORD_TRUNCATE_bit;
827 if (s->force_degamma)
828 sq_tex_sampler_word2 |= SQ_TEX_SAMPLER_WORD2_0__FORCE_DEGAMMA_bit;
829 if (s->truncate_coord)
830 sq_tex_sampler_word2 |= TRUNCATE_COORD_bit;
831 if (s->disable_cube_wrap)
832 sq_tex_sampler_word2 |= SQ_TEX_SAMPLER_WORD2_0__DISABLE_CUBE_WRAP_bit;
833 if (s->type)
834 sq_tex_sampler_word2 |= SQ_TEX_SAMPLER_WORD2_0__TYPE_bit;
835
836 BEGIN_BATCH(5);
837 PACK0(SQ_TEX_SAMPLER_WORD + s->id * SQ_TEX_SAMPLER_WORD_offset, 3);
838 E32(sq_tex_sampler_word0);
839 E32(sq_tex_sampler_word1);
840 E32(sq_tex_sampler_word2);
841 END_BATCH();
842 }
843
844 /* workarounds for hw bugs in eg+ */
845 /* only affects screen/window/generic/vport. cliprects are not affected */
846 static void
evergreen_fix_scissor_coordinates(ScrnInfoPtr pScrn,int * x1,int * y1,int * x2,int * y2)847 evergreen_fix_scissor_coordinates(ScrnInfoPtr pScrn, int *x1, int *y1, int *x2, int *y2)
848 {
849 RADEONInfoPtr info = RADEONPTR(pScrn);
850
851 /* all eg+ asics */
852 if (*x2 == 0)
853 *x1 = 1;
854 if (*y2 == 0)
855 *y1 = 1;
856
857 /* cayman/tn only */
858 if (info->ChipFamily >= CHIP_FAMILY_CAYMAN) {
859 /* cliprects aren't affected so we can use them to clip if we need
860 * a true 1x1 clip region
861 */
862 if ((*x2 == 1) && (*y2 == 1))
863 *x2 = 2;
864 }
865 }
866
867 //XXX deal with clip offsets in clip setup
868 void
evergreen_set_screen_scissor(ScrnInfoPtr pScrn,int x1,int y1,int x2,int y2)869 evergreen_set_screen_scissor(ScrnInfoPtr pScrn, int x1, int y1, int x2, int y2)
870 {
871 RADEONInfoPtr info = RADEONPTR(pScrn);
872
873 evergreen_fix_scissor_coordinates(pScrn, &x1, &y1, &x2, &y2);
874
875 BEGIN_BATCH(4);
876 PACK0(PA_SC_SCREEN_SCISSOR_TL, 2);
877 E32(((x1 << PA_SC_SCREEN_SCISSOR_TL__TL_X_shift) |
878 (y1 << PA_SC_SCREEN_SCISSOR_TL__TL_Y_shift)));
879 E32(((x2 << PA_SC_SCREEN_SCISSOR_BR__BR_X_shift) |
880 (y2 << PA_SC_SCREEN_SCISSOR_BR__BR_Y_shift)));
881 END_BATCH();
882 }
883
884 void
evergreen_set_vport_scissor(ScrnInfoPtr pScrn,int id,int x1,int y1,int x2,int y2)885 evergreen_set_vport_scissor(ScrnInfoPtr pScrn, int id, int x1, int y1, int x2, int y2)
886 {
887 RADEONInfoPtr info = RADEONPTR(pScrn);
888
889 evergreen_fix_scissor_coordinates(pScrn, &x1, &y1, &x2, &y2);
890
891 BEGIN_BATCH(4);
892 PACK0(PA_SC_VPORT_SCISSOR_0_TL + id * PA_SC_VPORT_SCISSOR_0_TL_offset, 2);
893 E32(((x1 << PA_SC_VPORT_SCISSOR_0_TL__TL_X_shift) |
894 (y1 << PA_SC_VPORT_SCISSOR_0_TL__TL_Y_shift) |
895 WINDOW_OFFSET_DISABLE_bit));
896 E32(((x2 << PA_SC_VPORT_SCISSOR_0_BR__BR_X_shift) |
897 (y2 << PA_SC_VPORT_SCISSOR_0_BR__BR_Y_shift)));
898 END_BATCH();
899 }
900
901 void
evergreen_set_generic_scissor(ScrnInfoPtr pScrn,int x1,int y1,int x2,int y2)902 evergreen_set_generic_scissor(ScrnInfoPtr pScrn, int x1, int y1, int x2, int y2)
903 {
904 RADEONInfoPtr info = RADEONPTR(pScrn);
905
906 evergreen_fix_scissor_coordinates(pScrn, &x1, &y1, &x2, &y2);
907
908 BEGIN_BATCH(4);
909 PACK0(PA_SC_GENERIC_SCISSOR_TL, 2);
910 E32(((x1 << PA_SC_GENERIC_SCISSOR_TL__TL_X_shift) |
911 (y1 << PA_SC_GENERIC_SCISSOR_TL__TL_Y_shift) |
912 WINDOW_OFFSET_DISABLE_bit));
913 E32(((x2 << PA_SC_GENERIC_SCISSOR_BR__BR_X_shift) |
914 (y2 << PA_SC_GENERIC_SCISSOR_TL__TL_Y_shift)));
915 END_BATCH();
916 }
917
918 void
evergreen_set_window_scissor(ScrnInfoPtr pScrn,int x1,int y1,int x2,int y2)919 evergreen_set_window_scissor(ScrnInfoPtr pScrn, int x1, int y1, int x2, int y2)
920 {
921 RADEONInfoPtr info = RADEONPTR(pScrn);
922
923 evergreen_fix_scissor_coordinates(pScrn, &x1, &y1, &x2, &y2);
924
925 BEGIN_BATCH(4);
926 PACK0(PA_SC_WINDOW_SCISSOR_TL, 2);
927 E32(((x1 << PA_SC_WINDOW_SCISSOR_TL__TL_X_shift) |
928 (y1 << PA_SC_WINDOW_SCISSOR_TL__TL_Y_shift) |
929 WINDOW_OFFSET_DISABLE_bit));
930 E32(((x2 << PA_SC_WINDOW_SCISSOR_BR__BR_X_shift) |
931 (y2 << PA_SC_WINDOW_SCISSOR_BR__BR_Y_shift)));
932 END_BATCH();
933 }
934
935 void
evergreen_set_clip_rect(ScrnInfoPtr pScrn,int id,int x1,int y1,int x2,int y2)936 evergreen_set_clip_rect(ScrnInfoPtr pScrn, int id, int x1, int y1, int x2, int y2)
937 {
938 RADEONInfoPtr info = RADEONPTR(pScrn);
939
940 BEGIN_BATCH(4);
941 PACK0(PA_SC_CLIPRECT_0_TL + id * PA_SC_CLIPRECT_0_TL_offset, 2);
942 E32(((x1 << PA_SC_CLIPRECT_0_TL__TL_X_shift) |
943 (y1 << PA_SC_CLIPRECT_0_TL__TL_Y_shift)));
944 E32(((x2 << PA_SC_CLIPRECT_0_BR__BR_X_shift) |
945 (y2 << PA_SC_CLIPRECT_0_BR__BR_Y_shift)));
946 END_BATCH();
947 }
948
949 /*
950 * Setup of default state
951 */
952
953 void
evergreen_set_default_state(ScrnInfoPtr pScrn)954 evergreen_set_default_state(ScrnInfoPtr pScrn)
955 {
956 tex_resource_t tex_res;
957 shader_config_t fs_conf;
958 sq_config_t sq_conf;
959 int i;
960 RADEONInfoPtr info = RADEONPTR(pScrn);
961 struct radeon_accel_state *accel_state = info->accel_state;
962
963 if (info->ChipFamily >= CHIP_FAMILY_CAYMAN) {
964 cayman_set_default_state(pScrn);
965 return;
966 }
967
968 if (accel_state->XInited3D)
969 return;
970
971 memset(&tex_res, 0, sizeof(tex_resource_t));
972 memset(&fs_conf, 0, sizeof(shader_config_t));
973
974 accel_state->XInited3D = TRUE;
975
976 evergreen_start_3d(pScrn);
977
978 /* SQ */
979 sq_conf.ps_prio = 0;
980 sq_conf.vs_prio = 1;
981 sq_conf.gs_prio = 2;
982 sq_conf.es_prio = 3;
983 sq_conf.hs_prio = 0;
984 sq_conf.ls_prio = 0;
985 sq_conf.cs_prio = 0;
986
987 switch (info->ChipFamily) {
988 case CHIP_FAMILY_CEDAR:
989 default:
990 sq_conf.num_ps_gprs = 93;
991 sq_conf.num_vs_gprs = 46;
992 sq_conf.num_temp_gprs = 4;
993 sq_conf.num_gs_gprs = 31;
994 sq_conf.num_es_gprs = 31;
995 sq_conf.num_hs_gprs = 23;
996 sq_conf.num_ls_gprs = 23;
997 sq_conf.num_ps_threads = 96;
998 sq_conf.num_vs_threads = 16;
999 sq_conf.num_gs_threads = 16;
1000 sq_conf.num_es_threads = 16;
1001 sq_conf.num_hs_threads = 16;
1002 sq_conf.num_ls_threads = 16;
1003 sq_conf.num_ps_stack_entries = 42;
1004 sq_conf.num_vs_stack_entries = 42;
1005 sq_conf.num_gs_stack_entries = 42;
1006 sq_conf.num_es_stack_entries = 42;
1007 sq_conf.num_hs_stack_entries = 42;
1008 sq_conf.num_ls_stack_entries = 42;
1009 break;
1010 case CHIP_FAMILY_REDWOOD:
1011 sq_conf.num_ps_gprs = 93;
1012 sq_conf.num_vs_gprs = 46;
1013 sq_conf.num_temp_gprs = 4;
1014 sq_conf.num_gs_gprs = 31;
1015 sq_conf.num_es_gprs = 31;
1016 sq_conf.num_hs_gprs = 23;
1017 sq_conf.num_ls_gprs = 23;
1018 sq_conf.num_ps_threads = 128;
1019 sq_conf.num_vs_threads = 20;
1020 sq_conf.num_gs_threads = 20;
1021 sq_conf.num_es_threads = 20;
1022 sq_conf.num_hs_threads = 20;
1023 sq_conf.num_ls_threads = 20;
1024 sq_conf.num_ps_stack_entries = 42;
1025 sq_conf.num_vs_stack_entries = 42;
1026 sq_conf.num_gs_stack_entries = 42;
1027 sq_conf.num_es_stack_entries = 42;
1028 sq_conf.num_hs_stack_entries = 42;
1029 sq_conf.num_ls_stack_entries = 42;
1030 break;
1031 case CHIP_FAMILY_JUNIPER:
1032 sq_conf.num_ps_gprs = 93;
1033 sq_conf.num_vs_gprs = 46;
1034 sq_conf.num_temp_gprs = 4;
1035 sq_conf.num_gs_gprs = 31;
1036 sq_conf.num_es_gprs = 31;
1037 sq_conf.num_hs_gprs = 23;
1038 sq_conf.num_ls_gprs = 23;
1039 sq_conf.num_ps_threads = 128;
1040 sq_conf.num_vs_threads = 20;
1041 sq_conf.num_gs_threads = 20;
1042 sq_conf.num_es_threads = 20;
1043 sq_conf.num_hs_threads = 20;
1044 sq_conf.num_ls_threads = 20;
1045 sq_conf.num_ps_stack_entries = 85;
1046 sq_conf.num_vs_stack_entries = 85;
1047 sq_conf.num_gs_stack_entries = 85;
1048 sq_conf.num_es_stack_entries = 85;
1049 sq_conf.num_hs_stack_entries = 85;
1050 sq_conf.num_ls_stack_entries = 85;
1051 break;
1052 case CHIP_FAMILY_CYPRESS:
1053 case CHIP_FAMILY_HEMLOCK:
1054 sq_conf.num_ps_gprs = 93;
1055 sq_conf.num_vs_gprs = 46;
1056 sq_conf.num_temp_gprs = 4;
1057 sq_conf.num_gs_gprs = 31;
1058 sq_conf.num_es_gprs = 31;
1059 sq_conf.num_hs_gprs = 23;
1060 sq_conf.num_ls_gprs = 23;
1061 sq_conf.num_ps_threads = 128;
1062 sq_conf.num_vs_threads = 20;
1063 sq_conf.num_gs_threads = 20;
1064 sq_conf.num_es_threads = 20;
1065 sq_conf.num_hs_threads = 20;
1066 sq_conf.num_ls_threads = 20;
1067 sq_conf.num_ps_stack_entries = 85;
1068 sq_conf.num_vs_stack_entries = 85;
1069 sq_conf.num_gs_stack_entries = 85;
1070 sq_conf.num_es_stack_entries = 85;
1071 sq_conf.num_hs_stack_entries = 85;
1072 sq_conf.num_ls_stack_entries = 85;
1073 break;
1074 case CHIP_FAMILY_PALM:
1075 sq_conf.num_ps_gprs = 93;
1076 sq_conf.num_vs_gprs = 46;
1077 sq_conf.num_temp_gprs = 4;
1078 sq_conf.num_gs_gprs = 31;
1079 sq_conf.num_es_gprs = 31;
1080 sq_conf.num_hs_gprs = 23;
1081 sq_conf.num_ls_gprs = 23;
1082 sq_conf.num_ps_threads = 96;
1083 sq_conf.num_vs_threads = 16;
1084 sq_conf.num_gs_threads = 16;
1085 sq_conf.num_es_threads = 16;
1086 sq_conf.num_hs_threads = 16;
1087 sq_conf.num_ls_threads = 16;
1088 sq_conf.num_ps_stack_entries = 42;
1089 sq_conf.num_vs_stack_entries = 42;
1090 sq_conf.num_gs_stack_entries = 42;
1091 sq_conf.num_es_stack_entries = 42;
1092 sq_conf.num_hs_stack_entries = 42;
1093 sq_conf.num_ls_stack_entries = 42;
1094 break;
1095 case CHIP_FAMILY_SUMO:
1096 sq_conf.num_ps_gprs = 93;
1097 sq_conf.num_vs_gprs = 46;
1098 sq_conf.num_temp_gprs = 4;
1099 sq_conf.num_gs_gprs = 31;
1100 sq_conf.num_es_gprs = 31;
1101 sq_conf.num_hs_gprs = 23;
1102 sq_conf.num_ls_gprs = 23;
1103 sq_conf.num_ps_threads = 96;
1104 sq_conf.num_vs_threads = 25;
1105 sq_conf.num_gs_threads = 25;
1106 sq_conf.num_es_threads = 25;
1107 sq_conf.num_hs_threads = 25;
1108 sq_conf.num_ls_threads = 25;
1109 sq_conf.num_ps_stack_entries = 42;
1110 sq_conf.num_vs_stack_entries = 42;
1111 sq_conf.num_gs_stack_entries = 42;
1112 sq_conf.num_es_stack_entries = 42;
1113 sq_conf.num_hs_stack_entries = 42;
1114 sq_conf.num_ls_stack_entries = 42;
1115 break;
1116 case CHIP_FAMILY_SUMO2:
1117 sq_conf.num_ps_gprs = 93;
1118 sq_conf.num_vs_gprs = 46;
1119 sq_conf.num_temp_gprs = 4;
1120 sq_conf.num_gs_gprs = 31;
1121 sq_conf.num_es_gprs = 31;
1122 sq_conf.num_hs_gprs = 23;
1123 sq_conf.num_ls_gprs = 23;
1124 sq_conf.num_ps_threads = 96;
1125 sq_conf.num_vs_threads = 25;
1126 sq_conf.num_gs_threads = 25;
1127 sq_conf.num_es_threads = 25;
1128 sq_conf.num_hs_threads = 25;
1129 sq_conf.num_ls_threads = 25;
1130 sq_conf.num_ps_stack_entries = 85;
1131 sq_conf.num_vs_stack_entries = 85;
1132 sq_conf.num_gs_stack_entries = 85;
1133 sq_conf.num_es_stack_entries = 85;
1134 sq_conf.num_hs_stack_entries = 85;
1135 sq_conf.num_ls_stack_entries = 85;
1136 break;
1137 case CHIP_FAMILY_BARTS:
1138 sq_conf.num_ps_gprs = 93;
1139 sq_conf.num_vs_gprs = 46;
1140 sq_conf.num_temp_gprs = 4;
1141 sq_conf.num_gs_gprs = 31;
1142 sq_conf.num_es_gprs = 31;
1143 sq_conf.num_hs_gprs = 23;
1144 sq_conf.num_ls_gprs = 23;
1145 sq_conf.num_ps_threads = 128;
1146 sq_conf.num_vs_threads = 20;
1147 sq_conf.num_gs_threads = 20;
1148 sq_conf.num_es_threads = 20;
1149 sq_conf.num_hs_threads = 20;
1150 sq_conf.num_ls_threads = 20;
1151 sq_conf.num_ps_stack_entries = 85;
1152 sq_conf.num_vs_stack_entries = 85;
1153 sq_conf.num_gs_stack_entries = 85;
1154 sq_conf.num_es_stack_entries = 85;
1155 sq_conf.num_hs_stack_entries = 85;
1156 sq_conf.num_ls_stack_entries = 85;
1157 break;
1158 case CHIP_FAMILY_TURKS:
1159 sq_conf.num_ps_gprs = 93;
1160 sq_conf.num_vs_gprs = 46;
1161 sq_conf.num_temp_gprs = 4;
1162 sq_conf.num_gs_gprs = 31;
1163 sq_conf.num_es_gprs = 31;
1164 sq_conf.num_hs_gprs = 23;
1165 sq_conf.num_ls_gprs = 23;
1166 sq_conf.num_ps_threads = 128;
1167 sq_conf.num_vs_threads = 20;
1168 sq_conf.num_gs_threads = 20;
1169 sq_conf.num_es_threads = 20;
1170 sq_conf.num_hs_threads = 20;
1171 sq_conf.num_ls_threads = 20;
1172 sq_conf.num_ps_stack_entries = 42;
1173 sq_conf.num_vs_stack_entries = 42;
1174 sq_conf.num_gs_stack_entries = 42;
1175 sq_conf.num_es_stack_entries = 42;
1176 sq_conf.num_hs_stack_entries = 42;
1177 sq_conf.num_ls_stack_entries = 42;
1178 break;
1179 case CHIP_FAMILY_CAICOS:
1180 sq_conf.num_ps_gprs = 93;
1181 sq_conf.num_vs_gprs = 46;
1182 sq_conf.num_temp_gprs = 4;
1183 sq_conf.num_gs_gprs = 31;
1184 sq_conf.num_es_gprs = 31;
1185 sq_conf.num_hs_gprs = 23;
1186 sq_conf.num_ls_gprs = 23;
1187 sq_conf.num_ps_threads = 128;
1188 sq_conf.num_vs_threads = 10;
1189 sq_conf.num_gs_threads = 10;
1190 sq_conf.num_es_threads = 10;
1191 sq_conf.num_hs_threads = 10;
1192 sq_conf.num_ls_threads = 10;
1193 sq_conf.num_ps_stack_entries = 42;
1194 sq_conf.num_vs_stack_entries = 42;
1195 sq_conf.num_gs_stack_entries = 42;
1196 sq_conf.num_es_stack_entries = 42;
1197 sq_conf.num_hs_stack_entries = 42;
1198 sq_conf.num_ls_stack_entries = 42;
1199 break;
1200 }
1201
1202 evergreen_sq_setup(pScrn, &sq_conf);
1203
1204 BEGIN_BATCH(27);
1205 EREG(SQ_LDS_ALLOC_PS, 0);
1206 EREG(SQ_LDS_RESOURCE_MGMT, 0x10001000);
1207 EREG(SQ_DYN_GPR_RESOURCE_LIMIT_1, 0);
1208
1209 PACK0(SQ_ESGS_RING_ITEMSIZE, 6);
1210 E32(0);
1211 E32(0);
1212 E32(0);
1213 E32(0);
1214 E32(0);
1215 E32(0);
1216
1217 PACK0(SQ_GS_VERT_ITEMSIZE, 4);
1218 E32(0);
1219 E32(0);
1220 E32(0);
1221 E32(0);
1222
1223 PACK0(SQ_VTX_BASE_VTX_LOC, 2);
1224 E32(0);
1225 E32(0);
1226 END_BATCH();
1227
1228 /* DB */
1229 BEGIN_BATCH(3 + 2);
1230 EREG(DB_Z_INFO, 0);
1231 RELOC_BATCH(accel_state->shaders_bo, RADEON_GEM_DOMAIN_VRAM, 0);
1232 END_BATCH();
1233
1234 BEGIN_BATCH(3 + 2);
1235 EREG(DB_STENCIL_INFO, 0);
1236 RELOC_BATCH(accel_state->shaders_bo, RADEON_GEM_DOMAIN_VRAM, 0);
1237 END_BATCH();
1238
1239 BEGIN_BATCH(3 + 2);
1240 EREG(DB_HTILE_DATA_BASE, 0);
1241 RELOC_BATCH(accel_state->shaders_bo, RADEON_GEM_DOMAIN_VRAM, 0);
1242 END_BATCH();
1243
1244 BEGIN_BATCH(49);
1245 EREG(DB_DEPTH_CONTROL, 0);
1246
1247 PACK0(PA_SC_VPORT_ZMIN_0, 2);
1248 EFLOAT(0.0); // PA_SC_VPORT_ZMIN_0
1249 EFLOAT(1.0); // PA_SC_VPORT_ZMAX_0
1250
1251 PACK0(DB_RENDER_CONTROL, 5);
1252 E32(STENCIL_COMPRESS_DISABLE_bit | DEPTH_COMPRESS_DISABLE_bit); // DB_RENDER_CONTROL
1253 E32(0); // DB_COUNT_CONTROL
1254 E32(0); // DB_DEPTH_VIEW
1255 E32(0x2a); // DB_RENDER_OVERRIDE
1256 E32(0); // DB_RENDER_OVERRIDE2
1257
1258 PACK0(DB_STENCIL_CLEAR, 2);
1259 E32(0); // DB_STENCIL_CLEAR
1260 E32(0); // DB_DEPTH_CLEAR
1261
1262 EREG(DB_ALPHA_TO_MASK, ((2 << ALPHA_TO_MASK_OFFSET0_shift) |
1263 (2 << ALPHA_TO_MASK_OFFSET1_shift) |
1264 (2 << ALPHA_TO_MASK_OFFSET2_shift) |
1265 (2 << ALPHA_TO_MASK_OFFSET3_shift)));
1266
1267 EREG(DB_SHADER_CONTROL, ((EARLY_Z_THEN_LATE_Z << Z_ORDER_shift) |
1268 DUAL_EXPORT_ENABLE_bit)); /* Only useful if no depth export */
1269
1270 // SX
1271 EREG(SX_MISC, 0);
1272
1273 // CB
1274 PACK0(SX_ALPHA_TEST_CONTROL, 5);
1275 E32(0); // SX_ALPHA_TEST_CONTROL
1276 E32(0x00000000); //CB_BLEND_RED
1277 E32(0x00000000); //CB_BLEND_GREEN
1278 E32(0x00000000); //CB_BLEND_BLUE
1279 E32(0x00000000); //CB_BLEND_ALPHA
1280
1281 EREG(CB_SHADER_MASK, OUTPUT0_ENABLE_mask);
1282
1283 // SC
1284 EREG(PA_SC_WINDOW_OFFSET, ((0 << WINDOW_X_OFFSET_shift) |
1285 (0 << WINDOW_Y_OFFSET_shift)));
1286 EREG(PA_SC_CLIPRECT_RULE, CLIP_RULE_mask);
1287 EREG(PA_SC_EDGERULE, 0xAAAAAAAA);
1288 EREG(PA_SU_HARDWARE_SCREEN_OFFSET, 0);
1289 END_BATCH();
1290
1291 /* clip boolean is set to always visible -> doesn't matter */
1292 for (i = 0; i < PA_SC_CLIPRECT_0_TL_num; i++)
1293 evergreen_set_clip_rect (pScrn, i, 0, 0, 8192, 8192);
1294
1295 for (i = 0; i < PA_SC_VPORT_SCISSOR_0_TL_num; i++)
1296 evergreen_set_vport_scissor (pScrn, i, 0, 0, 8192, 8192);
1297
1298 BEGIN_BATCH(57);
1299 PACK0(PA_SC_MODE_CNTL_0, 2);
1300 E32(0); // PA_SC_MODE_CNTL_0
1301 E32(0); // PA_SC_MODE_CNTL_1
1302
1303 PACK0(PA_SC_LINE_CNTL, 16);
1304 E32(0); // PA_SC_LINE_CNTL
1305 E32(0); // PA_SC_AA_CONFIG
1306 E32(((X_ROUND_TO_EVEN << PA_SU_VTX_CNTL__ROUND_MODE_shift) |
1307 PIX_CENTER_bit)); // PA_SU_VTX_CNTL
1308 EFLOAT(1.0); // PA_CL_GB_VERT_CLIP_ADJ
1309 EFLOAT(1.0); // PA_CL_GB_VERT_DISC_ADJ
1310 EFLOAT(1.0); // PA_CL_GB_HORZ_CLIP_ADJ
1311 EFLOAT(1.0); // PA_CL_GB_HORZ_DISC_ADJ
1312 E32(0); // PA_SC_AA_SAMPLE_LOCS_0
1313 E32(0);
1314 E32(0);
1315 E32(0);
1316 E32(0);
1317 E32(0);
1318 E32(0);
1319 E32(0); // PA_SC_AA_SAMPLE_LOCS_7
1320 E32(0xFFFFFFFF); // PA_SC_AA_MASK
1321
1322 // CL
1323 PACK0(PA_CL_CLIP_CNTL, 8);
1324 E32(CLIP_DISABLE_bit); // PA_CL_CLIP_CNTL
1325 E32(FACE_bit); // PA_SU_SC_MODE_CNTL
1326 E32(VTX_XY_FMT_bit); // PA_CL_VTE_CNTL
1327 E32(0); // PA_CL_VS_OUT_CNTL
1328 E32(0); // PA_CL_NANINF_CNTL
1329 E32(0); // PA_SU_LINE_STIPPLE_CNTL
1330 E32(0); // PA_SU_LINE_STIPPLE_SCALE
1331 E32(0); // PA_SU_PRIM_FILTER_CNTL
1332
1333 // SU
1334 PACK0(PA_SU_POLY_OFFSET_DB_FMT_CNTL, 6);
1335 E32(0);
1336 E32(0);
1337 E32(0);
1338 E32(0);
1339 E32(0);
1340 E32(0);
1341
1342 /* src = semantic id 0; mask = semantic id 1 */
1343 EREG(SPI_VS_OUT_ID_0, ((0 << SEMANTIC_0_shift) |
1344 (1 << SEMANTIC_1_shift)));
1345 PACK0(SPI_PS_INPUT_CNTL_0 + (0 << 2), 2);
1346 /* SPI_PS_INPUT_CNTL_0 maps to GPR[0] - load with semantic id 0 */
1347 E32(((0 << SEMANTIC_shift) |
1348 (0x01 << DEFAULT_VAL_shift)));
1349 /* SPI_PS_INPUT_CNTL_1 maps to GPR[1] - load with semantic id 1 */
1350 E32(((1 << SEMANTIC_shift) |
1351 (0x01 << DEFAULT_VAL_shift)));
1352
1353 PACK0(SPI_INPUT_Z, 8);
1354 E32(0); // SPI_INPUT_Z
1355 E32(0); // SPI_FOG_CNTL
1356 E32(LINEAR_CENTROID_ENA__X_ON_AT_CENTROID << LINEAR_CENTROID_ENA_shift); // SPI_BARYC_CNTL
1357 E32(0); // SPI_PS_IN_CONTROL_2
1358 E32(0);
1359 E32(0);
1360 E32(0);
1361 E32(0);
1362 END_BATCH();
1363
1364 // clear FS
1365 fs_conf.bo = accel_state->shaders_bo;
1366 evergreen_fs_setup(pScrn, &fs_conf, RADEON_GEM_DOMAIN_VRAM);
1367
1368 // VGT
1369 BEGIN_BATCH(46);
1370
1371 PACK0(VGT_MAX_VTX_INDX, 4);
1372 E32(0xffffff);
1373 E32(0);
1374 E32(0);
1375 E32(0);
1376
1377 PACK0(VGT_INSTANCE_STEP_RATE_0, 2);
1378 E32(0);
1379 E32(0);
1380
1381 PACK0(VGT_REUSE_OFF, 2);
1382 E32(0);
1383 E32(0);
1384
1385 PACK0(PA_SU_POINT_SIZE, 17);
1386 E32(0); // PA_SU_POINT_SIZE
1387 E32(0); // PA_SU_POINT_MINMAX
1388 E32((8 << PA_SU_LINE_CNTL__WIDTH_shift)); /* Line width 1 pixel */ // PA_SU_LINE_CNTL
1389 E32(0); // PA_SC_LINE_STIPPLE
1390 E32(0); // VGT_OUTPUT_PATH_CNTL
1391 E32(0); // VGT_HOS_CNTL
1392 E32(0);
1393 E32(0);
1394 E32(0);
1395 E32(0);
1396 E32(0);
1397 E32(0);
1398 E32(0);
1399 E32(0);
1400 E32(0);
1401 E32(0);
1402 E32(0); // VGT_GS_MODE
1403
1404 EREG(VGT_PRIMITIVEID_EN, 0);
1405 EREG(VGT_MULTI_PRIM_IB_RESET_EN, 0);
1406 EREG(VGT_SHADER_STAGES_EN, 0);
1407
1408 PACK0(VGT_STRMOUT_CONFIG, 2);
1409 E32(0);
1410 E32(0);
1411 END_BATCH();
1412 }
1413
1414
1415 /*
1416 * Commands
1417 */
1418
1419 void
evergreen_draw_auto(ScrnInfoPtr pScrn,draw_config_t * draw_conf)1420 evergreen_draw_auto(ScrnInfoPtr pScrn, draw_config_t *draw_conf)
1421 {
1422 RADEONInfoPtr info = RADEONPTR(pScrn);
1423
1424 BEGIN_BATCH(10);
1425 EREG(VGT_PRIMITIVE_TYPE, draw_conf->prim_type);
1426 PACK3(IT_INDEX_TYPE, 1);
1427 #if X_BYTE_ORDER == X_BIG_ENDIAN
1428 E32(IT_INDEX_TYPE_SWAP_MODE(ENDIAN_8IN32) | draw_conf->index_type);
1429 #else
1430 E32(draw_conf->index_type);
1431 #endif
1432 PACK3(IT_NUM_INSTANCES, 1);
1433 E32(draw_conf->num_instances);
1434 PACK3(IT_DRAW_INDEX_AUTO, 2);
1435 E32(draw_conf->num_indices);
1436 E32(draw_conf->vgt_draw_initiator);
1437 END_BATCH();
1438 }
1439
/*
 * Finish the accelerated operation in progress and submit its draw.
 *
 * Returns without doing anything when vbo.vb_start_op is -1.  When no
 * vertex data was added since the operation started (vb_offset equals
 * vb_start_op), the indirect buffer is discarded and flushed instead of
 * drawing.  Otherwise the vertex range [vb_start_op, vb_offset) is bound as
 * the VS fetch resource, a RECTLIST auto-index draw is emitted, a surface
 * sync on the destination is queued, and the per-operation markers are
 * reset.
 *
 * pScrn:     screen whose command stream receives the packets
 * vtx_size:  size of one vertex in bytes (converted to dwords with / 4)
 */
void
evergreen_finish_op(ScrnInfoPtr pScrn, int vtx_size)
{
    RADEONInfoPtr info = RADEONPTR(pScrn);
    struct radeon_accel_state *accel_state = info->accel_state;
    draw_config_t draw_conf;
    vtx_resource_t vtx_res;

    /* no operation was started */
    if (accel_state->vbo.vb_start_op == -1)
        return;

    /* operation started but produced no vertices: drop it */
    if (accel_state->vbo.vb_start_op == accel_state->vbo.vb_offset) {
        radeon_ib_discard(pScrn);
        radeon_cs_flush_indirect(pScrn);
        return;
    }

    CLEAR(vtx_res);
    CLEAR(draw_conf);

    /* Vertex buffer setup */
    accel_state->vbo.vb_size = accel_state->vbo.vb_offset - accel_state->vbo.vb_start_op;

    vtx_res.id = SQ_FETCH_RESOURCE_vs;
    vtx_res.bo = accel_state->vbo.vb_bo;
    vtx_res.vb_addr = accel_state->vbo.vb_start_op;
    vtx_res.vtx_size_dw = vtx_size / 4;
    vtx_res.vtx_num_entries = accel_state->vbo.vb_size / 4;
    vtx_res.dst_sel_x = SQ_SEL_X;
    vtx_res.dst_sel_y = SQ_SEL_Y;
    vtx_res.dst_sel_z = SQ_SEL_Z;
    vtx_res.dst_sel_w = SQ_SEL_W;
#if X_BYTE_ORDER == X_BIG_ENDIAN
    vtx_res.endian = SQ_ENDIAN_8IN32;
#endif
    evergreen_set_vtx_resource(pScrn, &vtx_res, RADEON_GEM_DOMAIN_GTT);

    /* Draw: one index per vertex (total dwords / dwords per vertex) */
    draw_conf.num_indices = vtx_res.vtx_num_entries / vtx_res.vtx_size_dw;
    draw_conf.num_instances = 1;
    draw_conf.index_type = DI_INDEX_SIZE_16_BIT;
    draw_conf.prim_type = DI_PT_RECTLIST;
    draw_conf.vgt_draw_initiator = DI_SRC_SEL_AUTO_INDEX;

    evergreen_draw_auto(pScrn, &draw_conf);

    /* sync dst surface */
    evergreen_cp_set_surface_sync(pScrn,
                                  (CB_ACTION_ENA_bit | CB0_DEST_BASE_ENA_bit),
                                  accel_state->dst_size, 0,
                                  accel_state->dst_obj.bo, 0,
                                  accel_state->dst_obj.domain);

    /* mark both buffers as having no operation in progress */
    accel_state->vbo.vb_start_op = -1;
    accel_state->cbuf.vb_start_op = -1;
    accel_state->ib_reset_op = 0;
}
1494
1495