1 /**************************************************************************
2 
3 Copyright 2000, 2001 ATI Technologies Inc., Ontario, Canada, and
4                      VMware, Inc.
5 
6 All Rights Reserved.
7 
8 Permission is hereby granted, free of charge, to any person obtaining
9 a copy of this software and associated documentation files (the
10 "Software"), to deal in the Software without restriction, including
11 without limitation the rights to use, copy, modify, merge, publish,
12 distribute, sublicense, and/or sell copies of the Software, and to
13 permit persons to whom the Software is furnished to do so, subject to
14 the following conditions:
15 
16 The above copyright notice and this permission notice (including the
17 next paragraph) shall be included in all copies or substantial
18 portions of the Software.
19 
20 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
21 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
22 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
23 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
24 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
25 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
26 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
27 
28 **************************************************************************/
29 
30 /*
31  * Authors:
32  *   Keith Whitwell <keithw@vmware.com>
33  */
34 
35 #include "main/glheader.h"
36 #include "main/mtypes.h"
37 #include "main/light.h"
38 #include "main/enums.h"
39 #include "main/state.h"
40 
41 #include "util/macros.h"
42 
43 #include "vbo/vbo.h"
44 #include "tnl/tnl.h"
45 #include "tnl/t_pipeline.h"
46 
47 #include "radeon_common.h"
48 #include "radeon_context.h"
49 #include "radeon_state.h"
50 #include "radeon_ioctl.h"
51 #include "radeon_tcl.h"
52 #include "radeon_swtcl.h"
53 #include "radeon_maos.h"
54 #include "radeon_common_context.h"
55 
56 
57 
58 /*
59  * Render unclipped vertex buffers by emitting vertices directly to
60  * dma buffers.  Use strip/fan hardware primitives where possible.
61  * Try to simulate missing primitives with indexed vertices.
62  */
/* Capabilities advertised to the t_dd_dmatmp2.h render template:
 * which GL primitive types the CP vertex-buffer path handles
 * natively.  Types marked 0 (line loops, quads, quad strips) are
 * decomposed by the template, using indexed vertices where
 * HAVE_ELTS permits.
 */
#define HAVE_POINTS      1
#define HAVE_LINES       1
#define HAVE_LINE_LOOP   0
#define HAVE_LINE_STRIPS 1
#define HAVE_TRIANGLES   1
#define HAVE_TRI_STRIPS  1
#define HAVE_TRI_FANS    1
#define HAVE_QUADS       0
#define HAVE_QUAD_STRIPS 0
#define HAVE_POLYGONS    1
#define HAVE_ELTS        1


/* Map the template's primitive names onto CP_VC_CNTL primitive-type
 * codes; zero entries correspond to HAVE_* == 0 above.  GL_POLYGON is
 * rendered as a triangle fan, with the flat-shade provoking vertex
 * fixed up in radeonTclPrimitive().
 */
#define HW_POINTS           RADEON_CP_VC_CNTL_PRIM_TYPE_POINT
#define HW_LINES            RADEON_CP_VC_CNTL_PRIM_TYPE_LINE
#define HW_LINE_LOOP        0
#define HW_LINE_STRIP       RADEON_CP_VC_CNTL_PRIM_TYPE_LINE_STRIP
#define HW_TRIANGLES        RADEON_CP_VC_CNTL_PRIM_TYPE_TRI_LIST
#define HW_TRIANGLE_STRIP_0 RADEON_CP_VC_CNTL_PRIM_TYPE_TRI_STRIP
#define HW_TRIANGLE_STRIP_1 0
#define HW_TRIANGLE_FAN     RADEON_CP_VC_CNTL_PRIM_TYPE_TRI_FAN
#define HW_QUADS            0
#define HW_QUAD_STRIP       0
#define HW_POLYGON          RADEON_CP_VC_CNTL_PRIM_TYPE_TRI_FAN
88 
89 static GLboolean discrete_prim[0x10] = {
90    0,				/* 0 none */
91    1,				/* 1 points */
92    1,				/* 2 lines */
93    0,				/* 3 line_strip */
94    1,				/* 4 tri_list */
95    0,				/* 5 tri_fan */
96    0,				/* 6 tri_type2 */
97    1,				/* 7 rect list (unused) */
98    1,				/* 8 3vert point */
99    1,				/* 9 3vert line */
100    0,
101    0,
102    0,
103    0,
104    0,
105    0,
106 };
107 
108 
/* Parameters for the t_dd_dmatmp2.h render template instantiated at
 * the bottom of this section.
 */
#define LOCAL_VARS r100ContextPtr rmesa = R100_CONTEXT(ctx)
#define ELT_TYPE  GLushort

/* Begin an indexed primitive: same hardware type, but walking an
 * element list instead of the vertex arrays.
 */
#define ELT_INIT(prim, hw_prim) \
   radeonTclPrimitive( ctx, prim, hw_prim | RADEON_CP_VC_CNTL_PRIM_WALK_IND )

#define GET_MESA_ELTS() rmesa->tcl.Elts


/* Don't really know how many elts will fit in what's left of cmdbuf,
 * as there is state to emit, etc:
 */

/* Testing on isosurf shows a maximum around here.  Don't know if it's
 * the card or driver or kernel module that is causing the behaviour.
 */
#define GET_MAX_HW_ELTS() 300


/* Restart the line-stipple pattern by re-emitting the (otherwise
 * unchanged) 'lin' state atom.
 */
#define RESET_STIPPLE() do {			\
   RADEON_STATECHANGE( rmesa, lin );		\
   radeonEmitState(&rmesa->radeon);			\
} while (0)

/* Enable/disable the hardware's automatic per-primitive stipple
 * reset, then flush the updated 'lin' atom.
 */
#define AUTO_STIPPLE( mode )  do {		\
   RADEON_STATECHANGE( rmesa, lin );		\
   if (mode)					\
      rmesa->hw.lin.cmd[LIN_RE_LINE_PATTERN] |=	\
	 RADEON_LINE_PATTERN_AUTO_RESET;	\
   else						\
      rmesa->hw.lin.cmd[LIN_RE_LINE_PATTERN] &=	\
	 ~RADEON_LINE_PATTERN_AUTO_RESET;	\
   radeonEmitState(&rmesa->radeon);		\
} while (0)



/* Reserve command-buffer space for 'nr' elts; see radeonAllocElts(). */
#define ALLOC_ELTS(nr)	radeonAllocElts( rmesa, nr )
147 
/* ALLOC_ELTS callback for the render template: make room for 'nr'
 * 16-bit indices in the command stream.  Any pending dma must be
 * flushed and the vertex arrays (re)emitted before the open-ended elt
 * packet is started, so the ordering below matters.
 */
static GLushort *radeonAllocElts( r100ContextPtr rmesa, GLuint nr )
{
      if (rmesa->radeon.dma.flush)
	 rmesa->radeon.dma.flush( &rmesa->radeon.glCtx );

      radeonEmitAOS( rmesa,
		     rmesa->radeon.tcl.aos_count, 0 );

      return radeonAllocEltsOpenEnded( rmesa, rmesa->tcl.vertex_format,
				       rmesa->tcl.hw_primitive, nr );
}
159 
/* Nothing to do on elt-primitive close: the if(0) swallows the call
 * while keeping the template's "CLOSE_ELTS();" statement valid.
 */
#define CLOSE_ELTS() if (0)  RADEON_NEWPRIM( rmesa )
161 
162 
163 
164 /* TODO: Try to extend existing primitive if both are identical,
165  * discrete and there are no intervening state changes.  (Somewhat
166  * duplicates changes to DrawArrays code)
167  */
/* Emit one unindexed (array-walk) hardware primitive.
 *
 * \param prim    GL primitive mode, used for provoking-vertex setup.
 * \param hwprim  CP_VC_CNTL hardware primitive type.
 * \param start   Index of the first vertex in the arrays.
 * \param count   One past the index of the last vertex.
 */
static void radeonEmitPrim( struct gl_context *ctx,
			    GLenum prim,
			    GLuint hwprim,
			    GLuint start,
			    GLuint count )
{
   r100ContextPtr rmesa = R100_CONTEXT( ctx );
   const GLuint nr_verts = count - start;

   radeonTclPrimitive( ctx, prim, hwprim );

   /* The vbuf packet has no start-offset parameter, so rebase the
    * arrays at 'start' instead.
    */
   radeonEmitAOS( rmesa, rmesa->radeon.tcl.aos_count, start );

   radeonEmitVbufPrim( rmesa,
		       rmesa->tcl.vertex_format,
		       rmesa->tcl.hw_primitive,
		       nr_verts );
}
188 
/* Template hook: emit an array-walk primitive.  rmesa is referenced
 * to silence unused-variable warnings in template expansions.
 */
#define EMIT_PRIM( ctx, prim, hwprim, start, count ) do {       \
   radeonEmitPrim( ctx, prim, hwprim, start, count );           \
   (void) rmesa; } while (0)

/* Primitives with fewer vertices than this may be converted to elts
 * by the template renderer (see also radeonEnsureEmitSize).
 */
#define MAX_CONVERSION_SIZE 40

/* Try & join small primitives
 */
#if 0
#define PREFER_DISCRETE_ELT_PRIM( NR, PRIM ) 0
#else
#define PREFER_DISCRETE_ELT_PRIM( NR, PRIM )			\
  ((NR) < 20 ||							\
   ((NR) < 40 &&						\
    rmesa->tcl.hw_primitive == (PRIM|				\
			    RADEON_CP_VC_CNTL_PRIM_WALK_IND|	\
			    RADEON_CP_VC_CNTL_TCL_ENABLE)))
#endif

#ifdef MESA_BIG_ENDIAN
/* We could do without (most of) this ugliness if dest was always 32 bit word aligned... */
/* Store one 16-bit elt, swapping within the containing 32-bit word so
 * the (little-endian) hardware sees the indices in the right order.
 */
#define EMIT_ELT(dest, offset, x) do {				\
	int off = offset + ( ( (uintptr_t)dest & 0x2 ) >> 1 );	\
	GLushort *des = (GLushort *)( (uintptr_t)dest & ~0x2 );	\
	(des)[ off + 1 - 2 * ( off & 1 ) ] = (GLushort)(x); 	\
	(void)rmesa; } while (0)
#else
/* Store one 16-bit elt at dest[offset]. */
#define EMIT_ELT(dest, offset, x) do {				\
	(dest)[offset] = (GLushort) (x);			\
	(void)rmesa; } while (0)
#endif
220 
/* Pack two 16-bit elts into a single 32-bit store.  Caller must
 * guarantee dest+offset is 32-bit aligned (cf. the big-endian
 * EMIT_ELT above for the unaligned case).  Arguments are fully
 * parenthesized and the body wrapped in do/while(0) so the macro is
 * safe after if/else and with expression arguments.
 */
#define EMIT_TWO_ELTS(dest, offset, x, y) do {				\
	*(GLuint *)((dest)+(offset)) = ((y)<<16)|(x);			\
   } while (0)
222 
223 
224 
/* Instantiate the generic DMA render template with tcl_-prefixed
 * names; this generates tcl_render_tab_verts / tcl_render_tab_elts
 * used by the entrypoints below.
 */
#define TAG(x) tcl_##x
#include "tnl_dd/t_dd_dmatmp2.h"
227 
228 /**********************************************************************/
229 /*                          External entrypoints                     */
230 /**********************************************************************/
231 
/* Render vertices [first, last) from the current arrays, dispatching
 * to the template-generated vertex-walk renderer for the GL primitive
 * mode carried in the low bits of 'flags' (PRIM_MODE_MASK).
 */
void radeonEmitPrimitive( struct gl_context *ctx,
			  GLuint first,
			  GLuint last,
			  GLuint flags )
{
   tcl_render_tab_verts[flags&PRIM_MODE_MASK]( ctx, first, last, flags );
}
239 
/* As radeonEmitPrimitive(), but renders through the indexed (elt)
 * path; indices come from rmesa->tcl.Elts (see GET_MESA_ELTS above).
 */
void radeonEmitEltPrimitive( struct gl_context *ctx,
			     GLuint first,
			     GLuint last,
			     GLuint flags )
{
   tcl_render_tab_elts[flags&PRIM_MODE_MASK]( ctx, first, last, flags );
}
247 
/* Select the current TCL hardware primitive, validating GL state and
 * fixing up the flat-shade provoking vertex as required.
 *
 * \param prim     GL primitive mode (for the GL_POLYGON special case).
 * \param hw_prim  CP_VC_CNTL primitive type, possibly with the
 *                 indexed-walk bit already set.
 */
void radeonTclPrimitive( struct gl_context *ctx,
			 GLenum prim,
			 int hw_prim )
{
   r100ContextPtr rmesa = R100_CONTEXT(ctx);
   const GLuint tcl_prim = hw_prim | RADEON_CP_VC_CNTL_TCL_ENABLE;
   GLuint cntl;

   radeon_prepare_render(&rmesa->radeon);
   if (rmesa->radeon.NewGLState)
      radeonValidateState( ctx );

   /* Only continue the previous packet when the primitive is identical
    * and of a discrete (concatenable) type; otherwise start fresh.
    */
   if (tcl_prim != rmesa->tcl.hw_primitive ||
       !discrete_prim[hw_prim & 0xf]) {
      RADEON_NEWPRIM( rmesa );
      rmesa->tcl.hw_primitive = tcl_prim;
   }

   /* Flat-shaded GL_POLYGON takes its color from the first vertex;
    * everything else uses the last vertex.
    */
   cntl = rmesa->hw.set.cmd[SET_SE_CNTL] & ~RADEON_FLAT_SHADE_VTX_LAST;
   if (prim == GL_POLYGON && ctx->Light.ShadeModel == GL_FLAT)
      cntl |= RADEON_FLAT_SHADE_VTX_0;
   else
      cntl |= RADEON_FLAT_SHADE_VTX_LAST;

   if (cntl != rmesa->hw.set.cmd[SET_SE_CNTL]) {
      RADEON_STATECHANGE( rmesa, set );
      rmesa->hw.set.cmd[SET_SE_CNTL] = cntl;
   }
}
279 
280 /**
281  * Predict total emit size for next rendering operation so there is no flush in middle of rendering
282  * Prediction has to aim towards the best possible value that is worse than worst case scenario
283  */
/**
 * Predict total emit size for next rendering operation so there is no flush in middle of rendering
 * Prediction has to aim towards the best possible value that is worse than worst case scenario
 *
 * \param inputs  VERT_BIT_* mask of arrays that will be emitted.
 * \return predicted number of command-buffer dwords the operation may
 *         consume (including re-emitted state).
 */
static GLuint radeonEnsureEmitSize( struct gl_context * ctx , GLuint inputs )
{
  r100ContextPtr rmesa = R100_CONTEXT(ctx);
  TNLcontext *tnl = TNL_CONTEXT(ctx);
  struct vertex_buffer *VB = &tnl->vb;
  GLuint space_required;
  GLuint state_size;
  GLuint nr_aos = 1; /* radeonEmitArrays does always emit one */
  int i;
  /* list of flags that are allocating aos object */
  const GLuint flags_to_check[] = {
    VERT_BIT_NORMAL,
    VERT_BIT_COLOR0,
    VERT_BIT_COLOR1,
    VERT_BIT_FOG
  };
  /* predict number of aos to emit */
  for (i=0; i < ARRAY_SIZE(flags_to_check); ++i)
  {
    if (inputs & flags_to_check[i])
      ++nr_aos;
  }
  /* each enabled texcoord array adds one more aos */
  for (i = 0; i < ctx->Const.MaxTextureUnits; ++i)
  {
    if (inputs & VERT_BIT_TEX(i))
      ++nr_aos;
  }

  {
    /* count the prediction for state size */
    space_required = 0;
    state_size = radeonCountStateEmitSize( &rmesa->radeon );
    /* tcl may be changed in radeonEmitArrays so account for it if not dirty */
    if (!rmesa->hw.tcl.dirty)
      state_size += rmesa->hw.tcl.check( &rmesa->radeon.glCtx, &rmesa->hw.tcl );
    /* predict size for elements */
    for (i = 0; i < VB->PrimitiveCount; ++i)
    {
      /* If primitive.count is less than MAX_CONVERSION_SIZE
	 rendering code may decide convert to elts.
	 In that case we have to make pessimistic prediction.
	 and use larger of 2 paths. */
      const GLuint elts = ELTS_BUFSZ(nr_aos);
      const GLuint index = INDEX_BUFSZ;
      const GLuint vbuf = VBUF_BUFSZ;
      if (!VB->Primitive[i].count)
	continue;
      if ( (!VB->Elts && VB->Primitive[i].count >= MAX_CONVERSION_SIZE)
	  || vbuf > index + elts)
	space_required += vbuf;
      else
	space_required += index + elts;
      /* pessimistic per-vertex allowance -- TODO confirm the factor 3
	 against the actual packet layout */
      space_required += VB->Primitive[i].count * 3;
      space_required += AOS_BUFSZ(nr_aos);
    }
    space_required += SCISSOR_BUFSZ;
  }
  /* flush the buffer in case we need more than is left. */
  if (rcommonEnsureCmdBufSpace(&rmesa->radeon, space_required, __func__))
    /* a flush occurred, so all state must be re-emitted: re-count it */
    return space_required + radeonCountStateEmitSize( &rmesa->radeon );
  else
    return space_required + state_size;
}
347 
348 /**********************************************************************/
349 /*                          Render pipeline stage                     */
350 /**********************************************************************/
351 
352 
353 /* TCL render.
354  */
/* TCL render: feed the current TNL vertex buffer through the hardware
 * transform & lighting path.  Returns GL_TRUE to pass the primitives
 * on to the software pipeline (fallback), GL_FALSE when rendering was
 * completed here.
 */
static GLboolean radeon_run_tcl_render( struct gl_context *ctx,
					struct tnl_pipeline_stage *stage )
{
   r100ContextPtr rmesa = R100_CONTEXT(ctx);
   TNLcontext *tnl = TNL_CONTEXT(ctx);
   struct vertex_buffer *VB = &tnl->vb;
   GLuint inputs = VERT_BIT_POS | VERT_BIT_COLOR0;
   GLuint i;
   GLuint emit_end;

   /* TODO: separate this from the swtnl pipeline
    */
   if (rmesa->radeon.TclFallback)
      return GL_TRUE;	/* fallback to software t&l */

   if (VB->Count == 0)
      return GL_FALSE;

   /* NOTE: inputs != tnl->render_inputs - these are the untransformed
    * inputs.
    */
   if (ctx->Light.Enabled) {
      inputs |= VERT_BIT_NORMAL;
   }

   if (_mesa_need_secondary_color(ctx)) {
      inputs |= VERT_BIT_COLOR1;
   }

   if ( (ctx->Fog.FogCoordinateSource == GL_FOG_COORD) && ctx->Fog.Enabled ) {
      inputs |= VERT_BIT_FOG;
   }

   for (i = 0 ; i < ctx->Const.MaxTextureUnits; i++) {
      if (ctx->Texture.Unit[i]._Current) {
      /* TODO: probably should not emit texture coords when texgen is enabled */
	 if (rmesa->TexGenNeedNormals[i]) {
	    inputs |= VERT_BIT_NORMAL;
	 }
	 inputs |= VERT_BIT_TEX(i);
      }
   }

   radeonReleaseArrays( ctx, ~0 );
   /* Predict the emit size up front so no flush can land mid-render;
    * emit_end is the cmdbuf position we expect not to exceed.
    */
   emit_end = radeonEnsureEmitSize( ctx, inputs )
     + rmesa->radeon.cmdbuf.cs->cdw;
   radeonEmitArrays( ctx, inputs );

   rmesa->tcl.Elts = VB->Elts;

   for (i = 0 ; i < VB->PrimitiveCount ; i++)
   {
      GLuint prim = _tnl_translate_prim(&VB->Primitive[i]);
      GLuint start = VB->Primitive[i].start;
      GLuint length = VB->Primitive[i].count;

      if (!length)
	 continue;

      if (rmesa->tcl.Elts)
	 radeonEmitEltPrimitive( ctx, start, start+length, prim );
      else
	 radeonEmitPrimitive( ctx, start, start+length, prim );
   }

   /* Soft check of the prediction: warn (once) if we actually emitted
    * more than radeonEnsureEmitSize allowed for.
    */
   if (emit_end < rmesa->radeon.cmdbuf.cs->cdw)
      WARN_ONCE("Rendering was %d commands larger than predicted size."
	  " We might overflow  command buffer.\n", rmesa->radeon.cmdbuf.cs->cdw - emit_end);

   return GL_FALSE;		/* finished the pipe */
}
426 
427 
428 
429 /* Initial state for tcl stage.
430  */
/* Initial state for tcl stage.
 * Only the name and the run callback are provided; the remaining
 * hooks are unused.  NOTE(review): positional initializer -- confirm
 * field order against struct tnl_pipeline_stage in tnl/t_pipeline.h
 * before adding entries.
 */
const struct tnl_pipeline_stage _radeon_tcl_stage =
{
   "radeon render",		/* stage name (debug output) */
   NULL,
   NULL,
   NULL,
   NULL,
   radeon_run_tcl_render	/* run */
};
440 
441 
442 
443 /**********************************************************************/
444 /*                 Validate state at pipeline start                   */
445 /**********************************************************************/
446 
447 
448 /*-----------------------------------------------------------------------
449  * Manage TCL fallbacks
450  */
451 
452 
/* Enter the software-T&L fallback: reset swtcl vertex state, choose
 * the software vertex/render paths, and hook material changes to the
 * software lighting tables.
 */
static void transition_to_swtnl( struct gl_context *ctx )
{
   r100ContextPtr rmesa = R100_CONTEXT(ctx);
   TNLcontext *tnl = TNL_CONTEXT(ctx);
   GLuint se_cntl;

   RADEON_NEWPRIM( rmesa );
   rmesa->swtcl.vertex_format = 0;

   radeonChooseVertexState( ctx );
   radeonChooseRenderState( ctx );

   _tnl_validate_shine_tables( ctx );

   tnl->Driver.NotifyMaterialChange =
      _tnl_validate_shine_tables;

   radeonReleaseArrays( ctx, ~0 );

   /* swtcl delivers the flat-shade color in the last vertex. */
   se_cntl = rmesa->hw.set.cmd[SET_SE_CNTL];
   se_cntl |= RADEON_FLAT_SHADE_VTX_LAST;

   if (se_cntl != rmesa->hw.set.cmd[SET_SE_CNTL]) {
      RADEON_STATECHANGE( rmesa, set );
      rmesa->hw.set.cmd[SET_SE_CNTL] = se_cntl;
   }
}
480 
481 
transition_to_hwtnl(struct gl_context * ctx)482 static void transition_to_hwtnl( struct gl_context *ctx )
483 {
484    r100ContextPtr rmesa = R100_CONTEXT(ctx);
485    TNLcontext *tnl = TNL_CONTEXT(ctx);
486    GLuint se_coord_fmt = rmesa->hw.set.cmd[SET_SE_COORDFMT];
487 
488    se_coord_fmt &= ~(RADEON_VTX_XY_PRE_MULT_1_OVER_W0 |
489 		     RADEON_VTX_Z_PRE_MULT_1_OVER_W0 |
490 		     RADEON_VTX_W0_IS_NOT_1_OVER_W0);
491    se_coord_fmt |= RADEON_VTX_W0_IS_NOT_1_OVER_W0;
492 
493    if ( se_coord_fmt != rmesa->hw.set.cmd[SET_SE_COORDFMT] ) {
494       RADEON_STATECHANGE( rmesa, set );
495       rmesa->hw.set.cmd[SET_SE_COORDFMT] = se_coord_fmt;
496       _tnl_need_projected_coords( ctx, GL_FALSE );
497    }
498 
499    radeonUpdateMaterial( ctx );
500 
501    tnl->Driver.NotifyMaterialChange = radeonUpdateMaterial;
502 
503    if ( rmesa->radeon.dma.flush )
504       rmesa->radeon.dma.flush( &rmesa->radeon.glCtx );
505 
506    rmesa->radeon.dma.flush = NULL;
507    rmesa->swtcl.vertex_format = 0;
508 
509    //   if (rmesa->swtcl.indexed_verts.buf)
510    //      radeonReleaseDmaRegion( rmesa, &rmesa->swtcl.indexed_verts,
511    //			      __func__ );
512 
513    if (RADEON_DEBUG & RADEON_FALLBACKS)
514       fprintf(stderr, "Radeon end tcl fallback\n");
515 }
516 
/* Human-readable names for TCL fallback bits, indexed by bit position
 * (see getFallbackString below).
 * NOTE(review): ordering is assumed to mirror the RADEON_TCL_FALLBACK_*
 * bit definitions -- confirm against radeon_context.h when editing.
 */
static char *fallbackStrings[] = {
   "Rasterization fallback",
   "Unfilled triangles",
   "Twosided lighting, differing materials",
   "Materials in VB (maybe between begin/end)",
   "Texgen unit 0",
   "Texgen unit 1",
   "Texgen unit 2",
   "User disable",
   "Fogcoord with separate specular lighting"
};
528 
529 
/* Map a single fallback bit to its description.
 *
 * \param bit  a TCL fallback mask with (at most) one bit set; bit 0 or
 *             bit 1 both map to fallbackStrings[0].
 * \return a static, read-only string; never NULL.
 */
static char *getFallbackString(GLuint bit)
{
   unsigned i = 0;
   while (bit > 1) {
      i++;
      bit >>= 1;
   }
   /* Bits beyond the table would previously read out of bounds. */
   if (i >= ARRAY_SIZE(fallbackStrings))
      return "unknown fallback";
   return fallbackStrings[i];
}
539 
540 
541 
/* Set (mode != 0) or clear one TCL fallback bit, switching between the
 * hardware and software T&L paths on the first-set / last-cleared edge.
 *
 * \param bit   RADEON_TCL_FALLBACK_* bit to change.
 * \param mode  nonzero to enter the fallback, zero to leave it.
 */
void radeonTclFallback( struct gl_context *ctx, GLuint bit, GLboolean mode )
{
   r100ContextPtr rmesa = R100_CONTEXT(ctx);
   const GLuint prev = rmesa->radeon.TclFallback;

   if (mode) {
      rmesa->radeon.TclFallback = prev | bit;
      /* Only transition on the very first fallback bit. */
      if (prev == 0) {
	 if (RADEON_DEBUG & RADEON_FALLBACKS)
	    fprintf(stderr, "Radeon begin tcl fallback %s\n",
		    getFallbackString( bit ));
	 transition_to_swtnl( ctx );
      }
   } else {
      rmesa->radeon.TclFallback = prev & ~bit;
      /* Only transition back when this was the sole remaining bit. */
      if (prev == bit) {
	 if (RADEON_DEBUG & RADEON_FALLBACKS)
	    fprintf(stderr, "Radeon end tcl fallback %s\n",
		    getFallbackString( bit ));
	 transition_to_hwtnl( ctx );
      }
   }
}
566