1 
2 /*
3  * Mesa 3-D graphics library
4  *
5  * Copyright (C) 1999-2006  Brian Paul   All Rights Reserved.
6  *
7  * Permission is hereby granted, free of charge, to any person obtaining a
8  * copy of this software and associated documentation files (the "Software"),
9  * to deal in the Software without restriction, including without limitation
10  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
11  * and/or sell copies of the Software, and to permit persons to whom the
12  * Software is furnished to do so, subject to the following conditions:
13  *
14  * The above copyright notice and this permission notice shall be included
15  * in all copies or substantial portions of the Software.
16  *
17  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
18  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
20  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
21  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
22  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
23  * OTHER DEALINGS IN THE SOFTWARE.
24  *
25  * Authors:
26  *    Keith Whitwell <keithw@vmware.com>
27  */
28 
29 /* Split indexed primitives with per-vertex copying.
30  */
31 
32 #include <stdio.h>
33 
34 #include "main/glheader.h"
35 #include "main/bufferobj.h"
36 
37 #include "main/glformats.h"
38 #include "main/macros.h"
39 #include "main/mtypes.h"
40 #include "main/varray.h"
41 #include "vbo/vbo.h"
42 
43 #include "t_split.h"
44 #include "tnl.h"
45 
46 
47 #define ELT_TABLE_SIZE 16
48 
49 /**
50  * Used for vertex-level splitting of indexed buffers.  Note that
51  * non-indexed primitives may be converted to indexed in some cases
52  * (eg loops, fans) in order to use this splitting path.
53  */
54 struct copy_context {
55    struct gl_context *ctx;
56    const struct tnl_vertex_array *array;
57    const struct _mesa_prim *prim;
58    GLuint nr_prims;
59    const struct _mesa_index_buffer *ib;
60    tnl_draw_func draw;
61 
62    const struct split_limits *limits;
63 
64    struct {
65       GLuint attr;
66       GLuint size;
67       const struct tnl_vertex_array *array;
68       const GLubyte *src_ptr;
69 
70       struct gl_vertex_buffer_binding dstbinding;
71       struct gl_array_attributes dstattribs;
72 
73    } varying[VERT_ATTRIB_MAX];
74    GLuint nr_varying;
75 
76    struct tnl_vertex_array dstarray[VERT_ATTRIB_MAX];
77    struct _mesa_index_buffer dstib;
78 
79    GLuint *translated_elt_buf;
80    const GLuint *srcelt;
81 
82    /** A baby hash table to avoid re-emitting (some) duplicate
83     * vertices when splitting indexed primitives.
84     */
85    struct {
86       GLuint in;
87       GLuint out;
88    } vert_cache[ELT_TABLE_SIZE];
89 
90    GLuint vertex_size;
91    GLubyte *dstbuf;
92    GLubyte *dstptr;     /**< dstptr == dstbuf + dstelt_max * vertsize */
93    GLuint dstbuf_size;  /**< in vertices */
94    GLuint dstbuf_nr;    /**< count of emitted vertices, also the largest value
95                          * in dstelt.  Our MaxIndex.
96                          */
97 
98    GLuint *dstelt;
99    GLuint dstelt_nr;
100    GLuint dstelt_size;
101 
102 #define MAX_PRIM 32
103    struct _mesa_prim dstprim[MAX_PRIM];
104    GLuint dstprim_nr;
105 };
106 
107 
108 /**
109  * Shallow copy one vertex array to another.
110  */
111 static inline void
copy_vertex_array(struct tnl_vertex_array * dst,const struct tnl_vertex_array * src)112 copy_vertex_array(struct tnl_vertex_array *dst,
113                   const struct tnl_vertex_array *src)
114 {
115    dst->VertexAttrib = src->VertexAttrib;
116    dst->BufferBinding = src->BufferBinding;
117 }
118 
119 
120 /**
121  * Starts returning true slightly before the buffer fills, to ensure
122  * that there is sufficient room for any remaining vertices to finish
123  * off the prim:
124  */
125 static GLboolean
check_flush(struct copy_context * copy)126 check_flush(struct copy_context *copy)
127 {
128    GLenum mode = copy->dstprim[copy->dstprim_nr].mode;
129 
130    if (GL_TRIANGLE_STRIP == mode &&
131        copy->dstelt_nr & 1) { /* see bug9962 */
132        return GL_FALSE;
133    }
134 
135    if (copy->dstbuf_nr + 4 > copy->dstbuf_size)
136       return GL_TRUE;
137 
138    if (copy->dstelt_nr + 4 > copy->dstelt_size)
139       return GL_TRUE;
140 
141    return GL_FALSE;
142 }
143 
144 
145 /**
146  * Dump the parameters/info for a vbo->draw() call.
147  */
148 static void
dump_draw_info(const struct tnl_vertex_array * arrays,const struct _mesa_prim * prims,GLuint nr_prims,const struct _mesa_index_buffer * ib)149 dump_draw_info(const struct tnl_vertex_array *arrays,
150                const struct _mesa_prim *prims,
151                GLuint nr_prims,
152                const struct _mesa_index_buffer *ib)
153 {
154    GLuint i, j;
155 
156    printf("VBO Draw:\n");
157    for (i = 0; i < nr_prims; i++) {
158       printf("Prim %u of %u\n", i, nr_prims);
159       printf("  Prim mode 0x%x\n", prims[i].mode);
160       printf("  IB: %p\n", (void*) ib);
161       for (j = 0; j < VERT_ATTRIB_MAX; j++) {
162          const struct tnl_vertex_array *array = &arrays[j];
163          const struct gl_vertex_buffer_binding *binding
164             = array->BufferBinding;
165          const struct gl_array_attributes *attrib = array->VertexAttrib;
166          const GLubyte *ptr = _mesa_vertex_attrib_address(attrib, binding);
167          printf("    array %d at %p:\n", j, (void*) &arrays[j]);
168          printf("      ptr %p, size %d, type 0x%x, stride %d\n",
169                 ptr, attrib->Format.Size, attrib->Format.Type, binding->Stride);
170          if (0) {
171             GLint k = prims[i].start + prims[i].count - 1;
172             GLfloat *last = (GLfloat *) (ptr + binding->Stride * k);
173             printf("        last: %f %f %f\n",
174                    last[0], last[1], last[2]);
175          }
176       }
177    }
178 }
179 
180 
181 static void
flush(struct copy_context * copy)182 flush(struct copy_context *copy)
183 {
184    struct gl_context *ctx = copy->ctx;
185    GLuint i;
186 
187    /* Set some counters:
188     */
189    copy->dstib.count = copy->dstelt_nr;
190 
191 #if 0
192    dump_draw_info(copy->dstarray,
193                   copy->dstprim,
194                   copy->dstprim_nr,
195                   &copy->dstib);
196 #else
197    (void) dump_draw_info;
198 #endif
199 
200    copy->draw(ctx,
201               copy->dstarray,
202               copy->dstprim,
203               copy->dstprim_nr,
204               &copy->dstib,
205               GL_TRUE,
206               0,
207               copy->dstbuf_nr - 1,
208               1,
209               0);
210 
211    /* Reset all pointers:
212     */
213    copy->dstprim_nr = 0;
214    copy->dstelt_nr = 0;
215    copy->dstbuf_nr = 0;
216    copy->dstptr = copy->dstbuf;
217 
218    /* Clear the vertex cache:
219     */
220    for (i = 0; i < ELT_TABLE_SIZE; i++)
221       copy->vert_cache[i].in = ~0;
222 }
223 
224 
225 /**
226  * Called at begin of each primitive during replay.
227  */
228 static void
begin(struct copy_context * copy,GLenum mode,GLboolean begin_flag)229 begin(struct copy_context *copy, GLenum mode, GLboolean begin_flag)
230 {
231    struct _mesa_prim *prim = &copy->dstprim[copy->dstprim_nr];
232 
233    prim->mode = mode;
234    prim->begin = begin_flag;
235 }
236 
237 
238 /**
239  * Use a hashtable to attempt to identify recently-emitted vertices
240  * and avoid re-emitting them.
241  */
242 static GLuint
elt(struct copy_context * copy,GLuint elt_idx)243 elt(struct copy_context *copy, GLuint elt_idx)
244 {
245    GLuint elt = copy->srcelt[elt_idx] + copy->prim->basevertex;
246    GLuint slot = elt & (ELT_TABLE_SIZE-1);
247 
248    /* Look up the incoming element in the vertex cache.  Re-emit if
249     * necessary.
250     */
251    if (copy->vert_cache[slot].in != elt) {
252       GLubyte *csr = copy->dstptr;
253       GLuint i;
254 
255       for (i = 0; i < copy->nr_varying; i++) {
256          const struct tnl_vertex_array *srcarray = copy->varying[i].array;
257          const struct gl_vertex_buffer_binding* srcbinding
258             = srcarray->BufferBinding;
259          const GLubyte *srcptr
260             = copy->varying[i].src_ptr + elt * srcbinding->Stride;
261 
262          memcpy(csr, srcptr, copy->varying[i].size);
263          csr += copy->varying[i].size;
264 
265 #ifdef NAN_CHECK
266          if (srcarray->Format.Type == GL_FLOAT) {
267             GLuint k;
268             GLfloat *f = (GLfloat *) srcptr;
269             for (k = 0; k < srcarray->Size; k++) {
270                assert(!util_is_inf_or_nan(f[k]));
271                assert(f[k] <= 1.0e20 && f[k] >= -1.0e20);
272             }
273          }
274 #endif
275 
276          if (0) {
277             const GLuint *f = (const GLuint *)srcptr;
278             GLuint j;
279             printf("  varying %d: ", i);
280             for (j = 0; j < copy->varying[i].size / 4; j++)
281                printf("%x ", f[j]);
282             printf("\n");
283          }
284       }
285 
286       copy->vert_cache[slot].in = elt;
287       copy->vert_cache[slot].out = copy->dstbuf_nr++;
288       copy->dstptr += copy->vertex_size;
289 
290       assert(csr == copy->dstptr);
291       assert(copy->dstptr == (copy->dstbuf +
292                               copy->dstbuf_nr * copy->vertex_size));
293    }
294 
295    copy->dstelt[copy->dstelt_nr++] = copy->vert_cache[slot].out;
296    return check_flush(copy);
297 }
298 
299 
300 /**
301  * Called at end of each primitive during replay.
302  */
303 static void
end(struct copy_context * copy,GLboolean end_flag)304 end(struct copy_context *copy, GLboolean end_flag)
305 {
306    struct _mesa_prim *prim = &copy->dstprim[copy->dstprim_nr];
307 
308    prim->end = end_flag;
309    prim->count = copy->dstelt_nr - prim->start;
310 
311    if (++copy->dstprim_nr == MAX_PRIM || check_flush(copy)) {
312       flush(copy);
313    }
314 }
315 
316 
317 static void
replay_elts(struct copy_context * copy)318 replay_elts(struct copy_context *copy)
319 {
320    GLuint i, j, k;
321    GLboolean split;
322 
323    for (i = 0; i < copy->nr_prims; i++) {
324       const struct _mesa_prim *prim = &copy->prim[i];
325       const GLuint start = prim->start;
326       GLuint first, incr;
327 
328       switch (prim->mode) {
329       case GL_LINE_LOOP:
330          /* Convert to linestrip and emit the final vertex explicitly,
331           * but only in the resultant strip that requires it.
332           */
333          j = 0;
334          while (j != prim->count) {
335             begin(copy, GL_LINE_STRIP, prim->begin && j == 0);
336 
337             for (split = GL_FALSE; j != prim->count && !split; j++)
338                split = elt(copy, start + j);
339 
340             if (j == prim->count) {
341                /* Done, emit final line.  Split doesn't matter as
342                 * it is always raised a bit early so we can emit
343                 * the last verts if necessary!
344                 */
345                if (prim->end)
346                   (void)elt(copy, start + 0);
347 
348                end(copy, prim->end);
349             }
350             else {
351                /* Wrap
352                 */
353                assert(split);
354                end(copy, 0);
355                j--;
356             }
357          }
358          break;
359 
360       case GL_TRIANGLE_FAN:
361       case GL_POLYGON:
362          j = 2;
363          while (j != prim->count) {
364             begin(copy, prim->mode, prim->begin && j == 0);
365 
366             split = elt(copy, start+0);
367             assert(!split);
368 
369             split = elt(copy, start+j-1);
370             assert(!split);
371 
372             for (; j != prim->count && !split; j++)
373                split = elt(copy, start+j);
374 
375             end(copy, prim->end && j == prim->count);
376 
377             if (j != prim->count) {
378                /* Wrapped the primitive, need to repeat some vertices:
379                 */
380                j -= 1;
381             }
382          }
383          break;
384 
385       default:
386          (void)_tnl_split_prim_inplace(prim->mode, &first, &incr);
387 
388          j = 0;
389          while (j != prim->count) {
390 
391             begin(copy, prim->mode, prim->begin && j == 0);
392 
393             split = 0;
394             for (k = 0; k < first; k++, j++)
395                split |= elt(copy, start+j);
396 
397             assert(!split);
398 
399             for (; j != prim->count && !split;)
400                for (k = 0; k < incr; k++, j++)
401                   split |= elt(copy, start+j);
402 
403             end(copy, prim->end && j == prim->count);
404 
405             if (j != prim->count) {
406                /* Wrapped the primitive, need to repeat some vertices:
407                 */
408                assert(j > first - incr);
409                j -= (first - incr);
410             }
411          }
412          break;
413       }
414    }
415 
416    if (copy->dstprim_nr)
417       flush(copy);
418 }
419 
420 
421 static void
replay_init(struct copy_context * copy)422 replay_init(struct copy_context *copy)
423 {
424    struct gl_context *ctx = copy->ctx;
425    GLuint i;
426    GLuint offset;
427    const GLvoid *srcptr;
428 
429    /* Make a list of varying attributes and their vbo's.  Also
430     * calculate vertex size.
431     */
432    copy->vertex_size = 0;
433    for (i = 0; i < VERT_ATTRIB_MAX; i++) {
434       const struct tnl_vertex_array *array = &copy->array[i];
435       const struct gl_vertex_buffer_binding *binding = array->BufferBinding;
436 
437       if (binding->Stride == 0) {
438          copy_vertex_array(&copy->dstarray[i], array);
439       }
440       else {
441          const struct gl_array_attributes *attrib = array->VertexAttrib;
442          struct gl_buffer_object *vbo = binding->BufferObj;
443          const GLubyte *ptr = _mesa_vertex_attrib_address(attrib, binding);
444          GLuint j = copy->nr_varying++;
445 
446          copy->varying[j].attr = i;
447          copy->varying[j].array = &copy->array[i];
448          copy->varying[j].size = attrib->Format._ElementSize;
449          copy->vertex_size += attrib->Format._ElementSize;
450 
451          if (vbo) {
452             if (!_mesa_bufferobj_mapped(vbo, MAP_INTERNAL)) {
453                ctx->Driver.MapBufferRange(ctx, 0, vbo->Size, GL_MAP_READ_BIT, vbo,
454                                           MAP_INTERNAL);
455             }
456 
457             copy->varying[j].src_ptr =
458                   ADD_POINTERS(vbo->Mappings[MAP_INTERNAL].Pointer, ptr);
459          } else {
460             copy->varying[j].src_ptr = ptr;
461          }
462 
463          copy->dstarray[i].VertexAttrib = &copy->varying[j].dstattribs;
464          copy->dstarray[i].BufferBinding = &copy->varying[j].dstbinding;
465       }
466    }
467 
468    /* There must always be an index buffer.  Currently require the
469     * caller convert non-indexed prims to indexed.  Could alternately
470     * do it internally.
471     */
472    if (copy->ib->obj) {
473       if (!_mesa_bufferobj_mapped(copy->ib->obj, MAP_INTERNAL))
474          ctx->Driver.MapBufferRange(ctx, 0, copy->ib->obj->Size, GL_MAP_READ_BIT,
475                                     copy->ib->obj, MAP_INTERNAL);
476 
477       srcptr = (const GLubyte *)
478          ADD_POINTERS(copy->ib->obj->Mappings[MAP_INTERNAL].Pointer,
479                       copy->ib->ptr);
480    } else
481       srcptr = copy->ib->ptr;
482 
483    switch (copy->ib->index_size_shift) {
484    case 0:
485       copy->translated_elt_buf = malloc(sizeof(GLuint) * copy->ib->count);
486       copy->srcelt = copy->translated_elt_buf;
487 
488       for (i = 0; i < copy->ib->count; i++)
489          copy->translated_elt_buf[i] = ((const GLubyte *)srcptr)[i];
490       break;
491 
492    case 1:
493       copy->translated_elt_buf = malloc(sizeof(GLuint) * copy->ib->count);
494       copy->srcelt = copy->translated_elt_buf;
495 
496       for (i = 0; i < copy->ib->count; i++)
497          copy->translated_elt_buf[i] = ((const GLushort *)srcptr)[i];
498       break;
499 
500    case 2:
501       copy->translated_elt_buf = NULL;
502       copy->srcelt = (const GLuint *)srcptr;
503       break;
504    }
505 
506    /* Figure out the maximum allowed vertex buffer size:
507     */
508    if (copy->vertex_size * copy->limits->max_verts <= copy->limits->max_vb_size) {
509       copy->dstbuf_size = copy->limits->max_verts;
510    }
511    else {
512       copy->dstbuf_size = copy->limits->max_vb_size / copy->vertex_size;
513    }
514 
515    /* Allocate an output vertex buffer:
516     *
517     * XXX:  This should be a VBO!
518     */
519    copy->dstbuf = malloc(copy->dstbuf_size * copy->vertex_size);
520    copy->dstptr = copy->dstbuf;
521 
522    /* Setup new vertex arrays to point into the output buffer:
523     */
524    for (offset = 0, i = 0; i < copy->nr_varying; i++) {
525       const struct tnl_vertex_array *src = copy->varying[i].array;
526       const struct gl_array_attributes *srcattr = src->VertexAttrib;
527       struct tnl_vertex_array *dst = &copy->dstarray[copy->varying[i].attr];
528       struct gl_vertex_buffer_binding *dstbind = &copy->varying[i].dstbinding;
529       struct gl_array_attributes *dstattr = &copy->varying[i].dstattribs;
530 
531       dstattr->Format = srcattr->Format;
532       dstattr->Ptr = copy->dstbuf + offset;
533       dstbind->Stride = copy->vertex_size;
534       dstbind->BufferObj = NULL;
535       dst->BufferBinding = dstbind;
536       dst->VertexAttrib = dstattr;
537 
538       offset += copy->varying[i].size;
539    }
540 
541    /* Allocate an output element list:
542     */
543    copy->dstelt_size = MIN2(65536, copy->ib->count * 2 + 3);
544    copy->dstelt_size = MIN2(copy->dstelt_size, copy->limits->max_indices);
545    copy->dstelt = malloc(sizeof(GLuint) * copy->dstelt_size);
546    copy->dstelt_nr = 0;
547 
548    /* Setup the new index buffer to point to the allocated element
549     * list:
550     */
551    copy->dstib.count = 0;        /* duplicates dstelt_nr */
552    copy->dstib.index_size_shift = 2;
553    copy->dstib.obj = NULL;
554    copy->dstib.ptr = copy->dstelt;
555 }
556 
557 
558 /**
559  * Free up everything allocated during split/replay.
560  */
561 static void
replay_finish(struct copy_context * copy)562 replay_finish(struct copy_context *copy)
563 {
564    struct gl_context *ctx = copy->ctx;
565    GLuint i;
566 
567    /* Free our vertex and index buffers */
568    free(copy->translated_elt_buf);
569    free(copy->dstbuf);
570    free(copy->dstelt);
571 
572    /* Unmap VBO's */
573    for (i = 0; i < copy->nr_varying; i++) {
574       struct gl_buffer_object *vbo =
575          copy->varying[i].array->BufferBinding->BufferObj;
576       if (vbo && _mesa_bufferobj_mapped(vbo, MAP_INTERNAL))
577          ctx->Driver.UnmapBuffer(ctx, vbo, MAP_INTERNAL);
578    }
579 
580    /* Unmap index buffer */
581    if (copy->ib->obj &&
582        _mesa_bufferobj_mapped(copy->ib->obj, MAP_INTERNAL)) {
583       ctx->Driver.UnmapBuffer(ctx, copy->ib->obj, MAP_INTERNAL);
584    }
585 }
586 
587 
588 /**
589  * Split VBO into smaller pieces, draw the pieces.
590  */
591 void
_tnl_split_copy(struct gl_context * ctx,const struct tnl_vertex_array * arrays,const struct _mesa_prim * prim,GLuint nr_prims,const struct _mesa_index_buffer * ib,tnl_draw_func draw,const struct split_limits * limits)592 _tnl_split_copy(struct gl_context *ctx,
593                 const struct tnl_vertex_array *arrays,
594                 const struct _mesa_prim *prim,
595                 GLuint nr_prims,
596                 const struct _mesa_index_buffer *ib,
597                 tnl_draw_func draw,
598                 const struct split_limits *limits)
599 {
600    struct copy_context copy;
601    GLuint i, this_nr_prims;
602 
603    for (i = 0; i < nr_prims;) {
604       /* Our SW TNL pipeline doesn't handle basevertex yet, so bind_indices
605        * will rebase the elements to the basevertex, and we'll only
606        * emit strings of prims with the same basevertex in one draw call.
607        */
608       for (this_nr_prims = 1; i + this_nr_prims < nr_prims;
609            this_nr_prims++) {
610          if (prim[i].basevertex != prim[i + this_nr_prims].basevertex)
611             break;
612       }
613 
614       memset(&copy, 0, sizeof(copy));
615 
616       /* Require indexed primitives:
617        */
618       assert(ib);
619 
620       copy.ctx = ctx;
621       copy.array = arrays;
622       copy.prim = &prim[i];
623       copy.nr_prims = this_nr_prims;
624       copy.ib = ib;
625       copy.draw = draw;
626       copy.limits = limits;
627 
628       /* Clear the vertex cache:
629        */
630       for (i = 0; i < ELT_TABLE_SIZE; i++)
631          copy.vert_cache[i].in = ~0;
632 
633       replay_init(&copy);
634       replay_elts(&copy);
635       replay_finish(&copy);
636    }
637 }
638