// license:BSD-3-Clause
// copyright-holders:Ville Linde, Aaron Giles
/***************************************************************************

    polylgcy.c

    Legacy helper routines for polygon rendering.

***************************************************************************/
10 
#include "emu.h"
#include "polylgcy.h"

#include <atomic>
#include <cmath>
#include <cstring>
15 
16 
17 namespace {
18 
19 /***************************************************************************
20     DEBUGGING
21 ***************************************************************************/
22 
23 /* keep statistics */
24 #define KEEP_STATISTICS                 0
25 
26 /* turn this on to log the reasons for any long waits */
27 #define LOG_WAITS                       0
28 
29 /* number of profiling ticks before we consider a wait "long" */
30 #define LOG_WAIT_THRESHOLD              1000
31 
32 
33 
34 /***************************************************************************
35     CONSTANTS
36 ***************************************************************************/
37 
38 #define SCANLINES_PER_BUCKET            8
39 #define CACHE_LINE_SIZE                 64          /* this is a general guess */
40 #define TOTAL_BUCKETS                   (512 / SCANLINES_PER_BUCKET)
41 #define UNITS_PER_POLY                  (100 / SCANLINES_PER_BUCKET)
42 
43 
44 
45 /***************************************************************************
46     TYPE DEFINITIONS
47 ***************************************************************************/
48 
49 /* forward definitions */
50 struct polygon_info;
51 
52 
53 /* tri_extent describes start/end points for a scanline */
54 struct tri_extent
55 {
56 	int16_t       startx;                     /* starting X coordinate (inclusive) */
57 	int16_t       stopx;                      /* ending X coordinate (exclusive) */
58 };
59 
60 
61 /* single set of polygon per-parameter data */
62 struct poly_param
63 {
64 	float       start;                      /* parameter value at starting X,Y */
65 	float       dpdx;                       /* dp/dx relative to starting X */
66 	float       dpdy;                       /* dp/dy relative to starting Y */
67 };
68 
69 
70 /* poly edge is used internally for quad rendering */
71 struct poly_edge
72 {
73 	poly_edge *         next;                   /* next edge in sequence */
74 	int                 index;                  /* index of this edge */
75 	const poly_vertex * v1;                     /* pointer to first vertex */
76 	const poly_vertex * v2;                     /* pointer to second vertex */
77 	float               dxdy;                   /* dx/dy along the edge */
78 	float               dpdy[POLYLGCY_MAX_VERTEX_PARAMS];/* per-parameter dp/dy values */
79 };
80 
81 
82 /* poly section is used internally for quad rendering */
83 struct poly_section
84 {
85 	const poly_edge *   ledge;                  /* pointer to left edge */
86 	const poly_edge *   redge;                  /* pointer to right edge */
87 	float               ybottom;                /* bottom of this section */
88 };
89 
90 
91 /* work_unit_shared is a common set of data shared between tris and quads */
92 struct work_unit_shared
93 {
94 	polygon_info *      polygon;                /* pointer to polygon */
95 	std::atomic<uint32_t> count_next;             /* number of scanlines and index of next item to process */
96 	int16_t               scanline;               /* starting scanline and count */
97 	uint16_t              previtem;               /* index of previous item in the same bucket */
98 #ifndef PTR64
99 	uint32_t              dummy;                  /* pad to 16 bytes */
100 #endif
101 };
102 
103 
104 /* tri_work_unit is a triangle-specific work-unit */
105 struct tri_work_unit
106 {
107 	work_unit_shared    shared;                 /* shared data */
108 	tri_extent          extent[SCANLINES_PER_BUCKET]; /* array of scanline extents */
109 };
110 
111 
112 /* quad_work_unit is a quad-specific work-unit */
113 struct quad_work_unit
114 {
115 	work_unit_shared    shared;                 /* shared data */
116 	poly_extent         extent[SCANLINES_PER_BUCKET]; /* array of scanline extents */
117 };
118 
119 
120 /* work_unit is a union of the two types */
121 union work_unit
122 {
123 	work_unit_shared    shared;                 /* shared data */
124 	tri_work_unit       tri;                    /* triangle work unit */
125 	quad_work_unit      quad;                   /* quad work unit */
126 };
127 
128 
129 /* polygon_info describes a single polygon, which includes the poly_params */
130 struct polygon_info
131 {
132 	legacy_poly_manager *      poly;                   /* pointer back to the poly manager */
133 	void *              dest;                   /* pointer to the destination we are rendering to */
134 	void *              extra;                  /* extra data pointer */
135 	uint8_t               numparams;              /* number of parameters for this polygon  */
136 	uint8_t               numverts;               /* number of vertices in this polygon */
137 	poly_draw_scanline_func     callback;               /* callback to handle a scanline's worth of work */
138 	int32_t               xorigin;                /* X origin for all parameters */
139 	int32_t               yorigin;                /* Y origin for all parameters */
140 	poly_param          param[POLYLGCY_MAX_VERTEX_PARAMS];/* array of parameter data */
141 };
142 
143 } // anonymous namespace
144 
145 
146 /* full poly manager description */
147 struct legacy_poly_manager
148 {
149 	/* queue management */
150 	osd_work_queue *    queue;                  /* work queue */
151 
152 	/* triangle work units */
153 	work_unit **        unit;                   /* array of work unit pointers */
154 	uint32_t              unit_next;              /* index of next unit to allocate */
155 	uint32_t              unit_count;             /* number of work units available */
156 	size_t              unit_size;              /* size of each work unit, in bytes */
157 
158 	/* quad work units */
159 	uint32_t              quadunit_next;          /* index of next unit to allocate */
160 	uint32_t              quadunit_count;         /* number of work units available */
161 	size_t              quadunit_size;          /* size of each work unit, in bytes */
162 
163 	/* poly data */
164 	polygon_info **     polygon;                /* array of polygon pointers */
165 	uint32_t              polygon_next;           /* index of next polygon to allocate */
166 	uint32_t              polygon_count;          /* number of polygon items available */
167 	size_t              polygon_size;           /* size of each polygon, in bytes */
168 
169 	/* extra data */
170 	void **             extra;                  /* array of extra data pointers */
171 	uint32_t              extra_next;             /* index of next extra data to allocate */
172 	uint32_t              extra_count;            /* number of extra data items available */
173 	size_t              extra_size;             /* size of each extra data, in bytes */
174 
175 	/* misc data */
176 	uint8_t               flags;                  /* flags */
177 
178 	/* buckets */
179 	uint16_t              unit_bucket[TOTAL_BUCKETS]; /* buckets for tracking unit usage */
180 
181 	/* statistics */
182 	uint32_t              triangles;              /* number of triangles queued */
183 	uint32_t              quads;                  /* number of quads queued */
184 	uint64_t              pixels;                 /* number of pixels rendered */
185 #if KEEP_STATISTICS
186 	uint32_t              unit_waits;             /* number of times we waited for a unit */
187 	uint32_t              unit_max;               /* maximum units used */
188 	uint32_t              polygon_waits;          /* number of times we waited for a polygon */
189 	uint32_t              polygon_max;            /* maximum polygons used */
190 	uint32_t              extra_waits;            /* number of times we waited for an extra data */
191 	uint32_t              extra_max;              /* maximum extra data used */
192 	uint32_t              conflicts[WORK_MAX_THREADS]; /* number of conflicts found, per thread */
193 	uint32_t              resolved[WORK_MAX_THREADS]; /* number of conflicts resolved, per thread */
194 #endif
195 };
196 
197 
198 
199 /***************************************************************************
200     FUNCTION PROTOTYPES
201 ***************************************************************************/
202 
203 static void **allocate_array(running_machine &machine, size_t *itemsize, uint32_t itemcount);
204 static void *poly_item_callback(void *param, int threadid);
205 static void poly_state_presave(legacy_poly_manager &poly);
206 
207 
208 
/***************************************************************************
    INLINE FUNCTIONS
***************************************************************************/
212 
/*-------------------------------------------------
    round_coordinate - round a coordinate to
    an integer, following rules that 0.5 rounds
    down

    The value is floored first, then bumped by one
    only when the remaining fraction is strictly
    greater than 0.5 (so 1.5 -> 1, -0.5 -> -1).
-------------------------------------------------*/

static inline int32_t round_coordinate(float value)
{
	const int32_t whole = static_cast<int32_t>(std::floor(value));
	const float fraction = value - static_cast<float>(whole);
	return whole + ((fraction > 0.5f) ? 1 : 0);
}
224 
225 
226 /*-------------------------------------------------
227     convert_tri_extent_to_poly_extent - convert
228     a simple tri_extent to a full poly_extent
229 -------------------------------------------------*/
230 
convert_tri_extent_to_poly_extent(poly_extent * dstextent,const tri_extent * srcextent,const polygon_info * polygon,int32_t y)231 static inline void convert_tri_extent_to_poly_extent(poly_extent *dstextent, const tri_extent *srcextent, const polygon_info *polygon, int32_t y)
232 {
233 	/* copy start/stop always */
234 	dstextent->startx = srcextent->startx;
235 	dstextent->stopx = srcextent->stopx;
236 
237 	/* if we have parameters, process them as well */
238 	for (int paramnum = 0; paramnum < polygon->numparams; paramnum++)
239 	{
240 		dstextent->param[paramnum].start = polygon->param[paramnum].start + srcextent->startx * polygon->param[paramnum].dpdx + y * polygon->param[paramnum].dpdy;
241 		dstextent->param[paramnum].dpdx = polygon->param[paramnum].dpdx;
242 	}
243 }
244 
245 
246 /*-------------------------------------------------
247     interpolate_vertex - interpolate values in
248     a vertex based on p[0] crossing the clipval
249 -------------------------------------------------*/
250 
interpolate_vertex(poly_vertex * outv,const poly_vertex * v1,const poly_vertex * v2,int paramcount,float clipval)251 static inline void interpolate_vertex(poly_vertex *outv, const poly_vertex *v1, const poly_vertex *v2, int paramcount, float clipval)
252 {
253 	float frac = (clipval - v1->p[0]) / (v2->p[0] - v1->p[0]);
254 	int paramnum;
255 
256 	/* create a new one at the intersection point */
257 	outv->x = v1->x + frac * (v2->x - v1->x);
258 	outv->y = v1->y + frac * (v2->y - v1->y);
259 	for (paramnum = 0; paramnum < paramcount; paramnum++)
260 		outv->p[paramnum] = v1->p[paramnum] + frac * (v2->p[paramnum] - v1->p[paramnum]);
261 }
262 
263 
264 /*-------------------------------------------------
265     copy_vertex - copy vertex data from one to
266     another
267 -------------------------------------------------*/
268 
copy_vertex(poly_vertex * outv,const poly_vertex * v,int paramcount)269 static inline void copy_vertex(poly_vertex *outv, const poly_vertex *v, int paramcount)
270 {
271 	int paramnum;
272 
273 	outv->x = v->x;
274 	outv->y = v->y;
275 	for (paramnum = 0; paramnum < paramcount; paramnum++)
276 		outv->p[paramnum] = v->p[paramnum];
277 }
278 
279 
280 /*-------------------------------------------------
281     allocate_polygon - allocate a new polygon
282     object, blocking if we run out
283 -------------------------------------------------*/
284 
allocate_polygon(legacy_poly_manager * poly,int miny,int maxy)285 static inline polygon_info *allocate_polygon(legacy_poly_manager *poly, int miny, int maxy)
286 {
287 	/* wait for a work item if we have to */
288 	if (poly->polygon_next + 1 > poly->polygon_count)
289 	{
290 		poly_wait(poly, "Out of polygons");
291 #if KEEP_STATISTICS
292 		poly->polygon_waits++;
293 #endif
294 	}
295 	else if (poly->unit_next + (maxy - miny) / SCANLINES_PER_BUCKET + 2 > poly->unit_count)
296 	{
297 		poly_wait(poly, "Out of work units");
298 #if KEEP_STATISTICS
299 		poly->unit_waits++;
300 #endif
301 	}
302 #if KEEP_STATISTICS
303 	poly->polygon_max = std::max(poly->polygon_max, poly->polygon_next + 1);
304 #endif
305 	return poly->polygon[poly->polygon_next++];
306 }
307 
308 
309 
/***************************************************************************
    INITIALIZATION/TEARDOWN
***************************************************************************/
313 
314 /*-------------------------------------------------
315     poly_alloc - initialize a new polygon
316     manager
317 -------------------------------------------------*/
318 
poly_alloc(running_machine & machine,int max_polys,size_t extra_data_size,uint8_t flags)319 legacy_poly_manager *poly_alloc(running_machine &machine, int max_polys, size_t extra_data_size, uint8_t flags)
320 {
321 	legacy_poly_manager *poly;
322 
323 	/* allocate the manager itself */
324 	poly = auto_alloc_clear(machine, <legacy_poly_manager>());
325 	poly->flags = flags;
326 
327 	/* allocate polygons */
328 	poly->polygon_size = sizeof(polygon_info);
329 	poly->polygon_count = std::max(max_polys, 1);
330 	poly->polygon_next = 0;
331 	poly->polygon = (polygon_info **)allocate_array(machine, &poly->polygon_size, poly->polygon_count);
332 
333 	/* allocate extra data */
334 	poly->extra_size = extra_data_size;
335 	poly->extra_count = poly->polygon_count;
336 	poly->extra_next = 1;
337 	poly->extra = allocate_array(machine, &poly->extra_size, poly->extra_count);
338 
339 	/* allocate triangle work units */
340 	poly->unit_size = (flags & POLYLGCY_FLAG_ALLOW_QUADS) ? sizeof(quad_work_unit) : sizeof(tri_work_unit);
341 	poly->unit_count = std::min(poly->polygon_count * UNITS_PER_POLY, 65535U);
342 	poly->unit_next = 0;
343 	poly->unit = (work_unit **)allocate_array(machine, &poly->unit_size, poly->unit_count);
344 
345 	/* create the work queue */
346 	if (!(flags & POLYLGCY_FLAG_NO_WORK_QUEUE))
347 		poly->queue = osd_work_queue_alloc(WORK_QUEUE_FLAG_MULTI | WORK_QUEUE_FLAG_HIGH_FREQ);
348 
349 	/* request a pre-save callback for synchronization */
350 	machine.save().register_presave(save_prepost_delegate(FUNC(poly_state_presave), poly));
351 	return poly;
352 }
353 
354 
355 /*-------------------------------------------------
356     poly_free - free a polygon manager
357 -------------------------------------------------*/
358 
poly_free(legacy_poly_manager * poly)359 void poly_free(legacy_poly_manager *poly)
360 {
361 #if KEEP_STATISTICS
362 {
363 	int i, conflicts = 0, resolved = 0;
364 	for (i = 0; i < ARRAY_LENGTH(poly->conflicts); i++)
365 	{
366 		conflicts += poly->conflicts[i];
367 		resolved += poly->resolved[i];
368 	}
369 	printf("Total triangles = %d\n", poly->triangles);
370 	printf("Total quads = %d\n", poly->quads);
371 	if (poly->pixels > 1000000000)
372 		printf("Total pixels   = %d%09d\n", (uint32_t)(poly->pixels / 1000000000), (uint32_t)(poly->pixels % 1000000000));
373 	else
374 		printf("Total pixels   = %d\n", (uint32_t)poly->pixels);
375 	printf("Conflicts:  %d resolved, %d total\n", resolved, conflicts);
376 	printf("Units:      %5d used, %5d allocated, %5d waits, %4d bytes each, %7d total\n", poly->unit_max, poly->unit_count, poly->unit_waits, (u32) poly->unit_size, poly->unit_count * (u32) poly->unit_size);
377 	printf("Polygons:   %5d used, %5d allocated, %5d waits, %4d bytes each, %7d total\n", poly->polygon_max, poly->polygon_count, poly->polygon_waits, (u32) poly->polygon_size, poly->polygon_count * (u32) poly->polygon_size);
378 	printf("Extra data: %5d used, %5d allocated, %5d waits, %4d bytes each, %7d total\n", poly->extra_max, poly->extra_count, poly->extra_waits, (u32) poly->extra_size, poly->extra_count * (u32) poly->extra_size);
379 }
380 #endif
381 
382 	/* free the work queue */
383 	if (poly->queue != nullptr)
384 		osd_work_queue_free(poly->queue);
385 }
386 
387 
388 
/***************************************************************************
    COMMON FUNCTIONS
***************************************************************************/
392 
393 /*-------------------------------------------------
394     poly_wait - wait for all pending rendering
395     to complete
396 -------------------------------------------------*/
397 
poly_wait(legacy_poly_manager * poly,const char * debug_reason)398 void poly_wait(legacy_poly_manager *poly, const char *debug_reason)
399 {
400 	osd_ticks_t time;
401 
402 	/* remember the start time if we're logging */
403 	if (LOG_WAITS)
404 		time = get_profile_ticks();
405 
406 	/* wait for all pending work items to complete */
407 	if (poly->queue != nullptr)
408 		osd_work_queue_wait(poly->queue, osd_ticks_per_second() * 100);
409 
410 	/* if we don't have a queue, just run the whole list now */
411 	else
412 	{
413 		int unitnum;
414 		for (unitnum = 0; unitnum < poly->unit_next; unitnum++)
415 			poly_item_callback(poly->unit[unitnum], 0);
416 	}
417 
418 	/* log any long waits */
419 	if (LOG_WAITS)
420 	{
421 		time = get_profile_ticks() - time;
422 		if (time > LOG_WAIT_THRESHOLD)
423 			osd_printf_verbose("Poly:Waited %d cycles for %s\n", (int)time, debug_reason);
424 	}
425 
426 	/* reset the state */
427 	poly->polygon_next = poly->unit_next = 0;
428 	memset(poly->unit_bucket, 0xff, sizeof(poly->unit_bucket));
429 
430 	/* we need to preserve the last extra data that was supplied */
431 	if (poly->extra_next > 1)
432 		memcpy(poly->extra[0], poly->extra[poly->extra_next - 1], poly->extra_size);
433 	poly->extra_next = 1;
434 }
435 
436 
437 /*-------------------------------------------------
438     poly_get_extra_data - get a pointer to the
439     extra data for the next polygon
440 -------------------------------------------------*/
441 
poly_get_extra_data(legacy_poly_manager * poly)442 void *poly_get_extra_data(legacy_poly_manager *poly)
443 {
444 	/* wait for a work item if we have to */
445 	if (poly->extra_next + 1 > poly->extra_count)
446 	{
447 		poly_wait(poly, "Out of extra data");
448 #if KEEP_STATISTICS
449 		poly->extra_waits++;
450 #endif
451 	}
452 
453 	/* return a pointer to the extra data for the next item */
454 #if KEEP_STATISTICS
455 	poly->extra_max = std::max(poly->extra_max, poly->extra_next + 1);
456 #endif
457 	return poly->extra[poly->extra_next++];
458 }
459 
460 
461 
/***************************************************************************
    CORE TRIANGLE RENDERING
***************************************************************************/
465 
466 /*-------------------------------------------------
467     poly_render_triangle - render a single
468     triangle given 3 vertexes
469 -------------------------------------------------*/
470 
poly_render_triangle(legacy_poly_manager * poly,void * dest,const rectangle & cliprect,poly_draw_scanline_func callback,int paramcount,const poly_vertex * v1,const poly_vertex * v2,const poly_vertex * v3)471 uint32_t poly_render_triangle(legacy_poly_manager *poly, void *dest, const rectangle &cliprect, poly_draw_scanline_func callback, int paramcount, const poly_vertex *v1, const poly_vertex *v2, const poly_vertex *v3)
472 {
473 	float dxdy_v1v2, dxdy_v1v3, dxdy_v2v3;
474 	const poly_vertex *tv;
475 	int32_t curscan, scaninc;
476 	polygon_info *polygon;
477 	int32_t v1yclip, v3yclip;
478 	int32_t v1y, v3y, v1x;
479 	int32_t pixels = 0;
480 	uint32_t startunit;
481 
482 	/* first sort by Y */
483 	if (v2->y < v1->y)
484 	{
485 		tv = v1;
486 		v1 = v2;
487 		v2 = tv;
488 	}
489 	if (v3->y < v2->y)
490 	{
491 		tv = v2;
492 		v2 = v3;
493 		v3 = tv;
494 		if (v2->y < v1->y)
495 		{
496 			tv = v1;
497 			v1 = v2;
498 			v2 = tv;
499 		}
500 	}
501 
502 	/* compute some integral X/Y vertex values */
503 	v1x = round_coordinate(v1->x);
504 	v1y = round_coordinate(v1->y);
505 	v3y = round_coordinate(v3->y);
506 
507 	/* clip coordinates */
508 	v1yclip = v1y;
509 	v3yclip = v3y + ((poly->flags & POLYLGCY_FLAG_INCLUDE_BOTTOM_EDGE) ? 1 : 0);
510 	v1yclip = std::max(v1yclip, cliprect.min_y);
511 	v3yclip = std::min(v3yclip, cliprect.max_y + 1);
512 	if (v3yclip - v1yclip <= 0)
513 		return 0;
514 
515 	/* allocate a new polygon */
516 	polygon = allocate_polygon(poly, v1yclip, v3yclip);
517 
518 	/* fill in the polygon information */
519 	polygon->poly = poly;
520 	polygon->dest = dest;
521 	polygon->callback = callback;
522 	polygon->extra = poly->extra[poly->extra_next - 1];
523 	polygon->numparams = paramcount;
524 	polygon->numverts = 3;
525 
526 	/* set the start X/Y coordinates */
527 	polygon->xorigin = v1x;
528 	polygon->yorigin = v1y;
529 
530 	/* compute the slopes for each portion of the triangle */
531 	dxdy_v1v2 = (v2->y == v1->y) ? 0.0f : (v2->x - v1->x) / (v2->y - v1->y);
532 	dxdy_v1v3 = (v3->y == v1->y) ? 0.0f : (v3->x - v1->x) / (v3->y - v1->y);
533 	dxdy_v2v3 = (v3->y == v2->y) ? 0.0f : (v3->x - v2->x) / (v3->y - v2->y);
534 
535 	/* compute the X extents for each scanline */
536 	startunit = poly->unit_next;
537 	for (curscan = v1yclip; curscan < v3yclip; curscan += scaninc)
538 	{
539 		uint32_t bucketnum = ((uint32_t)curscan / SCANLINES_PER_BUCKET) % TOTAL_BUCKETS;
540 		uint32_t unit_index = poly->unit_next++;
541 		tri_work_unit *unit = &poly->unit[unit_index]->tri;
542 		int extnum;
543 
544 		/* determine how much to advance to hit the next bucket */
545 		scaninc = SCANLINES_PER_BUCKET - (uint32_t)curscan % SCANLINES_PER_BUCKET;
546 
547 		/* fill in the work unit basics */
548 		unit->shared.polygon = polygon;
549 		unit->shared.count_next = std::min(v3yclip - curscan, scaninc);
550 		unit->shared.scanline = curscan;
551 		unit->shared.previtem = poly->unit_bucket[bucketnum];
552 		poly->unit_bucket[bucketnum] = unit_index;
553 
554 		/* iterate over extents */
555 		for (extnum = 0; extnum < unit->shared.count_next; extnum++)
556 		{
557 			float fully = (float)(curscan + extnum) + 0.5f;
558 			float startx = v1->x + (fully - v1->y) * dxdy_v1v3;
559 			float stopx;
560 			int32_t istartx, istopx;
561 
562 			/* compute the ending X based on which part of the triangle we're in */
563 			if (fully < v2->y)
564 				stopx = v1->x + (fully - v1->y) * dxdy_v1v2;
565 			else
566 				stopx = v2->x + (fully - v2->y) * dxdy_v2v3;
567 
568 			/* clamp to full pixels */
569 			istartx = round_coordinate(startx);
570 			istopx = round_coordinate(stopx);
571 
572 			/* force start < stop */
573 			if (istartx > istopx)
574 			{
575 				int32_t temp = istartx;
576 				istartx = istopx;
577 				istopx = temp;
578 			}
579 
580 			/* include the right edge if requested */
581 			if (poly->flags & POLYLGCY_FLAG_INCLUDE_RIGHT_EDGE)
582 				istopx++;
583 
584 			/* apply left/right clipping */
585 			if (istartx < cliprect.min_x)
586 				istartx = cliprect.min_x;
587 			if (istopx > cliprect.max_x)
588 				istopx = cliprect.max_x + 1;
589 
590 			/* set the extent and update the total pixel count */
591 			if (istartx >= istopx)
592 				istartx = istopx = 0;
593 			unit->extent[extnum].startx = istartx;
594 			unit->extent[extnum].stopx = istopx;
595 			pixels += istopx - istartx;
596 		}
597 	}
598 #if KEEP_STATISTICS
599 	poly->unit_max = std::max(poly->unit_max, poly->unit_next);
600 #endif
601 
602 	/* compute parameter starting points and deltas */
603 	if (paramcount > 0)
604 	{
605 		float a00 = v2->y - v3->y;
606 		float a01 = v3->x - v2->x;
607 		float a02 = v2->x*v3->y - v3->x*v2->y;
608 		float a10 = v3->y - v1->y;
609 		float a11 = v1->x - v3->x;
610 		float a12 = v3->x*v1->y - v1->x*v3->y;
611 		float a20 = v1->y - v2->y;
612 		float a21 = v2->x - v1->x;
613 		float a22 = v1->x*v2->y - v2->x*v1->y;
614 		float det = a02 + a12 + a22;
615 
616 		if(fabsf(det) < 0.001f) {
617 			for (int paramnum = 0; paramnum < paramcount; paramnum++)
618 			{
619 				poly_param *params = &polygon->param[paramnum];
620 				params->dpdx = 0;
621 				params->dpdy = 0;
622 				params->start = v1->p[paramnum];
623 			}
624 		}
625 		else
626 		{
627 			float idet = 1/det;
628 			for (int paramnum = 0; paramnum < paramcount; paramnum++)
629 			{
630 				poly_param *params = &polygon->param[paramnum];
631 				params->dpdx  = idet*(v1->p[paramnum]*a00 + v2->p[paramnum]*a10 + v3->p[paramnum]*a20);
632 				params->dpdy  = idet*(v1->p[paramnum]*a01 + v2->p[paramnum]*a11 + v3->p[paramnum]*a21);
633 				params->start = idet*(v1->p[paramnum]*a02 + v2->p[paramnum]*a12 + v3->p[paramnum]*a22);
634 			}
635 		}
636 	}
637 
638 	/* enqueue the work items */
639 	if (poly->queue != nullptr)
640 		osd_work_item_queue_multiple(poly->queue, poly_item_callback, poly->unit_next - startunit, poly->unit[startunit], poly->unit_size, WORK_ITEM_FLAG_AUTO_RELEASE);
641 
642 	/* return the total number of pixels in the triangle */
643 	poly->triangles++;
644 	poly->pixels += pixels;
645 	return pixels;
646 }
647 
648 
649 /*-------------------------------------------------
650     poly_render_triangle_fan - render a set of
651     triangles in a fan
652 -------------------------------------------------*/
653 
poly_render_triangle_fan(legacy_poly_manager * poly,void * dest,const rectangle & cliprect,poly_draw_scanline_func callback,int paramcount,int numverts,const poly_vertex * v)654 uint32_t poly_render_triangle_fan(legacy_poly_manager *poly, void *dest, const rectangle &cliprect, poly_draw_scanline_func callback, int paramcount, int numverts, const poly_vertex *v)
655 {
656 	uint32_t pixels = 0;
657 	int vertnum;
658 
659 	/* iterate over vertices */
660 	for (vertnum = 2; vertnum < numverts; vertnum++)
661 		pixels += poly_render_triangle(poly, dest, cliprect, callback, paramcount, &v[0], &v[vertnum - 1], &v[vertnum]);
662 	return pixels;
663 }
664 
665 
666 /*-------------------------------------------------
667     poly_render_triangle_custom - perform a custom
668     render of an object, given specific extents
669 -------------------------------------------------*/
670 
poly_render_triangle_custom(legacy_poly_manager * poly,void * dest,const rectangle & cliprect,poly_draw_scanline_func callback,int startscanline,int numscanlines,const poly_extent * extents)671 uint32_t poly_render_triangle_custom(legacy_poly_manager *poly, void *dest, const rectangle &cliprect, poly_draw_scanline_func callback, int startscanline, int numscanlines, const poly_extent *extents)
672 {
673 	int32_t curscan, scaninc;
674 	polygon_info *polygon;
675 	int32_t v1yclip, v3yclip;
676 	int32_t pixels = 0;
677 	uint32_t startunit;
678 
679 	/* clip coordinates */
680 	v1yclip = std::max(startscanline, cliprect.min_y);
681 	v3yclip = std::min(startscanline + numscanlines, cliprect.max_y + 1);
682 	if (v3yclip - v1yclip <= 0)
683 		return 0;
684 
685 	/* allocate a new polygon */
686 	polygon = allocate_polygon(poly, v1yclip, v3yclip);
687 
688 	/* fill in the polygon information */
689 	polygon->poly = poly;
690 	polygon->dest = dest;
691 	polygon->callback = callback;
692 	polygon->extra = poly->extra[poly->extra_next - 1];
693 	polygon->numparams = 0;
694 	polygon->numverts = 3;
695 
696 	/* compute the X extents for each scanline */
697 	startunit = poly->unit_next;
698 	for (curscan = v1yclip; curscan < v3yclip; curscan += scaninc)
699 	{
700 		uint32_t bucketnum = ((uint32_t)curscan / SCANLINES_PER_BUCKET) % TOTAL_BUCKETS;
701 		uint32_t unit_index = poly->unit_next++;
702 		tri_work_unit *unit = &poly->unit[unit_index]->tri;
703 		int extnum;
704 
705 		/* determine how much to advance to hit the next bucket */
706 		scaninc = SCANLINES_PER_BUCKET - (uint32_t)curscan % SCANLINES_PER_BUCKET;
707 
708 		/* fill in the work unit basics */
709 		unit->shared.polygon = polygon;
710 		unit->shared.count_next = std::min(v3yclip - curscan, scaninc);
711 		unit->shared.scanline = curscan;
712 		unit->shared.previtem = poly->unit_bucket[bucketnum];
713 		poly->unit_bucket[bucketnum] = unit_index;
714 
715 		/* iterate over extents */
716 		for (extnum = 0; extnum < unit->shared.count_next; extnum++)
717 		{
718 			const poly_extent *extent = &extents[(curscan + extnum) - startscanline];
719 			int32_t istartx = extent->startx, istopx = extent->stopx;
720 
721 			/* force start < stop */
722 			if (istartx > istopx)
723 			{
724 				int32_t temp = istartx;
725 				istartx = istopx;
726 				istopx = temp;
727 			}
728 
729 			/* apply left/right clipping */
730 			if (istartx < cliprect.min_x)
731 				istartx = cliprect.min_x;
732 			if (istopx > cliprect.max_x)
733 				istopx = cliprect.max_x + 1;
734 
735 			/* set the extent and update the total pixel count */
736 			unit->extent[extnum].startx = istartx;
737 			unit->extent[extnum].stopx = istopx;
738 			if (istartx < istopx)
739 				pixels += istopx - istartx;
740 		}
741 	}
742 #if KEEP_STATISTICS
743 	poly->unit_max = std::max(poly->unit_max, poly->unit_next);
744 #endif
745 
746 	/* enqueue the work items */
747 	if (poly->queue != nullptr)
748 		osd_work_item_queue_multiple(poly->queue, poly_item_callback, poly->unit_next - startunit, poly->unit[startunit], poly->unit_size, WORK_ITEM_FLAG_AUTO_RELEASE);
749 
750 	/* return the total number of pixels in the object */
751 	poly->triangles++;
752 	poly->pixels += pixels;
753 	return pixels;
754 }
755 
756 
757 
/***************************************************************************
    CORE QUAD RENDERING
***************************************************************************/
761 
762 /*-------------------------------------------------
763     poly_render_quad - render a single quad
764     given 4 vertexes
765 -------------------------------------------------*/
766 
poly_render_quad(legacy_poly_manager * poly,void * dest,const rectangle & cliprect,poly_draw_scanline_func callback,int paramcount,const poly_vertex * v1,const poly_vertex * v2,const poly_vertex * v3,const poly_vertex * v4)767 uint32_t poly_render_quad(legacy_poly_manager *poly, void *dest, const rectangle &cliprect, poly_draw_scanline_func callback, int paramcount, const poly_vertex *v1, const poly_vertex *v2, const poly_vertex *v3, const poly_vertex *v4)
768 {
769 	poly_edge fedgelist[3], bedgelist[3];
770 	const poly_edge *ledge, *redge;
771 	const poly_vertex *v[4];
772 	poly_edge *edgeptr;
773 	int minv, maxv, curv;
774 	int32_t minyclip, maxyclip;
775 	int32_t miny, maxy;
776 	int32_t curscan, scaninc;
777 	polygon_info *polygon;
778 	int32_t pixels = 0;
779 	uint32_t startunit;
780 
781 	assert(poly->flags & POLYLGCY_FLAG_ALLOW_QUADS);
782 
783 	/* arrays make things easier */
784 	v[0] = v1;
785 	v[1] = v2;
786 	v[2] = v3;
787 	v[3] = v4;
788 
789 	/* determine min/max Y vertices */
790 	if (v[1]->y < v[0]->y)
791 		minv = 1, maxv = 0;
792 	else
793 		minv = 0, maxv = 1;
794 	if (v[2]->y < v[minv]->y)
795 		minv = 2;
796 	else if (v[2]->y > v[maxv]->y)
797 		maxv = 2;
798 	if (v[3]->y < v[minv]->y)
799 		minv = 3;
800 	else if (v[3]->y > v[maxv]->y)
801 		maxv = 3;
802 
803 	/* determine start/end scanlines */
804 	miny = round_coordinate(v[minv]->y);
805 	maxy = round_coordinate(v[maxv]->y);
806 
807 	/* clip coordinates */
808 	minyclip = miny;
809 	maxyclip = maxy + ((poly->flags & POLYLGCY_FLAG_INCLUDE_BOTTOM_EDGE) ? 1 : 0);
810 	minyclip = std::max(minyclip, cliprect.min_y);
811 	maxyclip = std::min(maxyclip, cliprect.max_y + 1);
812 	if (maxyclip - minyclip <= 0)
813 		return 0;
814 
815 	/* allocate a new polygon */
816 	polygon = allocate_polygon(poly, minyclip, maxyclip);
817 
818 	/* fill in the polygon information */
819 	polygon->poly = poly;
820 	polygon->dest = dest;
821 	polygon->callback = callback;
822 	polygon->extra = poly->extra[poly->extra_next - 1];
823 	polygon->numparams = paramcount;
824 	polygon->numverts = 4;
825 
826 	/* walk forward to build up the forward edge list */
827 	edgeptr = &fedgelist[0];
828 	for (curv = minv; curv != maxv; curv = (curv + 1) & 3)
829 	{
830 		int paramnum;
831 		float ooy;
832 
833 		/* set the two vertices */
834 		edgeptr->v1 = v[curv];
835 		edgeptr->v2 = v[(curv + 1) & 3];
836 
837 		/* if horizontal, skip altogether */
838 		if (edgeptr->v1->y == edgeptr->v2->y)
839 			continue;
840 
841 		/* need dx/dy always, and parameter deltas as necessary */
842 		ooy = 1.0f / (edgeptr->v2->y - edgeptr->v1->y);
843 		edgeptr->dxdy = (edgeptr->v2->x - edgeptr->v1->x) * ooy;
844 		for (paramnum = 0; paramnum < paramcount; paramnum++)
845 			edgeptr->dpdy[paramnum] = (edgeptr->v2->p[paramnum] - edgeptr->v1->p[paramnum]) * ooy;
846 		edgeptr++;
847 	}
848 
849 	/* walk backward to build up the backward edge list */
850 	edgeptr = &bedgelist[0];
851 	for (curv = minv; curv != maxv; curv = (curv - 1) & 3)
852 	{
853 		int paramnum;
854 		float ooy;
855 
856 		/* set the two vertices */
857 		edgeptr->v1 = v[curv];
858 		edgeptr->v2 = v[(curv - 1) & 3];
859 
860 		/* if horizontal, skip altogether */
861 		if (edgeptr->v1->y == edgeptr->v2->y)
862 			continue;
863 
864 		/* need dx/dy always, and parameter deltas as necessary */
865 		ooy = 1.0f / (edgeptr->v2->y - edgeptr->v1->y);
866 		edgeptr->dxdy = (edgeptr->v2->x - edgeptr->v1->x) * ooy;
867 		for (paramnum = 0; paramnum < paramcount; paramnum++)
868 			edgeptr->dpdy[paramnum] = (edgeptr->v2->p[paramnum] - edgeptr->v1->p[paramnum]) * ooy;
869 		edgeptr++;
870 	}
871 
872 	/* determine which list is left/right: */
873 	/* if the first vertex is shared, compare the slopes */
874 	/* if the first vertex is not shared, compare the X coordinates */
875 	if ((fedgelist[0].v1 == bedgelist[0].v1 && fedgelist[0].dxdy < bedgelist[0].dxdy) ||
876 		(fedgelist[0].v1 != bedgelist[0].v1 && fedgelist[0].v1->x < bedgelist[0].v1->x))
877 	{
878 		ledge = fedgelist;
879 		redge = bedgelist;
880 	}
881 	else
882 	{
883 		ledge = bedgelist;
884 		redge = fedgelist;
885 	}
886 
887 	/* compute the X extents for each scanline */
888 	startunit = poly->unit_next;
889 	for (curscan = minyclip; curscan < maxyclip; curscan += scaninc)
890 	{
891 		uint32_t bucketnum = ((uint32_t)curscan / SCANLINES_PER_BUCKET) % TOTAL_BUCKETS;
892 		uint32_t unit_index = poly->unit_next++;
893 		quad_work_unit *unit = &poly->unit[unit_index]->quad;
894 		int extnum;
895 
896 		/* determine how much to advance to hit the next bucket */
897 		scaninc = SCANLINES_PER_BUCKET - (uint32_t)curscan % SCANLINES_PER_BUCKET;
898 
899 		/* fill in the work unit basics */
900 		unit->shared.polygon = polygon;
901 		unit->shared.count_next = std::min(maxyclip - curscan, scaninc);
902 		unit->shared.scanline = curscan;
903 		unit->shared.previtem = poly->unit_bucket[bucketnum];
904 		poly->unit_bucket[bucketnum] = unit_index;
905 
906 		/* iterate over extents */
907 		for (extnum = 0; extnum < unit->shared.count_next; extnum++)
908 		{
909 			float fully = (float)(curscan + extnum) + 0.5f;
910 			float startx, stopx;
911 			int32_t istartx, istopx;
912 			int paramnum;
913 
914 			/* compute the ending X based on which part of the triangle we're in */
915 			while (fully > ledge->v2->y && fully < v[maxv]->y)
916 				ledge++;
917 			while (fully > redge->v2->y && fully < v[maxv]->y)
918 				redge++;
919 			startx = ledge->v1->x + (fully - ledge->v1->y) * ledge->dxdy;
920 			stopx = redge->v1->x + (fully - redge->v1->y) * redge->dxdy;
921 
922 			/* clamp to full pixels */
923 			istartx = round_coordinate(startx);
924 			istopx = round_coordinate(stopx);
925 
926 			/* compute parameter starting points and deltas */
927 			if (paramcount > 0)
928 			{
929 				float ldy = fully - ledge->v1->y;
930 				float rdy = fully - redge->v1->y;
931 				float oox = 1.0f / (stopx - startx);
932 
933 				/* iterate over parameters */
934 				for (paramnum = 0; paramnum < paramcount; paramnum++)
935 				{
936 					float lparam = ledge->v1->p[paramnum] + ldy * ledge->dpdy[paramnum];
937 					float rparam = redge->v1->p[paramnum] + rdy * redge->dpdy[paramnum];
938 					float dpdx = (rparam - lparam) * oox;
939 
940 					unit->extent[extnum].param[paramnum].start = lparam;// - ((float)istartx + 0.5f) * dpdx;
941 					unit->extent[extnum].param[paramnum].dpdx = dpdx;
942 				}
943 			}
944 
945 			/* include the right edge if requested */
946 			if (poly->flags & POLYLGCY_FLAG_INCLUDE_RIGHT_EDGE)
947 				istopx++;
948 
949 			/* apply left/right clipping */
950 			if (istartx < cliprect.min_x)
951 			{
952 				for (paramnum = 0; paramnum < paramcount; paramnum++)
953 					unit->extent[extnum].param[paramnum].start += (cliprect.min_x - istartx) * unit->extent[extnum].param[paramnum].dpdx;
954 				istartx = cliprect.min_x;
955 			}
956 			if (istopx > cliprect.max_x)
957 				istopx = cliprect.max_x + 1;
958 
959 			/* set the extent and update the total pixel count */
960 			if (istartx >= istopx)
961 				istartx = istopx = 0;
962 			unit->extent[extnum].startx = istartx;
963 			unit->extent[extnum].stopx = istopx;
964 			pixels += istopx - istartx;
965 		}
966 	}
967 #if KEEP_STATISTICS
968 	poly->unit_max = std::max(poly->unit_max, poly->unit_next);
969 #endif
970 
971 	/* enqueue the work items */
972 	if (poly->queue != nullptr)
973 		osd_work_item_queue_multiple(poly->queue, poly_item_callback, poly->unit_next - startunit, poly->unit[startunit], poly->unit_size, WORK_ITEM_FLAG_AUTO_RELEASE);
974 
975 	/* return the total number of pixels in the triangle */
976 	poly->quads++;
977 	poly->pixels += pixels;
978 	return pixels;
979 }
980 
981 
982 /*-------------------------------------------------
983     poly_render_quad_fan - render a set of
984     quads in a fan
985 -------------------------------------------------*/
986 
poly_render_quad_fan(legacy_poly_manager * poly,void * dest,const rectangle & cliprect,poly_draw_scanline_func callback,int paramcount,int numverts,const poly_vertex * v)987 uint32_t poly_render_quad_fan(legacy_poly_manager *poly, void *dest, const rectangle &cliprect, poly_draw_scanline_func callback, int paramcount, int numverts, const poly_vertex *v)
988 {
989 	uint32_t pixels = 0;
990 	int vertnum;
991 
992 	/* iterate over vertices */
993 	for (vertnum = 2; vertnum < numverts; vertnum += 2)
994 		pixels += poly_render_quad(poly, dest, cliprect, callback, paramcount, &v[0], &v[vertnum - 1], &v[vertnum], &v[std::min(vertnum + 1, numverts - 1)]);
995 	return pixels;
996 }
997 
998 
999 
1000 /***************************************************************************
1001     CORE POLYGON RENDERING
1002 ***************************************************************************/
1003 
1004 /*-------------------------------------------------
1005     poly_render_polygon - render a single polygon up
1006     to 32 vertices
1007 -------------------------------------------------*/
1008 
poly_render_polygon(legacy_poly_manager * poly,void * dest,const rectangle & cliprect,poly_draw_scanline_func callback,int paramcount,int numverts,const poly_vertex * v)1009 uint32_t poly_render_polygon(legacy_poly_manager *poly, void *dest, const rectangle &cliprect, poly_draw_scanline_func callback, int paramcount, int numverts, const poly_vertex *v)
1010 {
1011 	poly_edge fedgelist[POLYLGCY_MAX_POLYGON_VERTS - 1], bedgelist[POLYLGCY_MAX_POLYGON_VERTS - 1];
1012 	const poly_edge *ledge, *redge;
1013 	poly_edge *edgeptr;
1014 	int minv, maxv, curv;
1015 	int32_t minyclip, maxyclip;
1016 	int32_t miny, maxy;
1017 	int32_t curscan, scaninc;
1018 	polygon_info *polygon;
1019 	int32_t pixels = 0;
1020 	uint32_t startunit;
1021 	int vertnum;
1022 
1023 	assert(poly->flags & POLYLGCY_FLAG_ALLOW_QUADS);
1024 
1025 	/* determine min/max Y vertices */
1026 	minv = maxv = 0;
1027 	for (vertnum = 1; vertnum < numverts; vertnum++)
1028 	{
1029 		if (v[vertnum].y < v[minv].y)
1030 			minv = vertnum;
1031 		else if (v[vertnum].y > v[maxv].y)
1032 			maxv = vertnum;
1033 	}
1034 
1035 	/* determine start/end scanlines */
1036 	miny = round_coordinate(v[minv].y);
1037 	maxy = round_coordinate(v[maxv].y);
1038 
1039 	/* clip coordinates */
1040 	minyclip = miny;
1041 	maxyclip = maxy + ((poly->flags & POLYLGCY_FLAG_INCLUDE_BOTTOM_EDGE) ? 1 : 0);
1042 	minyclip = std::max(minyclip, cliprect.min_y);
1043 	maxyclip = std::min(maxyclip, cliprect.max_y + 1);
1044 	if (maxyclip - minyclip <= 0)
1045 		return 0;
1046 
1047 	/* allocate a new polygon */
1048 	polygon = allocate_polygon(poly, minyclip, maxyclip);
1049 
1050 	/* fill in the polygon information */
1051 	polygon->poly = poly;
1052 	polygon->dest = dest;
1053 	polygon->callback = callback;
1054 	polygon->extra = poly->extra[poly->extra_next - 1];
1055 	polygon->numparams = paramcount;
1056 	polygon->numverts = numverts;
1057 
1058 	/* walk forward to build up the forward edge list */
1059 	edgeptr = &fedgelist[0];
1060 	for (curv = minv; curv != maxv; curv = (curv == numverts - 1) ? 0 : (curv + 1))
1061 	{
1062 		int paramnum;
1063 		float ooy;
1064 
1065 		/* set the two vertices */
1066 		edgeptr->v1 = &v[curv];
1067 		edgeptr->v2 = &v[(curv == numverts - 1) ? 0 : (curv + 1)];
1068 
1069 		/* if horizontal, skip altogether */
1070 		if (edgeptr->v1->y == edgeptr->v2->y)
1071 			continue;
1072 
1073 		/* need dx/dy always, and parameter deltas as necessary */
1074 		ooy = 1.0f / (edgeptr->v2->y - edgeptr->v1->y);
1075 		edgeptr->dxdy = (edgeptr->v2->x - edgeptr->v1->x) * ooy;
1076 		for (paramnum = 0; paramnum < paramcount; paramnum++)
1077 			edgeptr->dpdy[paramnum] = (edgeptr->v2->p[paramnum] - edgeptr->v1->p[paramnum]) * ooy;
1078 		edgeptr++;
1079 	}
1080 
1081 	/* walk backward to build up the backward edge list */
1082 	edgeptr = &bedgelist[0];
1083 	for (curv = minv; curv != maxv; curv = (curv == 0) ? (numverts - 1) : (curv - 1))
1084 	{
1085 		int paramnum;
1086 		float ooy;
1087 
1088 		/* set the two vertices */
1089 		edgeptr->v1 = &v[curv];
1090 		edgeptr->v2 = &v[(curv == 0) ? (numverts - 1) : (curv - 1)];
1091 
1092 		/* if horizontal, skip altogether */
1093 		if (edgeptr->v1->y == edgeptr->v2->y)
1094 			continue;
1095 
1096 		/* need dx/dy always, and parameter deltas as necessary */
1097 		ooy = 1.0f / (edgeptr->v2->y - edgeptr->v1->y);
1098 		edgeptr->dxdy = (edgeptr->v2->x - edgeptr->v1->x) * ooy;
1099 		for (paramnum = 0; paramnum < paramcount; paramnum++)
1100 			edgeptr->dpdy[paramnum] = (edgeptr->v2->p[paramnum] - edgeptr->v1->p[paramnum]) * ooy;
1101 		edgeptr++;
1102 	}
1103 
1104 	/* determine which list is left/right: */
1105 	/* if the first vertex is shared, compare the slopes */
1106 	/* if the first vertex is not shared, compare the X coordinates */
1107 	if ((fedgelist[0].v1 == bedgelist[0].v1 && fedgelist[0].dxdy < bedgelist[0].dxdy) ||
1108 		(fedgelist[0].v1 != bedgelist[0].v1 && fedgelist[0].v1->x < bedgelist[0].v1->x))
1109 	{
1110 		ledge = fedgelist;
1111 		redge = bedgelist;
1112 	}
1113 	else
1114 	{
1115 		ledge = bedgelist;
1116 		redge = fedgelist;
1117 	}
1118 
1119 	/* compute the X extents for each scanline */
1120 	startunit = poly->unit_next;
1121 	for (curscan = minyclip; curscan < maxyclip; curscan += scaninc)
1122 	{
1123 		uint32_t bucketnum = ((uint32_t)curscan / SCANLINES_PER_BUCKET) % TOTAL_BUCKETS;
1124 		uint32_t unit_index = poly->unit_next++;
1125 		quad_work_unit *unit = &poly->unit[unit_index]->quad;
1126 		int extnum;
1127 
1128 		/* determine how much to advance to hit the next bucket */
1129 		scaninc = SCANLINES_PER_BUCKET - (uint32_t)curscan % SCANLINES_PER_BUCKET;
1130 
1131 		/* fill in the work unit basics */
1132 		unit->shared.polygon = polygon;
1133 		unit->shared.count_next = std::min(maxyclip - curscan, scaninc);
1134 		unit->shared.scanline = curscan;
1135 		unit->shared.previtem = poly->unit_bucket[bucketnum];
1136 		poly->unit_bucket[bucketnum] = unit_index;
1137 
1138 		/* iterate over extents */
1139 		for (extnum = 0; extnum < unit->shared.count_next; extnum++)
1140 		{
1141 			float fully = (float)(curscan + extnum) + 0.5f;
1142 			float startx, stopx;
1143 			int32_t istartx, istopx;
1144 			int paramnum;
1145 
1146 			/* compute the ending X based on which part of the triangle we're in */
1147 			while (fully > ledge->v2->y && fully < v[maxv].y)
1148 				ledge++;
1149 			while (fully > redge->v2->y && fully < v[maxv].y)
1150 				redge++;
1151 			startx = ledge->v1->x + (fully - ledge->v1->y) * ledge->dxdy;
1152 			stopx = redge->v1->x + (fully - redge->v1->y) * redge->dxdy;
1153 
1154 			/* clamp to full pixels */
1155 			istartx = round_coordinate(startx);
1156 			istopx = round_coordinate(stopx);
1157 
1158 			/* compute parameter starting points and deltas */
1159 			if (paramcount > 0)
1160 			{
1161 				float ldy = fully - ledge->v1->y;
1162 				float rdy = fully - redge->v1->y;
1163 				float oox = 1.0f / (stopx - startx);
1164 
1165 				/* iterate over parameters */
1166 				for (paramnum = 0; paramnum < paramcount; paramnum++)
1167 				{
1168 					float lparam = ledge->v1->p[paramnum] + ldy * ledge->dpdy[paramnum];
1169 					float rparam = redge->v1->p[paramnum] + rdy * redge->dpdy[paramnum];
1170 					float dpdx = (rparam - lparam) * oox;
1171 
1172 					unit->extent[extnum].param[paramnum].start = lparam;// - ((float)istartx + 0.5f) * dpdx;
1173 					unit->extent[extnum].param[paramnum].dpdx = dpdx;
1174 				}
1175 			}
1176 
1177 			/* include the right edge if requested */
1178 			if (poly->flags & POLYLGCY_FLAG_INCLUDE_RIGHT_EDGE)
1179 				istopx++;
1180 
1181 			/* apply left/right clipping */
1182 			if (istartx < cliprect.min_x)
1183 			{
1184 				for (paramnum = 0; paramnum < paramcount; paramnum++)
1185 					unit->extent[extnum].param[paramnum].start += (cliprect.min_x - istartx) * unit->extent[extnum].param[paramnum].dpdx;
1186 				istartx = cliprect.min_x;
1187 			}
1188 			if (istopx > cliprect.max_x)
1189 				istopx = cliprect.max_x + 1;
1190 
1191 			/* set the extent and update the total pixel count */
1192 			if (istartx >= istopx)
1193 				istartx = istopx = 0;
1194 			unit->extent[extnum].startx = istartx;
1195 			unit->extent[extnum].stopx = istopx;
1196 			pixels += istopx - istartx;
1197 		}
1198 	}
1199 #if KEEP_STATISTICS
1200 	poly->unit_max = std::max(poly->unit_max, poly->unit_next);
1201 #endif
1202 
1203 	/* enqueue the work items */
1204 	if (poly->queue != nullptr)
1205 		osd_work_item_queue_multiple(poly->queue, poly_item_callback, poly->unit_next - startunit, poly->unit[startunit], poly->unit_size, WORK_ITEM_FLAG_AUTO_RELEASE);
1206 
1207 	/* return the total number of pixels in the triangle */
1208 	poly->quads++;
1209 	poly->pixels += pixels;
1210 	return pixels;
1211 }
1212 
1213 
1214 
1215 /***************************************************************************
1216     CLIPPING
1217 ***************************************************************************/
1218 
1219 /*-------------------------------------------------
1220     poly_zclip_if_less - z clip a polygon against
1221     the given value, returning a set of clipped
1222     vertices
1223 -------------------------------------------------*/
1224 
poly_zclip_if_less(int numverts,const poly_vertex * v,poly_vertex * outv,int paramcount,float clipval)1225 int poly_zclip_if_less(int numverts, const poly_vertex *v, poly_vertex *outv, int paramcount, float clipval)
1226 {
1227 	int prevclipped = (v[numverts - 1].p[0] < clipval);
1228 	poly_vertex *nextout = outv;
1229 	int vertnum;
1230 
1231 	/* iterate over vertices */
1232 	for (vertnum = 0; vertnum < numverts; vertnum++)
1233 	{
1234 		int thisclipped = (v[vertnum].p[0] < clipval);
1235 
1236 		/* if we switched from clipped to non-clipped, interpolate a vertex */
1237 		if (thisclipped != prevclipped)
1238 			interpolate_vertex(nextout++, &v[(vertnum == 0) ? (numverts - 1) : (vertnum - 1)], &v[vertnum], paramcount, clipval);
1239 
1240 		/* if this vertex is not clipped, copy it in */
1241 		if (!thisclipped)
1242 			copy_vertex(nextout++, &v[vertnum], paramcount);
1243 
1244 		/* remember the last state */
1245 		prevclipped = thisclipped;
1246 	}
1247 	return nextout - outv;
1248 }
1249 
1250 
1251 
1252 /***************************************************************************
1253     INTERNAL FUNCTIONS
1254 ***************************************************************************/
1255 
1256 /*-------------------------------------------------
1257     allocate_array - allocate an array of pointers
1258 -------------------------------------------------*/
1259 
allocate_array(running_machine & machine,size_t * itemsize,uint32_t itemcount)1260 static void **allocate_array(running_machine &machine, size_t *itemsize, uint32_t itemcount)
1261 {
1262 	void **ptrarray;
1263 	int itemnum;
1264 
1265 	/* fail if 0 */
1266 	if (itemcount == 0)
1267 		return nullptr;
1268 
1269 	/* round to a cache line boundary */
1270 	*itemsize = ((*itemsize + CACHE_LINE_SIZE - 1) / CACHE_LINE_SIZE) * CACHE_LINE_SIZE;
1271 
1272 	/* allocate the array */
1273 	ptrarray = auto_alloc_array_clear(machine, void *, itemcount);
1274 
1275 	/* allocate the actual items */
1276 	ptrarray[0] = auto_alloc_array_clear(machine, uint8_t, *itemsize * itemcount);
1277 
1278 	/* initialize the pointer array */
1279 	for (itemnum = 1; itemnum < itemcount; itemnum++)
1280 		ptrarray[itemnum] = (uint8_t *)ptrarray[0] + *itemsize * itemnum;
1281 	return ptrarray;
1282 }
1283 
1284 
/*-------------------------------------------------
    poly_item_callback - work queue callback for
    each poly item: renders the scanlines of one
    work unit through the polygon's callback, then
    keeps going with any unit that was chained
    onto it in the meantime.

    param is the work_unit to process; threadid is
    passed through to the scanline callback.
    Always returns nullptr.

    As used here, shared.count_next packs two
    fields: the low 16 bits hold the number of
    scanlines in the unit, and the upper 16 bits
    hold the index of a unit that should run once
    this one is finished (0 = none).
-------------------------------------------------*/

static void *poly_item_callback(void *param, int threadid)
{
	while (1)
	{
		work_unit *unit = (work_unit *)param;
		polygon_info *polygon = unit->shared.polygon;
		/* low 16 bits of count_next = scanlines to render for this unit */
		int count = unit->shared.count_next & 0xffff;
		uint32_t orig_count_next;
		int curscan;

		/* if our previous item isn't done yet, enqueue this item to the end and proceed */
		/* (a previtem of 0xffff is treated as "no previous item") */
		if (unit->shared.previtem != 0xffff)
		{
			work_unit *prevunit = polygon->poly->unit[unit->shared.previtem];
			/* a non-zero count_next means the previous unit has not finished yet */
			if (prevunit->shared.count_next != 0)
			{
				/* recover this unit's index from its byte offset within the unit array */
				uint32_t unitnum = ((uint8_t *)unit - (uint8_t *)polygon->poly->unit[0]) / polygon->poly->unit_size;
				uint32_t new_count_next;

				/* attempt to atomically swap in this new value: */
				/* our index goes into the upper 16 bits of the previous unit's count_next */
				do
				{
					orig_count_next = prevunit->shared.count_next;
					new_count_next = orig_count_next | (unitnum << 16);
				} while (!prevunit->shared.count_next.compare_exchange_weak(orig_count_next, new_count_next, std::memory_order_release, std::memory_order_relaxed));

#if KEEP_STATISTICS
				/* track resolved conflicts */
				polygon->poly->conflicts[threadid]++;
				if (orig_count_next != 0)
					polygon->poly->resolved[threadid]++;
#endif
				/* if we succeeded, skip out early so we can do other work; */
				/* the previous unit's thread will pick this unit up when it finishes. */
				/* if the previous unit finished in the meantime (value was 0), */
				/* fall through and render this unit ourselves */
				if (orig_count_next != 0)
					break;
			}
		}

		/* iterate over extents */
		for (curscan = 0; curscan < count; curscan++)
		{
			/* triangles store compact extents that are expanded into a */
			/* temporary poly_extent before the callback is invoked */
			if (polygon->numverts == 3)
			{
				poly_extent tmpextent;
				convert_tri_extent_to_poly_extent(&tmpextent, &unit->tri.extent[curscan], polygon, unit->shared.scanline + curscan);
				(*polygon->callback)(polygon->dest, unit->shared.scanline + curscan, &tmpextent, polygon->extra, threadid);
			}
			else
				(*polygon->callback)(polygon->dest, unit->shared.scanline + curscan, &unit->quad.extent[curscan], polygon->extra, threadid);
		}

		/* set our count to 0 and re-fetch the original count value; */
		/* storing 0 marks this unit as finished, and the fetched value */
		/* tells us whether another unit chained itself onto us */
		do
		{
			orig_count_next = unit->shared.count_next;
		} while (!unit->shared.count_next.compare_exchange_weak(orig_count_next, 0, std::memory_order_release, std::memory_order_relaxed));

		/* if we have no more work to do, do nothing; */
		/* otherwise the upper 16 bits name the unit to process next */
		orig_count_next >>= 16;
		if (orig_count_next == 0)
			break;
		param = polygon->poly->unit[orig_count_next];
	}
	return nullptr;
}
1355 
1356 
1357 /*-------------------------------------------------
1358     poly_state_presave - pre-save callback to
1359     ensure everything is synced before saving
1360 -------------------------------------------------*/
1361 
poly_state_presave(legacy_poly_manager & poly)1362 static void poly_state_presave(legacy_poly_manager &poly)
1363 {
1364 	poly_wait(&poly, "pre-save");
1365 }
1366