1 // Emacs style mode select   -*- C++ -*-
2 //-----------------------------------------------------------------------------
3 //
4 // $Id: r_drawt.cpp 4542 2014-02-09 17:39:42Z dr_sean $
5 //
6 // Copyright (C) 1998-2006 by Randy Heit (ZDoom).
7 // Copyright (C) 2006-2014 by The Odamex Team.
8 //
9 // This program is free software; you can redistribute it and/or
10 // modify it under the terms of the GNU General Public License
11 // as published by the Free Software Foundation; either version 2
12 // of the License, or (at your option) any later version.
13 //
14 // This program is distributed in the hope that it will be useful,
15 // but WITHOUT ANY WARRANTY; without even the implied warranty of
16 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17 // GNU General Public License for more details.
18 //
19 // DESCRIPTION:
20 //	Functions for drawing columns into a temporary buffer and then
21 //	copying them to the screen. On machines with a decent cache, this
22 //	is faster than drawing them directly to the screen. Will I be able
23 //	to even understand any of this if I come back to it later? Let's
24 //	hope so. :-)
25 //
26 //-----------------------------------------------------------------------------
27 
28 
29 #include <assert.h>
30 #include <stdio.h>
31 #include <stdlib.h>
32 
33 #include "doomtype.h"
34 #include "doomdef.h"
35 #include "i_system.h"
36 #include "r_defs.h"
37 #include "r_draw.h"
38 #include "r_main.h"
39 #include "r_things.h"
40 #include "v_video.h"
41 
// Function pointers used by rtv_lucent4cols<>() to blend one row of four
// buffered columns onto the destination (P = palindex_t destination,
// D = argb_t destination). Both start out NULL, so they must be assigned
// elsewhere before any four-column translucent draw is issued.
void (*rtv_lucent4colsP)(byte *source, palindex_t *dest, int bga, int fga) = NULL;
void (*rtv_lucent4colsD)(byte *source, argb_t *dest, int bga, int fga) = NULL;


// Palettized functions:

// Temporary column buffer. The rt_*cols routines read it as
// dc_temp[y*4 + hx], i.e. four interleaved columns with one 4-byte group
// per row.
byte dc_temp[MAXHEIGHT * 4]; // denis - todo - security, overflow
// Span storage for each of the four buffered columns. Spans are stored as
// consecutive (top, bottom) pairs of unsigned ints.
unsigned int dc_tspans[4][256];
// Per column: the slot just past the last recorded span (the draw loops stop
// when horizspan reaches it; rt_draw4cols writes a dummy span there).
unsigned int *dc_ctspan[4];
// Per column: the next span that still has to be drawn.
unsigned int *horizspan[4];
52 
//
// rt_draw1blankcol
//
// [SL] - Intentional no-op with the signature of the single-column
// drawing functions. Install it wherever a column drawing function
// pointer must be callable but nothing should be drawn.
//
void rt_draw1blankcol(int hx, int sx, int yl, int yh)
{
	// All arguments are deliberately ignored.
	(void)hx;
	(void)sx;
	(void)yl;
	(void)yh;
}
62 
//
// rt_draw4blankcols
//
// [SL] - Intentional no-op with the signature of the four-column
// drawing functions. Install it wherever a column drawing function
// pointer must be callable but nothing should be drawn.
//
void rt_draw4blankcols(int sx, int yl, int yh)
{
	// All arguments are deliberately ignored.
	(void)sx;
	(void)yl;
	(void)yh;
}
72 
// Forward declarations of the generic column writers defined below.
// pixel_t is the destination pixel type and columns is the number of
// adjacent columns written per row (the wrappers in this file use 1 or 4).
// All of them share the same arguments:
//   hx     - first column inside the temporary buffer dc_temp
//   sx     - first destination column
//   yl, yh - first and last row to draw (inclusive)

// Writes buffered pixels through rt_rawcolor.
template<typename pixel_t, int columns>
static forceinline void rt_copycols(int hx, int sx, int yl, int yh);

// Writes buffered pixels through rt_mapcolor with dcol.colormap.
template<typename pixel_t, int columns>
static forceinline void rt_mapcols(int hx, int sx, int yl, int yh);

// Writes buffered pixels through rt_tlatecolor with dcol.colormap and
// dcol.translation.
template<typename pixel_t, int columns>
static forceinline void rt_tlatecols(int hx, int sx, int yl, int yh);

// Blends colormapped buffered pixels with the existing destination pixels.
template<typename pixel_t, int columns>
static forceinline void rt_lucentcols(int hx, int sx, int yl, int yh);

// Blends translated buffered pixels with the existing destination pixels.
template<typename pixel_t, int columns>
static forceinline void rt_tlatelucentcols(int hx, int sx, int yl, int yh);
87 
88 
89 template<typename pixel_t, int columns>
rt_copycols(int hx,int sx,int yl,int yh)90 static forceinline void rt_copycols(int hx, int sx, int yl, int yh)
91 {
92 	byte *source;
93 	pixel_t *dest;
94 	int count;
95 	int pitch;
96 
97 	count = yh-yl;
98 	if (count < 0)
99 		return;
100 	count++;
101 
102 	shaderef_t pal = shaderef_t(&realcolormaps, 0);
103 	dest = (pixel_t *)(ylookup[yl] + columnofs[sx]);
104 	source = &dc_temp[yl*4 + hx];
105 	pitch = dcol.pitch / sizeof(pixel_t);
106 
107 	if (count & 1)
108 	{
109 		for (int i = 0; i < columns; ++i)
110 			dest[pitch*0+i] = rt_rawcolor<pixel_t>(pal, source[0+i]);
111 		source += 4;
112 		dest += pitch;
113 	}
114 	if (!(count >>= 1))
115 		return;
116 
117 	do
118 	{
119 		for (int i = 0; i < columns; ++i)
120 			dest[pitch*0+i] = rt_rawcolor<pixel_t>(pal, source[0+i]);
121 		for (int i = 0; i < columns; ++i)
122 			dest[pitch*1+i] = rt_rawcolor<pixel_t>(pal, source[4+i]);
123 		source += 8;
124 		dest += pitch*2;
125 	} while (--count);
126 }
127 
128 template<typename pixel_t, int columns>
rt_mapcols(int hx,int sx,int yl,int yh)129 static forceinline void rt_mapcols(int hx, int sx, int yl, int yh)
130 {
131 	byte *source;
132 	pixel_t *dest;
133 	int count;
134 	int pitch;
135 
136 	count = yh-yl;
137 	if (count < 0)
138 		return;
139 	count++;
140 
141 	dest = (pixel_t *)(ylookup[yl] + columnofs[sx]);
142 	source = &dc_temp[yl*4 + hx];
143 	pitch = dcol.pitch / sizeof(pixel_t);
144 
145 	if (count & 1)
146 	{
147 		for (int i = 0; i < columns; ++i)
148 			dest[pitch*0+i] = rt_mapcolor<pixel_t>(dcol.colormap, source[0+i]);
149 		source += 4;
150 		dest += pitch;
151 	}
152 	if (!(count >>= 1))
153 		return;
154 
155 	do
156 	{
157 		for (int i = 0; i < columns; ++i)
158 			dest[pitch*0+i] = rt_mapcolor<pixel_t>(dcol.colormap, source[0+i]);
159 		for (int i = 0; i < columns; ++i)
160 			dest[pitch*1+i] = rt_mapcolor<pixel_t>(dcol.colormap, source[4+i]);
161 		source += 8;
162 		dest += pitch*2;
163 	} while (--count);
164 }
165 
166 template<typename pixel_t, int columns>
rt_tlatecols(int hx,int sx,int yl,int yh)167 static forceinline void rt_tlatecols(int hx, int sx, int yl, int yh)
168 {
169 	byte *source;
170 	pixel_t *dest;
171 	int count;
172 	int pitch;
173 
174 	count = yh-yl;
175 	if (count < 0)
176 		return;
177 	count++;
178 
179 	dest = (pixel_t *)( ylookup[yl] + columnofs[sx] );
180 	source = &dc_temp[yl*4 + hx];
181 	pitch = dcol.pitch / sizeof(pixel_t);
182 
183 	do
184 	{
185 		for (int i = 0; i < columns; ++i)
186 			dest[i] = rt_tlatecolor<pixel_t>(dcol.colormap, dcol.translation, source[i]);
187 		source += 4;
188 		dest += pitch;
189 	} while (--count);
190 }
191 
192 
193 
194 template<typename pixel_t>
195 static forceinline void rtv_lucent4cols(byte *source, pixel_t *dest, int bga, int fga);
196 
197 template<>
rtv_lucent4cols(byte * source,palindex_t * dest,int bga,int fga)198 forceinline void rtv_lucent4cols(byte *source, palindex_t *dest, int bga, int fga)
199 {
200 	rtv_lucent4colsP(source, dest, bga, fga);
201 }
202 
203 template<>
rtv_lucent4cols(byte * source,argb_t * dest,int bga,int fga)204 forceinline void rtv_lucent4cols(byte *source, argb_t *dest, int bga, int fga)
205 {
206 	rtv_lucent4colsD(source, dest, bga, fga);
207 }
208 
209 
210 template<typename pixel_t, int columns>
rt_lucentcols(int hx,int sx,int yl,int yh)211 static forceinline void rt_lucentcols(int hx, int sx, int yl, int yh)
212 {
213 	byte *source;
214 	pixel_t *dest;
215 	int count;
216 	int pitch;
217 
218 	count = yh-yl;
219 	if (count < 0)
220 		return;
221 	count++;
222 
223 	int fga = (dcol.translevel & ~0x03FF) >> 8;
224 	int bga = 255 - fga;
225 
226 	dest = (pixel_t *)( ylookup[yl] + columnofs[sx] );
227 	source = &dc_temp[yl*4 + hx];
228 	pitch = dcol.pitch / sizeof(pixel_t);
229 
230 	do
231 	{
232 		if (columns == 4)
233 		{
234 			rtv_lucent4cols<pixel_t>(source, dest, bga, fga);
235 		}
236 		else
237 		{
238 			for (int i = 0; i < columns; ++i)
239 			{
240 				const pixel_t fg = rt_mapcolor<pixel_t>(dcol.colormap, source[i]);
241 				const pixel_t bg = dest[i];
242 
243 				dest[i] = rt_blend2<pixel_t>(bg, bga, fg, fga);
244 			}
245 		}
246 
247 		source += 4;
248 		dest += pitch;
249 	} while (--count);
250 }
251 
252 template<typename pixel_t, int columns>
rt_tlatelucentcols(int hx,int sx,int yl,int yh)253 static forceinline void rt_tlatelucentcols(int hx, int sx, int yl, int yh)
254 {
255 	byte *source;
256 	pixel_t *dest;
257 	int count;
258 	int pitch;
259 
260 	count = yh-yl;
261 	if (count < 0)
262 		return;
263 	count++;
264 
265 	int fga = (dcol.translevel & ~0x03FF) >> 8;
266 	int bga = 255 - fga;
267 
268 	dest = (pixel_t *)( ylookup[yl] + columnofs[sx] );
269 	source = &dc_temp[yl*4 + hx];
270 	pitch = dcol.pitch / sizeof(pixel_t);
271 
272 	do
273 	{
274 		for (int i = 0; i < columns; ++i)
275 		{
276 			const pixel_t fg = rt_tlatecolor<pixel_t>(dcol.colormap, dcol.translation, source[i]);
277 			const pixel_t bg = dest[i];
278 
279 			dest[i] = rt_blend2<pixel_t>(bg, bga, fg, fga);
280 		}
281 
282 		source += 4;
283 		dest += pitch;
284 	} while (--count);
285 }
286 
287 
// Copies one span at hx to the screen at sx.
// Palettized variant: rt_copycols with byte pixels, one column wide.
// hx is the column inside the temporary buffer, sx the destination column,
// yl..yh the inclusive row range.
void rt_copy1colP (int hx, int sx, int yl, int yh)
{
	rt_copycols<byte, 1>(hx, sx, yl, yh);
}
293 
// Copies all four spans to the screen starting at sx.
// Palettized variant: rt_copycols with byte pixels, four columns wide,
// reading from buffer column 0. yl..yh is the inclusive row range.
void rt_copy4colsP (int sx, int yl, int yh)
{
	rt_copycols<byte, 4>(0, sx, yl, yh);
}
299 
// Maps one span at hx to the screen at sx.
// Palettized variant: rt_mapcols with byte pixels, one column wide.
// hx is the column inside the temporary buffer, sx the destination column,
// yl..yh the inclusive row range.
void rt_map1colP (int hx, int sx, int yl, int yh)
{
	rt_mapcols<byte, 1>(hx, sx, yl, yh);
}
305 
// Maps all four spans to the screen starting at sx.
// Palettized variant: rt_mapcols with byte pixels, four columns wide,
// reading from buffer column 0. yl..yh is the inclusive row range.
void rt_map4colsP (int sx, int yl, int yh)
{
	rt_mapcols<byte, 4>(0, sx, yl, yh);
}
311 
// Translates one span at hx to the screen at sx.
// Palettized variant: rt_tlatecols with byte pixels, one column wide.
// hx is the column inside the temporary buffer, sx the destination column,
// yl..yh the inclusive row range.
void rt_tlate1colP (int hx, int sx, int yl, int yh)
{
	rt_tlatecols<byte, 1>(hx, sx, yl, yh);
}
317 
// Translates all four spans to the screen starting at sx.
// Palettized variant: rt_tlatecols with byte pixels, four columns wide,
// reading from buffer column 0. yl..yh is the inclusive row range.
void rt_tlate4colsP (int sx, int yl, int yh)
{
	rt_tlatecols<byte, 4>(0, sx, yl, yh);
}
323 
// Mixes one span at hx to the screen at sx.
// Palettized variant: rt_lucentcols with byte pixels, one column wide, so
// the buffered pixels are blended with what is already at the destination.
// hx is the column inside the temporary buffer, sx the destination column,
// yl..yh the inclusive row range.
void rt_lucent1colP (int hx, int sx, int yl, int yh)
{
	rt_lucentcols<byte, 1>(hx, sx, yl, yh);
}
329 
// Mixes all four spans to the screen starting at sx.
// Palettized variant: rt_lucentcols with byte pixels, four columns wide,
// reading from buffer column 0. With four columns each row is blended
// through rtv_lucent4cols. yl..yh is the inclusive row range.
void rt_lucent4colsP (int sx, int yl, int yh)
{
	rt_lucentcols<byte, 4>(0, sx, yl, yh);
}
335 
// Translates and mixes one span at hx to the screen at sx.
// Palettized variant: rt_tlatelucentcols with byte pixels, one column wide.
// hx is the column inside the temporary buffer, sx the destination column,
// yl..yh the inclusive row range.
void rt_tlatelucent1colP (int hx, int sx, int yl, int yh)
{
	rt_tlatelucentcols<byte, 1>(hx, sx, yl, yh);
}
341 
// Translates and mixes all four spans to the screen starting at sx.
// Palettized variant: rt_tlatelucentcols with byte pixels, four columns
// wide, reading from buffer column 0. yl..yh is the inclusive row range.
void rt_tlatelucent4colsP (int sx, int yl, int yh)
{
	rt_tlatelucentcols<byte, 4>(0, sx, yl, yh);
}
347 
348 
349 // Direct rendering (32-bit) functions:
350 
351 
// Copies one span at hx to the screen at sx.
// Direct variant: rt_copycols with argb_t pixels, one column wide.
// hx is the column inside the temporary buffer, sx the destination column,
// yl..yh the inclusive row range.
void rt_copy1colD (int hx, int sx, int yl, int yh)
{
	rt_copycols<argb_t, 1>(hx, sx, yl, yh);
}
356 
// Copies all four spans to the screen starting at sx.
// Direct variant: rt_copycols with argb_t pixels, four columns wide,
// reading from buffer column 0. yl..yh is the inclusive row range.
void rt_copy4colsD (int sx, int yl, int yh)
{
	rt_copycols<argb_t, 4>(0, sx, yl, yh);
}
361 
// Maps one span at hx to the screen at sx.
// Direct variant: rt_mapcols with argb_t pixels, one column wide.
// hx is the column inside the temporary buffer, sx the destination column,
// yl..yh the inclusive row range.
void rt_map1colD (int hx, int sx, int yl, int yh)
{
	rt_mapcols<argb_t, 1>(hx, sx, yl, yh);
}
366 
// Maps all four spans to the screen starting at sx.
// Direct variant: rt_mapcols with argb_t pixels, four columns wide,
// reading from buffer column 0. yl..yh is the inclusive row range.
void rt_map4colsD (int sx, int yl, int yh)
{
	rt_mapcols<argb_t, 4>(0, sx, yl, yh);
}
371 
// Translates one span at hx to the screen at sx.
// Direct variant: rt_tlatecols with argb_t pixels, one column wide.
// hx is the column inside the temporary buffer, sx the destination column,
// yl..yh the inclusive row range.
void rt_tlate1colD (int hx, int sx, int yl, int yh)
{
	rt_tlatecols<argb_t, 1>(hx, sx, yl, yh);
}
376 
// Translates all four spans to the screen starting at sx.
// Direct variant: rt_tlatecols with argb_t pixels, four columns wide,
// reading from buffer column 0. yl..yh is the inclusive row range.
void rt_tlate4colsD (int sx, int yl, int yh)
{
	rt_tlatecols<argb_t, 4>(0, sx, yl, yh);
}
381 
// Mixes one span at hx to the screen at sx.
// Direct variant: rt_lucentcols with argb_t pixels, one column wide, so the
// buffered pixels are blended with what is already at the destination.
// hx is the column inside the temporary buffer, sx the destination column,
// yl..yh the inclusive row range.
void rt_lucent1colD (int hx, int sx, int yl, int yh)
{
	rt_lucentcols<argb_t, 1>(hx, sx, yl, yh);
}
386 
// Mixes all four spans to the screen starting at sx.
// Direct variant: rt_lucentcols with argb_t pixels, four columns wide,
// reading from buffer column 0. With four columns each row is blended
// through rtv_lucent4cols. yl..yh is the inclusive row range.
void rt_lucent4colsD (int sx, int yl, int yh)
{
	rt_lucentcols<argb_t, 4>(0, sx, yl, yh);
}
391 
// Translates and mixes one span at hx to the screen at sx.
// Direct variant: rt_tlatelucentcols with argb_t pixels, one column wide.
// hx is the column inside the temporary buffer, sx the destination column,
// yl..yh the inclusive row range.
void rt_tlatelucent1colD (int hx, int sx, int yl, int yh)
{
	rt_tlatelucentcols<argb_t, 1>(hx, sx, yl, yh);
}
396 
// Translates and mixes all four spans to the screen starting at sx.
// Direct variant: rt_tlatelucentcols with argb_t pixels, four columns wide,
// reading from buffer column 0. yl..yh is the inclusive row range.
void rt_tlatelucent4colsD (int sx, int yl, int yh)
{
	rt_tlatelucentcols<argb_t, 4>(0, sx, yl, yh);
}
401 
402 // Functions for v_video.cpp support
403 
r_dimpatchD_c(const DCanvas * const cvs,argb_t color,int alpha,int x1,int y1,int w,int h)404 void r_dimpatchD_c(const DCanvas *const cvs, argb_t color, int alpha, int x1, int y1, int w, int h)
405 {
406 	int dpitch = cvs->pitch / sizeof(argb_t);
407 	argb_t* line = (argb_t *)cvs->buffer + y1 * dpitch;
408 
409 	for (int y = y1; y < y1 + h; y++)
410 	{
411 		for (int x = x1; x < x1 + w; x++)
412 			line[x] = alphablend1a(line[x], color, alpha);
413 
414 		line += dpitch;
415 	}
416 }
417 
418 
419 // Generic drawing functions which call either D(irect) or P(alettized) functions above:
420 
421 
422 // Draws all spans at hx to the screen at sx.
rt_draw1col(int hx,int sx)423 void rt_draw1col (int hx, int sx)
424 {
425 	while (horizspan[hx] < dc_ctspan[hx]) {
426 		hcolfunc_post1 (hx, sx, horizspan[hx][0], horizspan[hx][1]);
427 		horizspan[hx] += 2;
428 	}
429 }
430 
// Copies all spans in all four columns to the screen starting at sx.
// sx should be dword-aligned
//
// Buffer column x is drawn to screen column sx + x. Row ranges covered by
// the current span of every column are drawn with one hcolfunc_post4 call;
// everything else is drawn column by column with hcolfunc_post1. Spans are
// consumed (and sometimes trimmed in place) through horizspan[] as they are
// drawn, and the function returns once every column has reached its
// dc_ctspan[] end marker.
void rt_draw4cols(int sx)
{
	int x, bad;
	unsigned int maxtop, minbot, minnexttop;

	// Place a dummy "span" in each column. These don't get
	// drawn. They're just here to avoid special cases in the
	// max/min calculations below.
	// (The dummy has top = viewheight + 1 and bottom = viewheight, so its
	// top is greater than its bottom.)
	for (x = 0; x < 4; ++x)
	{
		dc_ctspan[x][0] = viewheight + 1;
		dc_ctspan[x][1] = viewheight;
	}

	for (;;)
	{
		// If a column is out of spans, mark it as such
		// (bit x of "bad" is set for column x). For the other columns,
		// track the smallest top among the spans that FOLLOW the current ones.
		bad = 0;
		minnexttop = 0xffffffff;

		for (x = 0; x < 4; ++x)
		{
			if (horizspan[x] >= dc_ctspan[x])
				bad |= 1 << x;
			else if ((horizspan[x]+2)[0] < minnexttop)
				minnexttop = (horizspan[x]+2)[0];
		}
		// Once all columns are out of spans, we're done
		if (bad == 15)
			return;

		// Find the largest shared area for the spans in each column
		// (an exhausted column contributes its dummy span here).
		maxtop = MAX (MAX (horizspan[0][0], horizspan[1][0]),
					  MAX (horizspan[2][0], horizspan[3][0]));
		minbot = MIN (MIN (horizspan[0][1], horizspan[1][1]),
					  MIN (horizspan[2][1], horizspan[3][1]));

		// If there is no shared area with these spans, draw each span
		// individually and advance to the next spans until we reach a shared area.
		// However, only draw spans down to the highest span in the next set of
		// spans. If we allow the entire height of a span to be drawn, it could
		// prevent any more shared areas from being drawn in these four columns.
		//
		// Example: Suppose we have the following arrangement:
		//			A CD
		//			A CD
		//			 B D
		//			 B D
		//			aB D
		//			aBcD
		//			aBcD
		//			aBc
		//
		// If we draw the entire height of the spans, we end up drawing this first:
		//			A CD
		//			A CD
		//			 B D
		//			 B D
		//			 B D
		//			 B D
		//			 B D
		//			 B D
		//			 B
		//
		// This leaves only the "a" and "c" columns to be drawn, and they are not
		// part of a shared area, but if we can include B and D with them, we can
		// get a shared area. So we cut off everything in the first set just
		// above the "a" column and end up drawing this first:
		//			A CD
		//			A CD
		//			 B D
		//			 B D
		//
		// Then the next time through, we have the following arrangement with an
		// easily shared area to draw:
		//			aB D
		//			aBcD
		//			aBcD
		//			aBc
		if (bad != 0 || maxtop > minbot)
		{
			for (x = 0; x < 4; ++x)
			{
				// "bad" is shifted right once per column below, so bit 0
				// always refers to the current column x.
				if (!(bad & 1))
				{
					if (horizspan[x][1] < minnexttop)
					{
						// The span ends above minnexttop: draw all of it
						// and move on to the column's next span.
						hcolfunc_post1(x, sx + x, horizspan[x][0], horizspan[x][1]);
						horizspan[x] += 2;
					}
					else if (minnexttop > horizspan[x][0])
					{
						// The span crosses minnexttop: draw only the part
						// above it and keep the remainder for a later pass.
						hcolfunc_post1(x, sx + x, horizspan[x][0], minnexttop - 1);
						horizspan[x][0] = minnexttop;
					}
				}
				bad >>= 1;
			}
			continue;
		}

		// Draw any span fragments above the shared area.
		for (x = 0; x < 4; ++x)
		{
			if (maxtop > horizspan[x][0])
				hcolfunc_post1(x, sx + x, horizspan[x][0], maxtop - 1);
		}

		// Draw the shared area.
		hcolfunc_post4(sx, maxtop, minbot);

		// For each column, if part of the span is past the shared area,
		// set its top to just below the shared area. Otherwise, advance
		// to the next span in that column.
		for (x = 0; x < 4; ++x)
		{
			if (minbot < horizspan[x][1])
				horizspan[x][0] = minbot + 1;
			else
				horizspan[x] += 2;
		}
	}
}
556 
557 // Before each pass through a rendering loop that uses these routines,
558 // call this function to set up the span pointers.
rt_initcols(void)559 void rt_initcols (void)
560 {
561 	int y;
562 
563 	for (y = 3; y >= 0; y--)
564 		horizspan[y] = dc_ctspan[y] = &dc_tspans[y][0];
565 }
566 
// Revision stamp for this file. NOTE(review): VERSION_CONTROL is defined
// outside this file; presumably it records the $Id$ string below - confirm.
VERSION_CONTROL (r_drawt_cpp, "$Id: r_drawt.cpp 4542 2014-02-09 17:39:42Z dr_sean $")
568 
569