1 // Emacs style mode select -*- C++ -*-
2 //-----------------------------------------------------------------------------
3 //
4 // $Id: r_drawt.cpp 4542 2014-02-09 17:39:42Z dr_sean $
5 //
6 // Copyright (C) 1998-2006 by Randy Heit (ZDoom).
7 // Copyright (C) 2006-2014 by The Odamex Team.
8 //
9 // This program is free software; you can redistribute it and/or
10 // modify it under the terms of the GNU General Public License
11 // as published by the Free Software Foundation; either version 2
12 // of the License, or (at your option) any later version.
13 //
14 // This program is distributed in the hope that it will be useful,
15 // but WITHOUT ANY WARRANTY; without even the implied warranty of
16 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 // GNU General Public License for more details.
18 //
19 // DESCRIPTION:
20 // Functions for drawing columns into a temporary buffer and then
21 // copying them to the screen. On machines with a decent cache, this
22 // is faster than drawing them directly to the screen. Will I be able
23 // to even understand any of this if I come back to it later? Let's
24 // hope so. :-)
25 //
26 //-----------------------------------------------------------------------------
27
28
29 #include <assert.h>
30 #include <stdio.h>
31 #include <stdlib.h>
32
33 #include "doomtype.h"
34 #include "doomdef.h"
35 #include "i_system.h"
36 #include "r_defs.h"
37 #include "r_draw.h"
38 #include "r_main.h"
39 #include "r_things.h"
40 #include "v_video.h"
41
// Function pointers that the rtv_lucent4cols<> specializations further down
// forward to when rt_lucentcols is instantiated with columns == 4.
//   rtv_lucent4colsP - destination pixels are palindex_t
//   rtv_lucent4colsD - destination pixels are argb_t
// Both are initialised to NULL and are not assigned anywhere in the code
// visible here; presumably they are installed elsewhere before any
// rt_lucent4cols* function runs -- TODO confirm.
void (*rtv_lucent4colsP)(byte *source, palindex_t *dest, int bga, int fga) = NULL;
void (*rtv_lucent4colsD)(byte *source, argb_t *dest, int bga, int fga) = NULL;
44
45
46 // Palettized functions:
47
// Temporary column buffer. The rt_*cols helpers below read the pixel for
// row y of temporary column hx from dc_temp[y*4 + hx], i.e. each row holds
// four interleaved columns.
byte dc_temp[MAXHEIGHT * 4]; // denis - todo - security, overflow
// Span storage for each of the four temporary columns. The drawing routines
// below read entries in pairs: [0] is passed as yl and [1] as yh.
unsigned int dc_tspans[4][256];
// Per column: end of the spans that are available to draw. rt_initcols
// resets it to the start of dc_tspans[x]; rt_draw4cols writes a dummy span
// at this position.
unsigned int *dc_ctspan[4];
// Per column: the next span that rt_draw1col / rt_draw4cols will draw.
// Advanced by two entries each time a span has been fully drawn.
unsigned int *horizspan[4];
52
//
// rt_draw1blankcol
//
// [SL] - Intentional no-op with the same signature as the single-column
// drawers. Install it where a column drawing function pointer must be
// callable but should leave the screen untouched.
//
void rt_draw1blankcol(int hx, int sx, int yl, int yh)
{
	// All arguments are deliberately ignored.
	(void)hx;
	(void)sx;
	(void)yl;
	(void)yh;
}
62
//
// rt_draw4blankcols
//
// [SL] - Intentional no-op with the same signature as the four-column
// drawers. Install it where a column drawing function pointer must be
// callable but should leave the screen untouched.
//
void rt_draw4blankcols(int sx, int yl, int yh)
{
	// All arguments are deliberately ignored.
	(void)sx;
	(void)yl;
	(void)yh;
}
72
// Forward declarations of the templated transfer helpers defined below.
//   pixel_t - destination pixel type (this file instantiates byte and argb_t)
//   columns - number of adjacent columns written per row (this file
//             instantiates 1 and 4)
// All of them take the temporary-buffer column hx, the screen column sx and
// the inclusive row range yl..yh.

// Writes pixels converted with rt_rawcolor.
template<typename pixel_t, int columns>
static forceinline void rt_copycols(int hx, int sx, int yl, int yh);

// Writes pixels converted with rt_mapcolor and dcol.colormap.
template<typename pixel_t, int columns>
static forceinline void rt_mapcols(int hx, int sx, int yl, int yh);

// Writes pixels converted with rt_tlatecolor (dcol.colormap + dcol.translation).
template<typename pixel_t, int columns>
static forceinline void rt_tlatecols(int hx, int sx, int yl, int yh);

// Blends colormapped pixels with what is already on the destination.
template<typename pixel_t, int columns>
static forceinline void rt_lucentcols(int hx, int sx, int yl, int yh);

// Blends translated pixels with what is already on the destination.
template<typename pixel_t, int columns>
static forceinline void rt_tlatelucentcols(int hx, int sx, int yl, int yh);
87
88
89 template<typename pixel_t, int columns>
rt_copycols(int hx,int sx,int yl,int yh)90 static forceinline void rt_copycols(int hx, int sx, int yl, int yh)
91 {
92 byte *source;
93 pixel_t *dest;
94 int count;
95 int pitch;
96
97 count = yh-yl;
98 if (count < 0)
99 return;
100 count++;
101
102 shaderef_t pal = shaderef_t(&realcolormaps, 0);
103 dest = (pixel_t *)(ylookup[yl] + columnofs[sx]);
104 source = &dc_temp[yl*4 + hx];
105 pitch = dcol.pitch / sizeof(pixel_t);
106
107 if (count & 1)
108 {
109 for (int i = 0; i < columns; ++i)
110 dest[pitch*0+i] = rt_rawcolor<pixel_t>(pal, source[0+i]);
111 source += 4;
112 dest += pitch;
113 }
114 if (!(count >>= 1))
115 return;
116
117 do
118 {
119 for (int i = 0; i < columns; ++i)
120 dest[pitch*0+i] = rt_rawcolor<pixel_t>(pal, source[0+i]);
121 for (int i = 0; i < columns; ++i)
122 dest[pitch*1+i] = rt_rawcolor<pixel_t>(pal, source[4+i]);
123 source += 8;
124 dest += pitch*2;
125 } while (--count);
126 }
127
128 template<typename pixel_t, int columns>
rt_mapcols(int hx,int sx,int yl,int yh)129 static forceinline void rt_mapcols(int hx, int sx, int yl, int yh)
130 {
131 byte *source;
132 pixel_t *dest;
133 int count;
134 int pitch;
135
136 count = yh-yl;
137 if (count < 0)
138 return;
139 count++;
140
141 dest = (pixel_t *)(ylookup[yl] + columnofs[sx]);
142 source = &dc_temp[yl*4 + hx];
143 pitch = dcol.pitch / sizeof(pixel_t);
144
145 if (count & 1)
146 {
147 for (int i = 0; i < columns; ++i)
148 dest[pitch*0+i] = rt_mapcolor<pixel_t>(dcol.colormap, source[0+i]);
149 source += 4;
150 dest += pitch;
151 }
152 if (!(count >>= 1))
153 return;
154
155 do
156 {
157 for (int i = 0; i < columns; ++i)
158 dest[pitch*0+i] = rt_mapcolor<pixel_t>(dcol.colormap, source[0+i]);
159 for (int i = 0; i < columns; ++i)
160 dest[pitch*1+i] = rt_mapcolor<pixel_t>(dcol.colormap, source[4+i]);
161 source += 8;
162 dest += pitch*2;
163 } while (--count);
164 }
165
166 template<typename pixel_t, int columns>
rt_tlatecols(int hx,int sx,int yl,int yh)167 static forceinline void rt_tlatecols(int hx, int sx, int yl, int yh)
168 {
169 byte *source;
170 pixel_t *dest;
171 int count;
172 int pitch;
173
174 count = yh-yl;
175 if (count < 0)
176 return;
177 count++;
178
179 dest = (pixel_t *)( ylookup[yl] + columnofs[sx] );
180 source = &dc_temp[yl*4 + hx];
181 pitch = dcol.pitch / sizeof(pixel_t);
182
183 do
184 {
185 for (int i = 0; i < columns; ++i)
186 dest[i] = rt_tlatecolor<pixel_t>(dcol.colormap, dcol.translation, source[i]);
187 source += 4;
188 dest += pitch;
189 } while (--count);
190 }
191
192
193
// Type-dispatching front end for the four-column translucent blend.
// Only the two explicit specializations below are defined; each one simply
// forwards its arguments to the matching function pointer declared near the
// top of this file.
template<typename pixel_t>
static forceinline void rtv_lucent4cols(byte *source, pixel_t *dest, int bga, int fga);

// palindex_t destination: forwards to rtv_lucent4colsP.
template<>
forceinline void rtv_lucent4cols(byte *source, palindex_t *dest, int bga, int fga)
{
	rtv_lucent4colsP(source, dest, bga, fga);
}

// argb_t destination: forwards to rtv_lucent4colsD.
template<>
forceinline void rtv_lucent4cols(byte *source, argb_t *dest, int bga, int fga)
{
	rtv_lucent4colsD(source, dest, bga, fga);
}
208
209
210 template<typename pixel_t, int columns>
rt_lucentcols(int hx,int sx,int yl,int yh)211 static forceinline void rt_lucentcols(int hx, int sx, int yl, int yh)
212 {
213 byte *source;
214 pixel_t *dest;
215 int count;
216 int pitch;
217
218 count = yh-yl;
219 if (count < 0)
220 return;
221 count++;
222
223 int fga = (dcol.translevel & ~0x03FF) >> 8;
224 int bga = 255 - fga;
225
226 dest = (pixel_t *)( ylookup[yl] + columnofs[sx] );
227 source = &dc_temp[yl*4 + hx];
228 pitch = dcol.pitch / sizeof(pixel_t);
229
230 do
231 {
232 if (columns == 4)
233 {
234 rtv_lucent4cols<pixel_t>(source, dest, bga, fga);
235 }
236 else
237 {
238 for (int i = 0; i < columns; ++i)
239 {
240 const pixel_t fg = rt_mapcolor<pixel_t>(dcol.colormap, source[i]);
241 const pixel_t bg = dest[i];
242
243 dest[i] = rt_blend2<pixel_t>(bg, bga, fg, fga);
244 }
245 }
246
247 source += 4;
248 dest += pitch;
249 } while (--count);
250 }
251
252 template<typename pixel_t, int columns>
rt_tlatelucentcols(int hx,int sx,int yl,int yh)253 static forceinline void rt_tlatelucentcols(int hx, int sx, int yl, int yh)
254 {
255 byte *source;
256 pixel_t *dest;
257 int count;
258 int pitch;
259
260 count = yh-yl;
261 if (count < 0)
262 return;
263 count++;
264
265 int fga = (dcol.translevel & ~0x03FF) >> 8;
266 int bga = 255 - fga;
267
268 dest = (pixel_t *)( ylookup[yl] + columnofs[sx] );
269 source = &dc_temp[yl*4 + hx];
270 pitch = dcol.pitch / sizeof(pixel_t);
271
272 do
273 {
274 for (int i = 0; i < columns; ++i)
275 {
276 const pixel_t fg = rt_tlatecolor<pixel_t>(dcol.colormap, dcol.translation, source[i]);
277 const pixel_t bg = dest[i];
278
279 dest[i] = rt_blend2<pixel_t>(bg, bga, fg, fga);
280 }
281
282 source += 4;
283 dest += pitch;
284 } while (--count);
285 }
286
287
// Copies one span at hx to the screen at sx.
// Palettized variant: forwards to rt_copycols<byte, 1>, which writes one
// pixel per row for rows yl..yh inclusive.
void rt_copy1colP (int hx, int sx, int yl, int yh)
{
	rt_copycols<byte, 1>(hx, sx, yl, yh);
}
293
// Copies all four spans to the screen starting at sx.
// Palettized variant: forwards to rt_copycols<byte, 4>, always starting at
// temporary column 0, for rows yl..yh inclusive.
void rt_copy4colsP (int sx, int yl, int yh)
{
	rt_copycols<byte, 4>(0, sx, yl, yh);
}
299
// Maps one span at hx to the screen at sx.
// Palettized variant: forwards to rt_mapcols<byte, 1>, which applies
// dcol.colormap to one pixel per row for rows yl..yh inclusive.
void rt_map1colP (int hx, int sx, int yl, int yh)
{
	rt_mapcols<byte, 1>(hx, sx, yl, yh);
}
305
// Maps all four spans to the screen starting at sx.
// Palettized variant: forwards to rt_mapcols<byte, 4>, always starting at
// temporary column 0, for rows yl..yh inclusive.
void rt_map4colsP (int sx, int yl, int yh)
{
	rt_mapcols<byte, 4>(0, sx, yl, yh);
}
311
// Translates one span at hx to the screen at sx.
// Palettized variant: forwards to rt_tlatecols<byte, 1>, which applies
// dcol.translation and dcol.colormap to one pixel per row for rows yl..yh
// inclusive.
void rt_tlate1colP (int hx, int sx, int yl, int yh)
{
	rt_tlatecols<byte, 1>(hx, sx, yl, yh);
}
317
// Translates all four spans to the screen starting at sx.
// Palettized variant: forwards to rt_tlatecols<byte, 4>, always starting at
// temporary column 0, for rows yl..yh inclusive.
void rt_tlate4colsP (int sx, int yl, int yh)
{
	rt_tlatecols<byte, 4>(0, sx, yl, yh);
}
323
// Mixes one span at hx to the screen at sx.
// Palettized variant: forwards to rt_lucentcols<byte, 1>, which blends one
// colormapped pixel per row with the existing screen pixel for rows yl..yh
// inclusive.
void rt_lucent1colP (int hx, int sx, int yl, int yh)
{
	rt_lucentcols<byte, 1>(hx, sx, yl, yh);
}
329
// Mixes all four spans to the screen starting at sx.
// Palettized variant: forwards to rt_lucentcols<byte, 4>, always starting at
// temporary column 0; with four columns that helper blends each row through
// rtv_lucent4cols.
void rt_lucent4colsP (int sx, int yl, int yh)
{
	rt_lucentcols<byte, 4>(0, sx, yl, yh);
}
335
// Translates and mixes one span at hx to the screen at sx.
// Palettized variant: forwards to rt_tlatelucentcols<byte, 1>, which blends
// one translated pixel per row with the existing screen pixel for rows
// yl..yh inclusive.
void rt_tlatelucent1colP (int hx, int sx, int yl, int yh)
{
	rt_tlatelucentcols<byte, 1>(hx, sx, yl, yh);
}
341
// Translates and mixes all four spans to the screen starting at sx.
// Palettized variant: forwards to rt_tlatelucentcols<byte, 4>, always
// starting at temporary column 0, for rows yl..yh inclusive.
void rt_tlatelucent4colsP (int sx, int yl, int yh)
{
	rt_tlatelucentcols<byte, 4>(0, sx, yl, yh);
}
347
348
349 // Direct rendering (32-bit) functions:
350
351
// Copies one span at hx to the screen at sx.
// Direct (argb_t destination) variant: forwards to rt_copycols<argb_t, 1>
// for rows yl..yh inclusive.
void rt_copy1colD (int hx, int sx, int yl, int yh)
{
	rt_copycols<argb_t, 1>(hx, sx, yl, yh);
}
356
// Copies all four spans to the screen starting at sx.
// Direct (argb_t destination) variant: forwards to rt_copycols<argb_t, 4>,
// always starting at temporary column 0, for rows yl..yh inclusive.
void rt_copy4colsD (int sx, int yl, int yh)
{
	rt_copycols<argb_t, 4>(0, sx, yl, yh);
}
361
// Maps one span at hx to the screen at sx.
// Direct (argb_t destination) variant: forwards to rt_mapcols<argb_t, 1>
// for rows yl..yh inclusive.
void rt_map1colD (int hx, int sx, int yl, int yh)
{
	rt_mapcols<argb_t, 1>(hx, sx, yl, yh);
}
366
// Maps all four spans to the screen starting at sx.
// Direct (argb_t destination) variant: forwards to rt_mapcols<argb_t, 4>,
// always starting at temporary column 0, for rows yl..yh inclusive.
void rt_map4colsD (int sx, int yl, int yh)
{
	rt_mapcols<argb_t, 4>(0, sx, yl, yh);
}
371
// Translates one span at hx to the screen at sx.
// Direct (argb_t destination) variant: forwards to rt_tlatecols<argb_t, 1>
// for rows yl..yh inclusive.
void rt_tlate1colD (int hx, int sx, int yl, int yh)
{
	rt_tlatecols<argb_t, 1>(hx, sx, yl, yh);
}
376
// Translates all four spans to the screen starting at sx.
// Direct (argb_t destination) variant: forwards to rt_tlatecols<argb_t, 4>,
// always starting at temporary column 0, for rows yl..yh inclusive.
void rt_tlate4colsD (int sx, int yl, int yh)
{
	rt_tlatecols<argb_t, 4>(0, sx, yl, yh);
}
381
// Mixes one span at hx to the screen at sx.
// Direct (argb_t destination) variant: forwards to rt_lucentcols<argb_t, 1>
// for rows yl..yh inclusive.
void rt_lucent1colD (int hx, int sx, int yl, int yh)
{
	rt_lucentcols<argb_t, 1>(hx, sx, yl, yh);
}
386
// Mixes all four spans to the screen starting at sx.
// Direct (argb_t destination) variant: forwards to rt_lucentcols<argb_t, 4>,
// always starting at temporary column 0; with four columns that helper
// blends each row through rtv_lucent4cols.
void rt_lucent4colsD (int sx, int yl, int yh)
{
	rt_lucentcols<argb_t, 4>(0, sx, yl, yh);
}
391
// Translates and mixes one span at hx to the screen at sx.
// Direct (argb_t destination) variant: forwards to
// rt_tlatelucentcols<argb_t, 1> for rows yl..yh inclusive.
void rt_tlatelucent1colD (int hx, int sx, int yl, int yh)
{
	rt_tlatelucentcols<argb_t, 1>(hx, sx, yl, yh);
}
396
// Translates and mixes all four spans to the screen starting at sx.
// Direct (argb_t destination) variant: forwards to
// rt_tlatelucentcols<argb_t, 4>, always starting at temporary column 0, for
// rows yl..yh inclusive.
void rt_tlatelucent4colsD (int sx, int yl, int yh)
{
	rt_tlatelucentcols<argb_t, 4>(0, sx, yl, yh);
}
401
402 // Functions for v_video.cpp support
403
r_dimpatchD_c(const DCanvas * const cvs,argb_t color,int alpha,int x1,int y1,int w,int h)404 void r_dimpatchD_c(const DCanvas *const cvs, argb_t color, int alpha, int x1, int y1, int w, int h)
405 {
406 int dpitch = cvs->pitch / sizeof(argb_t);
407 argb_t* line = (argb_t *)cvs->buffer + y1 * dpitch;
408
409 for (int y = y1; y < y1 + h; y++)
410 {
411 for (int x = x1; x < x1 + w; x++)
412 line[x] = alphablend1a(line[x], color, alpha);
413
414 line += dpitch;
415 }
416 }
417
418
419 // Generic drawing functions which call either D(irect) or P(alettized) functions above:
420
421
422 // Draws all spans at hx to the screen at sx.
rt_draw1col(int hx,int sx)423 void rt_draw1col (int hx, int sx)
424 {
425 while (horizspan[hx] < dc_ctspan[hx]) {
426 hcolfunc_post1 (hx, sx, horizspan[hx][0], horizspan[hx][1]);
427 horizspan[hx] += 2;
428 }
429 }
430
// Copies all spans in all four columns to the screen starting at sx.
// sx should be dword-aligned
//
// Rows that are covered by a span in all four columns at once are drawn
// with a single hcolfunc_post4 call; everything else is drawn one column at
// a time with hcolfunc_post1. Returns once every column has run out of
// spans.
void rt_draw4cols(int sx)
{
	int x, bad;
	unsigned int maxtop, minbot, minnexttop;

	// Place a dummy "span" in each column. These don't get
	// drawn. They're just here to avoid special cases in the
	// max/min calculations below.
	// The dummy has top > bottom, so a column that has reached it can never
	// be part of a shared area.
	// NOTE(review): this writes two entries at dc_ctspan[x]; nothing here
	// checks that there is still room in dc_tspans[x] -- confirm that
	// whoever records the spans leaves space for it.
	for (x = 0; x < 4; ++x)
	{
		dc_ctspan[x][0] = viewheight + 1;
		dc_ctspan[x][1] = viewheight;
	}

	for (;;)
	{
		// If a column is out of spans, mark it as such
		// (bit x of "bad" is set for column x). For the remaining columns,
		// track the smallest top among their *next* spans.
		bad = 0;
		minnexttop = 0xffffffff;

		for (x = 0; x < 4; ++x)
		{
			if (horizspan[x] >= dc_ctspan[x])
				bad |= 1 << x;
			else if ((horizspan[x]+2)[0] < minnexttop)
				minnexttop = (horizspan[x]+2)[0];
		}
		// Once all columns are out of spans, we're done
		if (bad == 15)
			return;

		// Find the largest shared area for the spans in each column
		maxtop = MAX (MAX (horizspan[0][0], horizspan[1][0]),
					  MAX (horizspan[2][0], horizspan[3][0]));
		minbot = MIN (MIN (horizspan[0][1], horizspan[1][1]),
					  MIN (horizspan[2][1], horizspan[3][1]));

		// If there is no shared area with these spans, draw each span
		// individually and advance to the next spans until we reach a shared area.
		// However, only draw spans down to the highest span in the next set of
		// spans. If we allow the entire height of a span to be drawn, it could
		// prevent any more shared areas from being drawn in these four columns.
		//
		// Example: Suppose we have the following arrangement:
		//			A CD
		//			A CD
		//			 B D
		//			 B D
		//			aB D
		//			aBcD
		//			aBcD
		//			aBc
		//
		// If we draw the entire height of the spans, we end up drawing this first:
		//			A CD
		//			A CD
		//			 B D
		//			 B D
		//			 B D
		//			 B D
		//			 B D
		//			 B D
		//			 B
		//
		// This leaves only the "a" and "c" columns to be drawn, and they are not
		// part of a shared area, but if we can include B and D with them, we can
		// get a shared area. So we cut off everything in the first set just
		// above the "a" column and end up drawing this first:
		//			A CD
		//			A CD
		//			 B D
		//			 B D
		//
		// Then the next time through, we have the following arrangement with an
		// easily shared area to draw:
		//			aB D
		//			aBcD
		//			aBcD
		//			aBc
		if (bad != 0 || maxtop > minbot)
		{
			for (x = 0; x < 4; ++x)
			{
				// "bad" is shifted right once per column, so bit 0 always
				// refers to the current column x.
				if (!(bad & 1))
				{
					if (horizspan[x][1] < minnexttop)
					{
						// The span ends above the next set of spans:
						// draw all of it and move to the next span.
						hcolfunc_post1(x, sx + x, horizspan[x][0], horizspan[x][1]);
						horizspan[x] += 2;
					}
					else if (minnexttop > horizspan[x][0])
					{
						// The span reaches into the next set: draw only the
						// part above minnexttop and keep the rest for later.
						hcolfunc_post1(x, sx + x, horizspan[x][0], minnexttop - 1);
						horizspan[x][0] = minnexttop;
					}
				}
				bad >>= 1;
			}
			continue;
		}

		// Draw any span fragments above the shared area.
		for (x = 0; x < 4; ++x)
		{
			if (maxtop > horizspan[x][0])
				hcolfunc_post1(x, sx + x, horizspan[x][0], maxtop - 1);
		}

		// Draw the shared area.
		hcolfunc_post4(sx, maxtop, minbot);

		// For each column, if part of the span is past the shared area,
		// set its top to just below the shared area. Otherwise, advance
		// to the next span in that column.
		for (x = 0; x < 4; ++x)
		{
			if (minbot < horizspan[x][1])
				horizspan[x][0] = minbot + 1;
			else
				horizspan[x] += 2;
		}
	}
}
556
557 // Before each pass through a rendering loop that uses these routines,
558 // call this function to set up the span pointers.
rt_initcols(void)559 void rt_initcols (void)
560 {
561 int y;
562
563 for (y = 3; y >= 0; y--)
564 horizspan[y] = dc_ctspan[y] = &dc_tspans[y][0];
565 }
566
// NOTE(review): VERSION_CONTROL is a project macro not defined in this file;
// presumably it records this file's revision id string -- confirm.
VERSION_CONTROL (r_drawt_cpp, "$Id: r_drawt.cpp 4542 2014-02-09 17:39:42Z dr_sean $")
568
569