1 /*
2 d_scan.c
3
4 Portable C scan-level rasterization code, all pixel depths.
5
6 Copyright (C) 1996-1997 Id Software, Inc.
7
8 This program is free software; you can redistribute it and/or
9 modify it under the terms of the GNU General Public License
10 as published by the Free Software Foundation; either version 2
11 of the License, or (at your option) any later version.
12
13 This program is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
16
17 See the GNU General Public License for more details.
18
19 You should have received a copy of the GNU General Public License
20 along with this program; if not, write to:
21
22 Free Software Foundation, Inc.
23 59 Temple Place - Suite 330
24 Boston, MA 02111-1307, USA
25
26 */
27 #ifdef HAVE_CONFIG_H
28 # include "config.h"
29 #endif
30
31 #define NH_DEFINE
32 #include "namehack.h"
33
34 #include "QF/qendian.h"
35 #include "QF/render.h"
36 #include "QF/sys.h"
37
38 #include "compat.h"
39 #include "d_local.h"
40 #include "r_internal.h"
41 #include "vid_internal.h"
42
43 static byte *r_turb_pbase;
44 static void *r_turb_pdest;
45 static fixed16_t r_turb_s, r_turb_t, r_turb_sstep, r_turb_tstep;
46 static int *r_turb_turb;
47 static int r_turb_spancount;
48
49 /*
50 D_WarpScreen
51
52 this performs a slight compression of the screen at the same time as
53 the sine warp, to keep the edges from wrapping
54 */
55 void
sw32_D_WarpScreen(void)56 sw32_D_WarpScreen (void)
57 {
58 switch(sw32_r_pixbytes) {
59 case 1:
60 {
61 int w, h;
62 int u, v;
63 byte *dest;
64 int *turb;
65 int *col;
66 byte **row;
67 byte *rowptr[MAXHEIGHT];
68 int column[MAXWIDTH];
69 float wratio, hratio;
70
71 w = r_refdef.vrect.width;
72 h = r_refdef.vrect.height;
73
74 wratio = w / (float) scr_vrect.width;
75 hratio = h / (float) scr_vrect.height;
76
77 for (v = 0; v < scr_vrect.height + AMP2 * 2; v++) {
78 rowptr[v] = (byte *) sw32_d_viewbuffer + (r_refdef.vrect.y *
79 sw32_screenwidth) +
80 (sw32_screenwidth * (int) ((float) v * hratio * h /
81 (h + AMP2 * 2)));
82 }
83
84 for (u = 0; u < scr_vrect.width + AMP2 * 2; u++) {
85 column[u] = r_refdef.vrect.x +
86 (int) ((float) u * wratio * w / (w + AMP2 * 2));
87 }
88
89 turb = sw32_intsintable + ((int) (vr_data.realtime * SPEED) & (CYCLE - 1));
90 dest = (byte *)vid.buffer + scr_vrect.y * vid.rowbytes +
91 scr_vrect.x;
92
93 for (v = 0; v < scr_vrect.height; v++, dest += vid.rowbytes) {
94 col = &column[turb[v]];
95 row = &rowptr[v];
96 for (u = 0; u < scr_vrect.width; u += 4) {
97 dest[u + 0] = row[turb[u + 0]][col[u + 0]];
98 dest[u + 1] = row[turb[u + 1]][col[u + 1]];
99 dest[u + 2] = row[turb[u + 2]][col[u + 2]];
100 dest[u + 3] = row[turb[u + 3]][col[u + 3]];
101 }
102 }
103 }
104 break;
105 case 2:
106 {
107 int w, h;
108 int u, v;
109 short *dest;
110 int *turb;
111 int *col;
112 short **row;
113 short *rowptr[MAXHEIGHT];
114 int column[MAXWIDTH];
115 float wratio, hratio;
116
117 w = r_refdef.vrect.width;
118 h = r_refdef.vrect.height;
119
120 wratio = w / (float) scr_vrect.width;
121 hratio = h / (float) scr_vrect.height;
122
123 for (v = 0; v < scr_vrect.height + AMP2 * 2; v++) {
124 rowptr[v] = (short *) sw32_d_viewbuffer +
125 (r_refdef.vrect.y * sw32_screenwidth) +
126 (sw32_screenwidth * (int) ((float) v * hratio * h /
127 (h + AMP2 * 2)));
128 }
129
130 for (u = 0; u < scr_vrect.width + AMP2 * 2; u++) {
131 column[u] = r_refdef.vrect.x +
132 (int) ((float) u * wratio * w / (w + AMP2 * 2));
133 }
134
135 turb = sw32_intsintable + ((int) (vr_data.realtime * SPEED) & (CYCLE - 1));
136 dest = (short *) vid.buffer + scr_vrect.y * (vid.rowbytes >> 1) +
137 scr_vrect.x;
138
139 for (v = 0; v < scr_vrect.height; v++, dest += (vid.rowbytes >> 1)) {
140 col = &column[turb[v]];
141 row = &rowptr[v];
142 for (u = 0; u < scr_vrect.width; u += 4) {
143 dest[u + 0] = row[turb[u + 0]][col[u + 0]];
144 dest[u + 1] = row[turb[u + 1]][col[u + 1]];
145 dest[u + 2] = row[turb[u + 2]][col[u + 2]];
146 dest[u + 3] = row[turb[u + 3]][col[u + 3]];
147 }
148 }
149 }
150 break;
151 case 4:
152 {
153 int w, h;
154 int u, v;
155 int *dest;
156 int *turb;
157 int *col;
158 int **row;
159 int *rowptr[MAXHEIGHT];
160 int column[MAXWIDTH];
161 float wratio, hratio;
162
163 w = r_refdef.vrect.width;
164 h = r_refdef.vrect.height;
165
166 wratio = w / (float) scr_vrect.width;
167 hratio = h / (float) scr_vrect.height;
168
169 for (v = 0; v < scr_vrect.height + AMP2 * 2; v++) {
170 rowptr[v] = (int *) sw32_d_viewbuffer +
171 (r_refdef.vrect.y * sw32_screenwidth) +
172 (sw32_screenwidth * (int) ((float) v * hratio * h /
173 (h + AMP2 * 2)));
174 }
175
176 for (u = 0; u < scr_vrect.width + AMP2 * 2; u++) {
177 column[u] = r_refdef.vrect.x +
178 (int) ((float) u * wratio * w / (w + AMP2 * 2));
179 }
180
181 turb = sw32_intsintable + ((int) (vr_data.realtime * SPEED) & (CYCLE - 1));
182 dest = (int *) vid.buffer + scr_vrect.y * (vid.rowbytes >> 2) +
183 scr_vrect.x;
184
185 for (v = 0; v < scr_vrect.height; v++, dest += (vid.rowbytes >> 2)) {
186 col = &column[turb[v]];
187 row = &rowptr[v];
188 for (u = 0; u < scr_vrect.width; u += 4) {
189 dest[u + 0] = row[turb[u + 0]][col[u + 0]];
190 dest[u + 1] = row[turb[u + 1]][col[u + 1]];
191 dest[u + 2] = row[turb[u + 2]][col[u + 2]];
192 dest[u + 3] = row[turb[u + 3]][col[u + 3]];
193 }
194 }
195 }
196 break;
197 default:
198 Sys_Error("D_WarpScreen: unsupported r_pixbytes %i", sw32_r_pixbytes);
199 }
200 }
201
202 static void
D_DrawTurbulentSpan(void)203 D_DrawTurbulentSpan (void)
204 {
205 int sturb, tturb;
206
207 switch (sw32_r_pixbytes) {
208 case 1:
209 {
210 byte *pdest = (byte *) r_turb_pdest;
211 do {
212 sturb = ((r_turb_s + r_turb_turb[(r_turb_t >> 16) &
213 (CYCLE - 1)]) >> 16) & 63;
214 tturb = ((r_turb_t + r_turb_turb[(r_turb_s >> 16) &
215 (CYCLE - 1)]) >> 16) & 63;
216 *pdest++ = r_turb_pbase[(tturb << 6) + sturb];
217 r_turb_s += r_turb_sstep;
218 r_turb_t += r_turb_tstep;
219 } while (--r_turb_spancount > 0);
220 r_turb_pdest = (byte *)pdest;
221 }
222 break;
223 case 2:
224 {
225 short *pdest = (short *) r_turb_pdest;
226 do {
227 sturb = ((r_turb_s + r_turb_turb[(r_turb_t >> 16) &
228 (CYCLE - 1)]) >> 16) & 63;
229 tturb = ((r_turb_t + r_turb_turb[(r_turb_s >> 16) &
230 (CYCLE - 1)]) >> 16) & 63;
231 *pdest++ = sw32_8to16table[r_turb_pbase[(tturb << 6) + sturb]];
232 r_turb_s += r_turb_sstep;
233 r_turb_t += r_turb_tstep;
234 } while (--r_turb_spancount > 0);
235 r_turb_pdest = (byte *)pdest;
236 }
237 break;
238 case 4:
239 {
240 int *pdest = (int *) r_turb_pdest;
241 do {
242 sturb = ((r_turb_s + r_turb_turb[(r_turb_t >> 16) &
243 (CYCLE - 1)]) >> 16) & 63;
244 tturb = ((r_turb_t + r_turb_turb[(r_turb_s >> 16) &
245 (CYCLE - 1)]) >> 16) & 63;
246 *pdest++ = d_8to24table[r_turb_pbase[(tturb << 6) + sturb]];
247 r_turb_s += r_turb_sstep;
248 r_turb_t += r_turb_tstep;
249 } while (--r_turb_spancount > 0);
250 r_turb_pdest = (byte *)pdest;
251 }
252 break;
253 default:
254 Sys_Error("D_DrawTurbulentSpan: unsupported r_pixbytes %i",
255 sw32_r_pixbytes);
256 }
257 }
258
259 void
sw32_Turbulent(espan_t * pspan)260 sw32_Turbulent (espan_t *pspan)
261 {
262 int count;
263 fixed16_t snext, tnext;
264 float sdivz, tdivz, zi, z, du, dv, spancountminus1;
265 float sdivz16stepu, tdivz16stepu, zi16stepu;
266
267 r_turb_turb = sw32_sintable + ((int) (vr_data.realtime * SPEED) & (CYCLE - 1));
268
269 r_turb_sstep = 0; // keep compiler happy
270 r_turb_tstep = 0; // ditto
271
272 r_turb_pbase = (byte *) sw32_cacheblock;
273
274 sdivz16stepu = sw32_d_sdivzstepu * 16;
275 tdivz16stepu = sw32_d_tdivzstepu * 16;
276 zi16stepu = d_zistepu * 16 * 65536;
277
278 do {
279 r_turb_pdest = (byte *) sw32_d_viewbuffer + ((sw32_screenwidth * pspan->v) +
280 pspan->u) * sw32_r_pixbytes;
281
282 count = pspan->count;
283
284 // calculate the initial s/z, t/z, 1/z, s, and t and clamp
285 du = (float) pspan->u;
286 dv = (float) pspan->v;
287
288 sdivz = sw32_d_sdivzorigin + dv * sw32_d_sdivzstepv + du * sw32_d_sdivzstepu;
289 tdivz = sw32_d_tdivzorigin + dv * sw32_d_tdivzstepv + du * sw32_d_tdivzstepu;
290 zi = (d_ziorigin + dv * d_zistepv + du * d_zistepu) * 65536.0f;
291 z = sw32_d_zitable[(unsigned short) zi];
292
293 r_turb_s = (int) (sdivz * z) + sw32_sadjust;
294 if (r_turb_s > sw32_bbextents)
295 r_turb_s = sw32_bbextents;
296 else if (r_turb_s < 0)
297 r_turb_s = 0;
298
299 r_turb_t = (int) (tdivz * z) + sw32_tadjust;
300 if (r_turb_t > sw32_bbextentt)
301 r_turb_t = sw32_bbextentt;
302 else if (r_turb_t < 0)
303 r_turb_t = 0;
304
305 do {
306 // calculate s and t at the far end of the span
307 if (count >= 16)
308 r_turb_spancount = 16;
309 else
310 r_turb_spancount = count;
311
312 count -= r_turb_spancount;
313
314 if (count) {
315 // calculate s/z, t/z, zi->fixed s and t at far end of span,
316 // calculate s and t steps across span by shifting
317 sdivz += sdivz16stepu;
318 tdivz += tdivz16stepu;
319 zi += zi16stepu;
320 z = sw32_d_zitable[(unsigned short) zi];
321
322 snext = (int) (sdivz * z) + sw32_sadjust;
323 if (snext > sw32_bbextents)
324 snext = sw32_bbextents;
325 else if (snext < 16)
326 snext = 16; // prevent round-off error on <0
327 // steps from
328 // from causing overstepping & running off the
329 // edge of the texture
330
331 tnext = (int) (tdivz * z) + sw32_tadjust;
332 if (tnext > sw32_bbextentt)
333 tnext = sw32_bbextentt;
334 else if (tnext < 16)
335 tnext = 16; // guard against round-off error on
336 // <0 steps
337
338 r_turb_sstep = (snext - r_turb_s) >> 4;
339 r_turb_tstep = (tnext - r_turb_t) >> 4;
340 } else {
341 // calculate s/z, t/z, zi->fixed s and t at last pixel in
342 // span (so can't step off polygon), clamp, calculate s and t
343 // steps across span by division, biasing steps low so we
344 // don't run off the texture
345 spancountminus1 = (float) (r_turb_spancount - 1);
346 sdivz += sw32_d_sdivzstepu * spancountminus1;
347 tdivz += sw32_d_tdivzstepu * spancountminus1;
348 zi += d_zistepu * 65536.0f * spancountminus1;
349 z = sw32_d_zitable[(unsigned short) zi];
350 snext = (int) (sdivz * z) + sw32_sadjust;
351 if (snext > sw32_bbextents)
352 snext = sw32_bbextents;
353 else if (snext < 16)
354 snext = 16; // prevent round-off error on <0 steps
355 // from causing overstepping & running
356 // off the edge of the texture
357
358 tnext = (int) (tdivz * z) + sw32_tadjust;
359 if (tnext > sw32_bbextentt)
360 tnext = sw32_bbextentt;
361 else if (tnext < 16)
362 tnext = 16; // guard against round-off error on
363 // <0 steps
364
365 if (r_turb_spancount > 1) {
366 r_turb_sstep = (snext - r_turb_s) / (r_turb_spancount - 1);
367 r_turb_tstep = (tnext - r_turb_t) / (r_turb_spancount - 1);
368 }
369 }
370
371 r_turb_s = r_turb_s & ((CYCLE << 16) - 1);
372 r_turb_t = r_turb_t & ((CYCLE << 16) - 1);
373
374 D_DrawTurbulentSpan ();
375
376 r_turb_s = snext;
377 r_turb_t = tnext;
378
379 } while (count > 0);
380
381 } while ((pspan = pspan->pnext) != NULL);
382 }
383
384 void
sw32_D_DrawSpans(espan_t * pspan)385 sw32_D_DrawSpans (espan_t *pspan)
386 {
387 switch(sw32_r_pixbytes) {
388 case 1:
389 {
390 byte *pbase = (byte *) sw32_cacheblock, *pdest;
391 int count;
392 fixed16_t s, t, snext, tnext, sstep, tstep;
393 float sdivz, tdivz, zi, z, du, dv;
394 float sdivz8stepu, tdivz8stepu, zi8stepu;
395
396 sstep = 0; // keep compiler happy
397 tstep = 0; // ditto
398
399 sdivz8stepu = sw32_d_sdivzstepu * 8;
400 tdivz8stepu = sw32_d_tdivzstepu * 8;
401 zi8stepu = d_zistepu * 8 * 65536;
402
403 do {
404 pdest = (byte *) sw32_d_viewbuffer + (sw32_screenwidth * pspan->v) +
405 pspan->u;
406
407 count = pspan->count;
408
409 // calculate the initial s/z, t/z, 1/z, s, and t and clamp
410 du = (float) pspan->u;
411 dv = (float) pspan->v;
412
413 sdivz = sw32_d_sdivzorigin + dv * sw32_d_sdivzstepv + du * sw32_d_sdivzstepu;
414 tdivz = sw32_d_tdivzorigin + dv * sw32_d_tdivzstepv + du * sw32_d_tdivzstepu;
415 zi = (d_ziorigin + dv * d_zistepv + du * d_zistepu) * 65536.0f;
416 z = sw32_d_zitable[(unsigned short) zi];
417
418 s = (int) (sdivz * z) + sw32_sadjust;
419 s = bound(0, s, sw32_bbextents);
420 t = (int) (tdivz * z) + sw32_tadjust;
421 t = bound(0, t, sw32_bbextentt);
422
423 while(count >= 8) {
424 count -= 8;
425 // calculate s/z, t/z, zi->fixed s and t at far end of span,
426 // calculate s and t steps across span by shifting
427 sdivz += sdivz8stepu;
428 tdivz += tdivz8stepu;
429 zi += zi8stepu;
430 z = sw32_d_zitable[(unsigned short) zi];
431
432 // prevent round-off error on <0 steps from from causing
433 // overstepping & running off the edge of the texture
434 snext = (int) (sdivz * z) + sw32_sadjust;
435 snext = bound(8, snext, sw32_bbextents);
436 tnext = (int) (tdivz * z) + sw32_tadjust;
437 tnext = bound(8, tnext, sw32_bbextentt);
438
439 sstep = (snext - s) >> 3;
440 tstep = (tnext - t) >> 3;
441
442 pdest[0] = pbase[(t >> 16) * sw32_cachewidth + (s >> 16)];
443 s += sstep;t += tstep;
444 pdest[1] = pbase[(t >> 16) * sw32_cachewidth + (s >> 16)];
445 s += sstep;
446 t += tstep;
447 pdest[2] = pbase[(t >> 16) * sw32_cachewidth + (s >> 16)];
448 s += sstep;
449 t += tstep;
450 pdest[3] = pbase[(t >> 16) * sw32_cachewidth + (s >> 16)];
451 s += sstep;
452 t += tstep;
453 pdest[4] = pbase[(t >> 16) * sw32_cachewidth + (s >> 16)];
454 s += sstep;
455 t += tstep;
456 pdest[5] = pbase[(t >> 16) * sw32_cachewidth + (s >> 16)];
457 s += sstep;
458 t += tstep;
459 pdest[6] = pbase[(t >> 16) * sw32_cachewidth + (s >> 16)];
460 s += sstep;
461 t += tstep;
462 pdest[7] = pbase[(t >> 16) * sw32_cachewidth + (s >> 16)];
463 s = snext;
464 t = tnext;
465 pdest += 8;
466 }
467 if (count)
468 {
469 // calculate s/z, t/z, zi->fixed s and t at last pixel in span
470 // (so can't step off polygon), clamp, calculate s and t steps
471 // across span by division, biasing steps low so we don't run
472 // off the texture
473 //countminus1 = (float) (count - 1);
474 sdivz += sw32_d_sdivzstepu * count; //minus1;
475 tdivz += sw32_d_tdivzstepu * count; //minus1;
476 zi += d_zistepu * 65536.0f * count; //minus1;
477 z = sw32_d_zitable[(unsigned short) zi];
478
479 // prevent round-off error on <0 steps from from causing
480 // overstepping & running off the edge of the texture
481 snext = (int) (sdivz * z) + sw32_sadjust;
482 snext = bound(count, snext, sw32_bbextents);
483 tnext = (int) (tdivz * z) + sw32_tadjust;
484 tnext = bound(count, tnext, sw32_bbextentt);
485
486 if (count > 1) {
487 sstep = (snext - s) / count; //(count - 1);
488 tstep = (tnext - t) / count; //(count - 1);
489
490 if (count & 4)
491 {
492 pdest[0] = pbase[(t >> 16) * sw32_cachewidth + (s >> 16)];
493 s += sstep;
494 t += tstep;
495 pdest[1] = pbase[(t >> 16) * sw32_cachewidth + (s >> 16)];
496 s += sstep;
497 t += tstep;
498 pdest[2] = pbase[(t >> 16) * sw32_cachewidth + (s >> 16)];
499 s += sstep;
500 t += tstep;
501 pdest[3] = pbase[(t >> 16) * sw32_cachewidth + (s >> 16)];
502 s += sstep;
503 t += tstep;
504 pdest += 4;
505 }
506 if (count & 2)
507 {
508 pdest[0] = pbase[(t >> 16) * sw32_cachewidth + (s >> 16)];
509 s += sstep;
510 t += tstep;
511 pdest[1] = pbase[(t >> 16) * sw32_cachewidth + (s >> 16)];
512 s += sstep;
513 t += tstep;
514 pdest += 2;
515 }
516 if (count & 1)
517 pdest[0] = pbase[(t >> 16) * sw32_cachewidth + (s >> 16)];
518 s += sstep;
519 t += tstep;
520 }
521 else
522 {
523 pdest[0] = pbase[(t >> 16) * sw32_cachewidth + (s >> 16)];
524 s += sstep;
525 t += tstep;
526 }
527 }
528 } while ((pspan = pspan->pnext) != NULL);
529 }
530 break;
531 case 2:
532 {
533 short *pbase = (short *) sw32_cacheblock, *pdest;
534 int count;
535 fixed16_t s, t, snext, tnext, sstep, tstep;
536 float sdivz, tdivz, zi, z, du, dv;
537 float sdivz8stepu, tdivz8stepu, zi8stepu;
538
539 sstep = 0; // keep compiler happy
540 tstep = 0; // ditto
541
542 sdivz8stepu = sw32_d_sdivzstepu * 8;
543 tdivz8stepu = sw32_d_tdivzstepu * 8;
544 zi8stepu = d_zistepu * 8 * 65536;
545
546 do {
547 pdest = (short *) sw32_d_viewbuffer + (sw32_screenwidth * pspan->v) +
548 pspan->u;
549
550 count = pspan->count;
551
552 // calculate the initial s/z, t/z, 1/z, s, and t and clamp
553 du = (float) pspan->u;
554 dv = (float) pspan->v;
555
556 sdivz = sw32_d_sdivzorigin + dv * sw32_d_sdivzstepv + du * sw32_d_sdivzstepu;
557 tdivz = sw32_d_tdivzorigin + dv * sw32_d_tdivzstepv + du * sw32_d_tdivzstepu;
558 zi = (d_ziorigin + dv * d_zistepv + du * d_zistepu) * 65536.0f;
559 z = sw32_d_zitable[(unsigned short) zi];
560
561 s = (int) (sdivz * z) + sw32_sadjust;
562 s = bound(0, s, sw32_bbextents);
563 t = (int) (tdivz * z) + sw32_tadjust;
564 t = bound(0, t, sw32_bbextentt);
565
566 while(count >= 8) {
567 count -= 8;
568 // calculate s/z, t/z, zi->fixed s and t at far end of span,
569 // calculate s and t steps across span by shifting
570 sdivz += sdivz8stepu;
571 tdivz += tdivz8stepu;
572 zi += zi8stepu;
573 z = sw32_d_zitable[(unsigned short) zi];
574
575 // prevent round-off error on <0 steps from from causing
576 // overstepping & running off the edge of the texture
577 snext = (int) (sdivz * z) + sw32_sadjust;
578 snext = bound(8, snext, sw32_bbextents);
579 tnext = (int) (tdivz * z) + sw32_tadjust;
580 tnext = bound(8, tnext, sw32_bbextentt);
581
582 sstep = (snext - s) >> 3;
583 tstep = (tnext - t) >> 3;
584
585 pdest[0] = pbase[(t >> 16) * sw32_cachewidth + (s >> 16)];
586 s += sstep;
587 t += tstep;
588 pdest[1] = pbase[(t >> 16) * sw32_cachewidth + (s >> 16)];
589 s += sstep;
590 t += tstep;
591 pdest[2] = pbase[(t >> 16) * sw32_cachewidth + (s >> 16)];
592 s += sstep;
593 t += tstep;
594 pdest[3] = pbase[(t >> 16) * sw32_cachewidth + (s >> 16)];
595 s += sstep;
596 t += tstep;
597 pdest[4] = pbase[(t >> 16) * sw32_cachewidth + (s >> 16)];
598 s += sstep;
599 t += tstep;
600 pdest[5] = pbase[(t >> 16) * sw32_cachewidth + (s >> 16)];
601 s += sstep;
602 t += tstep;
603 pdest[6] = pbase[(t >> 16) * sw32_cachewidth + (s >> 16)];
604 s += sstep;
605 t += tstep;
606 pdest[7] = pbase[(t >> 16) * sw32_cachewidth + (s >> 16)];
607 s = snext;t = tnext;
608 pdest += 8;
609 }
610 if (count)
611 {
612 // calculate s/z, t/z, zi->fixed s and t at last pixel in span
613 // (so can't step off polygon), clamp, calculate s and t steps
614 // across span by division, biasing steps low so we don't run
615 // off the texture
616 //countminus1 = (float) (count - 1);
617 sdivz += sw32_d_sdivzstepu * count; //minus1;
618 tdivz += sw32_d_tdivzstepu * count; //minus1;
619 zi += d_zistepu * 65536.0f * count; //minus1;
620 z = sw32_d_zitable[(unsigned short) zi];
621
622 // prevent round-off error on <0 steps from from causing
623 // overstepping & running off the edge of the texture
624 snext = (int) (sdivz * z) + sw32_sadjust;
625 snext = bound(count, snext, sw32_bbextents);
626 tnext = (int) (tdivz * z) + sw32_tadjust;
627 tnext = bound(count, tnext, sw32_bbextentt);
628
629 if (count > 1) {
630 sstep = (snext - s) / count; //(count - 1);
631 tstep = (tnext - t) / count; //(count - 1);
632
633 if (count & 4)
634 {
635 pdest[0] = pbase[(t >> 16) * sw32_cachewidth + (s >> 16)];
636 s += sstep;
637 t += tstep;
638 pdest[1] = pbase[(t >> 16) * sw32_cachewidth + (s >> 16)];
639 s += sstep;
640 t += tstep;
641 pdest[2] = pbase[(t >> 16) * sw32_cachewidth + (s >> 16)];
642 s += sstep;
643 t += tstep;
644 pdest[3] = pbase[(t >> 16) * sw32_cachewidth + (s >> 16)];
645 s += sstep;t += tstep;
646 pdest += 4;
647 }
648 if (count & 2)
649 {
650 pdest[0] = pbase[(t >> 16) * sw32_cachewidth + (s >> 16)];
651 s += sstep;
652 t += tstep;
653 pdest[1] = pbase[(t >> 16) * sw32_cachewidth + (s >> 16)];
654 s += sstep;
655 t += tstep;
656 pdest += 2;
657 }
658 if (count & 1)
659 pdest[0] = pbase[(t >> 16) * sw32_cachewidth + (s >> 16)];
660 s += sstep;
661 t += tstep;
662 }
663 else
664 {
665 pdest[0] = pbase[(t >> 16) * sw32_cachewidth + (s >> 16)];
666 s += sstep;
667 t += tstep;
668 }
669 }
670 } while ((pspan = pspan->pnext) != NULL);
671 }
672 break;
673 case 4:
674 {
675 int *pbase = (int *) sw32_cacheblock, *pdest;
676 int count;
677 fixed16_t s, t, snext, tnext, sstep, tstep;
678 float sdivz, tdivz, zi, z, du, dv;
679 float sdivz8stepu, tdivz8stepu, zi8stepu;
680
681 sstep = 0; // keep compiler happy
682 tstep = 0; // ditto
683
684 sdivz8stepu = sw32_d_sdivzstepu * 8;
685 tdivz8stepu = sw32_d_tdivzstepu * 8;
686 zi8stepu = d_zistepu * 8 * 65536;
687
688 do {
689 pdest = (int *) sw32_d_viewbuffer + (sw32_screenwidth * pspan->v) + pspan->u;
690
691 count = pspan->count;
692
693 // calculate the initial s/z, t/z, 1/z, s, and t and clamp
694 du = (float) pspan->u;
695 dv = (float) pspan->v;
696
697 sdivz = sw32_d_sdivzorigin + dv * sw32_d_sdivzstepv + du * sw32_d_sdivzstepu;
698 tdivz = sw32_d_tdivzorigin + dv * sw32_d_tdivzstepv + du * sw32_d_tdivzstepu;
699 zi = (d_ziorigin + dv * d_zistepv + du * d_zistepu) * 65536.0f;
700 z = sw32_d_zitable[(unsigned short) zi];
701
702 s = (int) (sdivz * z) + sw32_sadjust;
703 s = bound(0, s, sw32_bbextents);
704 t = (int) (tdivz * z) + sw32_tadjust;
705 t = bound(0, t, sw32_bbextentt);
706
707 while(count >= 8) {
708 count -= 8;
709 // calculate s/z, t/z, zi->fixed s and t at far end of span,
710 // calculate s and t steps across span by shifting
711 sdivz += sdivz8stepu;
712 tdivz += tdivz8stepu;
713 zi += zi8stepu;
714 z = sw32_d_zitable[(unsigned short) zi];
715
716 // prevent round-off error on <0 steps from from causing
717 // overstepping & running off the edge of the texture
718 snext = (int) (sdivz * z) + sw32_sadjust;
719 snext = bound(8, snext, sw32_bbextents);
720 tnext = (int) (tdivz * z) + sw32_tadjust;
721 tnext = bound(8, tnext, sw32_bbextentt);
722
723 sstep = (snext - s) >> 3;
724 tstep = (tnext - t) >> 3;
725
726 pdest[0] = pbase[(t >> 16) * sw32_cachewidth + (s >> 16)];
727 s += sstep;
728 t += tstep;
729 pdest[1] = pbase[(t >> 16) * sw32_cachewidth + (s >> 16)];
730 s += sstep;
731 t += tstep;
732 pdest[2] = pbase[(t >> 16) * sw32_cachewidth + (s >> 16)];
733 s += sstep;
734 t += tstep;
735 pdest[3] = pbase[(t >> 16) * sw32_cachewidth + (s >> 16)];
736 s += sstep;
737 t += tstep;
738 pdest[4] = pbase[(t >> 16) * sw32_cachewidth + (s >> 16)];
739 s += sstep;
740 t += tstep;
741 pdest[5] = pbase[(t >> 16) * sw32_cachewidth + (s >> 16)];
742 s += sstep;
743 t += tstep;
744 pdest[6] = pbase[(t >> 16) * sw32_cachewidth + (s >> 16)];
745 s += sstep;
746 t += tstep;
747 pdest[7] = pbase[(t >> 16) * sw32_cachewidth + (s >> 16)];
748 s = snext;
749 t = tnext;
750 pdest += 8;
751 }
752 if (count)
753 {
754 // calculate s/z, t/z, zi->fixed s and t at last pixel in span
755 // (so can't step off polygon), clamp, calculate s and t steps
756 // across span by division, biasing steps low so we don't run
757 // off the texture
758 //countminus1 = (float) (count - 1);
759 sdivz += sw32_d_sdivzstepu * count; //minus1;
760 tdivz += sw32_d_tdivzstepu * count; //minus1;
761 zi += d_zistepu * 65536.0f * count; //minus1;
762 z = sw32_d_zitable[(unsigned short) zi];
763
764 // prevent round-off error on <0 steps from from causing
765 // overstepping & running off the edge of the texture
766 snext = (int) (sdivz * z) + sw32_sadjust;
767 snext = bound(count, snext, sw32_bbextents);
768 tnext = (int) (tdivz * z) + sw32_tadjust;
769 tnext = bound(count, tnext, sw32_bbextentt);
770
771 if (count > 1) {
772 sstep = (snext - s) / count; //(count - 1);
773 tstep = (tnext - t) / count; //(count - 1);
774
775 if (count & 4)
776 {
777 pdest[0] = pbase[(t >> 16) * sw32_cachewidth + (s >> 16)];
778 s += sstep;
779 t += tstep;
780 pdest[1] = pbase[(t >> 16) * sw32_cachewidth + (s >> 16)];
781 s += sstep;
782 t += tstep;
783 pdest[2] = pbase[(t >> 16) * sw32_cachewidth + (s >> 16)];
784 s += sstep;
785 t += tstep;
786 pdest[3] = pbase[(t >> 16) * sw32_cachewidth + (s >> 16)];
787 s += sstep;
788 t += tstep;
789 pdest += 4;
790 }
791 if (count & 2)
792 {
793 pdest[0] = pbase[(t >> 16) * sw32_cachewidth + (s >> 16)];
794 s += sstep;
795 t += tstep;
796 pdest[1] = pbase[(t >> 16) * sw32_cachewidth + (s >> 16)];
797 s += sstep;
798 t += tstep;
799 pdest += 2;
800 }
801 if (count & 1)
802 pdest[0] = pbase[(t >> 16) * sw32_cachewidth + (s >> 16)];
803 s += sstep;
804 t += tstep;
805 }
806 else
807 {
808 pdest[0] = pbase[(t >> 16) * sw32_cachewidth + (s >> 16)];
809 s += sstep;
810 t += tstep;
811 }
812 }
813 } while ((pspan = pspan->pnext) != NULL);
814 }
815 break;
816 default:
817 Sys_Error("D_DrawSpans: unsupported r_pixbytes %i", sw32_r_pixbytes);
818 }
819 }
820
821 void
sw32_D_DrawZSpans(espan_t * pspan)822 sw32_D_DrawZSpans (espan_t *pspan)
823 {
824 int count, doublecount, izistep;
825 int izi;
826 short *pdest;
827 unsigned int ltemp;
828 double zi;
829 float du, dv;
830
831 // FIXME: check for clamping/range problems
832 // we count on FP exceptions being turned off to avoid range problems
833 izistep = (int) (d_zistepu * 0x8000 * 0x10000);
834
835 do {
836 pdest = sw32_d_pzbuffer + (sw32_d_zwidth * pspan->v) + pspan->u;
837
838 count = pspan->count;
839
840 // calculate the initial 1/z
841 du = (float) pspan->u;
842 dv = (float) pspan->v;
843
844 zi = d_ziorigin + dv * d_zistepv + du * d_zistepu;
845 // we count on FP exceptions being turned off to avoid range problems
846 izi = (int) (zi * 0x8000 * 0x10000);
847
848 // LordHavoc: added big endian case, the old code is not correct on
849 // big-endian (results in swapped depth pairs), and is tuned more for
850 // x86, PowerPC compilers can probably do a good job with raw loop
851 // unrolling if it is even necessary...
852 if (bigendien)
853 {
854 do
855 {
856 *pdest++ = (short) (izi >> 16);
857 izi += izistep;
858 }
859 while(--count);
860 }
861 else
862 {
863 if ((intptr_t) pdest & 0x02) {
864 *pdest++ = (short) (izi >> 16);
865 izi += izistep;
866 count--;
867 }
868
869 if ((doublecount = count >> 1) > 0) {
870 do {
871 ltemp = izi >> 16;
872 izi += izistep;
873 ltemp |= izi & 0xFFFF0000;
874 izi += izistep;
875 *(int *) pdest = ltemp;
876 pdest += 2;
877 } while (--doublecount > 0);
878 }
879
880 if (count & 1)
881 *pdest = (short) (izi >> 16);
882 }
883 } while ((pspan = pspan->pnext) != NULL);
884 }
885