1 /*
2 	d_scan.c
3 
4 	Portable C scan-level rasterization code, all pixel depths.
5 
6 	Copyright (C) 1996-1997  Id Software, Inc.
7 
8 	This program is free software; you can redistribute it and/or
9 	modify it under the terms of the GNU General Public License
10 	as published by the Free Software Foundation; either version 2
11 	of the License, or (at your option) any later version.
12 
13 	This program is distributed in the hope that it will be useful,
14 	but WITHOUT ANY WARRANTY; without even the implied warranty of
15 	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
16 
17 	See the GNU General Public License for more details.
18 
19 	You should have received a copy of the GNU General Public License
20 	along with this program; if not, write to:
21 
22 		Free Software Foundation, Inc.
23 		59 Temple Place - Suite 330
24 		Boston, MA  02111-1307, USA
25 
26 */
27 #ifdef HAVE_CONFIG_H
28 # include "config.h"
29 #endif
30 
31 #define NH_DEFINE
32 #include "namehack.h"
33 
34 #include "QF/qendian.h"
35 #include "QF/render.h"
36 #include "QF/sys.h"
37 
38 #include "compat.h"
39 #include "d_local.h"
40 #include "r_internal.h"
41 #include "vid_internal.h"
42 
// State handed from sw32_Turbulent to D_DrawTurbulentSpan for each sub-span.

// base of the source texels (sw32_Turbulent points it at sw32_cacheblock)
static byte       *r_turb_pbase;
// next destination pixel; the real pixel type depends on sw32_r_pixbytes
static void       *r_turb_pdest;
// current 16.16 fixed-point s/t and the per-pixel s/t steps
static fixed16_t   r_turb_s, r_turb_t, r_turb_sstep, r_turb_tstep;
// offset table (a window into sw32_sintable) used to perturb s and t
static int        *r_turb_turb;
// number of pixels D_DrawTurbulentSpan has to draw in the current sub-span
static int         r_turb_spancount;
48 
49 /*
50 	D_WarpScreen
51 
52 	this performs a slight compression of the screen at the same time as
53 	the sine warp, to keep the edges from wrapping
54 */
55 void
sw32_D_WarpScreen(void)56 sw32_D_WarpScreen (void)
57 {
58 	switch(sw32_r_pixbytes) {
59 	case 1:
60 	{
61 		int         w, h;
62 		int         u, v;
63 		byte       *dest;
64 		int        *turb;
65 		int        *col;
66 		byte      **row;
67 		byte       *rowptr[MAXHEIGHT];
68 		int         column[MAXWIDTH];
69 		float       wratio, hratio;
70 
71 		w = r_refdef.vrect.width;
72 		h = r_refdef.vrect.height;
73 
74 		wratio = w / (float) scr_vrect.width;
75 		hratio = h / (float) scr_vrect.height;
76 
77 		for (v = 0; v < scr_vrect.height + AMP2 * 2; v++) {
78 			rowptr[v] = (byte *) sw32_d_viewbuffer + (r_refdef.vrect.y *
79 												 sw32_screenwidth) +
80 				(sw32_screenwidth * (int) ((float) v * hratio * h /
81 									  (h + AMP2 * 2)));
82 		}
83 
84 		for (u = 0; u < scr_vrect.width + AMP2 * 2; u++) {
85 			column[u] = r_refdef.vrect.x +
86 				(int) ((float) u * wratio * w / (w + AMP2 * 2));
87 		}
88 
89 		turb = sw32_intsintable + ((int) (vr_data.realtime * SPEED) & (CYCLE - 1));
90 		dest = (byte *)vid.buffer + scr_vrect.y * vid.rowbytes +
91 						 scr_vrect.x;
92 
93 		for (v = 0; v < scr_vrect.height; v++, dest += vid.rowbytes) {
94 			col = &column[turb[v]];
95 			row = &rowptr[v];
96 			for (u = 0; u < scr_vrect.width; u += 4) {
97 				dest[u + 0] = row[turb[u + 0]][col[u + 0]];
98 				dest[u + 1] = row[turb[u + 1]][col[u + 1]];
99 				dest[u + 2] = row[turb[u + 2]][col[u + 2]];
100 				dest[u + 3] = row[turb[u + 3]][col[u + 3]];
101 			}
102 		}
103 	}
104 	break;
105 	case 2:
106 	{
107 		int         w, h;
108 		int         u, v;
109 		short      *dest;
110 		int        *turb;
111 		int        *col;
112 		short     **row;
113 		short      *rowptr[MAXHEIGHT];
114 		int         column[MAXWIDTH];
115 		float       wratio, hratio;
116 
117 		w = r_refdef.vrect.width;
118 		h = r_refdef.vrect.height;
119 
120 		wratio = w / (float) scr_vrect.width;
121 		hratio = h / (float) scr_vrect.height;
122 
123 		for (v = 0; v < scr_vrect.height + AMP2 * 2; v++) {
124 			rowptr[v] = (short *) sw32_d_viewbuffer +
125 				(r_refdef.vrect.y * sw32_screenwidth) +
126 				(sw32_screenwidth * (int) ((float) v * hratio * h /
127 									  (h + AMP2 * 2)));
128 		}
129 
130 		for (u = 0; u < scr_vrect.width + AMP2 * 2; u++) {
131 			column[u] = r_refdef.vrect.x +
132 				(int) ((float) u * wratio * w / (w + AMP2 * 2));
133 		}
134 
135 		turb = sw32_intsintable + ((int) (vr_data.realtime * SPEED) & (CYCLE - 1));
136 		dest = (short *) vid.buffer + scr_vrect.y * (vid.rowbytes >> 1) +
137 			scr_vrect.x;
138 
139 		for (v = 0; v < scr_vrect.height; v++, dest += (vid.rowbytes >> 1)) {
140 			col = &column[turb[v]];
141 			row = &rowptr[v];
142 			for (u = 0; u < scr_vrect.width; u += 4) {
143 				dest[u + 0] = row[turb[u + 0]][col[u + 0]];
144 				dest[u + 1] = row[turb[u + 1]][col[u + 1]];
145 				dest[u + 2] = row[turb[u + 2]][col[u + 2]];
146 				dest[u + 3] = row[turb[u + 3]][col[u + 3]];
147 			}
148 		}
149 	}
150 	break;
151 	case 4:
152 	{
153 		int         w, h;
154 		int         u, v;
155 		int        *dest;
156 		int        *turb;
157 		int        *col;
158 		int       **row;
159 		int        *rowptr[MAXHEIGHT];
160 		int         column[MAXWIDTH];
161 		float       wratio, hratio;
162 
163 		w = r_refdef.vrect.width;
164 		h = r_refdef.vrect.height;
165 
166 		wratio = w / (float) scr_vrect.width;
167 		hratio = h / (float) scr_vrect.height;
168 
169 		for (v = 0; v < scr_vrect.height + AMP2 * 2; v++) {
170 			rowptr[v] = (int *) sw32_d_viewbuffer +
171 				(r_refdef.vrect.y * sw32_screenwidth) +
172 				(sw32_screenwidth * (int) ((float) v * hratio * h /
173 									  (h + AMP2 * 2)));
174 		}
175 
176 		for (u = 0; u < scr_vrect.width + AMP2 * 2; u++) {
177 			column[u] = r_refdef.vrect.x +
178 				(int) ((float) u * wratio * w / (w + AMP2 * 2));
179 		}
180 
181 		turb = sw32_intsintable + ((int) (vr_data.realtime * SPEED) & (CYCLE - 1));
182 		dest = (int *) vid.buffer + scr_vrect.y * (vid.rowbytes >> 2) +
183 			scr_vrect.x;
184 
185 		for (v = 0; v < scr_vrect.height; v++, dest += (vid.rowbytes >> 2)) {
186 			col = &column[turb[v]];
187 			row = &rowptr[v];
188 			for (u = 0; u < scr_vrect.width; u += 4) {
189 				dest[u + 0] = row[turb[u + 0]][col[u + 0]];
190 				dest[u + 1] = row[turb[u + 1]][col[u + 1]];
191 				dest[u + 2] = row[turb[u + 2]][col[u + 2]];
192 				dest[u + 3] = row[turb[u + 3]][col[u + 3]];
193 			}
194 		}
195 	}
196 	break;
197 	default:
198 		Sys_Error("D_WarpScreen: unsupported r_pixbytes %i", sw32_r_pixbytes);
199 	}
200 }
201 
202 static void
D_DrawTurbulentSpan(void)203 D_DrawTurbulentSpan (void)
204 {
205 	int         sturb, tturb;
206 
207 	switch (sw32_r_pixbytes) {
208 	case 1:
209 	{
210 		byte *pdest = (byte *) r_turb_pdest;
211 		do {
212 			sturb =	((r_turb_s + r_turb_turb[(r_turb_t >> 16) &
213 											 (CYCLE - 1)]) >> 16) &	63;
214 			tturb = ((r_turb_t + r_turb_turb[(r_turb_s >> 16) &
215 											 (CYCLE - 1)]) >> 16) & 63;
216 			*pdest++ = r_turb_pbase[(tturb << 6) + sturb];
217 			r_turb_s += r_turb_sstep;
218 			r_turb_t += r_turb_tstep;
219 		} while (--r_turb_spancount > 0);
220 		r_turb_pdest = (byte *)pdest;
221 	}
222 	break;
223 	case 2:
224 	{
225 		short *pdest = (short *) r_turb_pdest;
226 		do {
227 			sturb = ((r_turb_s + r_turb_turb[(r_turb_t >> 16) &
228 											 (CYCLE - 1)]) >> 16) & 63;
229 			tturb = ((r_turb_t + r_turb_turb[(r_turb_s >> 16) &
230 											 (CYCLE - 1)]) >> 16) & 63;
231 			*pdest++ = sw32_8to16table[r_turb_pbase[(tturb << 6) + sturb]];
232 			r_turb_s += r_turb_sstep;
233 			r_turb_t += r_turb_tstep;
234 		} while (--r_turb_spancount > 0);
235 		r_turb_pdest = (byte *)pdest;
236 	}
237 	break;
238 	case 4:
239 	{
240 		int *pdest = (int *) r_turb_pdest;
241 		do {
242 			sturb = ((r_turb_s + r_turb_turb[(r_turb_t >> 16) &
243 											 (CYCLE - 1)]) >> 16) & 63;
244 				tturb = ((r_turb_t + r_turb_turb[(r_turb_s >> 16) &
245 												 (CYCLE - 1)]) >> 16) & 63;
246 				*pdest++ = d_8to24table[r_turb_pbase[(tturb << 6) + sturb]];
247 				r_turb_s += r_turb_sstep;
248 				r_turb_t += r_turb_tstep;
249 			} while (--r_turb_spancount > 0);
250 			r_turb_pdest = (byte *)pdest;
251 		}
252 		break;
253 	default:
254 		Sys_Error("D_DrawTurbulentSpan: unsupported r_pixbytes %i",
255 				  sw32_r_pixbytes);
256 	}
257 }
258 
259 void
sw32_Turbulent(espan_t * pspan)260 sw32_Turbulent (espan_t *pspan)
261 {
262 	int         count;
263 	fixed16_t   snext, tnext;
264 	float       sdivz, tdivz, zi, z, du, dv, spancountminus1;
265 	float       sdivz16stepu, tdivz16stepu, zi16stepu;
266 
267 	r_turb_turb = sw32_sintable + ((int) (vr_data.realtime * SPEED) & (CYCLE - 1));
268 
269 	r_turb_sstep = 0;					// keep compiler happy
270 	r_turb_tstep = 0;					// ditto
271 
272 	r_turb_pbase = (byte *) sw32_cacheblock;
273 
274 	sdivz16stepu = sw32_d_sdivzstepu * 16;
275 	tdivz16stepu = sw32_d_tdivzstepu * 16;
276 	zi16stepu = d_zistepu * 16 * 65536;
277 
278 	do {
279 		r_turb_pdest = (byte *) sw32_d_viewbuffer + ((sw32_screenwidth * pspan->v) +
280 												pspan->u) * sw32_r_pixbytes;
281 
282 		count = pspan->count;
283 
284 		// calculate the initial s/z, t/z, 1/z, s, and t and clamp
285 		du = (float) pspan->u;
286 		dv = (float) pspan->v;
287 
288 		sdivz = sw32_d_sdivzorigin + dv * sw32_d_sdivzstepv + du * sw32_d_sdivzstepu;
289 		tdivz = sw32_d_tdivzorigin + dv * sw32_d_tdivzstepv + du * sw32_d_tdivzstepu;
290 		zi = (d_ziorigin + dv * d_zistepv + du * d_zistepu) * 65536.0f;
291 		z = sw32_d_zitable[(unsigned short) zi];
292 
293 		r_turb_s = (int) (sdivz * z) + sw32_sadjust;
294 		if (r_turb_s > sw32_bbextents)
295 			r_turb_s = sw32_bbextents;
296 		else if (r_turb_s < 0)
297 			r_turb_s = 0;
298 
299 		r_turb_t = (int) (tdivz * z) + sw32_tadjust;
300 		if (r_turb_t > sw32_bbextentt)
301 			r_turb_t = sw32_bbextentt;
302 		else if (r_turb_t < 0)
303 			r_turb_t = 0;
304 
305 		do {
306 			// calculate s and t at the far end of the span
307 			if (count >= 16)
308 				r_turb_spancount = 16;
309 			else
310 				r_turb_spancount = count;
311 
312 			count -= r_turb_spancount;
313 
314 			if (count) {
315 				// calculate s/z, t/z, zi->fixed s and t at far end of span,
316 				// calculate s and t steps across span by shifting
317 				sdivz += sdivz16stepu;
318 				tdivz += tdivz16stepu;
319 				zi += zi16stepu;
320 				z = sw32_d_zitable[(unsigned short) zi];
321 
322 				snext = (int) (sdivz * z) + sw32_sadjust;
323 				if (snext > sw32_bbextents)
324 					snext = sw32_bbextents;
325 				else if (snext < 16)
326 					snext = 16;			// prevent round-off error on <0
327 										// steps from
328 				// from causing overstepping & running off the
329 				// edge of the texture
330 
331 				tnext = (int) (tdivz * z) + sw32_tadjust;
332 				if (tnext > sw32_bbextentt)
333 					tnext = sw32_bbextentt;
334 				else if (tnext < 16)
335 					tnext = 16;			// guard against round-off error on
336 										// <0 steps
337 
338 				r_turb_sstep = (snext - r_turb_s) >> 4;
339 				r_turb_tstep = (tnext - r_turb_t) >> 4;
340 			} else {
341 				// calculate s/z, t/z, zi->fixed s and t at last pixel in
342 				// span (so can't step off polygon), clamp, calculate s and t
343 				// steps across span by division, biasing steps low so we
344 				// don't run off the texture
345 				spancountminus1 = (float) (r_turb_spancount - 1);
346 				sdivz += sw32_d_sdivzstepu * spancountminus1;
347 				tdivz += sw32_d_tdivzstepu * spancountminus1;
348 				zi += d_zistepu * 65536.0f * spancountminus1;
349 				z = sw32_d_zitable[(unsigned short) zi];
350 				snext = (int) (sdivz * z) + sw32_sadjust;
351 				if (snext > sw32_bbextents)
352 					snext = sw32_bbextents;
353 				else if (snext < 16)
354 					snext = 16;			// prevent round-off error on <0 steps
355 										// from causing overstepping & running
356 										// off the edge of the texture
357 
358 				tnext = (int) (tdivz * z) + sw32_tadjust;
359 				if (tnext > sw32_bbextentt)
360 					tnext = sw32_bbextentt;
361 				else if (tnext < 16)
362 					tnext = 16;			// guard against round-off error on
363 										// <0 steps
364 
365 				if (r_turb_spancount > 1) {
366 					r_turb_sstep = (snext - r_turb_s) / (r_turb_spancount - 1);
367 					r_turb_tstep = (tnext - r_turb_t) / (r_turb_spancount - 1);
368 				}
369 			}
370 
371 			r_turb_s = r_turb_s & ((CYCLE << 16) - 1);
372 			r_turb_t = r_turb_t & ((CYCLE << 16) - 1);
373 
374 			D_DrawTurbulentSpan ();
375 
376 			r_turb_s = snext;
377 			r_turb_t = tnext;
378 
379 		} while (count > 0);
380 
381 	} while ((pspan = pspan->pnext) != NULL);
382 }
383 
384 void
sw32_D_DrawSpans(espan_t * pspan)385 sw32_D_DrawSpans (espan_t *pspan)
386 {
387 	switch(sw32_r_pixbytes) {
388 	case 1:
389 	{
390 		byte       *pbase = (byte *) sw32_cacheblock, *pdest;
391 		int         count;
392 		fixed16_t   s, t, snext, tnext, sstep, tstep;
393 		float       sdivz, tdivz, zi, z, du, dv;
394 		float       sdivz8stepu, tdivz8stepu, zi8stepu;
395 
396 		sstep = 0;							// keep compiler happy
397 		tstep = 0;							// ditto
398 
399 		sdivz8stepu = sw32_d_sdivzstepu * 8;
400 		tdivz8stepu = sw32_d_tdivzstepu * 8;
401 		zi8stepu = d_zistepu * 8 * 65536;
402 
403 		do {
404 			pdest = (byte *) sw32_d_viewbuffer + (sw32_screenwidth * pspan->v) +
405 				pspan->u;
406 
407 			count = pspan->count;
408 
409 			// calculate the initial s/z, t/z, 1/z, s, and t and clamp
410 			du = (float) pspan->u;
411 			dv = (float) pspan->v;
412 
413 			sdivz = sw32_d_sdivzorigin + dv * sw32_d_sdivzstepv + du * sw32_d_sdivzstepu;
414 			tdivz = sw32_d_tdivzorigin + dv * sw32_d_tdivzstepv + du * sw32_d_tdivzstepu;
415 			zi = (d_ziorigin + dv * d_zistepv + du * d_zistepu) * 65536.0f;
416 			z = sw32_d_zitable[(unsigned short) zi];
417 
418 			s = (int) (sdivz * z) + sw32_sadjust;
419 			s = bound(0, s, sw32_bbextents);
420 			t = (int) (tdivz * z) + sw32_tadjust;
421 			t = bound(0, t, sw32_bbextentt);
422 
423 			while(count >= 8) {
424 				count -= 8;
425 				// calculate s/z, t/z, zi->fixed s and t at far end of span,
426 				// calculate s and t steps across span by shifting
427 				sdivz += sdivz8stepu;
428 				tdivz += tdivz8stepu;
429 				zi += zi8stepu;
430 				z = sw32_d_zitable[(unsigned short) zi];
431 
432 				// prevent round-off error on <0 steps from from causing
433 				// overstepping & running off the edge of the texture
434 				snext = (int) (sdivz * z) + sw32_sadjust;
435 				snext = bound(8, snext, sw32_bbextents);
436 				tnext = (int) (tdivz * z) + sw32_tadjust;
437 				tnext = bound(8, tnext, sw32_bbextentt);
438 
439 				sstep = (snext - s) >> 3;
440 				tstep = (tnext - t) >> 3;
441 
442 				pdest[0] = pbase[(t >> 16) * sw32_cachewidth + (s >> 16)];
443 				s += sstep;t += tstep;
444 				pdest[1] = pbase[(t >> 16) * sw32_cachewidth + (s >> 16)];
445 				s += sstep;
446 				t += tstep;
447 				pdest[2] = pbase[(t >> 16) * sw32_cachewidth + (s >> 16)];
448 				s += sstep;
449 				t += tstep;
450 				pdest[3] = pbase[(t >> 16) * sw32_cachewidth + (s >> 16)];
451 				s += sstep;
452 				t += tstep;
453 				pdest[4] = pbase[(t >> 16) * sw32_cachewidth + (s >> 16)];
454 				s += sstep;
455 				t += tstep;
456 				pdest[5] = pbase[(t >> 16) * sw32_cachewidth + (s >> 16)];
457 				s += sstep;
458 				t += tstep;
459 				pdest[6] = pbase[(t >> 16) * sw32_cachewidth + (s >> 16)];
460 				s += sstep;
461 				t += tstep;
462 				pdest[7] = pbase[(t >> 16) * sw32_cachewidth + (s >> 16)];
463 				s = snext;
464 				t = tnext;
465 				pdest += 8;
466 			}
467 			if (count)
468 			{
469 				// calculate s/z, t/z, zi->fixed s and t at last pixel in span
470 				// (so can't step off polygon), clamp, calculate s and t steps
471 				// across span by division, biasing steps low so we don't run
472 				// off the texture
473 				//countminus1 = (float) (count - 1);
474 				sdivz += sw32_d_sdivzstepu * count; //minus1;
475 				tdivz += sw32_d_tdivzstepu * count; //minus1;
476 				zi += d_zistepu * 65536.0f * count; //minus1;
477 				z = sw32_d_zitable[(unsigned short) zi];
478 
479 				// prevent round-off error on <0 steps from from causing
480 				// overstepping & running off the edge of the texture
481 				snext = (int) (sdivz * z) + sw32_sadjust;
482 				snext = bound(count, snext, sw32_bbextents);
483 				tnext = (int) (tdivz * z) + sw32_tadjust;
484 				tnext = bound(count, tnext, sw32_bbextentt);
485 
486 				if (count > 1) {
487 					sstep = (snext - s) / count; //(count - 1);
488 					tstep = (tnext - t) / count; //(count - 1);
489 
490 					if (count & 4)
491 					{
492 						pdest[0] = pbase[(t >> 16) * sw32_cachewidth + (s >> 16)];
493 						s += sstep;
494 						t += tstep;
495 						pdest[1] = pbase[(t >> 16) * sw32_cachewidth + (s >> 16)];
496 						s += sstep;
497 						t += tstep;
498 						pdest[2] = pbase[(t >> 16) * sw32_cachewidth + (s >> 16)];
499 						s += sstep;
500 						t += tstep;
501 						pdest[3] = pbase[(t >> 16) * sw32_cachewidth + (s >> 16)];
502 						s += sstep;
503 						t += tstep;
504 						pdest += 4;
505 					}
506 					if (count & 2)
507 					{
508 						pdest[0] = pbase[(t >> 16) * sw32_cachewidth + (s >> 16)];
509 						s += sstep;
510 						t += tstep;
511 						pdest[1] = pbase[(t >> 16) * sw32_cachewidth + (s >> 16)];
512 						s += sstep;
513 						t += tstep;
514 						pdest += 2;
515 					}
516 					if (count & 1)
517 						pdest[0] = pbase[(t >> 16) * sw32_cachewidth + (s >> 16)];
518 					s += sstep;
519 					t += tstep;
520 				}
521 				else
522 				{
523 					pdest[0] = pbase[(t >> 16) * sw32_cachewidth + (s >> 16)];
524 					s += sstep;
525 					t += tstep;
526 				}
527 			}
528 		} while ((pspan = pspan->pnext) != NULL);
529 	}
530 	break;
531 	case 2:
532 	{
533 		short      *pbase = (short *) sw32_cacheblock, *pdest;
534 		int         count;
535 		fixed16_t   s, t, snext, tnext, sstep, tstep;
536 		float       sdivz, tdivz, zi, z, du, dv;
537 		float       sdivz8stepu, tdivz8stepu, zi8stepu;
538 
539 		sstep = 0;							// keep compiler happy
540 		tstep = 0;							// ditto
541 
542 		sdivz8stepu = sw32_d_sdivzstepu * 8;
543 		tdivz8stepu = sw32_d_tdivzstepu * 8;
544 		zi8stepu = d_zistepu * 8 * 65536;
545 
546 		do {
547 			pdest = (short *) sw32_d_viewbuffer + (sw32_screenwidth * pspan->v) +
548 				pspan->u;
549 
550 			count = pspan->count;
551 
552 			// calculate the initial s/z, t/z, 1/z, s, and t and clamp
553 			du = (float) pspan->u;
554 			dv = (float) pspan->v;
555 
556 			sdivz = sw32_d_sdivzorigin + dv * sw32_d_sdivzstepv + du * sw32_d_sdivzstepu;
557 			tdivz = sw32_d_tdivzorigin + dv * sw32_d_tdivzstepv + du * sw32_d_tdivzstepu;
558 			zi = (d_ziorigin + dv * d_zistepv + du * d_zistepu) * 65536.0f;
559 			z = sw32_d_zitable[(unsigned short) zi];
560 
561 			s = (int) (sdivz * z) + sw32_sadjust;
562 			s = bound(0, s, sw32_bbextents);
563 			t = (int) (tdivz * z) + sw32_tadjust;
564 			t = bound(0, t, sw32_bbextentt);
565 
566 			while(count >= 8) {
567 				count -= 8;
568 				// calculate s/z, t/z, zi->fixed s and t at far end of span,
569 				// calculate s and t steps across span by shifting
570 				sdivz += sdivz8stepu;
571 				tdivz += tdivz8stepu;
572 				zi += zi8stepu;
573 				z = sw32_d_zitable[(unsigned short) zi];
574 
575 				// prevent round-off error on <0 steps from from causing
576 				// overstepping & running off the edge of the texture
577 				snext = (int) (sdivz * z) + sw32_sadjust;
578 				snext = bound(8, snext, sw32_bbextents);
579 				tnext = (int) (tdivz * z) + sw32_tadjust;
580 				tnext = bound(8, tnext, sw32_bbextentt);
581 
582 				sstep = (snext - s) >> 3;
583 				tstep = (tnext - t) >> 3;
584 
585 				pdest[0] = pbase[(t >> 16) * sw32_cachewidth + (s >> 16)];
586 				s += sstep;
587 				t += tstep;
588 				pdest[1] = pbase[(t >> 16) * sw32_cachewidth + (s >> 16)];
589 				s += sstep;
590 				t += tstep;
591 				pdest[2] = pbase[(t >> 16) * sw32_cachewidth + (s >> 16)];
592 				s += sstep;
593 				t += tstep;
594 				pdest[3] = pbase[(t >> 16) * sw32_cachewidth + (s >> 16)];
595 				s += sstep;
596 				t += tstep;
597 				pdest[4] = pbase[(t >> 16) * sw32_cachewidth + (s >> 16)];
598 				s += sstep;
599 				t += tstep;
600 				pdest[5] = pbase[(t >> 16) * sw32_cachewidth + (s >> 16)];
601 				s += sstep;
602 				t += tstep;
603 				pdest[6] = pbase[(t >> 16) * sw32_cachewidth + (s >> 16)];
604 				s += sstep;
605 				t += tstep;
606 				pdest[7] = pbase[(t >> 16) * sw32_cachewidth + (s >> 16)];
607 				s = snext;t = tnext;
608 				pdest += 8;
609 			}
610 			if (count)
611 			{
612 				// calculate s/z, t/z, zi->fixed s and t at last pixel in span
613 				// (so can't step off polygon), clamp, calculate s and t steps
614 				// across span by division, biasing steps low so we don't run
615 				// off the texture
616 				//countminus1 = (float) (count - 1);
617 				sdivz += sw32_d_sdivzstepu * count; //minus1;
618 				tdivz += sw32_d_tdivzstepu * count; //minus1;
619 				zi += d_zistepu * 65536.0f * count; //minus1;
620 				z = sw32_d_zitable[(unsigned short) zi];
621 
622 				// prevent round-off error on <0 steps from from causing
623 				// overstepping & running off the edge of the texture
624 				snext = (int) (sdivz * z) + sw32_sadjust;
625 				snext = bound(count, snext, sw32_bbextents);
626 				tnext = (int) (tdivz * z) + sw32_tadjust;
627 				tnext = bound(count, tnext, sw32_bbextentt);
628 
629 				if (count > 1) {
630 					sstep = (snext - s) / count; //(count - 1);
631 					tstep = (tnext - t) / count; //(count - 1);
632 
633 					if (count & 4)
634 					{
635 						pdest[0] = pbase[(t >> 16) * sw32_cachewidth + (s >> 16)];
636 						s += sstep;
637 						t += tstep;
638 						pdest[1] = pbase[(t >> 16) * sw32_cachewidth + (s >> 16)];
639 						s += sstep;
640 						t += tstep;
641 						pdest[2] = pbase[(t >> 16) * sw32_cachewidth + (s >> 16)];
642 						s += sstep;
643 						t += tstep;
644 						pdest[3] = pbase[(t >> 16) * sw32_cachewidth + (s >> 16)];
645 						s += sstep;t += tstep;
646 						pdest += 4;
647 					}
648 					if (count & 2)
649 					{
650 						pdest[0] = pbase[(t >> 16) * sw32_cachewidth + (s >> 16)];
651 						s += sstep;
652 						t += tstep;
653 						pdest[1] = pbase[(t >> 16) * sw32_cachewidth + (s >> 16)];
654 						s += sstep;
655 						t += tstep;
656 						pdest += 2;
657 					}
658 					if (count & 1)
659 						pdest[0] = pbase[(t >> 16) * sw32_cachewidth + (s >> 16)];
660 					s += sstep;
661 					t += tstep;
662 				}
663 				else
664 				{
665 					pdest[0] = pbase[(t >> 16) * sw32_cachewidth + (s >> 16)];
666 					s += sstep;
667 					t += tstep;
668 				}
669 			}
670 		} while ((pspan = pspan->pnext) != NULL);
671 	}
672 	break;
673 	case 4:
674 	{
675 		int        *pbase = (int *) sw32_cacheblock, *pdest;
676 		int         count;
677 		fixed16_t   s, t, snext, tnext, sstep, tstep;
678 		float       sdivz, tdivz, zi, z, du, dv;
679 		float       sdivz8stepu, tdivz8stepu, zi8stepu;
680 
681 		sstep = 0;							// keep compiler happy
682 		tstep = 0;							// ditto
683 
684 		sdivz8stepu = sw32_d_sdivzstepu * 8;
685 		tdivz8stepu = sw32_d_tdivzstepu * 8;
686 		zi8stepu = d_zistepu * 8 * 65536;
687 
688 		do {
689 			pdest = (int *) sw32_d_viewbuffer + (sw32_screenwidth * pspan->v) + pspan->u;
690 
691 			count = pspan->count;
692 
693 			// calculate the initial s/z, t/z, 1/z, s, and t and clamp
694 			du = (float) pspan->u;
695 			dv = (float) pspan->v;
696 
697 			sdivz = sw32_d_sdivzorigin + dv * sw32_d_sdivzstepv + du * sw32_d_sdivzstepu;
698 			tdivz = sw32_d_tdivzorigin + dv * sw32_d_tdivzstepv + du * sw32_d_tdivzstepu;
699 			zi = (d_ziorigin + dv * d_zistepv + du * d_zistepu) * 65536.0f;
700 			z = sw32_d_zitable[(unsigned short) zi];
701 
702 			s = (int) (sdivz * z) + sw32_sadjust;
703 			s = bound(0, s, sw32_bbextents);
704 			t = (int) (tdivz * z) + sw32_tadjust;
705 			t = bound(0, t, sw32_bbextentt);
706 
707 			while(count >= 8) {
708 				count -= 8;
709 				// calculate s/z, t/z, zi->fixed s and t at far end of span,
710 				// calculate s and t steps across span by shifting
711 				sdivz += sdivz8stepu;
712 				tdivz += tdivz8stepu;
713 				zi += zi8stepu;
714 				z = sw32_d_zitable[(unsigned short) zi];
715 
716 				// prevent round-off error on <0 steps from from causing
717 				// overstepping & running off the edge of the texture
718 				snext = (int) (sdivz * z) + sw32_sadjust;
719 				snext = bound(8, snext, sw32_bbextents);
720 				tnext = (int) (tdivz * z) + sw32_tadjust;
721 				tnext = bound(8, tnext, sw32_bbextentt);
722 
723 				sstep = (snext - s) >> 3;
724 				tstep = (tnext - t) >> 3;
725 
726 				pdest[0] = pbase[(t >> 16) * sw32_cachewidth + (s >> 16)];
727 				s += sstep;
728 				t += tstep;
729 				pdest[1] = pbase[(t >> 16) * sw32_cachewidth + (s >> 16)];
730 				s += sstep;
731 				t += tstep;
732 				pdest[2] = pbase[(t >> 16) * sw32_cachewidth + (s >> 16)];
733 				s += sstep;
734 				t += tstep;
735 				pdest[3] = pbase[(t >> 16) * sw32_cachewidth + (s >> 16)];
736 				s += sstep;
737 				t += tstep;
738 				pdest[4] = pbase[(t >> 16) * sw32_cachewidth + (s >> 16)];
739 				s += sstep;
740 				t += tstep;
741 				pdest[5] = pbase[(t >> 16) * sw32_cachewidth + (s >> 16)];
742 				s += sstep;
743 				t += tstep;
744 				pdest[6] = pbase[(t >> 16) * sw32_cachewidth + (s >> 16)];
745 				s += sstep;
746 				t += tstep;
747 				pdest[7] = pbase[(t >> 16) * sw32_cachewidth + (s >> 16)];
748 				s = snext;
749 				t = tnext;
750 				pdest += 8;
751 			}
752 			if (count)
753 			{
754 				// calculate s/z, t/z, zi->fixed s and t at last pixel in span
755 				// (so can't step off polygon), clamp, calculate s and t steps
756 				// across span by division, biasing steps low so we don't run
757 				// off the texture
758 				//countminus1 = (float) (count - 1);
759 				sdivz += sw32_d_sdivzstepu * count; //minus1;
760 				tdivz += sw32_d_tdivzstepu * count; //minus1;
761 				zi += d_zistepu * 65536.0f * count; //minus1;
762 				z = sw32_d_zitable[(unsigned short) zi];
763 
764 				// prevent round-off error on <0 steps from from causing
765 				// overstepping & running off the edge of the texture
766 				snext = (int) (sdivz * z) + sw32_sadjust;
767 				snext = bound(count, snext, sw32_bbextents);
768 				tnext = (int) (tdivz * z) + sw32_tadjust;
769 				tnext = bound(count, tnext, sw32_bbextentt);
770 
771 				if (count > 1) {
772 					sstep = (snext - s) / count; //(count - 1);
773 					tstep = (tnext - t) / count; //(count - 1);
774 
775 					if (count & 4)
776 					{
777 						pdest[0] = pbase[(t >> 16) * sw32_cachewidth + (s >> 16)];
778 						s += sstep;
779 						t += tstep;
780 						pdest[1] = pbase[(t >> 16) * sw32_cachewidth + (s >> 16)];
781 						s += sstep;
782 						t += tstep;
783 						pdest[2] = pbase[(t >> 16) * sw32_cachewidth + (s >> 16)];
784 						s += sstep;
785 						t += tstep;
786 						pdest[3] = pbase[(t >> 16) * sw32_cachewidth + (s >> 16)];
787 						s += sstep;
788 						t += tstep;
789 						pdest += 4;
790 					}
791 					if (count & 2)
792 					{
793 						pdest[0] = pbase[(t >> 16) * sw32_cachewidth + (s >> 16)];
794 						s += sstep;
795 						t += tstep;
796 						pdest[1] = pbase[(t >> 16) * sw32_cachewidth + (s >> 16)];
797 						s += sstep;
798 						t += tstep;
799 						pdest += 2;
800 					}
801 					if (count & 1)
802 						pdest[0] = pbase[(t >> 16) * sw32_cachewidth + (s >> 16)];
803 					s += sstep;
804 					t += tstep;
805 				}
806 				else
807 				{
808 					pdest[0] = pbase[(t >> 16) * sw32_cachewidth + (s >> 16)];
809 					s += sstep;
810 					t += tstep;
811 				}
812 			}
813 		} while ((pspan = pspan->pnext) != NULL);
814 	}
815 	break;
816 	default:
817 		Sys_Error("D_DrawSpans: unsupported r_pixbytes %i", sw32_r_pixbytes);
818 	}
819 }
820 
821 void
sw32_D_DrawZSpans(espan_t * pspan)822 sw32_D_DrawZSpans (espan_t *pspan)
823 {
824 	int         count, doublecount, izistep;
825 	int         izi;
826 	short      *pdest;
827 	unsigned int ltemp;
828 	double      zi;
829 	float       du, dv;
830 
831 	// FIXME: check for clamping/range problems
832 	// we count on FP exceptions being turned off to avoid range problems
833 	izistep = (int) (d_zistepu * 0x8000 * 0x10000);
834 
835 	do {
836 		pdest = sw32_d_pzbuffer + (sw32_d_zwidth * pspan->v) + pspan->u;
837 
838 		count = pspan->count;
839 
840 		// calculate the initial 1/z
841 		du = (float) pspan->u;
842 		dv = (float) pspan->v;
843 
844 		zi = d_ziorigin + dv * d_zistepv + du * d_zistepu;
845 		// we count on FP exceptions being turned off to avoid range problems
846 		izi = (int) (zi * 0x8000 * 0x10000);
847 
848 		// LordHavoc: added big endian case, the old code is not correct on
849 		// big-endian (results in swapped depth pairs), and is tuned more for
850 		// x86, PowerPC compilers can probably do a good job with raw loop
851 		// unrolling if it is even necessary...
852 		if (bigendien)
853 		{
854 			do
855 			{
856 				*pdest++ = (short) (izi >> 16);
857 				izi += izistep;
858 			}
859 			while(--count);
860 		}
861 		else
862 		{
863 			if ((intptr_t) pdest & 0x02) {
864 				*pdest++ = (short) (izi >> 16);
865 				izi += izistep;
866 				count--;
867 			}
868 
869 			if ((doublecount = count >> 1) > 0) {
870 				do {
871 					ltemp = izi >> 16;
872 					izi += izistep;
873 					ltemp |= izi & 0xFFFF0000;
874 					izi += izistep;
875 					*(int *) pdest = ltemp;
876 					pdest += 2;
877 				} while (--doublecount > 0);
878 			}
879 
880 			if (count & 1)
881 				*pdest = (short) (izi >> 16);
882 		}
883 	} while ((pspan = pspan->pnext) != NULL);
884 }
885