/*
	r_drawa.S

	x86 assembly-language edge clipping and emission code

	Copyright (C) 1996-1997  Id Software, Inc.

	This program is free software; you can redistribute it and/or
	modify it under the terms of the GNU General Public License
	as published by the Free Software Foundation; either version 2
	of the License, or (at your option) any later version.

	This program is distributed in the hope that it will be useful,
	but WITHOUT ANY WARRANTY; without even the implied warranty of
	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.

	See the GNU General Public License for more details.

	You should have received a copy of the GNU General Public License
	along with this program; if not, write to:

		Free Software Foundation, Inc.
		59 Temple Place - Suite 330
		Boston, MA  02111-1307, USA

	$Id$
*/

//----------------------------------------------------------------------
// build configuration, shared constants and static scratch storage
//----------------------------------------------------------------------
#ifdef HAVE_CONFIG_H
# include <config.h>
#endif
#include "asm_i386.h"
#include "quakeasm.h"
#include "asm_draw.h"
#include "d_ifacea.h"

// When PIC is defined, USE_INTEL_ASM is cancelled, which compiles out
// everything inside the #ifdef USE_INTEL_ASM region that follows.
// NOTE(review): presumably because the code addresses its data with
// absolute operands (e.g. "movl %esp,Lstack") -- confirm.
#ifdef PIC
#undef USE_INTEL_ASM //XXX asm pic hack
#endif

#ifdef USE_INTEL_ASM

// !!! if these are changed, they must be changed in r_draw.c too !!!
#define FULLY_CLIPPED_CACHED	0x80000000
#define FRAMECOUNT_MASK			0x7FFFFFFF

	.data

// Static scratch used by R_ClipEdge.  There is a single copy of each
// slot, shared by every invocation of the routine.
Ld0:			.single		0.0		// d0: distance of pv0 to the current clip plane
Ld1:			.single		0.0		// d1: distance of pv1 to the current clip plane
Lstack:			.long		0		// %esp captured at entry, reloaded at Ldone
Lfp_near_clip:	.single		NEAR_CLIP	// lower bound applied to transformed z
Lceilv0:		.long		0		// integer ceilv0 (written with fistpl)
Lv:				.long		0		// integer v, reloaded with fildl
// The next three are declared .long but are written with fstps / copied
// from r_u1, r_v1, r_lzi1 and read back with flds, i.e. they hold
// single-precision bit patterns.
Lu0:			.long		0
Lv0:			.long		0
Lzi0:			.long		0

	.text

//----------------------------------------------------------------------
// edge clipping code
//----------------------------------------------------------------------
//
// R_ClipEdge (pv0, pv1, clip)
//
// C equivalent (taken from the pseudo-code comments in the body; confirm
// the exact prototype against r_draw.c):
//	void R_ClipEdge (mvertex_t *pv0, mvertex_t *pv1, clipplane_t *clip)
//
// In:	three dword arguments on the stack, read at 4/8/12 bytes past the
//		return address (the "+12" in the offsets below accounts for the
//		three registers pushed at entry).  clip may be NULL.
// Out:	no register result; all effects are on renderer globals and on the
//		edge tables (see the body).
// Regs:	%esi, %edi, %ebx are saved and restored; %eax, %ecx, %edx and
//		the flags are clobbered.  Every exit path leaves the x87 stack at
//		the depth it had on entry.
//
// Outline
//	1. Lcliploop walks the clip list.  For each plane the distances d0
//	   and d1 of both end points are computed on the x87 and spilled to
//	   Ld0/Ld1; the classification then looks only at the IEEE sign bits
//	   with integer instructions (so -0.0 is classified as clipped).
//	2. If exactly one end point is clipped (Lp1 / Lp3) a clipvert is
//	   carved out of the stack with "subl $(mv_size),%esp", filled with
//	   the intersection point, and substituted for the clipped vertex
//	   (%edx = pv1 or %esi = pv0); the loop then continues with the next
//	   plane instead of recursing as the C pseudo-code does.  The stack
//	   is never unwound piecemeal: Ldone reloads %esp from Lstack.
//	3. Lemit projects both end points (LTransformAndProject) and links a
//	   new edge_t into newedges[] / removeedges[].
//
// Notes
//	* Lstack, Ld0, Ld1, ... are single static slots, so the routine keeps
//	  per-call state in global memory.
//	* The x87 control word is switched to r_ceil_cw at Lemit and back to
//	  r_single_cw before any branch that can leave the routine.
//	* Several conditional jumps consume flags set many instructions
//	  earlier; this relies on movl and the x87 instructions in between
//	  leaving EFLAGS untouched.
//	* GNU as, AT&T syntax: for the non-commutative two-register forms
//	  whose source is %st(0) and whose destination is %st(i), gas keeps
//	  the historical AT&T operand reversal, e.g. "fsubrp %st(0),%st(3)"
//	  leaves st(3)-st(0).  The stack-state comments show the values that
//	  actually result; do not "fix" the mnemonics.
//----------------------------------------------------------------------

#define pv0		4+12
#define pv1		8+12
#define clip	12+12

// cacheoffset value stored when an edge is only partially clipped ("we
// don't cache partially clipped edges"); LPop5AndDone tests for it before
// overwriting cacheoffset.  Same numeric value as the literal it replaces.
#define PARTIALLY_CLIPPED_NOCACHE	0x7FFFFFFF

	.align 4
.globl C(R_ClipEdge)
C(R_ClipEdge):
	pushl	%esi				// preserve register variables
	pushl	%edi
	pushl	%ebx
	movl	%esp,Lstack			// for clearing the stack later

//	float		d0, d1, f;
//	mvertex_t	clipvert;

	movl	clip(%esp),%ebx		// %ebx = clip (current plane)
	movl	pv0(%esp),%esi		// %esi = pv0
	movl	pv1(%esp),%edx		// %edx = pv1

//	if (clip)
//	{
	testl	%ebx,%ebx
	jz		Lemit

//		do
//		{

Lcliploop:

//			d0 = DotProduct (pv0->position, clip->normal) - clip->dist;
//			d1 = DotProduct (pv1->position, clip->normal) - clip->dist;
	flds	mv_position+0(%esi)
	fmuls	cp_normal+0(%ebx)
	flds	mv_position+4(%esi)
	fmuls	cp_normal+4(%ebx)
	flds	mv_position+8(%esi)
	fmuls	cp_normal+8(%ebx)
	fxch	%st(1)
	faddp	%st(0),%st(2)		// d0mul2 | d0add0

	flds	mv_position+0(%edx)
	fmuls	cp_normal+0(%ebx)
	flds	mv_position+4(%edx)
	fmuls	cp_normal+4(%ebx)
	flds	mv_position+8(%edx)
	fmuls	cp_normal+8(%ebx)
	fxch	%st(1)
	faddp	%st(0),%st(2)		// d1mul2 | d1add0 | d0mul2 | d0add0
	fxch	%st(3)				// d0add0 | d1add0 | d0mul2 | d1mul2

	faddp	%st(0),%st(2)		// d1add0 | dot0 | d1mul2
	faddp	%st(0),%st(2)		// dot0 | dot1

	fsubs	cp_dist(%ebx)		// d0 | dot1
	fxch	%st(1)				// dot1 | d0
	fsubs	cp_dist(%ebx)		// d1 | d0
	fxch	%st(1)
	fstps	Ld0
	fstps	Ld1					// x87 stack is empty again

//			if (d0 >= 0)
//			{
// The floats are reloaded as integers: bit 31 of (d0 | d1) is set when
// at least one of them has its sign bit set.
	movl	Ld0,%eax
	movl	Ld1,%ecx
	orl		%eax,%ecx
	js		Lp2

// both points are unclipped

Lcontinue:

//
//				R_ClipEdge (&clipvert, pv1, clip->next);
//				return;
//			}
//		} while ((clip = clip->next) != NULL);
	movl	cp_next(%ebx),%ebx
	testl	%ebx,%ebx
	jnz		Lcliploop

//	}

//// add the edge
//	R_EmitEdge (pv0, pv1);
Lemit:

//
// set integer rounding to ceil mode, set to single precision
//
// FIXME: do away with by manually extracting integers from floats?
// FIXME: set less often
	fldcw	C(r_ceil_cw)

//	edge_t	*edge, *pcheck;
//	int		u_check;
//	float	u, u_step;
//	vec3_t	local, transformed;
//	float	*world;
//	int		v, v2, ceilv0;
//	float	scale, lzi0, u0, v0;
//	int		side;

//	if (r_lastvertvalid)
//	{
	cmpl	$0,C(r_lastvertvalid)
	jz		LCalcFirst

//		u0 = r_u1;
//		v0 = r_v1;
//		lzi0 = r_lzi1;
//		ceilv0 = r_ceilv1;
	movl	C(r_lzi1),%eax
	movl	C(r_u1),%ecx
	movl	%eax,Lzi0
	movl	%ecx,Lu0
	movl	C(r_v1),%ecx
	movl	C(r_ceilv1),%eax
	movl	%ecx,Lv0
	movl	%eax,Lceilv0
	jmp		LCalcSecond

//	}

LCalcFirst:

//	else
//	{
//		world = &pv0->position[0];

	call	LTransformAndProject	// v0 | lzi0 | u0

	fsts	Lv0
	fxch	%st(2)					// u0 | lzi0 | v0
	fstps	Lu0						// lzi0 | v0
	fstps	Lzi0					// v0

//		ceilv0 = (int)(v0 - 2000) + 2000; // ceil(v0);
// (the rounding comes from the control word loaded at Lemit)
	fistpl	Lceilv0

//	}

LCalcSecond:

//	world = &pv1->position[0];
	movl	%edx,%esi

	call	LTransformAndProject	// v1 | lzi1 | u1

	flds	Lu0						// u0 | v1 | lzi1 | u1
	fxch	%st(3)					// u1 | v1 | lzi1 | u0
	flds	Lzi0					// lzi0 | u1 | v1 | lzi1 | u0
	fxch	%st(3)					// lzi1 | u1 | v1 | lzi0 | u0
	flds	Lv0						// v0 | lzi1 | u1 | v1 | lzi0 | u0
	fxch	%st(3)					// v1 | lzi1 | u1 | v0 | lzi0 | u0

//	r_ceilv1 = (int)(r_v1 - 2000) + 2000; // ceil(r_v1);
	fistl	C(r_ceilv1)

	fldcw	C(r_single_cw)			// put back normal floating-point state

	fsts	C(r_v1)
	fxch	%st(4)					// lzi0 | lzi1 | u1 | v0 | v1 | u0

//	if (r_lzi1 > lzi0)
//		lzi0 = r_lzi1;
// bit 0 of %ah is C0 of the x87 status word: set when st(0) < st(1)
	fcom	%st(1)
	fnstsw	%ax
	testb	$1,%ah
	jz		LP0
	fstp	%st(0)					// drop lzi0 ...
	fld		%st(0)					// ... and duplicate lzi1 in its place
LP0:

	fxch	%st(1)					// lzi1 | lzi0 | u1 | v0 | v1 | u0
	fstps	C(r_lzi1)				// lzi0 | u1 | v0 | v1 | u0
	fxch	%st(1)
	fsts	C(r_u1)
	fxch	%st(1)

//	if (lzi0 > r_nearzi)	// for mipmap finding
//		r_nearzi = lzi0;
// 0x45 selects C3|C2|C0: all clear only when st(0) > operand
	fcoms	C(r_nearzi)
	fnstsw	%ax
	testb	$0x45,%ah
	jnz		LP1
	fsts	C(r_nearzi)
LP1:

// // for right edges, all we want is the effect on 1/z
//	if (r_nearzionly)
//		return;
	movl	C(r_nearzionly),%eax
	testl	%eax,%eax
	jz		LP2
LPop5AndDone:
// Unless cacheoffset already carries the "do not cache" marker, replace
// it with the masked frame count tagged FULLY_CLIPPED_CACHED.
	movl	C(cacheoffset),%eax
	movl	C(r_framecount),%edx
	cmpl	$(PARTIALLY_CLIPPED_NOCACHE),%eax
	jz		LDoPop
	andl	$(FRAMECOUNT_MASK),%edx
	orl		$(FULLY_CLIPPED_CACHED),%edx
	movl	%edx,C(cacheoffset)

LDoPop:
	fstp	%st(0)			// u1 | v0 | v1 | u0
	fstp	%st(0)			// v0 | v1 | u0
	fstp	%st(0)			// v1 | u0
	fstp	%st(0)			// u0
	fstp	%st(0)
	jmp		Ldone

LP2:

// // create the edge
//	if (ceilv0 == r_ceilv1)
//		return;		// horizontal edge
	movl	Lceilv0,%ebx
	movl	C(edge_p),%edi			// %edi = edge being built
	movl	C(r_ceilv1),%ecx
	movl	%edi,%edx
	addl	$(et_size),%edx			// %edx = edge_p + 1
	cmpl	%ecx,%ebx				// flags = ceilv0 - r_ceilv1, reused below
	jz		LPop5AndDone

	movl	C(r_pedge),%eax
	movl	%eax,et_owner(%edi)

//	side = ceilv0 > r_ceilv1;
//
//	edge->nearzi = lzi0;
	fstps	et_nearzi(%edi)		// u1 | v0 | v1 | u0

//	if (side == 1)
//	{
// Still testing the cmpl above (movl and fstps leave EFLAGS alone).
// ceilv0 and r_ceilv1 are signed ints in the C version, so the signed
// condition is used; it agrees with the unsigned one whenever both values
// are non-negative, which the newedges[] indexing below needs anyway.
	jl		LSide0

LSide1:

//	// leading edge (go from p2 to p1)

//		u_step = ((u0 - r_u1) / (v0 - r_v1));
	fsubrp	%st(0),%st(3)		// v0 | v1 | u0-u1
	fsub	%st(1),%st(0)		// v0-v1 | v1 | u0-u1
	fdivrp	%st(0),%st(2)		// v1 | ustep

//	r_emitted = 1;
	movl	$1,C(r_emitted)

//	edge = edge_p++;
	movl	%edx,C(edge_p)

// pretouch next edge
	movl	(%edx),%eax

//		v2 = ceilv0 - 1;
//		v = r_ceilv1;
	movl	%ecx,%eax
	leal	-1(%ebx),%ecx		// %ecx = v2
	movl	%eax,%ebx			// %ebx = v

//		edge->surfs[0] = 0;
//		edge->surfs[1] = surface_p - surfaces;
// Both stores are 32 bits wide although the two slots are 2 bytes apart:
// the first zeroes both slots, the second sets surfs[1] and also writes
// the 2 bytes that follow it.
// NOTE(review): presumably those bytes belong to a field that is stored
// later (et_nextremove at LSetRemove?) -- confirm against edge_t.
	movl	C(surface_p),%eax
	movl	C(surfaces),%esi
	subl	%edx,%edx
	subl	%esi,%eax
	shrl	$(SURF_T_SHIFT),%eax
	movl	%edx,et_surfs(%edi)
	movl	%eax,et_surfs+2(%edi)

	subl	%esi,%esi			// u_check adjustment = 0 (surfs[0] == 0)

//		u = r_u1 + ((float)v - r_v1) * u_step;
	movl	%ebx,Lv
	fildl	Lv					// v | v1 | ustep
	fsubp	%st(0),%st(1)		// v-v1 | ustep
	fmul	%st(1),%st(0)		// (v-v1)*ustep | ustep
	fadds	C(r_u1)				// u | ustep

	jmp		LSideDone

//	}

LSide0:

//	else
//	{
//	// trailing edge (go from p1 to p2)

//		u_step = ((r_u1 - u0) / (r_v1 - v0));
	fsub	%st(3),%st(0)		// u1-u0 | v0 | v1 | u0
	fxch	%st(2)				// v1 | v0 | u1-u0 | u0
	fsub	%st(1),%st(0)		// v1-v0 | v0 | u1-u0 | u0
	fdivrp	%st(0),%st(2)		// v0 | ustep | u0

//	r_emitted = 1;
	movl	$1,C(r_emitted)

//	edge = edge_p++;
	movl	%edx,C(edge_p)

// pretouch next edge
	movl	(%edx),%eax

//		v = ceilv0;
//		v2 = r_ceilv1 - 1;
	decl	%ecx				// %ecx = v2, %ebx already holds v

//		edge->surfs[0] = surface_p - surfaces;
//		edge->surfs[1] = 0;
// 32-bit stores again: the second one rewrites surfs[1] with the upper
// half of %eax, so surfs[1] ends up 0 only while the index fits 16 bits.
	movl	C(surface_p),%eax
	movl	C(surfaces),%esi
	subl	%edx,%edx
	subl	%esi,%eax
	shrl	$(SURF_T_SHIFT),%eax
	movl	%edx,et_surfs+2(%edi)
	movl	%eax,et_surfs(%edi)

	movl	$1,%esi				// u_check adjustment = 1 (surfs[0] != 0)

//		u = u0 + ((float)v - v0) * u_step;
	movl	%ebx,Lv
	fildl	Lv					// v | v0 | ustep | u0
	fsubp	%st(0),%st(1)		// v-v0 | ustep | u0
	fmul	%st(1),%st(0)		// (v-v0)*ustep | ustep | u0
	faddp	%st(0),%st(2)		// ustep | u
	fxch	%st(1)				// u | ustep

//	}

LSideDone:

//	edge->u_step = u_step*0x100000;
//	edge->u = u*0x100000 + 0xFFFFF;

	fmuls	C(fp_1m)				// u*0x100000 | ustep
	fxch	%st(1)				// ustep | u*0x100000
	fmuls	C(fp_1m)				// ustep*0x100000 | u*0x100000
	fxch	%st(1)				// u*0x100000 | ustep*0x100000
	fadds	C(fp_1m_minus_1)		// u*0x100000 + 0xFFFFF | ustep*0x100000
	fxch	%st(1)				// ustep*0x100000 | u*0x100000 + 0xFFFFF
	fistpl	et_u_step(%edi)		// u*0x100000 + 0xFFFFF
	fistpl	et_u(%edi)

// // we need to do this to avoid stepping off the edges if a very nearly
// // horizontal edge is less than epsilon above a scan, and numeric error
// // causes it to incorrectly extend to the scan, and the extension of the
// // line goes off the edge of the screen
// // FIXME: is this actually needed?
//	if (edge->u < r_refdef.vrect_x_adj_shift20)
//		edge->u = r_refdef.vrect_x_adj_shift20;
//	if (edge->u > r_refdef.vrectright_adj_shift20)
//		edge->u = r_refdef.vrectright_adj_shift20;
// LP4 is shared by both clamps: %edx holds whichever bound was violated.
	movl	et_u(%edi),%eax
	movl	C(r_refdef)+rd_vrect_x_adj_shift20,%edx
	cmpl	%edx,%eax
	jl		LP4
	movl	C(r_refdef)+rd_vrectright_adj_shift20,%edx
	cmpl	%edx,%eax
	jng		LP5
LP4:
	movl	%edx,et_u(%edi)
	movl	%edx,%eax
LP5:

// // sort the edge in normally
//	u_check = edge->u;
//
//	if (edge->surfs[0])
//		u_check++;	// sort trailers after leaders
	addl	%esi,%eax			// %esi is 0 (LSide1) or 1 (LSide0)

//	if (!newedges[v] || newedges[v]->u >= u_check)
//	{
	movl	C(newedges)(,%ebx,4),%esi
	testl	%esi,%esi
	jz		LDoFirst
	cmpl	%eax,et_u(%esi)
	jl		LNotFirst
LDoFirst:

//		edge->next = newedges[v];
//		newedges[v] = edge;
	movl	%esi,et_next(%edi)
	movl	%edi,C(newedges)(,%ebx,4)

	jmp		LSetRemove

//	}

LNotFirst:

//	else
//	{
//		pcheck = newedges[v];
//
//		while (pcheck->next && pcheck->next->u < u_check)
//			pcheck = pcheck->next;
// %edx trails one node behind %esi
LFindInsertLoop:
	movl	%esi,%edx
	movl	et_next(%esi),%esi
	testl	%esi,%esi
	jz		LInsertFound
	cmpl	%eax,et_u(%esi)
	jl		LFindInsertLoop

LInsertFound:

//		edge->next = pcheck->next;
//		pcheck->next = edge;
	movl	%esi,et_next(%edi)
	movl	%edi,et_next(%edx)

//	}

LSetRemove:

//	edge->nextremove = removeedges[v2];
//	removeedges[v2] = edge;
	movl	C(removeedges)(,%ecx,4),%eax
	movl	%edi,C(removeedges)(,%ecx,4)
	movl	%eax,et_nextremove(%edi)

Ldone:
	movl	Lstack,%esp			// clear temporary variables from stack

	popl	%ebx				// restore register variables
	popl	%edi
	popl	%esi
	ret

// at least one point is clipped

Lp2:
// %eax still holds the bit pattern of d0
	testl	%eax,%eax
	jns		Lp1

//			else
//			{
//			// point 0 is clipped

//				if (d1 < 0)
//				{
	movl	Ld1,%eax
	testl	%eax,%eax
	jns		Lp3

//				// both points are clipped
//				// we do cache fully clipped edges
//					if (!leftclipped)
	movl	C(r_leftclipped),%eax
	testl	%eax,%eax
	jnz		Ldone

//						r_pedge->framecount = r_framecount;
// (this implementation records the tagged frame count in cacheoffset)
	movl	C(r_framecount),%eax
	andl	$(FRAMECOUNT_MASK),%eax
	orl		$(FULLY_CLIPPED_CACHED),%eax
	movl	%eax,C(cacheoffset)

//					return;
	jmp		Ldone

//				}

Lp1:

//			// point 0 is unclipped
//				if (d1 >= 0)
//				{
//				// both points are unclipped
//					continue;

//			// only point 1 is clipped

//				f = d0 / (d0 - d1);
	flds	Ld0
	flds	Ld1
	fsubr	%st(1),%st(0)			// d0-d1 | d0

//			// we don't cache partially clipped edges
	movl	$(PARTIALLY_CLIPPED_NOCACHE),C(cacheoffset)

	fdivrp	%st(0),%st(1)			// f

	subl	$(mv_size),%esp			// allocate space for clipvert

//				clipvert.position[0] = pv0->position[0] +
//						f * (pv1->position[0] - pv0->position[0]);
//				clipvert.position[1] = pv0->position[1] +
//						f * (pv1->position[1] - pv0->position[1]);
//				clipvert.position[2] = pv0->position[2] +
//						f * (pv1->position[2] - pv0->position[2]);
	flds	mv_position+8(%edx)
	fsubs	mv_position+8(%esi)
	flds	mv_position+4(%edx)
	fsubs	mv_position+4(%esi)
	flds	mv_position+0(%edx)
	fsubs	mv_position+0(%esi)		// 0 | 1 | 2

// replace pv1 with the clip point
	movl	%esp,%edx
// One dword load fetches both flags: %al = leftedge and, as the later
// "testb %ah,%ah" shows, %ah is treated as rightedge.  The ZF produced
// here is only consumed by the jz after the x87 block, which does not
// touch EFLAGS.
	movl	cp_leftedge(%ebx),%eax
	testb	%al,%al

	fmul	%st(3),%st(0)
	fxch	%st(1)					// 1 | 0 | 2
	fmul	%st(3),%st(0)
	fxch	%st(2)					// 2 | 0 | 1
	fmulp	%st(0),%st(3)			// 0 | 1 | 2
	fadds	mv_position+0(%esi)
	fxch	%st(1)					// 1 | 0 | 2
	fadds	mv_position+4(%esi)
	fxch	%st(2)					// 2 | 0 | 1
	fadds	mv_position+8(%esi)
	fxch	%st(1)					// 0 | 2 | 1
	fstps	mv_position+0(%esp)		// 2 | 1
	fstps	mv_position+8(%esp)		// 1
	fstps	mv_position+4(%esp)

//				if (clip->leftedge)
//				{
	jz		Ltestright

//					r_leftclipped = true;
//					r_leftexit = clipvert;
	movl	$1,C(r_leftclipped)
	movl	mv_position+0(%esp),%eax
	movl	%eax,C(r_leftexit)+mv_position+0
	movl	mv_position+4(%esp),%eax
	movl	%eax,C(r_leftexit)+mv_position+4
	movl	mv_position+8(%esp),%eax
	movl	%eax,C(r_leftexit)+mv_position+8

	jmp		Lcontinue

//				}

Ltestright:
//				else if (clip->rightedge)
//				{
	testb	%ah,%ah
	jz		Lcontinue

//					r_rightclipped = true;
//					r_rightexit = clipvert;
	movl	$1,C(r_rightclipped)
	movl	mv_position+0(%esp),%eax
	movl	%eax,C(r_rightexit)+mv_position+0
	movl	mv_position+4(%esp),%eax
	movl	%eax,C(r_rightexit)+mv_position+4
	movl	mv_position+8(%esp),%eax
	movl	%eax,C(r_rightexit)+mv_position+8

//				}
//
//				R_ClipEdge (pv0, &clipvert, clip->next);
//				return;
//			}
	jmp		Lcontinue

//			}

Lp3:

//			// only point 0 is clipped
//				r_lastvertvalid = false;

	movl	$0,C(r_lastvertvalid)

//				f = d0 / (d0 - d1);
	flds	Ld0
	flds	Ld1
	fsubr	%st(1),%st(0)			// d0-d1 | d0

//			// we don't cache partially clipped edges
	movl	$(PARTIALLY_CLIPPED_NOCACHE),C(cacheoffset)

	fdivrp	%st(0),%st(1)			// f

	subl	$(mv_size),%esp			// allocate space for clipvert

//				clipvert.position[0] = pv0->position[0] +
//						f * (pv1->position[0] - pv0->position[0]);
//				clipvert.position[1] = pv0->position[1] +
//						f * (pv1->position[1] - pv0->position[1]);
//				clipvert.position[2] = pv0->position[2] +
//						f * (pv1->position[2] - pv0->position[2]);
	flds	mv_position+8(%edx)
	fsubs	mv_position+8(%esi)
	flds	mv_position+4(%edx)
	fsubs	mv_position+4(%esi)
	flds	mv_position+0(%edx)
	fsubs	mv_position+0(%esi)		// 0 | 1 | 2

// %al = leftedge, %ah treated as rightedge; ZF from this test survives
// the x87 block and the movl below until the jz.
	movl	cp_leftedge(%ebx),%eax
	testb	%al,%al

	fmul	%st(3),%st(0)
	fxch	%st(1)					// 1 | 0 | 2
	fmul	%st(3),%st(0)
	fxch	%st(2)					// 2 | 0 | 1
	fmulp	%st(0),%st(3)			// 0 | 1 | 2
	fadds	mv_position+0(%esi)
	fxch	%st(1)					// 1 | 0 | 2
	fadds	mv_position+4(%esi)
	fxch	%st(2)					// 2 | 0 | 1
	fadds	mv_position+8(%esi)
	fxch	%st(1)					// 0 | 2 | 1
	fstps	mv_position+0(%esp)		// 2 | 1
	fstps	mv_position+8(%esp)		// 1
	fstps	mv_position+4(%esp)

// replace pv0 with the clip point
	movl	%esp,%esi

//				if (clip->leftedge)
//				{
	jz		Ltestright2

//					r_leftclipped = true;
//					r_leftenter = clipvert;
	movl	$1,C(r_leftclipped)
	movl	mv_position+0(%esp),%eax
	movl	%eax,C(r_leftenter)+mv_position+0
	movl	mv_position+4(%esp),%eax
	movl	%eax,C(r_leftenter)+mv_position+4
	movl	mv_position+8(%esp),%eax
	movl	%eax,C(r_leftenter)+mv_position+8

	jmp		Lcontinue

//				}

Ltestright2:
//				else if (clip->rightedge)
//				{
	testb	%ah,%ah
	jz		Lcontinue

//					r_rightclipped = true;
//					r_rightenter = clipvert;
	movl	$1,C(r_rightclipped)
	movl	mv_position+0(%esp),%eax
	movl	%eax,C(r_rightenter)+mv_position+0
	movl	mv_position+4(%esp),%eax
	movl	%eax,C(r_rightenter)+mv_position+4
	movl	mv_position+8(%esp),%eax
	movl	%eax,C(r_rightenter)+mv_position+8

//				}
	jmp		Lcontinue

//----------------------------------------------------------------------
// LTransformAndProject
//
// Local subroutine (reached with "call", leaves with "ret"; no stack
// frame and no stack arguments).
//
// In:	%esi = vec3_t point to transform and project (read through
//		mv_position+0/4/8)
// Out:	three values pushed on the x87 stack: v | lzi | u
//		(st(0) = screen v, st(1) = 1/z, st(2) = screen u)
// Regs:	%edx preserved; %eax is overwritten by the fnstsw compares.
//
// The point is made relative to modelorg, rotated by vright/vup/vpn,
// z is raised to NEAR_CLIP when it is below it, and u/v are scaled by
// xscale/yscale, offset by xcenter/ycenter and clamped to the adjusted
// r_refdef rectangle.
//
// Compare idiom: after "fcoms mem; fnstsw %ax", bit 0 of %ah (C0) is set
// when st(0) < mem, and mask 0x45 (C3|C2|C0) is all clear only when
// st(0) > mem.
//----------------------------------------------------------------------
LTransformAndProject:

//	// transform and project
//		VectorSubtract (world, modelorg, local);
	flds	mv_position+0(%esi)
	fsubs	C(modelorg)+0
	flds	mv_position+4(%esi)
	fsubs	C(modelorg)+4
	flds	mv_position+8(%esi)
	fsubs	C(modelorg)+8
	fxch	%st(2)				// local[0] | local[1] | local[2]

//		TransformVector (local, transformed);
//
//		if (transformed[2] < NEAR_CLIP)
//			transformed[2] = NEAR_CLIP;
//
//		lzi0 = 1.0 / transformed[2];
	fld		%st(0)				// local[0] | local[0] | local[1] | local[2]
	fmuls	C(vpn)+0			// zm0 | local[0] | local[1] | local[2]
	fld		%st(1)				// local[0] | zm0 | local[0] | local[1] |
								//  local[2]
	fmuls	C(vright)+0			// xm0 | zm0 | local[0] | local[1] | local[2]
	fxch	%st(2)				// local[0] | zm0 | xm0 | local[1] | local[2]
	fmuls	C(vup)+0			// ym0 |  zm0 | xm0 | local[1] | local[2]
	fld		%st(3)				// local[1] | ym0 |  zm0 | xm0 | local[1] |
								//  local[2]
	fmuls	C(vpn)+4			// zm1 | ym0 | zm0 | xm0 | local[1] |
								//  local[2]
	fld		%st(4)				// local[1] | zm1 | ym0 | zm0 | xm0 |
								//  local[1] | local[2]
	fmuls	C(vright)+4			// xm1 | zm1 | ym0 |  zm0 | xm0 |
								//  local[1] | local[2]
	fxch	%st(5)				// local[1] | zm1 | ym0 | zm0 | xm0 |
								//  xm1 | local[2]
	fmuls	C(vup)+4			// ym1 | zm1 | ym0 | zm0 | xm0 |
								//  xm1 | local[2]
	fxch	%st(1)				// zm1 | ym1 | ym0 | zm0 | xm0 |
								//  xm1 | local[2]
	faddp	%st(0),%st(3)		// ym1 | ym0 | zm2 | xm0 | xm1 | local[2]
	fxch	%st(3)				// xm0 | ym0 | zm2 | ym1 | xm1 | local[2]
	faddp	%st(0),%st(4)		// ym0 | zm2 | ym1 | xm2 | local[2]
	faddp	%st(0),%st(2)		// zm2 | ym2 | xm2 | local[2]
	fld		%st(3)				// local[2] | zm2 | ym2 | xm2 | local[2]
	fmuls	C(vpn)+8			// zm3 | zm2 | ym2 | xm2 | local[2]
	fld		%st(4)				// local[2] | zm3 | zm2 | ym2 | xm2 | local[2]
	fmuls	C(vright)+8			// xm3 | zm3 | zm2 | ym2 | xm2 | local[2]
	fxch	%st(5)				// local[2] | zm3 | zm2 | ym2 | xm2 | xm3
	fmuls	C(vup)+8			// ym3 | zm3 | zm2 | ym2 | xm2 | xm3
	fxch	%st(1)				// zm3 | ym3 | zm2 | ym2 | xm2 | xm3
	faddp	%st(0),%st(2)		// ym3 | zm4 | ym2 | xm2 | xm3
	fxch	%st(4)				// xm3 | zm4 | ym2 | xm2 | ym3
	faddp	%st(0),%st(3)		// zm4 | ym2 | xm4 | ym3
	fxch	%st(1)				// ym2 | zm4 | xm4 | ym3
	faddp	%st(0),%st(3)		// zm4 | xm4 | ym4

// z below NEAR_CLIP: replace it with NEAR_CLIP
	fcoms	Lfp_near_clip
	fnstsw	%ax
	testb	$1,%ah
	jz		LNoClip
	fstp	%st(0)
	flds	Lfp_near_clip

LNoClip:

	fdivrs	C(float_1)				// lzi0 | x | y
	fxch	%st(1)				// x | lzi0 | y

//	// FIXME: build x/yscale into transform?
//		scale = xscale * lzi0;
//		u0 = (xcenter + scale*transformed[0]);
	flds	C(xscale)			// xscale | x | lzi0 | y
	fmul	%st(2),%st(0)		// scale | x | lzi0 | y
	fmulp	%st(0),%st(1)		// scale*x | lzi0 | y
	fadds	C(xcenter)			// u0 | lzi0 | y

//		if (u0 < r_refdef.fvrectx_adj)
//			u0 = r_refdef.fvrectx_adj;
//		if (u0 > r_refdef.fvrectright_adj)
//			u0 = r_refdef.fvrectright_adj;
// FIXME: use integer compares of floats?
	fcoms	C(r_refdef)+rd_fvrectx_adj
	fnstsw	%ax
	testb	$1,%ah
	jz		LClampP0
	fstp	%st(0)
	flds	C(r_refdef)+rd_fvrectx_adj
LClampP0:
	fcoms	C(r_refdef)+rd_fvrectright_adj
	fnstsw	%ax
	testb	$0x45,%ah
	jnz		LClampP1
	fstp	%st(0)
	flds	C(r_refdef)+rd_fvrectright_adj
LClampP1:

	fld		%st(1)				// lzi0 | u0 | lzi0 | y

//		scale = yscale * lzi0;
//		v0 = (ycenter - scale*transformed[1]);
	fmuls	C(yscale)			// scale | u0 | lzi0 | y
	fmulp	%st(0),%st(3)		// u0 | lzi0 | scale*y
	fxch	%st(2)				// scale*y | lzi0 | u0
	fsubrs	C(ycenter)			// v0 | lzi0 | u0

//		if (v0 < r_refdef.fvrecty_adj)
//			v0 = r_refdef.fvrecty_adj;
//		if (v0 > r_refdef.fvrectbottom_adj)
//			v0 = r_refdef.fvrectbottom_adj;
// FIXME: use integer compares of floats?
	fcoms	C(r_refdef)+rd_fvrecty_adj
	fnstsw	%ax
	testb	$1,%ah
	jz		LClampP2
	fstp	%st(0)
	flds	C(r_refdef)+rd_fvrecty_adj
LClampP2:
	fcoms	C(r_refdef)+rd_fvrectbottom_adj
	fnstsw	%ax
	testb	$0x45,%ah
	jnz		LClampP3
	fstp	%st(0)
	flds	C(r_refdef)+rd_fvrectbottom_adj
LClampP3:
	ret							// v0 | lzi0 | u0 left for the caller

#endif	// USE_INTEL_ASM
