1//
2// d_polysa.s
3// x86 assembly-language polygon model drawing code
4//
5
6#include "qasm.h"
7#include "d_ifacea.h"
8
9#if	id386
10
// Span-buffer sizing constants.
// Neither macro is used by the code between here and the start of
// R_PolysetScanLeftEdge; any users are further down the file.
// NOTE(review): both bodies are unparenthesized, so they are only safe where
// the expansion site wraps them, e.g. $(SPAN_SIZE) -- confirm at each use.
11// !!! if this is changed, it must be changed in d_polyse.c too !!!
12#define DPS_MAXSPANS			MAXHEIGHT+1
13									// 1 extra for spanpackage that marks end
14
// The formula below is disabled; the active definition hard-codes the
// numbers instead. Presumably 1024 stands for MAXHEIGHT and 32 for
// spanpackage_t_size -- TODO confirm against the headers.
15//#define	SPAN_SIZE	(((DPS_MAXSPANS + 1 + ((CACHE_SIZE - 1) / spanpackage_t_size)) + 1) * spanpackage_t_size)
16#define SPAN_SIZE (1024+1+1+1)*32
17
18
19
// File-local writable data.
20	.data
21
22	.align	4
// Spill slots for two of the edge deltas in the (currently disabled)
// R_PolysetCalcGradients: stored there with fstps and re-read with fmuls.
23p10_minus_p20:	.single		0
24p01_minus_p21:	.single		0
// temp0, temp1 and Ltemp are not referenced by the code between here and
// the start of R_PolysetScanLeftEdge.
25temp0:			.single		0
26temp1:			.single		0
27Ltemp:			.single		0
28
// Entry points into the 8-pixel unrolled loop of R_PolysetDrawSpans8_Opaque,
// indexed by (-count) & 7: index 0 enters at LDraw8 (all eight pixels of the
// first pass are drawn), index 7 enters at LDraw1 (only one pixel is drawn).
29aff8entryvec_table:	.long	LDraw8, LDraw7, LDraw6, LDraw5
30				.long	LDraw4, LDraw3, LDraw2, LDraw1
31
// Copy of C(r_zistepx) rotated by 16 bits, written once per call of
// R_PolysetDrawSpans8_Opaque and added to the rotated 1/z for every pixel.
32lzistepx:		.long	0
33
34
35	.text
36
37#ifndef NeXT
38	.extern C(D_PolysetSetEdgeTable)
39	.extern C(D_RasterizeAliasPolySmooth)
40#endif
41
42//----------------------------------------------------------------------
43// affine triangle gradient calculation code
44//----------------------------------------------------------------------
45
46#if 0
// R_PolysetCalcGradients -- compiled out by the surrounding #if 0.
//
// Computes the integer per-pixel (x) and per-scanline (y) steps of light,
// s, t and 1/z across the triangle r_p0 / r_p1 / r_p2, following the C
// pseudo-code quoted in the comments below.
//
// In:   skinwidth(%esp) = the only stack argument (first dword above the
//        return address; this routine pushes nothing)
//       C(r_p0), C(r_p1), C(r_p2) = int arrays; elements [0] and [1] give
//        the position deltas, [2] is used for s, [3] for t, [4] for light
//        and [5] for 1/z
//       C(d_xdenom) = loaded in place of computing xstepdenominv
//       float_minus_1, ceil_cw, single_cw = symbols defined outside the
//        code shown here
// Out:  C(r_lstepx), C(r_lstepy), C(r_sstepx), C(r_sstepy), C(r_tstepx),
//       C(r_tstepy), C(r_zistepx), C(r_zistepy), C(a_sstepxfrac),
//       C(a_tstepxfrac), C(a_ststepxwhole)
// Clob: %eax, %ecx, %edx, flags; p10_minus_p20 and p01_minus_p21; the x87
//       control word is left loaded from single_cw
//
// The trailing comment on each x87 instruction lists the register stack
// after that instruction, st(0) first. According to those comments the
// stack is empty again when the routine returns.
47#define skinwidth	4+0
48
49.globl C(R_PolysetCalcGradients)
50C(R_PolysetCalcGradients):
51
52//	p00_minus_p20 = r_p0[0] - r_p2[0];
53//	p01_minus_p21 = r_p0[1] - r_p2[1];
54//	p10_minus_p20 = r_p1[0] - r_p2[0];
55//	p11_minus_p21 = r_p1[1] - r_p2[1];
56//
57//	xstepdenominv = 1.0 / (p10_minus_p20 * p01_minus_p21 -
58//			     p00_minus_p20 * p11_minus_p21);
59//
60//	ystepdenominv = -xstepdenominv;
61
62	fildl	C(r_p0)+0		// r_p0[0]
63	fildl	C(r_p2)+0		// r_p2[0] | r_p0[0]
64	fildl	C(r_p0)+4		// r_p0[1] | r_p2[0] | r_p0[0]
65	fildl	C(r_p2)+4		// r_p2[1] | r_p0[1] | r_p2[0] | r_p0[0]
66	fildl	C(r_p1)+0		// r_p1[0] | r_p2[1] | r_p0[1] | r_p2[0] | r_p0[0]
67	fildl	C(r_p1)+4		// r_p1[1] | r_p1[0] | r_p2[1] | r_p0[1] |
68							//  r_p2[0] | r_p0[0]
69	fxch	%st(3)			// r_p0[1] | r_p1[0] | r_p2[1] | r_p1[1] |
70							//  r_p2[0] | r_p0[0]
71	fsub	%st(2),%st(0)	// p01_minus_p21 | r_p1[0] | r_p2[1] | r_p1[1] |
72							//  r_p2[0] | r_p0[0]
73	fxch	%st(1)			// r_p1[0] | p01_minus_p21 | r_p2[1] | r_p1[1] |
74							//  r_p2[0] | r_p0[0]
75	fsub	%st(4),%st(0)	// p10_minus_p20 | p01_minus_p21 | r_p2[1] |
76							//  r_p1[1] | r_p2[0] | r_p0[0]
77	fxch	%st(5)			// r_p0[0] | p01_minus_p21 | r_p2[1] |
78							//  r_p1[1] | r_p2[0] | p10_minus_p20
79	fsubp	%st(0),%st(4)	// p01_minus_p21 | r_p2[1] | r_p1[1] |
80							//  p00_minus_p20 | p10_minus_p20
81	fxch	%st(2)			// r_p1[1] | r_p2[1] | p01_minus_p21 |
82							//  p00_minus_p20 | p10_minus_p20
83	fsubp	%st(0),%st(1)	// p11_minus_p21 | p01_minus_p21 |
84							//  p00_minus_p20 | p10_minus_p20
85	fxch	%st(1)			// p01_minus_p21 | p11_minus_p21 |
86							//  p00_minus_p20 | p10_minus_p20
// The reciprocal is not computed here: d_xdenom is loaded instead, and the
// stack comments call that value xstepdenominv from the second fstps on.
87	flds	C(d_xdenom)		// d_xdenom | p01_minus_p21 | p11_minus_p21 |
88							//  p00_minus_p20 | p10_minus_p20
89	fxch	%st(4)			// p10_minus_p20 | p01_minus_p21 | p11_minus_p21 |
90							//  p00_minus_p20 | d_xdenom
// Two of the four deltas are spilled to memory so that they can be used as
// fmuls memory operands below; the other two stay on the x87 stack.
91	fstps	p10_minus_p20	// p01_minus_p21 | p11_minus_p21 |
92							//  p00_minus_p20 | d_xdenom
93	fstps	p01_minus_p21	// p11_minus_p21 | p00_minus_p20 | xstepdenominv
94	fxch	%st(2)			// xstepdenominv | p00_minus_p20 | p11_minus_p21
95
96//// ceil () for light so positive steps are exaggerated, negative steps
97//// diminished,  pushing us away from underflow toward overflow. Underflow is
98//// very visible, overflow is very unlikely, because of ambient lighting
99//	t0 = r_p0[4] - r_p2[4];
100//	t1 = r_p1[4] - r_p2[4];
101
102	fildl	C(r_p2)+16		// r_p2[4] | xstepdenominv | p00_minus_p20 |
103							//  p11_minus_p21
104	fildl	C(r_p0)+16		// r_p0[4] | r_p2[4] | xstepdenominv |
105							//  p00_minus_p20 | p11_minus_p21
106	fildl	C(r_p1)+16		// r_p1[4] | r_p0[4] | r_p2[4] | xstepdenominv |
107							//  p00_minus_p20 | p11_minus_p21
108	fxch	%st(2)			// r_p2[4] | r_p0[4] | r_p1[4] | xstepdenominv |
109							//  p00_minus_p20 | p11_minus_p21
110	fld		%st(0)			// r_p2[4] | r_p2[4] | r_p0[4] | r_p1[4] |
111							//  xstepdenominv | p00_minus_p20 | p11_minus_p21
112	fsubrp	%st(0),%st(2)	// r_p2[4] | t0 | r_p1[4] | xstepdenominv |
113							//  p00_minus_p20 | p11_minus_p21
114	fsubrp	%st(0),%st(2)	// t0 | t1 | xstepdenominv | p00_minus_p20 |
115							//  p11_minus_p21
116
117//	r_lstepx = (int)
118//			ceil((t1 * p01_minus_p21 - t0 * p11_minus_p21) * xstepdenominv);
119//	r_lstepy = (int)
120//			ceil((t1 * p00_minus_p20 - t0 * p10_minus_p20) * ystepdenominv);
121
122	fld		%st(0)			// t0 | t0 | t1 | xstepdenominv | p00_minus_p20 |
123							//  p11_minus_p21
124	fmul	%st(5),%st(0)	// t0*p11_minus_p21 | t0 | t1 | xstepdenominv |
125							//  p00_minus_p20 | p11_minus_p21
126	fxch	%st(2)			// t1 | t0 | t0*p11_minus_p21 | xstepdenominv |
127							//  p00_minus_p20 | p11_minus_p21
128	fld		%st(0)			// t1 | t1 | t0 | t0*p11_minus_p21 |
129							//  xstepdenominv | p00_minus_p20 | p11_minus_p21
130	fmuls	p01_minus_p21	// t1*p01_minus_p21 | t1 | t0 | t0*p11_minus_p21 |
131							//  xstepdenominv | p00_minus_p20 | p11_minus_p21
132	fxch	%st(2)			// t0 | t1 | t1*p01_minus_p21 | t0*p11_minus_p21 |
133							//  xstepdenominv | p00_minus_p20 | p11_minus_p21
134	fmuls	p10_minus_p20	// t0*p10_minus_p20 | t1 | t1*p01_minus_p21 |
135							//  t0*p11_minus_p21 | xstepdenominv |
136							//  p00_minus_p20 | p11_minus_p21
137	fxch	%st(1)			// t1 | t0*p10_minus_p20 | t1*p01_minus_p21 |
138							//  t0*p11_minus_p21 | xstepdenominv |
139							//  p00_minus_p20 | p11_minus_p21
140	fmul	%st(5),%st(0)	// t1*p00_minus_p20 | t0*p10_minus_p20 |
141							//  t1*p01_minus_p21 | t0*p11_minus_p21 |
142							//  xstepdenominv | p00_minus_p20 | p11_minus_p21
143	fxch	%st(2)			// t1*p01_minus_p21 | t0*p10_minus_p20 |
144							//  t1*p00_minus_p20 | t0*p11_minus_p21 |
145							//  xstepdenominv | p00_minus_p20 | p11_minus_p21
146	fsubp	%st(0),%st(3)	// t0*p10_minus_p20 | t1*p00_minus_p20 |
147							//  t1*p01_minus_p21 - t0*p11_minus_p21 |
148							//  xstepdenominv | p00_minus_p20 | p11_minus_p21
149	fsubrp	%st(0),%st(1)	// t1*p00_minus_p20 - t0*p10_minus_p20 |
150							//  t1*p01_minus_p21 - t0*p11_minus_p21 |
151							//  xstepdenominv | p00_minus_p20 | p11_minus_p21
// ystepdenominv is created here and stays on the stack for the s, t and 1/z
// sections that follow.
152	fld		%st(2)			// xstepdenominv |
153							//  t1*p00_minus_p20 - t0*p10_minus_p20 |
154							//  t1*p01_minus_p21 - t0*p11_minus_p21 |
155							//  xstepdenominv | p00_minus_p20 | p11_minus_p21
156	fmuls	float_minus_1	// ystepdenominv |
157							//  t1*p00_minus_p20 - t0*p10_minus_p20 |
158							//  t1*p01_minus_p21 - t0*p11_minus_p21 |
159							//  xstepdenominv | p00_minus_p20 | p11_minus_p21
160	fxch	%st(2)			// t1*p01_minus_p21 - t0*p11_minus_p21 |
161							//  t1*p00_minus_p20 - t0*p10_minus_p20 |
162							//  ystepdenominv | xstepdenominv | p00_minus_p20 |
163							//  p11_minus_p21
164	fmul	%st(3),%st(0)	// (t1*p01_minus_p21 - t0*p11_minus_p21)*
165							//   xstepdenominv |
166							//  t1*p00_minus_p20 - t0*p10_minus_p20 |
167							//   ystepdenominv | xstepdenominv |
168							//   p00_minus_p20 | p11_minus_p21
169	fxch	%st(1)			// t1*p00_minus_p20 - t0*p10_minus_p20 |
170							//  (t1*p01_minus_p21 - t0*p11_minus_p21)*
171							//   xstepdenominv | ystepdenominv |
172							//   xstepdenominv | p00_minus_p20 | p11_minus_p21
173	fmul	%st(2),%st(0)	// (t1*p00_minus_p20 - t0*p10_minus_p20)*
174							//  ystepdenominv |
175							//  (t1*p01_minus_p21 - t0*p11_minus_p21)*
176							//  xstepdenominv | ystepdenominv |
177							//  xstepdenominv | p00_minus_p20 | p11_minus_p21
// Only the two light steps are converted under ceil_cw; single_cw is loaded
// again before the s, t and 1/z steps are stored.
178	fldcw	ceil_cw
179	fistpl	C(r_lstepy)		// r_lstepx | ystepdenominv | xstepdenominv |
180							//  p00_minus_p20 | p11_minus_p21
181	fistpl	C(r_lstepx)		// ystepdenominv | xstepdenominv | p00_minus_p20 |
182							//  p11_minus_p21
183	fldcw	single_cw
184
185//	t0 = r_p0[2] - r_p2[2];
186//	t1 = r_p1[2] - r_p2[2];
187
188	fildl	C(r_p2)+8		// r_p2[2] | ystepdenominv | xstepdenominv |
189							//  p00_minus_p20 | p11_minus_p21
190	fildl	C(r_p0)+8		// r_p0[2] | r_p2[2] | ystepdenominv |
191							//   xstepdenominv | p00_minus_p20 | p11_minus_p21
192	fildl	C(r_p1)+8		// r_p1[2] | r_p0[2] | r_p2[2] | ystepdenominv |
193							//  xstepdenominv | p00_minus_p20 | p11_minus_p21
194	fxch	%st(2)			// r_p2[2] | r_p0[2] | r_p1[2] | ystepdenominv |
195							//  xstepdenominv | p00_minus_p20 | p11_minus_p21
196	fld		%st(0)			// r_p2[2] | r_p2[2] | r_p0[2] | r_p1[2] |
197							//  ystepdenominv | xstepdenominv | p00_minus_p20 |
198							//  p11_minus_p21
199	fsubrp	%st(0),%st(2)	// r_p2[2] | t0 | r_p1[2] | ystepdenominv |
200							//  xstepdenominv | p00_minus_p20 | p11_minus_p21
201	fsubrp	%st(0),%st(2)	// t0 | t1 | ystepdenominv | xstepdenominv |
202							//  p00_minus_p20 | p11_minus_p21
203
204//	r_sstepx = (int)((t1 * p01_minus_p21 - t0 * p11_minus_p21) *
205//			xstepdenominv);
206//	r_sstepy = (int)((t1 * p00_minus_p20 - t0 * p10_minus_p20) *
207//			ystepdenominv);
208
209	fld		%st(0)			// t0 | t0 | t1 | ystepdenominv | xstepdenominv |
							//  p00_minus_p20 | p11_minus_p21
210	fmul	%st(6),%st(0)	// t0*p11_minus_p21 | t0 | t1 | ystepdenominv |
211							//  xstepdenominv | p00_minus_p20 | p11_minus_p21
212	fxch	%st(2)			// t1 | t0 | t0*p11_minus_p21 | ystepdenominv |
213							//  xstepdenominv | p00_minus_p20 | p11_minus_p21
214	fld		%st(0)			// t1 | t1 | t0 | t0*p11_minus_p21 |
215							//  ystepdenominv | xstepdenominv | p00_minus_p20 |
216							//  p11_minus_p21
217	fmuls	p01_minus_p21	// t1*p01_minus_p21 | t1 | t0 | t0*p11_minus_p21 |
218							//  ystepdenominv | xstepdenominv | p00_minus_p20 |
219							//  p11_minus_p21
220	fxch	%st(2)			// t0 | t1 | t1*p01_minus_p21 | t0*p11_minus_p21 |
221							//  ystepdenominv | xstepdenominv | p00_minus_p20 |
222							//  p11_minus_p21
223	fmuls	p10_minus_p20	// t0*p10_minus_p20 | t1 | t1*p01_minus_p21 |
224							//  t0*p11_minus_p21 | ystepdenominv |
225							//  xstepdenominv | p00_minus_p20 | p11_minus_p21
226	fxch	%st(1)			// t1 | t0*p10_minus_p20 | t1*p01_minus_p21 |
227							//  t0*p11_minus_p21 | ystepdenominv |
228							//  xstepdenominv | p00_minus_p20 | p11_minus_p21
229	fmul	%st(6),%st(0)	// t1*p00_minus_p20 | t0*p10_minus_p20 |
230							//  t1*p01_minus_p21 | t0*p11_minus_p21 |
231							//  ystepdenominv | xstepdenominv | p00_minus_p20 |
232							//  p11_minus_p21
233	fxch	%st(2)			// t1*p01_minus_p21 | t0*p10_minus_p20 |
234							//  t1*p00_minus_p20 | t0*p11_minus_p21 |
235							//  ystepdenominv | xstepdenominv | p00_minus_p20 |
236							//  p11_minus_p21
237	fsubp	%st(0),%st(3)	// t0*p10_minus_p20 | t1*p00_minus_p20 |
238							//  t1*p01_minus_p21 - t0*p11_minus_p21 |
239							//  ystepdenominv | xstepdenominv | p00_minus_p20 |
240							//  p11_minus_p21
241	fsubrp	%st(0),%st(1)	// t1*p00_minus_p20 - t0*p10_minus_p20 |
242							//  t1*p01_minus_p21 - t0*p11_minus_p21 |
243							//  ystepdenominv | xstepdenominv | p00_minus_p20 |
244							//  p11_minus_p21
245	fmul	%st(2),%st(0)	// (t1*p00_minus_p20 - t0*p10_minus_p20)*
246							//   ystepdenominv |
247							//  t1*p01_minus_p21 - t0*p11_minus_p21 |
248							//  ystepdenominv | xstepdenominv | p00_minus_p20 |
249							//  p11_minus_p21
250	fxch	%st(1)			// t1*p01_minus_p21 - t0*p11_minus_p21 |
251							//  (t1*p00_minus_p20 - t0*p10_minus_p20)*
252							//   ystepdenominv | ystepdenominv |
253							//  xstepdenominv | p00_minus_p20 | p11_minus_p21
254	fmul	%st(3),%st(0)	// (t1*p01_minus_p21 - t0*p11_minus_p21)*
255							//  xstepdenominv |
256							//  (t1*p00_minus_p20 - t0*p10_minus_p20)*
257							//  ystepdenominv | ystepdenominv |
258							//  xstepdenominv | p00_minus_p20 | p11_minus_p21
259	fxch	%st(1)			// (t1*p00_minus_p20 - t0*p10_minus_p20)*
260							//  ystepdenominv |
261							//  (t1*p01_minus_p21 - t0*p11_minus_p21)*
262							//  xstepdenominv | ystepdenominv |
263							//  xstepdenominv | p00_minus_p20 | p11_minus_p21
264	fistpl	C(r_sstepy)		// r_sstepx | ystepdenominv | xstepdenominv |
265							//  p00_minus_p20 | p11_minus_p21
266	fistpl	C(r_sstepx)		// ystepdenominv | xstepdenominv | p00_minus_p20 |
267							//  p11_minus_p21
268
269//	t0 = r_p0[3] - r_p2[3];
270//	t1 = r_p1[3] - r_p2[3];
271
272	fildl	C(r_p2)+12		// r_p2[3] | ystepdenominv | xstepdenominv |
273							//  p00_minus_p20 | p11_minus_p21
274	fildl	C(r_p0)+12		// r_p0[3] | r_p2[3] | ystepdenominv |
275							//  xstepdenominv | p00_minus_p20 | p11_minus_p21
276	fildl	C(r_p1)+12		// r_p1[3] | r_p0[3] | r_p2[3] | ystepdenominv |
277							//  xstepdenominv | p00_minus_p20 | p11_minus_p21
278	fxch	%st(2)			// r_p2[3] | r_p0[3] | r_p1[3] | ystepdenominv |
279							//  xstepdenominv | p00_minus_p20 | p11_minus_p21
280	fld		%st(0)			// r_p2[3] | r_p2[3] | r_p0[3] | r_p1[3] |
281							//  ystepdenominv | xstepdenominv | p00_minus_p20 |
282							//  p11_minus_p21
283	fsubrp	%st(0),%st(2)	// r_p2[3] | t0 | r_p1[3] | ystepdenominv |
284							//  xstepdenominv | p00_minus_p20 | p11_minus_p21
285	fsubrp	%st(0),%st(2)	// t0 | t1 | ystepdenominv | xstepdenominv |
286							//  p00_minus_p20 | p11_minus_p21
287
288//	r_tstepx = (int)((t1 * p01_minus_p21 - t0 * p11_minus_p21) *
289//			xstepdenominv);
290//	r_tstepy = (int)((t1 * p00_minus_p20 - t0 * p10_minus_p20) *
291//			ystepdenominv);
292
293	fld		%st(0)			// t0 | t0 | t1 | ystepdenominv | xstepdenominv |
294							//  p00_minus_p20 | p11_minus_p21
295	fmul	%st(6),%st(0)	// t0*p11_minus_p21 | t0 | t1 | ystepdenominv |
296							//  xstepdenominv | p00_minus_p20 | p11_minus_p21
297	fxch	%st(2)			// t1 | t0 | t0*p11_minus_p21 | ystepdenominv |
298							//  xstepdenominv | p00_minus_p20 | p11_minus_p21
299	fld		%st(0)			// t1 | t1 | t0 | t0*p11_minus_p21 |
300							//  ystepdenominv | xstepdenominv | p00_minus_p20 |
301							//  p11_minus_p21
302	fmuls	p01_minus_p21	// t1*p01_minus_p21 | t1 | t0 | t0*p11_minus_p21 |
303							//  ystepdenominv | xstepdenominv | p00_minus_p20 |
304							//  p11_minus_p21
305	fxch	%st(2)			// t0 | t1 | t1*p01_minus_p21 | t0*p11_minus_p21 |
306							//  ystepdenominv | xstepdenominv | p00_minus_p20 |
307							//  p11_minus_p21
308	fmuls	p10_minus_p20	// t0*p10_minus_p20 | t1 | t1*p01_minus_p21 |
309							//  t0*p11_minus_p21 | ystepdenominv |
310							//  xstepdenominv | p00_minus_p20 | p11_minus_p21
311	fxch	%st(1)			// t1 | t0*p10_minus_p20 | t1*p01_minus_p21 |
312							//  t0*p11_minus_p21 | ystepdenominv |
313							//  xstepdenominv | p00_minus_p20 | p11_minus_p21
314	fmul	%st(6),%st(0)	// t1*p00_minus_p20 | t0*p10_minus_p20 |
315							//  t1*p01_minus_p21 | t0*p11_minus_p21 |
316							//  ystepdenominv | xstepdenominv | p00_minus_p20 |
317							//  p11_minus_p21
318	fxch	%st(2)			// t1*p01_minus_p21 | t0*p10_minus_p20 |
319							//  t1*p00_minus_p20 | t0*p11_minus_p21 |
320							//  ystepdenominv | xstepdenominv | p00_minus_p20 |
321							//  p11_minus_p21
322	fsubp	%st(0),%st(3)	// t0*p10_minus_p20 | t1*p00_minus_p20 |
323							//  t1*p01_minus_p21 - t0*p11_minus_p21 |
324							//  ystepdenominv | xstepdenominv | p00_minus_p20 |
325							//  p11_minus_p21
326	fsubrp	%st(0),%st(1)	// t1*p00_minus_p20 - t0*p10_minus_p20 |
327							//  t1*p01_minus_p21 - t0*p11_minus_p21 |
328							//  ystepdenominv | xstepdenominv | p00_minus_p20 |
329							//  p11_minus_p21
330	fmul	%st(2),%st(0)	// (t1*p00_minus_p20 - t0*p10_minus_p20)*
331							//   ystepdenominv |
332							//  t1*p01_minus_p21 - t0*p11_minus_p21 |
333							//  ystepdenominv | xstepdenominv | p00_minus_p20 |
334							//  p11_minus_p21
335	fxch	%st(1)			// t1*p01_minus_p21 - t0*p11_minus_p21 |
336							//  (t1*p00_minus_p20 - t0*p10_minus_p20)*
337							//  ystepdenominv | ystepdenominv |
338							//  xstepdenominv | p00_minus_p20 | p11_minus_p21
339	fmul	%st(3),%st(0)	// (t1*p01_minus_p21 - t0*p11_minus_p21)*
340							//  xstepdenominv |
341							//  (t1*p00_minus_p20 - t0*p10_minus_p20)*
342							//  ystepdenominv | ystepdenominv |
343							//  xstepdenominv | p00_minus_p20 | p11_minus_p21
344	fxch	%st(1)			// (t1*p00_minus_p20 - t0*p10_minus_p20)*
345							//  ystepdenominv |
346							//  (t1*p01_minus_p21 - t0*p11_minus_p21)*
347							//  xstepdenominv | ystepdenominv |
348							//  xstepdenominv | p00_minus_p20 | p11_minus_p21
349	fistpl	C(r_tstepy)		// r_tstepx | ystepdenominv | xstepdenominv |
350							//  p00_minus_p20 | p11_minus_p21
351	fistpl	C(r_tstepx)		// ystepdenominv | xstepdenominv | p00_minus_p20 |
352							//  p11_minus_p21
353
354//	t0 = r_p0[5] - r_p2[5];
355//	t1 = r_p1[5] - r_p2[5];
356
357	fildl	C(r_p2)+20		// r_p2[5] | ystepdenominv | xstepdenominv |
358							//  p00_minus_p20 | p11_minus_p21
359	fildl	C(r_p0)+20		// r_p0[5] | r_p2[5] | ystepdenominv |
360							//  xstepdenominv | p00_minus_p20 | p11_minus_p21
361	fildl	C(r_p1)+20		// r_p1[5] | r_p0[5] | r_p2[5] | ystepdenominv |
362							//  xstepdenominv | p00_minus_p20 | p11_minus_p21
363	fxch	%st(2)			// r_p2[5] | r_p0[5] | r_p1[5] | ystepdenominv |
364							//  xstepdenominv | p00_minus_p20 | p11_minus_p21
365	fld		%st(0)			// r_p2[5] | r_p2[5] | r_p0[5] | r_p1[5] |
366							//  ystepdenominv | xstepdenominv | p00_minus_p20 |
367							//  p11_minus_p21
368	fsubrp	%st(0),%st(2)	// r_p2[5] | t0 | r_p1[5] | ystepdenominv |
369							//  xstepdenominv | p00_minus_p20 | p11_minus_p21
370	fsubrp	%st(0),%st(2)	// t0 | t1 | ystepdenominv | xstepdenominv |
371							//  p00_minus_p20 | p11_minus_p21
372
373//	r_zistepx = (int)((t1 * p01_minus_p21 - t0 * p11_minus_p21) *
374//			xstepdenominv);
375//	r_zistepy = (int)((t1 * p00_minus_p20 - t0 * p10_minus_p20) *
376//			ystepdenominv);
377
// Last use of the values kept on the stack, so this section multiplies and
// subtracts with the popping forms and consumes them as it goes.
378	fld		%st(0)			// t0 | t0 | t1 | ystepdenominv | xstepdenominv |
379							//  p00_minus_p20 | p11_minus_p21
380	fmulp	%st(0),%st(6)	// t0 | t1 | ystepdenominv | xstepdenominv |
381							//  p00_minus_p20 | t0*p11_minus_p21
382	fxch	%st(1)			// t1 | t0 | ystepdenominv | xstepdenominv |
383							//  p00_minus_p20 | t0*p11_minus_p21
384	fld		%st(0)			// t1 | t1 | t0 | ystepdenominv | xstepdenominv |
385							//  p00_minus_p20 | t0*p11_minus_p21
386	fmuls	p01_minus_p21	// t1*p01_minus_p21 | t1 | t0 | ystepdenominv |
387							//  xstepdenominv | p00_minus_p20 |
388							//  t0*p11_minus_p21
389	fxch	%st(2)			// t0 | t1 | t1*p01_minus_p21 | ystepdenominv |
390							//  xstepdenominv | p00_minus_p20 |
391							//  t0*p11_minus_p21
392	fmuls	p10_minus_p20	// t0*p10_minus_p20 | t1 | t1*p01_minus_p21 |
393							//  ystepdenominv | xstepdenominv | p00_minus_p20 |
394							//  t0*p11_minus_p21
395	fxch	%st(1)			// t1 | t0*p10_minus_p20 | t1*p01_minus_p21 |
396							//  ystepdenominv | xstepdenominv | p00_minus_p20 |
397							//  t0*p11_minus_p21
398	fmulp	%st(0),%st(5)	// t0*p10_minus_p20 | t1*p01_minus_p21 |
399							//  ystepdenominv | xstepdenominv |
400							//  t1*p00_minus_p20 | t0*p11_minus_p21
401	fxch	%st(5)			// t0*p11_minus_p21 | t1*p01_minus_p21 |
402							//  ystepdenominv | xstepdenominv |
403							//  t1*p00_minus_p20 | t0*p10_minus_p20
404	fsubrp	%st(0),%st(1)	// t1*p01_minus_p21 - t0*p11_minus_p21 |
405							//  ystepdenominv | xstepdenominv |
406							//  t1*p00_minus_p20 | t0*p10_minus_p20
407	fxch	%st(3)			// t1*p00_minus_p20 | ystepdenominv |
408							//  xstepdenominv |
409							//  t1*p01_minus_p21 - t0*p11_minus_p21 |
410							//  t0*p10_minus_p20
411	fsubp	%st(0),%st(4)	// ystepdenominv | xstepdenominv |
412							//  t1*p01_minus_p21 - t0*p11_minus_p21 |
413							//  t1*p00_minus_p20 - t0*p10_minus_p20
414	fxch	%st(1)			// xstepdenominv | ystepdenominv |
415							//  t1*p01_minus_p21 - t0*p11_minus_p21 |
416							//  t1*p00_minus_p20 - t0*p10_minus_p20
417	fmulp	%st(0),%st(2)	// ystepdenominv |
418							//  (t1*p01_minus_p21 - t0*p11_minus_p21) *
419							//  xstepdenominv |
420							//  t1*p00_minus_p20 - t0*p10_minus_p20
421	fmulp	%st(0),%st(2)	// (t1*p01_minus_p21 - t0*p11_minus_p21) *
422							//  xstepdenominv |
423							//  (t1*p00_minus_p20 - t0*p10_minus_p20) *
424							//  ystepdenominv
425	fistpl	C(r_zistepx)	// (t1*p00_minus_p20 - t0*p10_minus_p20) *
426							//  ystepdenominv
427	fistpl	C(r_zistepy)
428
429//	a_sstepxfrac = r_sstepx << 16;
430//	a_tstepxfrac = r_tstepx << 16;
431//
432//	a_ststepxwhole = r_affinetridesc.skinwidth * (r_tstepx >> 16) +
433//			(r_sstepx >> 16);
434
435	movl	C(r_sstepx),%eax
436	movl	C(r_tstepx),%edx
437	shll	$16,%eax
438	shll	$16,%edx
439	movl	%eax,C(a_sstepxfrac)
440	movl	%edx,C(a_tstepxfrac)
441
442	movl	C(r_sstepx),%ecx
443	movl	C(r_tstepx),%eax
444	sarl	$16,%ecx
445	sarl	$16,%eax
// One-operand imull: %edx:%eax = %eax * operand, so %edx is clobbered. The
// multiplier is the stack argument, not r_affinetridesc.skinwidth as the
// pseudo-code above is written; presumably the caller passes that value.
446	imull	skinwidth(%esp)
447	addl	%ecx,%eax
448	movl	%eax,C(a_ststepxwhole)
449
450	ret
451
452#endif
453
454//----------------------------------------------------------------------
455// recursive subdivision affine triangle drawing code
456//
457// not C-callable because of stdcall return
458//----------------------------------------------------------------------
459
// D_PolysetRecursiveTriangle (lp1, lp2, lp3)
//
// Each argument points to an array of ints, of which this routine reads
// elements [0], [1], [2], [3] and [5]; element [4] is never touched.
// If no two vertices differ by more than 1 in [0] or in [1] the routine
// returns at once. Otherwise one edge is split at its midpoint, the midpoint
// pixel is drawn (subject to a z-buffer test) unless the edge fails the
// leading-edge test below, and the routine calls itself on two sub-triangles
// that share the new vertex.
//
// In:   three dword pointers on the stack, lp1 nearest the return address
// Out:  nothing; the routine removes its own arguments (ret $12), so a
//       caller must not pop them
// Regs: %ebx = lp1, %esi = lp2, %edi = lp3 throughout
//       %ebp, %esi, %edi and %ebx are saved and restored;
//       %eax, %ecx, %edx and the flags are clobbered
// Reads C(zspantable), C(skintable), C(d_pcolormap), C(d_scantable) and
// C(d_viewbuffer); writes through the z-buffer row pointer and the view
// buffer.
//
// Argument offsets from %esp once the four registers below are pushed
// (4 registers * 4 bytes = 16):
460#define lp1	4+16
461#define lp2	8+16
462#define lp3	12+16
463
464.globl C(D_PolysetRecursiveTriangle)
465C(D_PolysetRecursiveTriangle):
466	pushl	%ebp				// preserve caller stack frame pointer
467	pushl	%esi				// preserve register variables
468	pushl	%edi
469	pushl	%ebx
470
471//	int		*temp;
472//	int		d;
473//	int		new[6];
474//	int		i;
475//	int		z;
476//	short	*zbuf;
477	movl	lp2(%esp),%esi
478	movl	lp1(%esp),%ebx
479	movl	lp3(%esp),%edi
480
// Range-test idiom used six times below: after "incl", an unsigned compare
// of (d + 1) against 2 is "above" exactly when d < -1 or d > 1.
// The loads and subtractions for the next test are interleaved with the
// compare of the current one, so the code does not follow the pseudo-code
// line by line.
481//	d = lp2[0] - lp1[0];
482//	if (d < -1 || d > 1)
483//		goto split;
484	movl	0(%esi),%eax
485
486	movl	0(%ebx),%edx
487	movl	4(%esi),%ebp
488
489	subl	%edx,%eax
490	movl	4(%ebx),%ecx
491
492	subl	%ecx,%ebp
493	incl	%eax
494
495	cmpl	$2,%eax
496	ja		LSplit
497
498//	d = lp2[1] - lp1[1];
499//	if (d < -1 || d > 1)
500//		goto split;
501	movl	0(%edi),%eax
502	incl	%ebp
503
504	cmpl	$2,%ebp
505	ja		LSplit
506
507//	d = lp3[0] - lp2[0];
508//	if (d < -1 || d > 1)
509//		goto split2;
510	movl	0(%esi),%edx
511	movl	4(%edi),%ebp
512
513	subl	%edx,%eax
514	movl	4(%esi),%ecx
515
516	subl	%ecx,%ebp
517	incl	%eax
518
519	cmpl	$2,%eax
520	ja		LSplit2
521
522//	d = lp3[1] - lp2[1];
523//	if (d < -1 || d > 1)
524//		goto split2;
525	movl	0(%ebx),%eax
526	incl	%ebp
527
528	cmpl	$2,%ebp
529	ja		LSplit2
530
531//	d = lp1[0] - lp3[0];
532//	if (d < -1 || d > 1)
533//		goto split3;
534	movl	0(%edi),%edx
535	movl	4(%ebx),%ebp
536
537	subl	%edx,%eax
538	movl	4(%edi),%ecx
539
540	subl	%ecx,%ebp
541	incl	%eax
542
// %edx = temp = lp1, saved here because LSplit3 overwrites %ebx
543	incl	%ebp
544	movl	%ebx,%edx
545
546	cmpl	$2,%eax
547	ja		LSplit3
548
549//	d = lp1[1] - lp3[1];
550//	if (d < -1 || d > 1)
551//	{
552//split3:
553//		temp = lp1;
554//		lp1 = lp3;
555//		lp3 = lp2;
556//		lp2 = temp;
557//		goto split;
558//	}
559//
560//	return;			// entire tri is filled
561//
562	cmpl	$2,%ebp
563	jna		LDone
564
// rotate the vertices so that the lp3->lp1 edge becomes the lp1->lp2 edge
565LSplit3:
566	movl	%edi,%ebx
567	movl	%esi,%edi
568	movl	%edx,%esi
569	jmp		LSplit
570
571//split2:
572LSplit2:
573
574//	temp = lp1;
575//	lp1 = lp2;
576//	lp2 = lp3;
577//	lp3 = temp;
578	movl	%ebx,%eax
579	movl	%esi,%ebx
580	movl	%edi,%esi
581	movl	%eax,%edi
582
583//split:
584LSplit:
585
// new[] lives at 0(%esp)..20(%esp) from here until the addl before LDone
586	subl	$24,%esp		// allocate space for a new vertex
587
588//// split this edge
589//	new[0] = (lp1[0] + lp2[0]) >> 1;
590//	new[1] = (lp1[1] + lp2[1]) >> 1;
591//	new[2] = (lp1[2] + lp2[2]) >> 1;
592//	new[3] = (lp1[3] + lp2[3]) >> 1;
593//	new[5] = (lp1[5] + lp2[5]) >> 1;
// new[4] at 16(%esp) is left uninitialized; the stores happen in the order
// [2], [3], [5], [0], [1].
594	movl	8(%ebx),%eax
595
596	movl	8(%esi),%edx
597	movl	12(%ebx),%ecx
598
599	addl	%edx,%eax
600	movl	12(%esi),%edx
601
602	sarl	$1,%eax
603	addl	%edx,%ecx
604
605	movl	%eax,8(%esp)
606	movl	20(%ebx),%eax
607
608	sarl	$1,%ecx
609	movl	20(%esi),%edx
610
611	movl	%ecx,12(%esp)
612	addl	%edx,%eax
613
614	movl	0(%ebx),%ecx
615	movl	0(%esi),%edx
616
617	sarl	$1,%eax
618	addl	%ecx,%edx
619
620	movl	%eax,20(%esp)
621	movl	4(%ebx),%eax
622
623	sarl	$1,%edx
624	movl	4(%esi),%ebp
625
626	movl	%edx,0(%esp)
627	addl	%eax,%ebp
628
629	sarl	$1,%ebp
630	movl	%ebp,4(%esp)
631
// Here %eax still holds lp1[1] and %ecx still holds lp1[0] from the midpoint
// computation above.
632//// draw the point if splitting a leading edge
633//	if (lp2[1] > lp1[1])
634//		goto nodraw;
635	cmpl	%eax,4(%esi)
636	jg		LNoDraw
637
638//	if ((lp2[1] == lp1[1]) && (lp2[0] < lp1[0]))
639//		goto nodraw;
// movl leaves the flags alone, so the jnz still tests the lp2[1] / lp1[1]
// compare above: not equal means the second condition cannot hold.
640	movl	0(%esi),%edx
641	jnz		LDraw
642
643	cmpl	%ecx,%edx
644	jl		LNoDraw
645
646LDraw:
647
648// z = new[5] >> 16;
649	movl	20(%esp),%edx
650	movl	4(%esp),%ecx
651
652	sarl	$16,%edx
653	movl	0(%esp),%ebp
654
655//	zbuf = zspantable[new[1]] + new[0];
656	movl	C(zspantable)(,%ecx,4),%eax
657
658//	if (z >= *zbuf)
659//	{
// signed 16-bit compare of z (%dx) with the stored z-buffer entry
660	cmpw	(%eax,%ebp,2),%dx
661	jnge	LNoDraw
662
663//		int		pix;
664//
665//		*zbuf = z;
666	movw	%dx,(%eax,%ebp,2)
667
668//		pix = d_pcolormap[skintable[new[3]>>16][new[2]>>16]];
669	movl	12(%esp),%eax
670
671	sarl	$16,%eax
672	movl	8(%esp),%edx
673
// %ecx is cleared so that the texel byte loaded into %cl can be used as a
// full 32-bit index into the colormap
674	sarl	$16,%edx
675	subl	%ecx,%ecx
676
677	movl	C(skintable)(,%eax,4),%eax
678	movl	4(%esp),%ebp
679
680	movb	(%eax,%edx,),%cl
681	movl	C(d_pcolormap),%edx
682
683	movb	(%edx,%ecx,),%dl
684	movl	0(%esp),%ecx
685
686//		d_viewbuffer[d_scantable[new[1]] + new[0]] = pix;
687	movl	C(d_scantable)(,%ebp,4),%eax
688	addl	%eax,%ecx
689	movl	C(d_viewbuffer),%eax
690	movb	%dl,(%eax,%ecx,1)
691
692//	}
693//
694//nodraw:
695LNoDraw:
696
697//// recursively continue
698//	D_PolysetRecursiveTriangle (lp3, lp1, new);
// Arguments are pushed last-to-first. "pushl %esp" stores the value %esp had
// before the push, which is the address of new[].
699	pushl	%esp
700	pushl	%ebx
701	pushl	%edi
702	call	C(D_PolysetRecursiveTriangle)
703
704//	D_PolysetRecursiveTriangle (lp3, new, lp2);
// The callee popped its own arguments, so %esp points at new[] again, and it
// preserved %esi (lp2) and %edi (lp3). %ebx (lp1) is no longer needed and is
// reused to hold the address of new[].
705	movl	%esp,%ebx
706	pushl	%esi
707	pushl	%ebx
708	pushl	%edi
709	call	C(D_PolysetRecursiveTriangle)
710	addl	$24,%esp
711
712LDone:
713	popl	%ebx				// restore register variables
714	popl	%edi
715	popl	%esi
716	popl	%ebp				// restore caller stack frame pointer
// callee pops the three pointer arguments
717	ret		$12
718
719
720//----------------------------------------------------------------------
721// 8-bpp horizontal span drawing code for affine polygons, with smooth
722// shading and no transparency
723//----------------------------------------------------------------------
724
// R_PolysetDrawSpans8_Opaque (pspans)
//
// Walks the array of spanpackage_t records that starts at pspans and draws
// each span into the 8-bit view buffer, with a 16-bit z-buffer test per
// pixel and a colormap lookup indexed by (light high byte, texel).
// The array ends at the record whose count field equals -999999.
//
// In:   pspans = the only stack argument
// Globals read and written: C(d_aspancount), C(errorterm)
// Globals read: C(erroradjustup), C(erroradjustdown), C(d_countextrastep),
//  C(ubasestep), C(r_zistepx), C(r_lstepx), C(a_ststepxwhole),
//  C(a_sstepxfrac), C(a_tstepxfrac), C(r_affinetridesc)+atd_skinwidth
// Scratch written: lzistepx, plus tstep and advancetable (both defined
//  outside the code shown here)
// Preserves %esi, %ebx, %ebp, %edi; clobbers %eax, %ecx, %edx, flags.
//
// Self-modifying code: the 0x12345678 displacement in every
// "movb 0x12345678(%eax),%al" is a placeholder. C(D_Aff8Patch) overwrites
// the four bytes in front of each LPatchN label with C(d_pcolormap); until
// it has run, the lookup reads from the placeholder address. No instruction
// may be inserted between such a movb and its LPatchN label.
// C(D_PolysetAff8Start) and C(D_PolysetAff8End) are exported labels that
// bracket this code; nothing shown here reads them (presumably used by the
// code that makes this range writable -- confirm).
//
// Offset of pspans from %esp after the first two pushes (the argument is
// fetched before %ebp and %edi are pushed):
725#define pspans	4+8
726
727.globl C(D_PolysetAff8Start)
728C(D_PolysetAff8Start):
729
730.globl C(R_PolysetDrawSpans8_Opaque)
731C(R_PolysetDrawSpans8_Opaque):
732	pushl	%esi				// preserve register variables
733	pushl	%ebx
734
735	movl	pspans(%esp),%esi	// point to the first span descriptor
736	movl	C(r_zistepx),%ecx
737
738	pushl	%ebp				// preserve caller's stack frame
739	pushl	%edi
740
741	rorl	$16,%ecx			// put high 16 bits of 1/z step in low word
742	movl	spanpackage_t_count(%esi),%edx
743
744	movl	%ecx,lzistepx
745
// On entry to LSpanLoop: %esi = current span record, %edx = its count field
746LSpanLoop:
747
748//		lcount = d_aspancount - pspanpackage->count;
749//
750//		errorterm += erroradjustup;
751//		if (errorterm >= 0)
752//		{
753//			d_aspancount += d_countextrastep;
754//			errorterm -= erroradjustdown;
755//		}
756//		else
757//		{
758//			d_aspancount += ubasestep;
759//		}
// %eax = lcount, computed from d_aspancount before it is stepped below
760	movl	C(d_aspancount),%eax
761	subl	%edx,%eax
762
763	movl	C(erroradjustup),%edx
764	movl	C(errorterm),%ebx
765	addl	%edx,%ebx
766	js		LNoTurnover
767
768	movl	C(erroradjustdown),%edx
769	movl	C(d_countextrastep),%edi
770	subl	%edx,%ebx
771	movl	C(d_aspancount),%ebp
772	movl	%ebx,C(errorterm)
773	addl	%edi,%ebp
774	movl	%ebp,C(d_aspancount)
775	jmp		LRightEdgeStepped
776
777LNoTurnover:
778	movl	C(d_aspancount),%edi
779	movl	C(ubasestep),%edx
780	movl	%ebx,C(errorterm)
781	addl	%edx,%edi
782	movl	%edi,C(d_aspancount)
783
// lcount < 1: nothing to draw; lcount == 1: dedicated single-pixel path;
// both branches use the flags of the one compare
784LRightEdgeStepped:
785	cmpl	$1,%eax
786
787	jl		LNextSpan
788	jz		LExactlyOneLong
789
790//
791// set up advancetable
792//
793	movl	C(a_ststepxwhole),%ecx
794	movl	C(r_affinetridesc)+atd_skinwidth,%edx
795
796	movl	%ecx,advancetable+4	// advance base in t
797	addl	%edx,%ecx
798
799	movl	%ecx,advancetable	// advance extra in t
800	movl	C(a_tstepxfrac),%ecx
801
// tstep = a_tstepxfrac with its low word replaced by the low word of
// r_lstepx, so one addl steps both tfrac and light
802	movw	C(r_lstepx),%cx
803	movl	%eax,%edx			// count
804
805	movl	%ecx,tstep
806	addl	$7,%edx
807
808	shrl	$3,%edx				// count of full and partial loops
809	movl	spanpackage_t_sfrac(%esi),%ebx
810
811	movw	%dx,%bx
812	movl	spanpackage_t_pz(%esi),%ecx
813
814	negl	%eax
815
816	movl	spanpackage_t_pdest(%esi),%edi
817	andl	$7,%eax		// 0->0, 1->7, 2->6, ... , 7->1
818
// %eax = number of unrolled pixel slots skipped on the first pass; pdest is
// moved back by that many bytes and pz by twice as many (two subtractions)
// because z-buffer entries are 2 bytes
819	subl	%eax,%edi	// compensate for hardwired offsets
820	subl	%eax,%ecx
821
822	subl	%eax,%ecx
823	movl	spanpackage_t_tfrac(%esi),%edx
824
825	movw	spanpackage_t_light(%esi),%dx
826	movl	spanpackage_t_zi(%esi),%ebp
827
// the span pointer is parked on the stack while %esi serves as ptex; it is
// popped again after the draw loop
828	rorl	$16,%ebp	// put high 16 bits of 1/z in low word
829	pushl	%esi
830
831	movl	spanpackage_t_ptex(%esi),%esi
832	jmp		*aff8entryvec_table(,%eax,4)
833
834// %bx = count of full and partial loops
835// %ebx high word = sfrac
836// %ecx = pz
837// %dx = light
838// %edx high word = tfrac
839// %esi = ptex
840// %edi = pdest
841// %ebp = 1/z
842// tstep low word = C(r_lstepx)
843// tstep high word = C(a_tstepxfrac)
844// C(a_sstepxfrac) low word = 0
845// C(a_sstepxfrac) high word = C(a_sstepxfrac)
846
// The eight LDrawN / LpN sections below are identical apart from the
// hardwired pz and pdest offsets. In each one:
//   cmpw/jl   skips the pixel when the new 16-bit 1/z is less than the
//             stored z-buffer value (signed compare)
//   %eax      = (high byte of light) * 256 + texel, used as the index into
//             the patched colormap address
//   addl tstep,%edx            steps tfrac and light; carry = t wrapped
//   sbbl %eax,%eax             %eax = -1 if t wrapped, else 0
//   addl lzistepx,%ebp + adcl $0,%ebp
//             both values are rotated by 16, so the carry out of the
//             fractional (high) word is added back into bit 0
//   addl a_sstepxfrac,%ebx     steps sfrac; carry = s wrapped
//   adcl advancetable+4(,%eax,4),%esi
//             ptex += advancetable[0] (extra) when t wrapped, otherwise
//             advancetable[1] (base), plus 1 when s wrapped
847LDrawLoop:
848
849// FIXME: do we need to clamp light? We may need at least a buffer bit to
850// keep it from poking into tfrac and causing problems
851
852LDraw8:
853	cmpw	(%ecx),%bp
854	jl		Lp1
855	xorl	%eax,%eax
856	movb	%dh,%ah
857	movb	(%esi),%al
858	movw	%bp,(%ecx)
859	movb	0x12345678(%eax),%al
860LPatch8:
861	movb	%al,(%edi)
862Lp1:
863	addl	tstep,%edx
864	sbbl	%eax,%eax
865	addl	lzistepx,%ebp
866	adcl	$0,%ebp
867	addl	C(a_sstepxfrac),%ebx
868	adcl	advancetable+4(,%eax,4),%esi
869
870LDraw7:
871	cmpw	2(%ecx),%bp
872	jl		Lp2
873	xorl	%eax,%eax
874	movb	%dh,%ah
875	movb	(%esi),%al
876	movw	%bp,2(%ecx)
877	movb	0x12345678(%eax),%al
878LPatch7:
879	movb	%al,1(%edi)
880Lp2:
881	addl	tstep,%edx
882	sbbl	%eax,%eax
883	addl	lzistepx,%ebp
884	adcl	$0,%ebp
885	addl	C(a_sstepxfrac),%ebx
886	adcl	advancetable+4(,%eax,4),%esi
887
888LDraw6:
889	cmpw	4(%ecx),%bp
890	jl		Lp3
891	xorl	%eax,%eax
892	movb	%dh,%ah
893	movb	(%esi),%al
894	movw	%bp,4(%ecx)
895	movb	0x12345678(%eax),%al
896LPatch6:
897	movb	%al,2(%edi)
898Lp3:
899	addl	tstep,%edx
900	sbbl	%eax,%eax
901	addl	lzistepx,%ebp
902	adcl	$0,%ebp
903	addl	C(a_sstepxfrac),%ebx
904	adcl	advancetable+4(,%eax,4),%esi
905
906LDraw5:
907	cmpw	6(%ecx),%bp
908	jl		Lp4
909	xorl	%eax,%eax
910	movb	%dh,%ah
911	movb	(%esi),%al
912	movw	%bp,6(%ecx)
913	movb	0x12345678(%eax),%al
914LPatch5:
915	movb	%al,3(%edi)
916Lp4:
917	addl	tstep,%edx
918	sbbl	%eax,%eax
919	addl	lzistepx,%ebp
920	adcl	$0,%ebp
921	addl	C(a_sstepxfrac),%ebx
922	adcl	advancetable+4(,%eax,4),%esi
923
924LDraw4:
925	cmpw	8(%ecx),%bp
926	jl		Lp5
927	xorl	%eax,%eax
928	movb	%dh,%ah
929	movb	(%esi),%al
930	movw	%bp,8(%ecx)
931	movb	0x12345678(%eax),%al
932LPatch4:
933	movb	%al,4(%edi)
934Lp5:
935	addl	tstep,%edx
936	sbbl	%eax,%eax
937	addl	lzistepx,%ebp
938	adcl	$0,%ebp
939	addl	C(a_sstepxfrac),%ebx
940	adcl	advancetable+4(,%eax,4),%esi
941
942LDraw3:
943	cmpw	10(%ecx),%bp
944	jl		Lp6
945	xorl	%eax,%eax
946	movb	%dh,%ah
947	movb	(%esi),%al
948	movw	%bp,10(%ecx)
949	movb	0x12345678(%eax),%al
950LPatch3:
951	movb	%al,5(%edi)
952Lp6:
953	addl	tstep,%edx
954	sbbl	%eax,%eax
955	addl	lzistepx,%ebp
956	adcl	$0,%ebp
957	addl	C(a_sstepxfrac),%ebx
958	adcl	advancetable+4(,%eax,4),%esi
959
960LDraw2:
961	cmpw	12(%ecx),%bp
962	jl		Lp7
963	xorl	%eax,%eax
964	movb	%dh,%ah
965	movb	(%esi),%al
966	movw	%bp,12(%ecx)
967	movb	0x12345678(%eax),%al
968LPatch2:
969	movb	%al,6(%edi)
970Lp7:
971	addl	tstep,%edx
972	sbbl	%eax,%eax
973	addl	lzistepx,%ebp
974	adcl	$0,%ebp
975	addl	C(a_sstepxfrac),%ebx
976	adcl	advancetable+4(,%eax,4),%esi
977
978LDraw1:
979	cmpw	14(%ecx),%bp
980	jl		Lp8
981	xorl	%eax,%eax
982	movb	%dh,%ah
983	movb	(%esi),%al
984	movw	%bp,14(%ecx)
985	movb	0x12345678(%eax),%al
986LPatch1:
987	movb	%al,7(%edi)
988Lp8:
989	addl	tstep,%edx
990	sbbl	%eax,%eax
991	addl	lzistepx,%ebp
992	adcl	$0,%ebp
993	addl	C(a_sstepxfrac),%ebx
994	adcl	advancetable+4(,%eax,4),%esi
995
// advance to the next group of 8 pixels: 8 bytes of pdest, 16 bytes of pz
996	addl	$8,%edi
997	addl	$16,%ecx
998
// only the low word of %ebx is the loop counter; the high word is sfrac
999	decw	%bx
1000	jnz		LDrawLoop
1001
1002	popl	%esi				// restore spans pointer
1003LNextSpan:
1004	addl	$(spanpackage_t_size),%esi	// point to next span
// entry used by LExactlyOneLong, which has already advanced %esi
1005LNextSpanESISet:
1006	movl	spanpackage_t_count(%esi),%edx
1007	cmpl	$-999999,%edx		// any more spans?
1008	jnz		LSpanLoop			// yes
1009
1010	popl	%edi
1011	popl	%ebp				// restore the caller's stack frame
1012	popl	%ebx				// restore register variables
1013	popl	%esi
1014	ret
1015
1016
1017// draw a one-long span
// No stepping is needed for a single pixel, so the span fields are used
// directly and the light high byte is read straight from the record.
1018
1019LExactlyOneLong:
1020
1021	movl	spanpackage_t_pz(%esi),%ecx
1022	movl	spanpackage_t_zi(%esi),%ebp
1023
1024	rorl	$16,%ebp	// put high 16 bits of 1/z in low word
1025	movl	spanpackage_t_ptex(%esi),%ebx
1026
// a failed z test leaves through LNextSpan because %esi has not been
// advanced yet at this point
1027	cmpw	(%ecx),%bp
1028	jl		LNextSpan
1029	xorl	%eax,%eax
1030	movl	spanpackage_t_pdest(%esi),%edi
1031	movb	spanpackage_t_light+1(%esi),%ah
1032	addl	$(spanpackage_t_size),%esi	// point to next span
1033	movb	(%ebx),%al
1034	movw	%bp,(%ecx)
1035	movb	0x12345678(%eax),%al
1036LPatch9:
1037	movb	%al,(%edi)
1038
1039	jmp		LNextSpanESISet
1040
1041.globl C(D_PolysetAff8End)
1042C(D_PolysetAff8End):
1043
1044
//
// D_Aff8Patch
//
// Self-modifying-code fixup: copies the current value of d_pcolormap over
// the four bytes that end at each LPatchN label. Every LPatchN label visible
// in this part of the file directly follows a
//	movb	0x12345678(%eax),%al
// instruction, so LPatchN-4 is that instruction's 32-bit displacement and the
// 0x12345678 placeholder is what gets replaced.
// NOTE(review): LPatch5..LPatch8 are defined outside the lines reviewed here;
// presumably they follow the same instruction pattern - confirm.
//
// No stack arguments are read. Clobbers %eax only; no flags are modified.
// The stores are independent of one another, so their order is arbitrary.
//
1045.globl C(D_Aff8Patch)
1046C(D_Aff8Patch):
1047	movl	C(d_pcolormap),%eax		// %eax = value written at every patch site
1048	movl	%eax,LPatch9-4			// lookup in the one-long span path
1049	movl	%eax,LPatch8-4			// remaining sites, highest label first
1050	movl	%eax,LPatch7-4
1051	movl	%eax,LPatch6-4
1052	movl	%eax,LPatch5-4
1053	movl	%eax,LPatch4-4
1054	movl	%eax,LPatch3-4
1055	movl	%eax,LPatch2-4
1056	movl	%eax,LPatch1-4
1057
1058	ret
1059
1060//----------------------------------------------------------------------
1061// Alias model triangle left-edge scanning code
1062//----------------------------------------------------------------------
1063
//
// R_PolysetScanLeftEdge
//
// Left-edge scanner for alias model triangles. Each pass of the loop writes
// one spanpackage_t record at d_pedgespanpackage, advances that pointer, and
// then steps the edge state by either the "extra" or the "base" step set,
// chosen by the sign of the running error term.
//
// In:    one stack argument, "height" (the number of records to emit). Only
//        its low 16 bits are used. After the four pushes below it is found at
//        4+16(%esp): 4 bytes of return address plus 16 bytes of saved
//        registers, which is what the height define encodes.
//        NOTE(review): the C prototype is not visible here; presumably
//        void R_PolysetScanLeftEdge (int height) - confirm in d_polyse.c.
// Out:   no return value is set up; %eax holds scratch at exit.
// Saves: %ebp, %esi, %edi, %ebx (pushed on entry, popped before each ret).
// Clobb: %eax, %ecx, %edx, flags.
//
// Globals stored back: d_pedgespanpackage, errorterm, d_pdest, d_pz,
// d_aspancount. d_ptex, d_sfrac, d_tfrac, d_light and d_zi are only read
// once on entry; their stepped values live in registers and are not written
// back to the globals before returning.
//
// NOTE(review): the loop is bottom-tested, so a height whose low 16 bits are
// zero is not treated as "no scanlines"; the first decl would borrow from the
// sfrac bits held in the upper part of %ecx. Presumably callers never pass
// that - confirm.
//
1064#define height	4+16
1065
1066.globl C(R_PolysetScanLeftEdge)
1067C(R_PolysetScanLeftEdge):
1068	pushl	%ebp				// preserve caller stack frame pointer
1069	pushl	%esi				// preserve register variables
1070	pushl	%edi
1071	pushl	%ebx
1072
1073	movl	height(%esp),%eax
1074	movl	C(d_sfrac),%ecx
1075	andl	$0xFFFF,%eax		// keep only the low 16 bits of height
1076	movl	C(d_ptex),%ebx
1077	orl		%eax,%ecx			// pack the count into the low word of %ecx
// NOTE(review): the packing above assumes the low word of d_sfrac is zero on
// entry, and that the sfrac steps added to %ecx later also have a zero low
// word; otherwise the count would be disturbed - confirm against the caller.
1078	movl	C(d_pedgespanpackage),%esi
1079	movl	C(d_tfrac),%edx
1080	movl	C(d_light),%edi
1081	movl	C(d_zi),%ebp
1082
1083// %eax: scratch
1084// %ebx: d_ptex
1085// %ecx: d_sfrac in high word, count in low word
1086// %edx: d_tfrac
1087// %esi: d_pedgespanpackage, errorterm, scratch alternately
1088// %edi: d_light
1089// %ebp: d_zi
1090
1091//	do
1092//	{
1093
1094LScanLoop:
1095
1096//		d_pedgespanpackage->ptex = ptex;
1097//		d_pedgespanpackage->pdest = d_pdest;
1098//		d_pedgespanpackage->pz = d_pz;
1099//		d_pedgespanpackage->count = d_aspancount;
1100//		d_pedgespanpackage->light = d_light;
1101//		d_pedgespanpackage->zi = d_zi;
1102//		d_pedgespanpackage->sfrac = d_sfrac << 16;
1103//		d_pedgespanpackage->tfrac = d_tfrac << 16;
// The register copies are stored as they are, with no shift done here. %ecx
// is stored whole, so the low word of the sfrac field also carries the
// remaining count.
1104	movl	%ebx,spanpackage_t_ptex(%esi)
1105	movl	C(d_pdest),%eax
1106	movl	%eax,spanpackage_t_pdest(%esi)
1107	movl	C(d_pz),%eax
1108	movl	%eax,spanpackage_t_pz(%esi)
1109	movl	C(d_aspancount),%eax
1110	movl	%eax,spanpackage_t_count(%esi)
1111	movl	%edi,spanpackage_t_light(%esi)
1112	movl	%ebp,spanpackage_t_zi(%esi)
1113	movl	%ecx,spanpackage_t_sfrac(%esi)
1114	movl	%edx,spanpackage_t_tfrac(%esi)
1115
1116// pretouch the next cache line
1117	movb	spanpackage_t_size(%esi),%al	// value is discarded; %eax is reloaded below
1118
1119//		d_pedgespanpackage++;
1120	addl	$(spanpackage_t_size),%esi
1121	movl	C(erroradjustup),%eax
1122	movl	%esi,C(d_pedgespanpackage)	// %esi is free for reuse until the loop tail
1123
1124//		errorterm += erroradjustup;
1125	movl	C(errorterm),%esi
1126	addl	%eax,%esi			// sets the sign flag tested by the js below
1127	movl	C(d_pdest),%eax		// mov does not modify flags, so SF from the add survives
1128
1129//		if (errorterm >= 0)
1130//		{
1131	js		LNoLeftEdgeTurnover	// negative sum: take the base-step path
1132
1133//			errorterm -= erroradjustdown;
1134//			d_pdest += d_pdestextrastep;
1135	subl	C(erroradjustdown),%esi
1136	addl	C(d_pdestextrastep),%eax
1137	movl	%esi,C(errorterm)
1138	movl	%eax,C(d_pdest)
1139
1140//			d_pz += d_pzextrastep;
1141//			d_aspancount += d_countextrastep;
1142//			d_ptex += d_ptexextrastep;
1143//			d_sfrac += d_sfracextrastep;
1144//			d_ptex += d_sfrac >> 16;
1145//			d_sfrac &= 0xFFFF;
1146//			d_tfrac += d_tfracextrastep;
1147	movl	C(d_pz),%eax
1148	movl	C(d_aspancount),%esi
1149	addl	C(d_pzextrastep),%eax
1150	addl	C(d_sfracextrastep),%ecx	// carry out = overflow of the sfrac bits
1151	adcl	C(d_ptexextrastep),%ebx	// must directly follow: adds the step plus that carry
1152	addl	C(d_countextrastep),%esi
1153	movl	%eax,C(d_pz)
1154	movl	C(d_tfracextrastep),%eax
1155	movl	%esi,C(d_aspancount)
1156	addl	%eax,%edx			// carry out is tested by the jnc below
1157
1158//			if (d_tfrac & 0x10000)
1159//			{
// The overflow test is done with the carry flag from the 32-bit add above
// rather than by masking a bit.
1160	jnc		LSkip1
1161
1162//				d_ptex += r_affinetridesc.skinwidth;
1163//				d_tfrac &= 0xFFFF;
1164	addl	C(r_affinetridesc)+atd_skinwidth,%ebx
1165
1166//			}
1167
1168LSkip1:
1169
1170//			d_light += d_lightextrastep;
1171//			d_zi += d_ziextrastep;
1172	addl	C(d_lightextrastep),%edi
1173	addl	C(d_ziextrastep),%ebp
1174
1175//		}
// Loop tail for the extra-step path: while (--height), with the count held in
// the low word of %ecx. The base-step path below has its own copy of this
// tail and of the epilogue.
1176	movl	C(d_pedgespanpackage),%esi	// %esi = next record to fill
1177	decl	%ecx				// decrement the packed count
1178	testl	$0xFFFF,%ecx		// look at the count bits only, not the sfrac bits
1179	jnz		LScanLoop
1180
1181	popl	%ebx				// restore in reverse order of the pushes
1182	popl	%edi
1183	popl	%esi
1184	popl	%ebp
1185	ret
1186
1187//		else
1188//		{
1189
1190LNoLeftEdgeTurnover:
1191	movl	%esi,C(errorterm)	// store errorterm + erroradjustup (no adjust-down here)
1192
1193//			d_pdest += d_pdestbasestep;
1194	addl	C(d_pdestbasestep),%eax	// %eax still holds d_pdest, loaded before the js
1195	movl	%eax,C(d_pdest)
1196
1197//			d_pz += d_pzbasestep;
1198//			d_aspancount += ubasestep;
1199//			d_ptex += d_ptexbasestep;
1200//			d_sfrac += d_sfracbasestep;
1201//			d_ptex += d_sfrac >> 16;
1202//			d_sfrac &= 0xFFFF;
1203	movl	C(d_pz),%eax
1204	movl	C(d_aspancount),%esi
1205	addl	C(d_pzbasestep),%eax
1206	addl	C(d_sfracbasestep),%ecx	// carry out = overflow of the sfrac bits
1207	adcl	C(d_ptexbasestep),%ebx	// must directly follow: adds the step plus that carry
1208	addl	C(ubasestep),%esi
1209	movl	%eax,C(d_pz)
1210	movl	%esi,C(d_aspancount)
1211
1212//			d_tfrac += d_tfracbasestep;
1213	movl	C(d_tfracbasestep),%esi
1214	addl	%esi,%edx			// carry out is tested by the jnc below
1215
1216//			if (d_tfrac & 0x10000)
1217//			{
1218	jnc		LSkip2
1219
1220//				d_ptex += r_affinetridesc.skinwidth;
1221//				d_tfrac &= 0xFFFF;
1222	addl	C(r_affinetridesc)+atd_skinwidth,%ebx
1223
1224//			}
1225
1226LSkip2:
1227
1228//			d_light += d_lightbasestep;
1229//			d_zi += d_zibasestep;
1230	addl	C(d_lightbasestep),%edi
1231	addl	C(d_zibasestep),%ebp
1232
1233//		}
1234//	} while (--height);
1235	movl	C(d_pedgespanpackage),%esi	// %esi = next record to fill
1236	decl	%ecx				// decrement the packed count
1237	testl	$0xFFFF,%ecx		// look at the count bits only, not the sfrac bits
1238	jnz		LScanLoop
1239
1240	popl	%ebx				// restore in reverse order of the pushes
1241	popl	%edi
1242	popl	%esi
1243	popl	%ebp
1244	ret
1245
1246#endif	// id386
1247
1248