1/*
2 * Clip testing in SPARC assembly
3 */
4
/* Select the pointer-width load instruction and the pointer size for the
 * ABI being built.  MATH_ASM_PTR_SIZE is defined before the include so
 * that math/m_vector_asm.h can see it; presumably that header derives the
 * V4F_* field offsets from it -- confirm in the header.
 */
#if __arch64__
/* 64-bit ABI: pointers are loaded with the 64-bit ldx. */
#define LDPTR		ldx
#define MATH_ASM_PTR_SIZE 8
#include "math/m_vector_asm.h"
#else
/* 32-bit ABI: pointers are loaded with the 32-bit ld. */
#define LDPTR		ld
#define MATH_ASM_PTR_SIZE 4
#include "math/m_vector_asm.h"
#endif
14
        /* %g2 and %g3 are used as plain temporaries by the routines in
         * this file, so declare them as scratch to the assembler.
         */
        .register %g2, #scratch
        .register %g3, #scratch

	.text
	.align		64

	/* Single-precision 1.0.  The routines locate this word
	 * PC-relatively and then reach clip_table as one_dot_zero + 4,
	 * so clip_table must follow this word immediately: do not
	 * insert anything between the two.
	 */
one_dot_zero:
	.word		0x3f800000	/* 1.0f */
23
	/* This trick is shamelessly stolen from the x86
	 * Mesa asm.  Very clever, and we can do it too
	 * since we have the necessary add with carry
	 * instructions on Sparc.
	 *
	 * The clip-test loops build a 7-bit table index one carry
	 * bit at a time from the raw IEEE bit patterns of a vertex:
	 *
	 *	bit 6	sign of w
	 *	bit 5	sign of z
	 *	bit 4	|w| < |z|
	 *	bit 3	sign of y
	 *	bit 2	|w| < |y|
	 *	bit 1	sign of x
	 *	bit 0	|w| < |x|
	 *
	 * The magnitude tests compare the bit patterns as unsigned
	 * integers after the sign bit has been shifted out.  The byte
	 * found at that index is the clip mask stored for the vertex.
	 * There are 128 entries; each row below holds 8 consecutive
	 * indices, and the first index of the row is noted on the right.
	 */
clip_table:
	.byte	 0,  1,  0,  2,  4,  5,  4,  6	/* index   0 */
	.byte	 0,  1,  0,  2,  8,  9,  8, 10	/* index   8 */
	.byte	32, 33, 32, 34, 36, 37, 36, 38	/* index  16 */
	.byte	32, 33, 32, 34, 40, 41, 40, 42	/* index  24 */
	.byte	 0,  1,  0,  2,  4,  5,  4,  6	/* index  32 */
	.byte	 0,  1,  0,  2,  8,  9,  8, 10	/* index  40 */
	.byte	16, 17, 16, 18, 20, 21, 20, 22	/* index  48 */
	.byte	16, 17, 16, 18, 24, 25, 24, 26	/* index  56 */
	.byte	63, 61, 63, 62, 55, 53, 55, 54	/* index  64: w negative from here on */
	.byte	63, 61, 63, 62, 59, 57, 59, 58	/* index  72 */
	.byte	47, 45, 47, 46, 39, 37, 39, 38	/* index  80 */
	.byte	47, 45, 47, 46, 43, 41, 43, 42	/* index  88 */
	.byte	63, 61, 63, 62, 55, 53, 55, 54	/* index  96 */
	.byte	63, 61, 63, 62, 59, 57, 59, 58	/* index 104 */
	.byte	31, 29, 31, 30, 23, 21, 23, 22	/* index 112 */
	.byte	31, 29, 31, 30, 27, 25, 27, 26	/* index 120 */
46
47/* GLvector4f *clip_vec, GLvector4f *proj_vec,
48   GLubyte clipMask[], GLubyte *orMask, GLubyte *andMask,
49   GLboolean viewport_z_enable */
50
51	.align		64
52__pc_tramp:
53	retl
54	 nop
55
56	.globl		_mesa_sparc_cliptest_points4
57_mesa_sparc_cliptest_points4:
58	save		%sp, -64, %sp
59	call		__pc_tramp
60	 sub		%o7, (. - one_dot_zero - 4), %g1
61	ld		[%g1 + 0x0], %f4
62	add		%g1, 0x4, %g1
63
64	ld		[%i0 + V4F_STRIDE], %l1
65	ld		[%i0 + V4F_COUNT], %l3
66	LDPTR		[%i0 + V4F_START], %i0
67	LDPTR		[%i1 + V4F_START], %i5
68	ldub		[%i3], %g2
69	ldub		[%i4], %g3
70	sll		%g3, 8, %g3
71	or		%g2, %g3, %g2
72
73	ld		[%i1 + V4F_FLAGS], %g3
74	or		%g3, VEC_SIZE_4, %g3
75	st		%g3, [%i1 + V4F_FLAGS]
76	mov		3, %g3
77	st		%g3, [%i1 + V4F_SIZE]
78	st		%l3, [%i1 + V4F_COUNT]
79	clr		%l2
80	clr		%l0
81
82	/* l0:	i
83	 * l3:	count
84	 * l1:	stride
85	 * l2:	c
86	 * g2:	(tmpAndMask << 8) | tmpOrMask
87	 * g1:	clip_table
88	 * i0:	from[stride][i]
89	 * i2:	clipMask
90	 * i5:	vProj[4][i]
91	 */
92
931:	ld		[%i0 + 0x0c], %f3	! LSU	Group
94	ld		[%i0 + 0x0c], %g5	! LSU	Group
95	ld		[%i0 + 0x08], %g4	! LSU	Group
96	fdivs		%f4, %f3, %f8		! FGM
97	addcc		%g5, %g5, %g5		! IEU1	Group
98	addx		%g0, 0x0, %g3		! IEU1	Group
99	addcc		%g4, %g4, %g4		! IEU1	Group
100	addx		%g3, %g3, %g3		! IEU1	Group
101	subcc		%g5, %g4, %g0		! IEU1	Group
102	ld		[%i0 + 0x04], %g4	! LSU	Group
103	addx		%g3, %g3, %g3		! IEU1	Group
104	addcc		%g4, %g4, %g4		! IEU1	Group
105	addx		%g3, %g3, %g3		! IEU1	Group
106	subcc		%g5, %g4, %g0		! IEU1	Group
107	ld		[%i0 + 0x00], %g4	! LSU	Group
108	addx		%g3, %g3, %g3		! IEU1	Group
109	addcc		%g4, %g4, %g4		! IEU1	Group
110	addx		%g3, %g3, %g3		! IEU1	Group
111	subcc		%g5, %g4, %g0		! IEU1	Group
112	addx		%g3, %g3, %g3		! IEU1	Group
113	ldub		[%g1 + %g3], %g3	! LSU	Group
114	cmp		%g3, 0			! IEU1	Group, stall
115	be		2f			! CTI
116	 stb		%g3, [%i2]		! LSU
117	sll		%g3, 8, %g4		! IEU1	Group
118	add		%l2, 1, %l2		! IEU0
119	st		%g0, [%i5 + 0x00]	! LSU
120	or		%g4, 0xff, %g4		! IEU0	Group
121	or		%g2, %g3, %g2		! IEU1
122	st		%g0, [%i5 + 0x04]	! LSU
123	and		%g2, %g4, %g2		! IEU0	Group
124	st		%g0, [%i5 + 0x08]	! LSU
125	b		3f			! CTI
126	 st		%f4, [%i5 + 0x0c]	! LSU	Group
1272:	ld		[%i0 + 0x00], %f0	! LSU	Group
128	ld		[%i0 + 0x04], %f1	! LSU	Group
129	ld		[%i0 + 0x08], %f2	! LSU	Group
130	fmuls		%f0, %f8, %f0		! FGM
131	st		%f0, [%i5 + 0x00]	! LSU	Group
132	fmuls		%f1, %f8, %f1		! FGM
133	st		%f1, [%i5 + 0x04]	! LSU	Group
134	fmuls		%f2, %f8, %f2		! FGM
135	st		%f2, [%i5 + 0x08]	! LSU	Group
136	st		%f8, [%i5 + 0x0c]	! LSU	Group
1373:	add		%i5, 0x10, %i5		! IEU1
138	add		%l0, 1, %l0		! IEU0	Group
139	add		%i2, 1, %i2		! IEU0	Group
140	cmp		%l0, %l3		! IEU1	Group
141	bne		1b			! CTI
142	 add		%i0, %l1, %i0		! IEU0	Group
143	stb		%g2, [%i3]		! LSU
144	srl		%g2, 8, %g3		! IEU0	Group
145	cmp		%l2, %l3		! IEU1	Group
146	bl,a		1f			! CTI
147	 clr		%g3			! IEU0
1481:	stb		%g3, [%i4]		! LSU	Group
149	ret					! CTI	Group
150	 restore	%i1, 0x0, %o0
151
152	.globl		_mesa_sparc_cliptest_points4_np
153_mesa_sparc_cliptest_points4_np:
154	save		%sp, -64, %sp
155
156	call		__pc_tramp
157	 sub		%o7, (. - one_dot_zero - 4), %g1
158	add		%g1, 0x4, %g1
159
160	ld		[%i0 + V4F_STRIDE], %l1
161	ld		[%i0 + V4F_COUNT], %l3
162	LDPTR		[%i0 + V4F_START], %i0
163	ldub		[%i3], %g2
164	ldub		[%i4], %g3
165	sll		%g3, 8, %g3
166	or		%g2, %g3, %g2
167
168	clr		%l2
169	clr		%l0
170
171	/* l0:	i
172	 * l3:	count
173	 * l1:	stride
174	 * l2:	c
175	 * g2:	(tmpAndMask << 8) | tmpOrMask
176	 * g1:	clip_table
177	 * i0:	from[stride][i]
178	 * i2:	clipMask
179	 */
180
1811:	ld		[%i0 + 0x0c], %g5	! LSU	Group
182	ld		[%i0 + 0x08], %g4	! LSU	Group
183	addcc		%g5, %g5, %g5		! IEU1	Group
184	addx		%g0, 0x0, %g3		! IEU1	Group
185	addcc		%g4, %g4, %g4		! IEU1	Group
186	addx		%g3, %g3, %g3		! IEU1	Group
187	subcc		%g5, %g4, %g0		! IEU1	Group
188	ld		[%i0 + 0x04], %g4	! LSU	Group
189	addx		%g3, %g3, %g3		! IEU1	Group
190	addcc		%g4, %g4, %g4		! IEU1	Group
191	addx		%g3, %g3, %g3		! IEU1	Group
192	subcc		%g5, %g4, %g0		! IEU1	Group
193	ld		[%i0 + 0x00], %g4	! LSU	Group
194	addx		%g3, %g3, %g3		! IEU1	Group
195	addcc		%g4, %g4, %g4		! IEU1	Group
196	addx		%g3, %g3, %g3		! IEU1	Group
197	subcc		%g5, %g4, %g0		! IEU1	Group
198	addx		%g3, %g3, %g3		! IEU1	Group
199	ldub		[%g1 + %g3], %g3	! LSU	Group
200	cmp		%g3, 0			! IEU1	Group, stall
201	be		2f			! CTI
202	 stb		%g3, [%i2]		! LSU
203	sll		%g3, 8, %g4		! IEU1	Group
204	add		%l2, 1, %l2		! IEU0
205	or		%g4, 0xff, %g4		! IEU0	Group
206	or		%g2, %g3, %g2		! IEU1
207	and		%g2, %g4, %g2		! IEU0	Group
2082:	add		%l0, 1, %l0		! IEU0	Group
209	add		%i2, 1, %i2		! IEU0	Group
210	cmp		%l0, %l3		! IEU1	Group
211	bne		1b			! CTI
212	 add		%i0, %l1, %i0		! IEU0	Group
213	stb		%g2, [%i3]		! LSU
214	srl		%g2, 8, %g3		! IEU0	Group
215	cmp		%l2, %l3		! IEU1	Group
216	bl,a		1f			! CTI
217	 clr		%g3			! IEU0
2181:	stb		%g3, [%i4]		! LSU	Group
219	ret					! CTI	Group
220	 restore	%i1, 0x0, %o0
221