1#include <config.h>
2#include "asm.h"
3
4#ifdef DO_MMX_ASM
5
6/*\
7|*| MMX assembly rgba rendering routines for Imlib2
8|*| Written by Willem Monsuwe <willem@stack.nl>
9|*|
10|*| Special (hairy) constructs are only commented on first use.
11\*/
12
/*\ All conversion routines share the same calling convention
|*| (imlib_get_cpuid is the exception: it reads no arguments):
|*|  __imlib_mmx_rgbXXX(void *src, int sjmp, void *dst, int dw,
|*|			int w, int h, int dx, int dy)
\*/
17
/*\ Argument slots, addressed off the frame pointer that ENTER sets up.
|*| 0(%ebp) holds the saved %ebp and 4(%ebp) the return address, so the
|*| first argument sits at 8(%ebp); every argument takes 4 bytes.
\*/
#define src	 8(%ebp)	/*\ source pointer, read 4 bytes per pixel \*/
#define sjmp	12(%ebp)	/*\ source row skip in pixels; ENTER adds the width to it \*/
#define dst	16(%ebp)	/*\ destination pointer, written 2 bytes per pixel \*/
#define dw	20(%ebp)	/*\ added to the destination pointer after each row \*/
#define w	24(%ebp)	/*\ pixels per row \*/
#define h	28(%ebp)	/*\ number of rows \*/
#define dx	32(%ebp)	/*\ not referenced by the code in this file \*/
#define dy	36(%ebp)	/*\ not referenced by the code in this file \*/
26
27.text
28	.align 8
29FN_(imlib_mmx_rgb565_fast)
30FN_(imlib_mmx_bgr565_fast)
31FN_(imlib_mmx_rgb555_fast)
32FN_(imlib_mmx_bgr555_fast)
33
34FN_(imlib_get_cpuid)
35
36#include "asm_loadimmq.S"
37
38/*\ Common code \*/
/*\ ENTER: build the stack frame, save the working registers and fetch the
|*| common arguments.  On exit:
|*|   %esi = src, %edi = dst, %ebx = w, %edx = h
|*|   sjmp = sjmp + w   (the argument slot on the stack is updated in place,
|*|                      LOOP_END uses it to step %esi from row to row)
|*| The pushes must keep this order: LEAVE pops the registers in reverse.
\*/
#define ENTER				\
	pushl	%ebp;			\
	movl	%esp, %ebp;		\
	pushl	%ebx;			\
	pushl	%ecx;			\
	pushl	%edx;			\
	pushl	%edi;			\
	pushl	%esi;			\
	movl	w,    %ebx;		\
	addl	%ebx, sjmp;		\
	movl	h,    %edx;		\
	movl	src,  %esi;		\
	movl	dst,  %edi
53
/*\ LOOP_START: open the row loop.
|*| When the row count (%edx) or the row width (%ebx) is zero, jump straight
|*| to label 4, the exit defined by LOOP_END.  Otherwise label 0 marks the
|*| top of the row loop, where %ecx is reloaded with the number of pixels
|*| that remain to be converted in the row.
\*/
#define LOOP_START			\
	testl	%edx, %edx;		\
	jz	4f;			\
	testl	%ebx, %ebx;		\
	jz	4f;			\
0:					\
	movl	%ebx, %ecx
61
/*\ LOOP_END: close the row loop.
|*| Label 3 is the target the per-row pixel code jumps to once its row is
|*| finished.  Both pointers move on to the next row (%edi by dw bytes,
|*| %esi by 4 * sjmp bytes, sjmp having been adjusted by ENTER), the row
|*| counter %edx is decremented and the loop restarts at label 0 until the
|*| counter reaches zero.  Label 4 is the exit LOOP_START jumps to.
|*| Clobbers %ecx.
\*/
#define LOOP_END				\
3:						\
	addl	dw, %edi;			\
	movl	sjmp, %ecx;			\
	leal	(%esi, %ecx, 4), %esi;		\
	decl	%edx;				\
	jnz	0b;				\
4:
70
/*\ LEAVE: switch the FPU out of MMX mode (emms), restore the registers
|*| saved by ENTER in reverse push order, drop the stack frame and return.
\*/
#define LEAVE				\
	emms;				\
	popl	%esi;			\
	popl	%edi;			\
	popl	%edx;			\
	popl	%ecx;			\
	popl	%ebx;			\
	leave;				\
	ret
82
83
84
/*\
|*| imlib_mmx_bgr565_fast / imlib_mmx_rgb565_fast
|*|
|*| Convert rows of 32 bit source pixels into 16 bit 5-6-5 pixels.
|*| The two entry points differ only in the multiplier loaded into %mm7;
|*| the bgr entry loads its constant and joins the shared body.
|*|
|*| Register use in the shared body:
|*|   %esi / %edi	source / destination pointer of the current row
|*|   %ebx / %edx	pixels per row / rows left
|*|   %ecx		pixels of the current row still to convert
|*|   %mm5, %mm6	m_rb and m_g6 masks
|*|   %mm7		multiplier that moves red and blue into place
|*|   %eax		scratch, not restored
|*|
|*| A row is converted from its end towards its start: one pixel if the
|*| width is odd, then two pixels if bit 1 of the remaining count is set,
|*| then groups of four until the count reaches zero.  The MMX instructions
|*| and the movd/movw/movq stores leave EFLAGS alone, so each conditional
|*| jump after a store still tests the decl/subl that updated %ecx.
\*/
PR_(imlib_mmx_bgr565_fast):
	LOAD_IMMQ(mul_bgr565, %mm7)	/*\ the only difference from the rgb entry \*/
	CLEANUP_IMMQ_LOADS(1)
	jmp	.rgb565_fast_entry

SIZE(imlib_mmx_bgr565_fast)

PR_(imlib_mmx_rgb565_fast):
	LOAD_IMMQ(mul_rgb565, %mm7)
	CLEANUP_IMMQ_LOADS(1)
.rgb565_fast_entry:
	ENTER

	LOAD_IMMQ(m_rb, %mm5)
	LOAD_IMMQ(m_g6, %mm6)
	CLEANUP_IMMQ_LOADS(2)

	LOOP_START

	/*\ Odd width: convert the last pixel of the row on its own \*/
	testl	$1, %ecx
	jz	.L565_pair
	decl	%ecx
	movd	(%esi, %ecx, 4), %mm0
	movq	%mm0, %mm1
	pand	%mm5, %mm0		/*\ red and blue, top 5 bits each \*/
	pand	%mm6, %mm1		/*\ green, top 6 bits \*/
	pmaddwd	%mm7, %mm0		/*\ red and blue moved into place \*/
	por	%mm1, %mm0
	psrad	$5, %mm0		/*\ pixel now in the low 16 bits \*/
	movd	%mm0, %eax
	movw	%ax, (%edi, %ecx, 2)
	jz	3f			/*\ decl reached zero: row done \*/

.L565_pair:
	/*\ Bit 1 of the count set: convert two pixels with one quadword load \*/
	testl	$2, %ecx
	jz	.L565_quad
	subl	$2, %ecx
	movq	(%esi, %ecx, 4), %mm0
	movq	%mm0, %mm1
	pand	%mm5, %mm0
	pand	%mm6, %mm1
	pmaddwd	%mm7, %mm0
	por	%mm1, %mm0
	pslld	$11, %mm0		/*\ pixel into the high word ... \*/
	psrad	$16, %mm0		/*\ ... and back down, sign extended \*/
	packssdw %mm0, %mm0		/*\ sign extended, so nothing saturates \*/
	movd	%mm0, (%edi, %ecx, 2)
	jz	3f			/*\ subl reached zero: row done \*/

.L565_quad:
	/*\ Main loop, four pixels per pass; %mm0/%mm1 and %mm2/%mm3 each
	|*| carry two pixels through the same steps.
	\*/
	subl	$4, %ecx
	movq	(%esi, %ecx, 4), %mm0
	movq	8(%esi, %ecx, 4), %mm2
	movq	%mm0, %mm1		/*\ a r g b (2 x) \*/
	movq	%mm2, %mm3
	pand	%mm5, %mm0		/*\ 0 rrrrr000 0 bbbbb000 (2 x) \*/
	pand	%mm5, %mm2
	pand	%mm6, %mm1		/*\ 0 0 gggggg00 00000000 (2 x) \*/
	pand	%mm6, %mm3
	pmaddwd	%mm7, %mm0		/*\ 0 000rrrrr 000000bb bbb00000 (2 x) \*/
	pmaddwd	%mm7, %mm2
	por	%mm1, %mm0		/*\ 0 000rrrrr ggggggbb bbb00000 (2 x) \*/
	por	%mm3, %mm2
	pslld	$11, %mm0		/*\ rrrrrggg gggbbbbb 0 0 (2 x) \*/
	pslld	$11, %mm2
	psrad	$16, %mm0		/*\ s s rrrrrggg gggbbbbb (2 x), s = sign fill \*/
	psrad	$16, %mm2
	packssdw %mm2, %mm0		/*\ rrrrrggg gggbbbbb (4 x) \*/
	movq	%mm0, (%edi, %ecx, 2)
	jnz	.L565_quad		/*\ flags of the subl above \*/

	LOOP_END
	LEAVE

SIZE(imlib_mmx_rgb565_fast)
165
166
/*\
|*| imlib_mmx_bgr555_fast / imlib_mmx_rgb555_fast
|*|
|*| Convert rows of 32 bit source pixels into 16 bit 5-5-5 pixels.
|*| The two entry points differ only in the multiplier loaded into %mm7;
|*| the bgr entry loads its constant and joins the shared body.
|*|
|*| Register use in the shared body:
|*|   %esi / %edi	source / destination pointer of the current row
|*|   %ebx / %edx	pixels per row / rows left
|*|   %ecx		pixels of the current row still to convert
|*|   %mm5, %mm6	m_rb and m_g5 masks
|*|   %mm7		multiplier that moves red and blue into place
|*|   %eax		scratch, not restored
|*|
|*| A row is converted from its end towards its start: one pixel if the
|*| width is odd, then two pixels if bit 1 of the remaining count is set,
|*| then groups of four until the count reaches zero.  The MMX instructions
|*| and the movd/movw/movq stores leave EFLAGS alone, so each conditional
|*| jump after a store still tests the decl/subl that updated %ecx.
|*|
|*| After the masks, the multiply and the or, a pixel occupies bits 6..20
|*| of its dword, so every path has to shift right by 6.
\*/
PR_(imlib_mmx_bgr555_fast):
	LOAD_IMMQ(mul_bgr555, %mm7)	/*\ the only difference from the rgb entry \*/
	CLEANUP_IMMQ_LOADS(1)
	jmp	.rgb555_fast_entry

SIZE(imlib_mmx_bgr555_fast)

PR_(imlib_mmx_rgb555_fast):
	LOAD_IMMQ(mul_rgb555, %mm7)
	CLEANUP_IMMQ_LOADS(1)
.rgb555_fast_entry:
	ENTER

	LOAD_IMMQ(m_rb, %mm5)
	LOAD_IMMQ(m_g5, %mm6)
	CLEANUP_IMMQ_LOADS(2)

	LOOP_START

	/*\ Odd width: convert the last pixel of the row on its own \*/
	testl	$1, %ecx
	jz	.L555_pair
	decl	%ecx
	movd	(%esi, %ecx, 4), %mm0
	movq	%mm0, %mm1
	pand	%mm5, %mm0		/*\ red and blue, top 5 bits each \*/
	pand	%mm6, %mm1		/*\ green, top 5 bits \*/
	pmaddwd	%mm7, %mm0		/*\ red and blue moved into place \*/
	por	%mm1, %mm0
	/*\ Shift by 6, exactly like the two and four pixel paths below.
	|*| A shift of 5 (the 565 value) stores the pixel one bit too far left.
	\*/
	psrld	$6, %mm0		/*\ pixel now in the low 16 bits \*/
	movd	%mm0, %eax
	movw	%ax, (%edi, %ecx, 2)
	jz	3f			/*\ decl reached zero: row done \*/

.L555_pair:
	/*\ Bit 1 of the count set: convert two pixels with one quadword load \*/
	testl	$2, %ecx
	jz	.L555_quad
	subl	$2, %ecx
	movq	(%esi, %ecx, 4), %mm0
	movq	%mm0, %mm1
	pand	%mm5, %mm0
	pand	%mm6, %mm1
	pmaddwd	%mm7, %mm0
	por	%mm1, %mm0
	psrld	$6, %mm0
	packssdw %mm0, %mm0		/*\ 15 bit values: nothing saturates \*/
	movd	%mm0, (%edi, %ecx, 2)
	jz	3f			/*\ subl reached zero: row done \*/

.L555_quad:
	/*\ Main loop, four pixels per pass; %mm0/%mm1 and %mm2/%mm3 each
	|*| carry two pixels through the same steps.
	\*/
	subl	$4, %ecx
	movq	(%esi, %ecx, 4), %mm0
	movq	8(%esi, %ecx, 4), %mm2
	movq	%mm0, %mm1		/*\ a r g b (2 x) \*/
	movq	%mm2, %mm3
	pand	%mm5, %mm0		/*\ 0 rrrrr000 0 bbbbb000 (2 x) \*/
	pand	%mm5, %mm2
	pand	%mm6, %mm1		/*\ 0 0 ggggg000 00000000 (2 x) \*/
	pand	%mm6, %mm3
	pmaddwd	%mm7, %mm0		/*\ 0 000rrrrr 00000bbb bb000000 (2 x) \*/
	pmaddwd	%mm7, %mm2
	por	%mm1, %mm0		/*\ 0 000rrrrr gggggbbb bb000000 (2 x) \*/
	por	%mm3, %mm2
	psrld	$6, %mm0		/*\ 0 0 0rrrrrgg gggbbbbb (2 x) \*/
	psrld	$6, %mm2
	packssdw %mm2, %mm0		/*\ 0rrrrrgg gggbbbbb (4 x) \*/
	movq	%mm0, (%edi, %ecx, 2)
	jnz	.L555_quad		/*\ flags of the subl above \*/

	LOOP_END
	LEAVE

SIZE(imlib_mmx_rgb555_fast)
244
/*\
|*| imlib_get_cpuid: reads no arguments, result in %eax.
|*|
|*| Returns 0 when bit 21 of EFLAGS (the ID flag) cannot be toggled, which
|*| means the cpuid instruction is unavailable, or when cpuid leaf 0 reports
|*| 0 as the highest supported leaf.  Otherwise returns the %edx value of
|*| cpuid leaf 1 with bits 8..11 replaced by bits 8..11 of the leaf 1 %eax
|*| value.
|*|
|*| %ebx and %edx are preserved; %ecx is overwritten by cpuid.  The EFLAGS
|*| value written by popf is not put back afterwards.
\*/
PR_(imlib_get_cpuid):
	pushl	%ebx
	pushl	%edx

	/*\ Try to flip bit 21 of EFLAGS and see whether the change sticks \*/
	pushfl
	popl	%eax
	movl	%eax, %ebx		/*\ keep the original flags for comparison \*/
	xorl	$0x200000, %eax
	pushl	%eax
	popfl
	pushfl
	popl	%eax
	xorl	%ebx, %eax		/*\ bits that actually changed \*/
	andl	$0x200000, %eax
	jz	.Lcpuid_done		/*\ bit did not change: return 0 \*/

	xorl	%eax, %eax		/*\ leaf 0: highest supported leaf \*/
	cpuid
	testl	%eax, %eax
	jz	.Lcpuid_done		/*\ leaf 1 not available: return 0 \*/

	movl	$1, %eax		/*\ leaf 1 \*/
	cpuid
	andl	$0x00000f00, %eax	/*\ keep bits 8..11 of %eax \*/
	andl	$0xfffff0ff, %edx	/*\ make room for them in %edx \*/
	orl	%edx, %eax
.Lcpuid_done:
	popl	%edx
	popl	%ebx
	ret

SIZE(imlib_get_cpuid)
275
276#endif
277
278#ifdef __ELF__
279.section .note.GNU-stack,"",@progbits
280#endif
281