#include <config.h>
#include "asm.h"

#ifdef DO_MMX_ASM

/*\
|*| MMX assembly rgba rendering routines for Imlib2
|*| Written by Willem Monsuwe <willem@stack.nl>
|*|
|*| Syntax: GNU as, AT&T operand order, 32-bit x86, run through cpp.
|*| Special (hairy) constructs are only commented on first use.
\*/

/*\ All functions have the same calling convention:
|*|   __imlib_mmx_rgbXXX(void *src, int sjmp, void *dst, int dw,
|*|                      int w, int h, int dx, int dy)
|*|
|*| Arguments are read from the stack relative to %ebp (see ENTER):
|*|   src   source pixels, 4 bytes each
|*|   sjmp  source pixels to skip at the end of each row
|*|         (ENTER adds w to it, LOOP_END scales it by 4)
|*|   dst   destination pixels, 2 bytes each
|*|   dw    added to the destination pointer after each row, in bytes
|*|   w, h  pixels per row and number of rows
|*|   dx,dy not referenced by the routines in this file
\*/

#define src	8(%ebp)
#define sjmp	12(%ebp)
#define dst	16(%ebp)
#define dw	20(%ebp)
#define w	24(%ebp)
#define h	28(%ebp)
#define dx	32(%ebp)
#define dy	36(%ebp)

.text
	.align 8
FN_(imlib_mmx_rgb565_fast)
FN_(imlib_mmx_bgr565_fast)
FN_(imlib_mmx_rgb555_fast)
FN_(imlib_mmx_bgr555_fast)

FN_(imlib_get_cpuid)

#include "asm_loadimmq.S"

/*\ Common code \*/

/*\ ENTER: set up the frame, save registers, load common parameters.
|*| On exit: %esi = src, %edi = dst, %ebx = w, %edx = h, and the sjmp
|*| stack slot has been rewritten in place to sjmp + w, i.e. the full
|*| source row advance in pixels.
|*| %eax is used as scratch by the routines and is not saved.
\*/
#define ENTER			\
	pushl %ebp		;\
	movl  %esp, %ebp	;\
	pushl %ebx		;\
	pushl %ecx		;\
	pushl %edx		;\
	pushl %edi		;\
	pushl %esi		;\
	movl  src, %esi		;\
	movl  dst, %edi		;\
	movl  w, %ebx		;\
	movl  h, %edx		;\
	addl  %ebx, sjmp

/*\ LOOP_START: skip everything (jump to 4:) when h or w is zero.
|*| Label 0: is the top of the per-row loop, where %ecx is reloaded
|*| with the row width.  The row body counts %ecx down to zero.
\*/
#define LOOP_START		\
	testl %edx, %edx	;\
	jz    4f		;\
	testl %ebx, %ebx	;\
	jz    4f		;\
0:				\
	movl  %ebx, %ecx

/*\ LOOP_END: label 3: is the end of a row.  Advance %esi by the
|*| adjusted sjmp (times 4 bytes) and %edi by dw bytes, then loop
|*| while rows remain in %edx.  Label 4: is the common exit.
\*/
#define LOOP_END			\
3:					\
	movl  sjmp, %ecx		;\
	leal  (%esi, %ecx, 4), %esi	;\
	addl  dw, %edi			;\
	decl  %edx			;\
	jnz   0b			;\
4:

/*\ Unset MMX mode, reset registers, return \*/
#define LEAVE			\
	emms			;\
	popl  %esi		;\
	popl  %edi		;\
	popl  %edx		;\
	popl  %ecx		;\
	popl  %ebx		;\
	movl  %ebp, %esp	;\
	popl  %ebp		;\
	ret


/*\ 32 bit -> 16 bit (5-6-5) conversion.
|*| The bgr entry point only loads a different multiplier into %mm7
|*| and then shares the rgb body.
|*| Each row is converted from its end towards its start: first the
|*| odd trailing pixel (if w & 1), then one pair (if w & 2), then
|*| groups of four.
|*| The jz / jnz after each store test the flags left by the preceding
|*| decl / subl on %ecx; the MMX and mov instructions in between do not
|*| modify EFLAGS.
\*/
PR_(imlib_mmx_bgr565_fast):
	LOAD_IMMQ(mul_bgr565, %mm7) /*\ This constant is the only difference \*/
	CLEANUP_IMMQ_LOADS(1)
	jmp .rgb565_fast_entry

SIZE(imlib_mmx_bgr565_fast)

PR_(imlib_mmx_rgb565_fast):
	LOAD_IMMQ(mul_rgb565, %mm7)
	CLEANUP_IMMQ_LOADS(1)
.rgb565_fast_entry:
	ENTER

	LOAD_IMMQ(m_rb, %mm5)		/*\ Mask for the red and blue bits \*/
	LOAD_IMMQ(m_g6, %mm6)		/*\ Mask for the 6 green bits \*/
	CLEANUP_IMMQ_LOADS(2)

	LOOP_START

	/*\ Odd width: convert the last pixel of the row on its own \*/
	test $1, %ecx
	jz 1f
	decl %ecx
	movd (%esi, %ecx, 4), %mm0
	movq %mm0, %mm1
	pand %mm5, %mm0
	pand %mm6, %mm1
	pmaddwd %mm7, %mm0
	por %mm1, %mm0
	psrad $5, %mm0		/*\ Same net shift as pslld 11 + psrad 16 \*/

	movd %mm0, %eax
	movw %ax, (%edi, %ecx, 2)	/*\ Only the low 16 bits are stored \*/

	jz 3f			/*\ Row done if that was the only pixel \*/
1:
	/*\ Width has bit 1 set: convert one pair of pixels \*/
	test $2, %ecx
	jz 2f
	subl $2, %ecx
	movq (%esi, %ecx, 4), %mm0
	movq %mm0, %mm1
	pand %mm5, %mm0
	pand %mm6, %mm1
	pmaddwd %mm7, %mm0
	por %mm1, %mm0
	pslld $11, %mm0
	psrad $16, %mm0

	packssdw %mm0, %mm0

	movd %mm0, (%edi, %ecx, 2)

	jz 3f
2:
	/*\ Main loop: four pixels per iteration \*/
	subl $4, %ecx
	movq (%esi, %ecx, 4), %mm0
	movq 8(%esi, %ecx, 4), %mm2
	movq %mm0, %mm1		/*\  a r g b (2x) \*/
	movq %mm2, %mm3
	pand %mm5, %mm0		/*\  0 rrrrr000 0 bbbbb000 (2 x) \*/
	pand %mm5, %mm2
	pand %mm6, %mm1		/*\  0 0 gggggg00 00000000 (2 x) \*/
	pand %mm6, %mm3
	pmaddwd %mm7, %mm0	/*\  0 000rrrrr 000000bb bbb00000 (2 x) \*/
	pmaddwd %mm7, %mm2
	por %mm1, %mm0		/*\  0 000rrrrr ggggggbb bbb00000 (2 x) \*/
	por %mm3, %mm2
	pslld $11, %mm0		/*\  rrrrrggg gggbbbbb 0 0 (2 x) \*/
	pslld $11, %mm2
	psrad $16, %mm0		/*\  x x rrrrrggg gggbbbbb (2 x) \*/
	psrad $16, %mm2

	packssdw %mm2, %mm0	/*\  rrrrrggg gggbbbbb (4 x) \*/

	movq %mm0, (%edi, %ecx, 2)

	jnz 2b
	LOOP_END
	LEAVE

SIZE(imlib_mmx_rgb565_fast)


/*\ 32 bit -> 16 bit (5-5-5) conversion.
|*| Same structure as the 565 routines above, with a 5 bit green mask
|*| and a different multiplier.  Here blue ends up at bits 6-10 after
|*| the pmaddwd (see the bit layouts in the main loop), so every path
|*| has to shift right by 6.
\*/
PR_(imlib_mmx_bgr555_fast):
	LOAD_IMMQ(mul_bgr555, %mm7) /*\ This constant is the only difference \*/
	CLEANUP_IMMQ_LOADS(1)
	jmp .rgb555_fast_entry

SIZE(imlib_mmx_bgr555_fast)

PR_(imlib_mmx_rgb555_fast):
	LOAD_IMMQ(mul_rgb555, %mm7)
	CLEANUP_IMMQ_LOADS(1)
.rgb555_fast_entry:
	ENTER

	LOAD_IMMQ(m_rb, %mm5)		/*\ Mask for the red and blue bits \*/
	LOAD_IMMQ(m_g5, %mm6)		/*\ Mask for the 5 green bits \*/
	CLEANUP_IMMQ_LOADS(2)

	LOOP_START

	/*\ Odd width: convert the last pixel of the row on its own \*/
	test $1, %ecx
	jz 1f
	decl %ecx
	movd (%esi, %ecx, 4), %mm0
	movq %mm0, %mm1
	pand %mm5, %mm0
	pand %mm6, %mm1
	pmaddwd %mm7, %mm0
	por %mm1, %mm0
	psrad $6, %mm0		/*\ Must match the shift by 6 used below \*/

	movd %mm0, %eax
	movw %ax, (%edi, %ecx, 2)	/*\ Only the low 16 bits are stored \*/

	jz 3f			/*\ Row done if that was the only pixel \*/
1:
	/*\ Width has bit 1 set: convert one pair of pixels \*/
	test $2, %ecx
	jz 2f
	subl $2, %ecx
	movq (%esi, %ecx, 4), %mm0
	movq %mm0, %mm1
	pand %mm5, %mm0
	pand %mm6, %mm1
	pmaddwd %mm7, %mm0
	por %mm1, %mm0
	psrld $6, %mm0

	packssdw %mm0, %mm0

	movd %mm0, (%edi, %ecx, 2)

	jz 3f
2:
	/*\ Main loop: four pixels per iteration \*/
	subl $4, %ecx
	movq (%esi, %ecx, 4), %mm0
	movq 8(%esi, %ecx, 4), %mm2
	movq %mm0, %mm1		/*\  a r g b (2x) \*/
	movq %mm2, %mm3
	pand %mm5, %mm0		/*\  0 rrrrr000 0 bbbbb000 (2 x) \*/
	pand %mm5, %mm2
	pand %mm6, %mm1		/*\  0 0 ggggg000 00000000 (2 x) \*/
	pand %mm6, %mm3
	pmaddwd %mm7, %mm0	/*\  0 000rrrrr 00000bbb bb000000 (2 x) \*/
	pmaddwd %mm7, %mm2
	por %mm1, %mm0		/*\  0 000rrrrr gggggbbb bb000000 (2 x) \*/
	por %mm3, %mm2
	psrld $6, %mm0		/*\  0 0 0rrrrrgg gggbbbbb (2 x) \*/
	psrld $6, %mm2

	packssdw %mm2, %mm0	/*\  0rrrrrgg gggbbbbb (4 x) \*/

	movq %mm0, (%edi, %ecx, 2)

	jnz 2b
	LOOP_END
	LEAVE

SIZE(imlib_mmx_rgb555_fast)

/*\ imlib_get_cpuid: takes no arguments, result in %eax.
|*| Returns 0 when bit 21 of EFLAGS (the ID flag) cannot be toggled,
|*| or when CPUID leaf 0 reports 0 as the highest supported leaf.
|*| Otherwise returns CPUID leaf 1 data packed as
|*|   (eax & 0x00000f00) | (edx & 0xfffff0ff)
|*| i.e. bits 8-11 come from %eax and all other bits from %edx.
|*| %ebx and %edx are preserved; %ecx is overwritten by cpuid and is
|*| not restored.  The toggled EFLAGS bit is left as written.
\*/
PR_(imlib_get_cpuid):
	pushl %ebx
	pushl %edx

	pushf
	popl %eax		/*\ eax = current EFLAGS \*/
	movl %eax, %ebx		/*\ Keep a copy for the comparison \*/
	xorl $0x200000, %eax	/*\ Flip bit 21 \*/
	pushl %eax
	popf
	pushf
	popl %eax		/*\ Read EFLAGS back \*/
	xorl %ebx, %eax
	andl $0x200000, %eax	/*\ Non-zero only if the flip stuck \*/
	jz 1f
	xorl %eax, %eax
	cpuid			/*\ Leaf 0: eax = highest supported leaf \*/
	testl %eax, %eax
	jz 1f
	movl $1, %eax
	cpuid			/*\ Leaf 1 \*/
	and $0x00000f00, %eax
	and $0xfffff0ff, %edx
	orl %edx, %eax
1:
	popl %edx
	popl %ebx
	ret

SIZE(imlib_get_cpuid)

#endif

#ifdef __ELF__
.section .note.GNU-stack,"",@progbits
#endif