1 /*
2 * OpenBOR - http://www.chronocrash.com
3 * -----------------------------------------------------------------------
4 * All rights reserved, see LICENSE in OpenBOR root for details.
5 *
6 * Copyright (c) 2004 - 2012 OpenBOR Team
7 */
8
9 /**
10 * This is an implementation of the Scale2x algorithm, also known as
11 * AdvanceMAME2x. Before October 2012, OpenBOR contained a version of the
12 * Scale2x filter licensed under the GPL. Both implementations in this version
13 * (C and MMX) were written from scratch by Plombo based on the description of
14 * the algorithm on the Scale2x website at:
15 * http://scale2x.sourceforge.net/algorithm.html
16 */
17
18 #include "gfx.h"
19 #include "types.h"
20
scale2x_16_pixel_c(void * src0v,void * src1v,void * src2v,void * dst0v,void * dst1v)21 static inline void scale2x_16_pixel_c(void *src0v, void *src1v, void *src2v, void *dst0v, void *dst1v)
22 {
23 u16 *src0 = src0v, *src1 = src1v, *src2 = src2v, *dst0 = dst0v, *dst1 = dst1v;
24 u16 D = *(src0 - 1), E = *src0, F = *(src0 + 1), B = *src1, H = *src2;
25 u16 R1, R2, R3, R4;
26 if (B != H && D != F)
27 {
28 R1 = D == B ? D : E;
29 R2 = F == B ? F : E;
30 R3 = D == H ? D : E;
31 R4 = F == H ? F : E;
32 }
33 else
34 {
35 R1 = R2 = R3 = R4 = E;
36 }
37
38 *dst0 = R1;
39 *(dst0 + 1) = R2;
40 *dst1 = R3;
41 *(dst1 + 1) = R4;
42 }
43
scale2x_32_pixel_c(void * src0v,void * src1v,void * src2v,void * dst0v,void * dst1v)44 static inline void scale2x_32_pixel_c(void *src0v, void *src1v, void *src2v, void *dst0v, void *dst1v)
45 {
46 u32 *src0 = src0v, *src1 = src1v, *src2 = src2v, *dst0 = dst0v, *dst1 = dst1v;
47 u32 D = *(src0 - 1), E = *src0, F = *(src0 + 1), B = *src1, H = *src2;
48 u32 R1, R2, R3, R4;
49 if (B != H && D != F)
50 {
51 R1 = D == B ? D : E;
52 R2 = F == B ? F : E;
53 R3 = D == H ? D : E;
54 R4 = F == H ? F : E;
55 }
56 else
57 {
58 R1 = R2 = R3 = R4 = E;
59 }
60
61 *dst0 = R1;
62 *(dst0 + 1) = R2;
63 *dst1 = R3;
64 *(dst1 + 1) = R4;
65 }
66
67 #if MMX
/*
 * MMX Scale2x kernel for 16-bit pixels. Each call handles four consecutive
 * source pixels: every movq moves 8 bytes and the comparisons (pcmpeqw) work
 * on 16-bit lanes.
 *
 * Register assignment, fixed by the asm constraints:
 *   eax = src0 (pixels being scaled, E)   edi = src1 (B)   ecx = src2 (H)
 *   edx = dst0 (first output row)         esi = dst1 (second output row)
 *
 * Memory touched per call:
 *   - src0 is read from 2 bytes before the pointer (D) up to 10 bytes after
 *     it (F), so those bytes must be readable.
 *   - 8 bytes are read at each of src1 and src2.
 *   - 16 bytes (eight pixels) are written at each of dst0 and dst1.
 *
 * No emms is executed here; AdMame2x issues it once after its loops.
 *
 * NOTE(review): the register names are 32-bit (eax, edi, ...), so this is
 * presumably only built for 32-bit x86 - confirm against the build flags.
 * NOTE(review): mm0-mm7 are modified but are not in the clobber list - confirm
 * this is safe with the compilers in use.
 */
static inline void scale2x_16_pixel_mmx(void *src0, void *src1, void *src2, void *dst0, void *dst1)
{
    __asm__ __volatile__ (
        "# load pixels surrounding input pixel\n"
        "movq -2(%%eax),%%mm0 # mm0 := D\n"
        "movq 2(%%eax),%%mm1 # mm1 := F\n"
        "movq (%%edi),%%mm2 # mm2 := B\n"
        "movq (%%ecx),%%mm3 # mm3 := H\n"
        "\n"
        /* Comparing the OR of the two equality masks against zero inverts it,
         * giving a lane mask that is all-ones where B != H and D != F. */
        "# mm4 := ~((B==H)|(D==F))\n"
        "movq %%mm2,%%mm4\n"
        "pcmpeqw %%mm3,%%mm4\n"
        "movq %%mm0,%%mm5\n"
        "pcmpeqw %%mm1,%%mm5\n"
        "por %%mm5,%%mm4\n"
        "pxor %%mm7,%%mm7\n"
        "pcmpeqw %%mm7,%%mm4\n"
        "\n"
        /* One selection mask per output corner. mm3 (H) is consumed by the
         * last comparison, and mm2 (B) is free to be reloaded with E below. */
        "# calculate boolean conditions\n"
        "movq %%mm0,%%mm5\n"
        "pcmpeqw %%mm2,%%mm5\n"
        "pand %%mm4,%%mm5 # mm5 := (D == B) & mm4\n"
        "movq %%mm1,%%mm6\n"
        "pcmpeqw %%mm2,%%mm6\n"
        "pand %%mm4,%%mm6 # mm6 := (F == B) & mm4\n"
        "movq %%mm0,%%mm7\n"
        "pcmpeqw %%mm3,%%mm7\n"
        "pand %%mm4,%%mm7 # mm7 := (D == H) & mm4\n"
        "pcmpeqw %%mm1,%%mm3\n"
        "pand %%mm4,%%mm3 # mm3 := (F == H) & mm4\n"
        "\n"
        "# fetch input pixel E\n"
        "movq (%%eax),%%mm2 # mm2 := E\n"
        "\n"
        /* Branch-free select, repeated four times with mm4 as scratch:
         * result = (mask & neighbour) | (~mask & E). */
        "# calculate output pixel values\n"
        "movq %%mm5,%%mm4\n"
        "pandn %%mm2,%%mm4\n"
        "pand %%mm0,%%mm5\n"
        "por %%mm4,%%mm5 # mm5 := R0\n"
        "movq %%mm6,%%mm4\n"
        "pandn %%mm2,%%mm4\n"
        "pand %%mm1,%%mm6\n"
        "por %%mm4,%%mm6 # mm6 := R1\n"
        "movq %%mm7,%%mm4\n"
        "pandn %%mm2,%%mm4\n"
        "pand %%mm0,%%mm7\n"
        "por %%mm4,%%mm7 # mm7 := R2\n"
        "movq %%mm3,%%mm4\n"
        "pandn %%mm2,%%mm4\n"
        "pand %%mm1,%%mm3\n"
        "por %%mm4,%%mm3 # mm3 := R3\n"
        "\n"
        /* The stores below interleave the results: R0 goes to the even 16-bit
         * slots of dst0 and R1 to the odd ones; R2 and R3 do the same in dst1.
         * eax is reused as scratch from here on, so src0 is no longer
         * available in it. */
        "# write the R0 pixels to memory\n"
        "movd %%mm5,%%eax\n"
        "movw %%ax,(%%edx) # far left pixel\n"
        "shrl $16,%%eax\n"
        "movw %%ax,4(%%edx) # middle left pixel\n"
        "psrlq $32,%%mm5\n"
        "movd %%mm5,%%eax\n"
        "movw %%ax,8(%%edx) # middle right pixel\n"
        "shrl $16,%%eax\n"
        "movw %%ax,12(%%edx) # far right pixel\n"
        "\n"
        "# write the R1 pixels to memory\n"
        "movd %%mm6,%%eax\n"
        "movw %%ax,2(%%edx) # far left pixel\n"
        "shrl $16,%%eax\n"
        "movw %%ax,6(%%edx) # middle left pixel\n"
        "psrlq $32,%%mm6\n"
        "movd %%mm6,%%eax\n"
        "movw %%ax,10(%%edx) # middle right pixel\n"
        "shrl $16,%%eax\n"
        "movw %%ax,14(%%edx) # far right pixel\n"
        "\n"
        "# write the R2 pixels to memory\n"
        "movd %%mm7,%%eax\n"
        "movw %%ax,(%%esi) # far left pixel\n"
        "shrl $16,%%eax\n"
        "movw %%ax,4(%%esi) # middle left pixel\n"
        "psrlq $32,%%mm7\n"
        "movd %%mm7,%%eax\n"
        "movw %%ax,8(%%esi) # middle right pixel\n"
        "shrl $16,%%eax\n"
        "movw %%ax,12(%%esi) # far right pixel\n"
        "\n"
        "# write the R3 pixels to memory\n"
        "movd %%mm3,%%eax\n"
        "movw %%ax,2(%%esi) # far left pixel\n"
        "shrl $16,%%eax\n"
        "movw %%ax,6(%%esi) # middle left pixel\n"
        "psrlq $32,%%mm3\n"
        "movd %%mm3,%%eax\n"
        "movw %%ax,10(%%esi) # middle right pixel\n"
        "shrl $16,%%eax\n"
        "movw %%ax,14(%%esi) # far right pixel\n"
        /* eax is listed as an output as well as an input because the asm
         * overwrites it. */
        : "=a" (src0)
        : "a" (src0), "D" (src1), "c" (src2), "d" (dst0), "S" (dst1)
        : "cc", "memory"
    );
}
168
/*
 * MMX Scale2x kernel for 32-bit pixels. Each call handles two consecutive
 * source pixels: every movq moves 8 bytes and the comparisons (pcmpeqd) work
 * on 32-bit lanes.
 *
 * Register assignment, fixed by the asm constraints:
 *   eax = src0 (pixels being scaled, E)   edi = src1 (B)   ecx = src2 (H)
 *   edx = dst0 (first output row)         esi = dst1 (second output row)
 *
 * Memory touched per call:
 *   - src0 is read from 4 bytes before the pointer (D) up to 12 bytes after
 *     it (F), so those bytes must be readable.
 *   - 8 bytes are read at each of src1 and src2.
 *   - 16 bytes (four pixels) are written at each of dst0 and dst1.
 *
 * No emms is executed here; AdMame2x32 issues it once after its loops.
 *
 * NOTE(review): the register names are 32-bit (eax, edi, ...), so this is
 * presumably only built for 32-bit x86 - confirm against the build flags.
 * NOTE(review): mm0-mm7 are modified but are not in the clobber list - confirm
 * this is safe with the compilers in use.
 */
static inline void scale2x_32_pixel_mmx(void *src0, void *src1, void *src2, void *dst0, void *dst1)
{
    __asm__ __volatile__ (
        "# load pixels surrounding input pixel\n"
        "movq -4(%%eax),%%mm0 # mm0 := D\n"
        "movq 4(%%eax),%%mm1 # mm1 := F\n"
        "movq (%%edi),%%mm2 # mm2 := B\n"
        "movq (%%ecx),%%mm3 # mm3 := H\n"
        "\n"
        /* Comparing the OR of the two equality masks against zero inverts it,
         * giving a lane mask that is all-ones where B != H and D != F. */
        "# mm4 := ~((B==H)|(D==F))\n"
        "movq %%mm2,%%mm4\n"
        "pcmpeqd %%mm3,%%mm4\n"
        "movq %%mm0,%%mm5\n"
        "pcmpeqd %%mm1,%%mm5\n"
        "por %%mm5,%%mm4\n"
        "pxor %%mm7,%%mm7\n"
        "pcmpeqd %%mm7,%%mm4\n"
        "\n"
        /* One selection mask per output corner. mm3 (H) is consumed by the
         * last comparison, and mm2 (B) is free to be reloaded with E below. */
        "# calculate boolean conditions\n"
        "movq %%mm0,%%mm5\n"
        "pcmpeqd %%mm2,%%mm5\n"
        "pand %%mm4,%%mm5 # mm5 := (D == B) & mm4\n"
        "movq %%mm1,%%mm6\n"
        "pcmpeqd %%mm2,%%mm6\n"
        "pand %%mm4,%%mm6 # mm6 := (F == B) & mm4\n"
        "movq %%mm0,%%mm7\n"
        "pcmpeqd %%mm3,%%mm7\n"
        "pand %%mm4,%%mm7 # mm7 := (D == H) & mm4\n"
        "pcmpeqd %%mm1,%%mm3\n"
        "pand %%mm4,%%mm3 # mm3 := (F == H) & mm4\n"
        "\n"
        "# fetch input pixel E\n"
        "movq (%%eax),%%mm2 # mm2 := E\n"
        "\n"
        /* Branch-free select, repeated four times with mm4 as scratch:
         * result = (mask & neighbour) | (~mask & E). */
        "# calculate output pixel values\n"
        "movq %%mm5,%%mm4\n"
        "pandn %%mm2,%%mm4\n"
        "pand %%mm0,%%mm5\n"
        "por %%mm4,%%mm5 # mm5 := R0\n"
        "movq %%mm6,%%mm4\n"
        "pandn %%mm2,%%mm4\n"
        "pand %%mm1,%%mm6\n"
        "por %%mm4,%%mm6 # mm6 := R1\n"
        "movq %%mm7,%%mm4\n"
        "pandn %%mm2,%%mm4\n"
        "pand %%mm0,%%mm7\n"
        "por %%mm4,%%mm7 # mm7 := R2\n"
        "movq %%mm3,%%mm4\n"
        "pandn %%mm2,%%mm4\n"
        "pand %%mm1,%%mm3\n"
        "por %%mm4,%%mm3 # mm3 := R3\n"
        "\n"
        /* The stores below interleave the results: R0 goes to the even 32-bit
         * slots of dst0 and R1 to the odd ones; R2 and R3 do the same in dst1.
         * eax is reused as scratch from here on, so src0 is no longer
         * available in it. */
        "# write the R0 pixels to memory\n"
        "movd %%mm5,%%eax\n"
        "movl %%eax,(%%edx) # left pixel\n"
        "psrlq $32,%%mm5\n"
        "movd %%mm5,%%eax\n"
        "movl %%eax,8(%%edx) # right pixel\n"
        "\n"
        "# write the R1 pixels to memory\n"
        "movd %%mm6,%%eax\n"
        "movl %%eax,4(%%edx) # left pixel\n"
        "psrlq $32,%%mm6\n"
        "movd %%mm6,%%eax\n"
        "movl %%eax,12(%%edx) # right pixel\n"
        "\n"
        "# write the R2 pixels to memory\n"
        "movd %%mm7,%%eax\n"
        "movl %%eax,(%%esi) # left pixel\n"
        "psrlq $32,%%mm7\n"
        "movd %%mm7,%%eax\n"
        "movl %%eax,8(%%esi) # right pixel\n"
        "\n"
        "# write the R3 pixels to memory\n"
        "movd %%mm3,%%eax\n"
        "movl %%eax,4(%%esi) # left pixel\n"
        "psrlq $32,%%mm3\n"
        "movd %%mm3,%%eax\n"
        "movl %%eax,12(%%esi) # right pixel\n"
        /* eax is listed as an output as well as an input because the asm
         * overwrites it. */
        : "=a" (src0)
        : "a" (src0), "D" (src1), "c" (src2), "d" (dst0), "S" (dst1)
        : "cc", "memory"
    );
}
253 #endif
254
// Select the per-pixel kernels for this build. increment16 / increment32 are
// the number of source pixels the row loops advance by per kernel call: the
// MMX kernels consume 8 bytes of source at a time (four 16-bit or two 32-bit
// pixels), while the C kernels handle one pixel per call.
#if MMX
#define scale2x_16_pixel scale2x_16_pixel_mmx
#define scale2x_32_pixel scale2x_32_pixel_mmx
#define increment16 4
#define increment32 2
#else
#define scale2x_16_pixel scale2x_16_pixel_c
#define scale2x_32_pixel scale2x_32_pixel_c
#define increment16 1
#define increment32 1
#endif
266
/**
 * Scale a 16-bit-per-pixel image to twice its width and height using the
 * Scale2x (AdvanceMAME2x) filter.
 *
 * @param srcPtr   first pixel of the source image
 * @param srcPitch distance in bytes between the starts of consecutive source rows
 * @param deltaPtr not used by this filter
 * @param dstPtr   first pixel of the destination image; two destination rows
 *                 are written per source row, each holding two pixels per
 *                 source pixel
 * @param dstPitch distance in bytes between the starts of consecutive
 *                 destination rows
 * @param width    source width in pixels
 * @param height   source height in pixels
 *
 * The per-pixel kernel always reads the pixels immediately before and after
 * the one it scales, including at both ends of every row, so the source
 * buffer must be readable slightly beyond each row. In MMX builds the row
 * loop advances increment16 pixels at a time, so a width that is not a
 * multiple of increment16 is processed (and written) past `width`.
 */
void AdMame2x(u8 *srcPtr, u32 srcPitch, u8 *deltaPtr, u8 *dstPtr, u32 dstPitch, int width, int height)
{
    int x, y;

    (void)deltaPtr;

    for (y = 0; y < height; y++)
    {
        // src0 is the row being scaled; src1/src2 are the rows above and below
        // it. The first and last rows stand in for their missing neighbour.
        u8 *src0 = srcPtr + srcPitch * y;
        u8 *src1 = (y == 0) ? src0 : src0 - srcPitch;
        // The row below must be derived from src0: src1 + srcPitch is src0
        // itself whenever y > 0, which disabled the filter for the lower half
        // of every output cell.
        u8 *src2 = (y == height - 1) ? src0 : src0 + srcPitch;
        u8 *dst0 = dstPtr + dstPitch * y * 2;
        u8 *dst1 = dst0 + dstPitch;

        // 2 bytes per source pixel, 4 bytes (two pixels) per destination row.
        for (x = 0; x < width; x += increment16)
        {
            scale2x_16_pixel(src0 + 2 * x, src1 + 2 * x, src2 + 2 * x, dst0 + 4 * x, dst1 + 4 * x);
        }
    }

#if MMX
    // done with MMX instructions, so tell the processor to restore floating-point state
    __asm__ __volatile__ ("emms");
#endif
}
289
/**
 * Scale a 32-bit-per-pixel image to twice its width and height using the
 * Scale2x (AdvanceMAME2x) filter.
 *
 * @param srcPtr   first pixel of the source image
 * @param srcPitch distance in bytes between the starts of consecutive source rows
 * @param deltaPtr not used by this filter
 * @param dstPtr   first pixel of the destination image; two destination rows
 *                 are written per source row, each holding two pixels per
 *                 source pixel
 * @param dstPitch distance in bytes between the starts of consecutive
 *                 destination rows
 * @param width    source width in pixels
 * @param height   source height in pixels
 *
 * The per-pixel kernel always reads the pixels immediately before and after
 * the one it scales, including at both ends of every row, so the source
 * buffer must be readable slightly beyond each row. In MMX builds the row
 * loop advances increment32 pixels at a time, so a width that is not a
 * multiple of increment32 is processed (and written) past `width`.
 */
void AdMame2x32(u8 *srcPtr, u32 srcPitch, u8 *deltaPtr, u8 *dstPtr, u32 dstPitch, int width, int height)
{
    int x, y;

    (void)deltaPtr;

    for (y = 0; y < height; y++)
    {
        // src0 is the row being scaled; src1/src2 are the rows above and below
        // it. The first and last rows stand in for their missing neighbour.
        u8 *src0 = srcPtr + srcPitch * y;
        u8 *src1 = (y == 0) ? src0 : src0 - srcPitch;
        // The row below must be derived from src0: src1 + srcPitch is src0
        // itself whenever y > 0, which disabled the filter for the lower half
        // of every output cell.
        u8 *src2 = (y == height - 1) ? src0 : src0 + srcPitch;
        u8 *dst0 = dstPtr + dstPitch * y * 2;
        u8 *dst1 = dst0 + dstPitch;

        // 4 bytes per source pixel, 8 bytes (two pixels) per destination row.
        for (x = 0; x < width; x += increment32)
        {
            scale2x_32_pixel(src0 + 4 * x, src1 + 4 * x, src2 + 4 * x, dst0 + 8 * x, dst1 + 8 * x);
        }
    }

#if MMX
    // done with MMX instructions, so tell the processor to restore floating-point state
    __asm__ __volatile__ ("emms");
#endif
}
311
312