1 #ifdef HAVE_MMX
2 
3 #define BUFFPOINTNB 16
4 #define BUFFPOINTMASK 0xffff
5 #define BUFFINCR 0xff
6 
7 #include "mmx.h"
8 #include "goom_graphic.h"
9 
10 #define sqrtperte 16
// to compute a % sqrtperte, use a & PERTEMASK
12 #define PERTEMASK 0xf
// to compute a / sqrtperte, use a >> PERTEDEC
14 #define PERTEDEC 4
15 
/*
 * Returns 1 when bit 0 of the value reported by mm_support() is set and 0
 * otherwise (presumably that bit is the MMX capability flag -- confirm
 * against mmx.h).
 */
int mmx_supported (void) {
	int low_bit = mm_support() & 0x1;

	return low_bit;
}
19 
/*
 * Zoom filter, MMX version: every pixel of expix2 becomes a weighted blend of
 * a 2x2 block of pixels read from expix1.
 *
 *   prevX, prevY : width and height of both pixel buffers
 *   expix1       : source pixels (only read)
 *   expix2       : destination pixels; prevX * prevY entries are written
 *   brutS, brutD : two tables of interleaved (x, y) source coordinates, one
 *                  pair per destination pixel, in fixed point with PERTEDEC
 *                  fractional bits
 *   buffratio    : weight used to interpolate from brutS towards brutD,
 *                  scaled by 1 << BUFFPOINTNB (0 selects brutS)
 *   precalCoef   : table indexed by the fractional parts of x and y; each
 *                  entry packs four 8-bit blend weights into one int
 *
 * A coordinate that is negative or has no right / lower neighbour inside the
 * buffer yields a black pixel (all four weights forced to 0).
 *
 * The FPU is handed back in x87 state (emms) before returning, including
 * when the buffer is empty.
 */
void zoom_filter_mmx (int prevX, int prevY,
		      Pixel *expix1, Pixel *expix2,
		      int *brutS, int *brutD, int buffratio,
		      int precalCoef[16][16])
{
	/* first fixed-point coordinate whose 2x2 block would leave the buffer;
	 * unsigned so that negative coordinates also compare as out of range */
	unsigned int ax = (prevX-1)<<PERTEDEC, ay = (prevY-1)<<PERTEDEC;

	int bufsize = prevX * prevY;
	int loop;

	/* mm7 = 0 for the whole loop: it is the zero operand of every unpack.
	 * mm0-mm6 are scratch registers of the per-pixel block below. */
	__asm__ __volatile__ ("pxor %mm7,%mm7");

	for (loop=0; loop<bufsize; loop++)
	{
		int px,py;
		int coeffs;
		const Pixel *row0;	/* pixel pair on the upper row of the 2x2 block */
		const Pixel *row1;	/* pixel pair one buffer row below row0 */

		int myPos = loop << 1,
		myPos2 = myPos + 1;
		int brutSmypos = brutS[myPos];

		/* interpolate the source coordinates between the two tables */
		px = brutSmypos + (((brutD[myPos] - brutSmypos)*buffratio) >> BUFFPOINTNB);
		brutSmypos = brutS[myPos2];
		py = brutSmypos + (((brutD[myPos2] - brutSmypos)*buffratio) >> BUFFPOINTNB);

		if (((unsigned int)py >= ay) || ((unsigned int)px >= ax)) {
			/* out of range: read the first pixels with zero weights */
			row0 = expix1;
			coeffs = 0;
		}
		else {
			row0 = expix1 + ((px >> PERTEDEC) + prevX * (py >> PERTEDEC));
			/* weights selected by the fractional parts of px and py */
			coeffs = precalCoef [px & PERTEMASK][py & PERTEMASK];
		}
		/* The second row address is computed here in C.  The previous
		 * "addl 8(%ebp)" fetched prevX through a hard-coded stack slot
		 * and modified an input-only asm operand. */
		row1 = row0 + prevX;

		__asm__ __volatile__ (
		"movd %1, %%mm6 \n\t"		/* mm6 = 0-0-0-0-c4-c3-c2-c1 */

		/* fetch the two pixels of the upper row into mm0 and mm1 */
		"movq (%2), %%mm0 \n\t"
		"movq %%mm0, %%mm1 \n\t"

		/* unpack the first pixel (low half) to one 16-bit word per channel */
		"punpcklbw %%mm7, %%mm0 \n\t"

		"movq %%mm6, %%mm5 \n\t"
		/* unpack the second pixel (high half) */
		"punpckhbw %%mm7, %%mm1 \n\t"

		/* spread the coefficients... */
		"punpcklbw %%mm5, %%mm6 \n\t"	/* c4-c4-c3-c3-c2-c2-c1-c1 */
		"movq %%mm6, %%mm4 \n\t"
		"movq %%mm6, %%mm5 \n\t"

		"punpcklbw %%mm5, %%mm6 \n\t"	/* c2-c2-c2-c2-c1-c1-c1-c1 */
		"punpckhbw %%mm5, %%mm4 \n\t"	/* c4-c4-c4-c4-c3-c3-c3-c3 */

		"movq %%mm6, %%mm3 \n\t"

		"punpcklbw %%mm7, %%mm6 \n\t"	/* 00-c1-00-c1-00-c1-00-c1 */
		"punpckhbw %%mm7, %%mm3 \n\t"	/* 00-c2-00-c2-00-c2-00-c2 */

		/* multiply the pixels by their coefficients */
		"pmullw %%mm6, %%mm0 \n\t"		/* first pixel  * c1 */
		"pmullw %%mm3, %%mm1 \n\t"		/* second pixel * c2 */
		"paddw %%mm1, %%mm0 \n\t"

		/* ...spread the last two coefficients */
		"movq %%mm4, %%mm5 \n\t"
		"punpcklbw %%mm7, %%mm4 \n\t"	/* 00-c3-00-c3-00-c3-00-c3 */
		"punpckhbw %%mm7, %%mm5 \n\t"	/* 00-c4-00-c4-00-c4-00-c4 */

		/* fetch the two pixels of the lower row */
		"movq (%3), %%mm1 \n\t"
		"movq %%mm1, %%mm2 \n\t"

		/* unpack them */
		"punpcklbw %%mm7, %%mm1 \n\t"
		"punpckhbw %%mm7, %%mm2 \n\t"

		/* multiply by the coefficients */
		"pmullw %%mm4, %%mm1 \n\t"		/* third pixel  * c3 */
		"pmullw %%mm5, %%mm2 \n\t"		/* fourth pixel * c4 */

		/* accumulate into the final value */
		"paddw %%mm1, %%mm0 \n\t"
		"paddw %%mm2, %%mm0 \n\t"

		/* divide every channel by 256, then repack the final pixel */
		"psrlw $8, %%mm0 \n\t"
		"packuswb %%mm7, %%mm0 \n\t"

		"movd %%mm0,%0 \n\t"
		  :"=g"(expix2[loop])
		  :"r"(coeffs),"r"(row0),"r"(row1)
		  :"memory"	/* the block reads *row0 and *row1 behind the compiler's back */
		);
	}

	/* leave MMX state once, after the last pixel (and also when the loop
	 * never ran, since the pxor above already entered MMX state) */
	emms();
}
125 
/*
 * Additive pixel blend built from the mmx.h helper macros: moves _backbuf
 * into mm0, adds _col to it with paddusb (going by the instruction name, a
 * per-byte unsigned saturating add), and moves the result to _out.
 * mm0 is overwritten.
 *
 * Wrapped in do { } while (0) so that the macro followed by a semicolon
 * behaves as a single statement, including inside an unbraced if/else.
 */
#define DRAWMETHOD_PLUS_MMX(_out,_backbuf,_col) \
do { \
	movd_m2r(_backbuf, mm0); \
	paddusb_m2r(_col, mm0); \
	movd_r2m(mm0, _out); \
} while (0)

/* Blends `col` into the pixel at `p`; both names must exist where this is expanded. */
#define DRAWMETHOD DRAWMETHOD_PLUS_MMX(*p,*p,col)
134 
/*
 * Draws a line between (x1,y1) and (x2,y2) into `data`, a pixel buffer of
 * screenx columns by screeny rows stored row after row.  `col` is blended
 * into every pixel of the line through DRAWMETHOD.
 *
 * No clipping is done: if either endpoint lies outside the buffer, nothing
 * is drawn.  Slanted lines are stepped along their major axis with 16.16
 * fixed-point increments on the minor axis.
 *
 * emms() is executed on every exit path.
 */
void draw_line_mmx (Pixel *data, int x1, int y1, int x2, int y2, int col, int screenx, int screeny)
{
	int x, y, dx, dy, yy, xx;
	Pixel *p;

	if ((y1 < 0) || (y2 < 0) || (x1 < 0) || (x2 < 0) || (y1 >= screeny) || (y2 >= screeny) || (x1 >= screenx) || (x2 >= screenx))
		goto end_of_line;

	/* order the endpoints so that x1 <= x2 from here on */
	if (x1 >= x2) {
		int tmp;

		tmp = x1;
		x1 = x2;
		x2 = tmp;
		tmp = y1;
		y1 = y2;
		y2 = tmp;
	}
	dx = x2 - x1;	/* >= 0 */
	dy = y2 - y1;	/* any sign */

	/* vertical line (also covers a single point) */
	if (dx == 0) {
		int first = (y1 < y2) ? y1 : y2;
		int last = (y1 < y2) ? y2 : y1;

		p = &(data[(screenx * first) + x1]);
		for (y = first; y <= last; y++) {
			DRAWMETHOD;
			p += screenx;
		}
		goto end_of_line;
	}
	/* horizontal line; dx > 0 here, so x1 < x2 */
	if (dy == 0) {
		p = &(data[(screenx * y1) + x1]);
		for (x = x1; x <= x2; x++) {
			DRAWMETHOD;
			p++;
		}
		goto end_of_line;
	}
	/* 1    */
	/*  \   */
	/*   \  */
	/*    2 */
	if (y2 > y1) {
		/* steep: one pixel per row, x advances by dx/dy in 16.16 */
		if (dy > dx) {
			dx = ((dx << 16) / dy);
			x = x1 << 16;
			for (y = y1; y <= y2; y++) {
				xx = x >> 16;
				p = &(data[(screenx * y) + xx]);
				DRAWMETHOD;
				x += dx;
			}
		}
		/* shallow: one pixel per column, y advances by dy/dx in 16.16 */
		else {
			dy = ((dy << 16) / dx);
			y = y1 << 16;
			for (x = x1; x <= x2; x++) {
				yy = y >> 16;
				p = &(data[(screenx * yy) + x]);
				DRAWMETHOD;
				y += dy;
			}
		}
	}
	/*    2 */
	/*   /  */
	/*  /   */
	/* 1    */
	else {
		/* steep: rows are walked upwards from y1 to y2 */
		if (-dy > dx) {
			dx = ((dx << 16) / -dy);
			/* this branch starts one column to the right of x1 */
			x = (x1 + 1) << 16;
			for (y = y1; y >= y2; y--) {
				xx = x >> 16;
				/* because of the +1 above, xx can reach screenx on
				 * the last row; drawing there would land on the
				 * first column of the next row, so skip it */
				if (xx < screenx) {
					p = &(data[(screenx * y) + xx]);
					DRAWMETHOD;
				}
				x += dx;
			}
		}
		/* shallow */
		else {
			/* dy is negative here: multiply rather than left-shift,
			 * since shifting a negative value is undefined */
			dy = ((dy * 65536) / dx);
			y = y1 << 16;
			for (x = x1; x <= x2; x++) {
				yy = y >> 16;
				p = &(data[(screenx * yy) + x]);
				DRAWMETHOD;
				y += dy;
			}
		}
	}
end_of_line:
	/* leave MMX state so that later floating-point code works */
	emms();
}
274 
275 #endif
276