1 /*	mmx.c
2 
3 	MultiMedia eXtensions GCC interface library for IA32.
4 
5 	To use this library, simply include this header file
6 	and compile with GCC.  You MUST have inlining enabled
7 	in order for mmx_ok() to work; this can be done by
8 	simply using -O on the GCC command line.
9 
10 	Compiling with -DMMX_TRACE will cause detailed trace
11 	output to be sent to stderr for each mmx operation.
12 	This adds lots of code, and obviously slows execution to
13 	a crawl, but can be very useful for debugging.
14 
15 	THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY
16 	EXPRESS OR IMPLIED WARRANTIES, INCLUDING, WITHOUT
17 	LIMITATION, THE IMPLIED WARRANTIES OF MERCHANTABILITY
18 	AND FITNESS FOR ANY PARTICULAR PURPOSE.
19 
20 	1997-99 by H. Dietz and R. Fisher
21 
22  Notes:
23 	It appears that the latest gas has the pand problem fixed, therefore
24 	  I'll undefine BROKEN_PAND by default.
25 */
26 
27 #ifdef HAVE_CONFIG_H
28 #include "config.h"
29 #endif
30 
31 #include "goom_config.h"
32 
33 #ifdef HAVE_MMX
34 
/* Right-shift applied in zoom_filter_mmx() after multiplying a displacement
 * delta by buffratio, i.e. buffratio is used as a fixed-point factor with
 * BUFFPOINTNB fractional bits. */
#define BUFFPOINTNB 16
/* The two constants below are not referenced by the code in this file. */
#define BUFFPOINTMASK 0xffff
#define BUFFINCR 0xff
38 
39 #include "mmx.h"
40 #include "goom_graphic.h"
41 
/* Sub-pixel resolution: zoom_filter_mmx() treats the low PERTEDEC bits of a
 * coordinate as the fractional part (16 steps per pixel). */
#define sqrtperte 16
// use: a % sqrtperte <=> a & PERTEMASK
#define PERTEMASK 0xf
// use: a / sqrtperte <=> a >> PERTEDEC
#define PERTEDEC 4
47 
/*
 * Report whether MMX can be used.
 *
 * Returns 1 when bit 0 of the value returned by mm_support() is set,
 * 0 otherwise.
 */
int
mmx_supported (void)
{
  if ((mm_support () & 0x1) != 0) {
    return 1;
  }
  return 0;
}
53 
54 void
zoom_filter_mmx(int prevX,int prevY,Pixel * expix1,Pixel * expix2,int * brutS,int * brutD,int buffratio,int precalCoef[16][16])55 zoom_filter_mmx (int prevX, int prevY,
56     Pixel * expix1, Pixel * expix2,
57     int *brutS, int *brutD, int buffratio, int precalCoef[16][16])
58 {
59   unsigned int ax = (prevX - 1) << PERTEDEC, ay = (prevY - 1) << PERTEDEC;
60 
61   int bufsize = prevX * prevY;
62   int loop;
63 
64   __asm__ __volatile__ ("pxor %mm7,%mm7");
65 
66   for (loop = 0; loop < bufsize; loop++) {
67     /*      int couleur; */
68     int px, py;
69     int pos;
70     int coeffs;
71 
72     int myPos = loop << 1, myPos2 = myPos + 1;
73     int brutSmypos = brutS[myPos];
74 
75     px = brutSmypos + (((brutD[myPos] -
76                 brutSmypos) * buffratio) >> BUFFPOINTNB);
77     brutSmypos = brutS[myPos2];
78     py = brutSmypos + (((brutD[myPos2] -
79                 brutSmypos) * buffratio) >> BUFFPOINTNB);
80 
81     if ((py >= ay) || (px >= ax)) {
82       pos = coeffs = 0;
83     } else {
84       pos = ((px >> PERTEDEC) + prevX * (py >> PERTEDEC));
85       // coef en modulo 15
86       coeffs = precalCoef[px & PERTEMASK][py & PERTEMASK];
87     }
88 
89     __asm__ __volatile__ ("movd %2, %%mm6 \n\t"
90         /* recuperation des deux premiers pixels dans mm0 et mm1 */
91         "movq (%3,%1,4), %%mm0 \n\t"    /* b1-v1-r1-a1-b2-v2-r2-a2 */
92         "movq %%mm0, %%mm1 \n\t"        /* b1-v1-r1-a1-b2-v2-r2-a2 */
93         /* depackage du premier pixel */
94         "punpcklbw %%mm7, %%mm0 \n\t"   /* 00-b2-00-v2-00-r2-00-a2 */
95         "movq %%mm6, %%mm5 \n\t"        /* ??-??-??-??-c4-c3-c2-c1 */
96         /* depackage du 2ieme pixel */
97         "punpckhbw %%mm7, %%mm1 \n\t"   /* 00-b1-00-v1-00-r1-00-a1 */
98         /* extraction des coefficients... */
99         "punpcklbw %%mm5, %%mm6 \n\t"   /* c4-c4-c3-c3-c2-c2-c1-c1 */
100         "movq %%mm6, %%mm4 \n\t"        /* c4-c4-c3-c3-c2-c2-c1-c1 */
101         "movq %%mm6, %%mm5 \n\t"        /* c4-c4-c3-c3-c2-c2-c1-c1 */
102         "punpcklbw %%mm5, %%mm6 \n\t"   /* c2-c2-c2-c2-c1-c1-c1-c1 */
103         "punpckhbw %%mm5, %%mm4 \n\t"   /* c4-c4-c4-c4-c3-c3-c3-c3 */
104         "movq %%mm6, %%mm3 \n\t"        /* c2-c2-c2-c2-c1-c1-c1-c1 */
105         "punpcklbw %%mm7, %%mm6 \n\t"   /* 00-c1-00-c1-00-c1-00-c1 */
106         "punpckhbw %%mm7, %%mm3 \n\t"   /* 00-c2-00-c2-00-c2-00-c2 */
107         /* multiplication des pixels par les coefficients */
108         "pmullw %%mm6, %%mm0 \n\t"      /* c1*b2-c1*v2-c1*r2-c1*a2 */
109         "pmullw %%mm3, %%mm1 \n\t"      /* c2*b1-c2*v1-c2*r1-c2*a1 */
110         "paddw %%mm1, %%mm0 \n\t"
111         /* ...extraction des 2 derniers coefficients */
112         "movq %%mm4, %%mm5 \n\t"        /* c4-c4-c4-c4-c3-c3-c3-c3 */
113         "punpcklbw %%mm7, %%mm4 \n\t"   /* 00-c3-00-c3-00-c3-00-c3 */
114         "punpckhbw %%mm7, %%mm5 \n\t"   /* 00-c4-00-c4-00-c4-00-c4 */
115         /* ajouter la longueur de ligne a esi */
116         "addl 8(%%ebp),%1 \n\t"
117         /* recuperation des 2 derniers pixels */
118         "movq (%3,%1,4), %%mm1 \n\t" "movq %%mm1, %%mm2 \n\t"
119         /* depackage des pixels */
120         "punpcklbw %%mm7, %%mm1 \n\t" "punpckhbw %%mm7, %%mm2 \n\t"
121         /* multiplication pas les coeffs */
122         "pmullw %%mm4, %%mm1 \n\t" "pmullw %%mm5, %%mm2 \n\t"
123         /* ajout des valeurs obtenues ? la valeur finale */
124         "paddw %%mm1, %%mm0 \n\t" "paddw %%mm2, %%mm0 \n\t"
125         /* division par 256 = 16+16+16+16, puis repackage du pixel final */
126         "psrlw $8, %%mm0 \n\t"
127         "packuswb %%mm7, %%mm0 \n\t" "movd %%mm0,%0 \n\t":"=g" (expix2[loop])
128         :"r" (pos), "r" (coeffs), "r" (expix1)
129 
130         );
131 
132     emms ();
133   }
134 }
135 
/*
 * DRAWMETHOD_PLUS_MMX(_out, _backbuf, _col)
 *
 * Loads _backbuf into mm0, adds _col to it with paddusb (per-byte unsigned
 * saturating add, going by the mmx.h macro name) and stores the result in
 * _out.  Clobbers mm0; the caller is responsible for emms().
 *
 * Wrapped in do { } while (0) so that the macro followed by ';' is a single
 * statement and stays safe inside an unbraced if/else.
 *
 * NOTE(review): paddusb with a memory operand presumably reads 8 bytes,
 * while callers in this file pass a 4-byte int as _col - confirm against
 * mmx.h.
 */
#define DRAWMETHOD_PLUS_MMX(_out,_backbuf,_col) \
do { \
	movd_m2r(_backbuf, mm0); \
	paddusb_m2r(_col, mm0); \
	movd_r2m(mm0, _out); \
} while (0)

/* Plot one pixel in place: expects a pixel pointer `p` and a colour `col`
 * in the scope where it is expanded. */
#define DRAWMETHOD DRAWMETHOD_PLUS_MMX(*p,*p,col)
144 
145 void
draw_line_mmx(Pixel * data,int x1,int y1,int x2,int y2,int col,int screenx,int screeny)146 draw_line_mmx (Pixel * data, int x1, int y1, int x2, int y2, int col,
147     int screenx, int screeny)
148 {
149   int x, y, dx, dy, yy, xx;
150   Pixel *p;
151 
152   if ((y1 < 0) || (y2 < 0) || (x1 < 0) || (x2 < 0) || (y1 >= screeny)
153       || (y2 >= screeny) || (x1 >= screenx) || (x2 >= screenx))
154     goto end_of_line;
155 
156   dx = x2 - x1;
157   dy = y2 - y1;
158   if (x1 >= x2) {
159     int tmp;
160 
161     tmp = x1;
162     x1 = x2;
163     x2 = tmp;
164     tmp = y1;
165     y1 = y2;
166     y2 = tmp;
167     dx = x2 - x1;
168     dy = y2 - y1;
169   }
170 
171   /* vertical line */
172   if (dx == 0) {
173     if (y1 < y2) {
174       p = &(data[(screenx * y1) + x1]);
175       for (y = y1; y <= y2; y++) {
176         DRAWMETHOD;
177         p += screenx;
178       }
179     } else {
180       p = &(data[(screenx * y2) + x1]);
181       for (y = y2; y <= y1; y++) {
182         DRAWMETHOD;
183         p += screenx;
184       }
185     }
186     goto end_of_line;
187   }
188   /* horizontal line */
189   if (dy == 0) {
190     if (x1 < x2) {
191       p = &(data[(screenx * y1) + x1]);
192       for (x = x1; x <= x2; x++) {
193         DRAWMETHOD;
194         p++;
195       }
196       goto end_of_line;
197     } else {
198       p = &(data[(screenx * y1) + x2]);
199       for (x = x2; x <= x1; x++) {
200         DRAWMETHOD;
201         p++;
202       }
203       goto end_of_line;
204     }
205   }
206   /* 1    */
207   /*  \   */
208   /*   \  */
209   /*    2 */
210   if (y2 > y1) {
211     /* steep */
212     if (dy > dx) {
213       dx = ((dx << 16) / dy);
214       x = x1 << 16;
215       for (y = y1; y <= y2; y++) {
216         xx = x >> 16;
217         p = &(data[(screenx * y) + xx]);
218         DRAWMETHOD;
219         if (xx < (screenx - 1)) {
220           p++;
221           /* DRAWMETHOD; */
222         }
223         x += dx;
224       }
225       goto end_of_line;
226     }
227     /* shallow */
228     else {
229       dy = ((dy << 16) / dx);
230       y = y1 << 16;
231       for (x = x1; x <= x2; x++) {
232         yy = y >> 16;
233         p = &(data[(screenx * yy) + x]);
234         DRAWMETHOD;
235         if (yy < (screeny - 1)) {
236           p += screeny;
237           /* DRAWMETHOD; */
238         }
239         y += dy;
240       }
241     }
242   }
243   /*    2 */
244   /*   /  */
245   /*  /   */
246   /* 1    */
247   else {
248     /* steep */
249     if (-dy > dx) {
250       dx = ((dx << 16) / -dy);
251       x = (x1 + 1) << 16;
252       for (y = y1; y >= y2; y--) {
253         xx = x >> 16;
254         p = &(data[(screenx * y) + xx]);
255         DRAWMETHOD;
256         if (xx < (screenx - 1)) {
257           p--;
258           /* DRAWMETHOD; */
259         }
260         x += dx;
261       }
262       goto end_of_line;
263     }
264     /* shallow */
265     else {
266       dy = ((dy << 16) / dx);
267       y = y1 << 16;
268       for (x = x1; x <= x2; x++) {
269         yy = y >> 16;
270         p = &(data[(screenx * yy) + x]);
271         DRAWMETHOD;
272         if (yy < (screeny - 1)) {
273           p += screeny;
274           /* DRAWMETHOD; */
275         }
276         y += dy;
277       }
278       goto end_of_line;
279     }
280   }
281 end_of_line:
282   emms ();
283   /* __asm__ __volatile__ ("emms"); */
284 }
285 #else
/*
 * Version used when the file is built without HAVE_MMX: always reports
 * that MMX is unavailable by returning 0.
 */
int
mmx_supported (void)
{
  return 0;
}
291 #endif /* HAVE_MMX */
292