1 /*****************************************************************
2  * gavl - a general purpose audio/video processing library
3  *
4  * Copyright (c) 2001 - 2011 Members of the Gmerlin project
5  * gmerlin-general@lists.sourceforge.net
6  * http://gmerlin.sourceforge.net
7  *
8  * This program is free software: you can redistribute it and/or modify
9  * it under the terms of the GNU General Public License as published by
10  * the Free Software Foundation, either version 2 of the License, or
11  * (at your option) any later version.
12  *
13  * This program is distributed in the hope that it will be useful,
14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16  * GNU General Public License for more details.
17  *
18  * You should have received a copy of the GNU General Public License
19  * along with this program.  If not, see <http://www.gnu.org/licenses/>.
20  * *****************************************************************/
21 
22 #include <config.h>
23 
24 #include <stdio.h>
25 #include <gavl/gavl.h>
26 #include <gavl/gavldsp.h>
27 #include <dsp.h>
28 #include <attributes.h>
29 #include "mmx.h"
30 
/*
 *  Store a 64 bit MMX register to memory. When MMXEXT is defined,
 *  the store is done with movntq, otherwise with movq.
 */
#if defined(MMXEXT)
#  define MOVQ_R2M(src_reg, dst_mem) movntq_r2m(src_reg, dst_mem)
#else
#  define MOVQ_R2M(src_reg, dst_mem) movq_r2m(src_reg, dst_mem)
#endif
36 
#if 0
/*
 *  Debugging aid (compiled out): store the MMX register reg into the
 *  scratch variable mm_tmp and print its 64 bit contents as hex to
 *  stderr, prefixed with name.
 *
 *  Note: the macro expands to two statements, so it must be wrapped
 *  in braces when used as the body of an if/else or a loop.
 */
static mmx_t mm_tmp;
#define DUMP_MM(name, reg) MOVQ_R2M(reg, mm_tmp);\
  fprintf(stderr, "%s: %016llx\n", name, mm_tmp.q);
#endif
42 
43 
44 
interpolate_8_mmx(const uint8_t * src_1,const uint8_t * src_2,uint8_t * dst,int num,float fac)45 static void interpolate_8_mmx(const uint8_t * src_1, const uint8_t * src_2,
46                               uint8_t * dst, int num, float fac)
47   {
48   int i, imax;
49   int32_t tmp;
50   int32_t fac_i;
51   int32_t anti_fac;
52 
53   fac_i = (float)(fac * 0x4000 + 0.5);
54   anti_fac = 0x4000 - fac_i;
55 
56   //  fprintf(stderr, "interpolate_8_mmx %d %d\n", fac, anti_fac);
57 
58   imax = num / 8;
59   //  imax = 0;
60 
61   /* Load factors */
62 
63   /*
64    *  mm0: Input1
65    *  mm1: Input2
66    *  mm2: Factor1
67    *  mm3: Factor1
68    *  mm4: Output1
69    *  mm5: Output2
70    *  mm6: Scratch
71    *  mm7: 0
72    */
73 
74   pxor_r2r(mm7, mm7);
75 
76   /* Load factor1 */
77   movd_m2r(fac_i, mm2);
78   movq_r2r(mm2, mm6);
79   psllq_i2r(16, mm6);
80   por_r2r(mm6, mm2);
81   movq_r2r(mm2, mm6);
82   psllq_i2r(32, mm6);
83   por_r2r(mm6, mm2);
84   /* Load factor2 */
85   movd_m2r(anti_fac, mm3);
86   movq_r2r(mm3, mm6);
87   psllq_i2r(16, mm6);
88   por_r2r(mm6, mm3);
89   movq_r2r(mm3, mm6);
90   psllq_i2r(32, mm6);
91   por_r2r(mm6, mm3);
92 
93   for(i = 0; i < imax; i++)
94     {
95     /* Load input 1 */
96     movq_m2r(*src_1,mm0);
97     movq_r2r(mm0,mm1);
98     punpcklbw_r2r(mm7, mm0);
99     punpckhbw_r2r(mm7, mm1);
100     psllw_i2r(7, mm0);
101     psllw_i2r(7, mm1);
102 
103     /* Accumulate mm0 */
104     pmulhw_r2r(mm2, mm0);
105     movq_r2r(mm0, mm4);
106     /* Accumulate mm1 */
107     pmulhw_r2r(mm2, mm1);
108     movq_r2r(mm1, mm5);
109 
110     /* Load input 2 */
111     movq_m2r(*(src_2),mm0);
112     movq_r2r(mm0,mm1);
113     punpcklbw_r2r(mm7, mm0);
114     punpckhbw_r2r(mm7, mm1);
115     psllw_i2r(7, mm0);
116     psllw_i2r(7, mm1);
117 
118     /* Accumulate mm0 */
119     pmulhw_r2r(mm3, mm0);
120     paddsw_r2r(mm0, mm4);
121     /* Accumulate mm1 */
122     pmulhw_r2r(mm3, mm1);
123     paddsw_r2r(mm1, mm5);
124 
125     psraw_i2r(5, mm4);
126     psraw_i2r(5, mm5);
127 
128     packuswb_r2r(mm5, mm4);
129 
130     MOVQ_R2M(mm4, *dst);
131 
132     dst += 8;
133     src_1 += 8;
134     src_2 += 8;
135     }
136 
137   emms();
138 
139   imax = num % 8;
140   //  imax = num;
141 
142   if(!imax)
143     return;
144 
145   for(i = 0; i < imax; i++)
146     {
147     tmp = (*src_1 * fac_i + *src_2 * anti_fac) >> 15;
148     *dst = (uint8_t)((tmp & ~0xFF)?((-tmp) >> 31) : tmp);
149     /* Accum */
150     dst++;
151     src_1++;
152     src_2++;
153     }
154 
155 
156   }
157 
/*
 *  Register the MMX routines of this file in the function table.
 *
 *  funcs:   Function table to update
 *  quality: Quality level. The table is only touched for levels
 *           below 3, otherwise it is left unchanged.
 */
void gavl_dsp_init_mmx(gavl_dsp_funcs_t * funcs,
                       int quality)
  {
  /* Quality 3 and above: Keep whatever is installed already */
  if(quality >= 3)
    return;

  funcs->interpolate_8 = interpolate_8_mmx;
  }
164