1 /*
2  * SpanDSP - a series of DSP components for telephony
3  *
4  * fir.h - General telephony FIR routines
5  *
6  * Written by Steve Underwood <steveu@coppice.org>
7  *
8  * Copyright (C) 2002 Steve Underwood
9  *
10  * All rights reserved.
11  *
12  * This program is free software; you can redistribute it and/or modify
13  * it under the terms of the GNU Lesser General Public License version 2.1,
14  * as published by the Free Software Foundation.
15  *
16  * This program is distributed in the hope that it will be useful,
17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
19  * GNU Lesser General Public License for more details.
20  *
21  * You should have received a copy of the GNU Lesser General Public
22  * License along with this program; if not, write to the Free Software
23  * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
24  */
25 
26 /*! \page fir_page FIR filtering
27 \section fir_page_sec_1 What does it do?
28 ???.
29 
30 \section fir_page_sec_2 How does it work?
31 ???.
32 */
33 
34 #if !defined(_SPANDSP_FIR_H_)
35 #define _SPANDSP_FIR_H_
36 
37 #if defined(USE_MMX)  ||  defined(USE_SSE2)
38 #include "mmx.h"
39 #endif
40 
41 /*!
42     16 bit integer FIR descriptor. This defines the working state for a single
43     instance of an FIR filter using 16 bit integer coefficients.
44 */
45 typedef struct
46 {
47     int taps;
48     int curr_pos;
49     const int16_t *coeffs;
50     int16_t *history;
51 } fir16_state_t;
52 
53 /*!
54     32 bit integer FIR descriptor. This defines the working state for a single
55     instance of an FIR filter using 32 bit integer coefficients, and filtering
56     16 bit integer data.
57 */
58 typedef struct
59 {
60     int taps;
61     int curr_pos;
62     const int32_t *coeffs;
63     int16_t *history;
64 } fir32_state_t;
65 
66 /*!
67     Floating point FIR descriptor. This defines the working state for a single
68     instance of an FIR filter using floating point coefficients and data.
69 */
70 typedef struct
71 {
72     int taps;
73     int curr_pos;
74     const float *coeffs;
75     float *history;
76 } fir_float_state_t;
77 
78 #if defined(__cplusplus)
79 extern "C"
80 {
81 #endif
82 
fir16_create(fir16_state_t * fir,const int16_t * coeffs,int taps)83 static __inline__ const int16_t *fir16_create(fir16_state_t *fir,
84                                               const int16_t *coeffs,
85                                               int taps)
86 {
87     fir->taps = taps;
88     fir->curr_pos = taps - 1;
89     fir->coeffs = coeffs;
90 #if defined(USE_MMX)  ||  defined(USE_SSE2)
91     if ((fir->history = span_alloc(2*taps*sizeof(int16_t))))
92         memset(fir->history, 0, 2*taps*sizeof(int16_t));
93 #else
94     if ((fir->history = (int16_t *) span_alloc(taps*sizeof(int16_t))))
95         memset(fir->history, 0, taps*sizeof(int16_t));
96 #endif
97     return fir->history;
98 }
99 /*- End of function --------------------------------------------------------*/
100 
fir16_flush(fir16_state_t * fir)101 static __inline__ void fir16_flush(fir16_state_t *fir)
102 {
103 #if defined(USE_MMX)  ||  defined(USE_SSE2)
104     memset(fir->history, 0, 2*fir->taps*sizeof(int16_t));
105 #else
106     memset(fir->history, 0, fir->taps*sizeof(int16_t));
107 #endif
108 }
109 /*- End of function --------------------------------------------------------*/
110 
fir16_free(fir16_state_t * fir)111 static __inline__ void fir16_free(fir16_state_t *fir)
112 {
113     span_free(fir->history);
114 }
115 /*- End of function --------------------------------------------------------*/
116 
fir16(fir16_state_t * fir,int16_t sample)117 static __inline__ int16_t fir16(fir16_state_t *fir, int16_t sample)
118 {
119     int i;
120     int32_t y;
121 #if defined(USE_MMX)
122     mmx_t *mmx_coeffs;
123     mmx_t *mmx_hist;
124 
125     fir->history[fir->curr_pos] = sample;
126     fir->history[fir->curr_pos + fir->taps] = sample;
127 
128     mmx_coeffs = (mmx_t *) fir->coeffs;
129     mmx_hist = (mmx_t *) &fir->history[fir->curr_pos];
130     i = fir->taps;
131     pxor_r2r(mm4, mm4);
132     /* 8 samples per iteration, so the filter must be a multiple of 8 long. */
133     while (i > 0)
134     {
135         movq_m2r(mmx_coeffs[0], mm0);
136         movq_m2r(mmx_coeffs[1], mm2);
137         movq_m2r(mmx_hist[0], mm1);
138         movq_m2r(mmx_hist[1], mm3);
139         mmx_coeffs += 2;
140         mmx_hist += 2;
141         pmaddwd_r2r(mm1, mm0);
142         pmaddwd_r2r(mm3, mm2);
143         paddd_r2r(mm0, mm4);
144         paddd_r2r(mm2, mm4);
145         i -= 8;
146     }
147     movq_r2r(mm4, mm0);
148     psrlq_i2r(32, mm0);
149     paddd_r2r(mm0, mm4);
150     movd_r2m(mm4, y);
151     emms();
152 #elif defined(USE_SSE2)
153     xmm_t *xmm_coeffs;
154     xmm_t *xmm_hist;
155 
156     fir->history[fir->curr_pos] = sample;
157     fir->history[fir->curr_pos + fir->taps] = sample;
158 
159     xmm_coeffs = (xmm_t *) fir->coeffs;
160     xmm_hist = (xmm_t *) &fir->history[fir->curr_pos];
161     i = fir->taps;
162     pxor_r2r(xmm4, xmm4);
163     /* 16 samples per iteration, so the filter must be a multiple of 16 long. */
164     while (i > 0)
165     {
166         movdqu_m2r(xmm_coeffs[0], xmm0);
167         movdqu_m2r(xmm_coeffs[1], xmm2);
168         movdqu_m2r(xmm_hist[0], xmm1);
169         movdqu_m2r(xmm_hist[1], xmm3);
170         xmm_coeffs += 2;
171         xmm_hist += 2;
172         pmaddwd_r2r(xmm1, xmm0);
173         pmaddwd_r2r(xmm3, xmm2);
174         paddd_r2r(xmm0, xmm4);
175         paddd_r2r(xmm2, xmm4);
176         i -= 16;
177     }
178     movdqa_r2r(xmm4, xmm0);
179     psrldq_i2r(8, xmm0);
180     paddd_r2r(xmm0, xmm4);
181     movdqa_r2r(xmm4, xmm0);
182     psrldq_i2r(4, xmm0);
183     paddd_r2r(xmm0, xmm4);
184     movd_r2m(xmm4, y);
185 #else
186     int offset1;
187     int offset2;
188 
189     fir->history[fir->curr_pos] = sample;
190 
191     offset2 = fir->curr_pos;
192     offset1 = fir->taps - offset2;
193     y = 0;
194     for (i = fir->taps - 1;  i >= offset1;  i--)
195         y += fir->coeffs[i]*fir->history[i - offset1];
196     for (  ;  i >= 0;  i--)
197         y += fir->coeffs[i]*fir->history[i + offset2];
198 #endif
199     if (fir->curr_pos <= 0)
200         fir->curr_pos = fir->taps;
201     fir->curr_pos--;
202     return (int16_t) (y >> 15);
203 }
204 /*- End of function --------------------------------------------------------*/
205 
fir32_create(fir32_state_t * fir,const int32_t * coeffs,int taps)206 static __inline__ const int16_t *fir32_create(fir32_state_t *fir,
207                                               const int32_t *coeffs,
208                                               int taps)
209 {
210     fir->taps = taps;
211     fir->curr_pos = taps - 1;
212     fir->coeffs = coeffs;
213     fir->history = (int16_t *) span_alloc(taps*sizeof(int16_t));
214     if (fir->history)
215         memset(fir->history, '\0', taps*sizeof(int16_t));
216     return fir->history;
217 }
218 /*- End of function --------------------------------------------------------*/
219 
fir32_flush(fir32_state_t * fir)220 static __inline__ void fir32_flush(fir32_state_t *fir)
221 {
222     memset(fir->history, 0, fir->taps*sizeof(int16_t));
223 }
224 /*- End of function --------------------------------------------------------*/
225 
fir32_free(fir32_state_t * fir)226 static __inline__ void fir32_free(fir32_state_t *fir)
227 {
228     span_free(fir->history);
229 }
230 /*- End of function --------------------------------------------------------*/
231 
fir32(fir32_state_t * fir,int16_t sample)232 static __inline__ int16_t fir32(fir32_state_t *fir, int16_t sample)
233 {
234     int i;
235     int32_t y;
236     int offset1;
237     int offset2;
238 
239     fir->history[fir->curr_pos] = sample;
240     offset2 = fir->curr_pos;
241     offset1 = fir->taps - offset2;
242     y = 0;
243     for (i = fir->taps - 1;  i >= offset1;  i--)
244         y += fir->coeffs[i]*fir->history[i - offset1];
245     for (  ;  i >= 0;  i--)
246         y += fir->coeffs[i]*fir->history[i + offset2];
247     if (fir->curr_pos <= 0)
248         fir->curr_pos = fir->taps;
249     fir->curr_pos--;
250     return (int16_t) (y >> 15);
251 }
252 /*- End of function --------------------------------------------------------*/
253 
fir_float_create(fir_float_state_t * fir,const float * coeffs,int taps)254 static __inline__ const float *fir_float_create(fir_float_state_t *fir,
255                                                 const float *coeffs,
256                                                 int taps)
257 {
258     fir->taps = taps;
259     fir->curr_pos = taps - 1;
260     fir->coeffs = coeffs;
261     fir->history = (float *) span_alloc(taps*sizeof(float));
262     if (fir->history)
263         memset(fir->history, '\0', taps*sizeof(float));
264     return fir->history;
265 }
266 /*- End of function --------------------------------------------------------*/
267 
fir_float_free(fir_float_state_t * fir)268 static __inline__ void fir_float_free(fir_float_state_t *fir)
269 {
270     span_free(fir->history);
271 }
272 /*- End of function --------------------------------------------------------*/
273 
fir_float(fir_float_state_t * fir,int16_t sample)274 static __inline__ int16_t fir_float(fir_float_state_t *fir, int16_t sample)
275 {
276     int i;
277     float y;
278     int offset1;
279     int offset2;
280 
281     fir->history[fir->curr_pos] = sample;
282 
283     offset2 = fir->curr_pos;
284     offset1 = fir->taps - offset2;
285     y = 0;
286     for (i = fir->taps - 1;  i >= offset1;  i--)
287         y += fir->coeffs[i]*fir->history[i - offset1];
288     for (  ;  i >= 0;  i--)
289         y += fir->coeffs[i]*fir->history[i + offset2];
290     if (fir->curr_pos <= 0)
291         fir->curr_pos = fir->taps;
292     fir->curr_pos--;
293     return  (int16_t) y;
294 }
295 /*- End of function --------------------------------------------------------*/
296 
297 #if defined(__cplusplus)
298 }
299 #endif
300 
301 #endif
302 /*- End of file ------------------------------------------------------------*/
303