1 /*
2 Copyright (c) 2003-2004, Mark Borgerding
3
4 All rights reserved.
5
6 Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
7
8 * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
9 * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
10 * Neither the author nor the names of any contributors may be used to endorse or promote products derived from this software without specific prior written permission.
11
12 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
13 */
14
15 #include "_kiss_fft_guts.h"
16
17
18 /*
19 Some definitions that allow real or complex filtering
20 */
21 #ifdef REAL_FASTFIR
22 #define MIN_FFT_LEN 2048
23 #include "kiss_fftr.h"
24 typedef kiss_fft_scalar kffsamp_t;
25 typedef kiss_fftr_cfg kfcfg_t;
26 #define FFT_ALLOC kiss_fftr_alloc
27 #define FFTFWD kiss_fftr
28 #define FFTINV kiss_fftri
29 #else
30 #define MIN_FFT_LEN 1024
31 typedef kiss_fft_cpx kffsamp_t;
32 typedef kiss_fft_cfg kfcfg_t;
33 #define FFT_ALLOC kiss_fft_alloc
34 #define FFTFWD kiss_fft
35 #define FFTINV kiss_fft
36 #endif
37
38 typedef struct kiss_fastfir_state *kiss_fastfir_cfg;
39
40
41
42 kiss_fastfir_cfg kiss_fastfir_alloc(const kffsamp_t * imp_resp,size_t n_imp_resp,
43 size_t * nfft,void * mem,size_t*lenmem);
44
45 /* see do_file_filter for usage */
46 size_t kiss_fastfir( kiss_fastfir_cfg cfg, kffsamp_t * inbuf, kffsamp_t * outbuf, size_t n, size_t *offset);
47
48
49
50 static int verbose=0;
51
52
53 struct kiss_fastfir_state{
54 size_t nfft;
55 size_t ngood;
56 kfcfg_t fftcfg;
57 kfcfg_t ifftcfg;
58 kiss_fft_cpx * fir_freq_resp;
59 kiss_fft_cpx * freqbuf;
60 size_t n_freq_bins;
61 kffsamp_t * tmpbuf;
62 };
63
64
kiss_fastfir_alloc(const kffsamp_t * imp_resp,size_t n_imp_resp,size_t * pnfft,void * mem,size_t * lenmem)65 kiss_fastfir_cfg kiss_fastfir_alloc(
66 const kffsamp_t * imp_resp,size_t n_imp_resp,
67 size_t *pnfft, /* if <= 0, an appropriate size will be chosen */
68 void * mem,size_t*lenmem)
69 {
70 kiss_fastfir_cfg st = NULL;
71 size_t len_fftcfg,len_ifftcfg;
72 size_t memneeded = sizeof(struct kiss_fastfir_state);
73 char * ptr;
74 size_t i;
75 size_t nfft=0;
76 float scale;
77 int n_freq_bins;
78 if (pnfft)
79 nfft=*pnfft;
80
81 if (nfft<=0) {
82 /* determine fft size as next power of two at least 2x
83 the impulse response length*/
84 i=n_imp_resp-1;
85 nfft=2;
86 do{
87 nfft<<=1;
88 }while (i>>=1);
89 #ifdef MIN_FFT_LEN
90 if ( nfft < MIN_FFT_LEN )
91 nfft=MIN_FFT_LEN;
92 #endif
93 }
94 if (pnfft)
95 *pnfft = nfft;
96
97 #ifdef REAL_FASTFIR
98 n_freq_bins = nfft/2 + 1;
99 #else
100 n_freq_bins = nfft;
101 #endif
102 /*fftcfg*/
103 FFT_ALLOC (nfft, 0, NULL, &len_fftcfg);
104 memneeded += len_fftcfg;
105 /*ifftcfg*/
106 FFT_ALLOC (nfft, 1, NULL, &len_ifftcfg);
107 memneeded += len_ifftcfg;
108 /* tmpbuf */
109 memneeded += sizeof(kffsamp_t) * nfft;
110 /* fir_freq_resp */
111 memneeded += sizeof(kiss_fft_cpx) * n_freq_bins;
112 /* freqbuf */
113 memneeded += sizeof(kiss_fft_cpx) * n_freq_bins;
114
115 if (lenmem == NULL) {
116 st = (kiss_fastfir_cfg) malloc (memneeded);
117 } else {
118 if (*lenmem >= memneeded)
119 st = (kiss_fastfir_cfg) mem;
120 *lenmem = memneeded;
121 }
122 if (!st)
123 return NULL;
124
125 st->nfft = nfft;
126 st->ngood = nfft - n_imp_resp + 1;
127 st->n_freq_bins = n_freq_bins;
128 ptr=(char*)(st+1);
129
130 st->fftcfg = (kfcfg_t)ptr;
131 ptr += len_fftcfg;
132
133 st->ifftcfg = (kfcfg_t)ptr;
134 ptr += len_ifftcfg;
135
136 st->tmpbuf = (kffsamp_t*)ptr;
137 ptr += sizeof(kffsamp_t) * nfft;
138
139 st->freqbuf = (kiss_fft_cpx*)ptr;
140 ptr += sizeof(kiss_fft_cpx) * n_freq_bins;
141
142 st->fir_freq_resp = (kiss_fft_cpx*)ptr;
143 ptr += sizeof(kiss_fft_cpx) * n_freq_bins;
144
145 FFT_ALLOC (nfft,0,st->fftcfg , &len_fftcfg);
146 FFT_ALLOC (nfft,1,st->ifftcfg , &len_ifftcfg);
147
148 memset(st->tmpbuf,0,sizeof(kffsamp_t)*nfft);
149 /*zero pad in the middle to left-rotate the impulse response
150 This puts the scrap samples at the end of the inverse fft'd buffer */
151 st->tmpbuf[0] = imp_resp[ n_imp_resp - 1 ];
152 for (i=0;i<n_imp_resp - 1; ++i) {
153 st->tmpbuf[ nfft - n_imp_resp + 1 + i ] = imp_resp[ i ];
154 }
155
156 FFTFWD(st->fftcfg,st->tmpbuf,st->fir_freq_resp);
157
158 /* TODO: this won't work for fixed point */
159 scale = 1.0 / st->nfft;
160
161 for ( i=0; i < st->n_freq_bins; ++i ) {
162 #ifdef USE_SIMD
163 st->fir_freq_resp[i].r *= _mm_set1_ps(scale);
164 st->fir_freq_resp[i].i *= _mm_set1_ps(scale);
165 #else
166 st->fir_freq_resp[i].r *= scale;
167 st->fir_freq_resp[i].i *= scale;
168 #endif
169 }
170 return st;
171 }
172
fastconv1buf(const kiss_fastfir_cfg st,const kffsamp_t * in,kffsamp_t * out)173 static void fastconv1buf(const kiss_fastfir_cfg st,const kffsamp_t * in,kffsamp_t * out)
174 {
175 size_t i;
176 /* multiply the frequency response of the input signal by
177 that of the fir filter*/
178 FFTFWD( st->fftcfg, in , st->freqbuf );
179 for ( i=0; i<st->n_freq_bins; ++i ) {
180 kiss_fft_cpx tmpsamp;
181 C_MUL(tmpsamp,st->freqbuf[i],st->fir_freq_resp[i]);
182 st->freqbuf[i] = tmpsamp;
183 }
184
185 /* perform the inverse fft*/
186 FFTINV(st->ifftcfg,st->freqbuf,out);
187 }
188
189 /* n : the size of inbuf and outbuf in samples
190 return value: the number of samples completely processed
191 n-retval samples should be copied to the front of the next input buffer */
kff_nocopy(kiss_fastfir_cfg st,const kffsamp_t * inbuf,kffsamp_t * outbuf,size_t n)192 static size_t kff_nocopy(
193 kiss_fastfir_cfg st,
194 const kffsamp_t * inbuf,
195 kffsamp_t * outbuf,
196 size_t n)
197 {
198 size_t norig=n;
199 while (n >= st->nfft ) {
200 fastconv1buf(st,inbuf,outbuf);
201 inbuf += st->ngood;
202 outbuf += st->ngood;
203 n -= st->ngood;
204 }
205 return norig - n;
206 }
207
208 static
kff_flush(kiss_fastfir_cfg st,const kffsamp_t * inbuf,kffsamp_t * outbuf,size_t n)209 size_t kff_flush(kiss_fastfir_cfg st,const kffsamp_t * inbuf,kffsamp_t * outbuf,size_t n)
210 {
211 size_t zpad=0,ntmp;
212
213 ntmp = kff_nocopy(st,inbuf,outbuf,n);
214 n -= ntmp;
215 inbuf += ntmp;
216 outbuf += ntmp;
217
218 zpad = st->nfft - n;
219 memset(st->tmpbuf,0,sizeof(kffsamp_t)*st->nfft );
220 memcpy(st->tmpbuf,inbuf,sizeof(kffsamp_t)*n );
221
222 fastconv1buf(st,st->tmpbuf,st->tmpbuf);
223
224 memcpy(outbuf,st->tmpbuf,sizeof(kffsamp_t)*( st->ngood - zpad ));
225 return ntmp + st->ngood - zpad;
226 }
227
kiss_fastfir(kiss_fastfir_cfg vst,kffsamp_t * inbuf,kffsamp_t * outbuf,size_t n_new,size_t * offset)228 size_t kiss_fastfir(
229 kiss_fastfir_cfg vst,
230 kffsamp_t * inbuf,
231 kffsamp_t * outbuf,
232 size_t n_new,
233 size_t *offset)
234 {
235 size_t ntot = n_new + *offset;
236 if (n_new==0) {
237 return kff_flush(vst,inbuf,outbuf,ntot);
238 }else{
239 size_t nwritten = kff_nocopy(vst,inbuf,outbuf,ntot);
240 *offset = ntot - nwritten;
241 /*save the unused or underused samples at the front of the input buffer */
242 memcpy( inbuf , inbuf+nwritten , *offset * sizeof(kffsamp_t) );
243 return nwritten;
244 }
245 }
246
247 #ifdef FAST_FILT_UTIL
248 #include <unistd.h>
249 #include <sys/types.h>
250 #include <sys/mman.h>
251 #include <assert.h>
252
253 static
direct_file_filter(FILE * fin,FILE * fout,const kffsamp_t * imp_resp,size_t n_imp_resp)254 void direct_file_filter(
255 FILE * fin,
256 FILE * fout,
257 const kffsamp_t * imp_resp,
258 size_t n_imp_resp)
259 {
260 size_t nlag = n_imp_resp - 1;
261
262 const kffsamp_t *tmph;
263 kffsamp_t *buf, *circbuf;
264 kffsamp_t outval;
265 size_t nread;
266 size_t nbuf;
267 size_t oldestlag = 0;
268 size_t k, tap;
269 #ifndef REAL_FASTFIR
270 kffsamp_t tmp;
271 #endif
272
273 nbuf = 4096;
274 buf = (kffsamp_t *) malloc ( sizeof (kffsamp_t) * nbuf);
275 circbuf = (kffsamp_t *) malloc (sizeof (kffsamp_t) * nlag);
276 if (!circbuf || !buf) {
277 perror("circbuf allocation");
278 exit(1);
279 }
280
281 if ( fread (circbuf, sizeof (kffsamp_t), nlag, fin) != nlag ) {
282 perror ("insufficient data to overcome transient");
283 exit (1);
284 }
285
286 do {
287 nread = fread (buf, sizeof (kffsamp_t), nbuf, fin);
288 if (nread <= 0)
289 break;
290
291 for (k = 0; k < nread; ++k) {
292 tmph = imp_resp+nlag;
293 #ifdef REAL_FASTFIR
294 # ifdef USE_SIMD
295 outval = _mm_set1_ps(0);
296 #else
297 outval = 0;
298 #endif
299 for (tap = oldestlag; tap < nlag; ++tap)
300 outval += circbuf[tap] * *tmph--;
301 for (tap = 0; tap < oldestlag; ++tap)
302 outval += circbuf[tap] * *tmph--;
303 outval += buf[k] * *tmph;
304 #else
305 # ifdef USE_SIMD
306 outval.r = outval.i = _mm_set1_ps(0);
307 #else
308 outval.r = outval.i = 0;
309 #endif
310 for (tap = oldestlag; tap < nlag; ++tap){
311 C_MUL(tmp,circbuf[tap],*tmph);
312 --tmph;
313 C_ADDTO(outval,tmp);
314 }
315
316 for (tap = 0; tap < oldestlag; ++tap) {
317 C_MUL(tmp,circbuf[tap],*tmph);
318 --tmph;
319 C_ADDTO(outval,tmp);
320 }
321 C_MUL(tmp,buf[k],*tmph);
322 C_ADDTO(outval,tmp);
323 #endif
324
325 circbuf[oldestlag++] = buf[k];
326 buf[k] = outval;
327
328 if (oldestlag == nlag)
329 oldestlag = 0;
330 }
331
332 if (fwrite (buf, sizeof (buf[0]), nread, fout) != nread) {
333 perror ("short write");
334 exit (1);
335 }
336 } while (nread);
337 free (buf);
338 free (circbuf);
339 }
340
341 static
do_file_filter(FILE * fin,FILE * fout,const kffsamp_t * imp_resp,size_t n_imp_resp,size_t nfft)342 void do_file_filter(
343 FILE * fin,
344 FILE * fout,
345 const kffsamp_t * imp_resp,
346 size_t n_imp_resp,
347 size_t nfft )
348 {
349 int fdout;
350 size_t n_samps_buf;
351
352 kiss_fastfir_cfg cfg;
353 kffsamp_t *inbuf,*outbuf;
354 int nread,nwrite;
355 size_t idx_inbuf;
356
357 fdout = fileno(fout);
358
359 cfg=kiss_fastfir_alloc(imp_resp,n_imp_resp,&nfft,0,0);
360
361 /* use length to minimize buffer shift*/
362 n_samps_buf = 8*4096/sizeof(kffsamp_t);
363 n_samps_buf = nfft + 4*(nfft-n_imp_resp+1);
364
365 if (verbose) fprintf(stderr,"bufsize=%d\n",(int)(sizeof(kffsamp_t)*n_samps_buf) );
366
367
368 /*allocate space and initialize pointers */
369 inbuf = (kffsamp_t*)malloc(sizeof(kffsamp_t)*n_samps_buf);
370 outbuf = (kffsamp_t*)malloc(sizeof(kffsamp_t)*n_samps_buf);
371
372 idx_inbuf=0;
373 do{
374 /* start reading at inbuf[idx_inbuf] */
375 nread = fread( inbuf + idx_inbuf, sizeof(kffsamp_t), n_samps_buf - idx_inbuf,fin );
376
377 /* If nread==0, then this is a flush.
378 The total number of samples in input is idx_inbuf + nread . */
379 nwrite = kiss_fastfir(cfg, inbuf, outbuf,nread,&idx_inbuf) * sizeof(kffsamp_t);
380 /* kiss_fastfir moved any unused samples to the front of inbuf and updated idx_inbuf */
381
382 if ( write(fdout, outbuf, nwrite) != nwrite ) {
383 perror("short write");
384 exit(1);
385 }
386 }while ( nread );
387 free(cfg);
388 free(inbuf);
389 free(outbuf);
390 }
391
main(int argc,char ** argv)392 int main(int argc,char**argv)
393 {
394 kffsamp_t * h;
395 int use_direct=0;
396 size_t nh,nfft=0;
397 FILE *fin=stdin;
398 FILE *fout=stdout;
399 FILE *filtfile=NULL;
400 while (1) {
401 int c=getopt(argc,argv,"n:h:i:o:vd");
402 if (c==-1) break;
403 switch (c) {
404 case 'v':
405 verbose=1;
406 break;
407 case 'n':
408 nfft=atoi(optarg);
409 break;
410 case 'i':
411 fin = fopen(optarg,"rb");
412 if (fin==NULL) {
413 perror(optarg);
414 exit(1);
415 }
416 break;
417 case 'o':
418 fout = fopen(optarg,"w+b");
419 if (fout==NULL) {
420 perror(optarg);
421 exit(1);
422 }
423 break;
424 case 'h':
425 filtfile = fopen(optarg,"rb");
426 if (filtfile==NULL) {
427 perror(optarg);
428 exit(1);
429 }
430 break;
431 case 'd':
432 use_direct=1;
433 break;
434 case '?':
435 fprintf(stderr,"usage options:\n"
436 "\t-n nfft: fft size to use\n"
437 "\t-d : use direct FIR filtering, not fast convolution\n"
438 "\t-i filename: input file\n"
439 "\t-o filename: output(filtered) file\n"
440 "\t-n nfft: fft size to use\n"
441 "\t-h filename: impulse response\n");
442 exit (1);
443 default:fprintf(stderr,"bad %c\n",c);break;
444 }
445 }
446 if (filtfile==NULL) {
447 fprintf(stderr,"You must supply the FIR coeffs via -h\n");
448 exit(1);
449 }
450 fseek(filtfile,0,SEEK_END);
451 nh = ftell(filtfile) / sizeof(kffsamp_t);
452 if (verbose) fprintf(stderr,"%d samples in FIR filter\n",(int)nh);
453 h = (kffsamp_t*)malloc(sizeof(kffsamp_t)*nh);
454 fseek(filtfile,0,SEEK_SET);
455 if (fread(h,sizeof(kffsamp_t),nh,filtfile) != nh)
456 fprintf(stderr,"short read on filter file\n");
457
458 fclose(filtfile);
459
460 if (use_direct)
461 direct_file_filter( fin, fout, h,nh);
462 else
463 do_file_filter( fin, fout, h,nh,nfft);
464
465 if (fout!=stdout) fclose(fout);
466 if (fin!=stdin) fclose(fin);
467
468 return 0;
469 }
470 #endif
471