1 /* 2 * Copyright (C) 2002 - David W. Durham 3 * 4 * This file is part of ReZound, an audio editing application. 5 * 6 * ReZound is free software; you can redistribute it and/or modify 7 * it under the terms of the GNU General Public License as published 8 * by the Free Software Foundation; either version 2 of the License, 9 * or (at your option) any later version. 10 * 11 * ReZound is distributed in the hope that it will be useful, but 12 * WITHOUT ANY WARRANTY; without even the implied warranty of 13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 * GNU General Public License for more details. 15 * 16 * You should have received a copy of the GNU General Public License 17 * along with this program; if not, write to the Free Software 18 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA 19 */ 20 21 #ifndef __DSP_Convolver_H__ 22 #define __DSP_Convolver_H__ 23 24 #include "../../config/common.h" 25 26 #include <stdexcept> 27 28 #include <istring> 29 30 #include "Delay.h" 31 32 33 /* --- TSimpleConvolver ------------------------------------ 34 * 35 * This class is a DSP block to do a sample by sample convolution of the given 36 * array of coefficients with the input given by the repeated calls to processSample() 37 * 38 * The frist template parameter specifies the type of the input samples (and is thus 39 * the type also of the output, the return value of processSample() ). And The second 40 * template parameter specifies the type of the coefficients. 41 * 42 * Actually, I saw a code example of time-domain convolution in O(n^log2(3)) which beats 43 * the current implementation O(n^2) 44 * Titled: "Time domain convolution with O(n^log2(3))" 45 * http://www.musicdsp.org/archive.php?classid=3#66 46 */ 47 template<class sample_t,class coefficient_t> class TSimpleConvolver 48 { 49 public: TSimpleConvolver(const coefficient_t _coefficients[],size_t _coefficientCount)50 TSimpleConvolver(const coefficient_t _coefficients[],size_t _coefficientCount) : 51 coefficients(_coefficients), 52 coefficientCount(_coefficientCount), 53 coefficientCountSub1(_coefficientCount-1), 54 delay(_coefficientCount) 55 { 56 if(coefficientCount<1) 57 throw runtime_error(string(__func__)+" -- invalid coefficientCount: "+istring(coefficientCount)); 58 } 59 ~TSimpleConvolver()60 virtual ~TSimpleConvolver() 61 { 62 } 63 processSample(const sample_t input)64 const sample_t processSample(const sample_t input) 65 { 66 coefficient_t output=input*coefficients[0]; 67 for(unsigned t=coefficientCountSub1;t>0;t--) 68 output+=delay.getSample(t)*coefficients[t]; 69 70 delay.putSample((coefficient_t)input); 71 72 return (sample_t)output; 73 } 74 75 private: 76 const coefficient_t *coefficients; // aka, the impluse response 77 const size_t coefficientCount; 78 const size_t coefficientCountSub1; 79 80 TDSPDelay<coefficient_t> delay; 81 }; 82 83 84 85 #ifdef HAVE_FFTW 86 87 #include <fftw3.h> 88 89 #include <TAutoBuffer.h> 90 91 typedef double fftw_real; 92 93 /* 94 * Written partly from stuff I learned in "The Scientist and 95 * Engineer's Guide to Digital Signal Processing": 96 * http://www.dspguide.com 97 * 98 * and partly from libfftw's documentation: 99 * http://www.fftw.org 100 * 101 * To use this DSP block it must be instantiated with a time-domain 102 * set of convolution coefficients, called the 'filter kernel'. 103 * 104 * Call beginWrite(); then, exactly getChunkSize() samples should be written 105 * with the writeSample() method. 106 * Then the beginRead() method should be called, and the same number of 107 * samples that were just written with writeSample() must be read with the 108 * readSample() method. And this 3-part procedure can be repeated. 109 * 110 * The one exception to writing exactly getChunkSize() samples is 111 * on the last chunk. If there are not getChunkSize() samples 112 * remaining in the input, the beginRead() method can be called 113 * sooner. And then readSample() can be called again for as many 114 * samples as were written. 115 * 116 * Optionally, after this is all finished, since the complete 117 * convolution actually creates the kernel filter's size - 1 118 * extra samples of output more than the size of the input, these 119 * can be read by calling readEndingSample() for the filter kernel's 120 * size - 1 times. 121 * 122 * The reset() method can be called if everything is to start over 123 * as if just after construction 124 * 125 * And of course, the object can be safely destroyed without having 126 * read all the samples that were written. 127 */ 128 129 130 // ??? I should be able to remove the extra element I allocate and the setting of the last element to zero from when I was using macros to access the data (which I'm not doing anymore).. just make sure the current implementation wouldn't expact that extra element to be there 131 template <class sample_t,class coefficient_t> class TFFTConvolverTimeDomainKernel 132 { 133 // ??? there are perhaps some optimizations like using memset instead of for-loops 134 public: TFFTConvolverTimeDomainKernel(const coefficient_t filterKernel[],size_t filterKernelSize)135 TFFTConvolverTimeDomainKernel(const coefficient_t filterKernel[],size_t filterKernelSize) : 136 M(filterKernelSize), 137 W(getFFTWindowSize()),fW(W), 138 N(W-M+1), 139 140 data(W),dataPos(0), 141 xform(W+1), 142 143 p (fftw_plan_r2r_1d(W, data, xform, FFTW_R2HC, FFTW_ESTIMATE)), 144 un_p(fftw_plan_r2r_1d(W, xform, data, FFTW_HC2R, FFTW_ESTIMATE)), 145 146 kernel_real(W/2+1), 147 kernel_img(W/2+1), 148 149 overlap(M-1), 150 overlapPos(0) 151 { 152 if(p==NULL || un_p==NULL) 153 throw runtime_error(string(__func__)+" -- fftw had an error creating plans"); 154 155 // ??? might want to check coefficient_t against fftw_real 156 157 prepareFilterKernel(filterKernel,data,xform,p,un_p); 158 //printf("chosen window size: %d\n",W); 159 160 reset(); 161 } 162 ~TFFTConvolverTimeDomainKernel()163 virtual ~TFFTConvolverTimeDomainKernel() 164 { 165 fftw_destroy_plan(p); 166 fftw_destroy_plan(un_p); 167 } 168 169 // the NEW set of coefficients MUST be the same size as the original one at construction setNewFilterKernel(const coefficient_t filterKernel[])170 void setNewFilterKernel(const coefficient_t filterKernel[]) 171 { 172 TAutoBuffer<fftw_real> data(W),xform(W+1); 173 fftw_plan p = fftw_plan_r2r_1d(W, data, xform, FFTW_R2HC, FFTW_ESTIMATE); 174 fftw_plan un_p = fftw_plan_r2r_1d(W, xform, data, FFTW_HC2R, FFTW_ESTIMATE); 175 176 prepareFilterKernel(filterKernel,data,xform,p,un_p); 177 178 fftw_destroy_plan(p); 179 fftw_destroy_plan(un_p); 180 } 181 182 getChunkSize()183 const size_t getChunkSize() const 184 { 185 return N; 186 } 187 beginWrite()188 void beginWrite() 189 { 190 dataPos=0; 191 } 192 writeSample(const sample_t s)193 void writeSample(const sample_t s) 194 { 195 data[dataPos++]=s; 196 } 197 beginRead()198 void beginRead() 199 { 200 if(dataPos>N) // inappropriate use 201 throw runtime_error(string(__func__)+" -- writeSample() was called too many times: "+istring(dataPos)+">TFFTConvolver::getChunkSize() ("+istring(N)+")"); 202 // pad data with zero 203 while(dataPos<W) 204 data[dataPos++]=0; 205 206 // do the fft data --> xform 207 fftw_execute(p); 208 xform[W]=0; // to help out macros 209 210 211 // ??? and here is where I could just as easily deconvolve by dividing with some flag 212 // multiply the frequency-domain kernel with the now frequency-domain audio 213 for(size_t t=0;t<=W/2;t++) 214 { 215 const fftw_real re=xform[t]; 216 const fftw_real im= (t==0 || t==W/2) ? 0 : xform[W-t]; 217 218 // complex multiplication 219 if(t!=0 && t!=W/2) 220 xform[W-t]=re*kernel_img[t] + im*kernel_real[t]; // im= re*k_im + im*k_re 221 xform[t]=re*kernel_real[t] - im*kernel_img[t]; // re= re*k_re - im*k_im 222 } 223 224 // do the inverse fft xfrorm --> data 225 fftw_execute(un_p); 226 227 // add the last segment's overlap to this segment 228 for(size_t t=0;t<M-1;t++) 229 data[t]+=overlap[t]; 230 231 // save the samples that will overlap the next segment 232 for(size_t t=N;t<W;t++) 233 overlap[t-N]=data[t]; 234 235 dataPos=0; 236 } 237 readSample()238 const fftw_real readSample() 239 { 240 return data[dataPos++]/fW; 241 } 242 readEndingSample()243 const fftw_real readEndingSample() 244 { 245 return overlap[overlapPos++]/fW; 246 } 247 reset()248 void reset() 249 { 250 dataPos=0; 251 overlapPos=0; 252 253 for(size_t t=0;t<M-1;t++) 254 overlap[t]=0; 255 } 256 getFilterKernelSize()257 const size_t getFilterKernelSize() const 258 { 259 return M; 260 } 261 getGoodFilterKernelSizes()262 static const vector<size_t> getGoodFilterKernelSizes() 263 { 264 // 2^13 5^6 2^14 3^9 2^15 3^10 2^16 5^7 7^6 2^17 3^11 2^18 5^8 2^19 3^12 7^7 2^20 3^13 5^9 2^21 2^22 3^14 7^8 2^23 5^10 3^15 2^24 11^7 265 static const size_t fftw_good_sizes[]={0,2,4,8,16,32,64,128,256,512,1024,2048,4096,8192,15625,16384,19683,32768,59049,65536,78125,117649,131072,177147,262144,390625,524288,531441,823543,1048576,1594323,1953125,2097152,4194304,4782969,5764801,8388608,9765625,14348907,16777216,19487171}; 266 static const size_t num_sizes=sizeof(fftw_good_sizes)/sizeof(*fftw_good_sizes); 267 268 vector<size_t> v; 269 for(unsigned t=0;t<num_sizes;t++) 270 v.push_back(fftw_good_sizes[t]); 271 return v; 272 } 273 274 private: 275 276 const size_t M; // length of filter kernel 277 const size_t W; // fft window size 278 const fftw_real fW; 279 const size_t N; // length of audio chunk to be processed each fft window (W-M+1) 280 281 TAutoBuffer<fftw_real> data; 282 size_t dataPos; 283 284 TAutoBuffer<fftw_real> xform; 285 286 fftw_plan p; 287 fftw_plan un_p; 288 289 TAutoBuffer<fftw_real> kernel_real; 290 TAutoBuffer<fftw_real> kernel_img; 291 292 TAutoBuffer<fftw_real> overlap; 293 size_t overlapPos; 294 295 getFFTWindowSize()296 const size_t getFFTWindowSize() const 297 { 298 if(M<=0) 299 throw runtime_error(string(__func__)+" -- filter kernel length is <= 0 -- "+istring(M)); 300 301 const vector<size_t> fftw_good_sizes=getGoodFilterKernelSizes(); 302 303 // the window size to use should be the first item in fftw_good_sizes that can accomidate the filterKernel, then one more bigger 304 for(size_t t=0;t<fftw_good_sizes.size()-1;t++) 305 { 306 if(fftw_good_sizes[t]>=M) 307 return fftw_good_sizes[t+1]; 308 } 309 310 throw runtime_error(string(__func__)+" -- cannot handle a filter kernel of size "+istring(M)+" -- perhaps simply another element needs to be added to fftw_good_sizes"); 311 } 312 prepareFilterKernel(const coefficient_t filterKernel[],fftw_real data[],fftw_real xform[],fftw_plan p,fftw_plan un_p)313 void prepareFilterKernel(const coefficient_t filterKernel[],fftw_real data[],fftw_real xform[],fftw_plan p,fftw_plan un_p) 314 { 315 // ??? perhaps a parameter could be passed that would indicate what the filterKernel is.. whether it's time-domain or freq domain already 316 317 // convert the given time-domain kernel into a frequency-domain kernel 318 for(size_t t=0;t<M;t++) // copy to data 319 data[t]=filterKernel[t]; 320 for(size_t t=M;t<W;t++) // pad with zero 321 data[t]=0; 322 323 fftw_execute(p); 324 xform[W]=0; // to help out macros 325 326 // copy into kernel_real and kernel_img 327 for(size_t t=0;t<=W/2;t++) 328 { 329 kernel_real[t]=xform[t]; 330 kernel_img[t]= (t==0 || t==W/2) ? 0 : xform[W-t]; 331 } 332 } 333 334 }; 335 336 337 template <class sample_t,class coefficient_t> class TFFTConvolverFrequencyDomainKernel : public TFFTConvolverTimeDomainKernel<sample_t,coefficient_t> 338 { 339 public: 340 // magnitude is the frequency domain gains to apply to the frequences, responseSize is the length of the magnitude array and the phase is optional, but if it's given, then it must be the same length as magnitude 341 // ??? explain that better 342 TFFTConvolverFrequencyDomainKernel(const coefficient_t magnitude[],size_t responseSize,const coefficient_t phase[]=NULL) : convertToTimeDomain(magnitude,phase,responseSize)343 TFFTConvolverTimeDomainKernel<sample_t,coefficient_t>(convertToTimeDomain(magnitude,phase,responseSize),(responseSize-1)*2) 344 { 345 delete [] tempKernel; 346 } 347 ~TFFTConvolverFrequencyDomainKernel()348 virtual ~TFFTConvolverFrequencyDomainKernel() 349 { 350 } 351 352 /* the responseSize must match the original responseSize at construction or results are undefined */ 353 void setNewMagnitudeArray(const coefficient_t magnitude[],size_t responseSize,const coefficient_t phase[]=NULL) 354 { 355 this->setNewFilterKernel(convertToTimeDomain(magnitude,phase,responseSize)); 356 delete [] tempKernel; 357 } 358 359 360 private: 361 362 coefficient_t *tempKernel; 363 polar_to_rec_real(double mag,double phase)364 static double polar_to_rec_real(double mag,double phase) 365 { 366 return mag*cos(phase); 367 } 368 polar_to_rec_img(double mag,double phase)369 static double polar_to_rec_img(double mag,double phase) 370 { 371 return mag*sin(phase); 372 } 373 convertToTimeDomain(const coefficient_t magnitude[],const coefficient_t _phase[],size_t responseSize)374 const coefficient_t *convertToTimeDomain(const coefficient_t magnitude[],const coefficient_t _phase[],size_t responseSize) 375 { 376 tempKernel=NULL; 377 378 // allocate zeroed-out phase response if it wasn't given 379 coefficient_t *phase=new coefficient_t[responseSize]; 380 if(_phase==NULL) 381 { // create all zero phase array 382 for(size_t t=0;t<responseSize;t++) 383 phase[t]=0; 384 } 385 else 386 { // copy from given phase arrray 387 for(size_t t=0;t<responseSize;t++) 388 phase[t]=_phase[t]; 389 390 // assert this property of the real DFT 391 phase[0]=0; 392 phase[responseSize-1]=0; 393 } 394 395 // convert given polar coordinate frequency domain kernel to rectangular coordinates 396 TAutoBuffer<coefficient_t> real(responseSize); 397 TAutoBuffer<coefficient_t> imaginary(responseSize); 398 for(size_t t=0;t<responseSize;t++) 399 { 400 real[t]=polar_to_rec_real(magnitude[t],phase[t]); 401 imaginary[t]=polar_to_rec_img(magnitude[t],phase[t]); 402 } 403 404 405 // convert rectangular coordinate frequency domain kernel to the time domain 406 const size_t W=(responseSize-1)*2; 407 TAutoBuffer<fftw_real> xform(W); 408 for(size_t t=0;t<responseSize;t++) 409 { // setup the array in the form fftw expects: [ r0,r1,r2, ... rW/2, iW/2-1, ... i2, i1 ] 410 xform[t]=real[t]; 411 if(t!=0 && t!=W/2) 412 xform[W-t]=imaginary[t]; 413 } 414 TAutoBuffer<fftw_real> data(W); 415 fftw_plan un_p = fftw_plan_r2r_1d(W, xform, data, FFTW_HC2R, FFTW_ESTIMATE); 416 fftw_execute(un_p); // xform -> data 417 fftw_destroy_plan(un_p); 418 419 420 // fftw scales the output of the complex->real transform by W (undo this) 421 for(size_t t=0;t<W;t++) 422 data[t]/=W; 423 424 425 // now I have to shift, truncate and window the resulting time domain kernel 426 427 // truncate to M samples 428 const size_t M=responseSize; 429 430 // rotate the kernel forward by M/2 (also unfortunately causes a delay in the filtered signal by M/2 samples) 431 TAutoBuffer<coefficient_t> temp(W); 432 for(size_t t=0;t<W;t++) 433 temp[(t+M/2)%W]=data[t]; 434 435 // apply the hamming window to the kernel centered around M/2 and truncate anything past M (and put finally into tempKernel which will be returned) 436 tempKernel=new coefficient_t[W]; 437 for(size_t t=0;t<W;t++) 438 { 439 if(t<=M) 440 tempKernel[t]=temp[t]*(0.54-0.46*cos(2.0*M_PI*t/M)); 441 else 442 tempKernel[t]=0; 443 444 } 445 446 // filterKernel is now ~ twice the length as the original given magnitude array, except it's padded with zeros beyond what was given 447 448 #if 0 449 // test the filter kernel 450 { 451 TAutoBuffer<fftw_real> data(W); 452 TAutoBuffer<fftw_real> xform(W); 453 454 for(size_t t=0;t<W;t++) 455 data[t]=tempKernel[t]; 456 457 rfftw_plan p=rfftw_create_plan(W, FFTW_REAL_TO_COMPLEX, FFTW_ESTIMATE); 458 rfftw_one(p, data, xform); // data -> xform 459 rfftw_destroy_plan(p); 460 461 462 // avoid the first and last element cause the imaginary parts aren't in xform 463 FILE *f=fopen("/home/ddurham/code/rezound/o","w"); 464 for(size_t t=1;t<W/2-1;t++) 465 fprintf(f,"%d\t%f\n",t,sqrt(pow(xform[t],2.0)+pow(xform[W-t],2.0))); 466 fclose(f); 467 468 } 469 #endif 470 471 delete [] phase; 472 473 return tempKernel; 474 } 475 }; 476 477 478 479 /* some helper functions for if/when I need to convert fftw's rectangular transformed output to polar coordinates and back 480 #include <math.h> 481 static float rec_to_polar_mag(float real,float img) 482 { 483 return sqrt(real*real+img*img); 484 } 485 486 static float rec_to_polar_phase(float real,float img) 487 { 488 if(real==0) real=1e-20; 489 float phase=atan(img/real); 490 if(real<0 && img<0) phase=phase-M_PI; 491 else if(real<0 && img>=0) phase=phase+M_PI; 492 return phase; 493 } 494 495 static float polar_to_rec_real(float mag,float phase) 496 { 497 return mag*cos(phase); 498 } 499 500 static float polar_to_rec_img(float mag,float phase) 501 { 502 return mag*sin(phase); 503 } 504 */ 505 506 507 508 #endif 509 510 #endif 511