1 /*
2  * Copyright (C) 2002 - David W. Durham
3  *
4  * This file is part of ReZound, an audio editing application.
5  *
6  * ReZound is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License as published
8  * by the Free Software Foundation; either version 2 of the License,
9  * or (at your option) any later version.
10  *
11  * ReZound is distributed in the hope that it will be useful, but
12  * WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License
17  * along with this program; if not, write to the Free Software
18  * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA
19  */
20 
21 #ifndef __DSP_Convolver_H__
22 #define __DSP_Convolver_H__
23 
24 #include "../../config/common.h"
25 
26 #include <stdexcept>
27 
28 #include <istring>
29 
30 #include "Delay.h"
31 
32 
33 /* --- TSimpleConvolver ------------------------------------
34  *
35  *	This class is a DSP block to do a sample by sample convolution of the given
36  *	array of coefficients with the input given by the repeated calls to processSample()
37  *
38  *	The frist template parameter specifies the type of the input samples (and is thus
39  *	the type also of the output, the return value of processSample() ).  And The second
40  *	template parameter specifies the type of the coefficients.
41  *
42  *	Actually, I saw a code example of time-domain convolution in O(n^log2(3)) which beats
43  * 	the current implementation O(n^2)
44  *		Titled: "Time domain convolution with O(n^log2(3))"
45  * 			http://www.musicdsp.org/archive.php?classid=3#66
46  */
47 template<class sample_t,class coefficient_t> class TSimpleConvolver
48 {
49 public:
TSimpleConvolver(const coefficient_t _coefficients[],size_t _coefficientCount)50 	TSimpleConvolver(const coefficient_t _coefficients[],size_t _coefficientCount) :
51 		coefficients(_coefficients),
52 		coefficientCount(_coefficientCount),
53 		coefficientCountSub1(_coefficientCount-1),
54 		delay(_coefficientCount)
55 	{
56 		if(coefficientCount<1)
57 			throw runtime_error(string(__func__)+" -- invalid coefficientCount: "+istring(coefficientCount));
58 	}
59 
~TSimpleConvolver()60 	virtual ~TSimpleConvolver()
61 	{
62 	}
63 
processSample(const sample_t input)64 	const sample_t processSample(const sample_t input)
65 	{
66 		coefficient_t output=input*coefficients[0];
67 		for(unsigned t=coefficientCountSub1;t>0;t--)
68 			output+=delay.getSample(t)*coefficients[t];
69 
70 		delay.putSample((coefficient_t)input);
71 
72 		return (sample_t)output;
73 	}
74 
75 private:
76 	const coefficient_t *coefficients; // aka, the impluse response
77 	const size_t coefficientCount;
78 	const size_t coefficientCountSub1;
79 
80 	TDSPDelay<coefficient_t> delay;
81 };
82 
83 
84 
85 #ifdef HAVE_FFTW
86 
87 #include <fftw3.h>
88 
89 #include <TAutoBuffer.h>
90 
91 typedef double fftw_real;
92 
93 /*
94  *	Written partly from stuff I learned in "The Scientist and
95  *	Engineer's Guide to Digital Signal Processing":
96  *		http://www.dspguide.com
97  *
98  *	and partly from libfftw's documentation:
99  *		http://www.fftw.org
100  *
101  * 	To use this DSP block it must be instantiated with a time-domain
102  * 	set of convolution coefficients, called the 'filter kernel'.
103  *
104  * 	Call beginWrite(); then, exactly getChunkSize() samples should be written
105  * 	with the writeSample() method.
106  * 	Then the beginRead() method should be called,  and the same number of
107  * 	samples that were just written with writeSample() must be read with the
108  * 	readSample() method.  And this 3-part procedure can be repeated.
109  *
110  * 	The one exception to writing exactly getChunkSize() samples is
111  * 	on the last chunk.  If there are not getChunkSize() samples
112  * 	remaining in the input, the beginRead() method can be called
113  * 	sooner.  And then readSample() can be called again for as many
114  * 	samples as were written.
115  *
116  * 	Optionally, after this is all finished, since the complete
117  * 	convolution actually creates the kernel filter's size - 1
118  * 	extra samples of output more than the size of the input, these
119  * 	can be read by calling readEndingSample() for the filter kernel's
120  * 	size - 1 times.
121  *
122  * 	The reset() method can be called if everything is to start over
123  * 	as if just after construction
124  *
125  * 	And of course, the object can be safely destroyed without having
126  * 	read all the samples that were written.
127  */
128 
129 
130 // ??? I should be able to remove the extra element I allocate and the setting of the last element to zero from when I was using macros to access the data (which I'm not doing anymore).. just make sure the current implementation wouldn't expact that extra element to be there
131 template <class sample_t,class coefficient_t> class TFFTConvolverTimeDomainKernel
132 {
133 	// ??? there are perhaps some optimizations like using memset instead of for-loops
134 public:
TFFTConvolverTimeDomainKernel(const coefficient_t filterKernel[],size_t filterKernelSize)135 	TFFTConvolverTimeDomainKernel(const coefficient_t filterKernel[],size_t filterKernelSize) :
136 		M(filterKernelSize),
137 		W(getFFTWindowSize()),fW(W),
138 		N(W-M+1),
139 
140 		data(W),dataPos(0),
141 		xform(W+1),
142 
143 		p   (fftw_plan_r2r_1d(W, data, xform, FFTW_R2HC, FFTW_ESTIMATE)),
144 		un_p(fftw_plan_r2r_1d(W, xform, data, FFTW_HC2R, FFTW_ESTIMATE)),
145 
146 		kernel_real(W/2+1),
147 		kernel_img(W/2+1),
148 
149 		overlap(M-1),
150 		overlapPos(0)
151 	{
152 		if(p==NULL || un_p==NULL)
153 			throw runtime_error(string(__func__)+" -- fftw had an error creating plans");
154 
155 		// ??? might want to check coefficient_t against fftw_real
156 
157 		prepareFilterKernel(filterKernel,data,xform,p,un_p);
158 		//printf("chosen window size: %d\n",W);
159 
160 		reset();
161 	}
162 
~TFFTConvolverTimeDomainKernel()163 	virtual ~TFFTConvolverTimeDomainKernel()
164 	{
165 		fftw_destroy_plan(p);
166 		fftw_destroy_plan(un_p);
167 	}
168 
169 	// the NEW set of coefficients MUST be the same size as the original one at construction
setNewFilterKernel(const coefficient_t filterKernel[])170 	void setNewFilterKernel(const coefficient_t filterKernel[])
171 	{
172 		TAutoBuffer<fftw_real> data(W),xform(W+1);
173 		fftw_plan p    = fftw_plan_r2r_1d(W, data, xform, FFTW_R2HC, FFTW_ESTIMATE);
174 		fftw_plan un_p = fftw_plan_r2r_1d(W, xform, data, FFTW_HC2R, FFTW_ESTIMATE);
175 
176 		prepareFilterKernel(filterKernel,data,xform,p,un_p);
177 
178 		fftw_destroy_plan(p);
179 		fftw_destroy_plan(un_p);
180 	}
181 
182 
getChunkSize()183 	const size_t getChunkSize() const
184 	{
185 		return N;
186 	}
187 
beginWrite()188 	void beginWrite()
189 	{
190 		dataPos=0;
191 	}
192 
writeSample(const sample_t s)193 	void writeSample(const sample_t s)
194 	{
195 		data[dataPos++]=s;
196 	}
197 
beginRead()198 	void beginRead()
199 	{
200 		if(dataPos>N) // inappropriate use
201 			throw runtime_error(string(__func__)+" -- writeSample() was called too many times: "+istring(dataPos)+">TFFTConvolver::getChunkSize() ("+istring(N)+")");
202 		// pad data with zero
203 		while(dataPos<W)
204 			data[dataPos++]=0;
205 
206 		// do the fft data --> xform
207 		fftw_execute(p);
208 		xform[W]=0; // to help out macros
209 
210 
211 			// ??? and here is where I could just as easily deconvolve by dividing with some flag
212 		// multiply the frequency-domain kernel with the now frequency-domain audio
213 		for(size_t t=0;t<=W/2;t++)
214 		{
215 			const fftw_real re=xform[t];
216 			const fftw_real im= (t==0 || t==W/2) ? 0 : xform[W-t];
217 
218 			// complex multiplication
219 			if(t!=0 && t!=W/2)
220 				xform[W-t]=re*kernel_img[t] + im*kernel_real[t];	// im= re*k_im + im*k_re
221 			xform[t]=re*kernel_real[t] - im*kernel_img[t];			// re= re*k_re - im*k_im
222 		}
223 
224 		// do the inverse fft xfrorm --> data
225 		fftw_execute(un_p);
226 
227 		// add the last segment's overlap to this segment
228 		for(size_t t=0;t<M-1;t++)
229 			data[t]+=overlap[t];
230 
231 		// save the samples that will overlap the next segment
232 		for(size_t t=N;t<W;t++)
233 			overlap[t-N]=data[t];
234 
235 		dataPos=0;
236 	}
237 
readSample()238 	const fftw_real readSample()
239 	{
240 		return data[dataPos++]/fW;
241 	}
242 
readEndingSample()243 	const fftw_real readEndingSample()
244 	{
245 		return overlap[overlapPos++]/fW;
246 	}
247 
reset()248 	void reset()
249 	{
250 		dataPos=0;
251 		overlapPos=0;
252 
253 		for(size_t t=0;t<M-1;t++)
254 			overlap[t]=0;
255 	}
256 
getFilterKernelSize()257 	const size_t getFilterKernelSize() const
258 	{
259 		return M;
260 	}
261 
getGoodFilterKernelSizes()262 	static const vector<size_t> getGoodFilterKernelSizes()
263 	{
264 		//                                                                                 2^13 5^6   2^14  3^9   2^15  3^10  2^16  5^7   7^6    2^17   3^11   2^18   5^8    2^19   3^12   7^7    2^20    3^13    5^9     2^21    2^22    3^14    7^8     2^23    5^10    3^15     2^24     11^7
265 		static const size_t fftw_good_sizes[]={0,2,4,8,16,32,64,128,256,512,1024,2048,4096,8192,15625,16384,19683,32768,59049,65536,78125,117649,131072,177147,262144,390625,524288,531441,823543,1048576,1594323,1953125,2097152,4194304,4782969,5764801,8388608,9765625,14348907,16777216,19487171};
266 		static const size_t num_sizes=sizeof(fftw_good_sizes)/sizeof(*fftw_good_sizes);
267 
268 		vector<size_t> v;
269 		for(unsigned t=0;t<num_sizes;t++)
270 			v.push_back(fftw_good_sizes[t]);
271 		return v;
272 	}
273 
274 private:
275 
276 	const size_t M; // length of filter kernel
277 	const size_t W; // fft window size
278 		const fftw_real fW;
279 	const size_t N; // length of audio chunk to be processed each fft window (W-M+1)
280 
281 	TAutoBuffer<fftw_real> data;
282 	size_t dataPos;
283 
284 	TAutoBuffer<fftw_real> xform;
285 
286 	fftw_plan p;
287 	fftw_plan un_p;
288 
289 	TAutoBuffer<fftw_real> kernel_real;
290 	TAutoBuffer<fftw_real> kernel_img;
291 
292 	TAutoBuffer<fftw_real> overlap;
293 	size_t overlapPos;
294 
295 
getFFTWindowSize()296 	const size_t getFFTWindowSize() const
297 	{
298 		if(M<=0)
299 			throw runtime_error(string(__func__)+" -- filter kernel length is <= 0 -- "+istring(M));
300 
301 		const vector<size_t> fftw_good_sizes=getGoodFilterKernelSizes();
302 
303 		// the window size to use should be the first item in fftw_good_sizes that can accomidate the filterKernel, then one more bigger
304 		for(size_t t=0;t<fftw_good_sizes.size()-1;t++)
305 		{
306 			if(fftw_good_sizes[t]>=M)
307 				return fftw_good_sizes[t+1];
308 		}
309 
310 		throw runtime_error(string(__func__)+" -- cannot handle a filter kernel of size "+istring(M)+" -- perhaps simply another element needs to be added to fftw_good_sizes");
311 	}
312 
prepareFilterKernel(const coefficient_t filterKernel[],fftw_real data[],fftw_real xform[],fftw_plan p,fftw_plan un_p)313 	void prepareFilterKernel(const coefficient_t filterKernel[],fftw_real data[],fftw_real xform[],fftw_plan p,fftw_plan un_p)
314 	{
315 			// ??? perhaps a parameter could be passed that would indicate what the filterKernel is.. whether it's time-domain or freq domain already
316 
317 		// convert the given time-domain kernel into a frequency-domain kernel
318 		for(size_t t=0;t<M;t++) // copy to data
319 			data[t]=filterKernel[t];
320 		for(size_t t=M;t<W;t++) // pad with zero
321 			data[t]=0;
322 
323 		fftw_execute(p);
324 		xform[W]=0; // to help out macros
325 
326 		// copy into kernel_real and kernel_img
327 		for(size_t t=0;t<=W/2;t++)
328 		{
329 			kernel_real[t]=xform[t];
330 			kernel_img[t]= (t==0 || t==W/2) ? 0 : xform[W-t];
331 		}
332 	}
333 
334 };
335 
336 
337 template <class sample_t,class coefficient_t> class TFFTConvolverFrequencyDomainKernel : public TFFTConvolverTimeDomainKernel<sample_t,coefficient_t>
338 {
339 public:
340 	// magnitude is the frequency domain gains to apply to the frequences, responseSize is the length of the magnitude array and the phase is optional, but if it's given, then it must be the same length as magnitude
341 			// ??? explain that better
342 	TFFTConvolverFrequencyDomainKernel(const coefficient_t magnitude[],size_t responseSize,const coefficient_t phase[]=NULL) :
convertToTimeDomain(magnitude,phase,responseSize)343 		TFFTConvolverTimeDomainKernel<sample_t,coefficient_t>(convertToTimeDomain(magnitude,phase,responseSize),(responseSize-1)*2)
344 	{
345 		delete [] tempKernel;
346 	}
347 
~TFFTConvolverFrequencyDomainKernel()348 	virtual ~TFFTConvolverFrequencyDomainKernel()
349 	{
350 	}
351 
352 	/* the responseSize must match the original responseSize at construction or results are undefined */
353 	void setNewMagnitudeArray(const coefficient_t magnitude[],size_t responseSize,const coefficient_t phase[]=NULL)
354 	{
355 		this->setNewFilterKernel(convertToTimeDomain(magnitude,phase,responseSize));
356 		delete [] tempKernel;
357 	}
358 
359 
360 private:
361 
362 	coefficient_t *tempKernel;
363 
polar_to_rec_real(double mag,double phase)364 	static double polar_to_rec_real(double mag,double phase)
365 	{
366 		return mag*cos(phase);
367 	}
368 
polar_to_rec_img(double mag,double phase)369 	static double polar_to_rec_img(double mag,double phase)
370 	{
371 		return mag*sin(phase);
372 	}
373 
convertToTimeDomain(const coefficient_t magnitude[],const coefficient_t _phase[],size_t responseSize)374 	const coefficient_t *convertToTimeDomain(const coefficient_t magnitude[],const coefficient_t _phase[],size_t responseSize)
375 	{
376 		tempKernel=NULL;
377 
378 		// allocate zeroed-out phase response if it wasn't given
379 		coefficient_t *phase=new coefficient_t[responseSize];
380 		if(_phase==NULL)
381 		{ // create all zero phase array
382 			for(size_t t=0;t<responseSize;t++)
383 				phase[t]=0;
384 		}
385 		else
386 		{ // copy from given phase arrray
387 			for(size_t t=0;t<responseSize;t++)
388 				phase[t]=_phase[t];
389 
390 			// assert this property of the real DFT
391 			phase[0]=0;
392 			phase[responseSize-1]=0;
393 		}
394 
395 		// convert given polar coordinate frequency domain kernel to rectangular coordinates
396 		TAutoBuffer<coefficient_t> real(responseSize);
397 		TAutoBuffer<coefficient_t> imaginary(responseSize);
398 		for(size_t t=0;t<responseSize;t++)
399 		{
400 			real[t]=polar_to_rec_real(magnitude[t],phase[t]);
401 			imaginary[t]=polar_to_rec_img(magnitude[t],phase[t]);
402 		}
403 
404 
405 		// convert rectangular coordinate frequency domain kernel to the time domain
406 		const size_t W=(responseSize-1)*2;
407 		TAutoBuffer<fftw_real> xform(W);
408 		for(size_t t=0;t<responseSize;t++)
409 		{ // setup the array in the form fftw expects: [  r0,r1,r2, ... rW/2, iW/2-1, ... i2, i1 ]
410 			xform[t]=real[t];
411 			if(t!=0 && t!=W/2)
412 				xform[W-t]=imaginary[t];
413 		}
414 		TAutoBuffer<fftw_real> data(W);
415 		fftw_plan un_p = fftw_plan_r2r_1d(W, xform, data, FFTW_HC2R, FFTW_ESTIMATE);
416 		fftw_execute(un_p);	// xform -> data
417 		fftw_destroy_plan(un_p);
418 
419 
420 		// fftw scales the output of the complex->real transform by W (undo this)
421 		for(size_t t=0;t<W;t++)
422 			data[t]/=W;
423 
424 
425 		// now I have to shift, truncate and window the resulting time domain kernel
426 
427 		// truncate to M samples
428 		const size_t M=responseSize;
429 
430 		// rotate the kernel forward by M/2 (also unfortunately causes a delay in the filtered signal by M/2 samples)
431 		TAutoBuffer<coefficient_t> temp(W);
432 		for(size_t t=0;t<W;t++)
433 			temp[(t+M/2)%W]=data[t];
434 
435 		// apply the hamming window to the kernel centered around M/2 and truncate anything past M (and put finally into tempKernel which will be returned)
436 		tempKernel=new coefficient_t[W];
437 		for(size_t t=0;t<W;t++)
438 		{
439 			if(t<=M)
440 				tempKernel[t]=temp[t]*(0.54-0.46*cos(2.0*M_PI*t/M));
441 			else
442 				tempKernel[t]=0;
443 
444 		}
445 
446 		// filterKernel is now ~ twice the length as the original given magnitude array, except it's padded with zeros beyond what was given
447 
448 #if 0
449 		// test the filter kernel
450 		{
451 			TAutoBuffer<fftw_real> data(W);
452 			TAutoBuffer<fftw_real> xform(W);
453 
454 			for(size_t t=0;t<W;t++)
455 				data[t]=tempKernel[t];
456 
457 			rfftw_plan p=rfftw_create_plan(W, FFTW_REAL_TO_COMPLEX, FFTW_ESTIMATE);
458 			rfftw_one(p, data, xform);	// data -> xform
459 			rfftw_destroy_plan(p);
460 
461 
462 			// avoid the first and last element cause the imaginary parts aren't in xform
463 			FILE *f=fopen("/home/ddurham/code/rezound/o","w");
464 			for(size_t t=1;t<W/2-1;t++)
465 				fprintf(f,"%d\t%f\n",t,sqrt(pow(xform[t],2.0)+pow(xform[W-t],2.0)));
466 			fclose(f);
467 
468 		}
469 #endif
470 
471 		delete [] phase;
472 
473 		return tempKernel;
474 	}
475 };
476 
477 
478 
479 /* some helper functions for if/when I need to convert fftw's rectangular transformed output to polar coordinates and back
480 #include <math.h>
481 static float rec_to_polar_mag(float real,float img)
482 {
483 	return sqrt(real*real+img*img);
484 }
485 
486 static float rec_to_polar_phase(float real,float img)
487 {
488 	if(real==0) real=1e-20;
489 	float phase=atan(img/real);
490 	if(real<0 && img<0) phase=phase-M_PI;
491 	else if(real<0 && img>=0) phase=phase+M_PI;
492 	return phase;
493 }
494 
495 static float polar_to_rec_real(float mag,float phase)
496 {
497 	return mag*cos(phase);
498 }
499 
500 static float polar_to_rec_img(float mag,float phase)
501 {
502 	return mag*sin(phase);
503 }
504 */
505 
506 
507 
508 #endif
509 
510 #endif
511