1 // ----------------------------------------------------------------------------
2 //
3 //  Copyright (C) 2006-2011 Fons Adriaensen <fons@linuxaudio.org>
4 //
5 //  This program is free software; you can redistribute it and/or modify
6 //  it under the terms of the GNU General Public License as published by
7 //  the Free Software Foundation; either version 3 of the License, or
8 //  (at your option) any later version.
9 //
10 //  This program is distributed in the hope that it will be useful,
11 //  but WITHOUT ANY WARRANTY; without even the implied warranty of
12 //  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13 //  GNU General Public License for more details.
14 //
15 //  You should have received a copy of the GNU General Public License
16 //  along with this program.  If not, see <http://www.gnu.org/licenses/>.
17 //
18 // ----------------------------------------------------------------------------
19 
20 
21 #include <unistd.h>
22 #include <stdlib.h>
23 #include <string.h>
24 #include <stdio.h>
25 #include <cmath>
26 extern "C" {
27 #define __STDC_CONSTANT_MACROS  // needed for UINT64_C (libavutil 0.8.6)
28 #include <libavutil/common.h>
29 }
30 #include "zita-convolver.h"
31 #include "gx_compiler.h"
32 
zita_convolver_major_version(void)33 int zita_convolver_major_version (void)
34 {
35     return ZITA_CONVOLVER_MAJOR_VERSION;
36 }
37 
38 
39 float Convproc::_mac_cost = 1.0f;
40 float Convproc::_fft_cost = 5.0f;
41 
42 
Convproc(void)43 Convproc::Convproc (void) :
44     _state (ST_IDLE),
45     _skipcnt (0),
46     _density (0),
47     _ninp (0),
48     _nout (0),
49     _quantum (0),
50     _minpart (0),
51     _maxpart (0),
52     _nlevels (0),
53     _latecnt (0)
54 {
55     memset (_inpbuff, 0, MAXINP * sizeof (float *));
56     memset (_outbuff, 0, MAXOUT * sizeof (float *));
57     memset (_convlev, 0, MAXLEV * sizeof (Convlevel *));
58 }
59 
60 
~Convproc(void)61 Convproc::~Convproc (void)
62 {
63     cleanup ();
64 }
65 
66 
set_density(float density)67 void Convproc::set_density (float density)
68 {
69     _density = density;
70 }
71 
72 
set_skipcnt(unsigned int skipcnt)73 void Convproc::set_skipcnt (unsigned int skipcnt)
74 {
75     if ((_quantum == _minpart) && (_quantum == _maxpart)) _skipcnt = skipcnt;
76 }
77 
78 
configure(unsigned int ninp,unsigned int nout,unsigned int maxsize,unsigned int quantum,unsigned int minpart,unsigned int maxpart)79 int Convproc::configure (unsigned int ninp,
80                          unsigned int nout,
81                          unsigned int maxsize,
82                          unsigned int quantum,
83                          unsigned int minpart,
84 			 unsigned int maxpart)
85 {
86     unsigned int  offs, npar, size, pind, nmin, nmax, step, i;
87     int           prio, d, r, s;
88     float         cfft, cmac, t;
89 
90     if (_state != ST_IDLE) return Converror::BAD_STATE;
91     if (   (quantum & (quantum - 1))
92         || (quantum < MINQUANT)
93         || (quantum > MAXQUANT)
94         || (minpart & (minpart - 1))
95 	|| (minpart < MINPART)
96         || (minpart < quantum)
97         || (minpart > MAXDIVIS * quantum)
98         || (maxpart & (maxpart - 1))
99 	|| (maxpart > MAXPART)
100 	|| (maxpart < minpart)) return Converror::BAD_PARAM;
101 
102     if (ninp < nout) { nmin = ninp; nmax = nout; }
103     else             { nmin = nout; nmax = ninp; }
104 
105     if (_density <= 0) _density = 1.0 / nmin;
106     else
107     {
108         t = 1.0f / nmax;
109         if (_density < t) _density = t;
110         if (_density > 1) _density = 1;
111     }
112 
113     cfft = _fft_cost * (ninp + nout);
114     cmac = _mac_cost * ninp * nout * _density;
115     step = (cfft < 4 * cmac) ? 1 : 2;
116 
117     if (step == 2)
118     {
119         r = maxpart / minpart;
120         s = (r & 0xAAAA) ? 1 : 2;
121     }
122     else s = 1;
123     nmin = (s == 1) ? 2 : 6;
124     if (minpart == quantum) nmin++;
125 
126     prio = 0;
127     size = quantum;
128     while (size < minpart)
129     {
130 	prio -= 1;
131 	size <<= 1;
132     }
133 
134     try
135     {
136 	for (offs = pind = 0; offs < maxsize; pind++)
137 	{
138 	    npar = (maxsize - offs + size - 1) / size;
139 	    if ((size < maxpart) && (npar > nmin))
140 	    {
141 		r = 1 << s;
142 		d = npar - nmin;
143 		d = d - (d + r - 1) / r;
144 		if (cfft < d * cmac) npar = nmin;
145 	    }
146 	    _convlev [pind] = new Convlevel ();
147 	    _convlev [pind]->configure (prio, offs, npar, size);
148 
149 	    offs += size * npar;
150 	    if (offs < maxsize)
151 	    {
152 		prio -= s;
153 		size <<= s;
154 		s = step;
155                 nmin = (s == 1) ? 2 : 6;
156 	    }
157 	}
158 
159 	_ninp = ninp;
160 	_nout = nout;
161 	_quantum = quantum;
162 	_minpart = minpart;
163 	_maxpart = size;
164 	_nlevels = pind;
165 	_latecnt = 0;
166 	_inpsize = 2 * size;
167 
168 	for (i = 0; i < ninp; i++) _inpbuff [i] = new float [_inpsize];
169 	for (i = 0; i < nout; i++) _outbuff [i] = new float [_minpart];
170     }
171     catch (...)
172     {
173 	cleanup ();
174 	return Converror::MEM_ALLOC;
175     }
176 
177     _state = ST_STOP;
178     return 0;
179 }
180 
181 
impdata_create(unsigned int inp,unsigned int out,unsigned int step,float * data,int ind0,int ind1)182 int Convproc::impdata_create (unsigned int inp,
183                               unsigned int out,
184                               unsigned int step,
185                               float       *data,
186                               int          ind0,
187                               int          ind1)
188 {
189     unsigned int j;
190 
191     if (_state != ST_STOP) return Converror::BAD_STATE;
192     try
193     {
194         for (j = 0; j < _nlevels; j++)
195 	{
196             _convlev [j]->impdata_create (inp, out, step, data, ind0, ind1);
197 	}
198     }
199     catch (...)
200     {
201 	cleanup ();
202 	return Converror::MEM_ALLOC;
203     }
204     return 0;
205 }
206 
207 
impdata_update(unsigned int inp,unsigned int out,unsigned int step,float * data,int ind0,int ind1)208 int Convproc::impdata_update (unsigned int inp,
209                               unsigned int out,
210                               unsigned int step,
211                               float       *data,
212                               int          ind0,
213                               int          ind1)
214 {
215     unsigned int j;
216 
217     if (_state < ST_STOP) return Converror::BAD_STATE;
218     for (j = 0; j < _nlevels; j++)
219     {
220         _convlev [j]->impdata_update (inp, out, step, data, ind0, ind1);
221     }
222     return 0;
223 }
224 
225 
impdata_copy(unsigned int inp1,unsigned int out1,unsigned int inp2,unsigned int out2)226 int Convproc::impdata_copy (unsigned int inp1,
227                             unsigned int out1,
228                             unsigned int inp2,
229                             unsigned int out2)
230 {
231     unsigned int j;
232 
233     if (_state != ST_STOP) return Converror::BAD_STATE;
234     try
235     {
236         for (j = 0; j < _nlevels; j++)
237 	{
238             _convlev [j]->impdata_copy (inp1, out1, inp2, out2);
239 	}
240     }
241     catch (...)
242     {
243 	cleanup ();
244 	return Converror::MEM_ALLOC;
245     }
246     return 0;
247 }
248 
249 
reset(void)250 int Convproc::reset (void)
251 {
252     unsigned int k;
253 
254     if (_state == ST_IDLE) return Converror::BAD_STATE;
255     for (k = 0; k < _ninp; k++) memset (_inpbuff [k], 0, _inpsize * sizeof (float));
256     for (k = 0; k < _nout; k++) memset (_outbuff [k], 0, _minpart * sizeof (float));
257     for (k = 0; k < _nlevels; k++) _convlev [k]->reset (_inpsize, _minpart, _inpbuff, _outbuff);
258     return 0;
259 }
260 
261 
start_process(int abspri,int policy)262 int Convproc::start_process (int abspri, int policy)
263 {
264     unsigned int k;
265 
266     if (_state != ST_STOP) return Converror::BAD_STATE;
267 
268     _latecnt = 0;
269     _inpoffs = 0;
270     _outoffs = 0;
271     reset ();
272     for (k = (_minpart == _quantum) ? 1 : 0; k < _nlevels; k++)
273     {
274          _convlev [k]->start (abspri, policy);
275     }
276     _state = ST_PROC;
277     return 0;
278 }
279 
280 
process(bool sync)281 int __rt_func Convproc::process (bool sync)
282 {
283     unsigned int k;
284     int f = 0;
285 
286     if (_state != ST_PROC) return 0;
287 
288     _inpoffs += _quantum;
289     if (_inpoffs == _inpsize) _inpoffs = 0;
290 
291     _outoffs += _quantum;
292     if (_outoffs == _minpart)
293     {
294         _outoffs = 0;
295 	for (k = 0; k < _nout; k++) memset (_outbuff [k], 0, _minpart * sizeof (float));
296 	for (k = 0; k < _nlevels; k++) f |= _convlev [k]->readout (sync, _skipcnt);
297 	if (_skipcnt < _minpart) _skipcnt = 0;
298 	else _skipcnt -= _minpart;
299         if (f)
300 	{
301             if (++_latecnt >= 5)
302             {
303 	        f |= FL_LOAD;
304 	    }
305 	}
306         else _latecnt = 0;
307     }
308     return f;
309 }
310 
311 
stop_process(void)312 int Convproc::stop_process (void)
313 {
314     unsigned int k;
315 
316     if (_state != ST_PROC) return Converror::BAD_STATE;
317     for (k = 0; k < _nlevels; k++) _convlev [k]->stop ();
318     _state = ST_WAIT;
319     return 0;
320 }
321 
322 
cleanup(void)323 int Convproc::cleanup (void)
324 {
325     unsigned int k;
326 
327     while (! check_stop ())
328     {
329         usleep (100000);
330     }
331     if (_state != ST_STOP)
332     {
333         return Converror::BAD_STATE;
334     }
335 
336     for (k = 0; k < _ninp; k++)
337     {
338         delete[] _inpbuff [k];
339 	_inpbuff [k] = 0;
340     }
341     for (k = 0; k < _nout; k++)
342     {
343         delete[] _outbuff [k];
344 	_outbuff [k] = 0;
345     }
346     for (k = 0; k < _nlevels; k++)
347     {
348 	delete _convlev [k];
349 	_convlev [k] = 0;
350     }
351 
352     _state = ST_IDLE;
353     _skipcnt = 0;
354     _density = 0;
355     _ninp = 0;
356     _nout = 0;
357     _quantum = 0;
358     _minpart = 0;
359     _maxpart = 0;
360     _nlevels = 0;
361     _latecnt = 0;
362     return 0;
363 }
364 
365 
check_stop(void)366 bool Convproc::check_stop (void)
367 {
368     unsigned int k;
369 
370     for (k = 0; (k < _nlevels) && (_convlev [k]->_stat == Convlevel::ST_IDLE); k++);
371     if (k == _nlevels)
372     {
373 	_state = ST_STOP;
374 	return true;
375     }
376     return false;
377 }
378 
379 
print(FILE * F)380 void Convproc::print (FILE *F)
381 {
382     unsigned int k;
383 
384     for (k = 0; k < _nlevels; k++) _convlev [k]->print (F);
385 }
386 
387 
388 
// Four packed floats (16 bytes), declared with the compiler's vector_size
// attribute.  Convlevel::process() uses this type in its
// multiply-accumulate loop.
typedef float FV4 __attribute__ ((vector_size (16)));
390 
391 
Convlevel(void)392 Convlevel::Convlevel (void) :
393     _stat (ST_IDLE),
394     _npar (0),
395     _parsize (0),
396     _pthr (0),
397     _inp_list (0),
398     _out_list (0),
399     _plan_r2c (0),
400     _plan_c2r (0),
401     _freq_data (0)
402 {
403 }
404 
405 
406 
~Convlevel(void)407 Convlevel::~Convlevel (void)
408 {
409     cleanup ();
410 }
411 
412 
alloc_aligned(size_t size)413 void *Convlevel::alloc_aligned (size_t size)
414 {
415     void *p;
416 
417     p = av_malloc(size);
418     memset (p, 0, size);
419     return p;
420 }
421 
configure(int prio,unsigned int offs,unsigned int npar,unsigned int parsize)422 void Convlevel::configure (int prio,
423                            unsigned int offs,
424                            unsigned int npar,
425                            unsigned int parsize)
426 {
427     _prio = prio;
428     _offs = offs;
429     _npar = npar;
430     _parsize = parsize;
431 
432     _freq_data = (fftwf_complex *)(alloc_aligned ((_parsize + 1) * sizeof (fftwf_complex)));
433     _plan_r2c = av_rdft_init (int(log2(2 * _parsize)), DFT_R2C);
434     _plan_c2r = av_rdft_init (int(log2(2 * _parsize)), IDFT_C2R);
435     if (_plan_r2c && _plan_c2r) return;
436     throw (Converror (Converror::MEM_ALLOC));
437 }
438 
impdata_create(unsigned int inp,unsigned int out,unsigned int step,float * data,int i0,int i1)439 void Convlevel::impdata_create (unsigned int inp,
440                                 unsigned int out,
441                                 unsigned int step,
442                                 float *data,
443                                 int i0,
444                                 int i1)
445 {
446     unsigned int   k;
447     int            j, j0, j1, n;
448     float          norm;
449     fftwf_complex *fftb;
450     Macnode        *M;
451 
452     n = i1 - i0;
453     i0 = _offs - i0;
454     i1 = i0 + _npar * _parsize;
455     if ((i0 >= n) || (i1 <= 0)) return;
456 
457     M = findmacnode (inp, out, true);
458     if (! (M->_fftb))
459     {
460 	M->_fftb = new fftwf_complex * [_npar];
461 	memset (M->_fftb, 0, _npar * sizeof (fftwf_complex *));
462     }
463 
464     norm = 1.0f / _parsize; //FIXME
465     for (k = 0; k < _npar; k++)
466     {
467 	i1 = i0 + _parsize;
468 	if ((i0 < n) && (i1 > 0))
469 	{
470 	    if (! (M->_fftb [k]))
471 	    {
472 		M->_fftb [k] = (fftwf_complex *)(alloc_aligned ((_parsize + 1) * sizeof (fftwf_complex)));
473 	    }
474 	    float *prepdata = (float*)_freq_data;
475 	    memset (prepdata, 0, 2 * (_parsize+1) * sizeof (float));
476 	    j0 = (i0 < 0) ? 0 : i0;
477 	    j1 = (i1 > n) ? n : i1;
478 	    for (j = j0; j < j1; j++) prepdata [j - i0] = norm * data [j * step];
479 	    av_rdft_calc(_plan_r2c, prepdata);
480 	    // adjust for packing convention
481 	    _freq_data[_parsize][0] = _freq_data[0][1];
482 	    _freq_data[0][1] = 0;
483 	    fftswap (_freq_data);
484   	    fftb = M->_fftb [k];
485 	    for (j = 0; j <= (int)_parsize; j++)
486 	    {
487 	        fftb [j][0] += _freq_data [j][0];
488 	        fftb [j][1] += _freq_data [j][1];
489 	    }
490 	}
491 	i0 = i1;
492     }
493 }
494 
495 
impdata_update(unsigned int inp,unsigned int out,unsigned int step,float * data,int i0,int i1)496 void Convlevel::impdata_update (unsigned int inp,
497                                 unsigned int out,
498                                 unsigned int step,
499                                 float *data,
500                                 int i0,
501                                 int i1)
502 {
503     unsigned int   k;
504     int            j, j0, j1, n;
505     float          norm;
506     fftwf_complex *fftb;
507     Macnode        *M;
508 
509     M = findmacnode (inp, out, false);
510     if (! M) return;
511 
512     n = i1 - i0;
513     i0 = _offs - i0;
514     i1 = i0 + _npar * _parsize;
515     if ((i0 >= n) || (i1 <= 0)) return;
516 
517     norm = 1.0f / _parsize; // FIXME
518     for (k = 0; k < _npar; k++)
519     {
520 	i1 = i0 + _parsize;
521 	fftb = M->_fftb [k];
522 	if (fftb && (i0 < n) && (i1 > 0))
523 	{
524 	    float *prepdata = (float*)fftb;
525 	    memset (prepdata, 0, 2 * _parsize * sizeof (float));
526 	    j0 = (i0 < 0) ? 0 : i0;
527 	    j1 = (i1 > n) ? n : i1;
528 	    for (j = j0; j < j1; j++) prepdata [j - i0] = norm * data [j * step];
529 	    av_rdft_calc(_plan_r2c, prepdata);
530 	    // adjust for packing convention
531 	    fftb[_parsize][0] = fftb[0][1];
532 	    fftb[_parsize][1] = 0;
533 	    fftb[0][1] = 0;
534 	    fftswap (fftb);
535 	}
536 	i0 = i1;
537     }
538 }
539 
540 
impdata_copy(unsigned int inp1,unsigned int out1,unsigned int inp2,unsigned int out2)541 void Convlevel::impdata_copy (unsigned int inp1,
542                               unsigned int out1,
543                               unsigned int inp2,
544                               unsigned int out2)
545 {
546     Macnode  *M1;
547     Macnode  *M2;
548 
549     M1 = findmacnode (inp1, out1, false);
550     if (! M1) return;
551     M2 = findmacnode (inp2, out2, true);
552     if (M2->_fftb) return;
553     M2->_fftb = M1->_fftb;
554     M2->_copy = true;
555 }
556 
557 
reset(unsigned int inpsize,unsigned int outsize,float ** inpbuff,float ** outbuff)558 void Convlevel::reset (unsigned int  inpsize,
559                        unsigned int  outsize,
560 		       float         **inpbuff,
561 		       float         **outbuff)
562 {
563     unsigned int  i;
564     Inpnode      *X;
565     Outnode      *Y;
566 
567     _inpsize = inpsize;
568     _outsize = outsize;
569     _inpbuff = inpbuff;
570     _outbuff = outbuff;
571     for (X = _inp_list; X; X = X->_next)
572     {
573         for (i = 0; i < _npar; i++)
574 	{
575             memset (X->_ffta [i], 0, (_parsize + 1) * sizeof (fftwf_complex));
576 	}
577     }
578     for (Y = _out_list; Y; Y = Y->_next)
579     {
580 	for (i = 0; i < 3; i++)
581 	{
582             memset (Y->_buff [i], 0, _parsize * sizeof (float));
583 	}
584     }
585     if (_parsize == _outsize)
586     {
587         _outoffs = 0;
588         _inpoffs = 0;
589     }
590     else
591     {
592         _outoffs = _parsize / 2;
593         _inpoffs = _inpsize - _outoffs;
594     }
595     _bits = _parsize / _outsize;
596     _wait = 0;
597     _ptind = 0;
598     _opind = 0;
599     _trig.init (0, 0);
600     _done.init (0, 0);
601 }
602 
603 
start(int abspri,int policy)604 void Convlevel::start (int abspri, int policy)
605 {
606     int                min, max;
607     pthread_attr_t     attr;
608     struct sched_param parm;
609 
610     _pthr = 0;
611     min = sched_get_priority_min (policy);
612     max = sched_get_priority_max (policy);
613     abspri += _prio;
614     if (abspri > max) abspri = max;
615     if (abspri < min) abspri = min;
616     parm.sched_priority = abspri;
617     pthread_attr_init (&attr);
618     pthread_attr_setdetachstate (&attr, PTHREAD_CREATE_DETACHED);
619     pthread_attr_setschedpolicy (&attr, policy);
620     pthread_attr_setschedparam (&attr, &parm);
621     pthread_attr_setscope (&attr, PTHREAD_SCOPE_SYSTEM);
622     pthread_attr_setinheritsched (&attr, PTHREAD_EXPLICIT_SCHED);
623     pthread_attr_setstacksize (&attr, 0x10000);
624     pthread_create (&_pthr, &attr, static_main, this);
625     pthread_attr_destroy (&attr);
626 }
627 
628 
stop(void)629 void Convlevel::stop (void)
630 {
631     if (_stat != ST_IDLE)
632     {
633         _stat = ST_TERM;
634 	_trig.post ();
635     }
636 }
637 
638 
cleanup(void)639 void Convlevel::cleanup (void)
640 {
641     unsigned int  i;
642     Inpnode       *X, *X1;
643     Outnode       *Y, *Y1;
644     Macnode       *M, *M1;
645 
646     X = _inp_list;
647     while (X)
648     {
649         for (i = 0; i < _npar; i++) free (X->_ffta [i]);
650 	delete[] X->_ffta;
651 	X1 = X->_next;
652 	delete X;
653 	X = X1;
654     }
655     _inp_list = 0;
656 
657     Y = _out_list;
658     while (Y)
659     {
660 	M = Y->_list;
661 	while (M)
662 	{
663 	    if ((M->_fftb) && !(M->_copy))
664 	    {
665 	        for (i = 0; i < _npar; i++)
666 		{
667                     free (M->_fftb [i]);
668 		}
669 	        delete[] M->_fftb;
670 	    }
671 	    M1 = M->_next;
672 	    delete M;
673 	    M = M1;
674 	}
675 	for (i = 0; i < 3; i++) free (Y->_buff [i]);
676 	Y1 = Y->_next;
677 	delete Y;
678 	Y = Y1;
679     }
680     _out_list = 0;
681 
682     av_rdft_end (_plan_r2c);
683     av_rdft_end (_plan_c2r);
684     av_free (_freq_data);
685     _plan_r2c = 0;
686     _plan_c2r = 0;
687     _freq_data = 0;
688 }
689 
690 
static_main(void * arg)691 void *Convlevel::static_main (void *arg)
692 {
693     ((Convlevel *) arg)->main ();
694     return 0;
695 }
696 
697 
main(void)698 void __rt_func Convlevel::main (void)
699 {
700     _stat = ST_PROC;
701     while (true)
702     {
703 	_trig.wait ();
704 	if (_stat == ST_TERM)
705 	{
706             _stat = ST_IDLE;
707 	    _pthr = 0;
708             return;
709         }
710 	process (false);
711 	_done.post ();
712     }
713 }
714 
715 
process(bool skip)716 void __rt_func Convlevel::process (bool skip)
717 {
718     unsigned int    i, j, k;
719     unsigned int    i1, n1, n2, opi1, opi2;
720 
721     Inpnode         *X;
722     Macnode         *M;
723     Outnode         *Y;
724     fftwf_complex   *ffta;
725     fftwf_complex   *fftb;
726     float           *inpd;
727     float           *outd;
728 
729     i1 = _inpoffs;
730     n1 = _parsize;
731     n2 = 0;
732     _inpoffs = i1 + n1;
733     if (_inpoffs >= _inpsize)
734     {
735         _inpoffs -= _inpsize;
736 	n2 = _inpoffs;
737 	n1 -= n2;
738     }
739 
740     opi1 = (_opind + 1) % 3;
741     opi2 = (_opind + 2) % 3;
742 
743     for (X = _inp_list; X; X = X->_next)
744     {
745 	inpd = _inpbuff [X->_inp];
746 	fftwf_complex *freqdata = X->_ffta [_ptind];
747 	float *time_data = (float*)freqdata;
748 	if (n1) memcpy (time_data, inpd + i1, n1 * sizeof (float));
749 	if (n2) memcpy (time_data + n1, inpd, n2 * sizeof (float));
750 	memset (time_data + _parsize, 0, (_parsize+2) * sizeof (float));
751 	av_rdft_calc(_plan_r2c, time_data);
752 	// adjust for packing convention
753 	freqdata[_parsize][0] = freqdata[0][1];
754 	freqdata[0][1] = 0;
755 	fftswap (X->_ffta [_ptind]);
756     }
757 
758     if (skip)
759     {
760         for (Y = _out_list; Y; Y = Y->_next)
761 	{
762 	    outd = Y->_buff [opi2];
763 	    memset (outd, 0, _parsize * sizeof (float));
764 	}
765     }
766     else
767     {
768 	for (Y = _out_list; Y; Y = Y->_next)
769 	{
770 	    memset (_freq_data, 0, (_parsize + 1) * sizeof (fftwf_complex));
771 	    for (M = Y->_list; M; M = M->_next)
772 	    {
773 		X = M->_inpn;
774 		i = _ptind;
775 		for (j = 0; j < _npar; j++)
776 		{
777 		    ffta = X->_ffta [i];
778 		    fftb = M->_fftb [j];
779 		    if (fftb)
780 		    {
781 			FV4 *A = (FV4 *) ffta;
782 			FV4 *B = (FV4 *) fftb;
783 			FV4 *D = (FV4 *) _freq_data;
784 			for (k = 0; k < _parsize; k += 4)
785 			{
786 			    D [0] += A [0] * B [0] - A [1] * B [1];
787 			    D [1] += A [0] * B [1] + A [1] * B [0];
788 			    A += 2;
789 			    B += 2;
790 			    D += 2;
791 			}
792 			_freq_data [_parsize][0] += ffta [_parsize][0] * fftb [_parsize][0];
793 			_freq_data [_parsize][1] = 0;
794 		    }
795 		    if (i == 0) i = _npar;
796 		    i--;
797 		}
798 	    }
799 
800 	    fftswap (_freq_data);
801 	    _freq_data[0][1] = _freq_data[_parsize][0]; // adjust for packing convention
802 	    av_rdft_calc(_plan_c2r, (float*)_freq_data);
803 	    outd = Y->_buff [opi1];
804 	    for (k = 0; k < _parsize; k++) outd [k] += ((float*)_freq_data) [k];
805 	    outd = Y->_buff [opi2];
806 	    memcpy (outd, ((float*)_freq_data) + _parsize, _parsize * sizeof (float));
807 	}
808     }
809 
810     _ptind++;
811     if (_ptind == _npar) _ptind = 0;
812 }
813 
814 
readout(bool sync,unsigned int skipcnt)815 int __rt_func Convlevel::readout (bool sync, unsigned int skipcnt)
816 {
817     unsigned int  i;
818     float         *p, *q;
819     Outnode       *Y;
820 
821     _outoffs += _outsize;
822     if (_outoffs == _parsize)
823     {
824 	_outoffs = 0;
825 	if (_stat == ST_PROC)
826 	{
827    	    while (_wait)
828 	    {
829 		if (sync) _done.wait ();
830 		else if (_done.trywait ()) break;
831   	        _wait--;
832 	    }
833 	    if (++_opind == 3) _opind = 0;
834             _trig.post ();
835 	    _wait++;
836 	}
837         else
838 	{
839             process (skipcnt >= 2 * _parsize);
840 	    if (++_opind == 3) _opind = 0;
841 	}
842     }
843 
844     for (Y = _out_list; Y; Y = Y->_next)
845     {
846         p = Y->_buff [_opind] + _outoffs;
847         q = _outbuff [Y->_out];
848         for (i = 0; i < _outsize; i++) q [i] += p [i];
849     }
850 
851     return (_wait > 1) ? _bits : 0;
852 }
853 
854 
print(FILE * F)855 void Convlevel::print (FILE *F)
856 {
857     fprintf (F, "prio = %4d, offs = %6d,  parsize = %5d,  npar = %3d\n", _prio, _offs, _parsize, _npar);
858 }
859 
860 
findmacnode(unsigned int inp,unsigned int out,bool create)861 Macnode *Convlevel::findmacnode (unsigned int inp, unsigned int out, bool create)
862 {
863     unsigned int  i;
864     Inpnode       *X;
865     Outnode       *Y;
866     Macnode       *M;
867 
868     for (X = _inp_list; X && (X->_inp != inp); X = X->_next);
869     if (! X)
870     {
871 	if (! create) return 0;
872 	X = new Inpnode;
873 	X->_next = _inp_list;
874 	_inp_list = X;
875 	X->_inp = inp;
876 	X->_ffta = new fftwf_complex * [_npar];
877 	memset (X->_ffta, 0, _npar * sizeof (fftwf_complex *));
878         for (i = 0; i < _npar; i++)
879 	{
880             X->_ffta [i] = (fftwf_complex *)(alloc_aligned ((_parsize + 1) * sizeof (fftwf_complex)));
881 	}
882     }
883 
884     for (Y = _out_list; Y && (Y->_out != out); Y = Y->_next);
885     if (! Y)
886     {
887 	if (! create) return 0;
888 	Y = new Outnode;
889 	Y->_next = _out_list;
890 	_out_list = Y;
891 	Y->_out = out;
892 	Y->_list = 0;
893         for (i = 0; i < 3; i++)
894 	{
895 	    Y->_buff [i] = 0;
896 	}
897         for (i = 0; i < 3; i++)
898 	{
899 	    Y->_buff [i] = (float *)(alloc_aligned (_parsize * sizeof (float)));
900 	}
901     }
902 
903     for (M = Y->_list; M && (M->_inpn != X); M = M->_next);
904     if (! M)
905     {
906 	if (! create) return 0;
907 	M = new Macnode;
908 	M->_next = Y->_list;
909 	Y->_list = M;
910 	M->_inpn = X;
911 	M->_fftb = 0;
912 	M->_copy = false;
913     }
914 
915     return M;
916 }
917 
918 
fftswap(fftwf_complex * p)919 void __rt_func Convlevel::fftswap (fftwf_complex *p)
920 {
921     unsigned int  n = _parsize;
922     float         a, b;
923 
924     while (n)
925     {
926 	a = p [2][0];
927 	b = p [3][0];
928         p [2][0] = p [0][1];
929         p [3][0] = p [1][1];
930         p [0][1] = a;
931         p [1][1] = b;
932 	p += 4;
933         n -= 4;
934     }
935 }
936