// ----------------------------------------------------------------------------
//
//  Copyright (C) 2006-2011 Fons Adriaensen <fons@linuxaudio.org>
//
//  This program is free software; you can redistribute it and/or modify
//  it under the terms of the GNU General Public License as published by
//  the Free Software Foundation; either version 3 of the License, or
//  (at your option) any later version.
//
//  This program is distributed in the hope that it will be useful,
//  but WITHOUT ANY WARRANTY; without even the implied warranty of
//  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
//  GNU General Public License for more details.
//
//  You should have received a copy of the GNU General Public License
//  along with this program.  If not, see <http://www.gnu.org/licenses/>.
//
// ----------------------------------------------------------------------------
19 
20 
21 #include <unistd.h>
22 #include <stdlib.h>
23 #include <string.h>
24 #include <stdio.h>
25 #include "zita-convolver.h"
26 
27 
28 
zita_convolver_major_version(void)29 int zita_convolver_major_version (void)
30 {
31     return ZITA_CONVOLVER_MAJOR_VERSION;
32 }
33 
34 
35 float Convproc::_mac_cost = 1.0f;
36 float Convproc::_fft_cost = 5.0f;
37 
38 
Convproc(void)39 Convproc::Convproc (void) :
40     _state (ST_IDLE),
41     _options (0),
42     _skipcnt (0),
43     _density (0),
44     _ninp (0),
45     _nout (0),
46     _quantum (0),
47     _minpart (0),
48     _maxpart (0),
49     _nlevels (0),
50     _latecnt (0)
51 {
52     memset (_inpbuff, 0, MAXINP * sizeof (float *));
53     memset (_outbuff, 0, MAXOUT * sizeof (float *));
54     memset (_convlev, 0, MAXLEV * sizeof (Convlevel *));
55 }
56 
57 
~Convproc(void)58 Convproc::~Convproc (void)
59 {
60     cleanup ();
61 }
62 
63 
set_options(unsigned int options)64 void Convproc::set_options (unsigned int options)
65 {
66     _options = options;
67 }
68 
69 
set_density(float density)70 void Convproc::set_density (float density)
71 {
72     _density = density;
73 }
74 
75 
set_skipcnt(unsigned int skipcnt)76 void Convproc::set_skipcnt (unsigned int skipcnt)
77 {
78     if ((_quantum == _minpart) && (_quantum == _maxpart)) _skipcnt = skipcnt;
79 }
80 
81 
configure(unsigned int ninp,unsigned int nout,unsigned int maxsize,unsigned int quantum,unsigned int minpart,unsigned int maxpart)82 int Convproc::configure (unsigned int ninp,
83                          unsigned int nout,
84                          unsigned int maxsize,
85                          unsigned int quantum,
86                          unsigned int minpart,
87 			 unsigned int maxpart)
88 {
89     unsigned int  offs, npar, size, pind, nmin, nmax, step, i;
90     int           prio, d, r, s;
91     float         cfft, cmac, t;
92 
93     if (_state != ST_IDLE) return Converror::BAD_STATE;
94     if (   (quantum & (quantum - 1))
95         || (quantum < MINQUANT)
96         || (quantum > MAXQUANT)
97         || (minpart & (minpart - 1))
98 	|| (minpart < MINPART)
99         || (minpart < quantum)
100         || (minpart > MAXDIVIS * quantum)
101         || (maxpart & (maxpart - 1))
102 	|| (maxpart > MAXPART)
103 	|| (maxpart < minpart)) return Converror::BAD_PARAM;
104 
105     if (ninp < nout) { nmin = ninp; nmax = nout; }
106     else             { nmin = nout; nmax = ninp; }
107 
108     if (_density <= 0) _density = 1.0 / nmin;
109     else
110     {
111         t = 1.0f / nmax;
112         if (_density < t) _density = t;
113         if (_density > 1) _density = 1;
114     }
115 
116     cfft = _fft_cost * (ninp + nout);
117     cmac = _mac_cost * ninp * nout * _density;
118     step = (cfft < 4 * cmac) ? 1 : 2;
119 
120     if (step == 2)
121     {
122         r = maxpart / minpart;
123         s = (r & 0xAAAA) ? 1 : 2;
124     }
125     else s = 1;
126     nmin = (s == 1) ? 2 : 6;
127     if (minpart == quantum) nmin++;
128 
129     prio = 0;
130     size = quantum;
131     while (size < minpart)
132     {
133 	prio -= 1;
134 	size <<= 1;
135     }
136 
137     try
138     {
139 	for (offs = pind = 0; offs < maxsize; pind++)
140 	{
141 	    npar = (maxsize - offs + size - 1) / size;
142 	    if ((size < maxpart) && (npar > nmin))
143 	    {
144 		r = 1 << s;
145 		d = npar - nmin;
146 		d = d - (d + r - 1) / r;
147 		if (cfft < d * cmac) npar = nmin;
148 	    }
149 	    _convlev [pind] = new Convlevel ();
150 	    _convlev [pind]->configure (prio, offs, npar, size, _options);
151 
152 	    offs += size * npar;
153 	    if (offs < maxsize)
154 	    {
155 		prio -= s;
156 		size <<= s;
157 		s = step;
158                 nmin = (s == 1) ? 2 : 6;
159 	    }
160 	}
161 
162 	_ninp = ninp;
163 	_nout = nout;
164 	_quantum = quantum;
165 	_minpart = minpart;
166 	_maxpart = size;
167 	_nlevels = pind;
168 	_latecnt = 0;
169 	_inpsize = 2 * size;
170 
171 	for (i = 0; i < ninp; i++) _inpbuff [i] = new float [_inpsize];
172 	for (i = 0; i < nout; i++) _outbuff [i] = new float [_minpart];
173     }
174     catch (...)
175     {
176 	cleanup ();
177 	return Converror::MEM_ALLOC;
178     }
179 
180     _state = ST_STOP;
181     return 0;
182 }
183 
184 
impdata_create(unsigned int inp,unsigned int out,unsigned int step,float * data,int ind0,int ind1)185 int Convproc::impdata_create (unsigned int inp,
186                               unsigned int out,
187                               unsigned int step,
188                               float       *data,
189                               int          ind0,
190                               int          ind1)
191 {
192     unsigned int j;
193 
194     if (_state != ST_STOP) return Converror::BAD_STATE;
195     try
196     {
197         for (j = 0; j < _nlevels; j++)
198 	{
199             _convlev [j]->impdata_create (inp, out, step, data, ind0, ind1);
200 	}
201     }
202     catch (...)
203     {
204 	cleanup ();
205 	return Converror::MEM_ALLOC;
206     }
207     return 0;
208 }
209 
210 
impdata_update(unsigned int inp,unsigned int out,unsigned int step,float * data,int ind0,int ind1)211 int Convproc::impdata_update (unsigned int inp,
212                               unsigned int out,
213                               unsigned int step,
214                               float       *data,
215                               int          ind0,
216                               int          ind1)
217 {
218     unsigned int j;
219 
220     if (_state < ST_STOP) return Converror::BAD_STATE;
221     for (j = 0; j < _nlevels; j++)
222     {
223         _convlev [j]->impdata_update (inp, out, step, data, ind0, ind1);
224     }
225     return 0;
226 }
227 
228 
impdata_copy(unsigned int inp1,unsigned int out1,unsigned int inp2,unsigned int out2)229 int Convproc::impdata_copy (unsigned int inp1,
230                             unsigned int out1,
231                             unsigned int inp2,
232                             unsigned int out2)
233 {
234     unsigned int j;
235 
236     if (_state != ST_STOP) return Converror::BAD_STATE;
237     try
238     {
239         for (j = 0; j < _nlevels; j++)
240 	{
241             _convlev [j]->impdata_copy (inp1, out1, inp2, out2);
242 	}
243     }
244     catch (...)
245     {
246 	cleanup ();
247 	return Converror::MEM_ALLOC;
248     }
249     return 0;
250 }
251 
252 
reset(void)253 int Convproc::reset (void)
254 {
255     unsigned int k;
256 
257     if (_state == ST_IDLE) return Converror::BAD_STATE;
258     for (k = 0; k < _ninp; k++) memset (_inpbuff [k], 0, _inpsize * sizeof (float));
259     for (k = 0; k < _nout; k++) memset (_outbuff [k], 0, _minpart * sizeof (float));
260     for (k = 0; k < _nlevels; k++) _convlev [k]->reset (_inpsize, _minpart, _inpbuff, _outbuff);
261     return 0;
262 }
263 
264 
start_process(int abspri,int policy)265 int Convproc::start_process (int abspri, int policy)
266 {
267     unsigned int k;
268 
269     if (_state != ST_STOP) return Converror::BAD_STATE;
270 
271     _latecnt = 0;
272     _inpoffs = 0;
273     _outoffs = 0;
274     reset ();
275     for (k = (_minpart == _quantum) ? 1 : 0; k < _nlevels; k++)
276     {
277          _convlev [k]->start (abspri, policy);
278     }
279     _state = ST_PROC;
280     return 0;
281 }
282 
283 
process(bool sync)284 int Convproc::process (bool sync)
285 {
286     unsigned int k;
287     int f = 0;
288 
289     if (_state != ST_PROC) return 0;
290 
291     _inpoffs += _quantum;
292     if (_inpoffs == _inpsize) _inpoffs = 0;
293 
294     _outoffs += _quantum;
295     if (_outoffs == _minpart)
296     {
297         _outoffs = 0;
298 	for (k = 0; k < _nout; k++) memset (_outbuff [k], 0, _minpart * sizeof (float));
299 	for (k = 0; k < _nlevels; k++) f |= _convlev [k]->readout (sync, _skipcnt);
300 	if (_skipcnt < _minpart) _skipcnt = 0;
301 	else _skipcnt -= _minpart;
302         if (f)
303 	{
304             if (++_latecnt >= 5)
305             {
306 	        stop_process ();
307 	        f |= FL_LOAD;
308 	    }
309 	}
310         else _latecnt = 0;
311     }
312     return f;
313 }
314 
315 
stop_process(void)316 int Convproc::stop_process (void)
317 {
318     unsigned int k;
319 
320     if (_state != ST_PROC) return Converror::BAD_STATE;
321     for (k = 0; k < _nlevels; k++) _convlev [k]->stop ();
322     _state = ST_WAIT;
323     return 0;
324 }
325 
326 
cleanup(void)327 int Convproc::cleanup (void)
328 {
329     unsigned int k;
330 
331     while (! check_stop ())
332     {
333         usleep (100000);
334     }
335     if (_state != ST_STOP)
336     {
337         return Converror::BAD_STATE;
338     }
339 
340     for (k = 0; k < _ninp; k++)
341     {
342         delete[] _inpbuff [k];
343 	_inpbuff [k] = 0;
344     }
345     for (k = 0; k < _nout; k++)
346     {
347         delete[] _outbuff [k];
348 	_outbuff [k] = 0;
349     }
350     for (k = 0; k < _nlevels; k++)
351     {
352 	delete _convlev [k];
353 	_convlev [k] = 0;
354     }
355 
356     _state = ST_IDLE;
357     _options = 0;
358     _skipcnt = 0;
359     _density = 0;
360     _ninp = 0;
361     _nout = 0;
362     _quantum = 0;
363     _minpart = 0;
364     _maxpart = 0;
365     _nlevels = 0;
366     _latecnt = 0;
367     return 0;
368 }
369 
370 
check_stop(void)371 bool Convproc::check_stop (void)
372 {
373     unsigned int k;
374 
375     for (k = 0; (k < _nlevels) && (_convlev [k]->_stat == Convlevel::ST_IDLE); k++);
376     if (k == _nlevels)
377     {
378 	_state = ST_STOP;
379 	return true;
380     }
381     return false;
382 }
383 
384 
print(FILE * F)385 void Convproc::print (FILE *F)
386 {
387     unsigned int k;
388 
389     for (k = 0; k < _nlevels; k++) _convlev [k]->print (F);
390 }
391 
392 
393 
// Vector of four floats (16 bytes), used for the vector-mode
// multiply-accumulate loop in Convlevel::process().
typedef float FV4 __attribute__ ((vector_size (4 * sizeof (float))));
395 
396 
Convlevel(void)397 Convlevel::Convlevel (void) :
398     _stat (ST_IDLE),
399     _npar (0),
400     _parsize (0),
401     _options (0),
402     _pthr (0),
403     _inp_list (0),
404     _out_list (0),
405     _plan_r2c (0),
406     _plan_c2r (0),
407     _time_data (0),
408     _prep_data (0),
409     _freq_data (0)
410 {
411 }
412 
413 
414 
~Convlevel(void)415 Convlevel::~Convlevel (void)
416 {
417     cleanup ();
418 }
419 
420 
alloc_aligned(size_t size)421 void *Convlevel::alloc_aligned (size_t size)
422 {
423     void *p;
424 
425     if (posix_memalign (&p, 16, size)) throw (Converror (Converror::MEM_ALLOC));
426     memset (p, 0, size);
427     return p;
428 }
429 
430 
configure(int prio,unsigned int offs,unsigned int npar,unsigned int parsize,unsigned int options)431 void Convlevel::configure (int prio,
432                            unsigned int offs,
433                            unsigned int npar,
434                            unsigned int parsize,
435 			   unsigned int options)
436 {
437     int fftwopt = (options & OPT_FFTW_MEASURE) ? FFTW_MEASURE : FFTW_ESTIMATE;
438 
439     _prio = prio;
440     _offs = offs;
441     _npar = npar;
442     _parsize = parsize;
443     _options = options;
444 
445     _time_data = (float *)(alloc_aligned (2 * _parsize * sizeof (float)));
446     _prep_data = (float *)(alloc_aligned (2 * _parsize * sizeof (float)));
447     _freq_data = (fftwf_complex *)(alloc_aligned ((_parsize + 1) * sizeof (fftwf_complex)));
448     _plan_r2c = fftwf_plan_dft_r2c_1d (2 * _parsize, _time_data, _freq_data, fftwopt);
449     _plan_c2r = fftwf_plan_dft_c2r_1d (2 * _parsize, _freq_data, _time_data, fftwopt);
450     if (_plan_r2c && _plan_c2r) return;
451     throw (Converror (Converror::MEM_ALLOC));
452 }
453 
454 
impdata_create(unsigned int inp,unsigned int out,unsigned int step,float * data,int i0,int i1)455 void Convlevel::impdata_create (unsigned int inp,
456                                 unsigned int out,
457                                 unsigned int step,
458                                 float *data,
459                                 int i0,
460                                 int i1)
461 {
462     unsigned int   k;
463     int            j, j0, j1, n;
464     float          norm;
465     fftwf_complex  *fftb;
466     Macnode        *M;
467 
468     n = i1 - i0;
469     i0 = _offs - i0;
470     i1 = i0 + _npar * _parsize;
471     if ((i0 >= n) || (i1 <= 0)) return;
472 
473     M = findmacnode (inp, out, true);
474     if (! (M->_fftb))
475     {
476 	M->_fftb = new fftwf_complex * [_npar];
477 	memset (M->_fftb, 0, _npar * sizeof (fftwf_complex *));
478     }
479 
480     norm = 0.5f / _parsize;
481     for (k = 0; k < _npar; k++)
482     {
483 	i1 = i0 + _parsize;
484 	if ((i0 < n) && (i1 > 0))
485 	{
486 	    if (! (M->_fftb [k]))
487 	    {
488 		M->_fftb [k] = (fftwf_complex *)(alloc_aligned ((_parsize + 1) * sizeof (fftwf_complex)));
489 	    }
490 	    memset (_prep_data, 0, 2 * _parsize * sizeof (float));
491 	    j0 = (i0 < 0) ? 0 : i0;
492 	    j1 = (i1 > n) ? n : i1;
493 	    for (j = j0; j < j1; j++) _prep_data [j - i0] = norm * data [j * step];
494 	    fftwf_execute_dft_r2c (_plan_r2c, _prep_data, _freq_data);
495 #ifdef ENABLE_VECTOR_MODE
496 	    if (_options & OPT_VECTOR_MODE) fftswap (_freq_data);
497 #endif
498   	    fftb = M->_fftb [k];
499 	    for (j = 0; j <= (int)_parsize; j++)
500 	    {
501 	        fftb [j][0] += _freq_data [j][0];
502 	        fftb [j][1] += _freq_data [j][1];
503 	    }
504 	}
505 	i0 = i1;
506     }
507 }
508 
509 
impdata_update(unsigned int inp,unsigned int out,unsigned int step,float * data,int i0,int i1)510 void Convlevel::impdata_update (unsigned int inp,
511                                 unsigned int out,
512                                 unsigned int step,
513                                 float *data,
514                                 int i0,
515                                 int i1)
516 {
517     unsigned int   k;
518     int            j, j0, j1, n;
519     float          norm;
520     fftwf_complex  *fftb;
521     Macnode        *M;
522 
523     M = findmacnode (inp, out, false);
524     if (! M) return;
525 
526     n = i1 - i0;
527     i0 = _offs - i0;
528     i1 = i0 + _npar * _parsize;
529     if ((i0 >= n) || (i1 <= 0)) return;
530 
531     norm = 0.5f / _parsize;
532     for (k = 0; k < _npar; k++)
533     {
534 	i1 = i0 + _parsize;
535 	fftb = M->_fftb [k];
536 	if (fftb && (i0 < n) && (i1 > 0))
537 	{
538 	    memset (_prep_data, 0, 2 * _parsize * sizeof (float));
539 	    j0 = (i0 < 0) ? 0 : i0;
540 	    j1 = (i1 > n) ? n : i1;
541 	    for (j = j0; j < j1; j++) _prep_data [j - i0] = norm * data [j * step];
542 	    fftwf_execute_dft_r2c (_plan_r2c, _prep_data, fftb);
543 #ifdef ENABLE_VECTOR_MODE
544 	    if (_options & OPT_VECTOR_MODE) fftswap (fftb);
545 #endif
546 	}
547 	i0 = i1;
548     }
549 }
550 
551 
impdata_copy(unsigned int inp1,unsigned int out1,unsigned int inp2,unsigned int out2)552 void Convlevel::impdata_copy (unsigned int inp1,
553                               unsigned int out1,
554                               unsigned int inp2,
555                               unsigned int out2)
556 {
557     Macnode  *M1;
558     Macnode  *M2;
559 
560     M1 = findmacnode (inp1, out1, false);
561     if (! M1) return;
562     M2 = findmacnode (inp2, out2, true);
563     if (M2->_fftb) return;
564     M2->_fftb = M1->_fftb;
565     M2->_copy = true;
566 }
567 
568 
reset(unsigned int inpsize,unsigned int outsize,float ** inpbuff,float ** outbuff)569 void Convlevel::reset (unsigned int  inpsize,
570                        unsigned int  outsize,
571 		       float         **inpbuff,
572 		       float         **outbuff)
573 {
574     unsigned int  i;
575     Inpnode      *X;
576     Outnode      *Y;
577 
578     _inpsize = inpsize;
579     _outsize = outsize;
580     _inpbuff = inpbuff;
581     _outbuff = outbuff;
582     for (X = _inp_list; X; X = X->_next)
583     {
584         for (i = 0; i < _npar; i++)
585 	{
586             memset (X->_ffta [i], 0, (_parsize + 1) * sizeof (fftwf_complex));
587 	}
588     }
589     for (Y = _out_list; Y; Y = Y->_next)
590     {
591 	for (i = 0; i < 3; i++)
592 	{
593             memset (Y->_buff [i], 0, _parsize * sizeof (float));
594 	}
595     }
596     if (_parsize == _outsize)
597     {
598         _outoffs = 0;
599         _inpoffs = 0;
600     }
601     else
602     {
603         _outoffs = _parsize / 2;
604         _inpoffs = _inpsize - _outoffs;
605     }
606     _bits = _parsize / _outsize;
607     _wait = 0;
608     _ptind = 0;
609     _opind = 0;
610     _trig.init (0, 0);
611     _done.init (0, 0);
612 }
613 
614 
start(int abspri,int policy)615 void Convlevel::start (int abspri, int policy)
616 {
617     int                min, max;
618     pthread_attr_t     attr;
619     struct sched_param parm;
620 
621     _pthr = 0;
622     min = sched_get_priority_min (policy);
623     max = sched_get_priority_max (policy);
624     abspri += _prio;
625     if (abspri > max) abspri = max;
626     if (abspri < min) abspri = min;
627     parm.sched_priority = abspri;
628     pthread_attr_init (&attr);
629     pthread_attr_setdetachstate (&attr, PTHREAD_CREATE_DETACHED);
630     pthread_attr_setschedpolicy (&attr, policy);
631     pthread_attr_setschedparam (&attr, &parm);
632     pthread_attr_setscope (&attr, PTHREAD_SCOPE_SYSTEM);
633     pthread_attr_setinheritsched (&attr, PTHREAD_EXPLICIT_SCHED);
634     pthread_attr_setstacksize (&attr, 0x10000);
635     pthread_create (&_pthr, &attr, static_main, this);
636     pthread_attr_destroy (&attr);
637 }
638 
639 
stop(void)640 void Convlevel::stop (void)
641 {
642     if (_stat != ST_IDLE)
643     {
644         _stat = ST_TERM;
645 	_trig.post ();
646     }
647 }
648 
649 
cleanup(void)650 void Convlevel::cleanup (void)
651 {
652     unsigned int  i;
653     Inpnode       *X, *X1;
654     Outnode       *Y, *Y1;
655     Macnode       *M, *M1;
656 
657     X = _inp_list;
658     while (X)
659     {
660         for (i = 0; i < _npar; i++) free (X->_ffta [i]);
661 	delete[] X->_ffta;
662 	X1 = X->_next;
663 	delete X;
664 	X = X1;
665     }
666     _inp_list = 0;
667 
668     Y = _out_list;
669     while (Y)
670     {
671 	M = Y->_list;
672 	while (M)
673 	{
674 	    if ((M->_fftb) && !(M->_copy))
675 	    {
676 	        for (i = 0; i < _npar; i++)
677 		{
678                     free (M->_fftb [i]);
679 		}
680 	        delete[] M->_fftb;
681 	    }
682 	    M1 = M->_next;
683 	    delete M;
684 	    M = M1;
685 	}
686 	for (i = 0; i < 3; i++) free (Y->_buff [i]);
687 	Y1 = Y->_next;
688 	delete Y;
689 	Y = Y1;
690     }
691     _out_list = 0;
692 
693     fftwf_destroy_plan (_plan_r2c);
694     fftwf_destroy_plan (_plan_c2r);
695     free (_time_data);
696     free (_prep_data);
697     free (_freq_data);
698     _plan_r2c = 0;
699     _plan_c2r = 0;
700     _time_data = 0;
701     _prep_data = 0;
702     _freq_data = 0;
703 }
704 
705 
static_main(void * arg)706 void *Convlevel::static_main (void *arg)
707 {
708     ((Convlevel *) arg)->main ();
709     return 0;
710 }
711 
712 
main(void)713 void Convlevel::main (void)
714 {
715     _stat = ST_PROC;
716     while (true)
717     {
718 	_trig.wait ();
719 	if (_stat == ST_TERM)
720 	{
721             _stat = ST_IDLE;
722 	    _pthr = 0;
723             return;
724         }
725 	process (false);
726 	_done.post ();
727     }
728 }
729 
730 
process(bool skip)731 void Convlevel::process (bool skip)
732 {
733     unsigned int    i, j, k;
734     unsigned int    i1, n1, n2, opi1, opi2;
735 
736     Inpnode         *X;
737     Macnode         *M;
738     Outnode         *Y;
739     fftwf_complex   *ffta;
740     fftwf_complex   *fftb;
741     float           *inpd;
742     float           *outd;
743 
744     i1 = _inpoffs;
745     n1 = _parsize;
746     n2 = 0;
747     _inpoffs = i1 + n1;
748     if (_inpoffs >= _inpsize)
749     {
750         _inpoffs -= _inpsize;
751 	n2 = _inpoffs;
752 	n1 -= n2;
753     }
754 
755     opi1 = (_opind + 1) % 3;
756     opi2 = (_opind + 2) % 3;
757 
758     for (X = _inp_list; X; X = X->_next)
759     {
760 	inpd = _inpbuff [X->_inp];
761 	if (n1) memcpy (_time_data, inpd + i1, n1 * sizeof (float));
762 	if (n2) memcpy (_time_data + n1, inpd, n2 * sizeof (float));
763 	memset (_time_data + _parsize, 0, _parsize * sizeof (float));
764 	fftwf_execute_dft_r2c (_plan_r2c, _time_data, X->_ffta [_ptind]);
765 #ifdef ENABLE_VECTOR_MODE
766 	if (_options & OPT_VECTOR_MODE) fftswap (X->_ffta [_ptind]);
767 #endif
768     }
769 
770     if (skip)
771     {
772         for (Y = _out_list; Y; Y = Y->_next)
773 	{
774 	    outd = Y->_buff [opi2];
775 	    memset (outd, 0, _parsize * sizeof (float));
776 	}
777     }
778     else
779     {
780 	for (Y = _out_list; Y; Y = Y->_next)
781 	{
782 	    memset (_freq_data, 0, (_parsize + 1) * sizeof (fftwf_complex));
783 	    for (M = Y->_list; M; M = M->_next)
784 	    {
785 		X = M->_inpn;
786 		i = _ptind;
787 		for (j = 0; j < _npar; j++)
788 		{
789 		    ffta = X->_ffta [i];
790 		    fftb = M->_fftb [j];
791 		    if (fftb)
792 		    {
793 #ifdef ENABLE_VECTOR_MODE
794 			if (_options & OPT_VECTOR_MODE)
795 			{
796 			    FV4 *A = (FV4 *) ffta;
797 			    FV4 *B = (FV4 *) fftb;
798 			    FV4 *D = (FV4 *) _freq_data;
799 			    for (k = 0; k < _parsize; k += 4)
800 			    {
801 				D [0] += A [0] * B [0] - A [1] * B [1];
802 				D [1] += A [0] * B [1] + A [1] * B [0];
803 				A += 2;
804 				B += 2;
805 				D += 2;
806 			    }
807 			    _freq_data [_parsize][0] += ffta [_parsize][0] * fftb [_parsize][0];
808 			    _freq_data [_parsize][1] = 0;
809 			}
810 			else
811 #endif
812 			{
813 			    for (k = 0; k <= _parsize; k++)
814 			    {
815 				_freq_data [k][0] += ffta [k][0] * fftb [k][0] - ffta [k][1] * fftb [k][1];
816 				_freq_data [k][1] += ffta [k][0] * fftb [k][1] + ffta [k][1] * fftb [k][0];
817 			    }
818 			}
819 		    }
820 		    if (i == 0) i = _npar;
821 		    i--;
822 		}
823 	    }
824 
825 #ifdef ENABLE_VECTOR_MODE
826 	    if (_options & OPT_VECTOR_MODE) fftswap (_freq_data);
827 #endif
828 	    fftwf_execute_dft_c2r (_plan_c2r, _freq_data, _time_data);
829 	    outd = Y->_buff [opi1];
830 	    for (k = 0; k < _parsize; k++) outd [k] += _time_data [k];
831 	    outd = Y->_buff [opi2];
832 	    memcpy (outd, _time_data + _parsize, _parsize * sizeof (float));
833 	}
834     }
835 
836     _ptind++;
837     if (_ptind == _npar) _ptind = 0;
838 }
839 
840 
readout(bool sync,unsigned int skipcnt)841 int Convlevel::readout (bool sync, unsigned int skipcnt)
842 {
843     unsigned int  i;
844     float         *p, *q;
845     Outnode       *Y;
846 
847     _outoffs += _outsize;
848     if (_outoffs == _parsize)
849     {
850 	_outoffs = 0;
851 	if (_stat == ST_PROC)
852 	{
853    	    while (_wait)
854 	    {
855 		if (sync) _done.wait ();
856 		else if (_done.trywait ()) break;
857   	        _wait--;
858 	    }
859 	    if (++_opind == 3) _opind = 0;
860             _trig.post ();
861 	    _wait++;
862 	}
863         else
864 	{
865             process (skipcnt >= 2 * _parsize);
866 	    if (++_opind == 3) _opind = 0;
867 	}
868     }
869 
870     for (Y = _out_list; Y; Y = Y->_next)
871     {
872         p = Y->_buff [_opind] + _outoffs;
873         q = _outbuff [Y->_out];
874         for (i = 0; i < _outsize; i++) q [i] += p [i];
875     }
876 
877     return (_wait > 1) ? _bits : 0;
878 }
879 
880 
print(FILE * F)881 void Convlevel::print (FILE *F)
882 {
883     fprintf (F, "prio = %4d, offs = %6d,  parsize = %5d,  npar = %3d\n", _prio, _offs, _parsize, _npar);
884 }
885 
886 
findmacnode(unsigned int inp,unsigned int out,bool create)887 Macnode *Convlevel::findmacnode (unsigned int inp, unsigned int out, bool create)
888 {
889     unsigned int  i;
890     Inpnode       *X;
891     Outnode       *Y;
892     Macnode       *M;
893 
894     for (X = _inp_list; X && (X->_inp != inp); X = X->_next);
895     if (! X)
896     {
897 	if (! create) return 0;
898 	X = new Inpnode;
899 	X->_next = _inp_list;
900 	_inp_list = X;
901 	X->_inp = inp;
902 	X->_ffta = new fftwf_complex * [_npar];
903 	memset (X->_ffta, 0, _npar * sizeof (fftw_complex *));
904         for (i = 0; i < _npar; i++)
905 	{
906             X->_ffta [i] = (fftwf_complex *)(alloc_aligned ((_parsize + 1) * sizeof (fftwf_complex)));
907 	}
908     }
909 
910     for (Y = _out_list; Y && (Y->_out != out); Y = Y->_next);
911     if (! Y)
912     {
913 	if (! create) return 0;
914 	Y = new Outnode;
915 	Y->_next = _out_list;
916 	_out_list = Y;
917 	Y->_out = out;
918 	Y->_list = 0;
919         for (i = 0; i < 3; i++)
920 	{
921 	    Y->_buff [i] = 0;
922 	}
923         for (i = 0; i < 3; i++)
924 	{
925 	    Y->_buff [i] = (float *)(alloc_aligned (_parsize * sizeof (float)));
926 	}
927     }
928 
929     for (M = Y->_list; M && (M->_inpn != X); M = M->_next);
930     if (! M)
931     {
932 	if (! create) return 0;
933 	M = new Macnode;
934 	M->_next = Y->_list;
935 	Y->_list = M;
936 	M->_inpn = X;
937 	M->_fftb = 0;
938 	M->_copy = false;
939     }
940 
941     return M;
942 }
943 
944 
#ifdef ENABLE_VECTOR_MODE

// Rearrange the first _parsize complex values at 'p' in place for the
// vector-mode multiply loop. In every group of four complex values the
// four real parts end up in the first four floats and the four imaginary
// parts in the last four. Applying the function twice restores the
// original order. _parsize is expected to be a multiple of four.
void Convlevel::fftswap (fftwf_complex *p)
{
    unsigned int  left;
    float         t;

    for (left = _parsize; left; left -= 4, p += 4)
    {
        t = p [2][0];
        p [2][0] = p [0][1];
        p [0][1] = t;

        t = p [3][0];
        p [3][0] = p [1][1];
        p [1][1] = t;
    }
}

#endif
966 
967 
968