1 // ----------------------------------------------------------------------------
2 //
3 // Copyright (C) 2006-2011 Fons Adriaensen <fons@linuxaudio.org>
4 //
5 // This program is free software; you can redistribute it and/or modify
6 // it under the terms of the GNU General Public License as published by
7 // the Free Software Foundation; either version 3 of the License, or
8 // (at your option) any later version.
9 //
10 // This program is distributed in the hope that it will be useful,
11 // but WITHOUT ANY WARRANTY; without even the implied warranty of
12 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 // GNU General Public License for more details.
14 //
15 // You should have received a copy of the GNU General Public License
16 // along with this program. If not, see <http://www.gnu.org/licenses/>.
17 //
18 // ----------------------------------------------------------------------------
19
20
21 #include <unistd.h>
22 #include <stdlib.h>
23 #include <string.h>
24 #include <stdio.h>
25 #include <cmath>
26 extern "C" {
27 #define __STDC_CONSTANT_MACROS // needed for UINT64_C (libavutil 0.8.6)
28 #include <libavutil/common.h>
29 }
30 #include "zita-convolver.h"
31 #include "gx_compiler.h"
32
zita_convolver_major_version(void)33 int zita_convolver_major_version (void)
34 {
35 return ZITA_CONVOLVER_MAJOR_VERSION;
36 }
37
38
// Relative cost weights. Convproc::configure() multiplies _fft_cost by the
// number of inputs plus outputs and _mac_cost by inputs * outputs * density,
// and compares the two to decide how fast partition sizes grow.
float Convproc::_mac_cost = 1.0f;
float Convproc::_fft_cost = 5.0f;
41
42
Convproc(void)43 Convproc::Convproc (void) :
44 _state (ST_IDLE),
45 _skipcnt (0),
46 _density (0),
47 _ninp (0),
48 _nout (0),
49 _quantum (0),
50 _minpart (0),
51 _maxpart (0),
52 _nlevels (0),
53 _latecnt (0)
54 {
55 memset (_inpbuff, 0, MAXINP * sizeof (float *));
56 memset (_outbuff, 0, MAXOUT * sizeof (float *));
57 memset (_convlev, 0, MAXLEV * sizeof (Convlevel *));
58 }
59
60
~Convproc(void)61 Convproc::~Convproc (void)
62 {
63 cleanup ();
64 }
65
66
set_density(float density)67 void Convproc::set_density (float density)
68 {
69 _density = density;
70 }
71
72
set_skipcnt(unsigned int skipcnt)73 void Convproc::set_skipcnt (unsigned int skipcnt)
74 {
75 if ((_quantum == _minpart) && (_quantum == _maxpart)) _skipcnt = skipcnt;
76 }
77
78
configure(unsigned int ninp,unsigned int nout,unsigned int maxsize,unsigned int quantum,unsigned int minpart,unsigned int maxpart)79 int Convproc::configure (unsigned int ninp,
80 unsigned int nout,
81 unsigned int maxsize,
82 unsigned int quantum,
83 unsigned int minpart,
84 unsigned int maxpart)
85 {
86 unsigned int offs, npar, size, pind, nmin, nmax, step, i;
87 int prio, d, r, s;
88 float cfft, cmac, t;
89
90 if (_state != ST_IDLE) return Converror::BAD_STATE;
91 if ( (quantum & (quantum - 1))
92 || (quantum < MINQUANT)
93 || (quantum > MAXQUANT)
94 || (minpart & (minpart - 1))
95 || (minpart < MINPART)
96 || (minpart < quantum)
97 || (minpart > MAXDIVIS * quantum)
98 || (maxpart & (maxpart - 1))
99 || (maxpart > MAXPART)
100 || (maxpart < minpart)) return Converror::BAD_PARAM;
101
102 if (ninp < nout) { nmin = ninp; nmax = nout; }
103 else { nmin = nout; nmax = ninp; }
104
105 if (_density <= 0) _density = 1.0 / nmin;
106 else
107 {
108 t = 1.0f / nmax;
109 if (_density < t) _density = t;
110 if (_density > 1) _density = 1;
111 }
112
113 cfft = _fft_cost * (ninp + nout);
114 cmac = _mac_cost * ninp * nout * _density;
115 step = (cfft < 4 * cmac) ? 1 : 2;
116
117 if (step == 2)
118 {
119 r = maxpart / minpart;
120 s = (r & 0xAAAA) ? 1 : 2;
121 }
122 else s = 1;
123 nmin = (s == 1) ? 2 : 6;
124 if (minpart == quantum) nmin++;
125
126 prio = 0;
127 size = quantum;
128 while (size < minpart)
129 {
130 prio -= 1;
131 size <<= 1;
132 }
133
134 try
135 {
136 for (offs = pind = 0; offs < maxsize; pind++)
137 {
138 npar = (maxsize - offs + size - 1) / size;
139 if ((size < maxpart) && (npar > nmin))
140 {
141 r = 1 << s;
142 d = npar - nmin;
143 d = d - (d + r - 1) / r;
144 if (cfft < d * cmac) npar = nmin;
145 }
146 _convlev [pind] = new Convlevel ();
147 _convlev [pind]->configure (prio, offs, npar, size);
148
149 offs += size * npar;
150 if (offs < maxsize)
151 {
152 prio -= s;
153 size <<= s;
154 s = step;
155 nmin = (s == 1) ? 2 : 6;
156 }
157 }
158
159 _ninp = ninp;
160 _nout = nout;
161 _quantum = quantum;
162 _minpart = minpart;
163 _maxpart = size;
164 _nlevels = pind;
165 _latecnt = 0;
166 _inpsize = 2 * size;
167
168 for (i = 0; i < ninp; i++) _inpbuff [i] = new float [_inpsize];
169 for (i = 0; i < nout; i++) _outbuff [i] = new float [_minpart];
170 }
171 catch (...)
172 {
173 cleanup ();
174 return Converror::MEM_ALLOC;
175 }
176
177 _state = ST_STOP;
178 return 0;
179 }
180
181
impdata_create(unsigned int inp,unsigned int out,unsigned int step,float * data,int ind0,int ind1)182 int Convproc::impdata_create (unsigned int inp,
183 unsigned int out,
184 unsigned int step,
185 float *data,
186 int ind0,
187 int ind1)
188 {
189 unsigned int j;
190
191 if (_state != ST_STOP) return Converror::BAD_STATE;
192 try
193 {
194 for (j = 0; j < _nlevels; j++)
195 {
196 _convlev [j]->impdata_create (inp, out, step, data, ind0, ind1);
197 }
198 }
199 catch (...)
200 {
201 cleanup ();
202 return Converror::MEM_ALLOC;
203 }
204 return 0;
205 }
206
207
impdata_update(unsigned int inp,unsigned int out,unsigned int step,float * data,int ind0,int ind1)208 int Convproc::impdata_update (unsigned int inp,
209 unsigned int out,
210 unsigned int step,
211 float *data,
212 int ind0,
213 int ind1)
214 {
215 unsigned int j;
216
217 if (_state < ST_STOP) return Converror::BAD_STATE;
218 for (j = 0; j < _nlevels; j++)
219 {
220 _convlev [j]->impdata_update (inp, out, step, data, ind0, ind1);
221 }
222 return 0;
223 }
224
225
impdata_copy(unsigned int inp1,unsigned int out1,unsigned int inp2,unsigned int out2)226 int Convproc::impdata_copy (unsigned int inp1,
227 unsigned int out1,
228 unsigned int inp2,
229 unsigned int out2)
230 {
231 unsigned int j;
232
233 if (_state != ST_STOP) return Converror::BAD_STATE;
234 try
235 {
236 for (j = 0; j < _nlevels; j++)
237 {
238 _convlev [j]->impdata_copy (inp1, out1, inp2, out2);
239 }
240 }
241 catch (...)
242 {
243 cleanup ();
244 return Converror::MEM_ALLOC;
245 }
246 return 0;
247 }
248
249
reset(void)250 int Convproc::reset (void)
251 {
252 unsigned int k;
253
254 if (_state == ST_IDLE) return Converror::BAD_STATE;
255 for (k = 0; k < _ninp; k++) memset (_inpbuff [k], 0, _inpsize * sizeof (float));
256 for (k = 0; k < _nout; k++) memset (_outbuff [k], 0, _minpart * sizeof (float));
257 for (k = 0; k < _nlevels; k++) _convlev [k]->reset (_inpsize, _minpart, _inpbuff, _outbuff);
258 return 0;
259 }
260
261
start_process(int abspri,int policy)262 int Convproc::start_process (int abspri, int policy)
263 {
264 unsigned int k;
265
266 if (_state != ST_STOP) return Converror::BAD_STATE;
267
268 _latecnt = 0;
269 _inpoffs = 0;
270 _outoffs = 0;
271 reset ();
272 for (k = (_minpart == _quantum) ? 1 : 0; k < _nlevels; k++)
273 {
274 _convlev [k]->start (abspri, policy);
275 }
276 _state = ST_PROC;
277 return 0;
278 }
279
280
process(bool sync)281 int __rt_func Convproc::process (bool sync)
282 {
283 unsigned int k;
284 int f = 0;
285
286 if (_state != ST_PROC) return 0;
287
288 _inpoffs += _quantum;
289 if (_inpoffs == _inpsize) _inpoffs = 0;
290
291 _outoffs += _quantum;
292 if (_outoffs == _minpart)
293 {
294 _outoffs = 0;
295 for (k = 0; k < _nout; k++) memset (_outbuff [k], 0, _minpart * sizeof (float));
296 for (k = 0; k < _nlevels; k++) f |= _convlev [k]->readout (sync, _skipcnt);
297 if (_skipcnt < _minpart) _skipcnt = 0;
298 else _skipcnt -= _minpart;
299 if (f)
300 {
301 if (++_latecnt >= 5)
302 {
303 f |= FL_LOAD;
304 }
305 }
306 else _latecnt = 0;
307 }
308 return f;
309 }
310
311
stop_process(void)312 int Convproc::stop_process (void)
313 {
314 unsigned int k;
315
316 if (_state != ST_PROC) return Converror::BAD_STATE;
317 for (k = 0; k < _nlevels; k++) _convlev [k]->stop ();
318 _state = ST_WAIT;
319 return 0;
320 }
321
322
cleanup(void)323 int Convproc::cleanup (void)
324 {
325 unsigned int k;
326
327 while (! check_stop ())
328 {
329 usleep (100000);
330 }
331 if (_state != ST_STOP)
332 {
333 return Converror::BAD_STATE;
334 }
335
336 for (k = 0; k < _ninp; k++)
337 {
338 delete[] _inpbuff [k];
339 _inpbuff [k] = 0;
340 }
341 for (k = 0; k < _nout; k++)
342 {
343 delete[] _outbuff [k];
344 _outbuff [k] = 0;
345 }
346 for (k = 0; k < _nlevels; k++)
347 {
348 delete _convlev [k];
349 _convlev [k] = 0;
350 }
351
352 _state = ST_IDLE;
353 _skipcnt = 0;
354 _density = 0;
355 _ninp = 0;
356 _nout = 0;
357 _quantum = 0;
358 _minpart = 0;
359 _maxpart = 0;
360 _nlevels = 0;
361 _latecnt = 0;
362 return 0;
363 }
364
365
check_stop(void)366 bool Convproc::check_stop (void)
367 {
368 unsigned int k;
369
370 for (k = 0; (k < _nlevels) && (_convlev [k]->_stat == Convlevel::ST_IDLE); k++);
371 if (k == _nlevels)
372 {
373 _state = ST_STOP;
374 return true;
375 }
376 return false;
377 }
378
379
print(FILE * F)380 void Convproc::print (FILE *F)
381 {
382 unsigned int k;
383
384 for (k = 0; k < _nlevels; k++) _convlev [k]->print (F);
385 }
386
387
388
// Vector of four floats (16 bytes, GCC vector extension). Convlevel::process()
// uses it for the multiply-accumulate over frequency-domain data.
typedef float FV4 __attribute__ ((vector_size(16)));
390
391
// Create an idle, unconfigured level: no partitions, no thread, empty input
// and output lists, and no transform contexts or work buffer.
Convlevel::Convlevel (void) :
    _stat (ST_IDLE),
    _npar (0),
    _parsize (0),
    _pthr (0),
    _inp_list (0),
    _out_list (0),
    _plan_r2c (0),
    _plan_c2r (0),
    _freq_data (0)
{
}
404
405
406
// Destroy the level; cleanup() releases all nodes, buffers and transform
// contexts owned by it.
Convlevel::~Convlevel (void)
{
    cleanup ();
}
411
412
alloc_aligned(size_t size)413 void *Convlevel::alloc_aligned (size_t size)
414 {
415 void *p;
416
417 p = av_malloc(size);
418 memset (p, 0, size);
419 return p;
420 }
421
configure(int prio,unsigned int offs,unsigned int npar,unsigned int parsize)422 void Convlevel::configure (int prio,
423 unsigned int offs,
424 unsigned int npar,
425 unsigned int parsize)
426 {
427 _prio = prio;
428 _offs = offs;
429 _npar = npar;
430 _parsize = parsize;
431
432 _freq_data = (fftwf_complex *)(alloc_aligned ((_parsize + 1) * sizeof (fftwf_complex)));
433 _plan_r2c = av_rdft_init (int(log2(2 * _parsize)), DFT_R2C);
434 _plan_c2r = av_rdft_init (int(log2(2 * _parsize)), IDFT_C2R);
435 if (_plan_r2c && _plan_c2r) return;
436 throw (Converror (Converror::MEM_ALLOC));
437 }
438
// Add impulse response data for the (inp, out) pair to this level.
//
//   inp, out   input / output index; missing nodes are created
//   step       stride in data: sample j is read from data [j * step]
//   data       the samples
//   i0, i1     index range covered by data; n = i1 - i0 samples are used
//
// Each partition that overlaps the supplied range is transformed and ADDED
// to the stored frequency-domain data, so repeated calls accumulate.
// Partition storage is allocated on first use.
void Convlevel::impdata_create (unsigned int inp,
                                unsigned int out,
                                unsigned int step,
                                float *data,
                                int i0,
                                int i1)
{
    unsigned int k;
    int j, j0, j1, n;
    float norm;
    fftwf_complex *fftb;
    Macnode *M;

    n = i1 - i0;
    // From here on i0 / i1 are positions relative to the start of data:
    // first the span of the whole level, then (in the loop) of one partition.
    i0 = _offs - i0;
    i1 = i0 + _npar * _parsize;
    // Nothing to do when the data does not overlap this level at all.
    if ((i0 >= n) || (i1 <= 0)) return;

    M = findmacnode (inp, out, true);
    if (! (M->_fftb))
    {
        // First data for this pair: create the (still empty) partition table.
        M->_fftb = new fftwf_complex * [_npar];
        memset (M->_fftb, 0, _npar * sizeof (fftwf_complex *));
    }

    norm = 1.0f / _parsize; // FIXME
    for (k = 0; k < _npar; k++)
    {
        i1 = i0 + _parsize;
        // Only partitions that overlap the data are touched.
        if ((i0 < n) && (i1 > 0))
        {
            if (! (M->_fftb [k]))
            {
                M->_fftb [k] = (fftwf_complex *)(alloc_aligned ((_parsize + 1) * sizeof (fftwf_complex)));
            }
            // _freq_data doubles as the real-valued transform input; the
            // transform is done in place.
            float *prepdata = (float*)_freq_data;
            memset (prepdata, 0, 2 * (_parsize+1) * sizeof (float));
            // Clip the partition to the available samples.
            j0 = (i0 < 0) ? 0 : i0;
            j1 = (i1 > n) ? n : i1;
            for (j = j0; j < j1; j++) prepdata [j - i0] = norm * data [j * step];
            av_rdft_calc(_plan_r2c, prepdata);
            // adjust for packing convention: the value the transform left in
            // the imaginary slot of bin 0 is moved to the real part of bin
            // _parsize
            _freq_data[_parsize][0] = _freq_data[0][1];
            _freq_data[0][1] = 0;
            // Rearrange into the layout used by the MAC loop in process().
            fftswap (_freq_data);
            fftb = M->_fftb [k];
            for (j = 0; j <= (int)_parsize; j++)
            {
                fftb [j][0] += _freq_data [j][0];
                fftb [j][1] += _freq_data [j][1];
            }
        }
        i0 = i1;
    }
}
494
495
// Replace impulse response data of an existing (inp, out) pair on this level.
//
//   inp, out   input / output index; nothing happens if the pair is unknown
//   step       stride in data: sample j is read from data [j * step]
//   data       the samples
//   i0, i1     index range covered by data; n = i1 - i0 samples are used
//
// Unlike impdata_create() nothing is allocated and nothing is accumulated:
// each already allocated partition that overlaps the supplied range is
// overwritten with the transform of the new samples.
void Convlevel::impdata_update (unsigned int inp,
                                unsigned int out,
                                unsigned int step,
                                float *data,
                                int i0,
                                int i1)
{
    unsigned int k;
    int j, j0, j1, n;
    float norm;
    fftwf_complex *fftb;
    Macnode *M;

    M = findmacnode (inp, out, false);
    if (! M) return;

    n = i1 - i0;
    // From here on i0 / i1 are positions relative to the start of data:
    // first the span of the whole level, then (in the loop) of one partition.
    i0 = _offs - i0;
    i1 = i0 + _npar * _parsize;
    // Nothing to do when the data does not overlap this level at all.
    if ((i0 >= n) || (i1 <= 0)) return;

    norm = 1.0f / _parsize; // FIXME
    for (k = 0; k < _npar; k++)
    {
        i1 = i0 + _parsize;
        fftb = M->_fftb [k];
        // Skip partitions that were never allocated or do not overlap the data.
        if (fftb && (i0 < n) && (i1 > 0))
        {
            // The stored partition itself is the in-place transform buffer.
            float *prepdata = (float*)fftb;
            memset (prepdata, 0, 2 * _parsize * sizeof (float));
            // Clip the partition to the available samples.
            j0 = (i0 < 0) ? 0 : i0;
            j1 = (i1 > n) ? n : i1;
            for (j = j0; j < j1; j++) prepdata [j - i0] = norm * data [j * step];
            av_rdft_calc(_plan_r2c, prepdata);
            // adjust for packing convention: the value the transform left in
            // the imaginary slot of bin 0 is moved to the real part of bin
            // _parsize
            fftb[_parsize][0] = fftb[0][1];
            fftb[_parsize][1] = 0;
            fftb[0][1] = 0;
            // Rearrange into the layout used by the MAC loop in process().
            fftswap (fftb);
        }
        i0 = i1;
    }
}
539
540
impdata_copy(unsigned int inp1,unsigned int out1,unsigned int inp2,unsigned int out2)541 void Convlevel::impdata_copy (unsigned int inp1,
542 unsigned int out1,
543 unsigned int inp2,
544 unsigned int out2)
545 {
546 Macnode *M1;
547 Macnode *M2;
548
549 M1 = findmacnode (inp1, out1, false);
550 if (! M1) return;
551 M2 = findmacnode (inp2, out2, true);
552 if (M2->_fftb) return;
553 M2->_fftb = M1->_fftb;
554 M2->_copy = true;
555 }
556
557
reset(unsigned int inpsize,unsigned int outsize,float ** inpbuff,float ** outbuff)558 void Convlevel::reset (unsigned int inpsize,
559 unsigned int outsize,
560 float **inpbuff,
561 float **outbuff)
562 {
563 unsigned int i;
564 Inpnode *X;
565 Outnode *Y;
566
567 _inpsize = inpsize;
568 _outsize = outsize;
569 _inpbuff = inpbuff;
570 _outbuff = outbuff;
571 for (X = _inp_list; X; X = X->_next)
572 {
573 for (i = 0; i < _npar; i++)
574 {
575 memset (X->_ffta [i], 0, (_parsize + 1) * sizeof (fftwf_complex));
576 }
577 }
578 for (Y = _out_list; Y; Y = Y->_next)
579 {
580 for (i = 0; i < 3; i++)
581 {
582 memset (Y->_buff [i], 0, _parsize * sizeof (float));
583 }
584 }
585 if (_parsize == _outsize)
586 {
587 _outoffs = 0;
588 _inpoffs = 0;
589 }
590 else
591 {
592 _outoffs = _parsize / 2;
593 _inpoffs = _inpsize - _outoffs;
594 }
595 _bits = _parsize / _outsize;
596 _wait = 0;
597 _ptind = 0;
598 _opind = 0;
599 _trig.init (0, 0);
600 _done.init (0, 0);
601 }
602
603
start(int abspri,int policy)604 void Convlevel::start (int abspri, int policy)
605 {
606 int min, max;
607 pthread_attr_t attr;
608 struct sched_param parm;
609
610 _pthr = 0;
611 min = sched_get_priority_min (policy);
612 max = sched_get_priority_max (policy);
613 abspri += _prio;
614 if (abspri > max) abspri = max;
615 if (abspri < min) abspri = min;
616 parm.sched_priority = abspri;
617 pthread_attr_init (&attr);
618 pthread_attr_setdetachstate (&attr, PTHREAD_CREATE_DETACHED);
619 pthread_attr_setschedpolicy (&attr, policy);
620 pthread_attr_setschedparam (&attr, &parm);
621 pthread_attr_setscope (&attr, PTHREAD_SCOPE_SYSTEM);
622 pthread_attr_setinheritsched (&attr, PTHREAD_EXPLICIT_SCHED);
623 pthread_attr_setstacksize (&attr, 0x10000);
624 pthread_create (&_pthr, &attr, static_main, this);
625 pthread_attr_destroy (&attr);
626 }
627
628
stop(void)629 void Convlevel::stop (void)
630 {
631 if (_stat != ST_IDLE)
632 {
633 _stat = ST_TERM;
634 _trig.post ();
635 }
636 }
637
638
cleanup(void)639 void Convlevel::cleanup (void)
640 {
641 unsigned int i;
642 Inpnode *X, *X1;
643 Outnode *Y, *Y1;
644 Macnode *M, *M1;
645
646 X = _inp_list;
647 while (X)
648 {
649 for (i = 0; i < _npar; i++) free (X->_ffta [i]);
650 delete[] X->_ffta;
651 X1 = X->_next;
652 delete X;
653 X = X1;
654 }
655 _inp_list = 0;
656
657 Y = _out_list;
658 while (Y)
659 {
660 M = Y->_list;
661 while (M)
662 {
663 if ((M->_fftb) && !(M->_copy))
664 {
665 for (i = 0; i < _npar; i++)
666 {
667 free (M->_fftb [i]);
668 }
669 delete[] M->_fftb;
670 }
671 M1 = M->_next;
672 delete M;
673 M = M1;
674 }
675 for (i = 0; i < 3; i++) free (Y->_buff [i]);
676 Y1 = Y->_next;
677 delete Y;
678 Y = Y1;
679 }
680 _out_list = 0;
681
682 av_rdft_end (_plan_r2c);
683 av_rdft_end (_plan_c2r);
684 av_free (_freq_data);
685 _plan_r2c = 0;
686 _plan_c2r = 0;
687 _freq_data = 0;
688 }
689
690
static_main(void * arg)691 void *Convlevel::static_main (void *arg)
692 {
693 ((Convlevel *) arg)->main ();
694 return 0;
695 }
696
697
main(void)698 void __rt_func Convlevel::main (void)
699 {
700 _stat = ST_PROC;
701 while (true)
702 {
703 _trig.wait ();
704 if (_stat == ST_TERM)
705 {
706 _stat = ST_IDLE;
707 _pthr = 0;
708 return;
709 }
710 process (false);
711 _done.post ();
712 }
713 }
714
715
// Run one partition cycle of this level.
//
// For every input, the next _parsize samples are taken from the circular
// input buffer, zero-padded and transformed into slot _ptind of that input's
// spectrum history. Then, unless skip is set, every output accumulates the
// products of the stored input spectra with the impulse response partitions,
// transforms the sum back, adds the first half to one output buffer and
// stores the second half in another.
//
//   skip   when true no output is computed; the buffer that would receive
//          the second half is cleared instead
void __rt_func Convlevel::process (bool skip)
{
    unsigned int i, j, k;
    unsigned int i1, n1, n2, opi1, opi2;

    Inpnode *X;
    Macnode *M;
    Outnode *Y;
    fftwf_complex *ffta;
    fftwf_complex *fftb;
    float *inpd;
    float *outd;

    // Read _parsize samples starting at _inpoffs. If that runs past the end
    // of the circular input buffer, n1 samples come from the tail and n2
    // from the start.
    i1 = _inpoffs;
    n1 = _parsize;
    n2 = 0;
    _inpoffs = i1 + n1;
    if (_inpoffs >= _inpsize)
    {
        _inpoffs -= _inpsize;
        n2 = _inpoffs;
        n1 -= n2;
    }

    // The two output buffers following the one currently being read out.
    opi1 = (_opind + 1) % 3;
    opi2 = (_opind + 2) % 3;

    for (X = _inp_list; X; X = X->_next)
    {
        inpd = _inpbuff [X->_inp];
        // The spectrum slot is also the in-place transform buffer.
        fftwf_complex *freqdata = X->_ffta [_ptind];
        float *time_data = (float*)freqdata;
        if (n1) memcpy (time_data, inpd + i1, n1 * sizeof (float));
        if (n2) memcpy (time_data + n1, inpd, n2 * sizeof (float));
        // Zero the second half (plus the two floats of the extra bin).
        memset (time_data + _parsize, 0, (_parsize+2) * sizeof (float));
        av_rdft_calc(_plan_r2c, time_data);
        // adjust for packing convention: the value the transform left in the
        // imaginary slot of bin 0 is moved to the real part of bin _parsize
        freqdata[_parsize][0] = freqdata[0][1];
        freqdata[0][1] = 0;
        // Rearrange into the layout the vector loop below expects.
        fftswap (X->_ffta [_ptind]);
    }

    if (skip)
    {
        for (Y = _out_list; Y; Y = Y->_next)
        {
            outd = Y->_buff [opi2];
            memset (outd, 0, _parsize * sizeof (float));
        }
    }
    else
    {
        for (Y = _out_list; Y; Y = Y->_next)
        {
            // _freq_data is the accumulator for this output.
            memset (_freq_data, 0, (_parsize + 1) * sizeof (fftwf_complex));
            for (M = Y->_list; M; M = M->_next)
            {
                X = M->_inpn;
                // Partition j is paired with the input spectrum stored j
                // cycles ago: i starts at the newest slot and walks backwards
                // through the circular history.
                i = _ptind;
                for (j = 0; j < _npar; j++)
                {
                    ffta = X->_ffta [i];
                    fftb = M->_fftb [j];
                    // Partitions without data are null and contribute nothing.
                    if (fftb)
                    {
                        FV4 *A = (FV4 *) ffta;
                        FV4 *B = (FV4 *) fftb;
                        FV4 *D = (FV4 *) _freq_data;
                        // After fftswap() each group of four bins is stored
                        // as four real parts followed by four imaginary
                        // parts, so one pass does four complex
                        // multiply-accumulates.
                        for (k = 0; k < _parsize; k += 4)
                        {
                            D [0] += A [0] * B [0] - A [1] * B [1];
                            D [1] += A [0] * B [1] + A [1] * B [0];
                            A += 2;
                            B += 2;
                            D += 2;
                        }
                        // Bin _parsize lies outside the vector loop; only its
                        // real part is accumulated.
                        _freq_data [_parsize][0] += ffta [_parsize][0] * fftb [_parsize][0];
                        _freq_data [_parsize][1] = 0;
                    }
                    if (i == 0) i = _npar;
                    i--;
                }
            }

            // Undo the layout change and the packing adjustment before the
            // inverse transform.
            fftswap (_freq_data);
            _freq_data[0][1] = _freq_data[_parsize][0]; // adjust for packing convention
            av_rdft_calc(_plan_c2r, (float*)_freq_data);
            // First half is added to what the previous cycle stored in this
            // buffer; second half is stored for the next cycle to add to.
            outd = Y->_buff [opi1];
            for (k = 0; k < _parsize; k++) outd [k] += ((float*)_freq_data) [k];
            outd = Y->_buff [opi2];
            memcpy (outd, ((float*)_freq_data) + _parsize, _parsize * sizeof (float));
        }
    }

    // Advance the write position in the circular spectrum history.
    _ptind++;
    if (_ptind == _npar) _ptind = 0;
}
813
814
// Add the next _outsize samples of this level to the shared output buffers
// and, whenever a full partition has been read, start the next cycle.
//
//   sync      when true, block until outstanding worker cycles have
//             finished; when false, only poll for them
//   skipcnt   used only when the level runs inline: the cycle is run with
//             skip set when skipcnt >= 2 * _parsize
//
// Returns _bits when more than one worker cycle is outstanding after this
// call, 0 otherwise.
int __rt_func Convlevel::readout (bool sync, unsigned int skipcnt)
{
    unsigned int i;
    float *p, *q;
    Outnode *Y;

    _outoffs += _outsize;
    if (_outoffs == _parsize)
    {
        _outoffs = 0;
        // _stat is ST_PROC only while the worker thread's main() is running.
        if (_stat == ST_PROC)
        {
            // _wait counts triggered cycles not yet seen as done; collect as
            // many completions as are available.
            while (_wait)
            {
                if (sync) _done.wait ();
                // NOTE(review): a non-zero trywait() result is taken to mean
                // "nothing to collect yet" - confirm against the semaphore class.
                else if (_done.trywait ()) break;
                _wait--;
            }
            // Move on to the next of the three output buffers and trigger
            // the worker for the next cycle.
            if (++_opind == 3) _opind = 0;
            _trig.post ();
            _wait++;
        }
        else
        {
            // No worker thread: run the cycle in the calling thread.
            process (skipcnt >= 2 * _parsize);
            if (++_opind == 3) _opind = 0;
        }
    }

    for (Y = _out_list; Y; Y = Y->_next)
    {
        p = Y->_buff [_opind] + _outoffs;
        q = _outbuff [Y->_out];
        for (i = 0; i < _outsize; i++) q [i] += p [i];
    }

    return (_wait > 1) ? _bits : 0;
}
853
854
print(FILE * F)855 void Convlevel::print (FILE *F)
856 {
857 fprintf (F, "prio = %4d, offs = %6d, parsize = %5d, npar = %3d\n", _prio, _offs, _parsize, _npar);
858 }
859
860
findmacnode(unsigned int inp,unsigned int out,bool create)861 Macnode *Convlevel::findmacnode (unsigned int inp, unsigned int out, bool create)
862 {
863 unsigned int i;
864 Inpnode *X;
865 Outnode *Y;
866 Macnode *M;
867
868 for (X = _inp_list; X && (X->_inp != inp); X = X->_next);
869 if (! X)
870 {
871 if (! create) return 0;
872 X = new Inpnode;
873 X->_next = _inp_list;
874 _inp_list = X;
875 X->_inp = inp;
876 X->_ffta = new fftwf_complex * [_npar];
877 memset (X->_ffta, 0, _npar * sizeof (fftwf_complex *));
878 for (i = 0; i < _npar; i++)
879 {
880 X->_ffta [i] = (fftwf_complex *)(alloc_aligned ((_parsize + 1) * sizeof (fftwf_complex)));
881 }
882 }
883
884 for (Y = _out_list; Y && (Y->_out != out); Y = Y->_next);
885 if (! Y)
886 {
887 if (! create) return 0;
888 Y = new Outnode;
889 Y->_next = _out_list;
890 _out_list = Y;
891 Y->_out = out;
892 Y->_list = 0;
893 for (i = 0; i < 3; i++)
894 {
895 Y->_buff [i] = 0;
896 }
897 for (i = 0; i < 3; i++)
898 {
899 Y->_buff [i] = (float *)(alloc_aligned (_parsize * sizeof (float)));
900 }
901 }
902
903 for (M = Y->_list; M && (M->_inpn != X); M = M->_next);
904 if (! M)
905 {
906 if (! create) return 0;
907 M = new Macnode;
908 M->_next = Y->_list;
909 Y->_list = M;
910 M->_inpn = X;
911 M->_fftb = 0;
912 M->_copy = false;
913 }
914
915 return M;
916 }
917
918
fftswap(fftwf_complex * p)919 void __rt_func Convlevel::fftswap (fftwf_complex *p)
920 {
921 unsigned int n = _parsize;
922 float a, b;
923
924 while (n)
925 {
926 a = p [2][0];
927 b = p [3][0];
928 p [2][0] = p [0][1];
929 p [3][0] = p [1][1];
930 p [0][1] = a;
931 p [1][1] = b;
932 p += 4;
933 n -= 4;
934 }
935 }
936