1 // ----------------------------------------------------------------------------
2 //
3 // Copyright (C) 2006-2011 Fons Adriaensen <fons@linuxaudio.org>
4 //
5 // This program is free software; you can redistribute it and/or modify
6 // it under the terms of the GNU General Public License as published by
7 // the Free Software Foundation; either version 3 of the License, or
8 // (at your option) any later version.
9 //
10 // This program is distributed in the hope that it will be useful,
11 // but WITHOUT ANY WARRANTY; without even the implied warranty of
12 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 // GNU General Public License for more details.
14 //
15 // You should have received a copy of the GNU General Public License
16 // along with this program. If not, see <http://www.gnu.org/licenses/>.
17 //
18 // ----------------------------------------------------------------------------
19
20
21 #include <unistd.h>
22 #include <stdlib.h>
23 #include <string.h>
24 #include <stdio.h>
25 #include "zita-convolver.h"
26
27
28
zita_convolver_major_version(void)29 int zita_convolver_major_version (void)
30 {
31 return ZITA_CONVOLVER_MAJOR_VERSION;
32 }
33
34
35 float Convproc::_mac_cost = 1.0f;
36 float Convproc::_fft_cost = 5.0f;
37
38
Convproc(void)39 Convproc::Convproc (void) :
40 _state (ST_IDLE),
41 _options (0),
42 _skipcnt (0),
43 _density (0),
44 _ninp (0),
45 _nout (0),
46 _quantum (0),
47 _minpart (0),
48 _maxpart (0),
49 _nlevels (0),
50 _latecnt (0)
51 {
52 memset (_inpbuff, 0, MAXINP * sizeof (float *));
53 memset (_outbuff, 0, MAXOUT * sizeof (float *));
54 memset (_convlev, 0, MAXLEV * sizeof (Convlevel *));
55 }
56
57
~Convproc(void)58 Convproc::~Convproc (void)
59 {
60 cleanup ();
61 }
62
63
set_options(unsigned int options)64 void Convproc::set_options (unsigned int options)
65 {
66 _options = options;
67 }
68
69
set_density(float density)70 void Convproc::set_density (float density)
71 {
72 _density = density;
73 }
74
75
set_skipcnt(unsigned int skipcnt)76 void Convproc::set_skipcnt (unsigned int skipcnt)
77 {
78 if ((_quantum == _minpart) && (_quantum == _maxpart)) _skipcnt = skipcnt;
79 }
80
81
configure(unsigned int ninp,unsigned int nout,unsigned int maxsize,unsigned int quantum,unsigned int minpart,unsigned int maxpart)82 int Convproc::configure (unsigned int ninp,
83 unsigned int nout,
84 unsigned int maxsize,
85 unsigned int quantum,
86 unsigned int minpart,
87 unsigned int maxpart)
88 {
89 unsigned int offs, npar, size, pind, nmin, nmax, step, i;
90 int prio, d, r, s;
91 float cfft, cmac, t;
92
93 if (_state != ST_IDLE) return Converror::BAD_STATE;
94 if ( (quantum & (quantum - 1))
95 || (quantum < MINQUANT)
96 || (quantum > MAXQUANT)
97 || (minpart & (minpart - 1))
98 || (minpart < MINPART)
99 || (minpart < quantum)
100 || (minpart > MAXDIVIS * quantum)
101 || (maxpart & (maxpart - 1))
102 || (maxpart > MAXPART)
103 || (maxpart < minpart)) return Converror::BAD_PARAM;
104
105 if (ninp < nout) { nmin = ninp; nmax = nout; }
106 else { nmin = nout; nmax = ninp; }
107
108 if (_density <= 0) _density = 1.0 / nmin;
109 else
110 {
111 t = 1.0f / nmax;
112 if (_density < t) _density = t;
113 if (_density > 1) _density = 1;
114 }
115
116 cfft = _fft_cost * (ninp + nout);
117 cmac = _mac_cost * ninp * nout * _density;
118 step = (cfft < 4 * cmac) ? 1 : 2;
119
120 if (step == 2)
121 {
122 r = maxpart / minpart;
123 s = (r & 0xAAAA) ? 1 : 2;
124 }
125 else s = 1;
126 nmin = (s == 1) ? 2 : 6;
127 if (minpart == quantum) nmin++;
128
129 prio = 0;
130 size = quantum;
131 while (size < minpart)
132 {
133 prio -= 1;
134 size <<= 1;
135 }
136
137 try
138 {
139 for (offs = pind = 0; offs < maxsize; pind++)
140 {
141 npar = (maxsize - offs + size - 1) / size;
142 if ((size < maxpart) && (npar > nmin))
143 {
144 r = 1 << s;
145 d = npar - nmin;
146 d = d - (d + r - 1) / r;
147 if (cfft < d * cmac) npar = nmin;
148 }
149 _convlev [pind] = new Convlevel ();
150 _convlev [pind]->configure (prio, offs, npar, size, _options);
151
152 offs += size * npar;
153 if (offs < maxsize)
154 {
155 prio -= s;
156 size <<= s;
157 s = step;
158 nmin = (s == 1) ? 2 : 6;
159 }
160 }
161
162 _ninp = ninp;
163 _nout = nout;
164 _quantum = quantum;
165 _minpart = minpart;
166 _maxpart = size;
167 _nlevels = pind;
168 _latecnt = 0;
169 _inpsize = 2 * size;
170
171 for (i = 0; i < ninp; i++) _inpbuff [i] = new float [_inpsize];
172 for (i = 0; i < nout; i++) _outbuff [i] = new float [_minpart];
173 }
174 catch (...)
175 {
176 cleanup ();
177 return Converror::MEM_ALLOC;
178 }
179
180 _state = ST_STOP;
181 return 0;
182 }
183
184
impdata_create(unsigned int inp,unsigned int out,unsigned int step,float * data,int ind0,int ind1)185 int Convproc::impdata_create (unsigned int inp,
186 unsigned int out,
187 unsigned int step,
188 float *data,
189 int ind0,
190 int ind1)
191 {
192 unsigned int j;
193
194 if (_state != ST_STOP) return Converror::BAD_STATE;
195 try
196 {
197 for (j = 0; j < _nlevels; j++)
198 {
199 _convlev [j]->impdata_create (inp, out, step, data, ind0, ind1);
200 }
201 }
202 catch (...)
203 {
204 cleanup ();
205 return Converror::MEM_ALLOC;
206 }
207 return 0;
208 }
209
210
impdata_update(unsigned int inp,unsigned int out,unsigned int step,float * data,int ind0,int ind1)211 int Convproc::impdata_update (unsigned int inp,
212 unsigned int out,
213 unsigned int step,
214 float *data,
215 int ind0,
216 int ind1)
217 {
218 unsigned int j;
219
220 if (_state < ST_STOP) return Converror::BAD_STATE;
221 for (j = 0; j < _nlevels; j++)
222 {
223 _convlev [j]->impdata_update (inp, out, step, data, ind0, ind1);
224 }
225 return 0;
226 }
227
228
impdata_copy(unsigned int inp1,unsigned int out1,unsigned int inp2,unsigned int out2)229 int Convproc::impdata_copy (unsigned int inp1,
230 unsigned int out1,
231 unsigned int inp2,
232 unsigned int out2)
233 {
234 unsigned int j;
235
236 if (_state != ST_STOP) return Converror::BAD_STATE;
237 try
238 {
239 for (j = 0; j < _nlevels; j++)
240 {
241 _convlev [j]->impdata_copy (inp1, out1, inp2, out2);
242 }
243 }
244 catch (...)
245 {
246 cleanup ();
247 return Converror::MEM_ALLOC;
248 }
249 return 0;
250 }
251
252
reset(void)253 int Convproc::reset (void)
254 {
255 unsigned int k;
256
257 if (_state == ST_IDLE) return Converror::BAD_STATE;
258 for (k = 0; k < _ninp; k++) memset (_inpbuff [k], 0, _inpsize * sizeof (float));
259 for (k = 0; k < _nout; k++) memset (_outbuff [k], 0, _minpart * sizeof (float));
260 for (k = 0; k < _nlevels; k++) _convlev [k]->reset (_inpsize, _minpart, _inpbuff, _outbuff);
261 return 0;
262 }
263
264
start_process(int abspri,int policy)265 int Convproc::start_process (int abspri, int policy)
266 {
267 unsigned int k;
268
269 if (_state != ST_STOP) return Converror::BAD_STATE;
270
271 _latecnt = 0;
272 _inpoffs = 0;
273 _outoffs = 0;
274 reset ();
275 for (k = (_minpart == _quantum) ? 1 : 0; k < _nlevels; k++)
276 {
277 _convlev [k]->start (abspri, policy);
278 }
279 _state = ST_PROC;
280 return 0;
281 }
282
283
process(bool sync)284 int Convproc::process (bool sync)
285 {
286 unsigned int k;
287 int f = 0;
288
289 if (_state != ST_PROC) return 0;
290
291 _inpoffs += _quantum;
292 if (_inpoffs == _inpsize) _inpoffs = 0;
293
294 _outoffs += _quantum;
295 if (_outoffs == _minpart)
296 {
297 _outoffs = 0;
298 for (k = 0; k < _nout; k++) memset (_outbuff [k], 0, _minpart * sizeof (float));
299 for (k = 0; k < _nlevels; k++) f |= _convlev [k]->readout (sync, _skipcnt);
300 if (_skipcnt < _minpart) _skipcnt = 0;
301 else _skipcnt -= _minpart;
302 if (f)
303 {
304 if (++_latecnt >= 5)
305 {
306 stop_process ();
307 f |= FL_LOAD;
308 }
309 }
310 else _latecnt = 0;
311 }
312 return f;
313 }
314
315
stop_process(void)316 int Convproc::stop_process (void)
317 {
318 unsigned int k;
319
320 if (_state != ST_PROC) return Converror::BAD_STATE;
321 for (k = 0; k < _nlevels; k++) _convlev [k]->stop ();
322 _state = ST_WAIT;
323 return 0;
324 }
325
326
cleanup(void)327 int Convproc::cleanup (void)
328 {
329 unsigned int k;
330
331 while (! check_stop ())
332 {
333 usleep (100000);
334 }
335 if (_state != ST_STOP)
336 {
337 return Converror::BAD_STATE;
338 }
339
340 for (k = 0; k < _ninp; k++)
341 {
342 delete[] _inpbuff [k];
343 _inpbuff [k] = 0;
344 }
345 for (k = 0; k < _nout; k++)
346 {
347 delete[] _outbuff [k];
348 _outbuff [k] = 0;
349 }
350 for (k = 0; k < _nlevels; k++)
351 {
352 delete _convlev [k];
353 _convlev [k] = 0;
354 }
355
356 _state = ST_IDLE;
357 _options = 0;
358 _skipcnt = 0;
359 _density = 0;
360 _ninp = 0;
361 _nout = 0;
362 _quantum = 0;
363 _minpart = 0;
364 _maxpart = 0;
365 _nlevels = 0;
366 _latecnt = 0;
367 return 0;
368 }
369
370
check_stop(void)371 bool Convproc::check_stop (void)
372 {
373 unsigned int k;
374
375 for (k = 0; (k < _nlevels) && (_convlev [k]->_stat == Convlevel::ST_IDLE); k++);
376 if (k == _nlevels)
377 {
378 _state = ST_STOP;
379 return true;
380 }
381 return false;
382 }
383
384
print(FILE * F)385 void Convproc::print (FILE *F)
386 {
387 unsigned int k;
388
389 for (k = 0; k < _nlevels; k++) _convlev [k]->print (F);
390 }
391
392
393
// 16-byte vector of floats (GCC vector extension), used by the
// vector mode multiply-accumulate loop in Convlevel::process ().
typedef float FV4 __attribute__ ((__vector_size__ (16)));
395
396
Convlevel(void)397 Convlevel::Convlevel (void) :
398 _stat (ST_IDLE),
399 _npar (0),
400 _parsize (0),
401 _options (0),
402 _pthr (0),
403 _inp_list (0),
404 _out_list (0),
405 _plan_r2c (0),
406 _plan_c2r (0),
407 _time_data (0),
408 _prep_data (0),
409 _freq_data (0)
410 {
411 }
412
413
414
~Convlevel(void)415 Convlevel::~Convlevel (void)
416 {
417 cleanup ();
418 }
419
420
alloc_aligned(size_t size)421 void *Convlevel::alloc_aligned (size_t size)
422 {
423 void *p;
424
425 if (posix_memalign (&p, 16, size)) throw (Converror (Converror::MEM_ALLOC));
426 memset (p, 0, size);
427 return p;
428 }
429
430
configure(int prio,unsigned int offs,unsigned int npar,unsigned int parsize,unsigned int options)431 void Convlevel::configure (int prio,
432 unsigned int offs,
433 unsigned int npar,
434 unsigned int parsize,
435 unsigned int options)
436 {
437 int fftwopt = (options & OPT_FFTW_MEASURE) ? FFTW_MEASURE : FFTW_ESTIMATE;
438
439 _prio = prio;
440 _offs = offs;
441 _npar = npar;
442 _parsize = parsize;
443 _options = options;
444
445 _time_data = (float *)(alloc_aligned (2 * _parsize * sizeof (float)));
446 _prep_data = (float *)(alloc_aligned (2 * _parsize * sizeof (float)));
447 _freq_data = (fftwf_complex *)(alloc_aligned ((_parsize + 1) * sizeof (fftwf_complex)));
448 _plan_r2c = fftwf_plan_dft_r2c_1d (2 * _parsize, _time_data, _freq_data, fftwopt);
449 _plan_c2r = fftwf_plan_dft_c2r_1d (2 * _parsize, _freq_data, _time_data, fftwopt);
450 if (_plan_r2c && _plan_c2r) return;
451 throw (Converror (Converror::MEM_ALLOC));
452 }
453
454
impdata_create(unsigned int inp,unsigned int out,unsigned int step,float * data,int i0,int i1)455 void Convlevel::impdata_create (unsigned int inp,
456 unsigned int out,
457 unsigned int step,
458 float *data,
459 int i0,
460 int i1)
461 {
462 unsigned int k;
463 int j, j0, j1, n;
464 float norm;
465 fftwf_complex *fftb;
466 Macnode *M;
467
468 n = i1 - i0;
469 i0 = _offs - i0;
470 i1 = i0 + _npar * _parsize;
471 if ((i0 >= n) || (i1 <= 0)) return;
472
473 M = findmacnode (inp, out, true);
474 if (! (M->_fftb))
475 {
476 M->_fftb = new fftwf_complex * [_npar];
477 memset (M->_fftb, 0, _npar * sizeof (fftwf_complex *));
478 }
479
480 norm = 0.5f / _parsize;
481 for (k = 0; k < _npar; k++)
482 {
483 i1 = i0 + _parsize;
484 if ((i0 < n) && (i1 > 0))
485 {
486 if (! (M->_fftb [k]))
487 {
488 M->_fftb [k] = (fftwf_complex *)(alloc_aligned ((_parsize + 1) * sizeof (fftwf_complex)));
489 }
490 memset (_prep_data, 0, 2 * _parsize * sizeof (float));
491 j0 = (i0 < 0) ? 0 : i0;
492 j1 = (i1 > n) ? n : i1;
493 for (j = j0; j < j1; j++) _prep_data [j - i0] = norm * data [j * step];
494 fftwf_execute_dft_r2c (_plan_r2c, _prep_data, _freq_data);
495 #ifdef ENABLE_VECTOR_MODE
496 if (_options & OPT_VECTOR_MODE) fftswap (_freq_data);
497 #endif
498 fftb = M->_fftb [k];
499 for (j = 0; j <= (int)_parsize; j++)
500 {
501 fftb [j][0] += _freq_data [j][0];
502 fftb [j][1] += _freq_data [j][1];
503 }
504 }
505 i0 = i1;
506 }
507 }
508
509
impdata_update(unsigned int inp,unsigned int out,unsigned int step,float * data,int i0,int i1)510 void Convlevel::impdata_update (unsigned int inp,
511 unsigned int out,
512 unsigned int step,
513 float *data,
514 int i0,
515 int i1)
516 {
517 unsigned int k;
518 int j, j0, j1, n;
519 float norm;
520 fftwf_complex *fftb;
521 Macnode *M;
522
523 M = findmacnode (inp, out, false);
524 if (! M) return;
525
526 n = i1 - i0;
527 i0 = _offs - i0;
528 i1 = i0 + _npar * _parsize;
529 if ((i0 >= n) || (i1 <= 0)) return;
530
531 norm = 0.5f / _parsize;
532 for (k = 0; k < _npar; k++)
533 {
534 i1 = i0 + _parsize;
535 fftb = M->_fftb [k];
536 if (fftb && (i0 < n) && (i1 > 0))
537 {
538 memset (_prep_data, 0, 2 * _parsize * sizeof (float));
539 j0 = (i0 < 0) ? 0 : i0;
540 j1 = (i1 > n) ? n : i1;
541 for (j = j0; j < j1; j++) _prep_data [j - i0] = norm * data [j * step];
542 fftwf_execute_dft_r2c (_plan_r2c, _prep_data, fftb);
543 #ifdef ENABLE_VECTOR_MODE
544 if (_options & OPT_VECTOR_MODE) fftswap (fftb);
545 #endif
546 }
547 i0 = i1;
548 }
549 }
550
551
impdata_copy(unsigned int inp1,unsigned int out1,unsigned int inp2,unsigned int out2)552 void Convlevel::impdata_copy (unsigned int inp1,
553 unsigned int out1,
554 unsigned int inp2,
555 unsigned int out2)
556 {
557 Macnode *M1;
558 Macnode *M2;
559
560 M1 = findmacnode (inp1, out1, false);
561 if (! M1) return;
562 M2 = findmacnode (inp2, out2, true);
563 if (M2->_fftb) return;
564 M2->_fftb = M1->_fftb;
565 M2->_copy = true;
566 }
567
568
reset(unsigned int inpsize,unsigned int outsize,float ** inpbuff,float ** outbuff)569 void Convlevel::reset (unsigned int inpsize,
570 unsigned int outsize,
571 float **inpbuff,
572 float **outbuff)
573 {
574 unsigned int i;
575 Inpnode *X;
576 Outnode *Y;
577
578 _inpsize = inpsize;
579 _outsize = outsize;
580 _inpbuff = inpbuff;
581 _outbuff = outbuff;
582 for (X = _inp_list; X; X = X->_next)
583 {
584 for (i = 0; i < _npar; i++)
585 {
586 memset (X->_ffta [i], 0, (_parsize + 1) * sizeof (fftwf_complex));
587 }
588 }
589 for (Y = _out_list; Y; Y = Y->_next)
590 {
591 for (i = 0; i < 3; i++)
592 {
593 memset (Y->_buff [i], 0, _parsize * sizeof (float));
594 }
595 }
596 if (_parsize == _outsize)
597 {
598 _outoffs = 0;
599 _inpoffs = 0;
600 }
601 else
602 {
603 _outoffs = _parsize / 2;
604 _inpoffs = _inpsize - _outoffs;
605 }
606 _bits = _parsize / _outsize;
607 _wait = 0;
608 _ptind = 0;
609 _opind = 0;
610 _trig.init (0, 0);
611 _done.init (0, 0);
612 }
613
614
start(int abspri,int policy)615 void Convlevel::start (int abspri, int policy)
616 {
617 int min, max;
618 pthread_attr_t attr;
619 struct sched_param parm;
620
621 _pthr = 0;
622 min = sched_get_priority_min (policy);
623 max = sched_get_priority_max (policy);
624 abspri += _prio;
625 if (abspri > max) abspri = max;
626 if (abspri < min) abspri = min;
627 parm.sched_priority = abspri;
628 pthread_attr_init (&attr);
629 pthread_attr_setdetachstate (&attr, PTHREAD_CREATE_DETACHED);
630 pthread_attr_setschedpolicy (&attr, policy);
631 pthread_attr_setschedparam (&attr, &parm);
632 pthread_attr_setscope (&attr, PTHREAD_SCOPE_SYSTEM);
633 pthread_attr_setinheritsched (&attr, PTHREAD_EXPLICIT_SCHED);
634 pthread_attr_setstacksize (&attr, 0x10000);
635 pthread_create (&_pthr, &attr, static_main, this);
636 pthread_attr_destroy (&attr);
637 }
638
639
stop(void)640 void Convlevel::stop (void)
641 {
642 if (_stat != ST_IDLE)
643 {
644 _stat = ST_TERM;
645 _trig.post ();
646 }
647 }
648
649
cleanup(void)650 void Convlevel::cleanup (void)
651 {
652 unsigned int i;
653 Inpnode *X, *X1;
654 Outnode *Y, *Y1;
655 Macnode *M, *M1;
656
657 X = _inp_list;
658 while (X)
659 {
660 for (i = 0; i < _npar; i++) free (X->_ffta [i]);
661 delete[] X->_ffta;
662 X1 = X->_next;
663 delete X;
664 X = X1;
665 }
666 _inp_list = 0;
667
668 Y = _out_list;
669 while (Y)
670 {
671 M = Y->_list;
672 while (M)
673 {
674 if ((M->_fftb) && !(M->_copy))
675 {
676 for (i = 0; i < _npar; i++)
677 {
678 free (M->_fftb [i]);
679 }
680 delete[] M->_fftb;
681 }
682 M1 = M->_next;
683 delete M;
684 M = M1;
685 }
686 for (i = 0; i < 3; i++) free (Y->_buff [i]);
687 Y1 = Y->_next;
688 delete Y;
689 Y = Y1;
690 }
691 _out_list = 0;
692
693 fftwf_destroy_plan (_plan_r2c);
694 fftwf_destroy_plan (_plan_c2r);
695 free (_time_data);
696 free (_prep_data);
697 free (_freq_data);
698 _plan_r2c = 0;
699 _plan_c2r = 0;
700 _time_data = 0;
701 _prep_data = 0;
702 _freq_data = 0;
703 }
704
705
static_main(void * arg)706 void *Convlevel::static_main (void *arg)
707 {
708 ((Convlevel *) arg)->main ();
709 return 0;
710 }
711
712
main(void)713 void Convlevel::main (void)
714 {
715 _stat = ST_PROC;
716 while (true)
717 {
718 _trig.wait ();
719 if (_stat == ST_TERM)
720 {
721 _stat = ST_IDLE;
722 _pthr = 0;
723 return;
724 }
725 process (false);
726 _done.post ();
727 }
728 }
729
730
process(bool skip)731 void Convlevel::process (bool skip)
732 {
733 unsigned int i, j, k;
734 unsigned int i1, n1, n2, opi1, opi2;
735
736 Inpnode *X;
737 Macnode *M;
738 Outnode *Y;
739 fftwf_complex *ffta;
740 fftwf_complex *fftb;
741 float *inpd;
742 float *outd;
743
744 i1 = _inpoffs;
745 n1 = _parsize;
746 n2 = 0;
747 _inpoffs = i1 + n1;
748 if (_inpoffs >= _inpsize)
749 {
750 _inpoffs -= _inpsize;
751 n2 = _inpoffs;
752 n1 -= n2;
753 }
754
755 opi1 = (_opind + 1) % 3;
756 opi2 = (_opind + 2) % 3;
757
758 for (X = _inp_list; X; X = X->_next)
759 {
760 inpd = _inpbuff [X->_inp];
761 if (n1) memcpy (_time_data, inpd + i1, n1 * sizeof (float));
762 if (n2) memcpy (_time_data + n1, inpd, n2 * sizeof (float));
763 memset (_time_data + _parsize, 0, _parsize * sizeof (float));
764 fftwf_execute_dft_r2c (_plan_r2c, _time_data, X->_ffta [_ptind]);
765 #ifdef ENABLE_VECTOR_MODE
766 if (_options & OPT_VECTOR_MODE) fftswap (X->_ffta [_ptind]);
767 #endif
768 }
769
770 if (skip)
771 {
772 for (Y = _out_list; Y; Y = Y->_next)
773 {
774 outd = Y->_buff [opi2];
775 memset (outd, 0, _parsize * sizeof (float));
776 }
777 }
778 else
779 {
780 for (Y = _out_list; Y; Y = Y->_next)
781 {
782 memset (_freq_data, 0, (_parsize + 1) * sizeof (fftwf_complex));
783 for (M = Y->_list; M; M = M->_next)
784 {
785 X = M->_inpn;
786 i = _ptind;
787 for (j = 0; j < _npar; j++)
788 {
789 ffta = X->_ffta [i];
790 fftb = M->_fftb [j];
791 if (fftb)
792 {
793 #ifdef ENABLE_VECTOR_MODE
794 if (_options & OPT_VECTOR_MODE)
795 {
796 FV4 *A = (FV4 *) ffta;
797 FV4 *B = (FV4 *) fftb;
798 FV4 *D = (FV4 *) _freq_data;
799 for (k = 0; k < _parsize; k += 4)
800 {
801 D [0] += A [0] * B [0] - A [1] * B [1];
802 D [1] += A [0] * B [1] + A [1] * B [0];
803 A += 2;
804 B += 2;
805 D += 2;
806 }
807 _freq_data [_parsize][0] += ffta [_parsize][0] * fftb [_parsize][0];
808 _freq_data [_parsize][1] = 0;
809 }
810 else
811 #endif
812 {
813 for (k = 0; k <= _parsize; k++)
814 {
815 _freq_data [k][0] += ffta [k][0] * fftb [k][0] - ffta [k][1] * fftb [k][1];
816 _freq_data [k][1] += ffta [k][0] * fftb [k][1] + ffta [k][1] * fftb [k][0];
817 }
818 }
819 }
820 if (i == 0) i = _npar;
821 i--;
822 }
823 }
824
825 #ifdef ENABLE_VECTOR_MODE
826 if (_options & OPT_VECTOR_MODE) fftswap (_freq_data);
827 #endif
828 fftwf_execute_dft_c2r (_plan_c2r, _freq_data, _time_data);
829 outd = Y->_buff [opi1];
830 for (k = 0; k < _parsize; k++) outd [k] += _time_data [k];
831 outd = Y->_buff [opi2];
832 memcpy (outd, _time_data + _parsize, _parsize * sizeof (float));
833 }
834 }
835
836 _ptind++;
837 if (_ptind == _npar) _ptind = 0;
838 }
839
840
readout(bool sync,unsigned int skipcnt)841 int Convlevel::readout (bool sync, unsigned int skipcnt)
842 {
843 unsigned int i;
844 float *p, *q;
845 Outnode *Y;
846
847 _outoffs += _outsize;
848 if (_outoffs == _parsize)
849 {
850 _outoffs = 0;
851 if (_stat == ST_PROC)
852 {
853 while (_wait)
854 {
855 if (sync) _done.wait ();
856 else if (_done.trywait ()) break;
857 _wait--;
858 }
859 if (++_opind == 3) _opind = 0;
860 _trig.post ();
861 _wait++;
862 }
863 else
864 {
865 process (skipcnt >= 2 * _parsize);
866 if (++_opind == 3) _opind = 0;
867 }
868 }
869
870 for (Y = _out_list; Y; Y = Y->_next)
871 {
872 p = Y->_buff [_opind] + _outoffs;
873 q = _outbuff [Y->_out];
874 for (i = 0; i < _outsize; i++) q [i] += p [i];
875 }
876
877 return (_wait > 1) ? _bits : 0;
878 }
879
880
print(FILE * F)881 void Convlevel::print (FILE *F)
882 {
883 fprintf (F, "prio = %4d, offs = %6d, parsize = %5d, npar = %3d\n", _prio, _offs, _parsize, _npar);
884 }
885
886
findmacnode(unsigned int inp,unsigned int out,bool create)887 Macnode *Convlevel::findmacnode (unsigned int inp, unsigned int out, bool create)
888 {
889 unsigned int i;
890 Inpnode *X;
891 Outnode *Y;
892 Macnode *M;
893
894 for (X = _inp_list; X && (X->_inp != inp); X = X->_next);
895 if (! X)
896 {
897 if (! create) return 0;
898 X = new Inpnode;
899 X->_next = _inp_list;
900 _inp_list = X;
901 X->_inp = inp;
902 X->_ffta = new fftwf_complex * [_npar];
903 memset (X->_ffta, 0, _npar * sizeof (fftw_complex *));
904 for (i = 0; i < _npar; i++)
905 {
906 X->_ffta [i] = (fftwf_complex *)(alloc_aligned ((_parsize + 1) * sizeof (fftwf_complex)));
907 }
908 }
909
910 for (Y = _out_list; Y && (Y->_out != out); Y = Y->_next);
911 if (! Y)
912 {
913 if (! create) return 0;
914 Y = new Outnode;
915 Y->_next = _out_list;
916 _out_list = Y;
917 Y->_out = out;
918 Y->_list = 0;
919 for (i = 0; i < 3; i++)
920 {
921 Y->_buff [i] = 0;
922 }
923 for (i = 0; i < 3; i++)
924 {
925 Y->_buff [i] = (float *)(alloc_aligned (_parsize * sizeof (float)));
926 }
927 }
928
929 for (M = Y->_list; M && (M->_inpn != X); M = M->_next);
930 if (! M)
931 {
932 if (! create) return 0;
933 M = new Macnode;
934 M->_next = Y->_list;
935 Y->_list = M;
936 M->_inpn = X;
937 M->_fftb = 0;
938 M->_copy = false;
939 }
940
941 return M;
942 }
943
944
945 #ifdef ENABLE_VECTOR_MODE
946
fftswap(fftwf_complex * p)947 void Convlevel::fftswap (fftwf_complex *p)
948 {
949 unsigned int n = _parsize;
950 float a, b;
951
952 while (n)
953 {
954 a = p [2][0];
955 b = p [3][0];
956 p [2][0] = p [0][1];
957 p [3][0] = p [1][1];
958 p [0][1] = a;
959 p [1][1] = b;
960 p += 4;
961 n -= 4;
962 }
963 }
964
965 #endif
966
967
968