1 /*
2   liveconv.c:
3 
4   Copyright (C) 2017 Sigurd Saue, Oeyvind Brandtsegg
5 
6   This file is part of Csound.
7 
8   The Csound Library is free software; you can redistribute it
9   and/or modify it under the terms of the GNU Lesser General Public
10   License as published by the Free Software Foundation; either
11   version 2.1 of the License, or (at your option) any later version.
12 
13   Csound is distributed in the hope that it will be useful,
14   but WITHOUT ANY WARRANTY; without even the implied warranty of
15   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16   GNU Lesser General Public License for more details.
17 
18   You should have received a copy of the GNU Lesser General Public
19   License along with Csound; if not, write to the Free Software
20   Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
21   02110-1301 USA
22 */
23 
24 /* The implementation is indebted to the ftconv opcode by Istvan Varga 2005 */
25 
26 #include "csdl.h"
27 #include <math.h>
28 
29 /*
30 ** Data structures holding the load/unload information
31 */
32 
33 typedef struct {
34   enum { NO_LOAD, LOADING, UNLOADING } status;
35   int32_t pos;
36 } load_t;
37 
38 typedef struct {
39   load_t  *begin;
40   load_t  *end;
41   load_t  *head;
42   int32_t                     available;
43 } rbload_t;
44 
45 static inline
init_load(rbload_t * buffer,int32_t size)46 void init_load(rbload_t *buffer, int32_t size)
47 {
48     load_t* iter = buffer->begin;
49 
50     buffer->head = buffer->begin;
51     buffer->end = buffer->begin + size;
52     buffer->available = 1;
53 
54     for (iter = buffer->begin; iter != buffer->end; iter++) {
55       iter->status = NO_LOAD;
56       iter->pos = 0;
57     }
58 }
59 
60 static inline
next_load(const rbload_t * buffer,load_t * const now)61 load_t* next_load(const rbload_t *buffer, load_t* const now)
62 {
63     load_t *temp = now + 1;
64     if (UNLIKELY(temp == buffer->end)) {
65       temp = buffer->begin;
66     }
67     return temp;
68 }
69 
70 static inline
previous_load(const rbload_t * buffer,load_t * const now)71 load_t* previous_load(const rbload_t *buffer, load_t* const now)
72 {
73   return (now == buffer->begin) ? (buffer->end - 1) : (now - 1);
74 }
75 
76 /*
77 ** liveconv - data structure holding the internal state
78 */
79 
80 typedef struct {
81 
82   /*
83   **  Input parameters given by user
84   */
85   OPDS    h;
86   MYFLT   *aOut;          // output buffer
87   MYFLT   *aIn;           // input buffer
88 
89   MYFLT   *iFTNum;        // impulse respons table
90   MYFLT   *iPartLen;      // length of impulse response partitions
91                           // (latency <-> CPU usage)
92 
93   MYFLT     *kUpdate;     // Control variable for updating the IR buffer
94                           // (+1 is start load, -1 is start unload)
95   MYFLT     *kClear;      // Clear output buffers
96 
97   /*
98   ** Internal state of opcode maintained outside
99   */
100   int32_t     initDone;       /* flag to indicate initialization */
101   int32_t     cnt;            /* buffer position, 0 to partSize - 1       */
102   int32_t     nPartitions;    /* number of convolve partitions            */
103   int32_t     partSize;       /* partition length in sample frames
104                              (= iPartLen as integer) */
105   int32_t     rbCnt;          /* ring buffer index, 0 to nPartitions - 1  */
106 
107   /* The following pointer point into the auxData buffer */
108   MYFLT   *tmpBuf;        /* temporary buffer for accumulating FFTs   */
109   MYFLT   *ringBuf;       /* ring buffer of FFTs of input partitions -
110                              these buffers are now computed during init */
111   MYFLT   *IR_Data;       /* impulse responses (scaled)       */
112   MYFLT   *outBuf;        /* output buffer (size=partSize*2)  */
113 
114   rbload_t        loader; /* Bookkeeping of load/unload operations */
115 
116   void    *fwdsetup, *invsetup;
117   AUXCH   auxData;        /* Aux data buffer allocated in init pass */
118 } liveconv_t;
119 
120 /*
121 **  Function to multiply the FFT buffers
122 **    outBuf - the output of the operation (called with tmpBuf), single channel only
123 **    ringBuf - the partitions of the single input signal
124 **    IR_data - the impulse response of a particular channel
125 **    partSize - size of partition
126 **    nPartitions - number of partitions
127 **    ringBuf_startPos - the starting position of the ring buffer
128 **                       (corresponds to the start of the partition after the
129 **                        last filled partition)
130 */
multiply_fft_buffers(MYFLT * outBuf,MYFLT * ringBuf,MYFLT * IR_Data,int32_t partSize,int nPartitions,int32_t ringBuf_startPos)131 static void multiply_fft_buffers(MYFLT *outBuf, MYFLT *ringBuf, MYFLT *IR_Data,
132                                  int32_t partSize, int nPartitions,
133                                  int32_t ringBuf_startPos)
134 {
135     MYFLT   re, im, re1, re2, im1, im2;
136     MYFLT   *rbPtr, *irPtr, *outBufPtr, *outBufEndPm2, *rbEndP;
137 
138     /* note: partSize must be at least 2 samples */
139     partSize <<= 1; /* locale partsize is twice the size of the partition size */
140              /* Finding the index of the last sample pair in the output buffer */
141     outBufEndPm2 = (MYFLT*) outBuf + (int32_t) (partSize - 2);
142                                                  /* The end of the ring buffer */
143     rbEndP = (MYFLT*) ringBuf + (int32_t) (partSize * nPartitions);
144     rbPtr = &(ringBuf[ringBuf_startPos]);    /* Initialize ring buffer pointer */
145     irPtr = IR_Data;                        /* Initialize impulse data pointer */
146     outBufPtr = outBuf;                    /* Initialize output buffer pointer */
147 
148     /* clear output buffer to zero */
149     memset(outBuf, 0, sizeof(MYFLT)*partSize);
150 
151     /*
152     ** Multiply FFTs for each partition and mix to output buffer
153     ** Note: IRs are stored in reverse partition order
154     */
155     do {
156       /* wrap ring buffer position */
157       if (rbPtr >= rbEndP)
158         rbPtr = ringBuf;
159       outBufPtr = outBuf;
160       *(outBufPtr++) +=
161         *(rbPtr++) * *(irPtr++); /* convolve DC - real part only */
162       *(outBufPtr++) +=
163         *(rbPtr++) * *(irPtr++); /* convolve Nyquist - real part only */
164       re1 = *(rbPtr++);
165       im1 = *(rbPtr++);
166       re2 = *(irPtr++);
167       im2 = *(irPtr++);
168 
169       /*
170       ** Status:
171       ** outBuf + 2, ringBuf + 4, irBuf + 4
172       ** re = buf + 2, im = buf + 3
173       */
174 
175       re = re1 * re2 - im1 * im2;
176       im = re1 * im2 + re2 * im1;
177       while (outBufPtr < outBufEndPm2) {
178         /* complex multiply */
179         re1 = rbPtr[0];
180         im1 = rbPtr[1];
181         re2 = irPtr[0];
182         im2 = irPtr[1];
183         outBufPtr[0] += re;
184         outBufPtr[1] += im;
185         re = re1 * re2 - im1 * im2;
186         im = re1 * im2 + re2 * im1;
187         re1 = rbPtr[2];
188         im1 = rbPtr[3];
189         re2 = irPtr[2];
190         im2 = irPtr[3];
191         outBufPtr[2] += re;
192         outBufPtr[3] += im;
193         re = re1 * re2 - im1 * im2;
194         im = re1 * im2 + re2 * im1;
195         outBufPtr += 4;
196         rbPtr += 4;
197         irPtr += 4;
198         /*
199         ** Status:
200         ** outBuf + 2 + 4n, ringBuf + 4 + 4n, irBuf + 4 + 4n
201         ** re = buf + 2 + 4n, im = buf + 3 + 4n
202         */
203       }
204       outBufPtr[0] += re;
205       outBufPtr[1] += im;
206 
207     } while (--nPartitions);
208 }
buf_bytes_alloc(int32_t partSize,int32_t nPartitions)209 static inline int32_t buf_bytes_alloc(int32_t partSize, int32_t nPartitions)
210 {
211     int32_t nSmps;
212 
213     nSmps = (partSize << 1);                            /* tmpBuf     */
214     nSmps += ((partSize << 1) * nPartitions);           /* ringBuf    */
215     nSmps += ((partSize << 1) * nPartitions);           /* IR_Data    */
216     nSmps += ((partSize << 1));                         /* outBuf */
217     nSmps *= (int32_t) sizeof(MYFLT);                   /* Buffer type MYFLT */
218 
219     nSmps += (nPartitions+1) * (int32_t) sizeof(load_t);/* Load/unload structure */
220     /* One load/unload pr. partitions and an extra for buffering is sufficient */
221 
222     return nSmps;
223 }
224 
set_buf_pointers(liveconv_t * p,int32_t partSize,int32_t nPartitions)225 static void set_buf_pointers(liveconv_t *p, int32_t partSize, int32_t nPartitions)
226 {
227     MYFLT *ptr;
228 
229     ptr = (MYFLT*) (p->auxData.auxp);
230     p->tmpBuf = ptr;
231     ptr += (partSize << 1);
232     p->ringBuf = ptr;
233     ptr += ((partSize << 1) * nPartitions);
234     p->IR_Data = ptr;
235     ptr += ((partSize << 1) * nPartitions);
236     p->outBuf = ptr;
237     ptr += (partSize << 1);
238 
239     p->loader.begin = (load_t*) ptr;
240 }
241 
liveconv_init(CSOUND * csound,liveconv_t * p)242 static int32_t liveconv_init(CSOUND *csound, liveconv_t *p)
243 {
244     FUNC    *ftp;       // function table
245     int32_t     n, nBytes;
246 
247     /* set p->partSize to the initial partition length, iPartLen */
248     p->partSize = MYFLT2LRND(*(p->iPartLen));
249     if (UNLIKELY(p->partSize < 4 || (p->partSize & (p->partSize - 1)) != 0)) {
250       // Must be a power of 2 at least as large as 4
251       return csound->InitError(csound, "%s",
252                                Str("liveconv: invalid impulse response "
253                                    "partition length"));
254     }
255 
256     /* Find and assign the function table numbered iFTNum */
257     ftp = csound->FTnp2Finde(csound, p->iFTNum);
258     if (UNLIKELY(ftp == NULL))
259       return NOTOK; /* ftfind should already have printed the error message */
260 
261     /* Calculate the total length  */
262     n = (int32_t) ftp->flen;
263     if (UNLIKELY(n <= 0)) {
264       return csound->InitError(csound, "%s",
265                                Str("liveconv: invalid length, or insufficient"
266                                    " IR data for convolution"));
267     }
268 
269     // Compute the number of partitions (total length / partition size)
270     p->nPartitions = (n + (p->partSize - 1)) / p->partSize;
271 
272     /*
273     ** Calculate the amount of aux space to allocate (in bytes) and
274     ** allocate if necessary
275     ** Function of partition size and number of partitions
276     */
277 
278     nBytes = buf_bytes_alloc(p->partSize, p->nPartitions);
279     if (nBytes != (int32_t) p->auxData.size)
280       csound->AuxAlloc(csound, (int32) nBytes, &(p->auxData));
281 
282     /*
283     ** From here on is initialization of data
284     */
285 
286     /* initialize buffer pointers */
287     set_buf_pointers(p, p->partSize, p->nPartitions);
288 
289     /* Initialize load bookkeeping */
290     init_load(&p->loader, (p->nPartitions + 1));
291 
292     /* clear ring buffer to zero */
293     n = (p->partSize << 1) * p->nPartitions;
294     memset(p->ringBuf, 0, n*sizeof(MYFLT));
295 
296     /* initialize buffer indices */
297     p->cnt = 0;
298     p->rbCnt = 0;
299 
300     p->fwdsetup = csound->RealFFT2Setup(csound, (p->partSize << 1), FFT_FWD);
301     p->invsetup = csound->RealFFT2Setup(csound, (p->partSize << 1), FFT_INV);
302 
303     /* clear IR buffer to zero */
304     memset(p->IR_Data, 0, n*sizeof(MYFLT));
305 
306     /* clear output buffers to zero */
307     memset(p->outBuf, 0, (p->partSize << 1)*sizeof(MYFLT));
308 
309     /*
310     ** After initialization:
311     **    Buffer indexes are zero
312     **    tmpBuf is filled with rubish
313     **    ringBuf and outBuf are filled with zero
314     **    IR_Data buffers are filled with zero
315     */
316 
317     p->initDone = 1;
318     return OK;
319 }
320 
liveconv_perf(CSOUND * csound,liveconv_t * p)321 static int32_t liveconv_perf(CSOUND *csound, liveconv_t *p)
322 {
323     MYFLT       *x, *rBuf;
324     FUNC        *ftp;       // function table
325     int32_t         i, k, n, nSamples, rBufPos, updateIR, clearBuf, nPart, cnt;
326 
327     load_t      *load_ptr;
328     // uint32_t                numLoad = p->nPartitions + 1;
329 
330     uint32_t      offset = p->h.insdshead->ksmps_offset;
331     uint32_t      early  = p->h.insdshead->ksmps_no_end;
332     uint32_t      nn, nsmps = CS_KSMPS;
333 
334     /* Only continue if initialized */
335     if (UNLIKELY(p->initDone <= 0)) goto err1;
336 
337     ftp = csound->FTnp2Finde(csound, p->iFTNum);
338     nSamples = p->partSize;   /* Length of partition */
339                               /* Pointer to a partition of the ring buffer */
340     rBuf = &(p->ringBuf[p->rbCnt * (nSamples << 1)]);
341 
342     if (UNLIKELY(offset))
343       memset(p->aOut, '\0', offset*sizeof(MYFLT));
344     if (UNLIKELY(early)) {
345       nsmps -= early;
346       memset(&p->aOut[nsmps], '\0', early*sizeof(MYFLT));
347     }
348 
349     /* If clear flag is set: empty buffers and reset indexes */
350     clearBuf = MYFLT2LRND(*(p->kClear));
351     if (clearBuf) {
352 
353       /* clear ring buffer to zero */
354       n = (nSamples << 1) * p->nPartitions;
355       memset(p->ringBuf, 0, n*sizeof(MYFLT));
356 
357       /* initialize buffer index */
358       p->cnt = 0;
359       p->rbCnt = 0;
360 
361       /* clear output buffers to zero */
362       memset(p->outBuf, 0, (nSamples << 1)*sizeof(MYFLT));
363     }
364 
365     /*
366     ** How to handle the kUpdate input:
367     ** -1: Gradually clear the IR buffer
368     **      0: Do nothing
369     **  1: Gradually load the IR buffer
370     */
371 
372     if (p->loader.available) {
373 
374       // The buffer before the head position is the temporary buffer
375       load_ptr = previous_load(&p->loader, p->loader.head);
376       updateIR = MYFLT2LRND(*(p->kUpdate));
377       if (updateIR == 1) {
378         load_ptr->status = LOADING;
379         load_ptr->pos = 0;
380       }
381       else if (updateIR == -1) {
382         load_ptr->status = UNLOADING;
383         load_ptr->pos = 0;
384       }
385       if (load_ptr->status != NO_LOAD) {
386         p->loader.available = 0;
387 
388         /* Special case: At a partition border: Make the temporary buffer
389            head position */
390         if (p->cnt == 0)
391           p->loader.head = load_ptr;
392       }
393     }
394 
395     /* For each sample in the audio input buffer (length = ksmps) */
396     for (nn = offset; nn < nsmps; nn++) {
397 
398       /* store input signal in buffer */
399       rBuf[p->cnt] = p->aIn[nn];
400 
401       /* copy output signals from buffer (contains data from previous
402          convolution pass) */
403       p->aOut[nn] = p->outBuf[p->cnt];
404 
405       /* is input buffer full ? */
406       if (++p->cnt < nSamples)
407         continue;                   /* no, continue with next sample */
408 
409       /* Check if there are any IR partitions to load/unload */
410       load_ptr = p->loader.head;
411       while (load_ptr->status != NO_LOAD) {
412 
413         cnt = load_ptr->pos;
414         if (load_ptr->status == LOADING) {
415 
416           nPart = cnt / nSamples + 1;
417           /* IR write position, starting with the last! */
418           n = (nSamples << 1) * (p->nPartitions - nPart);
419 
420           /* Iterate over IR partitions in reverse order */
421           for (k = 0; k < nSamples; k++) {
422             /* Fill IR_Data with scaled IR data, or zero if outside the IR buffer */
423             p->IR_Data[n + k] =
424               (cnt < (int32_t)ftp->flen) ? ftp->ftable[cnt] : FL(0.0);
425             cnt++;
426           }
427 
428           /* pad second half of IR to zero */
429           for (k = nSamples; k < (nSamples << 1); k++)
430             p->IR_Data[n + k] = FL(0.0);
431 
432           /* calculate FFT (replace in the same buffer) */
433           csound->RealFFT2(csound, p->fwdsetup, &(p->IR_Data[n]));
434 
435         }
436         else if (load_ptr->status == UNLOADING) {
437 
438           nPart = cnt / nSamples + 1;
439           /* IR write position, starting with the last! */
440           n = (nSamples << 1) * (p->nPartitions - nPart);
441           memset(p->IR_Data + n, 0, (nSamples << 1)*sizeof(MYFLT));
442         }
443 
444         // Update load buffer and move to the next buffer
445         load_ptr->pos += nSamples;
446         if (load_ptr->pos >= p->nPartitions * nSamples) {
447           load_ptr->status = NO_LOAD;
448         }
449 
450         load_ptr = next_load(&p->loader, load_ptr);
451       }
452       p->loader.available = 1;
453 
454       // Check if there is a temporary buffer ready to get loaded with the
455       // next partition
456       load_ptr = previous_load(&p->loader, p->loader.head);
457       if (load_ptr->status != NO_LOAD)
458         p->loader.head = load_ptr;
459 
460       /* Now the partition is filled with input --> start calculate the
461          convolution */
462       p->cnt = 0; /* reset buffer position */
463 
464       /* pad input in ring buffer with zeros to double length */
465       for (i = nSamples; i < (nSamples << 1); i++)
466         rBuf[i] = FL(0.0);
467 
468       /* calculate FFT of input */
469       csound->RealFFT2(csound, p->fwdsetup, rBuf);
470 
471       /* update ring buffer position */
472       p->rbCnt++;
473       if (p->rbCnt >= p->nPartitions)
474         p->rbCnt = 0;
475       rBufPos = p->rbCnt * (nSamples << 1);
476 
477       /* Move to next partition in ring buffer (used in next iteration to
478          store the next input sample) */
479       rBuf = &(p->ringBuf[rBufPos]);
480 
481       /* multiply complex arrays --> multiplication in the frequency domain */
482       multiply_fft_buffers(p->tmpBuf, p->ringBuf, p->IR_Data,
483                            nSamples, p->nPartitions, rBufPos);
484 
485       /* inverse FFT */
486       csound->RealFFT2(csound, p->invsetup, p->tmpBuf);
487 
488       /*
489       ** Copy IFFT result to output buffer
490       ** The second half is left as "tail" for next iteration
491       ** The first half is overlapped with "tail" of previous block
492       */
493       x = &(p->outBuf[0]);
494       for (i = 0; i < nSamples; i++) {
495         x[i] = p->tmpBuf[i] + x[i + nSamples];
496         x[i + nSamples] = p->tmpBuf[i + nSamples];
497       }
498 
499     }
500     return OK;
501 
502  err1:
503     return csound->PerfError(csound, &(p->h),
504                              "%s", Str("liveconv: not initialised"));
505 }
506 
507 /* module interface functions */
508 
509 static OENTRY localops[] = {
510   {
511     "liveconv",             // name of opcode
512     sizeof(liveconv_t),     // data size of state block
513     TR, 3,                  // thread
514     "a",                    // output arguments
515     "aiikk",                // input arguments
516     (SUBR) liveconv_init,   // init function
517     (SUBR) liveconv_perf    // a-rate function
518   }
519 };
520 
521 LINKAGE
522