1 /* lappedTransform.cpp - source file for class providing time-frequency transformation
2  * written by C. R. Helmrich, last modified in 2019 - see License.htm for legal notices
3  *
4  * The copyright in this software is being made available under the exhale Copyright License
5  * and comes with ABSOLUTELY NO WARRANTY. This software may be subject to other third-
6  * party rights, including patent rights. No such rights are granted under this License.
7  *
8  * Copyright (c) 2018-2021 Christian R. Helmrich, project ecodis. All rights reserved.
9  */
10 
11 #include "exhaleLibPch.h"
12 #include "lappedTransform.h"
13 #ifdef _MSC_VER
14 # include <intrin.h> // _BitScanReverse
15 
16 # pragma intrinsic (_BitScanReverse)
17 #endif
18 
19 // static helper functions
createPermutTable(const short tableSize)20 static short* createPermutTable (const short tableSize)
21 {
22   const short lOver2 = tableSize >> 1;
23   short* permutTable = nullptr;
24   short  i = 0;
25 
26   if ((permutTable = (short*) malloc (tableSize * sizeof (short))) == nullptr)
27   {
28     return nullptr; // allocation error
29   }
30 
31   permutTable[0] = 0;
32   for (short s = 1; s < tableSize; s++)
33   {
34     short l = lOver2;
35 
36     while (i >= l)
37     {
38       i -= l;
39       l >>= 1;
40     }
41     permutTable[s] = (i += l);
42   }
43 
44   return permutTable;
45 }
46 
shortIntLog2(uint16_t s)47 static inline int shortIntLog2 (uint16_t s)
48 {
49 #ifdef _MSC_VER
50   unsigned long l;
51 
52   _BitScanReverse (&l, s);
53 
54   return (int) l;
55 #else
56   // fast base-2 integer logarithm by Todd Lehman (Stack Overflow), July 2014
57   int i = 0;
58 # define S(k) if (s >= (1u << k)) { i += k; s >>= k; }
59   S (8); S (4); S (2); S (1);
60 # undef S
61   return i;
62 #endif
63 }
64 
65 // private helper functions
applyHalfSizeFFT(int32_t * const iR,int32_t * const iI,const bool shortTransform)66 void LappedTransform::applyHalfSizeFFT (int32_t* const iR/*eal*/, int32_t* const iI/*mag*/, const bool shortTransform) // works in-place
67 {
68   // int32 FFT version based on http://paulbourke.net/miscellaneous/dft, 1993
69   const int    l = (shortTransform ? m_transfLengthS : m_transfLengthL) >> 1;
70   const short* p = (shortTransform ? m_fftPermutS : m_fftPermutL); // look-up
71   int l2 = 1, l3 = m_transfLengthL >> 1;
72 
73   if (iR == nullptr)
74   {
75     return; // null-pointer input error
76   }
77 
78   // sort input with permutation look-up table
79   if (iI != nullptr)
80   {
81     for (int i = l - 1; i >= 0; i--)
82     {
83       const int j = p[i];
84 
85       if (j > i) // swap input data at i and j
86       {
87         const int32_t iRTmp = iR[i]; // use re-
88         const int32_t iITmp = iI[i]; // gisters
89 
90         iR[i] = iR[j];
91         iI[i] = iI[j];
92         iR[j] = iRTmp;
93         iI[j] = iITmp;
94       }
95     }
96   }
97   else // real-valued input, no imaginary part
98   {
99     for (int i = l - 1; i >= 0; i--)
100     {
101       const int j = p[i];
102 
103       if (j > i) // swap real input at i and j
104       {
105         const int32_t iRTmp = iR[i];
106 
107         iR[i] = iR[j];
108         iR[j] = iRTmp;
109       }
110       iI[i] = 0;  // zero imaginary input data
111     }
112   }
113 
114   // get length-l Fast Fourier Transform (FFT)
115   for (int k = shortIntLog2 ((uint16_t) l) - 1; k >= 0; k--)
116   {
117     const int l1 = l2;
118     l2 <<= 1;
119     l3 >>= 1;
120 
121     for (int j = l1 - 1; j >= 0; j--)
122     {
123       const int       jTl3 = j * l3;
124       const int64_t cosjl3 = m_fftHalfCos[jTl3]; // cos/sin
125       const int64_t sinjl3 = m_fftHalfSin[jTl3]; // look-up
126 
127       for (int i = j; i < l; i += l2)
128       {
129         const int     iPl1 = i + l1;
130         const int32_t rotR = int32_t ((cosjl3 * iR[iPl1] + sinjl3 * iI[iPl1] + LUT_OFFSET) >> LUT_SHIFT); // clockwise
131         const int32_t rotI = int32_t ((cosjl3 * iI[iPl1] - sinjl3 * iR[iPl1] + LUT_OFFSET) >> LUT_SHIFT); // rotation
132 
133         iR[iPl1] = iR[i] + rotR;  iR[i] -= rotR;
134         iI[iPl1] = iI[i] + rotI;  iI[i] -= rotI;
135       }
136     }
137   }
138 }
139 
windowAndFoldInL(const int32_t * inputL,const bool shortTransform,const bool kbdWindowL,const bool lowOverlapL,const bool mdstKernel,int32_t * const output)140 void LappedTransform::windowAndFoldInL (const int32_t* inputL, const bool shortTransform, const bool kbdWindowL, const bool lowOverlapL,
141                                         const bool mdstKernel, int32_t* const output)
142 {
143   const unsigned ws = (kbdWindowL ? 1 : 0); // shape
144   const int32_t* wl = (lowOverlapL ? m_timeWindowS[ws] : m_timeWindowL[ws]);
145   const int Mo2     = (shortTransform ? m_transfLengthS : m_transfLengthL) >> 1;
146   const int Mm1     = Mo2 * 2 - 1;
147   const int Mo2m1   = Mo2 - 1;
148   const int Mo2mO   = (lowOverlapL ? Mo2 - (m_transfLengthS >> 1) : 0);
149   const int Mm1mO   = Mm1 - Mo2mO; // overlap offset
150   int n;
151 
152   if (mdstKernel) // time-reversal and TDA sign flip
153   {
154     for (n = Mo2m1; n >= Mo2mO; n--) // windowed pt.
155     {
156       const int64_t i64 = (int64_t) inputL[Mm1 - n] * wl[Mm1mO - n] + (int64_t) inputL[n] * wl[n - Mo2mO];
157 
158       output[Mo2m1 - n] = int32_t ((i64 + WIN_OFFSET) >> WIN_SHIFT);
159     }
160     for (/*Mo2mO-1*/; n >= 0; n--) // unwindowed pt.
161     {
162       output[Mo2m1 - n] = (inputL[Mm1 - n] + 2) >> 2;
163     }
164   }
165   else // MDCT kernel, no time-reversal or sign flip
166   {
167     for (n = Mo2m1; n >= Mo2mO; n--) // windowed pt.
168     {
169       const int64_t i64 = (int64_t) inputL[Mm1 - n] * wl[Mm1mO - n] - (int64_t) inputL[n] * wl[n - Mo2mO];
170 
171       output[Mo2 + n]   = int32_t ((i64 + WIN_OFFSET) >> WIN_SHIFT);
172     }
173     for (/*Mo2mO-1*/; n >= 0; n--) // unwindowed pt.
174     {
175       output[Mo2 + n]   = (inputL[Mm1 - n] + 2) >> 2;
176     }
177   }
178 }
179 
windowAndFoldInR(const int32_t * inputR,const bool shortTransform,const bool kbdWindowR,const bool lowOverlapR,const bool mdstKernel,int32_t * const output)180 void LappedTransform::windowAndFoldInR (const int32_t* inputR, const bool shortTransform, const bool kbdWindowR, const bool lowOverlapR,
181                                         const bool mdstKernel, int32_t* const output)
182 {
183   const unsigned ws = (kbdWindowR ? 1 : 0); // shape
184   const int32_t* wr = (lowOverlapR ? m_timeWindowS[ws] : m_timeWindowL[ws]);
185   const int Mo2     = (shortTransform ? m_transfLengthS : m_transfLengthL) >> 1;
186   const int Mm1     = Mo2 * 2 - 1;
187   const int Mo2m1   = Mo2 - 1;
188   const int Mo2mO   = (lowOverlapR ? Mo2 - (m_transfLengthS >> 1) : 0);
189   const int Mm1mO   = Mm1 - Mo2mO; // overlap offset
190   int n;
191 
192   if (mdstKernel) // time-reversal and TDA sign flip
193   {
194     for (n = Mo2m1; n >= Mo2mO; n--) // windowed pt.
195     {
196       const int64_t i64 = (int64_t) inputR[n] * wr[Mm1mO - n] - (int64_t) inputR[Mm1 - n] * wr[n - Mo2mO];
197 
198       output[Mo2 + n]   = int32_t ((i64 + WIN_OFFSET) >> WIN_SHIFT);
199     }
200     for (/*Mo2mO-1*/; n >= 0; n--) // unwindowed pt.
201     {
202       output[Mo2 + n]   = (inputR[n] + 2) >> 2;
203     }
204   }
205   else // MDCT kernel, no time-reversal or sign flip
206   {
207     for (n = Mo2m1; n >= Mo2mO; n--) // windowed pt.
208     {
209       const int64_t i64 = (int64_t) inputR[n] * wr[Mm1mO - n] + (int64_t) inputR[Mm1 - n] * wr[n - Mo2mO];
210 
211       output[Mo2m1 - n] = int32_t ((i64 + WIN_OFFSET) >> WIN_SHIFT);
212     }
213     for (/*Mo2mO-1*/; n >= 0; n--) // unwindowed pt.
214     {
215       output[Mo2m1 - n] = (inputR[n] + 2) >> 2;
216     }
217   }
218 }
219 
220 // constructor
LappedTransform()221 LappedTransform::LappedTransform ()
222 {
223   // initialize all helper buffers
224   m_dctRotCosL = nullptr;
225   m_dctRotCosS = nullptr;
226   m_dctRotSinL = nullptr;
227   m_dctRotSinS = nullptr;
228   m_fftHalfCos = nullptr;
229   m_fftHalfSin = nullptr;
230   m_fftPermutL = nullptr;
231   m_fftPermutS = nullptr;
232   m_tempIntBuf = nullptr;
233 
234   // initialize all window buffers
235   for (short s = 0; s < 2; s++)
236   {
237     m_timeWindowL[s] = nullptr;
238     m_timeWindowS[s] = nullptr;
239   }
240   m_transfLengthL = 0;
241   m_transfLengthS = 0;
242 }
243 
244 // destructor
~LappedTransform()245 LappedTransform::~LappedTransform ()
246 {
247   // free allocated helper buffers
248   MFREE (m_dctRotCosL);
249   MFREE (m_dctRotCosS);
250   MFREE (m_dctRotSinL);
251   MFREE (m_dctRotSinS);
252   MFREE (m_fftHalfCos);
253   MFREE (m_fftHalfSin);
254   MFREE (m_fftPermutL);
255   MFREE (m_fftPermutS);
256   m_tempIntBuf = nullptr;
257 }
258 
259 // public functions
applyNegDCT4(int32_t * const signal,const bool shortTransform)260 unsigned LappedTransform::applyNegDCT4 (int32_t* const signal, const bool shortTransform) // works in-place
261 {
262   // int32 negative-DCT-IV version based on http://www.ee.columbia.edu/~marios/mdct/mdct_giraffe.html, 2003
263   // NOTE: amplifies short-transform results (8 times shorter than long-transform results) by a factor of 8
264   const int lm1   = (shortTransform ? m_transfLengthS : m_transfLengthL) - 1;
265   const int lm1o2 = lm1 >> 1;
266   const int32_t* rotatCos = (shortTransform ? m_dctRotCosS : m_dctRotCosL);
267   const int32_t* rotatSin = (shortTransform ? m_dctRotSinS : m_dctRotSinL);
268   const int64_t rotOffset = (shortTransform ? LUT_OFFSET>>3 : LUT_OFFSET);
269   const int64_t  rotShift = (shortTransform ? LUT_SHIFT - 3 : LUT_SHIFT);
270   int32_t* const tempReal = m_tempIntBuf;
271   int32_t* const tempImag = &m_tempIntBuf[lm1o2 + 1];
272   int i, i2;
273 
274   if (signal == nullptr)
275   {
276     return 1; // null-pointer input error
277   }
278 
279   // resort, separate, pre-twiddle signal
280   for (i = lm1o2, i2 = lm1 - 1; i >= 0; i--, i2 -= 2)
281   {
282     const int64_t c = rotatCos[i];
283     const int64_t s = rotatSin[i];
284     const int64_t e = signal[i2/*even*/];
285     const int64_t o = signal[lm1 - i2];
286 
287     tempReal[i] = int32_t ((rotOffset + e * c - o * s) >> rotShift);
288     tempImag[i] = int32_t ((rotOffset + o * c + e * s) >> rotShift);
289   }
290 
291   applyHalfSizeFFT (tempReal, tempImag, shortTransform);
292 
293   // post-twiddle, combine, resort output
294   for (i = lm1o2, i2 = lm1 - 1; i >= 0; i--, i2 -= 2)
295   {
296     const int64_t c = rotatCos[i];
297     const int64_t s = rotatSin[i];
298     const int64_t e = tempReal[i];
299     const int64_t o = tempImag[i];
300 
301     signal[i2/*even*/] = int32_t ((LUT_OFFSET + o * s - e * c) >> LUT_SHIFT);
302     signal[lm1 - i2]   = int32_t ((LUT_OFFSET + e * s + o * c) >> LUT_SHIFT);
303   }
304 
305   return 0; // no error
306 }
307 
applyMCLT(const int32_t * timeSig,const bool eightTransforms,bool kbdWindowL,const bool kbdWindowR,const bool lowOverlapL,const bool lowOverlapR,int32_t * const outMdct,int32_t * const outMdst)308 unsigned LappedTransform::applyMCLT (const int32_t* timeSig, const bool eightTransforms, bool kbdWindowL, const bool kbdWindowR,
309                                      const bool lowOverlapL, const bool lowOverlapR, int32_t* const outMdct, int32_t* const outMdst)
310 {
311   if ((timeSig == nullptr) || (outMdct == nullptr) || (outMdst == nullptr))
312   {
313     return 1; // invalid arguments error
314   }
315 
316   if (eightTransforms)  // short windows
317   {
318     const int32_t* tSigS = &timeSig[(m_transfLengthL - m_transfLengthS) >> 1];
319     int32_t*    outMdctS = outMdct;
320     int32_t*    outMdstS = outMdst;
321 
322     for (unsigned w = 0; w < 8; w++)
323     {
324       windowAndFoldInL (tSigS /*window half 1*/, true, kbdWindowL, lowOverlapL, false, outMdctS);
325       windowAndFoldInR (&tSigS[m_transfLengthS], true, kbdWindowR, lowOverlapR, false, outMdctS);
326       windowAndFoldInL (tSigS /*window half 1*/, true, kbdWindowL, lowOverlapL, true, outMdstS);
327       windowAndFoldInR (&tSigS[m_transfLengthS], true, kbdWindowR, lowOverlapR, true, outMdstS);
328       // MDCT via DCT-IV
329       applyNegDCT4 (outMdctS, true);
330       // MDST via DCT-IV
331       applyNegDCT4 (outMdstS, true);
332 #if 1 // not needed here
333       for (int i = m_transfLengthS - 2; i >= 0; i -= 2)
334       {
335         outMdstS[i] *= -1; // DCT to DST
336       }
337 #endif
338       kbdWindowL = kbdWindowR; // only first window uses last frame's shape
339       tSigS     += m_transfLengthS;
340       outMdctS  += m_transfLengthS;
341       outMdstS  += m_transfLengthS;
342     }
343   }
344   else  // 1 long window
345   {
346     windowAndFoldInL (timeSig /*window half 1*/, false, kbdWindowL, lowOverlapL, false, outMdct);
347     windowAndFoldInR (&timeSig[m_transfLengthL], false, kbdWindowR, lowOverlapR, false, outMdct);
348     windowAndFoldInL (timeSig /*window half 1*/, false, kbdWindowL, lowOverlapL, true, outMdst);
349     windowAndFoldInR (&timeSig[m_transfLengthL], false, kbdWindowR, lowOverlapR, true, outMdst);
350     // MDCT using DCT-IV
351     applyNegDCT4 (outMdct, false);
352     // MDST using DCT-IV
353     applyNegDCT4 (outMdst, false);
354 #if 1 // not needed here
355     for (int i = m_transfLengthL - 2; i >= 0; i -= 2)
356     {
357       outMdst[i] *= -1; // DCT to DST
358     }
359 #endif
360   }
361 
362   return 0; // no error
363 }
364 
initConstants(int32_t * const tempIntBuf,int32_t * const timeWindowL[2],int32_t * const timeWindowS[2],const unsigned maxTransfLength)365 unsigned LappedTransform::initConstants (int32_t* const tempIntBuf, int32_t* const timeWindowL[2], int32_t* const timeWindowS[2],
366                                          const unsigned maxTransfLength)
367 {
368   const short  halfLength = short (maxTransfLength >> 1);
369   const short  sixtLength = short (maxTransfLength >> 4);
370   const double dNormL     = 3.141592653589793 / (2.0 * halfLength);
371   const double dNormS     = 3.141592653589793 / (2.0 * sixtLength);
372   const double dNormL4    = dNormL * 4.0;
373   short s;
374 
375   if ((tempIntBuf == nullptr) || (timeWindowL == nullptr) || (timeWindowS == nullptr) ||
376       (maxTransfLength < 128) || (maxTransfLength > 8192) || (maxTransfLength & (maxTransfLength - 1)))
377   {
378     return 1; // invalid arguments error
379   }
380 
381   m_transfLengthL = 2 * halfLength;
382   m_transfLengthS = 2 * sixtLength;
383 
384   if ((m_dctRotCosL = (int32_t*) malloc (halfLength * sizeof (int32_t))) == nullptr ||
385       (m_dctRotCosS = (int32_t*) malloc (sixtLength * sizeof (int32_t))) == nullptr ||
386       (m_dctRotSinL = (int32_t*) malloc (halfLength * sizeof (int32_t))) == nullptr ||
387       (m_dctRotSinS = (int32_t*) malloc (sixtLength * sizeof (int32_t))) == nullptr ||
388       (m_fftHalfCos = (int32_t*) malloc ((halfLength >> 1) * sizeof (int32_t))) == nullptr ||
389       (m_fftHalfSin = (int32_t*) malloc ((halfLength >> 1) * sizeof (int32_t))) == nullptr ||
390       (m_fftPermutL = createPermutTable (halfLength)) == nullptr ||
391       (m_fftPermutS = createPermutTable (sixtLength)) == nullptr)
392   {
393     return 2; // memory allocation error
394   }
395 
396   // obtain cosine and sine coefficients
397   for (s = 0; s < halfLength; s++)
398   {
399     m_dctRotCosL[s] = int32_t (cos (dNormL * (s + 0.125)) * (INT_MAX + 1.0) + 0.5);
400     m_dctRotSinL[s] = int32_t (sin (dNormL * (s + 0.125)) * INT_MIN - 0.5);
401   }
402   for (s = 0; s < sixtLength; s++)
403   {
404     m_dctRotCosS[s] = int32_t (cos (dNormS * (s + 0.125)) * (INT_MAX + 1.0) + 0.5);
405     m_dctRotSinS[s] = int32_t (sin (dNormS * (s + 0.125)) * INT_MIN - 0.5);
406   }
407 
408   for (s = 0; s < m_transfLengthS; s++)
409   {
410     m_fftHalfSin[s] = int32_t (sin (dNormL4 * s) * INT_MIN - 0.5);
411     m_fftHalfCos[m_transfLengthS + s] = -m_fftHalfSin[s];
412   }
413   // complete missing entries by copying
414   m_fftHalfSin[s] = INT_MIN;
415   m_fftHalfCos[0] = INT_MIN;
416   for (s = 1; s < m_transfLengthS; s++)
417   {
418     m_fftHalfSin[m_transfLengthS + s] = m_fftHalfSin[m_transfLengthS - s];
419     m_fftHalfCos[m_transfLengthS - s] = m_fftHalfSin[s];
420   }
421 
422   // adopt helper/window buffer pointers
423   m_tempIntBuf = tempIntBuf;
424   for (s = 0; s < 2; s++)
425   {
426     m_timeWindowL[s] = timeWindowL[s];
427     m_timeWindowS[s] = timeWindowS[s];
428   }
429 
430   return 0; // no error
431 }
432