1 /* lappedTransform.cpp - source file for class providing time-frequency transformation
2 * written by C. R. Helmrich, last modified in 2019 - see License.htm for legal notices
3 *
4 * The copyright in this software is being made available under the exhale Copyright License
5 * and comes with ABSOLUTELY NO WARRANTY. This software may be subject to other third-
6 * party rights, including patent rights. No such rights are granted under this License.
7 *
8 * Copyright (c) 2018-2021 Christian R. Helmrich, project ecodis. All rights reserved.
9 */
10
11 #include "exhaleLibPch.h"
12 #include "lappedTransform.h"
13 #ifdef _MSC_VER
14 # include <intrin.h> // _BitScanReverse
15
16 # pragma intrinsic (_BitScanReverse)
17 #endif
18
19 // static helper functions
createPermutTable(const short tableSize)20 static short* createPermutTable (const short tableSize)
21 {
22 const short lOver2 = tableSize >> 1;
23 short* permutTable = nullptr;
24 short i = 0;
25
26 if ((permutTable = (short*) malloc (tableSize * sizeof (short))) == nullptr)
27 {
28 return nullptr; // allocation error
29 }
30
31 permutTable[0] = 0;
32 for (short s = 1; s < tableSize; s++)
33 {
34 short l = lOver2;
35
36 while (i >= l)
37 {
38 i -= l;
39 l >>= 1;
40 }
41 permutTable[s] = (i += l);
42 }
43
44 return permutTable;
45 }
46
shortIntLog2(uint16_t s)47 static inline int shortIntLog2 (uint16_t s)
48 {
49 #ifdef _MSC_VER
50 unsigned long l;
51
52 _BitScanReverse (&l, s);
53
54 return (int) l;
55 #else
56 // fast base-2 integer logarithm by Todd Lehman (Stack Overflow), July 2014
57 int i = 0;
58 # define S(k) if (s >= (1u << k)) { i += k; s >>= k; }
59 S (8); S (4); S (2); S (1);
60 # undef S
61 return i;
62 #endif
63 }
64
65 // private helper functions
applyHalfSizeFFT(int32_t * const iR,int32_t * const iI,const bool shortTransform)66 void LappedTransform::applyHalfSizeFFT (int32_t* const iR/*eal*/, int32_t* const iI/*mag*/, const bool shortTransform) // works in-place
67 {
68 // int32 FFT version based on http://paulbourke.net/miscellaneous/dft, 1993
69 const int l = (shortTransform ? m_transfLengthS : m_transfLengthL) >> 1;
70 const short* p = (shortTransform ? m_fftPermutS : m_fftPermutL); // look-up
71 int l2 = 1, l3 = m_transfLengthL >> 1;
72
73 if (iR == nullptr)
74 {
75 return; // null-pointer input error
76 }
77
78 // sort input with permutation look-up table
79 if (iI != nullptr)
80 {
81 for (int i = l - 1; i >= 0; i--)
82 {
83 const int j = p[i];
84
85 if (j > i) // swap input data at i and j
86 {
87 const int32_t iRTmp = iR[i]; // use re-
88 const int32_t iITmp = iI[i]; // gisters
89
90 iR[i] = iR[j];
91 iI[i] = iI[j];
92 iR[j] = iRTmp;
93 iI[j] = iITmp;
94 }
95 }
96 }
97 else // real-valued input, no imaginary part
98 {
99 for (int i = l - 1; i >= 0; i--)
100 {
101 const int j = p[i];
102
103 if (j > i) // swap real input at i and j
104 {
105 const int32_t iRTmp = iR[i];
106
107 iR[i] = iR[j];
108 iR[j] = iRTmp;
109 }
110 iI[i] = 0; // zero imaginary input data
111 }
112 }
113
114 // get length-l Fast Fourier Transform (FFT)
115 for (int k = shortIntLog2 ((uint16_t) l) - 1; k >= 0; k--)
116 {
117 const int l1 = l2;
118 l2 <<= 1;
119 l3 >>= 1;
120
121 for (int j = l1 - 1; j >= 0; j--)
122 {
123 const int jTl3 = j * l3;
124 const int64_t cosjl3 = m_fftHalfCos[jTl3]; // cos/sin
125 const int64_t sinjl3 = m_fftHalfSin[jTl3]; // look-up
126
127 for (int i = j; i < l; i += l2)
128 {
129 const int iPl1 = i + l1;
130 const int32_t rotR = int32_t ((cosjl3 * iR[iPl1] + sinjl3 * iI[iPl1] + LUT_OFFSET) >> LUT_SHIFT); // clockwise
131 const int32_t rotI = int32_t ((cosjl3 * iI[iPl1] - sinjl3 * iR[iPl1] + LUT_OFFSET) >> LUT_SHIFT); // rotation
132
133 iR[iPl1] = iR[i] + rotR; iR[i] -= rotR;
134 iI[iPl1] = iI[i] + rotI; iI[i] -= rotI;
135 }
136 }
137 }
138 }
139
windowAndFoldInL(const int32_t * inputL,const bool shortTransform,const bool kbdWindowL,const bool lowOverlapL,const bool mdstKernel,int32_t * const output)140 void LappedTransform::windowAndFoldInL (const int32_t* inputL, const bool shortTransform, const bool kbdWindowL, const bool lowOverlapL,
141 const bool mdstKernel, int32_t* const output)
142 {
143 const unsigned ws = (kbdWindowL ? 1 : 0); // shape
144 const int32_t* wl = (lowOverlapL ? m_timeWindowS[ws] : m_timeWindowL[ws]);
145 const int Mo2 = (shortTransform ? m_transfLengthS : m_transfLengthL) >> 1;
146 const int Mm1 = Mo2 * 2 - 1;
147 const int Mo2m1 = Mo2 - 1;
148 const int Mo2mO = (lowOverlapL ? Mo2 - (m_transfLengthS >> 1) : 0);
149 const int Mm1mO = Mm1 - Mo2mO; // overlap offset
150 int n;
151
152 if (mdstKernel) // time-reversal and TDA sign flip
153 {
154 for (n = Mo2m1; n >= Mo2mO; n--) // windowed pt.
155 {
156 const int64_t i64 = (int64_t) inputL[Mm1 - n] * wl[Mm1mO - n] + (int64_t) inputL[n] * wl[n - Mo2mO];
157
158 output[Mo2m1 - n] = int32_t ((i64 + WIN_OFFSET) >> WIN_SHIFT);
159 }
160 for (/*Mo2mO-1*/; n >= 0; n--) // unwindowed pt.
161 {
162 output[Mo2m1 - n] = (inputL[Mm1 - n] + 2) >> 2;
163 }
164 }
165 else // MDCT kernel, no time-reversal or sign flip
166 {
167 for (n = Mo2m1; n >= Mo2mO; n--) // windowed pt.
168 {
169 const int64_t i64 = (int64_t) inputL[Mm1 - n] * wl[Mm1mO - n] - (int64_t) inputL[n] * wl[n - Mo2mO];
170
171 output[Mo2 + n] = int32_t ((i64 + WIN_OFFSET) >> WIN_SHIFT);
172 }
173 for (/*Mo2mO-1*/; n >= 0; n--) // unwindowed pt.
174 {
175 output[Mo2 + n] = (inputL[Mm1 - n] + 2) >> 2;
176 }
177 }
178 }
179
windowAndFoldInR(const int32_t * inputR,const bool shortTransform,const bool kbdWindowR,const bool lowOverlapR,const bool mdstKernel,int32_t * const output)180 void LappedTransform::windowAndFoldInR (const int32_t* inputR, const bool shortTransform, const bool kbdWindowR, const bool lowOverlapR,
181 const bool mdstKernel, int32_t* const output)
182 {
183 const unsigned ws = (kbdWindowR ? 1 : 0); // shape
184 const int32_t* wr = (lowOverlapR ? m_timeWindowS[ws] : m_timeWindowL[ws]);
185 const int Mo2 = (shortTransform ? m_transfLengthS : m_transfLengthL) >> 1;
186 const int Mm1 = Mo2 * 2 - 1;
187 const int Mo2m1 = Mo2 - 1;
188 const int Mo2mO = (lowOverlapR ? Mo2 - (m_transfLengthS >> 1) : 0);
189 const int Mm1mO = Mm1 - Mo2mO; // overlap offset
190 int n;
191
192 if (mdstKernel) // time-reversal and TDA sign flip
193 {
194 for (n = Mo2m1; n >= Mo2mO; n--) // windowed pt.
195 {
196 const int64_t i64 = (int64_t) inputR[n] * wr[Mm1mO - n] - (int64_t) inputR[Mm1 - n] * wr[n - Mo2mO];
197
198 output[Mo2 + n] = int32_t ((i64 + WIN_OFFSET) >> WIN_SHIFT);
199 }
200 for (/*Mo2mO-1*/; n >= 0; n--) // unwindowed pt.
201 {
202 output[Mo2 + n] = (inputR[n] + 2) >> 2;
203 }
204 }
205 else // MDCT kernel, no time-reversal or sign flip
206 {
207 for (n = Mo2m1; n >= Mo2mO; n--) // windowed pt.
208 {
209 const int64_t i64 = (int64_t) inputR[n] * wr[Mm1mO - n] + (int64_t) inputR[Mm1 - n] * wr[n - Mo2mO];
210
211 output[Mo2m1 - n] = int32_t ((i64 + WIN_OFFSET) >> WIN_SHIFT);
212 }
213 for (/*Mo2mO-1*/; n >= 0; n--) // unwindowed pt.
214 {
215 output[Mo2m1 - n] = (inputR[n] + 2) >> 2;
216 }
217 }
218 }
219
220 // constructor
LappedTransform()221 LappedTransform::LappedTransform ()
222 {
223 // initialize all helper buffers
224 m_dctRotCosL = nullptr;
225 m_dctRotCosS = nullptr;
226 m_dctRotSinL = nullptr;
227 m_dctRotSinS = nullptr;
228 m_fftHalfCos = nullptr;
229 m_fftHalfSin = nullptr;
230 m_fftPermutL = nullptr;
231 m_fftPermutS = nullptr;
232 m_tempIntBuf = nullptr;
233
234 // initialize all window buffers
235 for (short s = 0; s < 2; s++)
236 {
237 m_timeWindowL[s] = nullptr;
238 m_timeWindowS[s] = nullptr;
239 }
240 m_transfLengthL = 0;
241 m_transfLengthS = 0;
242 }
243
244 // destructor
~LappedTransform()245 LappedTransform::~LappedTransform ()
246 {
247 // free allocated helper buffers
248 MFREE (m_dctRotCosL);
249 MFREE (m_dctRotCosS);
250 MFREE (m_dctRotSinL);
251 MFREE (m_dctRotSinS);
252 MFREE (m_fftHalfCos);
253 MFREE (m_fftHalfSin);
254 MFREE (m_fftPermutL);
255 MFREE (m_fftPermutS);
256 m_tempIntBuf = nullptr;
257 }
258
259 // public functions
applyNegDCT4(int32_t * const signal,const bool shortTransform)260 unsigned LappedTransform::applyNegDCT4 (int32_t* const signal, const bool shortTransform) // works in-place
261 {
262 // int32 negative-DCT-IV version based on http://www.ee.columbia.edu/~marios/mdct/mdct_giraffe.html, 2003
263 // NOTE: amplifies short-transform results (8 times shorter than long-transform results) by a factor of 8
264 const int lm1 = (shortTransform ? m_transfLengthS : m_transfLengthL) - 1;
265 const int lm1o2 = lm1 >> 1;
266 const int32_t* rotatCos = (shortTransform ? m_dctRotCosS : m_dctRotCosL);
267 const int32_t* rotatSin = (shortTransform ? m_dctRotSinS : m_dctRotSinL);
268 const int64_t rotOffset = (shortTransform ? LUT_OFFSET>>3 : LUT_OFFSET);
269 const int64_t rotShift = (shortTransform ? LUT_SHIFT - 3 : LUT_SHIFT);
270 int32_t* const tempReal = m_tempIntBuf;
271 int32_t* const tempImag = &m_tempIntBuf[lm1o2 + 1];
272 int i, i2;
273
274 if (signal == nullptr)
275 {
276 return 1; // null-pointer input error
277 }
278
279 // resort, separate, pre-twiddle signal
280 for (i = lm1o2, i2 = lm1 - 1; i >= 0; i--, i2 -= 2)
281 {
282 const int64_t c = rotatCos[i];
283 const int64_t s = rotatSin[i];
284 const int64_t e = signal[i2/*even*/];
285 const int64_t o = signal[lm1 - i2];
286
287 tempReal[i] = int32_t ((rotOffset + e * c - o * s) >> rotShift);
288 tempImag[i] = int32_t ((rotOffset + o * c + e * s) >> rotShift);
289 }
290
291 applyHalfSizeFFT (tempReal, tempImag, shortTransform);
292
293 // post-twiddle, combine, resort output
294 for (i = lm1o2, i2 = lm1 - 1; i >= 0; i--, i2 -= 2)
295 {
296 const int64_t c = rotatCos[i];
297 const int64_t s = rotatSin[i];
298 const int64_t e = tempReal[i];
299 const int64_t o = tempImag[i];
300
301 signal[i2/*even*/] = int32_t ((LUT_OFFSET + o * s - e * c) >> LUT_SHIFT);
302 signal[lm1 - i2] = int32_t ((LUT_OFFSET + e * s + o * c) >> LUT_SHIFT);
303 }
304
305 return 0; // no error
306 }
307
applyMCLT(const int32_t * timeSig,const bool eightTransforms,bool kbdWindowL,const bool kbdWindowR,const bool lowOverlapL,const bool lowOverlapR,int32_t * const outMdct,int32_t * const outMdst)308 unsigned LappedTransform::applyMCLT (const int32_t* timeSig, const bool eightTransforms, bool kbdWindowL, const bool kbdWindowR,
309 const bool lowOverlapL, const bool lowOverlapR, int32_t* const outMdct, int32_t* const outMdst)
310 {
311 if ((timeSig == nullptr) || (outMdct == nullptr) || (outMdst == nullptr))
312 {
313 return 1; // invalid arguments error
314 }
315
316 if (eightTransforms) // short windows
317 {
318 const int32_t* tSigS = &timeSig[(m_transfLengthL - m_transfLengthS) >> 1];
319 int32_t* outMdctS = outMdct;
320 int32_t* outMdstS = outMdst;
321
322 for (unsigned w = 0; w < 8; w++)
323 {
324 windowAndFoldInL (tSigS /*window half 1*/, true, kbdWindowL, lowOverlapL, false, outMdctS);
325 windowAndFoldInR (&tSigS[m_transfLengthS], true, kbdWindowR, lowOverlapR, false, outMdctS);
326 windowAndFoldInL (tSigS /*window half 1*/, true, kbdWindowL, lowOverlapL, true, outMdstS);
327 windowAndFoldInR (&tSigS[m_transfLengthS], true, kbdWindowR, lowOverlapR, true, outMdstS);
328 // MDCT via DCT-IV
329 applyNegDCT4 (outMdctS, true);
330 // MDST via DCT-IV
331 applyNegDCT4 (outMdstS, true);
332 #if 1 // not needed here
333 for (int i = m_transfLengthS - 2; i >= 0; i -= 2)
334 {
335 outMdstS[i] *= -1; // DCT to DST
336 }
337 #endif
338 kbdWindowL = kbdWindowR; // only first window uses last frame's shape
339 tSigS += m_transfLengthS;
340 outMdctS += m_transfLengthS;
341 outMdstS += m_transfLengthS;
342 }
343 }
344 else // 1 long window
345 {
346 windowAndFoldInL (timeSig /*window half 1*/, false, kbdWindowL, lowOverlapL, false, outMdct);
347 windowAndFoldInR (&timeSig[m_transfLengthL], false, kbdWindowR, lowOverlapR, false, outMdct);
348 windowAndFoldInL (timeSig /*window half 1*/, false, kbdWindowL, lowOverlapL, true, outMdst);
349 windowAndFoldInR (&timeSig[m_transfLengthL], false, kbdWindowR, lowOverlapR, true, outMdst);
350 // MDCT using DCT-IV
351 applyNegDCT4 (outMdct, false);
352 // MDST using DCT-IV
353 applyNegDCT4 (outMdst, false);
354 #if 1 // not needed here
355 for (int i = m_transfLengthL - 2; i >= 0; i -= 2)
356 {
357 outMdst[i] *= -1; // DCT to DST
358 }
359 #endif
360 }
361
362 return 0; // no error
363 }
364
initConstants(int32_t * const tempIntBuf,int32_t * const timeWindowL[2],int32_t * const timeWindowS[2],const unsigned maxTransfLength)365 unsigned LappedTransform::initConstants (int32_t* const tempIntBuf, int32_t* const timeWindowL[2], int32_t* const timeWindowS[2],
366 const unsigned maxTransfLength)
367 {
368 const short halfLength = short (maxTransfLength >> 1);
369 const short sixtLength = short (maxTransfLength >> 4);
370 const double dNormL = 3.141592653589793 / (2.0 * halfLength);
371 const double dNormS = 3.141592653589793 / (2.0 * sixtLength);
372 const double dNormL4 = dNormL * 4.0;
373 short s;
374
375 if ((tempIntBuf == nullptr) || (timeWindowL == nullptr) || (timeWindowS == nullptr) ||
376 (maxTransfLength < 128) || (maxTransfLength > 8192) || (maxTransfLength & (maxTransfLength - 1)))
377 {
378 return 1; // invalid arguments error
379 }
380
381 m_transfLengthL = 2 * halfLength;
382 m_transfLengthS = 2 * sixtLength;
383
384 if ((m_dctRotCosL = (int32_t*) malloc (halfLength * sizeof (int32_t))) == nullptr ||
385 (m_dctRotCosS = (int32_t*) malloc (sixtLength * sizeof (int32_t))) == nullptr ||
386 (m_dctRotSinL = (int32_t*) malloc (halfLength * sizeof (int32_t))) == nullptr ||
387 (m_dctRotSinS = (int32_t*) malloc (sixtLength * sizeof (int32_t))) == nullptr ||
388 (m_fftHalfCos = (int32_t*) malloc ((halfLength >> 1) * sizeof (int32_t))) == nullptr ||
389 (m_fftHalfSin = (int32_t*) malloc ((halfLength >> 1) * sizeof (int32_t))) == nullptr ||
390 (m_fftPermutL = createPermutTable (halfLength)) == nullptr ||
391 (m_fftPermutS = createPermutTable (sixtLength)) == nullptr)
392 {
393 return 2; // memory allocation error
394 }
395
396 // obtain cosine and sine coefficients
397 for (s = 0; s < halfLength; s++)
398 {
399 m_dctRotCosL[s] = int32_t (cos (dNormL * (s + 0.125)) * (INT_MAX + 1.0) + 0.5);
400 m_dctRotSinL[s] = int32_t (sin (dNormL * (s + 0.125)) * INT_MIN - 0.5);
401 }
402 for (s = 0; s < sixtLength; s++)
403 {
404 m_dctRotCosS[s] = int32_t (cos (dNormS * (s + 0.125)) * (INT_MAX + 1.0) + 0.5);
405 m_dctRotSinS[s] = int32_t (sin (dNormS * (s + 0.125)) * INT_MIN - 0.5);
406 }
407
408 for (s = 0; s < m_transfLengthS; s++)
409 {
410 m_fftHalfSin[s] = int32_t (sin (dNormL4 * s) * INT_MIN - 0.5);
411 m_fftHalfCos[m_transfLengthS + s] = -m_fftHalfSin[s];
412 }
413 // complete missing entries by copying
414 m_fftHalfSin[s] = INT_MIN;
415 m_fftHalfCos[0] = INT_MIN;
416 for (s = 1; s < m_transfLengthS; s++)
417 {
418 m_fftHalfSin[m_transfLengthS + s] = m_fftHalfSin[m_transfLengthS - s];
419 m_fftHalfCos[m_transfLengthS - s] = m_fftHalfSin[s];
420 }
421
422 // adopt helper/window buffer pointers
423 m_tempIntBuf = tempIntBuf;
424 for (s = 0; s < 2; s++)
425 {
426 m_timeWindowL[s] = timeWindowL[s];
427 m_timeWindowS[s] = timeWindowS[s];
428 }
429
430 return 0; // no error
431 }
432