1 /* stereoProcessing.cpp - source file for class providing M/S stereo coding functionality
2  * written by C. R. Helmrich, last modified in 2021 - see License.htm for legal notices
3  *
4  * The copyright in this software is being made available under the exhale Copyright License
5  * and comes with ABSOLUTELY NO WARRANTY. This software may be subject to other third-
6  * party rights, including patent rights. No such rights are granted under this License.
7  *
8  * Copyright (c) 2018-2021 Christian R. Helmrich, project ecodis. All rights reserved.
9  */
10 
11 #include "exhaleLibPch.h"
12 #include "stereoProcessing.h"
13 #include "bitAllocation.h" // define BA_MORE_CBR (more constant bit-rate, experimental!)
14 
15 // static helper functions
complexAbsMS(const int32_t realPart,const int32_t imagPart)16 static inline uint64_t complexAbsMS (const int32_t realPart, const int32_t imagPart)
17 {
18 #if SA_EXACT_COMPLEX_ABS
19   const double complexSqr = (double) realPart * (double) realPart + (double) imagPart * (double) imagPart;
20 
21   return uint64_t (sqrt (complexSqr) + 0.5);
22 #else
23   const uint64_t absReal  = abs (realPart); // Richard Lyons, 1997; en.wikipedia.org/
24   const uint64_t absImag  = abs (imagPart); // wiki/Alpha_max_plus_beta_min_algorithm
25 
26   return (absReal > absImag ? absReal + ((absImag * 3) >> 3) : absImag + ((absReal * 3) >> 3));
27 #endif
28 }
29 
getMidSample(const int64_t value1,const int64_t value2)30 static inline int32_t  getMidSample (const int64_t value1, const int64_t value2)
31 {
32   return int32_t ((value1 + value2 + 1) >> 1);
33 }
34 
getSideSample(const int64_t value1,const int64_t value2)35 static inline int32_t getSideSample (const int64_t value1, const int64_t value2)
36 {
37   return int32_t ((value1 - value2 + 1) >> 1);
38 }
39 
getResiSample(const int64_t valueR,const int64_t valueD,const int64_t alphaQ)40 static inline int32_t getResiSample (const int64_t valueR, const int64_t valueD, const int64_t alphaQ)
41 {
42   return int32_t (valueR - ((valueD * alphaQ - SHRT_MIN) >> 16));
43 }
44 
setStepSizesMS(const uint32_t * const rmsSfbL,const uint32_t * const rmsSfbR,const uint32_t * const rmsSfbM,const uint32_t * const rmsSfbS,const uint32_t * const grpRms1,const uint32_t * const grpRms2,uint32_t * const grpStepSizes1,uint32_t * const grpStepSizes2,const uint16_t sfb,const uint16_t b,const bool applyPredSte)45 static inline void   setStepSizesMS (const uint32_t* const rmsSfbL, const uint32_t* const rmsSfbR,
46                                      const uint32_t* const rmsSfbM, const uint32_t* const rmsSfbS,
47                                      const uint32_t* const grpRms1, const uint32_t* const grpRms2,
48                                      uint32_t* const grpStepSizes1, uint32_t* const grpStepSizes2,
49                                      const uint16_t sfb, const uint16_t b, const bool applyPredSte)
50 {
51   const uint16_t     idx = sfb + b;
52   const uint32_t sfbRmsL = __max (SP_EPS, rmsSfbL[b]);
53   const uint32_t sfbRmsR = __max (SP_EPS, rmsSfbR[b]);
54   const double  sfbFacLR = (sfbRmsL < (grpStepSizes1[idx] >> 1) ? 1.0 : 2.0) * (sfbRmsR < (grpStepSizes2[idx] >> 1) ? 1.0 : 2.0);
55   const double  sfbMaxMS = (applyPredSte ? __max (rmsSfbM[b], rmsSfbS[b]) : __max (grpRms1[idx], grpRms2[idx]));
56 
57   if ((grpStepSizes1[idx] == 0) || (grpStepSizes2[idx] == 0))  // HF noise filled SFB
58   {
59     grpStepSizes1[idx] = grpStepSizes2[idx] = 0;
60   }
61   else if (sfbFacLR <= 1.0) // simultaneous masking, so no positive SNR in either SFB
62   {
63     const double max = __max (sfbRmsL, sfbRmsR);
64 
65     grpStepSizes1[idx] = grpStepSizes2[idx] = uint32_t (__max (grpStepSizes1[idx], grpStepSizes2[idx]) * (sfbMaxMS / max) + 0.5);
66   }
67   else // partial/no masking, redistribute positive SNR into at least one channel SFB
68   {
69     const double min = (applyPredSte ? __min (rmsSfbM[b], rmsSfbS[b]) : __min (grpRms1[idx], grpRms2[idx]));
70     const double rat = __min (1.0, grpStepSizes1[idx] / (sfbRmsL * 2.0)) * __min (1.0, grpStepSizes2[idx] / (sfbRmsR * 2.0)) * sfbFacLR;
71 
72     grpStepSizes1[idx] = grpStepSizes2[idx] = uint32_t (__max (SP_EPS, (min > rat * sfbMaxMS ? sqrt (rat * sfbMaxMS * min) :
73                                                                         __max (1.0/2048.0, __min (1.0, rat)) * sfbMaxMS)) + 0.5);
74   }
75 }
76 
77 // constructor
StereoProcessor()78 StereoProcessor::StereoProcessor ()
79 {
80 #if SP_OPT_ALPHA_QUANT
81   memset (m_randomIntMemRe, 0, (1+MAX_NUM_SWB_LONG/2) * sizeof (int32_t));
82 # if SP_MDST_PRED
83   memset (m_randomIntMemIm, 0, (1+MAX_NUM_SWB_LONG/2) * sizeof (int32_t));
84 # endif
85 #endif
86   memset (m_stereoCorrValue, 0, (1024 >> SA_BW_SHIFT) * sizeof (uint8_t));
87 }
88 
89 // public functions
applyPredJointStereo(int32_t * const mdctSpectrum1,int32_t * const mdctSpectrum2,int32_t * const mdstSpectrum1,int32_t * const mdstSpectrum2,SfbGroupData & groupingData1,SfbGroupData & groupingData2,const TnsData & filterData1,const TnsData & filterData2,const uint8_t numSwbFrame,uint8_t * const sfbStereoData,const uint8_t bitRateMode,const bool useFullFrameMS,const bool reversePredDir,const uint8_t realOnlyOffset,uint32_t * const sfbStepSize1,uint32_t * const sfbStepSize2)90 unsigned StereoProcessor::applyPredJointStereo (int32_t* const mdctSpectrum1, int32_t* const mdctSpectrum2,
91                                                 int32_t* const mdstSpectrum1, int32_t* const mdstSpectrum2,
92                                                 SfbGroupData&  groupingData1, SfbGroupData&  groupingData2,
93                                                 const TnsData&   filterData1, const TnsData&   filterData2,
94                                                 const uint8_t    numSwbFrame, uint8_t* const sfbStereoData,
95                                                 const uint8_t    bitRateMode, const bool    useFullFrameMS,
96                                                 const bool    reversePredDir, const uint8_t realOnlyOffset,
97                                                 uint32_t* const sfbStepSize1, uint32_t* const sfbStepSize2)
98 {
99   const bool applyPredSte = (sfbStereoData != nullptr); // use real-valued predictive stereo
100   const SfbGroupData& grp = groupingData1;
101   const bool  eightShorts = (grp.numWindowGroups > 1);
102   const uint8_t maxSfbSte = (eightShorts ? __min (numSwbFrame, __max (grp.sfbsPerGroup, groupingData2.sfbsPerGroup) + 1) : numSwbFrame);
103   const bool  perCorrData = ((bitRateMode <= 4) && !eightShorts); // perceptual correlation?
104 #if SP_OPT_ALPHA_QUANT
105   const bool  quantDither = ((bitRateMode >= 4) && !eightShorts); // quantization dithering?
106 #endif
107   bool       alterPredDir = (applyPredSte && reversePredDir); // predict mid from side band?
108   uint32_t rmsSfbL[2] = {0, 0}, rmsSfbR[2] = {0, 0};
109   uint32_t  numSfbPredSte = 0; // counter
110 #if SP_SFB_WISE_STEREO
111   uint16_t  numSfbNoMsSte = 0, idxSfbNoMsSte = 0, nNoMS = 0;
112   uint32_t rms1NoMsSte[2] = {0, 0}, rms2NoMsSte[2] = {0, 0};
113   uint32_t rmsMNoMsSte[2] = {0, 0}, rmsSNoMsSte[2] = {0, 0};
114   uint8_t  dataNoMsSte[2] = {0, 0};
115   bool nonZeroPredNoMsSte = false;
116 #endif
117 
118   if ((mdctSpectrum1 == nullptr) || (mdctSpectrum2 == nullptr) || (numSwbFrame < maxSfbSte) || (grp.numWindowGroups != groupingData2.numWindowGroups) ||
119       (sfbStepSize1  == nullptr) || (sfbStepSize2  == nullptr) || (numSwbFrame < MIN_NUM_SWB_SHORT) || (numSwbFrame > MAX_NUM_SWB_LONG))
120   {
121     return 1;  // invalid arguments error
122   }
123 #if !SP_SFB_WISE_STEREO
124   if (!useFullFrameMS)
125   {
126     if (applyPredSte) memset (sfbStereoData, 0, (MAX_NUM_SWB_SHORT * NUM_WINDOW_GROUPS) * sizeof (uint8_t));
127 
128     return 0; // zeroed ms_used, no pred.
129   }
130 #endif
131 #if SP_MDST_PRED
132   if (applyPredSte && (bitRateMode > 4) && !eightShorts && !reversePredDir) // pred_dir test
133   {
134     uint64_t sumRealM = 0, sumRealS = 0;
135 
136     for (uint16_t s = grp.sfbOffsets[numSwbFrame] - 1; s > 0; s--)
137     {
138       sumRealM += abs (mdctSpectrum1[s] + mdctSpectrum2[s]); // i.e., 2*mid,
139       sumRealS += abs (mdctSpectrum1[s] - mdctSpectrum2[s]); // i.e., 2*side
140     }
141     alterPredDir = (sumRealS * 2 > sumRealM * 3);
142   }
143 #endif
144 
145   if (applyPredSte && perCorrData) memcpy (m_stereoCorrValue, sfbStereoData, (grp.sfbOffsets[numSwbFrame] >> SA_BW_SHIFT) * sizeof (uint8_t));
146 #if SP_OPT_ALPHA_QUANT
147   if ((bitRateMode >= 4) && eightShorts) // reset quantizer dither memory in short transform
148   {
149     for (uint16_t sfb = 0; sfb <= MAX_NUM_SWB_LONG / 2; sfb++)
150     {
151       m_randomIntMemRe[sfb] = (1 << 30);
152 # if SP_MDST_PRED
153       m_randomIntMemIm[sfb] = (1 << 30);
154 # endif
155     }
156   }
157 #endif
158 
159   for (uint16_t n = 0, gr = 0; gr < grp.numWindowGroups; gr++)
160   {
161     const uint16_t grOffset = numSwbFrame * gr;
162     const uint8_t grpLength = grp.windowGroupLength[gr];
163     const bool realOnlyCalc = ((grpLength == 1) && (filterData1.numFilters[n] > 0 || filterData2.numFilters[n] > 0)) || !mdstSpectrum1 || !mdstSpectrum2;
164     const uint16_t*  grpOff = &grp.sfbOffsets[grOffset];
165     uint32_t* const grpRms1 = &groupingData1.sfbRmsValues[grOffset];
166     uint32_t* const grpRms2 = &groupingData2.sfbRmsValues[grOffset];
167     uint32_t* grpStepSizes1 = &sfbStepSize1[grOffset];
168     uint32_t* grpStepSizes2 = &sfbStepSize2[grOffset];
169     int32_t  b = 0, prevReM = 0, prevReS = 0;
170 
171     if (realOnlyCalc) // preparation for first magnitude value
172     {
173       const uint16_t sIndex = grpOff[realOnlyOffset] + (grpOff[realOnlyOffset] > 0 ? -1 : 1);
174 
175       prevReM = getMidSample (mdctSpectrum1[sIndex], mdctSpectrum2[sIndex]);
176       prevReS = getSideSample(mdctSpectrum1[sIndex], mdctSpectrum2[sIndex]);
177     }
178 
179     for (uint16_t sfb = 0; sfb < maxSfbSte; sfb++)
180     {
181       const int32_t  sfbIsOdd = sfb & 1;
182       const uint16_t sfbStart = grpOff[sfb];
183       const uint16_t sfbWidth = grpOff[sfb + 1] - sfbStart;
184       int32_t* sfbMdct1 = &mdctSpectrum1[sfbStart];
185       int32_t* sfbMdct2 = &mdctSpectrum2[sfbStart];
186       int32_t* sfbMdst1 = &mdstSpectrum1[sfbStart];
187       int32_t* sfbMdst2 = &mdstSpectrum2[sfbStart];
188       uint64_t sumAbsValM = 0, sumAbsValS = 0;
189       double   sfbTempVar;
190 
191 #if SP_SFB_WISE_STEREO
192       if ((sfbIsOdd == 0) && !useFullFrameMS) // save L/R data
193       {
194         const uint16_t cpyWidth = (grpOff[__min (maxSfbSte, sfb + 2)] - sfbStart) * sizeof (int32_t);
195 
196         memcpy (m_originBandMdct1, sfbMdct1, cpyWidth);
197         memcpy (m_originBandMdct2, sfbMdct2, cpyWidth);
198         memcpy (m_originBandMdst1, sfbMdst1, cpyWidth);
199         memcpy (m_originBandMdst2, sfbMdst2, cpyWidth);
200       }
201 #endif
202       if (realOnlyCalc && (sfb >= realOnlyOffset)) // real-valued data, only MDCTs available
203       {
204         const int32_t* sfbNext1 = &sfbMdct1[1];
205         const int32_t* sfbNext2 = &sfbMdct2[1];
206 
207         for (uint16_t s = sfbWidth - (sfb + 1 == numSwbFrame ? 1 : 0); s > 0; s--)
208         {
209           const int32_t dmixReM = getMidSample (*sfbMdct1, *sfbMdct2);
210           const int32_t dmixReS = getSideSample(*sfbMdct1, *sfbMdct2);
211           // TODO: improve the following lines since the calculation is partially redundant!
212           const int32_t dmixImM = int32_t ((getMidSample (*sfbNext1, *sfbNext2) - (int64_t) prevReM) >> 1); // estimate, see also
213           const int32_t dmixImS = int32_t ((getSideSample(*sfbNext1, *sfbNext2) - (int64_t) prevReS) >> 1); // getMeanAbsValues()
214 
215           sumAbsValM += complexAbsMS (dmixReM, dmixImM);
216           sumAbsValS += complexAbsMS (dmixReS, dmixImS);
217 
218           *(sfbMdct1++) = dmixReM;
219           *(sfbMdct2++) = dmixReS;
220           *(sfbMdst1++) = dmixImM;
221           *(sfbMdst2++) = dmixImS;
222           sfbNext1++; prevReM = dmixReM;
223           sfbNext2++; prevReS = dmixReS;
224         }
225         if (sfb + 1 == numSwbFrame) // process the last sample
226         {
227           const int32_t dmixReM = getMidSample (*sfbMdct1, *sfbMdct2);
228           const int32_t dmixReS = getSideSample(*sfbMdct1, *sfbMdct2);
229 
230           sumAbsValM += abs (dmixReM);
231           sumAbsValS += abs (dmixReS);
232 
233           *sfbMdct1 = dmixReM;
234           *sfbMdct2 = dmixReS;
235           *sfbMdst1 = 0;
236           *sfbMdst2 = 0;
237         }
238       }
239       else // complex data, both MDCTs and MDSTs are available
240       {
241         for (uint16_t s = sfbWidth; s > 0; s--)
242         {
243           const int32_t dmixReM = getMidSample (*sfbMdct1, *sfbMdct2);
244           const int32_t dmixReS = getSideSample(*sfbMdct1, *sfbMdct2);
245           const int32_t dmixImM = getMidSample (*sfbMdst1, *sfbMdst2);
246           const int32_t dmixImS = getSideSample(*sfbMdst1, *sfbMdst2);
247 
248           sumAbsValM += complexAbsMS (dmixReM, dmixImM);
249           sumAbsValS += complexAbsMS (dmixReS, dmixImS);
250 
251           *(sfbMdct1++) = dmixReM;
252           *(sfbMdct2++) = dmixReS;
253           *(sfbMdst1++) = dmixImM;
254           *(sfbMdst2++) = dmixImS;
255         }
256       } // realOnlyCalc && sfb >= realOnlyOffset
257 
258       rmsSfbL[sfbIsOdd] = grpRms1[sfb];
259       rmsSfbR[sfbIsOdd] = grpRms2[sfb];
260       // average spectral sample magnitude across current band
261       grpRms1[sfb] = uint32_t ((sumAbsValM + (sfbWidth >> 1)) / sfbWidth);
262       grpRms2[sfb] = uint32_t ((sumAbsValS + (sfbWidth >> 1)) / sfbWidth);
263 
264       if (applyPredSte) sfbStereoData[sfb + grOffset] = 16; // initialize alpha_q_.. to zero
265 
266       if ((sfbIsOdd) || (sfb + 1 == maxSfbSte)) // finish pair
267       {
268         const uint16_t sfbEv = sfb & 0xFFFE; // even SFB index
269         uint32_t  rmsSfbM[2] = {0, 0}, rmsSfbS[2] = {0, 0};
270         bool nonZeroPredCoef = false;
271 
272         if (applyPredSte) // calc real-prediction coefficients
273         {
274           const uint16_t offEv = grpOff[sfbEv];
275           const uint16_t width = grpOff[sfb + 1] - offEv;
276           const int32_t* mdctA = (alterPredDir ? &mdctSpectrum2[offEv] : &mdctSpectrum1[offEv]);
277           const int32_t* mdctB = (alterPredDir ? &mdctSpectrum1[offEv] : &mdctSpectrum2[offEv]);
278           const int32_t* mdstA = (alterPredDir ? &mdstSpectrum2[offEv] : &mdstSpectrum1[offEv]);
279           const int32_t* mdstB = (alterPredDir ? &mdstSpectrum1[offEv] : &mdstSpectrum2[offEv]);
280           int64_t sumPrdReAReB = 0, sumPrdReAReA = SP_EPS;  // stabilizes the division below
281 #if SP_MDST_PRED
282           int64_t sumPrdImAReB = 0;
283 #endif
284           double d, alphaLimit = 1.5; // max alpha_q magnitude
285 
286           for (uint16_t s = width; s > 0; s--, mdctA++, mdctB++, mdstA++, mdstB++)
287           {
288             const int64_t prdReAReA = ((int64_t) *mdctA * (int64_t) *mdctA + SA_BW) >> (SA_BW_SHIFT + 1);
289             const int64_t prdImAImA = ((int64_t) *mdstA * (int64_t) *mdstA + SA_BW) >> (SA_BW_SHIFT + 1);
290 
291             sumPrdReAReB += ((int64_t) *mdctA * (int64_t) *mdctB + SA_BW) >> (SA_BW_SHIFT + 1);
292             sumPrdReAReA += prdReAReA;
293 #if SP_MDST_PRED
294             sumPrdImAReB += ((int64_t) *mdstA * (int64_t) *mdctB + SA_BW) >> (SA_BW_SHIFT + 1);
295 #endif
296             // add complex conjugate part, increases stability
297             sumPrdReAReB += ((int64_t) *mdstA * (int64_t) *mdstB + SA_BW) >> (SA_BW_SHIFT + 1);
298             sumPrdReAReA += prdImAImA;
299 #if SP_MDST_PRED
300             sumPrdImAReB -= ((int64_t) *mdctA * (int64_t) *mdstB + SA_BW) >> (SA_BW_SHIFT + 1);
301 #endif
302           }
303           for (b = sfbIsOdd; b >= 0; b--) // limit alpha_q to prevent residual RMS increases
304           {
305             const int idx = sfbEv + b;
306 
307             d = (alterPredDir ? (double) grpRms1[idx] / __max (SP_EPS, grpRms2[idx]) : (double) grpRms2[idx] / __max (SP_EPS, grpRms1[idx]));
308             if (alphaLimit > d) alphaLimit = d;
309           }
310           sfbTempVar = CLIP_PM ((double) sumPrdReAReB / (double) sumPrdReAReA, alphaLimit);
311 #if SP_OPT_ALPHA_QUANT
312           b = __max (512, 524 - int32_t (abs (10.0 * sfbTempVar))); // rounding optimization
313 # if 1
314           if (quantDither)
315           {
316             const int32_t r = (int32_t) m_randomInt32 ();
317             const double dr = 10.0 * sfbTempVar + (r - m_randomIntMemRe[sfbEv >> 1]) * SP_DIV;
318 
319             b = int32_t (dr + b * (dr < 0.0 ? -0.0009765625 : 0.0009765625));
320             m_randomIntMemRe[sfbEv >> 1] = r;
321           }
322           else
323 # endif
324           b = int32_t (10.0 * sfbTempVar + b * (sfbTempVar < 0.0 ? -0.0009765625 : 0.0009765625));
325 #else
326           b = int32_t (10.0 * sfbTempVar + (sfbTempVar < 0 ? -0.5 : 0.5));// nearest integer
327 #endif
328           sfbStereoData[sfbEv + grOffset] = uint8_t (b + 16); // save SFB's final alpha_q_re
329 #if SP_MDST_PRED
330           alphaLimit = CLIP_PM ((double) sumPrdImAReB / (double) sumPrdReAReA, alphaLimit);
331 # if SP_OPT_ALPHA_QUANT
332           b = __max (512, 524 - int32_t (abs (10.0 * alphaLimit))); // rounding optimization
333 #  if 1
334           if (quantDither)
335           {
336             const int32_t r = (int32_t) m_randomInt32 ();
337             const double dr = 10.0 * alphaLimit + (r - m_randomIntMemIm[sfbEv >> 1]) * SP_DIV;
338 
339             b = int32_t (dr + b * (dr < 0.0 ? -0.0009765625 : 0.0009765625));
340             m_randomIntMemIm[sfbEv >> 1] = r;
341           }
342           else
343 #  endif
344           b = int32_t (10.0 * alphaLimit + b * (alphaLimit < 0.0 ? -0.0009765625 : 0.0009765625));
345 # else
346           b = int32_t (10.0 * alphaLimit + (alphaLimit < 0 ? -0.5 : 0.5));// nearest integer
347 # endif
348           if (sfbEv + 1 < numSwbFrame)
349           sfbStereoData[sfbEv + 1 + grOffset] = uint8_t (b + 16); // save initial alpha_q_im
350 #endif // SP_MDST_PRED
351 
352           if (perCorrData && ((offEv & (SA_BW - 1)) == 0) && ((width & (SA_BW - 1)) == 0))
353           {
354             const uint8_t* const perCorr = &m_stereoCorrValue[offEv >> SA_BW_SHIFT];
355 
356             // perceptual correlation data available from previous call to stereoSigAnalysis
357             b = (width == SA_BW ? perCorr[0] : ((int32_t) perCorr[0] + (int32_t) perCorr[1] + 1) >> 1);
358           }
359           else b = UCHAR_MAX; // previous correlation data unavailable, assume maximum value
360 
361           if ((b > SCHAR_MAX && sfbStereoData[sfbEv + grOffset] != 16) || // if perceptually
362               (2 <= abs ( (int) sfbStereoData[sfbEv + grOffset] - 16))) // significant pred.
363           {
364             nonZeroPredCoef = true;
365           }
366           sfbTempVar *= sfbTempVar;  // account for residual RMS reduction due to prediction
367 #if SP_MDST_PRED && !(BA_MORE_CBR)
368           if (bitRateMode > 0) sfbTempVar += alphaLimit * alphaLimit;  // including alpha_im
369 #endif
370           for (b = sfbIsOdd; b >= 0; b--)
371           {
372             const int idx = sfbEv + b;
373 
374             if (alterPredDir)
375             {
376               d = (double) grpRms1[idx] * grpRms1[idx] - sfbTempVar * (double) grpRms2[idx] * grpRms2[idx];
377               // consider discarding prediction if gain (residual RMS loss) is below -0.9 dB
378               if ((double) grpRms1[idx] * grpRms1[idx] * 0.8125 < d) nonZeroPredCoef = false;
379               rmsSfbM[b] = uint32_t (sqrt (__max (0.0, d)) + 0.5);
380               rmsSfbS[b] = grpRms2[idx];
381             }
382             else // mid>side
383             {
384               d = (double) grpRms2[idx] * grpRms2[idx] - sfbTempVar * (double) grpRms1[idx] * grpRms1[idx];
385               // consider discarding prediction if gain (residual RMS loss) is below -0.9 dB
386               if ((double) grpRms2[idx] * grpRms2[idx] * 0.8125 < d) nonZeroPredCoef = false;
387               rmsSfbS[b] = uint32_t (sqrt (__max (0.0, d)) + 0.5);
388               rmsSfbM[b] = grpRms1[idx];
389             }
390           }
391         } // if applyPredSte
392 
393 #if SP_SFB_WISE_STEREO
394         if (!useFullFrameMS) // test M/S compaction gain, revert to L/R if it's insufficient
395         {
396           const uint64_t bandSum1 = (sfbIsOdd > 0 ? (uint64_t) grpRms1[sfbEv] + (uint64_t) grpRms1[sfbEv + 1] : grpRms1[sfbEv]);
397           const uint64_t bandSum2 = (sfbIsOdd > 0 ? (uint64_t) grpRms2[sfbEv] + (uint64_t) grpRms2[sfbEv + 1] : grpRms2[sfbEv]);
398           const uint64_t bandSumL = (sfbIsOdd > 0 ? (uint64_t) rmsSfbL[0] + (uint64_t) rmsSfbL[1] : rmsSfbL[0]) >> 1;
399           const uint64_t bandSumR = (sfbIsOdd > 0 ? (uint64_t) rmsSfbR[0] + (uint64_t) rmsSfbR[1] : rmsSfbR[0]) >> 1;
400           const uint64_t bandSumM = (applyPredSte ? (uint64_t) rmsSfbM[0] + (uint64_t) rmsSfbM[1] : bandSum1) >> 1;
401           const uint64_t bandSumS = (applyPredSte ? (uint64_t) rmsSfbS[0] + (uint64_t) rmsSfbS[1] : bandSum2) >> 1;
402 
403           if ((__min (bandSumM, bandSumS) * __max (bandSumL, bandSumR) >= __min (bandSumL, bandSumR) * __max (bandSumM, bandSumS)) ||
404               (nonZeroPredCoef && (abs ( (int) sfbStereoData[sfbEv + grOffset] - 16) >= 10)))
405           {
406             const uint16_t sfbOffEv = grpOff[sfbEv];
407             const uint16_t cpyWidth = (grpOff[sfb + 1] - sfbOffEv) * sizeof (int32_t);
408 
409             memcpy (&mdctSpectrum1[sfbOffEv], m_originBandMdct1, cpyWidth); // revert to L/R
410             memcpy (&mdctSpectrum2[sfbOffEv], m_originBandMdct2, cpyWidth);
411             memcpy (&mdstSpectrum1[sfbOffEv], m_originBandMdst1, cpyWidth);
412             memcpy (&mdstSpectrum2[sfbOffEv], m_originBandMdst2, cpyWidth);
413 
414             for (b = sfbIsOdd; b >= 0; b--)
415             {
416               const int idx = sfbEv + b;
417 
418               if (numSfbNoMsSte == 0)
419               {
420                 rms1NoMsSte[b] = grpRms1[idx];
421                 rms2NoMsSte[b] = grpRms2[idx];
422               }
423               grpRms1[idx] = rmsSfbL[b]; // recover left/right band energies
424               grpRms2[idx] = rmsSfbR[b];
425               if (applyPredSte)
426               {
427                 if (numSfbNoMsSte == 0)
428                 {
429                   rmsMNoMsSte[b] = rmsSfbM[b];
430                   rmsSNoMsSte[b] = rmsSfbS[b];
431                   dataNoMsSte[b] = sfbStereoData[idx + grOffset];
432                 }
433                 sfbStereoData[idx + grOffset] = 0;  // set ms_used flag to 0
434               }
435             }
436             numSfbNoMsSte++;  nNoMS = n;
437             idxSfbNoMsSte = sfbEv + grOffset;
438             nonZeroPredNoMsSte = nonZeroPredCoef;
439 
440             continue; // M/S is not used
441           }
442         } // if !useFullFrameMS
443 #endif
444         for (b = sfbIsOdd; b >= 0; b--) setStepSizesMS (rmsSfbL, rmsSfbR, rmsSfbM, rmsSfbS, grpRms1, grpRms2,
445                                                         grpStepSizes1, grpStepSizes2, sfbEv, (uint16_t) b, applyPredSte);
446         if (nonZeroPredCoef) numSfbPredSte++; // if perceptually significant prediction band
447       } // if pair completed
448     }
449     if (grpLength == 1) n++;
450   } // for gr
451 
452 #if SP_SFB_WISE_STEREO
453   if (numSfbNoMsSte == 1) // upgrade single L/R to M/S band to reduce M/S signaling overhead
454   {
455     const uint16_t   grNoMS = idxSfbNoMsSte / numSwbFrame;
456     const uint16_t  offNoMS = numSwbFrame * grNoMS;
457     const uint16_t  sfbNoMS = idxSfbNoMsSte - offNoMS;
458     const uint8_t grpLength = grp.windowGroupLength[grNoMS];
459     const bool realOnlyCalc = ((grpLength == 1) && (filterData1.numFilters[nNoMS] > 0 || filterData2.numFilters[nNoMS] > 0)) || !mdstSpectrum1 || !mdstSpectrum2;
460     const uint16_t*  grpOff = &grp.sfbOffsets[offNoMS];
461     uint32_t* const grpRms1 = &groupingData1.sfbRmsValues[offNoMS];
462     uint32_t* const grpRms2 = &groupingData2.sfbRmsValues[offNoMS];
463 
464     for (int32_t b = (sfbNoMS + 1 < maxSfbSte ? 1 : 0); b >= 0; b--)
465     {
466       const int idx = sfbNoMS + b; // sfbNoMS = even SFB index
467       const uint16_t sfbStart = grpOff[idx];
468       const uint16_t sfbWidth = grpOff[idx + 1] - sfbStart;
469       int32_t* sfbMdct1 = &mdctSpectrum1[sfbStart];
470       int32_t* sfbMdct2 = &mdctSpectrum2[sfbStart];
471 
472       rmsSfbL[b] = grpRms1[idx];  // save left/right band RMSs
473       rmsSfbR[b] = grpRms2[idx];
474       grpRms1[idx] = rms1NoMsSte[b];  // recover M/S band RMSs
475       grpRms2[idx] = rms2NoMsSte[b];
476       if (applyPredSte) sfbStereoData[idx + offNoMS] = dataNoMsSte[b];
477 
478       if (realOnlyCalc && (idx >= realOnlyOffset)) // real-valued data, only MDCTs available
479       {
480         for (uint16_t s = sfbWidth; s > 0; s--)
481         {
482           const int32_t dmixReM = getMidSample (*sfbMdct1, *sfbMdct2);
483           const int32_t dmixReS = getSideSample(*sfbMdct1, *sfbMdct2);
484 
485           *(sfbMdct1++) = dmixReM;
486           *(sfbMdct2++) = dmixReS;
487         }
488       }
489       else // complex data, both MDCTs and MDSTs are available
490       {
491         int32_t* sfbMdst1 = &mdstSpectrum1[sfbStart];
492         int32_t* sfbMdst2 = &mdstSpectrum2[sfbStart];
493 
494         for (uint16_t s = sfbWidth; s > 0; s--)
495         {
496           const int32_t dmixReM = getMidSample (*sfbMdct1, *sfbMdct2);
497           const int32_t dmixReS = getSideSample(*sfbMdct1, *sfbMdct2);
498           const int32_t dmixImM = getMidSample (*sfbMdst1, *sfbMdst2);
499           const int32_t dmixImS = getSideSample(*sfbMdst1, *sfbMdst2);
500 
501           *(sfbMdct1++) = dmixReM;
502           *(sfbMdct2++) = dmixReS;
503           *(sfbMdst1++) = dmixImM;
504           *(sfbMdst2++) = dmixImS;
505         }
506       } // realOnlyCalc && idx >= realOnlyOffset
507 
508       setStepSizesMS (rmsSfbL, rmsSfbR, rmsMNoMsSte, rmsSNoMsSte, grpRms1, grpRms2,
509                       &sfbStepSize1[offNoMS], &sfbStepSize2[offNoMS], sfbNoMS, (uint16_t) b, applyPredSte);
510     }
511     if (nonZeroPredNoMsSte) numSfbPredSte++; // was perceptually significant prediction band
512   } // if numSfbNoMsSte == 1
513 #endif
514 
515   if (numSfbPredSte == 0) // discard prediction coefficients and stay with legacy M/S stereo
516   {
517     if (applyPredSte)
518     {
519       for (uint16_t gr = 0; gr < grp.numWindowGroups; gr++)
520       {
521         uint8_t* const grpSData = &sfbStereoData[numSwbFrame * gr];
522 
523         for (uint16_t sfb = 0; sfb < maxSfbSte; sfb++)
524         {
525           if (grpSData[sfb] > 0) grpSData[sfb] = 16;
526         }
527         if (numSwbFrame > maxSfbSte) memset (&grpSData[maxSfbSte], (useFullFrameMS ? 16 : 0), (numSwbFrame - maxSfbSte) * sizeof (uint8_t));
528       }
529     }
530   }
531   else // at least one "significant" prediction band, apply prediction and update RMS values
532   {
533     for (uint16_t n = 0, gr = 0; gr < grp.numWindowGroups; gr++)
534     {
535       const uint16_t grOffset = numSwbFrame * gr;
536       const uint8_t grpLength = grp.windowGroupLength[gr];
537       const bool realOnlyCalc = ((grpLength == 1) && (filterData1.numFilters[n] > 0 || filterData2.numFilters[n] > 0)) || !mdstSpectrum1 || !mdstSpectrum2;
538       const uint16_t*  grpOff = &grp.sfbOffsets[grOffset];
539       uint32_t* const grpRms1 = &groupingData1.sfbRmsValues[grOffset];
540       uint32_t* const grpRms2 = &groupingData2.sfbRmsValues[grOffset];
541       uint8_t* const grpSData = &sfbStereoData[grOffset];
542       int32_t prevResi = 0;
543 
544       if (realOnlyCalc) // preparation of res. magnitude value
545       {
546         const int64_t alphaRe = (grpSData[realOnlyOffset & 0xFE] > 0 ? (int) grpSData[realOnlyOffset & 0xFE] - 16 : 0) * SP_0_DOT_1_16BIT;
547         const uint16_t sIndex = grpOff[realOnlyOffset] + (grpOff[realOnlyOffset] > 0 ? -1 : 1);
548 
549         prevResi = (alterPredDir ? getResiSample (mdctSpectrum1[sIndex], mdctSpectrum2[sIndex], alphaRe)
550                                  : getResiSample (mdctSpectrum2[sIndex], mdctSpectrum1[sIndex], alphaRe));
551       }
552 
553       for (uint16_t sfb = 0; sfb < maxSfbSte; sfb++)
554       {
555         const uint16_t sfbEv = sfb & 0xFFFE; // even SFB index
556         const uint16_t sfbStart = grpOff[sfb];
557         const uint16_t sfbWidth = grpOff[sfb + 1] - sfbStart;
558         const int64_t   alphaRe = (grpSData[sfbEv] > 0 ? (int) grpSData[sfbEv] - 16 : 0) * SP_0_DOT_1_16BIT;
559         int32_t* sfbMdctD = (alterPredDir ? &mdctSpectrum2[sfbStart] : &mdctSpectrum1[sfbStart]);
560         int32_t* sfbMdctR = (alterPredDir ? &mdctSpectrum1[sfbStart] : &mdctSpectrum2[sfbStart]);
561         uint64_t sumAbsValR = 0;
562 
563         if (alphaRe == 0)
564         {
565           if (realOnlyCalc && (sfb >= realOnlyOffset)) // update previous residual MDCT data
566           {
567             sfbMdctR += sfbWidth - 1;
568             prevResi = (grpSData[sfbEv] > 0 ? *sfbMdctR : int32_t (((int64_t) sfbMdctD[sfbWidth - 1] +
569                                           (alterPredDir ? 1 : -1) * (int64_t) *sfbMdctR + 1) >> 1));
570           }
571           continue; // nothing more to do, i.e., no prediction
572         }
573 
574         if (realOnlyCalc && (sfb >= realOnlyOffset))  // real-valued, only MDCT is available
575         {
576           const int32_t* sfbNextD = &sfbMdctD[1];
577           const int32_t* sfbNextR = &sfbMdctR[1];
578 
579           for (uint16_t s = sfbWidth - (sfb + 1 == numSwbFrame ? 1 : 0); s > 0; s--)
580           {
581             const int32_t  resiRe = getResiSample (*sfbMdctR, *sfbMdctD, alphaRe);
582             // TODO: improve the following line since the calculation is partially redundant
583             const int32_t  resiIm = int32_t ((getResiSample (*sfbNextR, *sfbNextD, alphaRe) - (int64_t) prevResi) >> 1);
584 
585             sumAbsValR += complexAbsMS (resiRe, resiIm);
586 
587             sfbMdctD++;
588             *(sfbMdctR++) = resiRe;
589             sfbNextD++;
590             sfbNextR++; prevResi = resiRe;
591           }
592           if (sfb + 1 == numSwbFrame)  // process final sample
593           {
594             const int32_t  resiRe = getResiSample (*sfbMdctR, *sfbMdctD, alphaRe);
595 
596             sumAbsValR += abs (resiRe);
597 
598             *sfbMdctR = resiRe;
599           }
600         }
601         else // complex data, both MDCT and MDST are available
602         {
603           int32_t* sfbMdstD = (alterPredDir ? &mdstSpectrum2[sfbStart] : &mdstSpectrum1[sfbStart]);
604           int32_t* sfbMdstR = (alterPredDir ? &mdstSpectrum1[sfbStart] : &mdstSpectrum2[sfbStart]);
605 
606           for (uint16_t s = sfbWidth; s > 0; s--)
607           {
608             const int32_t  resiRe = getResiSample (*sfbMdctR, *sfbMdctD, alphaRe);
609             const int32_t  resiIm = getResiSample (*sfbMdstR, *sfbMdstD, alphaRe);
610 
611             sumAbsValR += complexAbsMS (resiRe, resiIm);
612 
613             sfbMdctD++;
614             *(sfbMdctR++) = resiRe;
615             sfbMdstD++;
616             *(sfbMdstR++) = resiIm;
617           }
618         } // realOnlyCalc && sfb >= realOnlyOffset
619 
620         // average spectral res. magnitude across current band
621         sumAbsValR = (sumAbsValR + (sfbWidth >> 1)) / sfbWidth;
622         if (alterPredDir) grpRms1[sfb] = (uint32_t) sumAbsValR; else grpRms2[sfb] = (uint32_t) sumAbsValR;
623       }
624       if (numSwbFrame > maxSfbSte) memset (&grpSData[maxSfbSte], (useFullFrameMS ? 16 : 0), (numSwbFrame - maxSfbSte) * sizeof (uint8_t));
625 
626       if (alterPredDir) // swap channel data when pred_dir = 1
627       {
628         for (uint16_t sfb = 0; sfb < maxSfbSte; sfb++)
629         {
630           const uint16_t sfbStart = grpOff[sfb];
631           int32_t* sfbMdct1 = &mdctSpectrum1[sfbStart];
632           int32_t* sfbMdct2 = &mdctSpectrum2[sfbStart];
633 
634           if (grpSData[sfb & 0xFFFE] == 0) continue; // no M/S
635 
636           for (uint16_t s = grpOff[sfb + 1] - sfbStart; s > 0; s--)
637           {
638             const int32_t i = *sfbMdct1;
639             *(sfbMdct1++)   = *sfbMdct2;
640             *(sfbMdct2++)   = i;
641           }
642           numSfbPredSte = grpRms1[sfb];
643           grpRms1[sfb]  = grpRms2[sfb];
644           grpRms2[sfb]  = numSfbPredSte;
645         }
646       }
647       if (grpLength == 1) n++;
648     } // for gr
649 
650 #if SP_MDST_PRED
651     numSfbPredSte = (applyPredSte && (alterPredDir != reversePredDir) ? 4 /*pred_dir=1*/ : 2);
652 #else
653     numSfbPredSte = 2;
654 #endif
655   }
656 
657   return numSfbPredSte; // no error
658 }
659