1 /* quantization.cpp - source file for class with nonuniform quantization functionality
2 * written by C. R. Helmrich, last modified in 2020 - see License.htm for legal notices
3 *
4 * The copyright in this software is being made available under the exhale Copyright License
5 * and comes with ABSOLUTELY NO WARRANTY. This software may be subject to other third-
6 * party rights, including patent rights. No such rights are granted under this License.
7 *
8 * Copyright (c) 2018-2021 Christian R. Helmrich, project ecodis. All rights reserved.
9 */
10
11 #include "exhaleLibPch.h"
12 #include "quantization.h"
13
14 #define EC_TRAIN (0 && EC_TRELLIS_OPT_CODING) // for RDOC testing
15
16 // static helper functions
// Estimate the bit count for signaling one SFB: the scale factor index delta (a fixed
// 8 bits when no valid predecessor exists, i.e. sfIndexPred == UCHAR_MAX) plus, for
// non-grouped spectra (groupLength == 1), the arithmetically coded significance and
// magnitude bits of the quantized coefficients. Sign bits are NOT included.
// NOTE: arithCodeSigMagn advances the entropy coder's state - callers that only want
// a bit-count estimate must save and restore the coder state around this call.
static inline short getBitCount (EntropyCoder& entrCoder, const int sfIndex, const int sfIndexPred,
                                 const uint8_t groupLength, const uint8_t* coeffQuant,
                                 const uint16_t coeffOffset, const uint16_t numCoeffs)
{
  unsigned bitCount = (sfIndex != UCHAR_MAX && sfIndexPred == UCHAR_MAX ? 8 : entrCoder.indexGetBitCount (sfIndex - sfIndexPred));

  if (groupLength == 1) // include arithmetic coding in bit count
  {
#if EC_TRELLIS_OPT_CODING
    // bits 17..21 of the packed context state carry the coder's pending bit counter
    const unsigned bitsStart = (entrCoder.arithGetCtxState () >> 17) & 31;
#endif
    bitCount += entrCoder.arithCodeSigMagn (coeffQuant, coeffOffset, numCoeffs);
#if EC_TRELLIS_OPT_CODING
    bitCount += (entrCoder.arithGetCtxState () >> 17) & 31;
    bitCount -= __min (bitsStart, bitCount); // +new-old m_acBits
#endif
  }

  return (short) __min (SHRT_MAX, bitCount); // exclude sign bits
}
37
38 #if EC_TRELLIS_OPT_CODING && !EC_TRAIN
// RD optimization constant: empirical Lagrange multiplier for the rate-distortion
// cost, growing quadratically with the coder's rate index. Scaling equals 1/1024.
static inline double getLagrangeValue (const uint16_t rateIndex)
{
  const double numerator = 95.0 + (double) rateIndex * rateIndex;

  return numerator / 1024.0; // * 0.0009765625
}
43 #endif
44
45 // private helper functions
getQuantDist(const unsigned * const coeffMagn,const uint8_t scaleFactor,const uint8_t * const coeffQuant,const uint16_t numCoeffs)46 double SfbQuantizer::getQuantDist (const unsigned* const coeffMagn, const uint8_t scaleFactor,
47 const uint8_t* const coeffQuant, const uint16_t numCoeffs)
48 {
49 const double stepSizeDiv = m_lutSfNorm[scaleFactor];
50 double dDist = 0.0;
51
52 for (int i = numCoeffs - 1; i >= 0; i--)
53 {
54 const double d = m_lutXExp43[coeffQuant[i]] - coeffMagn[i] * stepSizeDiv;
55
56 dDist += d * d;
57 }
58
59 // consider quantization step-size in calculation of distortion
60 return dDist * m_lut2ExpX4[scaleFactor] * m_lut2ExpX4[scaleFactor];
61 }
62
// Quantize the MDCT magnitudes of one SFB with the nonuniform power-law quantizer,
// q = SFB_QUANT_OFFSET + (magn / 2^(sf/4))^(3/4), refined by LUT-based nearest-neighbor
// rounding, and return a least-squares optimized scale factor index. coeffQuant receives
// the quantized magnitudes (clipped to SCHAR_MAX); sigMaxQ and sigNumQ, when given,
// receive the pre-clipping maximum magnitude and the nonzero coefficient count. With
// EC_TRELLIS_OPT_CODING and a non-null arithmCoder, a rate-distortion optimized coding
// pass (quantizeMagnRDOC) may further reduce magnitudes and update the statistics.
uint8_t SfbQuantizer::quantizeMagnSfb (const unsigned* const coeffMagn, const uint8_t scaleFactor,
                               /*mod*/ uint8_t* const coeffQuant, const uint16_t numCoeffs,
#if EC_TRELLIS_OPT_CODING
                                       EntropyCoder* const arithmCoder, const uint16_t coeffOffset,
#endif
                                       short* const sigMaxQ /*= nullptr*/, short* const sigNumQ /*= nullptr*/)
{
  const double stepSizeDiv = m_lutSfNorm[scaleFactor];
  double dNum = 0.0, dDen = 0.0;
  short sf, maxQ = 0, numQ = 0;

  for (int i = numCoeffs - 1; i >= 0; i--)
  {
    const double normalizedMagn = (double) coeffMagn[i] * stepSizeDiv;
#if SFB_QUANT_FAST_POW
    short q;

    if (normalizedMagn < 28.5) // fast approximate pow (d, 0.75)
    {
      // based on code from: N. N. Schraudolph, "A Fast, Compact Approximation of the Expo-
      // nential Function," Neural Comput., vol. 11, pp. 853-862, 1998 and M. Ankerl, 2007,
      // https://martin.ankerl.com/2007/10/04/optimized-pow-approximation-for-java-and-c-c/
      union { double d; int32_t i[2]; } u = { normalizedMagn };

      // manipulate the exponent/high-mantissa bits directly to approximate d^0.75
      u.i[1] = int32_t (0.75 * (u.i[1] - 1072632447) + 1072632447.0);
      u.i[0] = 0;
      q = short (u.d + (u.d < 1.0 ? 0.3822484 : 0.2734375));
    }
    else
    {
      q = short (SFB_QUANT_OFFSET + pow (normalizedMagn, 0.75));
    }
#else
    short q = short (SFB_QUANT_OFFSET + pow (normalizedMagn, 0.75));
#endif

    if (q > 0)
    {
      if (q >= SCHAR_MAX) // clip, but remember the unclipped value
      {
        if (maxQ < q)
        {
          maxQ = q; // find maximum quantized magnitude in vector
        }
        q = SCHAR_MAX;
      }
      else
      {
        // nearest-neighbor refinement in the dequantized (x^(4/3)) domain
        const double diffRoundD = m_lutXExp43[q    ] - normalizedMagn;
        const double diffRoundU = m_lutXExp43[q + 1] - normalizedMagn;

        if (diffRoundU * diffRoundU < diffRoundD * diffRoundD)
        {
          q++; // round-up gives lower distortion than round-down
        }
      }
      if (maxQ < q)
      {
        maxQ = q;
      }
      numQ++;
      // accumulate cross- and self-correlation terms for the least-squares
      // scale factor modifier decision below
      dNum += m_lutXExp43[q] * normalizedMagn;
      dDen += m_lutXExp43[q] * m_lutXExp43[q];
    }
#if SFB_QUANT_PERCEPT_OPT
    else // q == 0, assume perceptual transparency for code below
    {
      dNum += normalizedMagn * normalizedMagn;
      dDen += normalizedMagn * normalizedMagn;
    }
#endif
    coeffQuant[i] = (uint8_t) q;
  }

  if (sigMaxQ) *sigMaxQ = maxQ; // max. quantized value magnitude
  if (sigNumQ) *sigNumQ = numQ; // nonzero coeff. count (L0 norm)

  sf = scaleFactor;
  // compute least-squares optimal modifier added to scale factor
  if (dNum > SF_THRESH_POS * dDen) sf++;
  else
  if (dNum < SF_THRESH_NEG * dDen) sf--;

#if EC_TRELLIS_OPT_CODING
  if (arithmCoder && (sf > 0) && (maxQ <= SCHAR_MAX)) // use RDOC
  {
    EntropyCoder& entrCoder = *arithmCoder;
#if EC_TRAIN
    // training mode: measure the actual SFB bit count as the RDOC rate target,
    // then restore the coder to its pre-measurement state
    const uint32_t codStart = entrCoder.arithGetCodState ();
    const uint32_t ctxStart = entrCoder.arithGetCtxState ();
    uint32_t bitCount = entrCoder.arithCodeSigMagn (&coeffQuant[-((int) coeffOffset)], coeffOffset, numCoeffs);

    bitCount += (entrCoder.arithGetCtxState () >> 17) & 31; // refinement: +new-old m_acBits
    bitCount -= __min ((ctxStart >> 17) & 31, bitCount);
    bitCount += (uint32_t) numQ; // add sign bits for completion

    entrCoder.arithSetCodState (codStart); // back to last state
    entrCoder.arithSetCtxState (ctxStart);
#else
    uint32_t bitCount = (uint32_t) numQ;
#endif
    if ((bitCount = quantizeMagnRDOC (entrCoder, (uint8_t) sf, bitCount, coeffOffset, coeffMagn, numCoeffs, coeffQuant)) > 0)
    {
      numQ = bitCount & SHRT_MAX; // low bits carry the new sign-bit count

      if ((numQ > 0) && (sf < m_maxSfIndex)) // nonzero-quantized
      {
        const double magnNormDiv = m_lutSfNorm[sf];

        dNum = dDen = 0.0;
        for (int i = numCoeffs - 1; i >= 0; i--)
        {
          const double normalizedMagn = (double) coeffMagn[i] * magnNormDiv;
          const uint8_t q = coeffQuant[i];

          if (q > 0)
          {
            dNum += m_lutXExp43[q] * normalizedMagn;
            dDen += m_lutXExp43[q] * m_lutXExp43[q];
          }
#if SFB_QUANT_PERCEPT_OPT
          else // assume perceptual transparency for code below
          {
            dNum += normalizedMagn * normalizedMagn;
            dDen += normalizedMagn * normalizedMagn;
          }
#endif
        }

        // re-compute least-squares optimal scale factor modifier
        if (dNum > SF_THRESH_POS * dDen) sf++;
#if !SFB_QUANT_PERCEPT_OPT
        else
        if (dNum < SF_THRESH_NEG * dDen) sf--; // reduces SFB RMS
#endif
      } // if nonzero

      if (sigMaxQ) *sigMaxQ = (numQ > 0 ? maxQ : 0); // a new max
      if (sigNumQ) *sigNumQ = numQ; // a new nonzero coeff. count
    }
  }
#endif // EC_TRELLIS_OPT_CODING

#if SFB_QUANT_PERCEPT_OPT
  if ((numQ > 0) && (sf > 0 && sf <= scaleFactor)) // recover RMS
  {
    const double magnNormDiv = m_lutSfNorm[sf];

    dNum = 0.0; // dDen has normalized energy after quantization
    for (int i = numCoeffs - 1; i >= 0; i--)
    {
      const double normalizedMagn = (double) coeffMagn[i] * magnNormDiv;

      dNum += normalizedMagn * normalizedMagn;
    }

    if (dNum > SF_THRESH_POS * SF_THRESH_POS * dDen) sf++;
  }
#endif
  return (uint8_t) __max (0, sf); // optimized scale factor index
}
224
225 #if EC_TRELLIS_OPT_CODING
// Rate-distortion optimized coding (RDOC) of one SFB's quantized magnitudes: a 4-state
// trellis over coefficient 2-tuples, where each state tests rounding one or both tuple
// members down by 1 ([0,0], [0,-1], [-1,0], [-1,-1]; only magnitudes equal to 1 may be
// reduced). A Viterbi search minimizes distortion + lambda * rate, the winning per-tuple
// reductions are applied to quantCoeffs in place. Returns 0 on invalid input, the
// target bit count when the search brought no improvement, or (in the non-training
// build) an OK flag (bit 15) combined with the new sign-bit (nonzero) count.
uint32_t SfbQuantizer::quantizeMagnRDOC (EntropyCoder& entropyCoder, const uint8_t optimalSf, const unsigned targetBitCount,
                                         const uint16_t coeffOffset, const unsigned* const coeffMagn, // initial MDCT magnitudes
                                         const uint16_t numCoeffs, uint8_t* const quantCoeffs) // returns updated SFB statistics
{
  // numTuples: num of trellis stages. Based on: A. Aggarwal, S. L. Regunathan, and K. Rose,
  // "Trellis-Based Optimization of MPEG-4 Advanced Audio Coding," in Proc. IEEE Workshop on
  // Speech Coding, pp. 142-144, Sep. 2000. Modified for arithmetic instead of Huffman coder
  const uint32_t codStart = entropyCoder.arithGetCodState ();
  const uint32_t ctxStart = entropyCoder.arithGetCtxState (); // before call to getBitCount
  const double stepSizeDiv = m_lutSfNorm[optimalSf];
  const uint16_t numStates = 4; // 4 reduction types: [0, 0], [0, -1], [-1, 0], and [-1, -1]
  const uint16_t numTuples = numCoeffs >> 1;
  uint8_t* const quantRate = &m_coeffTemp[((unsigned) m_maxSize8M1 + 1) << 3];
  uint32_t prevCodState[4] = {0, 0, 0, 0};
  uint32_t prevCtxState[4] = {0, 0, 0, 0};
  double   prevVtrbCost[4] = {0, 0, 0, 0};
  uint32_t tempCodState[4] = {0, 0, 0, 0};
  uint32_t tempCtxState[4] = {0, 0, 0, 0};
  double   tempVtrbCost[4] = {0, 0, 0, 0};
  double   quantDist[32][4]; // TODO: dynamic memory allocation
  // last row of quantDist is reused as byte storage for the back-traced decisions
  uint8_t* const optimalIs = (uint8_t* const) (quantDist[32-1]);
  uint8_t  tempQuant[4], numQ; // for tuple/SFB sign bit counting
  unsigned tempBitCount, tuple, is;
  int ds;
#if EC_TRAIN
  double refSfbDist = 0.0, tempSfbDist = 0.0;
#else
  const double lambda = getLagrangeValue (m_rateIndex);
#endif

  if ((coeffMagn == nullptr) || (quantCoeffs == nullptr) || (optimalSf > m_maxSfIndex) || (numTuples == 0) || (numTuples > 32) ||
      (targetBitCount == 0) || (targetBitCount > SHRT_MAX))
  {
    return 0; // invalid input error
  }

  // save third-last tuple value, required due to an insufficiency of arithGet/SetCtxState()
  if (coeffOffset > 5) tempQuant[3] = entropyCoder.arithGetTuplePtr ()[(coeffOffset >> 1) - 3];

  for (tuple = 0; tuple < numTuples; tuple++) // tuple-wise non-weighted distortion and rate
  {
    const uint16_t tupleStart  = tuple << 1;
    const uint16_t tupleOffset = coeffOffset + tupleStart;
    const double normalMagnA = (double) coeffMagn[tupleStart    ] * stepSizeDiv;
    const double normalMagnB = (double) coeffMagn[tupleStart + 1] * stepSizeDiv;
    uint8_t coeffQuantA = quantCoeffs[tupleStart];
    uint8_t coeffQuantB = quantCoeffs[tupleStart + 1];

    for (is = 0; is < numStates; is++) // populate tuple trellis
    {
      // state 0 codes the unmodified magnitudes, states 1..3 code the reduced pair
      // held in tempQuant; pointer is pre-offset as expected by arithCodeSigMagn()
      uint8_t* const mag = (is != 0 ? tempQuant : quantCoeffs) - (int) tupleOffset; // see arithCodeSigMagn()
      uint8_t* currRate = &quantRate[(is + tuple * numStates) * numStates];
      double diffA, diffB;

      if (is != 0) // test reduction of quantized MDCT magnitudes
      {
        const uint8_t redA = is >> 1;
        const uint8_t redB = is & 1;

        if ((redA > 0 && coeffQuantA != 1) || (redB > 0 && coeffQuantB != 1)) // avoid path
        {
          // reduction only allowed on magnitude 1; mark state unreachable (UCHAR_MAX)
          tempCodState[is] = tempCodState[0];
          tempCtxState[is] = tempCtxState[0];
          memset (currRate, UCHAR_MAX, numStates);

          continue;
        }
        tempQuant[0] = (coeffQuantA -= redA);
        tempQuant[1] = (coeffQuantB -= redB);
      }
      diffA = m_lutXExp43[coeffQuantA] - normalMagnA;
      diffB = m_lutXExp43[coeffQuantB] - normalMagnB;
      quantDist[tuple][is] = diffA * diffA + diffB * diffB;

      numQ = (coeffQuantA > 0 ? 1 : 0) + (coeffQuantB > 0 ? 1 : 0);

      if (tuple == 0) // first tuple, with tupleStart == sfbStart
      {
        entropyCoder.arithSetCodState (codStart); // start of SFB
        entropyCoder.arithSetCtxState (ctxStart);
        tempBitCount = entropyCoder.arithCodeSigMagn (mag, tupleOffset, 2);

        tempBitCount += (entropyCoder.arithGetCtxState () >> 17) & 31; // +new-old m_acBits
        tempBitCount -= __min ((ctxStart >> 17) & 31, tempBitCount);

        memset (currRate, tempBitCount + numQ, numStates);
      }
      else // tuple > 0, rate depends on decisions for last tuple
      {
        for (ds = numStates - 1; ds >= 0; ds--)
        {
          if (quantRate[(ds + (tuple-1) * numStates) * numStates] >= UCHAR_MAX)// avoid path
          {
            currRate[ds] = UCHAR_MAX;

            continue;
          }

          // resume coding from the state left by predecessor decision ds
          entropyCoder.arithSetCodState (prevCodState[ds]);
          entropyCoder.arithSetCtxState (prevCtxState[ds], tupleOffset);
          tempBitCount = entropyCoder.arithCodeSigMagn (mag, tupleOffset, 2);

          tempBitCount += (entropyCoder.arithGetCtxState () >> 17) & 31;// +new-old m_acBits
          tempBitCount -= __min ((prevCtxState[ds] >> 17) & 31, tempBitCount);

          currRate[ds] = uint8_t (tempBitCount + numQ);
        }
      }
      // statistically best place to save states is after ds == 0
      tempCodState[is] = entropyCoder.arithGetCodState ();
      tempCtxState[is] = entropyCoder.arithGetCtxState ();
    } // for is
#if EC_TRAIN
    refSfbDist += quantDist[tuple][0];
#endif
    memcpy (prevCodState, tempCodState, numStates * sizeof (uint32_t));
    memcpy (prevCtxState, tempCtxState, numStates * sizeof (uint32_t));
  } // for tuple

  entropyCoder.arithSetCodState (codStart); // back to last state
  entropyCoder.arithSetCtxState (ctxStart, coeffOffset);
  // restore third-last tuple value, see insufficiency note above
  if (coeffOffset > 5) entropyCoder.arithGetTuplePtr ()[(coeffOffset >> 1) - 3] = tempQuant[3];

#if EC_TRAIN
  // training: sweep lambda upward until the minimum-distortion path meets the rate target
  tempBitCount = targetBitCount + 1; // Viterbi search for minimum distortion at target rate
  for (double lambda = 0.015625; (lambda <= 0.375) && (tempBitCount > targetBitCount); lambda += 0.0078125)
#endif
  {
    double* const prevCost = prevVtrbCost;
#if !EC_TRAIN
    uint8_t* const prevPath = (uint8_t*) quantDist;// backtracker
#endif
    double   costMinIs = (double) UINT_MAX;
    unsigned pathMinIs = 0;
#if EC_TRAIN
    uint8_t prevPath[16*4];
    tempSfbDist = 0.0;
#endif

    for (is = 0; is < numStates; is++) // initialize minimum path
    {
      const uint8_t currRate = quantRate[is * numStates];

      // unreachable states (rate UCHAR_MAX) get a prohibitive cost
      prevCost[is] = (currRate >= UCHAR_MAX ? (double) UINT_MAX : lambda * currRate + quantDist[0][is]);
      prevPath[is] = 0;
    }

    for (tuple = 1; tuple < numTuples; tuple++) // find min. path
    {
      double* const currCost = tempVtrbCost;
      uint8_t* const currPath = &prevPath[tuple * numStates];

      for (is = 0; is < numStates; is++) // tuple's minimum path
      {
        uint8_t* currRate = &quantRate[(is + tuple * numStates) * numStates];
        double  costMinDs = (double) UINT_MAX;
        uint8_t pathMinDs = 0;

        for (ds = numStates - 1; ds >= 0; ds--) // transitions
        {
          const double costCurr = (currRate[ds] >= UCHAR_MAX ? (double) UINT_MAX : prevCost[ds] + lambda * currRate[ds]);

          if (costMinDs > costCurr)
          {
            costMinDs = costCurr;
            pathMinDs = (uint8_t) ds;
          }
        }
        if (costMinDs < UINT_MAX) costMinDs += quantDist[tuple][is];

        currCost[is] = costMinDs;
        currPath[is] = pathMinDs;
      } // for is

      memcpy (prevCost, currCost, numStates * sizeof (double)); // TODO: avoid memcpy, use pointer swapping instead for speed
    } // for tuple

    for (tempBitCount = is = 0; is < numStates; is++) // minimum
    {
      if (costMinIs > prevCost[is])
      {
        costMinIs = prevCost[is];
        pathMinIs = is;
      }
    }

    // back-trace from the cheapest final state, collecting per-tuple decisions
    for (tuple--; tuple > 0; tuple--) // min-cost rate and types
    {
      const uint8_t* currPath = &prevPath[tuple * numStates];
      const uint8_t pathMinDs = currPath[pathMinIs];

      optimalIs[tuple] = (uint8_t) pathMinIs;
      tempBitCount += quantRate[pathMinDs + (pathMinIs + tuple * numStates) * numStates];
#if EC_TRAIN
      tempSfbDist += quantDist[tuple][pathMinIs];
#endif
      pathMinIs = pathMinDs;
    }
    optimalIs[0] = (uint8_t) pathMinIs;
    tempBitCount += quantRate[pathMinIs * numStates];
#if EC_TRAIN
    tempSfbDist += quantDist[0][pathMinIs];
#endif
  } // Viterbi search

#if EC_TRAIN
  if ((tempSfbDist <= refSfbDist) || (tempBitCount <= targetBitCount))
#endif
  {
#if !EC_TRAIN
    numQ = 0;
#endif
    for (tuple = 0; tuple < numTuples; tuple++) // re-quantize SFB with R/D optimal rounding
    {
      const uint16_t tupleStart = tuple << 1;
      const uint8_t  tupIs = optimalIs[tuple];
      uint8_t& coeffQuantA = quantCoeffs[tupleStart];
      uint8_t& coeffQuantB = quantCoeffs[tupleStart + 1];

      if (tupIs != 0) // optimal red of quantized MDCT magnitudes
      {
        coeffQuantA -= (tupIs >> 1);
        coeffQuantB -= (tupIs & 1);
      }
#if !EC_TRAIN
      numQ += (coeffQuantA > 0 ? 1 : 0) + (coeffQuantB > 0 ? 1 : 0);
#endif
    } // for tuple

#if EC_TRAIN
    return tempBitCount;
#else
    return (1u << 15) | numQ; // final stats: OK flag | sign bits
#endif
  }

  return targetBitCount;
}
465 #endif // EC_TRELLIS_OPT_CODING
466
467 // constructor
SfbQuantizer()468 SfbQuantizer::SfbQuantizer ()
469 {
470 // initialize all helper buffers
471 m_coeffMagn = nullptr;
472 #if EC_TRELLIS_OPT_CODING
473 m_coeffTemp = nullptr;
474 #endif
475 m_lut2ExpX4 = nullptr;
476 m_lutSfNorm = nullptr;
477 m_lutXExp43 = nullptr;
478
479 m_maxSfIndex = 0;
480 #if EC_TRELLIS_OPT_CODING
481 m_numCStates = 0;
482
483 for (unsigned b = 0; b < 52; b++)
484 {
485 m_quantDist[b] = nullptr;
486 m_quantInSf[b] = nullptr;
487 m_quantRate[b] = nullptr;
488 }
489 #endif
490 }
491
492 // destructor
~SfbQuantizer()493 SfbQuantizer::~SfbQuantizer ()
494 {
495 // free allocated helper buffers
496 MFREE (m_coeffMagn);
497 #if EC_TRELLIS_OPT_CODING
498 MFREE (m_coeffTemp);
499 #endif
500 MFREE (m_lut2ExpX4);
501 MFREE (m_lutSfNorm);
502 MFREE (m_lutXExp43);
503 #if EC_TRELLIS_OPT_CODING
504
505 for (unsigned b = 0; b < 52; b++)
506 {
507 MFREE (m_quantDist[b]);
508 MFREE (m_quantInSf[b]);
509 MFREE (m_quantRate[b]);
510 }
511 #endif
512 }
513
514 // public functions
// Allocate and initialize all quantizer work buffers and lookup tables for the given
// maximum transform length and scale factor range. With EC_TRELLIS_OPT_CODING, the
// per-SFB trellis state arrays are also sized from the bit-rate mode and sample rate.
// Returns 0 on success, 1 on invalid arguments, 2 on allocation failure (the destructor
// frees any partial allocations).
unsigned SfbQuantizer::initQuantMemory (const unsigned maxTransfLength,
#if EC_TRELLIS_OPT_CODING
                                        const uint8_t numSwb, const uint8_t bitRateMode, const unsigned samplingRate,
#endif
                                        const uint8_t maxScaleFacIndex /*= SCHAR_MAX*/)
{
  const unsigned numScaleFactors = (unsigned) maxScaleFacIndex + 1;
#if EC_TRELLIS_OPT_CODING
  // trellis size heuristics: more states for lower sample rates/bit-rate modes
  const uint8_t complexityOffset = (samplingRate < 28800 ? 8 - (samplingRate >> 13) : 5) + ((bitRateMode == 0) && (samplingRate >= 8192) ? 1 : 0);
  const uint8_t numTrellisStates = complexityOffset - __min (2, (bitRateMode + 2) >> 2); // number of states per SFB
  const uint8_t numSquaredStates = numTrellisStates * numTrellisStates;
  const uint16_t quantRateLength = (samplingRate < 28800 || samplingRate >= 57600 ? 512 : 256); // quantizeMagnRDOC()
#endif
  unsigned x;

  if ((maxTransfLength < 128) || (maxTransfLength > 2048) || (maxTransfLength & 7) || (maxScaleFacIndex == 0) || (maxScaleFacIndex > SCHAR_MAX))
  {
    return 1; // invalid arguments error
  }

  m_maxSfIndex = maxScaleFacIndex;

  if ((m_coeffMagn = (unsigned*) malloc (maxTransfLength * sizeof (unsigned))) == nullptr ||
#if EC_TRELLIS_OPT_CODING
      (m_coeffTemp = (uint8_t* ) malloc (maxTransfLength + quantRateLength  )) == nullptr ||
#endif
      (m_lut2ExpX4 = (double*  ) malloc (numScaleFactors * sizeof (double  ))) == nullptr ||
      (m_lutSfNorm = (double*  ) malloc (numScaleFactors * sizeof (double  ))) == nullptr ||
      (m_lutXExp43 = (double*  ) malloc ((SCHAR_MAX + 1) * sizeof (double  ))) == nullptr)
  {
    return 2; // memory allocation error
  }
#if EC_TRELLIS_OPT_CODING
  m_maxSize8M1 = (maxTransfLength >> 3) - 1;
  m_numCStates = numTrellisStates;
  m_rateIndex  = bitRateMode;

  for (x = 0; x < __min (52u, numSwb); x++)
  {
    if ((m_quantDist[x] = (double*  ) malloc (numTrellisStates * sizeof (double  ))) == nullptr ||
        (m_quantInSf[x] = (uint8_t* ) malloc (numTrellisStates * sizeof (uint8_t ))) == nullptr ||
        (m_quantRate[x] = (uint16_t*) malloc (numSquaredStates * sizeof (uint16_t))) == nullptr)
    {
      return 2;
    }
  }
#else
  // NOTE(review): this assumes m_coeffTemp is a fixed-size array in the non-trellis
  // configuration, so that sizeof () yields the buffer size rather than a pointer
  // size - confirm against the member declaration in quantization.h
  memset (m_coeffTemp, 0, sizeof (m_coeffTemp));
#endif
  // calculate scale factor gain 2^(x/4)
  for (x = 0; x < numScaleFactors; x++)
  {
    m_lut2ExpX4[x] = pow (2.0, (double) x / 4.0);
    m_lutSfNorm[x] = 1.0 / m_lut2ExpX4[x];
  }
  // calculate dequantized coeff x^(4/3)
  for (x = 0; x < (SCHAR_MAX + 1); x++)
  {
    m_lutXExp43[x] = pow ((double) x, 4.0 / 3.0);
  }

  return 0; // no error
}
578
// Quantize and (optionally) entropy-code one SFB of a window group. Returns the rate-
// distortion optimized scale factor index, or UCHAR_MAX on invalid input. Zero-valued
// or down-predicted SFBs are zeroed out to save delta-coding bits; otherwise up to
// three quantization trials with decreasing scale factor indices are compared, and for
// long (non-grouped) windows the per-SFB references for quantizeSpecRDOC are recorded.
// grpStats[sfb], when given, receives (max magnitude << 16) | bit count.
uint8_t SfbQuantizer::quantizeSpecSfb (EntropyCoder& entropyCoder, const int32_t* const inputCoeffs, const uint8_t grpLength,
                                       const uint16_t* const grpOffsets, uint32_t* const grpStats, // quant./coding statistics
                                       const unsigned sfb, const uint8_t sfIndex, const uint8_t sfIndexPred /*= UCHAR_MAX*/,
                                       uint8_t* const quantCoeffs /*= nullptr*/) // returns the RD optimized scale factor index
{
#if EC_TRELLIS_OPT_CODING
  // trellis RDOC only applies to non-grouped (long-window) spectra
  EntropyCoder* const entrCoder = (grpLength == 1 ? &entropyCoder : nullptr);
#endif
  uint8_t sfBest = sfIndex;

  if ((inputCoeffs == nullptr) || (grpOffsets == nullptr) || (sfb >= 52) || (sfIndex > m_maxSfIndex))
  {
    return UCHAR_MAX; // invalid input error
  }

#if EC_TRELLIS_OPT_CODING
  if (grpLength == 1) // references for RDOC
  {
    m_quantDist[sfb][1] = -1.0;
    m_quantInSf[sfb][1] = sfIndex;
    m_quantRate[sfb][1] = 0; // for sgn bits
    m_quantRate[sfb][0] = entropyCoder.arithGetCtxState () & USHRT_MAX; // ref start context
  }
#endif
  if ((sfIndex == 0) || (sfIndexPred <= m_maxSfIndex && sfIndex + INDEX_OFFSET < sfIndexPred))
  {
    // SFB is zeroed: sfIndex == 0 or the index dropped so far below the predecessor
    // that repeating the predecessor (delta 0) is cheaper than signaling it
    const uint16_t grpStart = grpOffsets[0];
    const uint16_t sfbStart = grpOffsets[sfb];
    const uint16_t sfbWidth = grpOffsets[sfb + 1] - sfbStart;
    uint32_t* const coeffMagn = &m_coeffMagn[sfbStart];

    for (int i = sfbWidth - 1; i >= 0; i--) // back up magnitudes
    {
      coeffMagn[i] = abs (inputCoeffs[sfbStart + i]);
    }

    if (quantCoeffs)
    {
      memset (&quantCoeffs[sfbStart], 0, sfbWidth * sizeof (uint8_t)); // SFB output zeroing
      if (grpStats) // approximate bit count
      {
        grpStats[sfb] = getBitCount (entropyCoder, 0, 0, grpLength, &quantCoeffs[grpStart], sfbStart - grpStart, sfbWidth);
      }
    }
    return sfIndexPred - (sfIndex == 0 ? 0 : INDEX_OFFSET); // save delta bits if applicable
  }
  else // nonzero sf, optimized quantization
  {
    const uint16_t grpStart = grpOffsets[0];
    const uint16_t sfbStart = grpOffsets[sfb];
    const uint16_t sfbWidth = grpOffsets[sfb + 1] - sfbStart;
    const uint16_t cpyWidth = sfbWidth * sizeof (uint8_t);
    uint32_t* const coeffMagn = &m_coeffMagn[sfbStart];
    uint32_t codStart = 0, ctxStart = 0; // coder state before the best trial
    uint32_t codFinal = 0, ctxFinal = 0; // coder state after the best trial
    double distBest = 0, distCurr = 0;
    short maxQBest = 0, maxQCurr = 0;
    short numQBest = 0, numQCurr = 0;
#if EC_TRELLIS_OPT_CODING
    bool rdOptimQuant = (grpLength != 1);
#else
    bool rdOptimQuant = true;
#endif
    // two scratch areas in m_coeffTemp hold the competing quantization trials
    uint8_t* ptrBest = &m_coeffTemp[0];
    uint8_t* ptrCurr = &m_coeffTemp[100];
    uint8_t sfCurr = sfIndex;

    for (int i = sfbWidth - 1; i >= 0; i--) // back up magnitudes
    {
      coeffMagn[i] = abs (inputCoeffs[sfbStart + i]);
    }

    // --- determine default quantization result using range limited scale factor as a reference
    sfBest = quantizeMagnSfb (coeffMagn, sfCurr, ptrBest, sfbWidth,
#if EC_TRELLIS_OPT_CODING
                              entrCoder, sfbStart - grpStart,
#endif
                              &maxQBest, &numQBest);

    if (maxQBest > SCHAR_MAX) // limit SNR via scale factor index
    {
      for (uint8_t c = 0; (c < 2) && (maxQBest > SCHAR_MAX); c++) // very rarely done twice
      {
        // raise the scale factor just enough to bring the maximum magnitude into range
        sfCurr += getScaleFacOffset (pow ((double) maxQBest, 4.0 / 3.0) * 0.001566492688) + c; // / m_lutXExp43[SCHAR_MAX]
        sfBest = quantizeMagnSfb (coeffMagn, sfCurr, ptrBest, sfbWidth,
#if EC_TRELLIS_OPT_CODING
                                  entrCoder, sfbStart - grpStart,
#endif
                                  &maxQBest, &numQBest);
      }
      rdOptimQuant = false;
    }
    else if ((sfBest < sfCurr) && (sfBest != sfIndexPred)) // re-optimize above quantization
    {
      sfBest = quantizeMagnSfb (coeffMagn, --sfCurr, ptrBest, sfbWidth,
#if EC_TRELLIS_OPT_CODING
                                entrCoder, sfbStart - grpStart,
#endif
                                &maxQBest, &numQBest);

      rdOptimQuant &= (maxQBest <= SCHAR_MAX);
    }

#if EC_TRELLIS_OPT_CODING
    if (grpLength == 1) // ref masking level
    {
      m_quantInSf[sfb][1] = __min (sfCurr, m_maxSfIndex);
    }
#endif
    if (maxQBest == 0) // SFB was quantized to zero - zero output
    {
      if (quantCoeffs)
      {
        memset (&quantCoeffs[sfbStart], 0, cpyWidth);
        if (grpStats) // estimated bit count
        {
          grpStats[sfb] = getBitCount (entropyCoder, 0, 0, grpLength, &quantCoeffs[grpStart], sfbStart - grpStart, sfbWidth);
        }
      }
      return sfIndexPred; // repeat scale factor, save delta bits
    }

    // --- check whether optimized quantization and coding results in lower rate-distortion cost
    distBest = getQuantDist (coeffMagn, sfBest, ptrBest, sfbWidth);

#if EC_TRELLIS_OPT_CODING
    if (grpLength == 1) // ref band-wise NMR
    {
      const double refSfbNmrDiv = m_lutSfNorm[m_quantInSf[sfb][1]];

      m_quantDist[sfb][1] = distBest * refSfbNmrDiv * refSfbNmrDiv;
      m_quantRate[sfb][1] = numQBest; // sgn
    }
#endif
    if (quantCoeffs)
    {
      memcpy (&quantCoeffs[sfbStart], ptrBest, cpyWidth);

      codStart = entropyCoder.arithGetCodState (); // start state
      ctxStart = entropyCoder.arithGetCtxState ();
      numQBest += getBitCount (entropyCoder, sfBest, sfIndexPred, grpLength, &quantCoeffs[grpStart], sfbStart - grpStart, sfbWidth);
      codFinal = entropyCoder.arithGetCodState (); // final state
      ctxFinal = entropyCoder.arithGetCtxState ();
    }
    rdOptimQuant &= (distBest > 0.0);

    if ((sfBest < sfCurr) && (sfBest != sfIndexPred) && rdOptimQuant) // R/D re-optimization
    {
#if EC_TRELLIS_OPT_CODING
      const double refSfbNmrDiv = m_lutSfNorm[sfCurr];
      const double lambda = getLagrangeValue (m_rateIndex);
#endif
      // second trial with a further reduced scale factor index
      sfCurr = quantizeMagnSfb (coeffMagn, sfCurr - 1, ptrCurr, sfbWidth,
#if EC_TRELLIS_OPT_CODING
                                entrCoder, sfbStart - grpStart,
#endif
                                &maxQCurr, &numQCurr);

      distCurr = getQuantDist (coeffMagn, sfCurr, ptrCurr, sfbWidth);
      if (quantCoeffs)
      {
        memcpy (&quantCoeffs[sfbStart], ptrCurr, cpyWidth);

        entropyCoder.arithSetCodState (codStart); // reset state
        entropyCoder.arithSetCtxState (ctxStart);
        numQCurr += getBitCount (entropyCoder, sfCurr, sfIndexPred, grpLength, &quantCoeffs[grpStart], sfbStart - grpStart, sfbWidth);
      }

      // rate-distortion decision, using empirical Lagrange value
#if EC_TRELLIS_OPT_CODING
      if (distCurr * refSfbNmrDiv * refSfbNmrDiv + lambda * numQCurr < distBest * refSfbNmrDiv * refSfbNmrDiv + lambda * numQBest)
#else
      if ((maxQCurr <= maxQBest) && (numQCurr <= numQBest + (distCurr >= distBest ? -1 : short (0.5 + distBest / __max (1.0, distCurr)))))
#endif
      {
        maxQBest = maxQCurr;
        numQBest = numQCurr;
        sfBest   = sfCurr;
      }
      else if (quantCoeffs) // discard result, recover best trial
      {
        memcpy (&quantCoeffs[sfbStart], ptrBest, cpyWidth);

        entropyCoder.arithSetCodState (codFinal); // reset state
        entropyCoder.arithSetCtxState (ctxFinal);
      }
    }

    if (grpStats)
    {
      grpStats[sfb] = ((uint32_t) maxQBest << 16) | numQBest; // max magnitude and bit count
    }
  } // if sfIndex == 0

  return __min (sfBest, m_maxSfIndex);
}
775
776 #if EC_TRELLIS_OPT_CODING
quantizeSpecRDOC(EntropyCoder & entropyCoder,uint8_t * const optimalSf,const unsigned targetBitCount,const uint16_t * const grpOffsets,uint32_t * const grpStats,const unsigned numSfb,uint8_t * const quantCoeffs)777 unsigned SfbQuantizer::quantizeSpecRDOC (EntropyCoder& entropyCoder, uint8_t* const optimalSf, const unsigned targetBitCount,
778 const uint16_t* const grpOffsets, uint32_t* const grpStats, // quant./coding statistics
779 const unsigned numSfb, uint8_t* const quantCoeffs) // returns RD optimization bit count
780 {
781 // numSfb: number of trellis stages. Based on: A. Aggarwal, S. L. Regunathan, and K. Rose,
782 // "Trellis-Based Optimization of MPEG-4 Advanced Audio Coding," see also quantizeMagnRDOC
783 const uint32_t codStart = USHRT_MAX << 16;
784 const uint32_t ctxStart = m_quantRate[0][0]; // start context before call to quantizeSfb()
785 const uint32_t codFinal = entropyCoder.arithGetCodState ();
786 const uint32_t ctxFinal = entropyCoder.arithGetCtxState (); // after call to quantizeSfb()
787 const uint16_t grpStart = grpOffsets[0];
788 uint8_t* const inScaleFac = &m_coeffTemp[((unsigned) m_maxSize8M1 - 6) << 3];
789 uint32_t prevCodState[8] = {0, 0, 0, 0, 0, 0, 0, 0};
790 uint32_t prevCtxState[8] = {0, 0, 0, 0, 0, 0, 0, 0};
791 uint8_t prevScaleFac[8] = {0, 0, 0, 0, 0, 0, 0, 0};
792 double prevVtrbCost[8] = {0, 0, 0, 0, 0, 0, 0, 0};
793 uint32_t tempCodState[8] = {0, 0, 0, 0, 0, 0, 0, 0};
794 uint32_t tempCtxState[8] = {0, 0, 0, 0, 0, 0, 0, 0};
  // NOTE(review): this is the tail of a larger function whose signature starts above this
  // chunk. It performs trellis-based rate-distortion optimized quantization of one window
  // group in three phases: (1) populate a per-SFB trellis of candidate scale factors with
  // their distortions and context-dependent arithmetic-coding rates, (2) run a Viterbi
  // search for the minimum-cost path through that trellis, (3) re-quantize the spectrum
  // using the scale factors selected by the winning path.
  uint8_t tempScaleFac[8] = {0, 0, 0, 0, 0, 0, 0, 0}; // per-state optimized scale factors of current SFB
  double tempVtrbCost[8] = {0, 0, 0, 0, 0, 0, 0, 0}; // per-state Viterbi path costs of current SFB
  unsigned tempBitCount, sfb, is;
  int ds; // ds = previous-SFB state index (trellis transition), is = current-SFB state index
#if EC_TRAIN
  double refGrpDist = 0.0, tempGrpDist = 0.0;
#else
  const double lambda = getLagrangeValue (m_rateIndex); // fixed R/D trade-off (see helper above)
#endif

  // sanity checks; 52 is presumably the maximum SFB count supported here - TODO confirm
  if ((optimalSf == nullptr) || (quantCoeffs == nullptr) || (grpOffsets == nullptr) || (numSfb == 0) || (numSfb > 52) ||
      (targetBitCount == 0) || (targetBitCount > SHRT_MAX))
  {
    return 0; // invalid input error
  }

  // --- phase 1: populate the trellis. For each SFB, try m_numCStates candidate scale
  // factors (state is == 1 reuses the quantization already computed by the caller) and
  // record distortion in m_quantDist and, for every predecessor state ds, the arithmetic-
  // coding rate in m_quantRate (rate depends on the coder context left by the predecessor).
  for (sfb = 0; sfb < numSfb; sfb++) // SFB-wise scale factor, weighted distortion, and rate
  {
    const uint8_t refSf = m_quantInSf[sfb][1]; // reference = caller's quantization (state 1)
    const uint16_t refNumQ = m_quantRate[sfb][1];
    const double refQuantDist = m_quantDist[sfb][1];
    const double refQuantNorm = m_lutSfNorm[refSf] * m_lutSfNorm[refSf]; // squared SF-dependent weighting
    const uint16_t sfbStart = grpOffsets[sfb];
    const uint16_t sfbWidth = grpOffsets[sfb + 1] - sfbStart;
    const uint32_t* coeffMagn = &m_coeffMagn[sfbStart];
    uint8_t* const tempQuant = &m_coeffTemp[sfbStart - grpStart]; // scratch quantization buffer
    bool maxSnrReached = false;

    if (refQuantDist < 0.0) memset (tempQuant, 0, sfbWidth * sizeof (uint8_t)); // -1.0 marks a zeroed SFB
#if EC_TRAIN
    else refGrpDist += refQuantDist;
#endif
    if (grpStats)
    {
      grpStats[sfb] = (grpStats[sfb] & (USHRT_MAX << 16)) | refNumQ; // keep magn, sign bits
    }

    for (is = 0; is < m_numCStates; is++) // populate SFB trellis
    {
      // state is != 1 quantizes into m_coeffTemp below; state is == 1 reads the caller's result
      const uint8_t* mag = (is != 1 ? m_coeffTemp /*= tempQuant[grpStart - sfbStart]*/ : &quantCoeffs[grpStart]);
      double& currDist = m_quantDist[sfb][is];
      uint16_t* currRate = &m_quantRate[sfb][is * m_numCStates]; // one rate entry per predecessor state ds
      uint8_t sfBest = optimalSf[sfb]; // optimal scalefactor
      short maxQCurr = 0, numQCurr = 0; // for sign bits counting

      if (refQuantDist < 0.0) // -1.0 means SFB is zero-quantized
      {
        currDist = -1.0;
        m_quantInSf[sfb][is] = refSf;
      }
      else if (is != 1) // quantization & distortion not computed
      {
        // candidate scale factor: offset the reference by (1 - is), clamped to the valid range
        const uint8_t sfCurr = __max (0, __min (m_maxSfIndex, refSf + 1 - (int) is));

        currDist = -1.0;
        if ((sfCurr == 0) || maxSnrReached)
        {
          maxSnrReached = true; // no finer quantization possible for this SFB
        }
        else // sfCurr > 0 && sfCurr <= m_maxSfIndex, re-quantize
        {
          sfBest = quantizeMagnSfb (coeffMagn, sfCurr, tempQuant, sfbWidth,
                                    &entropyCoder, sfbStart - grpStart,
                                    &maxQCurr, &numQCurr);

          if (maxQCurr > SCHAR_MAX) // quantized magnitude overflow - candidate unusable
          {
            maxSnrReached = true; numQCurr = 0;
          }
          else
          {
            currDist = getQuantDist (coeffMagn, sfBest, tempQuant, sfbWidth) * refQuantNorm;
          }
        }
        if (currDist < 0.0) memset (tempQuant, 0, sfbWidth * sizeof (uint8_t)); // invalid: zero the scratch SFB
        m_quantInSf[sfb][is] = sfCurr; // store initial scale fac
      }
      else // is == 1, quant. & dist. computed with quantizeSfb()
      {
        numQCurr = refNumQ;
      }

      if (sfb == 0) // first SFB, having sfbStart - grpStart == 0
      {
        entropyCoder.arithSetCodState (codStart); // group start
        entropyCoder.arithSetCtxState (ctxStart, 0);
        // USHRT_MAX marks an infeasible state; otherwise rate = sign bits + SF/magnitude bits
        tempBitCount = (maxSnrReached ? USHRT_MAX : numQCurr + getBitCount (entropyCoder, sfBest, UCHAR_MAX, 1, mag, 0, sfbWidth));

        for (ds = m_numCStates - 1; ds >= 0; ds--) // no predecessor: same rate for all ds
        {
          currRate[ds] = (uint16_t) tempBitCount;
        }
        tempCodState[is] = entropyCoder.arithGetCodState ();
        tempCtxState[is] = entropyCoder.arithGetCtxState ();
      }
      else // sfb > 0, rate depends on decisions in preceding SFB
      {
        for (ds = m_numCStates - 1; ds >= 0; ds--)
        {
          const uint16_t prevRate = m_quantRate[sfb - 1][ds * m_numCStates];

          // restore the arithmetic coder to the state left by predecessor ds before counting bits
          entropyCoder.arithSetCodState (prevCodState[ds]);
          entropyCoder.arithSetCtxState (prevCtxState[ds], sfbStart - grpStart);
          // infeasible if this candidate or the predecessor is infeasible; an all-zero SFB
          // (numQCurr == 0) repeats the predecessor's scale factor, costing no SF-delta bits
          tempBitCount = (maxSnrReached || (prevRate >= USHRT_MAX) ? USHRT_MAX : numQCurr + getBitCount (entropyCoder,
                          (numQCurr == 0 ? prevScaleFac[ds] : sfBest), prevScaleFac[ds], 1, mag, sfbStart - grpStart, sfbWidth));
          currRate[ds] = (uint16_t) tempBitCount;

          if (ds == 1) // statistically best place to save states
          {
            tempCodState[is] = entropyCoder.arithGetCodState ();
            tempCtxState[is] = entropyCoder.arithGetCtxState ();
          }
        }
      }
      tempScaleFac[is] = sfBest; // optimized factor for next SFB
    } // for is

    // current SFB's coder states and scale factors become the predecessors for the next SFB
    memcpy (prevCodState, tempCodState, m_numCStates * sizeof (uint32_t));
    memcpy (prevCtxState, tempCtxState, m_numCStates * sizeof (uint32_t));
    memcpy (prevScaleFac, tempScaleFac, m_numCStates * sizeof (uint8_t ));
  } // for sfb

  entropyCoder.arithSetCodState (codFinal); // back to last state
  entropyCoder.arithSetCtxState (ctxFinal, grpOffsets[numSfb] - grpStart);

  // --- phase 2: Viterbi search through the trellis for the path minimizing
  // distortion + lambda * rate. In EC_TRAIN builds, lambda is swept upward until the
  // group's rate falls below the target; otherwise a single fixed lambda is used.
#if EC_TRAIN
  tempBitCount = targetBitCount + 1; // Viterbi search for minimum distortion at target rate
  for (double lambda = 0.015625; (lambda <= 0.375) && (tempBitCount > targetBitCount); lambda += 0.0078125)
#endif
  {
    double* const prevCost = prevVtrbCost;
    uint8_t* const prevPath = m_coeffTemp; // trellis backtracker
    double costMinIs = (double) UINT_MAX;
    unsigned pathMinIs = 1;
#if EC_TRAIN
    tempGrpDist = 0.0;
#endif

    for (is = 0; is < m_numCStates; is++) // initial minimum path
    {
      const uint16_t currRate = m_quantRate[0][is * m_numCStates];

      // infeasible states get "infinite" cost; __max guards against the -1.0 zero-SFB marker
      prevCost[is] = (currRate >= USHRT_MAX ? (double) UINT_MAX : lambda * currRate + __max (0.0, m_quantDist[0][is]));
      prevPath[is] = 0;
    }

    for (sfb = 1; sfb < numSfb; sfb++) // search for minimum path
    {
      double* const currCost = tempVtrbCost;
      uint8_t* const currPath = &prevPath[sfb * m_numCStates]; // back-pointers, one row per SFB

      for (is = 0; is < m_numCStates; is++) // SFB's minimum path
      {
        uint16_t* currRate = &m_quantRate[sfb][is * m_numCStates];
        double costMinDs = (double) UINT_MAX;
        uint8_t pathMinDs = 1;

        for (ds = m_numCStates - 1; ds >= 0; ds--) // transitions
        {
          const double costCurr = (currRate[ds] >= USHRT_MAX ? (double) UINT_MAX : prevCost[ds] + lambda * currRate[ds]);

          if (costMinDs > costCurr)
          {
            costMinDs = costCurr;
            pathMinDs = (uint8_t) ds;
          }
        }
        // distortion is per-state (independent of ds), so add it once after the minimization
        if (costMinDs < UINT_MAX) costMinDs += __max (0.0, m_quantDist[sfb][is]);

        currCost[is] = costMinDs;
        currPath[is] = pathMinDs; // remember best predecessor for backtracking
      } // for is

      memcpy (prevCost, currCost, m_numCStates * sizeof (double)); // TODO: avoid memcpy, use pointer swapping instead for speed
    } // for sfb

    for (sfb--, is = 0; is < m_numCStates; is++) // group minimum
    {
      if (costMinIs > prevCost[is])
      {
        costMinIs = prevCost[is];
        pathMinIs = is;
      }
    }

    // backtrack from the last SFB, collecting scale factors and summing the group rate
    for (tempBitCount = 0; sfb > 0; sfb--) // min-cost group rate
    {
      const uint8_t* currPath = &prevPath[sfb * m_numCStates];
      const uint8_t pathMinDs = currPath[pathMinIs];

      inScaleFac[sfb] = (m_quantDist[sfb][pathMinIs] < 0.0 ? UCHAR_MAX : m_quantInSf[sfb][pathMinIs]); // UCHAR_MAX = zeroed SFB
      tempBitCount += m_quantRate[sfb][pathMinDs + pathMinIs * m_numCStates];
#if EC_TRAIN
      tempGrpDist += __max (0.0, m_quantDist[sfb][pathMinIs]);
#endif
      pathMinIs = pathMinDs; // step to the best predecessor state
    }
    inScaleFac[0] = (m_quantDist[0][pathMinIs] < 0.0 ? UCHAR_MAX : m_quantInSf[0][pathMinIs]);
    tempBitCount += m_quantRate[0][pathMinIs * m_numCStates];
#if EC_TRAIN
    tempGrpDist += __max (0.0, m_quantDist[0][pathMinIs]);
#endif
  } // Viterbi search

  // --- phase 3: apply the winning path, i.e. re-quantize the group with the R/D optimized
  // scale factors. In EC_TRAIN builds this is skipped unless the path actually improves on
  // the reference distortion or meets the target rate.
#if EC_TRAIN
  if ((tempGrpDist <= refGrpDist) || (tempBitCount <= targetBitCount))
#endif
  {
    uint8_t sfIndexPred = UCHAR_MAX; // UCHAR_MAX = no scale factor coded yet in this group

    if (grpStats)
    {
      entropyCoder.arithSetCodState (codStart);// set group start
      entropyCoder.arithSetCtxState (ctxStart, 0);

      tempBitCount = 0; // recounted exactly per SFB below
    }
    for (sfb = 0; sfb < numSfb; sfb++) // re-quantize spectrum with R/D optimized parameters
    {
      const uint16_t sfbStart = grpOffsets[sfb];
      const uint16_t sfbWidth = grpOffsets[sfb + 1] - sfbStart;

      // zero the SFB if the path chose that, or if the chosen factor lies too far below
      // the predecessor (more than INDEX_OFFSET) for the delta coding - TODO confirm
      if ((inScaleFac[sfb] == UCHAR_MAX) || (sfIndexPred <= m_maxSfIndex && inScaleFac[sfb] + INDEX_OFFSET < sfIndexPred))
      {
        memset (&quantCoeffs[sfbStart], 0, sfbWidth * sizeof (uint8_t)); // zero SFB output

        optimalSf[sfb] = sfIndexPred - (inScaleFac[sfb] == UCHAR_MAX ? 0 : INDEX_OFFSET);
      }
      else if (inScaleFac[sfb] != m_quantInSf[sfb][1]) // speedup
      {
        short maxQBest = 0, numQBest = 0;

        optimalSf[sfb] = quantizeMagnSfb (&m_coeffMagn[sfbStart], inScaleFac[sfb], &quantCoeffs[sfbStart], sfbWidth,
                                          &entropyCoder, sfbStart - grpStart,
                                          &maxQBest, &numQBest);

        if (maxQBest == 0) optimalSf[sfb] = sfIndexPred; // empty
        if (grpStats)
        {
          grpStats[sfb] = ((uint32_t) maxQBest << 16) | numQBest; // max magn. and sign bits
        }
      }

      if (grpStats) // complete statistics with per-SFB bit count
      {
        grpStats[sfb] += getBitCount (entropyCoder, optimalSf[sfb], sfIndexPred, 1, &quantCoeffs[grpStart], sfbStart - grpStart, sfbWidth);
        tempBitCount += grpStats[sfb] & USHRT_MAX; // low 16 bits hold the SFB's bit count
      }

      if ((sfb > 0) && (optimalSf[sfb] < UCHAR_MAX) && (sfIndexPred == UCHAR_MAX))
      {
        memset (optimalSf, optimalSf[sfb], sfb * sizeof (uint8_t)); // back-propagate factor
      }
      sfIndexPred = optimalSf[sfb];
    } // for sfb

    return tempBitCount + (grpStats ? 2 : 0); // last coding bits
  }

  return targetBitCount; // EC_TRAIN only: optimized path rejected, keep reference result
}
1056 #endif // EC_TRELLIS_OPT_CODING
1057