1 /* Copyright (c) 2007-2008 CSIRO
2 Copyright (c) 2007-2009 Xiph.Org Foundation
3 Copyright (c) 2007-2009 Timothy B. Terriberry
4 Written by Timothy B. Terriberry and Jean-Marc Valin */
5 /*
6 Redistribution and use in source and binary forms, with or without
7 modification, are permitted provided that the following conditions
8 are met:
9
10 - Redistributions of source code must retain the above copyright
11 notice, this list of conditions and the following disclaimer.
12
13 - Redistributions in binary form must reproduce the above copyright
14 notice, this list of conditions and the following disclaimer in the
15 documentation and/or other materials provided with the distribution.
16
17 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
18 ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
19 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
20 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
21 CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
22 EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
23 PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
24 PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
25 LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
26 NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
27 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 */
29
30 #ifdef HAVE_CONFIG_H
31 #include "config.h"
32 #endif
33
34 #include "os_support.h"
35 #include <stdlib.h>
36 #include <string.h>
37 #include "cwrs.h"
38 #include "mathops.h"
39 #include "arch.h"
40
41 /*Guaranteed to return a conservatively large estimate of the binary logarithm
42 with frac bits of fractional precision.
43 Tested for all possible 32-bit inputs with frac=4, where the maximum
44 overestimation is 0.06254243 bits.*/
log2_frac(celt_uint32 val,int frac)45 int log2_frac(celt_uint32 val, int frac)
46 {
47 int l;
48 l=EC_ILOG(val);
49 if(val&val-1){
50 /*This is (val>>l-16), but guaranteed to round up, even if adding a bias
51 before the shift would cause overflow (e.g., for 0xFFFFxxxx).*/
52 if(l>16)val=(val>>l-16)+((val&(1<<l-16)-1)+(1<<l-16)-1>>l-16);
53 else val<<=16-l;
54 l=l-1<<frac;
55 /*Note that we always need one iteration, since the rounding up above means
56 that we might need to adjust the integer part of the logarithm.*/
57 do{
58 int b;
59 b=(int)(val>>16);
60 l+=b<<frac;
61 val=val+b>>b;
62 val=val*val+0x7FFF>>15;
63 }
64 while(frac-->0);
65 /*If val is not exactly 0x8000, then we have to round up the remainder.*/
66 return l+(val>0x8000);
67 }
68 /*Exact powers of two require no rounding.*/
69 else return l-1<<frac;
70 }
71
72 #ifndef SMALL_FOOTPRINT
73
74
75 #define MASK32 (0xFFFFFFFF)
76
77 /*INV_TABLE[i] holds the multiplicative inverse of (2*i+1) mod 2**32.*/
78 static const celt_uint32 INV_TABLE[64]={
79 0x00000001,0xAAAAAAAB,0xCCCCCCCD,0xB6DB6DB7,
80 0x38E38E39,0xBA2E8BA3,0xC4EC4EC5,0xEEEEEEEF,
81 0xF0F0F0F1,0x286BCA1B,0x3CF3CF3D,0xE9BD37A7,
82 0xC28F5C29,0x684BDA13,0x4F72C235,0xBDEF7BDF,
83 0x3E0F83E1,0x8AF8AF8B,0x914C1BAD,0x96F96F97,
84 0xC18F9C19,0x2FA0BE83,0xA4FA4FA5,0x677D46CF,
85 0x1A1F58D1,0xFAFAFAFB,0x8C13521D,0x586FB587,
86 0xB823EE09,0xA08AD8F3,0xC10C9715,0xBEFBEFBF,
87 0xC0FC0FC1,0x07A44C6B,0xA33F128D,0xE327A977,
88 0xC7E3F1F9,0x962FC963,0x3F2B3885,0x613716AF,
89 0x781948B1,0x2B2E43DB,0xFCFCFCFD,0x6FD0EB67,
90 0xFA3F47E9,0xD2FD2FD3,0x3F4FD3F5,0xD4E25B9F,
91 0x5F02A3A1,0xBF5A814B,0x7C32B16D,0xD3431B57,
92 0xD8FD8FD9,0x8D28AC43,0xDA6C0965,0xDB195E8F,
93 0x0FDBC091,0x61F2A4BB,0xDCFDCFDD,0x46FDD947,
94 0x56BE69C9,0xEB2FDEB3,0x26E978D5,0xEFDFBF7F,
95 /*
96 0x0FE03F81,0xC9484E2B,0xE133F84D,0xE1A8C537,
97 0x077975B9,0x70586723,0xCD29C245,0xFAA11E6F,
98 0x0FE3C071,0x08B51D9B,0x8CE2CABD,0xBF937F27,
99 0xA8FE53A9,0x592FE593,0x2C0685B5,0x2EB11B5F,
100 0xFCD1E361,0x451AB30B,0x72CFE72D,0xDB35A717,
101 0xFB74A399,0xE80BFA03,0x0D516325,0x1BCB564F,
102 0xE02E4851,0xD962AE7B,0x10F8ED9D,0x95AEDD07,
103 0xE9DC0589,0xA18A4473,0xEA53FA95,0xEE936F3F,
104 0x90948F41,0xEAFEAFEB,0x3D137E0D,0xEF46C0F7,
105 0x028C1979,0x791064E3,0xC04FEC05,0xE115062F,
106 0x32385831,0x6E68575B,0xA10D387D,0x6FECF2E7,
107 0x3FB47F69,0xED4BFB53,0x74FED775,0xDB43BB1F,
108 0x87654321,0x9BA144CB,0x478BBCED,0xBFB912D7,
109 0x1FDCD759,0x14B2A7C3,0xCB125CE5,0x437B2E0F,
110 0x10FEF011,0xD2B3183B,0x386CAB5D,0xEF6AC0C7,
111 0x0E64C149,0x9A020A33,0xE6B41C55,0xFEFEFEFF*/
112 };
113
114 /*Computes (_a*_b-_c)/(2*_d+1) when the quotient is known to be exact.
115 _a, _b, _c, and _d may be arbitrary so long as the arbitrary precision result
116 fits in 32 bits, but currently the table for multiplicative inverses is only
117 valid for _d<128.*/
imusdiv32odd(celt_uint32 _a,celt_uint32 _b,celt_uint32 _c,int _d)118 static inline celt_uint32 imusdiv32odd(celt_uint32 _a,celt_uint32 _b,
119 celt_uint32 _c,int _d){
120 return (_a*_b-_c)*INV_TABLE[_d]&MASK32;
121 }
122
123 /*Computes (_a*_b-_c)/_d when the quotient is known to be exact.
124 _d does not actually have to be even, but imusdiv32odd will be faster when
125 it's odd, so you should use that instead.
126 _a and _d are assumed to be small (e.g., _a*_d fits in 32 bits; currently the
127 table for multiplicative inverses is only valid for _d<=256).
128 _b and _c may be arbitrary so long as the arbitrary precision reuslt fits in
129 32 bits.*/
imusdiv32even(celt_uint32 _a,celt_uint32 _b,celt_uint32 _c,int _d)130 static inline celt_uint32 imusdiv32even(celt_uint32 _a,celt_uint32 _b,
131 celt_uint32 _c,int _d){
132 celt_uint32 inv;
133 int mask;
134 int shift;
135 int one;
136 celt_assert(_d>0);
137 shift=EC_ILOG(_d^_d-1);
138 celt_assert(_d<=256);
139 inv=INV_TABLE[_d-1>>shift];
140 shift--;
141 one=1<<shift;
142 mask=one-1;
143 return (_a*(_b>>shift)-(_c>>shift)+
144 (_a*(_b&mask)+one-(_c&mask)>>shift)-1)*inv&MASK32;
145 }
146
147 #endif /* SMALL_FOOTPRINT */
148
149 /*Although derived separately, the pulse vector coding scheme is equivalent to
150 a Pyramid Vector Quantizer \cite{Fis86}.
151 Some additional notes about an early version appear at
152 http://people.xiph.org/~tterribe/notes/cwrs.html, but the codebook ordering
153 and the definitions of some terms have evolved since that was written.
154
155 The conversion from a pulse vector to an integer index (encoding) and back
156 (decoding) is governed by two related functions, V(N,K) and U(N,K).
157
158 V(N,K) = the number of combinations, with replacement, of N items, taken K
159 at a time, when a sign bit is added to each item taken at least once (i.e.,
160 the number of N-dimensional unit pulse vectors with K pulses).
161 One way to compute this is via
162 V(N,K) = K>0 ? sum(k=1...K,2**k*choose(N,k)*choose(K-1,k-1)) : 1,
163 where choose() is the binomial function.
164 A table of values for N<10 and K<10 looks like:
165 V[10][10] = {
166 {1, 0, 0, 0, 0, 0, 0, 0, 0, 0},
167 {1, 2, 2, 2, 2, 2, 2, 2, 2, 2},
168 {1, 4, 8, 12, 16, 20, 24, 28, 32, 36},
169 {1, 6, 18, 38, 66, 102, 146, 198, 258, 326},
170 {1, 8, 32, 88, 192, 360, 608, 952, 1408, 1992},
171 {1, 10, 50, 170, 450, 1002, 1970, 3530, 5890, 9290},
172 {1, 12, 72, 292, 912, 2364, 5336, 10836, 20256, 35436},
173 {1, 14, 98, 462, 1666, 4942, 12642, 28814, 59906, 115598},
174 {1, 16, 128, 688, 2816, 9424, 27008, 68464, 157184, 332688},
175 {1, 18, 162, 978, 4482, 16722, 53154, 148626, 374274, 864146}
176 };
177
178 U(N,K) = the number of such combinations wherein N-1 objects are taken at
179 most K-1 at a time.
180 This is given by
181 U(N,K) = sum(k=0...K-1,V(N-1,k))
182 = K>0 ? (V(N-1,K-1) + V(N,K-1))/2 : 0.
183 The latter expression also makes clear that U(N,K) is half the number of such
184 combinations wherein the first object is taken at least once.
185 Although it may not be clear from either of these definitions, U(N,K) is the
186 natural function to work with when enumerating the pulse vector codebooks,
187 not V(N,K).
188 U(N,K) is not well-defined for N=0, but with the extension
189 U(0,K) = K>0 ? 0 : 1,
190 the function becomes symmetric: U(N,K) = U(K,N), with a similar table:
191 U[10][10] = {
192 {1, 0, 0, 0, 0, 0, 0, 0, 0, 0},
193 {0, 1, 1, 1, 1, 1, 1, 1, 1, 1},
194 {0, 1, 3, 5, 7, 9, 11, 13, 15, 17},
195 {0, 1, 5, 13, 25, 41, 61, 85, 113, 145},
196 {0, 1, 7, 25, 63, 129, 231, 377, 575, 833},
197 {0, 1, 9, 41, 129, 321, 681, 1289, 2241, 3649},
198 {0, 1, 11, 61, 231, 681, 1683, 3653, 7183, 13073},
199 {0, 1, 13, 85, 377, 1289, 3653, 8989, 19825, 40081},
200 {0, 1, 15, 113, 575, 2241, 7183, 19825, 48639, 108545},
201 {0, 1, 17, 145, 833, 3649, 13073, 40081, 108545, 265729}
202 };
203
204 With this extension, V(N,K) may be written in terms of U(N,K):
205 V(N,K) = U(N,K) + U(N,K+1)
206 for all N>=0, K>=0.
207 Thus U(N,K+1) represents the number of combinations where the first element
208 is positive or zero, and U(N,K) represents the number of combinations where
209 it is negative.
210 With a large enough table of U(N,K) values, we could write O(N) encoding
211 and O(min(N*log(K),N+K)) decoding routines, but such a table would be
212 prohibitively large for small embedded devices (K may be as large as 32767
213 for small N, and N may be as large as 200).
214
215 Both functions obey the same recurrence relation:
216 V(N,K) = V(N-1,K) + V(N,K-1) + V(N-1,K-1),
217 U(N,K) = U(N-1,K) + U(N,K-1) + U(N-1,K-1),
218 for all N>0, K>0, with different initial conditions at N=0 or K=0.
219 This allows us to construct a row of one of the tables above given the
220 previous row or the next row.
221 Thus we can derive O(NK) encoding and decoding routines with O(K) memory
222 using only addition and subtraction.
223
224 When encoding, we build up from the U(2,K) row and work our way forwards.
225 When decoding, we need to start at the U(N,K) row and work our way backwards,
226 which requires a means of computing U(N,K).
227 U(N,K) may be computed from two previous values with the same N:
228 U(N,K) = ((2*N-1)*U(N,K-1) - U(N,K-2))/(K-1) + U(N,K-2)
229 for all N>1, and since U(N,K) is symmetric, a similar relation holds for two
230 previous values with the same K:
231 U(N,K>1) = ((2*K-1)*U(N-1,K) - U(N-2,K))/(N-1) + U(N-2,K)
232 for all K>1.
233 This allows us to construct an arbitrary row of the U(N,K) table by starting
234 with the first two values, which are constants.
235 This saves roughly 2/3 the work in our O(NK) decoding routine, but costs O(K)
236 multiplications.
237 Similar relations can be derived for V(N,K), but are not used here.
238
239 For N>0 and K>0, U(N,K) and V(N,K) take on the form of an (N-1)-degree
240 polynomial for fixed N.
241 The first few are
242 U(1,K) = 1,
243 U(2,K) = 2*K-1,
244 U(3,K) = (2*K-2)*K+1,
245 U(4,K) = (((4*K-6)*K+8)*K-3)/3,
246 U(5,K) = ((((2*K-4)*K+10)*K-8)*K+3)/3,
247 and
248 V(1,K) = 2,
249 V(2,K) = 4*K,
250 V(3,K) = 4*K*K+2,
251 V(4,K) = 8*(K*K+2)*K/3,
252 V(5,K) = ((4*K*K+20)*K*K+6)/3,
253 for all K>0.
254 This allows us to derive O(N) encoding and O(N*log(K)) decoding routines for
255 small N (and indeed decoding is also O(N) for N<3).
256
257 @ARTICLE{Fis86,
258 author="Thomas R. Fischer",
259 title="A Pyramid Vector Quantizer",
260 journal="IEEE Transactions on Information Theory",
261 volume="IT-32",
262 number=4,
263 pages="568--583",
264 month=Jul,
265 year=1986
266 }*/
267
268 #ifndef SMALL_FOOTPRINT
269
270 /*Compute U(1,_k).*/
ucwrs1(int _k)271 static inline unsigned ucwrs1(int _k){
272 return _k?1:0;
273 }
274
275 /*Compute V(1,_k).*/
ncwrs1(int _k)276 static inline unsigned ncwrs1(int _k){
277 return _k?2:1;
278 }
279
280 /*Compute U(2,_k).
281 Note that this may be called with _k=32768 (maxK[2]+1).*/
ucwrs2(unsigned _k)282 static inline unsigned ucwrs2(unsigned _k){
283 return _k?_k+(_k-1):0;
284 }
285
286 /*Compute V(2,_k).*/
ncwrs2(int _k)287 static inline celt_uint32 ncwrs2(int _k){
288 return _k?4*(celt_uint32)_k:1;
289 }
290
291 /*Compute U(3,_k).
292 Note that this may be called with _k=32768 (maxK[3]+1).*/
ucwrs3(unsigned _k)293 static inline celt_uint32 ucwrs3(unsigned _k){
294 return _k?(2*(celt_uint32)_k-2)*_k+1:0;
295 }
296
297 /*Compute V(3,_k).*/
ncwrs3(int _k)298 static inline celt_uint32 ncwrs3(int _k){
299 return _k?2*(2*(unsigned)_k*(celt_uint32)_k+1):1;
300 }
301
302 /*Compute U(4,_k).*/
ucwrs4(int _k)303 static inline celt_uint32 ucwrs4(int _k){
304 return _k?imusdiv32odd(2*_k,(2*_k-3)*(celt_uint32)_k+4,3,1):0;
305 }
306
307 /*Compute V(4,_k).*/
ncwrs4(int _k)308 static inline celt_uint32 ncwrs4(int _k){
309 return _k?((_k*(celt_uint32)_k+2)*_k)/3<<3:1;
310 }
311
312 /*Compute U(5,_k).*/
ucwrs5(int _k)313 static inline celt_uint32 ucwrs5(int _k){
314 return _k?(((((_k-2)*(unsigned)_k+5)*(celt_uint32)_k-4)*_k)/3<<1)+1:0;
315 }
316
317 /*Compute V(5,_k).*/
ncwrs5(int _k)318 static inline celt_uint32 ncwrs5(int _k){
319 return _k?(((_k*(unsigned)_k+5)*(celt_uint32)_k*_k)/3<<2)+2:1;
320 }
321
322 #endif /* SMALL_FOOTPRINT */
323
324 /*Computes the next row/column of any recurrence that obeys the relation
325 u[i][j]=u[i-1][j]+u[i][j-1]+u[i-1][j-1].
326 _ui0 is the base case for the new row/column.*/
unext(celt_uint32 * _ui,unsigned _len,celt_uint32 _ui0)327 static inline void unext(celt_uint32 *_ui,unsigned _len,celt_uint32 _ui0){
328 celt_uint32 ui1;
329 unsigned j;
330 /*This do-while will overrun the array if we don't have storage for at least
331 2 values.*/
332 j=1; do {
333 ui1=UADD32(UADD32(_ui[j],_ui[j-1]),_ui0);
334 _ui[j-1]=_ui0;
335 _ui0=ui1;
336 } while (++j<_len);
337 _ui[j-1]=_ui0;
338 }
339
340 /*Computes the previous row/column of any recurrence that obeys the relation
341 u[i-1][j]=u[i][j]-u[i][j-1]-u[i-1][j-1].
342 _ui0 is the base case for the new row/column.*/
uprev(celt_uint32 * _ui,unsigned _n,celt_uint32 _ui0)343 static inline void uprev(celt_uint32 *_ui,unsigned _n,celt_uint32 _ui0){
344 celt_uint32 ui1;
345 unsigned j;
346 /*This do-while will overrun the array if we don't have storage for at least
347 2 values.*/
348 j=1; do {
349 ui1=USUB32(USUB32(_ui[j],_ui[j-1]),_ui0);
350 _ui[j-1]=_ui0;
351 _ui0=ui1;
352 } while (++j<_n);
353 _ui[j-1]=_ui0;
354 }
355
356 /*Compute V(_n,_k), as well as U(_n,0..._k+1).
357 _u: On exit, _u[i] contains U(_n,i) for i in [0..._k+1].*/
ncwrs_urow(unsigned _n,unsigned _k,celt_uint32 * _u)358 static celt_uint32 ncwrs_urow(unsigned _n,unsigned _k,celt_uint32 *_u){
359 celt_uint32 um2;
360 unsigned len;
361 unsigned k;
362 len=_k+2;
363 /*We require storage at least 3 values (e.g., _k>0).*/
364 celt_assert(len>=3);
365 _u[0]=0;
366 _u[1]=um2=1;
367 #ifndef SMALL_FOOTPRINT
368 if(_n<=6 || _k>255)
369 #endif
370 {
371 /*If _n==0, _u[0] should be 1 and the rest should be 0.*/
372 /*If _n==1, _u[i] should be 1 for i>1.*/
373 celt_assert(_n>=2);
374 /*If _k==0, the following do-while loop will overflow the buffer.*/
375 celt_assert(_k>0);
376 k=2;
377 do _u[k]=(k<<1)-1;
378 while(++k<len);
379 for(k=2;k<_n;k++)unext(_u+1,_k+1,1);
380 }
381 #ifndef SMALL_FOOTPRINT
382 else{
383 celt_uint32 um1;
384 celt_uint32 n2m1;
385 _u[2]=n2m1=um1=(_n<<1)-1;
386 for(k=3;k<len;k++){
387 /*U(N,K) = ((2*N-1)*U(N,K-1)-U(N,K-2))/(K-1) + U(N,K-2)*/
388 _u[k]=um2=imusdiv32even(n2m1,um1,um2,k-1)+um2;
389 if(++k>=len)break;
390 _u[k]=um1=imusdiv32odd(n2m1,um2,um1,k-1>>1)+um1;
391 }
392 }
393 #endif /* SMALL_FOOTPRINT */
394 return _u[_k]+_u[_k+1];
395 }
396
397 /*Returns the _i'th combination of _k elements (at most 32767) chosen from a
398 set of size 1 with associated sign bits.
399 _y: Returns the vector of pulses.*/
cwrsi1(int _k,celt_uint32 _i,int * _y)400 static inline void cwrsi1(int _k,celt_uint32 _i,int *_y){
401 int s;
402 s=-(int)_i;
403 _y[0]=_k+s^s;
404 }
405
406 #ifndef SMALL_FOOTPRINT
407
408 /*Returns the _i'th combination of _k elements (at most 32767) chosen from a
409 set of size 2 with associated sign bits.
410 _y: Returns the vector of pulses.*/
cwrsi2(int _k,celt_uint32 _i,int * _y)411 static inline void cwrsi2(int _k,celt_uint32 _i,int *_y){
412 celt_uint32 p;
413 int s;
414 int yj;
415 p=ucwrs2(_k+1U);
416 s=-(_i>=p);
417 _i-=p&s;
418 yj=_k;
419 _k=_i+1>>1;
420 p=ucwrs2(_k);
421 _i-=p;
422 yj-=_k;
423 _y[0]=yj+s^s;
424 cwrsi1(_k,_i,_y+1);
425 }
426
427 /*Returns the _i'th combination of _k elements (at most 32767) chosen from a
428 set of size 3 with associated sign bits.
429 _y: Returns the vector of pulses.*/
cwrsi3(int _k,celt_uint32 _i,int * _y)430 static void cwrsi3(int _k,celt_uint32 _i,int *_y){
431 celt_uint32 p;
432 int s;
433 int yj;
434 p=ucwrs3(_k+1U);
435 s=-(_i>=p);
436 _i-=p&s;
437 yj=_k;
438 /*Finds the maximum _k such that ucwrs3(_k)<=_i (tested for all
439 _i<2147418113=U(3,32768)).*/
440 _k=_i>0?isqrt32(2*_i-1)+1>>1:0;
441 p=ucwrs3(_k);
442 _i-=p;
443 yj-=_k;
444 _y[0]=yj+s^s;
445 cwrsi2(_k,_i,_y+1);
446 }
447
448 /*Returns the _i'th combination of _k elements (at most 1172) chosen from a set
449 of size 4 with associated sign bits.
450 _y: Returns the vector of pulses.*/
cwrsi4(int _k,celt_uint32 _i,int * _y)451 static void cwrsi4(int _k,celt_uint32 _i,int *_y){
452 celt_uint32 p;
453 int s;
454 int yj;
455 int kl;
456 int kr;
457 p=ucwrs4(_k+1);
458 s=-(_i>=p);
459 _i-=p&s;
460 yj=_k;
461 /*We could solve a cubic for k here, but the form of the direct solution does
462 not lend itself well to exact integer arithmetic.
463 Instead we do a binary search on U(4,K).*/
464 kl=0;
465 kr=_k;
466 for(;;){
467 _k=kl+kr>>1;
468 p=ucwrs4(_k);
469 if(p<_i){
470 if(_k>=kr)break;
471 kl=_k+1;
472 }
473 else if(p>_i)kr=_k-1;
474 else break;
475 }
476 _i-=p;
477 yj-=_k;
478 _y[0]=yj+s^s;
479 cwrsi3(_k,_i,_y+1);
480 }
481
482 /*Returns the _i'th combination of _k elements (at most 238) chosen from a set
483 of size 5 with associated sign bits.
484 _y: Returns the vector of pulses.*/
cwrsi5(int _k,celt_uint32 _i,int * _y)485 static void cwrsi5(int _k,celt_uint32 _i,int *_y){
486 celt_uint32 p;
487 int s;
488 int yj;
489 p=ucwrs5(_k+1);
490 s=-(_i>=p);
491 _i-=p&s;
492 yj=_k;
493 /* A binary search on U(5,K) avoids the need for 64-bit arithmetic */
494 {
495 int kl=0;
496 int kr=_k;
497 for(;;){
498 _k=kl+kr>>1;
499 p=ucwrs5(_k);
500 if(p<_i){
501 if(_k>=kr)break;
502 kl=_k+1;
503 }
504 else if(p>_i)kr=_k-1;
505 else break;
506 }
507 }
508 _i-=p;
509 yj-=_k;
510 _y[0]=yj+s^s;
511 cwrsi4(_k,_i,_y+1);
512 }
513 #endif /* SMALL_FOOTPRINT */
514
515 /*Returns the _i'th combination of _k elements chosen from a set of size _n
516 with associated sign bits.
517 _y: Returns the vector of pulses.
518 _u: Must contain entries [0..._k+1] of row _n of U() on input.
519 Its contents will be destructively modified.*/
cwrsi(int _n,int _k,celt_uint32 _i,int * _y,celt_uint32 * _u)520 static void cwrsi(int _n,int _k,celt_uint32 _i,int *_y,celt_uint32 *_u){
521 int j;
522 celt_assert(_n>0);
523 j=0;
524 do{
525 celt_uint32 p;
526 int s;
527 int yj;
528 p=_u[_k+1];
529 s=-(_i>=p);
530 _i-=p&s;
531 yj=_k;
532 p=_u[_k];
533 while(p>_i)p=_u[--_k];
534 _i-=p;
535 yj-=_k;
536 _y[j]=yj+s^s;
537 uprev(_u,_k+2,0);
538 }
539 while(++j<_n);
540 }
541
542
543 /*Returns the index of the given combination of K elements chosen from a set
544 of size 1 with associated sign bits.
545 _y: The vector of pulses, whose sum of absolute values is K.
546 _k: Returns K.*/
icwrs1(const int * _y,int * _k)547 static inline celt_uint32 icwrs1(const int *_y,int *_k){
548 *_k=abs(_y[0]);
549 return _y[0]<0;
550 }
551
552 #ifndef SMALL_FOOTPRINT
553
554 /*Returns the index of the given combination of K elements chosen from a set
555 of size 2 with associated sign bits.
556 _y: The vector of pulses, whose sum of absolute values is K.
557 _k: Returns K.*/
icwrs2(const int * _y,int * _k)558 static inline celt_uint32 icwrs2(const int *_y,int *_k){
559 celt_uint32 i;
560 int k;
561 i=icwrs1(_y+1,&k);
562 i+=ucwrs2(k);
563 k+=abs(_y[0]);
564 if(_y[0]<0)i+=ucwrs2(k+1U);
565 *_k=k;
566 return i;
567 }
568
569 /*Returns the index of the given combination of K elements chosen from a set
570 of size 3 with associated sign bits.
571 _y: The vector of pulses, whose sum of absolute values is K.
572 _k: Returns K.*/
icwrs3(const int * _y,int * _k)573 static inline celt_uint32 icwrs3(const int *_y,int *_k){
574 celt_uint32 i;
575 int k;
576 i=icwrs2(_y+1,&k);
577 i+=ucwrs3(k);
578 k+=abs(_y[0]);
579 if(_y[0]<0)i+=ucwrs3(k+1U);
580 *_k=k;
581 return i;
582 }
583
584 /*Returns the index of the given combination of K elements chosen from a set
585 of size 4 with associated sign bits.
586 _y: The vector of pulses, whose sum of absolute values is K.
587 _k: Returns K.*/
icwrs4(const int * _y,int * _k)588 static inline celt_uint32 icwrs4(const int *_y,int *_k){
589 celt_uint32 i;
590 int k;
591 i=icwrs3(_y+1,&k);
592 i+=ucwrs4(k);
593 k+=abs(_y[0]);
594 if(_y[0]<0)i+=ucwrs4(k+1);
595 *_k=k;
596 return i;
597 }
598
599 /*Returns the index of the given combination of K elements chosen from a set
600 of size 5 with associated sign bits.
601 _y: The vector of pulses, whose sum of absolute values is K.
602 _k: Returns K.*/
icwrs5(const int * _y,int * _k)603 static inline celt_uint32 icwrs5(const int *_y,int *_k){
604 celt_uint32 i;
605 int k;
606 i=icwrs4(_y+1,&k);
607 i+=ucwrs5(k);
608 k+=abs(_y[0]);
609 if(_y[0]<0)i+=ucwrs5(k+1);
610 *_k=k;
611 return i;
612 }
613 #endif /* SMALL_FOOTPRINT */
614
615 /*Returns the index of the given combination of K elements chosen from a set
616 of size _n with associated sign bits.
617 _y: The vector of pulses, whose sum of absolute values must be _k.
618 _nc: Returns V(_n,_k).*/
icwrs(int _n,int _k,celt_uint32 * _nc,const int * _y,celt_uint32 * _u)619 celt_uint32 icwrs(int _n,int _k,celt_uint32 *_nc,const int *_y,
620 celt_uint32 *_u){
621 celt_uint32 i;
622 int j;
623 int k;
624 /*We can't unroll the first two iterations of the loop unless _n>=2.*/
625 celt_assert(_n>=2);
626 _u[0]=0;
627 for(k=1;k<=_k+1;k++)_u[k]=(k<<1)-1;
628 i=icwrs1(_y+_n-1,&k);
629 j=_n-2;
630 i+=_u[k];
631 k+=abs(_y[j]);
632 if(_y[j]<0)i+=_u[k+1];
633 while(j-->0){
634 unext(_u,_k+2,0);
635 i+=_u[k];
636 k+=abs(_y[j]);
637 if(_y[j]<0)i+=_u[k+1];
638 }
639 *_nc=_u[k]+_u[k+1];
640 return i;
641 }
642
643 #ifdef CUSTOM_MODES
get_required_bits(celt_int16 * _bits,int _n,int _maxk,int _frac)644 void get_required_bits(celt_int16 *_bits,int _n,int _maxk,int _frac){
645 int k;
646 /*_maxk==0 => there's nothing to do.*/
647 celt_assert(_maxk>0);
648 _bits[0]=0;
649 if (_n==1)
650 {
651 for (k=1;k<=_maxk;k++)
652 _bits[k] = 1<<_frac;
653 }
654 else {
655 VARDECL(celt_uint32,u);
656 SAVE_STACK;
657 ALLOC(u,_maxk+2U,celt_uint32);
658 ncwrs_urow(_n,_maxk,u);
659 for(k=1;k<=_maxk;k++)
660 _bits[k]=log2_frac(u[k]+u[k+1],_frac);
661 RESTORE_STACK;
662 }
663 }
664 #endif /* CUSTOM_MODES */
665
encode_pulses(const int * _y,int _n,int _k,ec_enc * _enc)666 void encode_pulses(const int *_y,int _n,int _k,ec_enc *_enc){
667 celt_uint32 i;
668 if (_k==0)
669 return;
670 switch(_n){
671 case 1:{
672 i=icwrs1(_y,&_k);
673 celt_assert(ncwrs1(_k)==2);
674 ec_enc_bits(_enc,i,1);
675 }break;
676 #ifndef SMALL_FOOTPRINT
677 case 2:{
678 i=icwrs2(_y,&_k);
679 ec_enc_uint(_enc,i,ncwrs2(_k));
680 }break;
681 case 3:{
682 i=icwrs3(_y,&_k);
683 ec_enc_uint(_enc,i,ncwrs3(_k));
684 }break;
685 case 4:{
686 i=icwrs4(_y,&_k);
687 ec_enc_uint(_enc,i,ncwrs4(_k));
688 }break;
689 case 5:{
690 i=icwrs5(_y,&_k);
691 ec_enc_uint(_enc,i,ncwrs5(_k));
692 }break;
693 #endif
694 default:
695 {
696 VARDECL(celt_uint32,u);
697 celt_uint32 nc;
698 SAVE_STACK;
699 ALLOC(u,_k+2U,celt_uint32);
700 i=icwrs(_n,_k,&nc,_y,u);
701 ec_enc_uint(_enc,i,nc);
702 RESTORE_STACK;
703 };
704 }
705 }
706
decode_pulses(int * _y,int _n,int _k,ec_dec * _dec)707 void decode_pulses(int *_y,int _n,int _k,ec_dec *_dec)
708 {
709 if (_k==0) {
710 int i;
711 for (i=0;i<_n;i++)
712 _y[i] = 0;
713 return;
714 }
715 switch(_n){
716 case 1:{
717 celt_assert(ncwrs1(_k)==2);
718 cwrsi1(_k,ec_dec_bits(_dec,1),_y);
719 }break;
720 #ifndef SMALL_FOOTPRINT
721 case 2:cwrsi2(_k,ec_dec_uint(_dec,ncwrs2(_k)),_y);break;
722 case 3:cwrsi3(_k,ec_dec_uint(_dec,ncwrs3(_k)),_y);break;
723 case 4:cwrsi4(_k,ec_dec_uint(_dec,ncwrs4(_k)),_y);break;
724 case 5:cwrsi5(_k,ec_dec_uint(_dec,ncwrs5(_k)),_y);break;
725 #endif
726 default:
727 {
728 VARDECL(celt_uint32,u);
729 SAVE_STACK;
730 ALLOC(u,_k+2U,celt_uint32);
731 cwrsi(_n,_k,ec_dec_uint(_dec,ncwrs_urow(_n,_k,u)),_y,u);
732 RESTORE_STACK;
733 }
734 }
735 }
736
737