1 /* Copyright (c) 2007-2008 CSIRO
2    Copyright (c) 2007-2009 Xiph.Org Foundation
3    Copyright (c) 2007-2009 Timothy B. Terriberry
4    Written by Timothy B. Terriberry and Jean-Marc Valin */
5 /*
6    Redistribution and use in source and binary forms, with or without
7    modification, are permitted provided that the following conditions
8    are met:
9 
10    - Redistributions of source code must retain the above copyright
11    notice, this list of conditions and the following disclaimer.
12 
13    - Redistributions in binary form must reproduce the above copyright
14    notice, this list of conditions and the following disclaimer in the
15    documentation and/or other materials provided with the distribution.
16 
17    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
18    ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
19    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
20    A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR
21    CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
22    EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
23    PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
24    PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
25    LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
26    NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
27    SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 */
29 
30 #ifdef HAVE_CONFIG_H
31 #include "config.h"
32 #endif
33 
34 #include "os_support.h"
35 #include <stdlib.h>
36 #include <string.h>
37 #include "cwrs.h"
38 #include "mathops.h"
39 #include "arch.h"
40 
41 /*Guaranteed to return a conservatively large estimate of the binary logarithm
42    with frac bits of fractional precision.
43   Tested for all possible 32-bit inputs with frac=4, where the maximum
44    overestimation is 0.06254243 bits.*/
log2_frac(celt_uint32 val,int frac)45 int log2_frac(celt_uint32 val, int frac)
46 {
47   int l;
48   l=EC_ILOG(val);
49   if(val&val-1){
50     /*This is (val>>l-16), but guaranteed to round up, even if adding a bias
51        before the shift would cause overflow (e.g., for 0xFFFFxxxx).*/
52     if(l>16)val=(val>>l-16)+((val&(1<<l-16)-1)+(1<<l-16)-1>>l-16);
53     else val<<=16-l;
54     l=l-1<<frac;
55     /*Note that we always need one iteration, since the rounding up above means
56        that we might need to adjust the integer part of the logarithm.*/
57     do{
58       int b;
59       b=(int)(val>>16);
60       l+=b<<frac;
61       val=val+b>>b;
62       val=val*val+0x7FFF>>15;
63     }
64     while(frac-->0);
65     /*If val is not exactly 0x8000, then we have to round up the remainder.*/
66     return l+(val>0x8000);
67   }
68   /*Exact powers of two require no rounding.*/
69   else return l-1<<frac;
70 }
71 
72 #ifndef SMALL_FOOTPRINT
73 
74 
75 #define MASK32 (0xFFFFFFFF)
76 
77 /*INV_TABLE[i] holds the multiplicative inverse of (2*i+1) mod 2**32.*/
78 static const celt_uint32 INV_TABLE[64]={
79   0x00000001,0xAAAAAAAB,0xCCCCCCCD,0xB6DB6DB7,
80   0x38E38E39,0xBA2E8BA3,0xC4EC4EC5,0xEEEEEEEF,
81   0xF0F0F0F1,0x286BCA1B,0x3CF3CF3D,0xE9BD37A7,
82   0xC28F5C29,0x684BDA13,0x4F72C235,0xBDEF7BDF,
83   0x3E0F83E1,0x8AF8AF8B,0x914C1BAD,0x96F96F97,
84   0xC18F9C19,0x2FA0BE83,0xA4FA4FA5,0x677D46CF,
85   0x1A1F58D1,0xFAFAFAFB,0x8C13521D,0x586FB587,
86   0xB823EE09,0xA08AD8F3,0xC10C9715,0xBEFBEFBF,
87   0xC0FC0FC1,0x07A44C6B,0xA33F128D,0xE327A977,
88   0xC7E3F1F9,0x962FC963,0x3F2B3885,0x613716AF,
89   0x781948B1,0x2B2E43DB,0xFCFCFCFD,0x6FD0EB67,
90   0xFA3F47E9,0xD2FD2FD3,0x3F4FD3F5,0xD4E25B9F,
91   0x5F02A3A1,0xBF5A814B,0x7C32B16D,0xD3431B57,
92   0xD8FD8FD9,0x8D28AC43,0xDA6C0965,0xDB195E8F,
93   0x0FDBC091,0x61F2A4BB,0xDCFDCFDD,0x46FDD947,
94   0x56BE69C9,0xEB2FDEB3,0x26E978D5,0xEFDFBF7F,
95   /*
96   0x0FE03F81,0xC9484E2B,0xE133F84D,0xE1A8C537,
97   0x077975B9,0x70586723,0xCD29C245,0xFAA11E6F,
98   0x0FE3C071,0x08B51D9B,0x8CE2CABD,0xBF937F27,
99   0xA8FE53A9,0x592FE593,0x2C0685B5,0x2EB11B5F,
100   0xFCD1E361,0x451AB30B,0x72CFE72D,0xDB35A717,
101   0xFB74A399,0xE80BFA03,0x0D516325,0x1BCB564F,
102   0xE02E4851,0xD962AE7B,0x10F8ED9D,0x95AEDD07,
103   0xE9DC0589,0xA18A4473,0xEA53FA95,0xEE936F3F,
104   0x90948F41,0xEAFEAFEB,0x3D137E0D,0xEF46C0F7,
105   0x028C1979,0x791064E3,0xC04FEC05,0xE115062F,
106   0x32385831,0x6E68575B,0xA10D387D,0x6FECF2E7,
107   0x3FB47F69,0xED4BFB53,0x74FED775,0xDB43BB1F,
108   0x87654321,0x9BA144CB,0x478BBCED,0xBFB912D7,
109   0x1FDCD759,0x14B2A7C3,0xCB125CE5,0x437B2E0F,
110   0x10FEF011,0xD2B3183B,0x386CAB5D,0xEF6AC0C7,
111   0x0E64C149,0x9A020A33,0xE6B41C55,0xFEFEFEFF*/
112 };
113 
114 /*Computes (_a*_b-_c)/(2*_d+1) when the quotient is known to be exact.
115   _a, _b, _c, and _d may be arbitrary so long as the arbitrary precision result
116    fits in 32 bits, but currently the table for multiplicative inverses is only
117    valid for _d<128.*/
imusdiv32odd(celt_uint32 _a,celt_uint32 _b,celt_uint32 _c,int _d)118 static inline celt_uint32 imusdiv32odd(celt_uint32 _a,celt_uint32 _b,
119  celt_uint32 _c,int _d){
120   return (_a*_b-_c)*INV_TABLE[_d]&MASK32;
121 }
122 
123 /*Computes (_a*_b-_c)/_d when the quotient is known to be exact.
124   _d does not actually have to be even, but imusdiv32odd will be faster when
125    it's odd, so you should use that instead.
126   _a and _d are assumed to be small (e.g., _a*_d fits in 32 bits; currently the
127    table for multiplicative inverses is only valid for _d<=256).
128   _b and _c may be arbitrary so long as the arbitrary precision reuslt fits in
129    32 bits.*/
imusdiv32even(celt_uint32 _a,celt_uint32 _b,celt_uint32 _c,int _d)130 static inline celt_uint32 imusdiv32even(celt_uint32 _a,celt_uint32 _b,
131  celt_uint32 _c,int _d){
132   celt_uint32 inv;
133   int           mask;
134   int           shift;
135   int           one;
136   celt_assert(_d>0);
137   shift=EC_ILOG(_d^_d-1);
138   celt_assert(_d<=256);
139   inv=INV_TABLE[_d-1>>shift];
140   shift--;
141   one=1<<shift;
142   mask=one-1;
143   return (_a*(_b>>shift)-(_c>>shift)+
144    (_a*(_b&mask)+one-(_c&mask)>>shift)-1)*inv&MASK32;
145 }
146 
147 #endif /* SMALL_FOOTPRINT */
148 
149 /*Although derived separately, the pulse vector coding scheme is equivalent to
150    a Pyramid Vector Quantizer \cite{Fis86}.
151   Some additional notes about an early version appear at
152    http://people.xiph.org/~tterribe/notes/cwrs.html, but the codebook ordering
153    and the definitions of some terms have evolved since that was written.
154 
155   The conversion from a pulse vector to an integer index (encoding) and back
156    (decoding) is governed by two related functions, V(N,K) and U(N,K).
157 
158   V(N,K) = the number of combinations, with replacement, of N items, taken K
159    at a time, when a sign bit is added to each item taken at least once (i.e.,
160    the number of N-dimensional unit pulse vectors with K pulses).
161   One way to compute this is via
162     V(N,K) = K>0 ? sum(k=1...K,2**k*choose(N,k)*choose(K-1,k-1)) : 1,
163    where choose() is the binomial function.
164   A table of values for N<10 and K<10 looks like:
165   V[10][10] = {
166     {1,  0,   0,    0,    0,     0,     0,      0,      0,       0},
167     {1,  2,   2,    2,    2,     2,     2,      2,      2,       2},
168     {1,  4,   8,   12,   16,    20,    24,     28,     32,      36},
169     {1,  6,  18,   38,   66,   102,   146,    198,    258,     326},
170     {1,  8,  32,   88,  192,   360,   608,    952,   1408,    1992},
171     {1, 10,  50,  170,  450,  1002,  1970,   3530,   5890,    9290},
172     {1, 12,  72,  292,  912,  2364,  5336,  10836,  20256,   35436},
173     {1, 14,  98,  462, 1666,  4942, 12642,  28814,  59906,  115598},
174     {1, 16, 128,  688, 2816,  9424, 27008,  68464, 157184,  332688},
175     {1, 18, 162,  978, 4482, 16722, 53154, 148626, 374274,  864146}
176   };
177 
178   U(N,K) = the number of such combinations wherein N-1 objects are taken at
179    most K-1 at a time.
180   This is given by
181     U(N,K) = sum(k=0...K-1,V(N-1,k))
182            = K>0 ? (V(N-1,K-1) + V(N,K-1))/2 : 0.
183   The latter expression also makes clear that U(N,K) is half the number of such
184    combinations wherein the first object is taken at least once.
185   Although it may not be clear from either of these definitions, U(N,K) is the
186    natural function to work with when enumerating the pulse vector codebooks,
187    not V(N,K).
188   U(N,K) is not well-defined for N=0, but with the extension
189     U(0,K) = K>0 ? 0 : 1,
190    the function becomes symmetric: U(N,K) = U(K,N), with a similar table:
191   U[10][10] = {
192     {1, 0,  0,   0,    0,    0,     0,     0,      0,      0},
193     {0, 1,  1,   1,    1,    1,     1,     1,      1,      1},
194     {0, 1,  3,   5,    7,    9,    11,    13,     15,     17},
195     {0, 1,  5,  13,   25,   41,    61,    85,    113,    145},
196     {0, 1,  7,  25,   63,  129,   231,   377,    575,    833},
197     {0, 1,  9,  41,  129,  321,   681,  1289,   2241,   3649},
198     {0, 1, 11,  61,  231,  681,  1683,  3653,   7183,  13073},
199     {0, 1, 13,  85,  377, 1289,  3653,  8989,  19825,  40081},
200     {0, 1, 15, 113,  575, 2241,  7183, 19825,  48639, 108545},
201     {0, 1, 17, 145,  833, 3649, 13073, 40081, 108545, 265729}
202   };
203 
204   With this extension, V(N,K) may be written in terms of U(N,K):
205     V(N,K) = U(N,K) + U(N,K+1)
206    for all N>=0, K>=0.
207   Thus U(N,K+1) represents the number of combinations where the first element
208    is positive or zero, and U(N,K) represents the number of combinations where
209    it is negative.
210   With a large enough table of U(N,K) values, we could write O(N) encoding
211    and O(min(N*log(K),N+K)) decoding routines, but such a table would be
212    prohibitively large for small embedded devices (K may be as large as 32767
213    for small N, and N may be as large as 200).
214 
215   Both functions obey the same recurrence relation:
216     V(N,K) = V(N-1,K) + V(N,K-1) + V(N-1,K-1),
217     U(N,K) = U(N-1,K) + U(N,K-1) + U(N-1,K-1),
218    for all N>0, K>0, with different initial conditions at N=0 or K=0.
219   This allows us to construct a row of one of the tables above given the
220    previous row or the next row.
221   Thus we can derive O(NK) encoding and decoding routines with O(K) memory
222    using only addition and subtraction.
223 
224   When encoding, we build up from the U(2,K) row and work our way forwards.
225   When decoding, we need to start at the U(N,K) row and work our way backwards,
226    which requires a means of computing U(N,K).
227   U(N,K) may be computed from two previous values with the same N:
228     U(N,K) = ((2*N-1)*U(N,K-1) - U(N,K-2))/(K-1) + U(N,K-2)
229    for all N>1, and since U(N,K) is symmetric, a similar relation holds for two
230    previous values with the same K:
231     U(N,K>1) = ((2*K-1)*U(N-1,K) - U(N-2,K))/(N-1) + U(N-2,K)
232    for all K>1.
233   This allows us to construct an arbitrary row of the U(N,K) table by starting
234    with the first two values, which are constants.
235   This saves roughly 2/3 the work in our O(NK) decoding routine, but costs O(K)
236    multiplications.
237   Similar relations can be derived for V(N,K), but are not used here.
238 
239   For N>0 and K>0, U(N,K) and V(N,K) take on the form of an (N-1)-degree
240    polynomial for fixed N.
241   The first few are
242     U(1,K) = 1,
243     U(2,K) = 2*K-1,
244     U(3,K) = (2*K-2)*K+1,
245     U(4,K) = (((4*K-6)*K+8)*K-3)/3,
246     U(5,K) = ((((2*K-4)*K+10)*K-8)*K+3)/3,
247    and
248     V(1,K) = 2,
249     V(2,K) = 4*K,
250     V(3,K) = 4*K*K+2,
251     V(4,K) = 8*(K*K+2)*K/3,
252     V(5,K) = ((4*K*K+20)*K*K+6)/3,
253    for all K>0.
254   This allows us to derive O(N) encoding and O(N*log(K)) decoding routines for
255    small N (and indeed decoding is also O(N) for N<3).
256 
257   @ARTICLE{Fis86,
258     author="Thomas R. Fischer",
259     title="A Pyramid Vector Quantizer",
260     journal="IEEE Transactions on Information Theory",
261     volume="IT-32",
262     number=4,
263     pages="568--583",
264     month=Jul,
265     year=1986
266   }*/
267 
268 #ifndef SMALL_FOOTPRINT
269 
270 /*Compute U(1,_k).*/
ucwrs1(int _k)271 static inline unsigned ucwrs1(int _k){
272   return _k?1:0;
273 }
274 
275 /*Compute V(1,_k).*/
ncwrs1(int _k)276 static inline unsigned ncwrs1(int _k){
277   return _k?2:1;
278 }
279 
280 /*Compute U(2,_k).
281   Note that this may be called with _k=32768 (maxK[2]+1).*/
ucwrs2(unsigned _k)282 static inline unsigned ucwrs2(unsigned _k){
283   return _k?_k+(_k-1):0;
284 }
285 
286 /*Compute V(2,_k).*/
ncwrs2(int _k)287 static inline celt_uint32 ncwrs2(int _k){
288   return _k?4*(celt_uint32)_k:1;
289 }
290 
291 /*Compute U(3,_k).
292   Note that this may be called with _k=32768 (maxK[3]+1).*/
ucwrs3(unsigned _k)293 static inline celt_uint32 ucwrs3(unsigned _k){
294   return _k?(2*(celt_uint32)_k-2)*_k+1:0;
295 }
296 
297 /*Compute V(3,_k).*/
ncwrs3(int _k)298 static inline celt_uint32 ncwrs3(int _k){
299   return _k?2*(2*(unsigned)_k*(celt_uint32)_k+1):1;
300 }
301 
302 /*Compute U(4,_k).*/
ucwrs4(int _k)303 static inline celt_uint32 ucwrs4(int _k){
304   return _k?imusdiv32odd(2*_k,(2*_k-3)*(celt_uint32)_k+4,3,1):0;
305 }
306 
307 /*Compute V(4,_k).*/
ncwrs4(int _k)308 static inline celt_uint32 ncwrs4(int _k){
309   return _k?((_k*(celt_uint32)_k+2)*_k)/3<<3:1;
310 }
311 
312 /*Compute U(5,_k).*/
ucwrs5(int _k)313 static inline celt_uint32 ucwrs5(int _k){
314   return _k?(((((_k-2)*(unsigned)_k+5)*(celt_uint32)_k-4)*_k)/3<<1)+1:0;
315 }
316 
317 /*Compute V(5,_k).*/
ncwrs5(int _k)318 static inline celt_uint32 ncwrs5(int _k){
319   return _k?(((_k*(unsigned)_k+5)*(celt_uint32)_k*_k)/3<<2)+2:1;
320 }
321 
322 #endif /* SMALL_FOOTPRINT */
323 
324 /*Computes the next row/column of any recurrence that obeys the relation
325    u[i][j]=u[i-1][j]+u[i][j-1]+u[i-1][j-1].
326   _ui0 is the base case for the new row/column.*/
unext(celt_uint32 * _ui,unsigned _len,celt_uint32 _ui0)327 static inline void unext(celt_uint32 *_ui,unsigned _len,celt_uint32 _ui0){
328   celt_uint32 ui1;
329   unsigned      j;
330   /*This do-while will overrun the array if we don't have storage for at least
331      2 values.*/
332   j=1; do {
333     ui1=UADD32(UADD32(_ui[j],_ui[j-1]),_ui0);
334     _ui[j-1]=_ui0;
335     _ui0=ui1;
336   } while (++j<_len);
337   _ui[j-1]=_ui0;
338 }
339 
340 /*Computes the previous row/column of any recurrence that obeys the relation
341    u[i-1][j]=u[i][j]-u[i][j-1]-u[i-1][j-1].
342   _ui0 is the base case for the new row/column.*/
uprev(celt_uint32 * _ui,unsigned _n,celt_uint32 _ui0)343 static inline void uprev(celt_uint32 *_ui,unsigned _n,celt_uint32 _ui0){
344   celt_uint32 ui1;
345   unsigned      j;
346   /*This do-while will overrun the array if we don't have storage for at least
347      2 values.*/
348   j=1; do {
349     ui1=USUB32(USUB32(_ui[j],_ui[j-1]),_ui0);
350     _ui[j-1]=_ui0;
351     _ui0=ui1;
352   } while (++j<_n);
353   _ui[j-1]=_ui0;
354 }
355 
356 /*Compute V(_n,_k), as well as U(_n,0..._k+1).
357   _u: On exit, _u[i] contains U(_n,i) for i in [0..._k+1].*/
ncwrs_urow(unsigned _n,unsigned _k,celt_uint32 * _u)358 static celt_uint32 ncwrs_urow(unsigned _n,unsigned _k,celt_uint32 *_u){
359   celt_uint32 um2;
360   unsigned      len;
361   unsigned      k;
362   len=_k+2;
363   /*We require storage at least 3 values (e.g., _k>0).*/
364   celt_assert(len>=3);
365   _u[0]=0;
366   _u[1]=um2=1;
367 #ifndef SMALL_FOOTPRINT
368   if(_n<=6 || _k>255)
369 #endif
370  {
371     /*If _n==0, _u[0] should be 1 and the rest should be 0.*/
372     /*If _n==1, _u[i] should be 1 for i>1.*/
373     celt_assert(_n>=2);
374     /*If _k==0, the following do-while loop will overflow the buffer.*/
375     celt_assert(_k>0);
376     k=2;
377     do _u[k]=(k<<1)-1;
378     while(++k<len);
379     for(k=2;k<_n;k++)unext(_u+1,_k+1,1);
380   }
381 #ifndef SMALL_FOOTPRINT
382   else{
383     celt_uint32 um1;
384     celt_uint32 n2m1;
385     _u[2]=n2m1=um1=(_n<<1)-1;
386     for(k=3;k<len;k++){
387       /*U(N,K) = ((2*N-1)*U(N,K-1)-U(N,K-2))/(K-1) + U(N,K-2)*/
388       _u[k]=um2=imusdiv32even(n2m1,um1,um2,k-1)+um2;
389       if(++k>=len)break;
390       _u[k]=um1=imusdiv32odd(n2m1,um2,um1,k-1>>1)+um1;
391     }
392   }
393 #endif /* SMALL_FOOTPRINT */
394   return _u[_k]+_u[_k+1];
395 }
396 
397 /*Returns the _i'th combination of _k elements (at most 32767) chosen from a
398    set of size 1 with associated sign bits.
399   _y: Returns the vector of pulses.*/
cwrsi1(int _k,celt_uint32 _i,int * _y)400 static inline void cwrsi1(int _k,celt_uint32 _i,int *_y){
401   int s;
402   s=-(int)_i;
403   _y[0]=_k+s^s;
404 }
405 
406 #ifndef SMALL_FOOTPRINT
407 
408 /*Returns the _i'th combination of _k elements (at most 32767) chosen from a
409    set of size 2 with associated sign bits.
410   _y: Returns the vector of pulses.*/
cwrsi2(int _k,celt_uint32 _i,int * _y)411 static inline void cwrsi2(int _k,celt_uint32 _i,int *_y){
412   celt_uint32 p;
413   int           s;
414   int           yj;
415   p=ucwrs2(_k+1U);
416   s=-(_i>=p);
417   _i-=p&s;
418   yj=_k;
419   _k=_i+1>>1;
420   p=ucwrs2(_k);
421   _i-=p;
422   yj-=_k;
423   _y[0]=yj+s^s;
424   cwrsi1(_k,_i,_y+1);
425 }
426 
427 /*Returns the _i'th combination of _k elements (at most 32767) chosen from a
428    set of size 3 with associated sign bits.
429   _y: Returns the vector of pulses.*/
cwrsi3(int _k,celt_uint32 _i,int * _y)430 static void cwrsi3(int _k,celt_uint32 _i,int *_y){
431   celt_uint32 p;
432   int           s;
433   int           yj;
434   p=ucwrs3(_k+1U);
435   s=-(_i>=p);
436   _i-=p&s;
437   yj=_k;
438   /*Finds the maximum _k such that ucwrs3(_k)<=_i (tested for all
439      _i<2147418113=U(3,32768)).*/
440   _k=_i>0?isqrt32(2*_i-1)+1>>1:0;
441   p=ucwrs3(_k);
442   _i-=p;
443   yj-=_k;
444   _y[0]=yj+s^s;
445   cwrsi2(_k,_i,_y+1);
446 }
447 
448 /*Returns the _i'th combination of _k elements (at most 1172) chosen from a set
449    of size 4 with associated sign bits.
450   _y: Returns the vector of pulses.*/
cwrsi4(int _k,celt_uint32 _i,int * _y)451 static void cwrsi4(int _k,celt_uint32 _i,int *_y){
452   celt_uint32 p;
453   int           s;
454   int           yj;
455   int           kl;
456   int           kr;
457   p=ucwrs4(_k+1);
458   s=-(_i>=p);
459   _i-=p&s;
460   yj=_k;
461   /*We could solve a cubic for k here, but the form of the direct solution does
462      not lend itself well to exact integer arithmetic.
463     Instead we do a binary search on U(4,K).*/
464   kl=0;
465   kr=_k;
466   for(;;){
467     _k=kl+kr>>1;
468     p=ucwrs4(_k);
469     if(p<_i){
470       if(_k>=kr)break;
471       kl=_k+1;
472     }
473     else if(p>_i)kr=_k-1;
474     else break;
475   }
476   _i-=p;
477   yj-=_k;
478   _y[0]=yj+s^s;
479   cwrsi3(_k,_i,_y+1);
480 }
481 
482 /*Returns the _i'th combination of _k elements (at most 238) chosen from a set
483    of size 5 with associated sign bits.
484   _y: Returns the vector of pulses.*/
cwrsi5(int _k,celt_uint32 _i,int * _y)485 static void cwrsi5(int _k,celt_uint32 _i,int *_y){
486   celt_uint32 p;
487   int           s;
488   int           yj;
489   p=ucwrs5(_k+1);
490   s=-(_i>=p);
491   _i-=p&s;
492   yj=_k;
493   /* A binary search on U(5,K) avoids the need for 64-bit arithmetic */
494   {
495     int kl=0;
496     int kr=_k;
497     for(;;){
498       _k=kl+kr>>1;
499       p=ucwrs5(_k);
500       if(p<_i){
501         if(_k>=kr)break;
502         kl=_k+1;
503       }
504       else if(p>_i)kr=_k-1;
505       else break;
506     }
507   }
508   _i-=p;
509   yj-=_k;
510   _y[0]=yj+s^s;
511   cwrsi4(_k,_i,_y+1);
512 }
513 #endif /* SMALL_FOOTPRINT */
514 
515 /*Returns the _i'th combination of _k elements chosen from a set of size _n
516    with associated sign bits.
517   _y: Returns the vector of pulses.
518   _u: Must contain entries [0..._k+1] of row _n of U() on input.
519       Its contents will be destructively modified.*/
cwrsi(int _n,int _k,celt_uint32 _i,int * _y,celt_uint32 * _u)520 static void cwrsi(int _n,int _k,celt_uint32 _i,int *_y,celt_uint32 *_u){
521   int j;
522   celt_assert(_n>0);
523   j=0;
524   do{
525     celt_uint32 p;
526     int           s;
527     int           yj;
528     p=_u[_k+1];
529     s=-(_i>=p);
530     _i-=p&s;
531     yj=_k;
532     p=_u[_k];
533     while(p>_i)p=_u[--_k];
534     _i-=p;
535     yj-=_k;
536     _y[j]=yj+s^s;
537     uprev(_u,_k+2,0);
538   }
539   while(++j<_n);
540 }
541 
542 
543 /*Returns the index of the given combination of K elements chosen from a set
544    of size 1 with associated sign bits.
545   _y: The vector of pulses, whose sum of absolute values is K.
546   _k: Returns K.*/
icwrs1(const int * _y,int * _k)547 static inline celt_uint32 icwrs1(const int *_y,int *_k){
548   *_k=abs(_y[0]);
549   return _y[0]<0;
550 }
551 
552 #ifndef SMALL_FOOTPRINT
553 
554 /*Returns the index of the given combination of K elements chosen from a set
555    of size 2 with associated sign bits.
556   _y: The vector of pulses, whose sum of absolute values is K.
557   _k: Returns K.*/
icwrs2(const int * _y,int * _k)558 static inline celt_uint32 icwrs2(const int *_y,int *_k){
559   celt_uint32 i;
560   int           k;
561   i=icwrs1(_y+1,&k);
562   i+=ucwrs2(k);
563   k+=abs(_y[0]);
564   if(_y[0]<0)i+=ucwrs2(k+1U);
565   *_k=k;
566   return i;
567 }
568 
569 /*Returns the index of the given combination of K elements chosen from a set
570    of size 3 with associated sign bits.
571   _y: The vector of pulses, whose sum of absolute values is K.
572   _k: Returns K.*/
icwrs3(const int * _y,int * _k)573 static inline celt_uint32 icwrs3(const int *_y,int *_k){
574   celt_uint32 i;
575   int           k;
576   i=icwrs2(_y+1,&k);
577   i+=ucwrs3(k);
578   k+=abs(_y[0]);
579   if(_y[0]<0)i+=ucwrs3(k+1U);
580   *_k=k;
581   return i;
582 }
583 
584 /*Returns the index of the given combination of K elements chosen from a set
585    of size 4 with associated sign bits.
586   _y: The vector of pulses, whose sum of absolute values is K.
587   _k: Returns K.*/
icwrs4(const int * _y,int * _k)588 static inline celt_uint32 icwrs4(const int *_y,int *_k){
589   celt_uint32 i;
590   int           k;
591   i=icwrs3(_y+1,&k);
592   i+=ucwrs4(k);
593   k+=abs(_y[0]);
594   if(_y[0]<0)i+=ucwrs4(k+1);
595   *_k=k;
596   return i;
597 }
598 
599 /*Returns the index of the given combination of K elements chosen from a set
600    of size 5 with associated sign bits.
601   _y: The vector of pulses, whose sum of absolute values is K.
602   _k: Returns K.*/
icwrs5(const int * _y,int * _k)603 static inline celt_uint32 icwrs5(const int *_y,int *_k){
604   celt_uint32 i;
605   int           k;
606   i=icwrs4(_y+1,&k);
607   i+=ucwrs5(k);
608   k+=abs(_y[0]);
609   if(_y[0]<0)i+=ucwrs5(k+1);
610   *_k=k;
611   return i;
612 }
613 #endif /* SMALL_FOOTPRINT */
614 
615 /*Returns the index of the given combination of K elements chosen from a set
616    of size _n with associated sign bits.
617   _y:  The vector of pulses, whose sum of absolute values must be _k.
618   _nc: Returns V(_n,_k).*/
icwrs(int _n,int _k,celt_uint32 * _nc,const int * _y,celt_uint32 * _u)619 celt_uint32 icwrs(int _n,int _k,celt_uint32 *_nc,const int *_y,
620  celt_uint32 *_u){
621   celt_uint32 i;
622   int           j;
623   int           k;
624   /*We can't unroll the first two iterations of the loop unless _n>=2.*/
625   celt_assert(_n>=2);
626   _u[0]=0;
627   for(k=1;k<=_k+1;k++)_u[k]=(k<<1)-1;
628   i=icwrs1(_y+_n-1,&k);
629   j=_n-2;
630   i+=_u[k];
631   k+=abs(_y[j]);
632   if(_y[j]<0)i+=_u[k+1];
633   while(j-->0){
634     unext(_u,_k+2,0);
635     i+=_u[k];
636     k+=abs(_y[j]);
637     if(_y[j]<0)i+=_u[k+1];
638   }
639   *_nc=_u[k]+_u[k+1];
640   return i;
641 }
642 
643 #ifdef CUSTOM_MODES
get_required_bits(celt_int16 * _bits,int _n,int _maxk,int _frac)644 void get_required_bits(celt_int16 *_bits,int _n,int _maxk,int _frac){
645   int k;
646   /*_maxk==0 => there's nothing to do.*/
647   celt_assert(_maxk>0);
648   _bits[0]=0;
649   if (_n==1)
650   {
651     for (k=1;k<=_maxk;k++)
652       _bits[k] = 1<<_frac;
653   }
654   else {
655     VARDECL(celt_uint32,u);
656     SAVE_STACK;
657     ALLOC(u,_maxk+2U,celt_uint32);
658     ncwrs_urow(_n,_maxk,u);
659     for(k=1;k<=_maxk;k++)
660       _bits[k]=log2_frac(u[k]+u[k+1],_frac);
661     RESTORE_STACK;
662   }
663 }
664 #endif /* CUSTOM_MODES */
665 
encode_pulses(const int * _y,int _n,int _k,ec_enc * _enc)666 void encode_pulses(const int *_y,int _n,int _k,ec_enc *_enc){
667   celt_uint32 i;
668   if (_k==0)
669      return;
670   switch(_n){
671     case 1:{
672       i=icwrs1(_y,&_k);
673       celt_assert(ncwrs1(_k)==2);
674       ec_enc_bits(_enc,i,1);
675     }break;
676 #ifndef SMALL_FOOTPRINT
677     case 2:{
678       i=icwrs2(_y,&_k);
679       ec_enc_uint(_enc,i,ncwrs2(_k));
680     }break;
681     case 3:{
682       i=icwrs3(_y,&_k);
683       ec_enc_uint(_enc,i,ncwrs3(_k));
684     }break;
685     case 4:{
686       i=icwrs4(_y,&_k);
687       ec_enc_uint(_enc,i,ncwrs4(_k));
688     }break;
689     case 5:{
690       i=icwrs5(_y,&_k);
691       ec_enc_uint(_enc,i,ncwrs5(_k));
692     }break;
693 #endif
694      default:
695     {
696       VARDECL(celt_uint32,u);
697       celt_uint32 nc;
698       SAVE_STACK;
699       ALLOC(u,_k+2U,celt_uint32);
700       i=icwrs(_n,_k,&nc,_y,u);
701       ec_enc_uint(_enc,i,nc);
702       RESTORE_STACK;
703     };
704   }
705 }
706 
decode_pulses(int * _y,int _n,int _k,ec_dec * _dec)707 void decode_pulses(int *_y,int _n,int _k,ec_dec *_dec)
708 {
709    if (_k==0) {
710       int i;
711       for (i=0;i<_n;i++)
712          _y[i] = 0;
713       return;
714    }
715    switch(_n){
716     case 1:{
717       celt_assert(ncwrs1(_k)==2);
718       cwrsi1(_k,ec_dec_bits(_dec,1),_y);
719     }break;
720 #ifndef SMALL_FOOTPRINT
721     case 2:cwrsi2(_k,ec_dec_uint(_dec,ncwrs2(_k)),_y);break;
722     case 3:cwrsi3(_k,ec_dec_uint(_dec,ncwrs3(_k)),_y);break;
723     case 4:cwrsi4(_k,ec_dec_uint(_dec,ncwrs4(_k)),_y);break;
724     case 5:cwrsi5(_k,ec_dec_uint(_dec,ncwrs5(_k)),_y);break;
725 #endif
726       default:
727     {
728       VARDECL(celt_uint32,u);
729       SAVE_STACK;
730       ALLOC(u,_k+2U,celt_uint32);
731       cwrsi(_n,_k,ec_dec_uint(_dec,ncwrs_urow(_n,_k,u)),_y,u);
732       RESTORE_STACK;
733     }
734   }
735 }
736 
737