1 /********************************************************************
2  *                                                                  *
3  * THIS FILE IS PART OF THE OggVorbis SOFTWARE CODEC SOURCE CODE.   *
4  * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
5  * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
6  * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
7  *                                                                  *
8  * THE OggVorbis SOURCE CODE IS (C) COPYRIGHT 1994-2015             *
9  * by the Xiph.Org Foundation https://xiph.org/                     *
10  *                                                                  *
11  ********************************************************************
12 
13  function: basic shared codebook operations
14 
15  ********************************************************************/
16 
17 #include "ogg.h"
18 #include "vorbis_os.h"
19 #include "vorbis_misc.h"
20 #include "vorbis_codec.h"
21 #include "vorbis_codebook.h"
22 #include "vorbis_scales.h"
23 #include <climits>
24 
25 /**** pack/unpack helpers ******************************************/
26 
ov_ilog(ogg_uint32_t v)27 int ov_ilog(ogg_uint32_t v){
28   int ret;
29   for(ret=0;v;ret++)v>>=1;
30   return ret;
31 }
32 
33 /* 32 bit float (not IEEE; nonnormalized mantissa +
34    biased exponent) : neeeeeee eeemmmmm mmmmmmmm mmmmmmmm
35    Why not IEEE?  It's just not that important here. */
36 
37 #define VQ_FEXP 10
38 #define VQ_FMAN 21
39 #define VQ_FEXP_BIAS 768 /* bias toward values smaller than 1. */
40 
41 /* doesn't currently guard under/overflow */
_float32_pack(float val)42 long _float32_pack(float val){
43   int sign=0;
44   long exp;
45   long mant;
46   if(val<0){
47     sign=0x80000000;
48     val= -val;
49   }
50   exp= floor(log(val)/log(2.f)+.001); /* +epsilon */
51   mant=rint(ldexp(val,(VQ_FMAN-1)-exp));
52   exp=(exp+VQ_FEXP_BIAS)<<VQ_FMAN;
53 
54   return(sign|exp|mant);
55 }
56 
_float32_unpack(long val)57 float _float32_unpack(long val){
58   double mant=val&0x1fffff;
59   int    sign=val&0x80000000;
60   long   exp =(val&0x7fe00000L)>>VQ_FMAN;
61   if(sign)mant= -mant;
62   exp=exp-(VQ_FMAN-1)-VQ_FEXP_BIAS;
63   /* clamp excessive exponent values */
64   if (exp>63){
65     exp=63;
66   }
67   if (exp<-63){
68     exp=-63;
69   }
70   return(ldexp(mant,exp));
71 }
72 
73 /* given a list of word lengths, generate a list of codewords.  Works
74    for length ordered or unordered, always assigns the lowest valued
75    codewords first.  Extended to handle unused entries (length 0) */
_make_words(char * l,long n,long sparsecount)76 ogg_uint32_t *_make_words(char *l,long n,long sparsecount){
77   long i,j,count=0;
78   ogg_uint32_t marker[33];
79   ogg_uint32_t *r = (ogg_uint32_t *)_Melder_malloc((sparsecount?sparsecount:n)*sizeof(*r));
80   memset(marker,0,sizeof(marker));
81 
82   for(i=0;i<n;i++){
83     long length=l[i];
84     if(length>0){
85       ogg_uint32_t entry=marker[length];
86 
87       /* when we claim a node for an entry, we also claim the nodes
88          below it (pruning off the imagined tree that may have dangled
89          from it) as well as blocking the use of any nodes directly
90          above for leaves */
91 
92       /* update ourself */
93       if(length<32 && (entry>>length)){
94         /* error condition; the lengths must specify an overpopulated tree */
95         Melder_free(r);
96         return(nullptr);
97       }
98       r[count++]=entry;
99 
100       /* Look to see if the next shorter marker points to the node
101          above. if so, update it and repeat.  */
102       {
103         for(j=length;j>0;j--){
104 
105           if(marker[j]&1){
106             /* have to jump branches */
107             if(j==1)
108               marker[1]++;
109             else
110               marker[j]=marker[j-1]<<1;
111             break; /* invariant says next upper marker would already
112                       have been moved if it was on the same path */
113           }
114           marker[j]++;
115         }
116       }
117 
118       /* prune the tree; the implicit invariant says all the longer
119          markers were dangling from our just-taken node.  Dangle them
120          from our *new* node. */
121       for(j=length+1;j<33;j++)
122         if((marker[j]>>1) == entry){
123           entry=marker[j];
124           marker[j]=marker[j-1]<<1;
125         }else
126           break;
127     }else
128       if(sparsecount==0)count++;
129   }
130 
131   /* any underpopulated tree must be rejected. */
132   /* Single-entry codebooks are a retconned extension to the spec.
133      They have a single codeword '0' of length 1 that results in an
134      underpopulated tree.  Shield that case from the underformed tree check. */
135   if(!(count==1 && marker[2]==2)){
136     for(i=1;i<33;i++)
137       if(marker[i] & (0xffffffffUL>>(32-i))){
138         Melder_free(r);
139         return(nullptr);
140       }
141   }
142 
143   /* bitreverse the words because our bitwise packer/unpacker is LSb
144      endian */
145   for(i=0,count=0;i<n;i++){
146     ogg_uint32_t temp=0;
147     for(j=0;j<l[i];j++){
148       temp<<=1;
149       temp|=(r[count]>>j)&1;
150     }
151 
152     if(sparsecount){
153       if(l[i])
154         r[count++]=temp;
155     }else
156       r[count++]=temp;
157   }
158 
159   return(r);
160 }
161 
162 /* there might be a straightforward one-line way to do the below
163    that's portable and totally safe against roundoff, but I haven't
164    thought of it.  Therefore, we opt on the side of caution */
_book_maptype1_quantvals(const static_codebook * b)165 long _book_maptype1_quantvals(const static_codebook *b){
166   long vals;
167   if(b->entries<1){
168     return(0);
169   }
170   vals=floor(pow((float)b->entries,1.f/b->dim));
171 
172   /* the above *should* be reliable, but we'll not assume that FP is
173      ever reliable when bitstream sync is at stake; verify via integer
174      means that vals really is the greatest value of dim for which
175      vals^b->bim <= b->entries */
176   /* treat the above as an initial guess */
177   if(vals<1){
178     vals=1;
179   }
180   while(1){
181     long acc=1;
182     long acc1=1;
183     int i;
184     for(i=0;i<b->dim;i++){
185       if(b->entries/vals<acc)break;
186       acc*=vals;
187       if(LONG_MAX/(vals+1)<acc1)acc1=LONG_MAX;
188       else acc1*=vals+1;
189     }
190     if(i>=b->dim && acc<=b->entries && acc1>b->entries){
191       return(vals);
192     }else{
193       if(i<b->dim || acc>b->entries){
194         vals--;
195       }else{
196         vals++;
197       }
198     }
199   }
200 }
201 
202 /* unpack the quantized list of values for encode/decode ***********/
203 /* we need to deal with two map types: in map type 1, the values are
204    generated algorithmically (each column of the vector counts through
205    the values in the quant vector). in map type 2, all the values came
206    in in an explicit list.  Both value lists must be unpacked */
_book_unquantize(const static_codebook * b,int n,int * sparsemap)207 float *_book_unquantize(const static_codebook *b,int n,int *sparsemap){
208   long j,k,count=0;
209   if(b->maptype==1 || b->maptype==2){
210     int quantvals;
211     float mindel=_float32_unpack(b->q_min);
212     float delta=_float32_unpack(b->q_delta);
213     float *r = (float *) _Melder_calloc(n*b->dim,sizeof(*r));
214 
215     /* maptype 1 and 2 both use a quantized value vector, but
216        different sizes */
217     switch(b->maptype){
218     case 1:
219       /* most of the time, entries%dimensions == 0, but we need to be
220          well defined.  We define that the possible vales at each
221          scalar is values == entries/dim.  If entries%dim != 0, we'll
222          have 'too few' values (values*dim<entries), which means that
223          we'll have 'left over' entries; left over entries use zeroed
224          values (and are wasted).  So don't generate codebooks like
225          that */
226       quantvals=_book_maptype1_quantvals(b);
227       for(j=0;j<b->entries;j++){
228         if((sparsemap && b->lengthlist[j]) || !sparsemap){
229           float last=0.f;
230           int indexdiv=1;
231           for(k=0;k<b->dim;k++){
232             int index= (j/indexdiv)%quantvals;
233             float val=b->quantlist[index];
234             val=fabs(val)*delta+mindel+last;
235             if(b->q_sequencep)last=val;
236             if(sparsemap)
237               r[sparsemap[count]*b->dim+k]=val;
238             else
239               r[count*b->dim+k]=val;
240             indexdiv*=quantvals;
241           }
242           count++;
243         }
244 
245       }
246       break;
247     case 2:
248       for(j=0;j<b->entries;j++){
249         if((sparsemap && b->lengthlist[j]) || !sparsemap){
250           float last=0.f;
251 
252           for(k=0;k<b->dim;k++){
253             float val=b->quantlist[j*b->dim+k];
254             val=fabs(val)*delta+mindel+last;
255             if(b->q_sequencep)last=val;
256             if(sparsemap)
257               r[sparsemap[count]*b->dim+k]=val;
258             else
259               r[count*b->dim+k]=val;
260           }
261           count++;
262         }
263       }
264       break;
265     }
266 
267     return(r);
268   }
269   return(nullptr);
270 }
271 
vorbis_staticbook_destroy(static_codebook * b)272 void vorbis_staticbook_destroy(static_codebook *b){
273   if(b->allocedp){
274     if(b->quantlist)Melder_free(b->quantlist);
275     if(b->lengthlist)Melder_free(b->lengthlist);
276     memset(b,0,sizeof(*b));
277     Melder_free(b);
278   } /* otherwise, it is in static memory */
279 }
280 
vorbis_book_clear(codebook * b)281 void vorbis_book_clear(codebook *b){
282   /* static book is not cleared; we're likely called on the lookup and
283      the static codebook belongs to the info struct */
284   if(b->valuelist)Melder_free(b->valuelist);
285   if(b->codelist)Melder_free(b->codelist);
286 
287   if(b->dec_index)Melder_free(b->dec_index);
288   if(b->dec_codelengths)Melder_free(b->dec_codelengths);
289   if(b->dec_firsttable)Melder_free(b->dec_firsttable);
290 
291   memset(b,0,sizeof(*b));
292 }
293 
vorbis_book_init_encode(codebook * c,const static_codebook * s)294 int vorbis_book_init_encode(codebook *c,const static_codebook *s){
295 
296   memset(c,0,sizeof(*c));
297   c->c=s;
298   c->entries=s->entries;
299   c->used_entries=s->entries;
300   c->dim=s->dim;
301   c->codelist=_make_words(s->lengthlist,s->entries,0);
302   /* c->valuelist=_book_unquantize(s,s->entries,nullptr); */
303   c->quantvals=_book_maptype1_quantvals(s);
304   c->minval=(int)rint(_float32_unpack(s->q_min));
305   c->delta=(int)rint(_float32_unpack(s->q_delta));
306 
307   return(0);
308 }
309 
bitreverse(ogg_uint32_t x)310 static ogg_uint32_t bitreverse(ogg_uint32_t x){
311   x=    ((x>>16)&0x0000ffffUL) | ((x<<16)&0xffff0000UL);
312   x=    ((x>> 8)&0x00ff00ffUL) | ((x<< 8)&0xff00ff00UL);
313   x=    ((x>> 4)&0x0f0f0f0fUL) | ((x<< 4)&0xf0f0f0f0UL);
314   x=    ((x>> 2)&0x33333333UL) | ((x<< 2)&0xccccccccUL);
315   return((x>> 1)&0x55555555UL) | ((x<< 1)&0xaaaaaaaaUL);
316 }
317 
sort32a(const void * a,const void * b)318 static int sort32a(const void *a,const void *b){
319   return ( **(ogg_uint32_t **)a>**(ogg_uint32_t **)b)-
320     ( **(ogg_uint32_t **)a<**(ogg_uint32_t **)b);
321 }
322 
323 /* decode codebook arrangement is more heavily optimized than encode */
vorbis_book_init_decode(codebook * c,const static_codebook * s)324 int vorbis_book_init_decode(codebook *c,const static_codebook *s){
325   int i,j,n=0,tabn;
326   int *sortindex;
327 
328   memset(c,0,sizeof(*c));
329 
330   /* count actually used entries and find max length */
331   for(i=0;i<s->entries;i++)
332     if(s->lengthlist[i]>0)
333       n++;
334 
335   c->entries=s->entries;
336   c->used_entries=n;
337   c->dim=s->dim;
338 
339   if(n>0){
340     /* two different remappings go on here.
341 
342     First, we collapse the likely sparse codebook down only to
343     actually represented values/words.  This collapsing needs to be
344     indexed as map-valueless books are used to encode original entry
345     positions as integers.
346 
347     Second, we reorder all vectors, including the entry index above,
348     by sorted bitreversed codeword to allow treeless decode. */
349 
350     /* perform sort */
351     ogg_uint32_t *codes=_make_words(s->lengthlist,s->entries,c->used_entries);
352     ogg_uint32_t **codep = (ogg_uint32_t **) alloca(sizeof(*codep)*n);
353 
354     if(codes==nullptr)goto err_out;
355 
356     for(i=0;i<n;i++){
357       codes[i]=bitreverse(codes[i]);
358       codep[i]=codes+i;
359     }
360 
361     qsort(codep,n,sizeof(*codep),sort32a);
362 
363     sortindex = (int *) alloca(n*sizeof(*sortindex));
364     c->codelist= (ogg_uint32_t *) _Melder_malloc(n*sizeof(*c->codelist));
365     /* the index is a reverse index */
366     for(i=0;i<n;i++){
367       int position=codep[i]-codes;
368       sortindex[position]=i;
369     }
370 
371     for(i=0;i<n;i++)
372       c->codelist[sortindex[i]]=codes[i];
373     Melder_free(codes);
374 
375     c->valuelist=_book_unquantize(s,n,sortindex);
376     c->dec_index = (int *) _Melder_malloc(n*sizeof(*c->dec_index));
377 
378     for(n=0,i=0;i<s->entries;i++)
379       if(s->lengthlist[i]>0)
380         c->dec_index[sortindex[n++]]=i;
381 
382     c->dec_codelengths = (char *) _Melder_malloc(n*sizeof(*c->dec_codelengths));
383     c->dec_maxlength=0;
384     for(n=0,i=0;i<s->entries;i++)
385       if(s->lengthlist[i]>0){
386         c->dec_codelengths[sortindex[n++]]=s->lengthlist[i];
387         if(s->lengthlist[i]>c->dec_maxlength)
388           c->dec_maxlength=s->lengthlist[i];
389       }
390 
391     if(n==1 && c->dec_maxlength==1){
392       /* special case the 'single entry codebook' with a single bit
393        fastpath table (that always returns entry 0 )in order to use
394        unmodified decode paths. */
395       c->dec_firsttablen=1;
396       c->dec_firsttable= (ogg_uint32_t *) _Melder_calloc(2,sizeof(*c->dec_firsttable));
397       c->dec_firsttable[0]=c->dec_firsttable[1]=1;
398 
399     }else{
400       c->dec_firsttablen=ov_ilog(c->used_entries)-4; /* this is magic */
401       if(c->dec_firsttablen<5)c->dec_firsttablen=5;
402       if(c->dec_firsttablen>8)c->dec_firsttablen=8;
403 
404       tabn=1<<c->dec_firsttablen;
405       c->dec_firsttable = (ogg_uint32_t *) _Melder_calloc(tabn,sizeof(*c->dec_firsttable));
406 
407       for(i=0;i<n;i++){
408         if(c->dec_codelengths[i]<=c->dec_firsttablen){
409           ogg_uint32_t orig=bitreverse(c->codelist[i]);
410           for(j=0;j<(1<<(c->dec_firsttablen-c->dec_codelengths[i]));j++)
411             c->dec_firsttable[orig|(j<<c->dec_codelengths[i])]=i+1;
412         }
413       }
414 
415       /* now fill in 'unused' entries in the firsttable with hi/lo search
416          hints for the non-direct-hits */
417       {
418         ogg_uint32_t mask=0xfffffffeUL<<(31-c->dec_firsttablen);
419         long lo=0,hi=0;
420 
421         for(i=0;i<tabn;i++){
422           ogg_uint32_t word=i<<(32-c->dec_firsttablen);
423           if(c->dec_firsttable[bitreverse(word)]==0){
424             while((lo+1)<n && c->codelist[lo+1]<=word)lo++;
425             while(    hi<n && word>=(c->codelist[hi]&mask))hi++;
426 
427             /* we only actually have 15 bits per hint to play with here.
428                In order to overflow gracefully (nothing breaks, efficiency
429                just drops), encode as the difference from the extremes. */
430             {
431               unsigned long loval=lo;
432               unsigned long hival=n-hi;
433 
434               if(loval>0x7fff)loval=0x7fff;
435               if(hival>0x7fff)hival=0x7fff;
436               c->dec_firsttable[bitreverse(word)]=
437                 0x80000000UL | (loval<<15) | hival;
438             }
439           }
440         }
441       }
442     }
443   }
444 
445   return(0);
446  err_out:
447   vorbis_book_clear(c);
448   return(-1);
449 }
450 
vorbis_book_codeword(codebook * book,int entry)451 long vorbis_book_codeword(codebook *book,int entry){
452   if(book->c) /* only use with encode; decode optimizations are
453                  allowed to break this */
454     return book->codelist[entry];
455   return -1;
456 }
457 
vorbis_book_codelen(codebook * book,int entry)458 long vorbis_book_codelen(codebook *book,int entry){
459   if(book->c) /* only use with encode; decode optimizations are
460                  allowed to break this */
461     return book->c->lengthlist[entry];
462   return -1;
463 }
464 
465 #ifdef _V_SELFTEST
466 
467 /* Unit tests of the dequantizer; this stuff will be OK
468    cross-platform, I simply want to be sure that special mapping cases
469    actually work properly; a bug could go unnoticed for a while */
470 
471 #include <stdio.h>
472 
473 /* cases:
474 
475    no mapping
476    full, explicit mapping
477    algorithmic mapping
478 
479    nonsequential
480    sequential
481 */
482 
483 static long full_quantlist1[]={0,1,2,3,    4,5,6,7, 8,3,6,1};
484 static long partial_quantlist1[]={0,7,2};
485 
486 /* no mapping */
487 static_codebook test1={
488   4,16,
489   nullptr,
490   0,
491   0,0,0,0,
492   nullptr,
493   0
494 };
495 static float *test1_result=nullptr;
496 
497 /* linear, full mapping, nonsequential */
498 static_codebook test2={
499   4,3,
500   nullptr,
501   2,
502   -533200896,1611661312,4,0,
503   full_quantlist1,
504   0
505 };
506 static float test2_result[]={-3,-2,-1,0, 1,2,3,4, 5,0,3,-2};
507 
508 /* linear, full mapping, sequential */
509 static_codebook test3={
510   4,3,
511   nullptr,
512   2,
513   -533200896,1611661312,4,1,
514   full_quantlist1,
515   0
516 };
517 static float test3_result[]={-3,-5,-6,-6, 1,3,6,10, 5,5,8,6};
518 
519 /* linear, algorithmic mapping, nonsequential */
520 static_codebook test4={
521   3,27,
522   nullptr,
523   1,
524   -533200896,1611661312,4,0,
525   partial_quantlist1,
526   0
527 };
528 static float test4_result[]={-3,-3,-3, 4,-3,-3, -1,-3,-3,
529                               -3, 4,-3, 4, 4,-3, -1, 4,-3,
530                               -3,-1,-3, 4,-1,-3, -1,-1,-3,
531                               -3,-3, 4, 4,-3, 4, -1,-3, 4,
532                               -3, 4, 4, 4, 4, 4, -1, 4, 4,
533                               -3,-1, 4, 4,-1, 4, -1,-1, 4,
534                               -3,-3,-1, 4,-3,-1, -1,-3,-1,
535                               -3, 4,-1, 4, 4,-1, -1, 4,-1,
536                               -3,-1,-1, 4,-1,-1, -1,-1,-1};
537 
538 /* linear, algorithmic mapping, sequential */
539 static_codebook test5={
540   3,27,
541   nullptr,
542   1,
543   -533200896,1611661312,4,1,
544   partial_quantlist1,
545   0
546 };
547 static float test5_result[]={-3,-6,-9, 4, 1,-2, -1,-4,-7,
548                               -3, 1,-2, 4, 8, 5, -1, 3, 0,
549                               -3,-4,-7, 4, 3, 0, -1,-2,-5,
550                               -3,-6,-2, 4, 1, 5, -1,-4, 0,
551                               -3, 1, 5, 4, 8,12, -1, 3, 7,
552                               -3,-4, 0, 4, 3, 7, -1,-2, 2,
553                               -3,-6,-7, 4, 1, 0, -1,-4,-5,
554                               -3, 1, 0, 4, 8, 7, -1, 3, 2,
555                               -3,-4,-5, 4, 3, 2, -1,-2,-3};
556 
run_test(static_codebook * b,float * comp)557 void run_test(static_codebook *b,float *comp){
558   float *out=_book_unquantize(b,b->entries,nullptr);
559   int i;
560 
561   if(comp){
562     if(!out){
563       fprintf(stderr,"_book_unquantize incorrectly returned nullptr\n");
564       exit(1);
565     }
566 
567     for(i=0;i<b->entries*b->dim;i++)
568       if(fabs(out[i]-comp[i])>.0001){
569         fprintf(stderr,"disagreement in unquantized and reference data:\n"
570                 "position %d, %g != %g\n",i,out[i],comp[i]);
571         exit(1);
572       }
573 
574   }else{
575     if(out){
576       fprintf(stderr,"_book_unquantize returned a value array: \n"
577               " correct result should have been nullptr\n");
578       exit(1);
579     }
580   }
581   free(out);
582 }
583 
main()584 int main(){
585   /* run the nine dequant tests, and compare to the hand-rolled results */
586   fprintf(stderr,"Dequant test 1... ");
587   run_test(&test1,test1_result);
588   fprintf(stderr,"OK\nDequant test 2... ");
589   run_test(&test2,test2_result);
590   fprintf(stderr,"OK\nDequant test 3... ");
591   run_test(&test3,test3_result);
592   fprintf(stderr,"OK\nDequant test 4... ");
593   run_test(&test4,test4_result);
594   fprintf(stderr,"OK\nDequant test 5... ");
595   run_test(&test5,test5_result);
596   fprintf(stderr,"OK\n\n");
597 
598   return(0);
599 }
600 
601 #endif
602