1 /********************************************************************
2  *                                                                  *
3  * THIS FILE IS PART OF THE OggVorbis 'TREMOR' CODEC SOURCE CODE.   *
4  *                                                                  *
5  * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
6  * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
7  * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
8  *                                                                  *
9  * THE OggVorbis 'TREMOR' SOURCE CODE IS (C) COPYRIGHT 1994-2002    *
10  * BY THE Xiph.Org FOUNDATION http://www.xiph.org/                  *
11  *                                                                  *
12  ********************************************************************
13 
14  function: basic shared codebook operations
15 
16  ********************************************************************/
17 
18 #include <stdlib.h>
19 #include <math.h>
20 #include <string.h>
21 #include <ogg/ogg.h>
22 #include "misc.h"
23 #include "ivorbiscodec.h"
24 #include "codebook.h"
25 
26 /**** pack/unpack helpers ******************************************/
_ilog(unsigned int v)27 int _ilog(unsigned int v){
28   int ret=0;
29   while(v){
30     ret++;
31     v>>=1;
32   }
33   return(ret);
34 }
35 
36 /* 32 bit float (not IEEE; nonnormalized mantissa +
37    biased exponent) : neeeeeee eeemmmmm mmmmmmmm mmmmmmmm
38    Why not IEEE?  It's just not that important here. */
39 
40 #define VQ_FEXP 10
41 #define VQ_FMAN 21
42 #define VQ_FEXP_BIAS 768 /* bias toward values smaller than 1. */
43 
_float32_unpack(long val,int * point)44 static ogg_int32_t _float32_unpack(long val,int *point){
45   long   mant=val&0x1fffff;
46   int    sign=val&0x80000000;
47   long   exp =(val&0x7fe00000L)>>VQ_FMAN;
48 
49   exp-=(VQ_FMAN-1)+VQ_FEXP_BIAS;
50 
51   if(mant){
52     while(!(mant&0x40000000)){
53       mant<<=1;
54       exp-=1;
55     }
56 
57     if(sign)mant= -mant;
58   }else{
59     sign=0;
60     exp=-9999;
61   }
62 
63   *point=exp;
64   return mant;
65 }
66 
67 /* given a list of word lengths, generate a list of codewords.  Works
68    for length ordered or unordered, always assigns the lowest valued
69    codewords first.  Extended to handle unused entries (length 0) */
_make_words(long * l,long n,long sparsecount)70 ogg_uint32_t *_make_words(long *l,long n,long sparsecount){
71   long i,j,count=0;
72   ogg_uint32_t marker[33];
73   ogg_uint32_t *r=(ogg_uint32_t *)_ogg_malloc((sparsecount?sparsecount:n)*sizeof(*r));
74   memset(marker,0,sizeof(marker));
75 
76   for(i=0;i<n;i++){
77     long length=l[i];
78     if(length>0){
79       ogg_uint32_t entry=marker[length];
80 
81       /* when we claim a node for an entry, we also claim the nodes
82 	 below it (pruning off the imagined tree that may have dangled
83 	 from it) as well as blocking the use of any nodes directly
84 	 above for leaves */
85 
86       /* update ourself */
87       if(length<32 && (entry>>length)){
88 	/* error condition; the lengths must specify an overpopulated tree */
89 	_ogg_free(r);
90 	return(NULL);
91       }
92       r[count++]=entry;
93 
94       /* Look to see if the next shorter marker points to the node
95 	 above. if so, update it and repeat.  */
96       {
97 	for(j=length;j>0;j--){
98 
99 	  if(marker[j]&1){
100 	    /* have to jump branches */
101 	    if(j==1)
102 	      marker[1]++;
103 	    else
104 	      marker[j]=marker[j-1]<<1;
105 	    break; /* invariant says next upper marker would already
106 		      have been moved if it was on the same path */
107 	  }
108 	  marker[j]++;
109 	}
110       }
111 
112       /* prune the tree; the implicit invariant says all the longer
113 	 markers were dangling from our just-taken node.  Dangle them
114 	 from our *new* node. */
115       for(j=length+1;j<33;j++)
116 	if((marker[j]>>1) == entry){
117 	  entry=marker[j];
118 	  marker[j]=marker[j-1]<<1;
119 	}else
120 	  break;
121     }else
122       if(sparsecount==0)count++;
123   }
124 
125   /* sanity check the huffman tree; an underpopulated tree must be
126      rejected. The only exception is the one-node pseudo-nil tree,
127      which appears to be underpopulated because the tree doesn't
128      really exist; there's only one possible 'codeword' or zero bits,
129      but the above tree-gen code doesn't mark that. */
130   if(sparsecount != 1){
131     for(i=1;i<33;i++)
132       if(marker[i] & (0xffffffffUL>>(32-i))){
133        _ogg_free(r);
134        return(NULL);
135       }
136   }
137 
138   /* bitreverse the words because our bitwise packer/unpacker is LSb
139      endian */
140   for(i=0,count=0;i<n;i++){
141     ogg_uint32_t temp=0;
142     for(j=0;j<l[i];j++){
143       temp<<=1;
144       temp|=(r[count]>>j)&1;
145     }
146 
147     if(sparsecount){
148       if(l[i])
149 	r[count++]=temp;
150     }else
151       r[count++]=temp;
152   }
153 
154   return(r);
155 }
156 
157 /* there might be a straightforward one-line way to do the below
158    that's portable and totally safe against roundoff, but I haven't
159    thought of it.  Therefore, we opt on the side of caution */
_book_maptype1_quantvals(const static_codebook * b)160 long _book_maptype1_quantvals(const static_codebook *b){
161   /* get us a starting hint, we'll polish it below */
162   int bits=_ilog(b->entries);
163   int vals=b->entries>>((bits-1)*(b->dim-1)/b->dim);
164 
165   while(1){
166     long acc=1;
167     long acc1=1;
168     int i;
169     for(i=0;i<b->dim;i++){
170       acc*=vals;
171       acc1*=vals+1;
172     }
173     if(acc<=b->entries && acc1>b->entries){
174       return(vals);
175     }else{
176       if(acc>b->entries){
177 	vals--;
178       }else{
179 	vals++;
180       }
181     }
182   }
183 }
184 
185 /* different than what _book_unquantize does for mainline:
186    we repack the book in a fixed point format that shares the same
187    binary point.  Upon first use, we can shift point if needed */
188 
189 /* we need to deal with two map types: in map type 1, the values are
190    generated algorithmically (each column of the vector counts through
191    the values in the quant vector). in map type 2, all the values came
192    in in an explicit list.  Both value lists must be unpacked */
193 
_book_unquantize(const static_codebook * b,int n,int * sparsemap,int * maxpoint)194 ogg_int32_t *_book_unquantize(const static_codebook *b,int n,int *sparsemap,
195 			      int *maxpoint){
196   long j,k,count=0;
197   if(b->maptype==1 || b->maptype==2){
198     int quantvals;
199     int minpoint,delpoint;
200     ogg_int32_t mindel=_float32_unpack(b->q_min,&minpoint);
201     ogg_int32_t delta=_float32_unpack(b->q_delta,&delpoint);
202     ogg_int32_t *r=(ogg_int32_t *)_ogg_calloc(n*b->dim,sizeof(*r));
203     int *rp=(int *)_ogg_calloc(n*b->dim,sizeof(*rp));
204 
205     *maxpoint=minpoint;
206 
207     /* maptype 1 and 2 both use a quantized value vector, but
208        different sizes */
209     switch(b->maptype){
210     case 1:
211       /* most of the time, entries%dimensions == 0, but we need to be
212 	 well defined.  We define that the possible vales at each
213 	 scalar is values == entries/dim.  If entries%dim != 0, we'll
214 	 have 'too few' values (values*dim<entries), which means that
215 	 we'll have 'left over' entries; left over entries use zeroed
216 	 values (and are wasted).  So don't generate codebooks like
217 	 that */
218       quantvals=_book_maptype1_quantvals(b);
219       for(j=0;j<b->entries;j++){
220 	if((sparsemap && b->lengthlist[j]) || !sparsemap){
221 	  ogg_int32_t last=0;
222 	  int lastpoint=0;
223 	  int indexdiv=1;
224 	  for(k=0;k<b->dim;k++){
225 	    int index= (j/indexdiv)%quantvals;
226 	    int point=0;
227 	    int val=VFLOAT_MULTI(delta,delpoint,
228 				 abs(b->quantlist[index]),&point);
229 
230 	    val=VFLOAT_ADD(mindel,minpoint,val,point,&point);
231 	    val=VFLOAT_ADD(last,lastpoint,val,point,&point);
232 
233 	    if(b->q_sequencep){
234 	      last=val;
235 	      lastpoint=point;
236 	    }
237 
238 	    if(sparsemap){
239 	      r[sparsemap[count]*b->dim+k]=val;
240 	      rp[sparsemap[count]*b->dim+k]=point;
241 	    }else{
242 	      r[count*b->dim+k]=val;
243 	      rp[count*b->dim+k]=point;
244 	    }
245 	    if(*maxpoint<point)*maxpoint=point;
246 	    indexdiv*=quantvals;
247 	  }
248 	  count++;
249 	}
250 
251       }
252       break;
253     case 2:
254       for(j=0;j<b->entries;j++){
255 	if((sparsemap && b->lengthlist[j]) || !sparsemap){
256 	  ogg_int32_t last=0;
257 	  int         lastpoint=0;
258 
259 	  for(k=0;k<b->dim;k++){
260 	    int point=0;
261 	    int val=VFLOAT_MULTI(delta,delpoint,
262 				 abs(b->quantlist[j*b->dim+k]),&point);
263 
264 	    val=VFLOAT_ADD(mindel,minpoint,val,point,&point);
265 	    val=VFLOAT_ADD(last,lastpoint,val,point,&point);
266 
267 	    if(b->q_sequencep){
268 	      last=val;
269 	      lastpoint=point;
270 	    }
271 
272 	    if(sparsemap){
273 	      r[sparsemap[count]*b->dim+k]=val;
274 	      rp[sparsemap[count]*b->dim+k]=point;
275 	    }else{
276 	      r[count*b->dim+k]=val;
277 	      rp[count*b->dim+k]=point;
278 	    }
279 	    if(*maxpoint<point)*maxpoint=point;
280 	  }
281 	  count++;
282 	}
283       }
284       break;
285     }
286 
287     for(j=0;j<n*b->dim;j++)
288       if(rp[j]<*maxpoint)
289 	r[j]>>=*maxpoint-rp[j];
290 
291     _ogg_free(rp);
292     return(r);
293   }
294   return(NULL);
295 }
296 
vorbis_staticbook_destroy(static_codebook * b)297 void vorbis_staticbook_destroy(static_codebook *b){
298   if(b->quantlist)_ogg_free(b->quantlist);
299   if(b->lengthlist)_ogg_free(b->lengthlist);
300   memset(b,0,sizeof(*b));
301   _ogg_free(b);
302 }
303 
vorbis_book_clear(codebook * b)304 void vorbis_book_clear(codebook *b){
305   /* static book is not cleared; we're likely called on the lookup and
306      the static codebook belongs to the info struct */
307   if(b->valuelist)_ogg_free(b->valuelist);
308   if(b->codelist)_ogg_free(b->codelist);
309 
310   if(b->dec_index)_ogg_free(b->dec_index);
311   if(b->dec_codelengths)_ogg_free(b->dec_codelengths);
312   if(b->dec_firsttable)_ogg_free(b->dec_firsttable);
313 
314   memset(b,0,sizeof(*b));
315 }
316 
bitreverse(ogg_uint32_t x)317 static ogg_uint32_t bitreverse(ogg_uint32_t x){
318   x=    ((x>>16)&0x0000ffffUL) | ((x<<16)&0xffff0000UL);
319   x=    ((x>> 8)&0x00ff00ffUL) | ((x<< 8)&0xff00ff00UL);
320   x=    ((x>> 4)&0x0f0f0f0fUL) | ((x<< 4)&0xf0f0f0f0UL);
321   x=    ((x>> 2)&0x33333333UL) | ((x<< 2)&0xccccccccUL);
322   return((x>> 1)&0x55555555UL) | ((x<< 1)&0xaaaaaaaaUL);
323 }
324 
sort32a(const void * a,const void * b)325 static int sort32a(const void *a,const void *b){
326   return (**(ogg_uint32_t **)a>**(ogg_uint32_t **)b)-
327     (**(ogg_uint32_t **)a<**(ogg_uint32_t **)b);
328 }
329 
330 /* decode codebook arrangement is more heavily optimized than encode */
vorbis_book_init_decode(codebook * c,const static_codebook * s)331 int vorbis_book_init_decode(codebook *c,const static_codebook *s){
332   int i,j,n=0,tabn;
333   int *sortindex;
334   memset(c,0,sizeof(*c));
335 
336   /* count actually used entries */
337   for(i=0;i<s->entries;i++)
338     if(s->lengthlist[i]>0)
339       n++;
340 
341   c->entries=s->entries;
342   c->used_entries=n;
343   c->dim=s->dim;
344 
345   if(n>0){
346     /* two different remappings go on here.
347 
348        First, we collapse the likely sparse codebook down only to
349        actually represented values/words.  This collapsing needs to be
350        indexed as map-valueless books are used to encode original entry
351        positions as integers.
352 
353        Second, we reorder all vectors, including the entry index above,
354        by sorted bitreversed codeword to allow treeless decode. */
355 
356     /* perform sort */
357     ogg_uint32_t *codes=_make_words(s->lengthlist,s->entries,c->used_entries);
358     ogg_uint32_t **codep=(ogg_uint32_t **)alloca(sizeof(*codep)*n);
359 
360     if(codes==NULL)goto err_out;
361 
362     for(i=0;i<n;i++){
363       codes[i]=bitreverse(codes[i]);
364       codep[i]=codes+i;
365     }
366 
367     qsort(codep,n,sizeof(*codep),sort32a);
368 
369     sortindex=(int *)alloca(n*sizeof(*sortindex));
370     c->codelist=(ogg_uint32_t *)_ogg_malloc(n*sizeof(*c->codelist));
371     /* the index is a reverse index */
372     for(i=0;i<n;i++){
373       int position=codep[i]-codes;
374       sortindex[position]=i;
375     }
376 
377     for(i=0;i<n;i++)
378       c->codelist[sortindex[i]]=codes[i];
379     _ogg_free(codes);
380 
381 
382 
383     c->valuelist=_book_unquantize(s,n,sortindex,&c->binarypoint);
384     c->dec_index=(int *)_ogg_malloc(n*sizeof(*c->dec_index));
385 
386     for(n=0,i=0;i<s->entries;i++)
387       if(s->lengthlist[i]>0)
388 	c->dec_index[sortindex[n++]]=i;
389 
390     c->dec_codelengths=(char *)_ogg_malloc(n*sizeof(*c->dec_codelengths));
391     for(n=0,i=0;i<s->entries;i++)
392       if(s->lengthlist[i]>0)
393 	c->dec_codelengths[sortindex[n++]]=s->lengthlist[i];
394 
395     c->dec_firsttablen=_ilog(c->used_entries)-4; /* this is magic */
396     if(c->dec_firsttablen<5)c->dec_firsttablen=5;
397     if(c->dec_firsttablen>8)c->dec_firsttablen=8;
398 
399     tabn=1<<c->dec_firsttablen;
400     c->dec_firsttable=(ogg_uint32_t *)_ogg_calloc(tabn,sizeof(*c->dec_firsttable));
401     c->dec_maxlength=0;
402 
403     for(i=0;i<n;i++){
404       if(c->dec_maxlength<c->dec_codelengths[i])
405 	c->dec_maxlength=c->dec_codelengths[i];
406       if(c->dec_codelengths[i]<=c->dec_firsttablen){
407 	ogg_uint32_t orig=bitreverse(c->codelist[i]);
408 	for(j=0;j<(1<<(c->dec_firsttablen-c->dec_codelengths[i]));j++)
409 	  c->dec_firsttable[orig|(j<<c->dec_codelengths[i])]=i+1;
410       }
411     }
412 
413     /* now fill in 'unused' entries in the firsttable with hi/lo search
414        hints for the non-direct-hits */
415     {
416       ogg_uint32_t mask=0xfffffffeUL<<(31-c->dec_firsttablen);
417       long lo=0,hi=0;
418 
419       for(i=0;i<tabn;i++){
420 	ogg_uint32_t word=i<<(32-c->dec_firsttablen);
421 	if(c->dec_firsttable[bitreverse(word)]==0){
422 	  while((lo+1)<n && c->codelist[lo+1]<=word)lo++;
423 	  while(    hi<n && word>=(c->codelist[hi]&mask))hi++;
424 
425 	  /* we only actually have 15 bits per hint to play with here.
426 	     In order to overflow gracefully (nothing breaks, efficiency
427 	     just drops), encode as the difference from the extremes. */
428 	  {
429 	    unsigned long loval=lo;
430 	    unsigned long hival=n-hi;
431 
432 	    if(loval>0x7fff)loval=0x7fff;
433 	    if(hival>0x7fff)hival=0x7fff;
434 	    c->dec_firsttable[bitreverse(word)]=
435 	      0x80000000UL | (loval<<15) | hival;
436 	  }
437 	}
438       }
439     }
440   }
441 
442   return(0);
443  err_out:
444   vorbis_book_clear(c);
445   return(-1);
446 }
447 
448