1 ////////////////////////////////////////////////////////////////////////////
2 //                           **** WAVPACK ****                            //
3 //                  Hybrid Lossless Wavefile Compressor                   //
4 //              Copyright (c) 1998 - 2013 Conifer Software.               //
5 //                          All Rights Reserved.                          //
6 //      Distributed under the BSD Software License (see license.txt)      //
7 ////////////////////////////////////////////////////////////////////////////
8 
9 // write_words.c
10 
11 // This module provides entropy word encoding functions using
12 // a variation on the Rice method.  This was introduced in version 3.93
13 // because it allows splitting the data into a "lossy" stream and a
14 // "correction" stream in a very efficient manner and is therefore ideal
15 // for the "hybrid" mode.  For 4.0, the efficiency of this method was
16 // significantly improved by moving away from the normal Rice restriction of
17 // using powers of two for the modulus divisions and now the method can be
18 // used for both hybrid and pure lossless encoding.
19 
20 // Samples are divided by median probabilities at 5/7 (71.43%), 10/49 (20.41%),
21 // and 20/343 (5.83%). Each zone has 3.5 times fewer samples than the
22 // previous. Using standard Rice coding on this data would result in 1.4
23 // bits per sample average (not counting sign bit). However, there is a
24 // very simple encoding that is over 99% efficient with this data and
25 // results in about 1.22 bits per sample.
26 
27 #include <stdlib.h>
28 #include <string.h>
29 
30 #include "wavpack_local.h"
31 
32 ///////////////////////////// executable code ////////////////////////////////
33 
34 // Initialize entropy encoder for the specified stream. In lossless mode there
35 // are no parameters to select; in hybrid mode the bitrate mode and value need
36 // be initialized.
37 
38 static void word_set_bitrate (WavpackStream *wps);
39 
init_words(WavpackStream * wps)40 void init_words (WavpackStream *wps)
41 {
42     CLEAR (wps->w);
43 
44     if (wps->wphdr.flags & HYBRID_FLAG)
45         word_set_bitrate (wps);
46 }
47 
48 // Set up parameters for hybrid mode based on header flags and "bits" field.
49 // This is currently only set up for the HYBRID_BITRATE mode in which the
50 // allowed error varies with the residual level (from "slow_level"). The
51 // simpler mode (which is not used yet) has the error level directly
52 // controlled from the metadata.
53 
word_set_bitrate(WavpackStream * wps)54 static void word_set_bitrate (WavpackStream *wps)
55 {
56     int bitrate_0, bitrate_1;
57 
58     if (wps->wphdr.flags & HYBRID_BITRATE) {
59         if (wps->wphdr.flags & FALSE_STEREO)
60             bitrate_0 = (wps->bits * 2 - 512) < 568 ? 0 : (wps->bits * 2 - 512) - 568;
61         else
62             bitrate_0 = wps->bits < 568 ? 0 : wps->bits - 568;
63 
64         if (!(wps->wphdr.flags & MONO_DATA)) {
65 
66             if (wps->wphdr.flags & HYBRID_BALANCE)
67                 bitrate_1 = (wps->wphdr.flags & JOINT_STEREO) ? 256 : 0;
68             else {
69                 bitrate_1 = bitrate_0;
70 
71                 if (wps->wphdr.flags & JOINT_STEREO) {
72                     if (bitrate_0 < 128) {
73                         bitrate_1 += bitrate_0;
74                         bitrate_0 = 0;
75                     }
76                     else {
77                         bitrate_0 -= 128;
78                         bitrate_1 += 128;
79                     }
80                 }
81             }
82         }
83         else
84             bitrate_1 = 0;
85     }
86     else
87         bitrate_0 = bitrate_1 = 0;
88 
89     wps->w.bitrate_acc [0] = (int32_t) bitrate_0 << 16;
90     wps->w.bitrate_acc [1] = (int32_t) bitrate_1 << 16;
91 }
92 
93 // Allocates the correct space in the metadata structure and writes the
94 // current median values to it. Values are converted from 32-bit unsigned
95 // to our internal 16-bit wp_log2 values, and read_entropy_vars () is called
96 // to read the values back because we must compensate for the loss through
97 // the log function.
98 
write_entropy_vars(WavpackStream * wps,WavpackMetadata * wpmd)99 void write_entropy_vars (WavpackStream *wps, WavpackMetadata *wpmd)
100 {
101     unsigned char *byteptr;
102     int temp;
103 
104     byteptr = wpmd->data = malloc (12);
105     wpmd->id = ID_ENTROPY_VARS;
106 
107     *byteptr++ = temp = wp_log2 (wps->w.c [0].median [0]);
108     *byteptr++ = temp >> 8;
109     *byteptr++ = temp = wp_log2 (wps->w.c [0].median [1]);
110     *byteptr++ = temp >> 8;
111     *byteptr++ = temp = wp_log2 (wps->w.c [0].median [2]);
112     *byteptr++ = temp >> 8;
113 
114     if (!(wps->wphdr.flags & MONO_DATA)) {
115         *byteptr++ = temp = wp_log2 (wps->w.c [1].median [0]);
116         *byteptr++ = temp >> 8;
117         *byteptr++ = temp = wp_log2 (wps->w.c [1].median [1]);
118         *byteptr++ = temp >> 8;
119         *byteptr++ = temp = wp_log2 (wps->w.c [1].median [2]);
120         *byteptr++ = temp >> 8;
121     }
122 
123     wpmd->byte_length = (int32_t)(byteptr - (unsigned char *) wpmd->data);
124     read_entropy_vars (wps, wpmd);
125 }
126 
127 // Allocates enough space in the metadata structure and writes the current
128 // high word of the bitrate accumulator and the slow_level values to it. The
129 // slow_level values are converted from 32-bit unsigned to our internal 16-bit
130 // wp_log2 values. Afterward, read_entropy_vars () is called to read the values
131 // back because we must compensate for the loss through the log function and
132 // the truncation of the bitrate.
133 
write_hybrid_profile(WavpackStream * wps,WavpackMetadata * wpmd)134 void write_hybrid_profile (WavpackStream *wps, WavpackMetadata *wpmd)
135 {
136     unsigned char *byteptr;
137     int temp;
138 
139     word_set_bitrate (wps);
140     byteptr = wpmd->data = malloc (512);
141     wpmd->id = ID_HYBRID_PROFILE;
142 
143     if (wps->wphdr.flags & HYBRID_BITRATE) {
144         *byteptr++ = temp = wp_log2s (wps->w.c [0].slow_level);
145         *byteptr++ = temp >> 8;
146 
147         if (!(wps->wphdr.flags & MONO_DATA)) {
148             *byteptr++ = temp = wp_log2s (wps->w.c [1].slow_level);
149             *byteptr++ = temp >> 8;
150         }
151     }
152 
153     *byteptr++ = temp = wps->w.bitrate_acc [0] >> 16;
154     *byteptr++ = temp >> 8;
155 
156     if (!(wps->wphdr.flags & MONO_DATA)) {
157         *byteptr++ = temp = wps->w.bitrate_acc [1] >> 16;
158         *byteptr++ = temp >> 8;
159     }
160 
161     if (wps->w.bitrate_delta [0] | wps->w.bitrate_delta [1]) {
162         *byteptr++ = temp = wp_log2s (wps->w.bitrate_delta [0]);
163         *byteptr++ = temp >> 8;
164 
165         if (!(wps->wphdr.flags & MONO_DATA)) {
166             *byteptr++ = temp = wp_log2s (wps->w.bitrate_delta [1]);
167             *byteptr++ = temp >> 8;
168         }
169     }
170 
171     wpmd->byte_length = (int32_t)(byteptr - (unsigned char *) wpmd->data);
172     read_hybrid_profile (wps, wpmd);
173 }
174 
175 // This function writes the specified word to the open bitstream "wvbits" and,
176 // if the bitstream "wvcbits" is open, writes any correction data there. This
177 // function will work for either lossless or hybrid but because a version
178 // optimized for lossless exits below, it would normally be used for the hybrid
179 // mode only. The return value is the actual value stored to the stream (even
180 // if a correction file is being created) and is used as feedback to the
181 // predictor.
182 
send_word(WavpackStream * wps,int32_t value,int chan)183 int32_t FASTCALL send_word (WavpackStream *wps, int32_t value, int chan)
184 {
185     struct entropy_data *c = wps->w.c + chan;
186     uint32_t ones_count, low, mid, high;
187     int sign = (value < 0) ? 1 : 0;
188 
189     if (wps->w.c [0].median [0] < 2 && !wps->w.holding_zero && wps->w.c [1].median [0] < 2) {
190         if (wps->w.zeros_acc) {
191             if (value)
192                 flush_word (wps);
193             else {
194                 c->slow_level -= (c->slow_level + SLO) >> SLS;
195                 wps->w.zeros_acc++;
196                 return 0;
197             }
198         }
199         else if (value)
200             putbit_0 (&wps->wvbits);
201         else {
202             c->slow_level -= (c->slow_level + SLO) >> SLS;
203             CLEAR (wps->w.c [0].median);
204             CLEAR (wps->w.c [1].median);
205             wps->w.zeros_acc = 1;
206             return 0;
207         }
208     }
209 
210     if (sign)
211         value = ~value;
212 
213     if ((wps->wphdr.flags & HYBRID_FLAG) && !chan)
214         update_error_limit (wps);
215 
216     if (value < (int32_t) GET_MED (0)) {
217         ones_count = low = 0;
218         high = GET_MED (0) - 1;
219         DEC_MED0 ();
220     }
221     else {
222         low = GET_MED (0);
223         INC_MED0 ();
224 
225         if (value - low < GET_MED (1)) {
226             ones_count = 1;
227             high = low + GET_MED (1) - 1;
228             DEC_MED1 ();
229         }
230         else {
231             low += GET_MED (1);
232             INC_MED1 ();
233 
234             if (value - low < GET_MED (2)) {
235                 ones_count = 2;
236                 high = low + GET_MED (2) - 1;
237                 DEC_MED2 ();
238             }
239             else {
240                 ones_count = 2 + (value - low) / GET_MED (2);
241                 low += (ones_count - 2) * GET_MED (2);
242                 high = low + GET_MED (2) - 1;
243                 INC_MED2 ();
244             }
245         }
246     }
247 
248     mid = (high + low + 1) >> 1;
249 
250     if (wps->w.holding_zero) {
251         if (ones_count)
252             wps->w.holding_one++;
253 
254         flush_word (wps);
255 
256         if (ones_count) {
257             wps->w.holding_zero = 1;
258             ones_count--;
259         }
260         else
261             wps->w.holding_zero = 0;
262     }
263     else
264         wps->w.holding_zero = 1;
265 
266     wps->w.holding_one = ones_count * 2;
267 
268     if (!c->error_limit) {
269         if (high != low) {
270             uint32_t maxcode = high - low, code = value - low;
271             int bitcount = count_bits (maxcode);
272             uint32_t extras = bitset [bitcount] - maxcode - 1;
273 
274             if (code < extras) {
275                 wps->w.pend_data |= code << wps->w.pend_count;
276                 wps->w.pend_count += bitcount - 1;
277             }
278             else {
279                 wps->w.pend_data |= ((code + extras) >> 1) << wps->w.pend_count;
280                 wps->w.pend_count += bitcount - 1;
281                 wps->w.pend_data |= ((code + extras) & 1) << wps->w.pend_count++;
282             }
283         }
284 
285         mid = value;
286     }
287     else
288         while (high - low > c->error_limit)
289             if (value < (int32_t) mid) {
290                 mid = ((high = mid - 1) + low + 1) >> 1;
291                 wps->w.pend_count++;
292             }
293             else {
294                 mid = (high + (low = mid) + 1) >> 1;
295                 wps->w.pend_data |= bitset [wps->w.pend_count++];
296             }
297 
298     wps->w.pend_data |= ((int32_t) sign << wps->w.pend_count++);
299 
300     if (!wps->w.holding_zero)
301         flush_word (wps);
302 
303     if (bs_is_open (&wps->wvcbits) && c->error_limit) {
304         uint32_t code = value - low, maxcode = high - low;
305         int bitcount = count_bits (maxcode);
306         uint32_t extras = bitset [bitcount] - maxcode - 1;
307 
308         if (bitcount) {
309             if (code < extras)
310                 putbits (code, bitcount - 1, &wps->wvcbits);
311             else {
312                 putbits ((code + extras) >> 1, bitcount - 1, &wps->wvcbits);
313                 putbit ((code + extras) & 1, &wps->wvcbits);
314             }
315         }
316     }
317 
318     if (wps->wphdr.flags & HYBRID_BITRATE) {
319         c->slow_level -= (c->slow_level + SLO) >> SLS;
320         c->slow_level += wp_log2 (mid);
321     }
322 
323     return sign ? ~mid : mid;
324 }
325 
326 // This function is an optimized version of send_word() that only handles
327 // lossless (error_limit == 0) and sends an entire buffer of either mono or
328 // stereo data rather than a single sample. Unlike the generalized
329 // send_word(), it does not return values because it always encodes
330 // the exact value passed.
331 
send_words_lossless(WavpackStream * wps,int32_t * buffer,int32_t nsamples)332 void send_words_lossless (WavpackStream *wps, int32_t *buffer, int32_t nsamples)
333 {
334     struct entropy_data *c = wps->w.c;
335     int32_t value, csamples;
336 
337     if (!(wps->wphdr.flags & MONO_DATA))
338         nsamples *= 2;
339 
340     for (csamples = 0; csamples < nsamples; ++csamples) {
341         int sign = ((value = *buffer++) < 0) ? 1 : 0;
342         uint32_t ones_count, low, high;
343 
344         if (!(wps->wphdr.flags & MONO_DATA))
345             c = wps->w.c + (csamples & 1);
346 
347         if (wps->w.c [0].median [0] < 2 && !wps->w.holding_zero && wps->w.c [1].median [0] < 2) {
348             if (wps->w.zeros_acc) {
349                 if (value)
350                     flush_word (wps);
351                 else {
352                     wps->w.zeros_acc++;
353                     continue;
354                 }
355             }
356             else if (value)
357                 putbit_0 (&wps->wvbits);
358             else {
359                 CLEAR (wps->w.c [0].median);
360                 CLEAR (wps->w.c [1].median);
361                 wps->w.zeros_acc = 1;
362                 continue;
363             }
364         }
365 
366         if (sign)
367             value = ~value;
368 
369         if (value < (int32_t) GET_MED (0)) {
370             ones_count = low = 0;
371             high = GET_MED (0) - 1;
372             DEC_MED0 ();
373         }
374         else {
375             low = GET_MED (0);
376             INC_MED0 ();
377 
378             if (value - low < GET_MED (1)) {
379                 ones_count = 1;
380                 high = low + GET_MED (1) - 1;
381                 DEC_MED1 ();
382             }
383             else {
384                 low += GET_MED (1);
385                 INC_MED1 ();
386 
387                 if (value - low < GET_MED (2)) {
388                     ones_count = 2;
389                     high = low + GET_MED (2) - 1;
390                     DEC_MED2 ();
391                 }
392                 else {
393                     ones_count = 2 + (value - low) / GET_MED (2);
394                     low += (ones_count - 2) * GET_MED (2);
395                     high = low + GET_MED (2) - 1;
396                     INC_MED2 ();
397                 }
398             }
399         }
400 
401         if (wps->w.holding_zero) {
402             if (ones_count)
403                 wps->w.holding_one++;
404 
405             flush_word (wps);
406 
407             if (ones_count) {
408                 wps->w.holding_zero = 1;
409                 ones_count--;
410             }
411             else
412                 wps->w.holding_zero = 0;
413         }
414         else
415             wps->w.holding_zero = 1;
416 
417         wps->w.holding_one = ones_count * 2;
418 
419         if (high != low) {
420             uint32_t maxcode = high - low, code = value - low;
421             int bitcount = count_bits (maxcode);
422             uint32_t extras = bitset [bitcount] - maxcode - 1;
423 
424             if (code < extras) {
425                 wps->w.pend_data |= code << wps->w.pend_count;
426                 wps->w.pend_count += bitcount - 1;
427             }
428             else {
429                 wps->w.pend_data |= ((code + extras) >> 1) << wps->w.pend_count;
430                 wps->w.pend_count += bitcount - 1;
431                 wps->w.pend_data |= ((code + extras) & 1) << wps->w.pend_count++;
432             }
433         }
434 
435         wps->w.pend_data |= ((int32_t) sign << wps->w.pend_count++);
436 
437         if (!wps->w.holding_zero)
438             flush_word (wps);
439     }
440 }
441 
442 // Used by send_word() and send_word_lossless() to actually send most the
443 // accumulated data onto the bitstream. This is also called directly from
444 // clients when all words have been sent.
445 
flush_word(WavpackStream * wps)446 void flush_word (WavpackStream *wps)
447 {
448     if (wps->w.zeros_acc) {
449         int cbits = count_bits (wps->w.zeros_acc);
450 
451         while (cbits--)
452             putbit_1 (&wps->wvbits);
453 
454         putbit_0 (&wps->wvbits);
455 
456         while (wps->w.zeros_acc > 1) {
457             putbit (wps->w.zeros_acc & 1, &wps->wvbits);
458             wps->w.zeros_acc >>= 1;
459         }
460 
461         wps->w.zeros_acc = 0;
462     }
463 
464     if (wps->w.holding_one) {
465 #ifdef LIMIT_ONES
466         if (wps->w.holding_one >= LIMIT_ONES) {
467             int cbits;
468 
469             putbits ((1L << LIMIT_ONES) - 1, LIMIT_ONES + 1, &wps->wvbits);
470             wps->w.holding_one -= LIMIT_ONES;
471             cbits = count_bits (wps->w.holding_one);
472 
473             while (cbits--)
474                 putbit_1 (&wps->wvbits);
475 
476             putbit_0 (&wps->wvbits);
477 
478             while (wps->w.holding_one > 1) {
479                 putbit (wps->w.holding_one & 1, &wps->wvbits);
480                 wps->w.holding_one >>= 1;
481             }
482 
483             wps->w.holding_zero = 0;
484         }
485         else
486             putbits (bitmask [wps->w.holding_one], wps->w.holding_one, &wps->wvbits);
487 
488         wps->w.holding_one = 0;
489 #else
490         do {
491             putbit_1 (&wps->wvbits);
492         } while (--wps->w.holding_one);
493 #endif
494     }
495 
496     if (wps->w.holding_zero) {
497         putbit_0 (&wps->wvbits);
498         wps->w.holding_zero = 0;
499     }
500 
501     if (wps->w.pend_count) {
502         putbits (wps->w.pend_data, wps->w.pend_count, &wps->wvbits);
503         wps->w.pend_data = wps->w.pend_count = 0;
504     }
505 }
506 
507 // This function is similar to send_word() except that no data is actually
508 // written to any stream, but it does return the value that would have been
509 // sent to a hybrid stream. It is used to determine beforehand how much noise
510 // will be added to samples.
511 
nosend_word(WavpackStream * wps,int32_t value,int chan)512 int32_t nosend_word (WavpackStream *wps, int32_t value, int chan)
513 {
514     struct entropy_data *c = wps->w.c + chan;
515     uint32_t ones_count, low, mid, high;
516     int sign = (value < 0) ? 1 : 0;
517 
518     if (sign)
519         value = ~value;
520 
521     if ((wps->wphdr.flags & HYBRID_FLAG) && !chan)
522         update_error_limit (wps);
523 
524     if (value < (int32_t) GET_MED (0)) {
525         low = 0;
526         high = GET_MED (0) - 1;
527         DEC_MED0 ();
528     }
529     else {
530         low = GET_MED (0);
531         INC_MED0 ();
532 
533         if (value - low < GET_MED (1)) {
534             high = low + GET_MED (1) - 1;
535             DEC_MED1 ();
536         }
537         else {
538             low += GET_MED (1);
539             INC_MED1 ();
540 
541             if (value - low < GET_MED (2)) {
542                 high = low + GET_MED (2) - 1;
543                 DEC_MED2 ();
544             }
545             else {
546                 ones_count = 2 + (value - low) / GET_MED (2);
547                 low += (ones_count - 2) * GET_MED (2);
548                 high = low + GET_MED (2) - 1;
549                 INC_MED2 ();
550             }
551         }
552     }
553 
554     mid = (high + low + 1) >> 1;
555 
556     if (!c->error_limit)
557         mid = value;
558     else
559         while (high - low > c->error_limit)
560             if (value < (int32_t) mid)
561                 mid = ((high = mid - 1) + low + 1) >> 1;
562             else
563                 mid = (high + (low = mid) + 1) >> 1;
564 
565     c->slow_level -= (c->slow_level + SLO) >> SLS;
566     c->slow_level += wp_log2 (mid);
567 
568     return sign ? ~mid : mid;
569 }
570 
571 // This function is used to scan some number of samples to set the variables
572 // "slow_level" and the "median" array. In pure symmetrical encoding mode this
573 // would not be needed because these values would simply be continued from the
574 // previous block. However, in the -X modes and the 32-bit modes we cannot do
575 // this because parameters may change between blocks and the variables might
576 // not apply. This function can work in mono or stereo and can scan a block
577 // in either direction.
578 
scan_word_pass(WavpackStream * wps,int32_t * samples,uint32_t num_samples,int dir)579 static void scan_word_pass (WavpackStream *wps, int32_t *samples, uint32_t num_samples, int dir)
580 {
581     uint32_t flags = wps->wphdr.flags, value, low;
582     struct entropy_data *c = wps->w.c;
583     int chan;
584 
585     if (flags & MONO_DATA) {
586         if (dir < 0) {
587             samples += (num_samples - 1);
588             dir = -1;
589         }
590         else
591             dir = 1;
592     }
593     else {
594         if (dir < 0) {
595             samples += (num_samples - 1) * 2;
596             dir = -2;
597         }
598         else
599             dir = 2;
600     }
601 
602     while (num_samples--) {
603 
604         value = labs (samples [chan = 0]);
605 
606         if (flags & HYBRID_BITRATE) {
607             wps->w.c [0].slow_level -= (wps->w.c [0].slow_level + SLO) >> SLS;
608             wps->w.c [0].slow_level += wp_log2 (value);
609         }
610 
611         if (value < GET_MED (0)) {
612             DEC_MED0 ();
613         }
614         else {
615             low = GET_MED (0);
616             INC_MED0 ();
617 
618             if (value - low < GET_MED (1)) {
619                 DEC_MED1 ();
620             }
621             else {
622                 low += GET_MED (1);
623                 INC_MED1 ();
624 
625                 if (value - low < GET_MED (2)) {
626                     DEC_MED2 ();
627                 }
628                 else {
629                     INC_MED2 ();
630                 }
631             }
632         }
633 
634         if (!(flags & MONO_DATA)) {
635             value = labs (samples [chan = 1]);
636             c++;
637 
638             if (wps->wphdr.flags & HYBRID_BITRATE) {
639                 wps->w.c [1].slow_level -= (wps->w.c [1].slow_level + SLO) >> SLS;
640                 wps->w.c [1].slow_level += wp_log2 (value);
641             }
642 
643             if (value < GET_MED (0)) {
644                 DEC_MED0 ();
645             }
646             else {
647                 low = GET_MED (0);
648                 INC_MED0 ();
649 
650                 if (value - low < GET_MED (1)) {
651                     DEC_MED1 ();
652                 }
653                 else {
654                     low += GET_MED (1);
655                     INC_MED1 ();
656 
657                     if (value - low < GET_MED (2)) {
658                         DEC_MED2 ();
659                     }
660                     else {
661                         INC_MED2 ();
662                     }
663                 }
664             }
665 
666             c--;
667         }
668 
669         samples += dir;
670     }
671 }
672 
673 // Wrapper for scan_word_pass() than ensures that at least 2048 samples are processed by
674 // potentially making multiple passes through the data. See description of scan_word_pass()
675 // for more details.
676 
scan_word(WavpackStream * wps,int32_t * samples,uint32_t num_samples,int dir)677 void scan_word (WavpackStream *wps, int32_t *samples, uint32_t num_samples, int dir)
678 {
679     init_words (wps);
680 
681     if (num_samples) {
682         int passes = (2048 + num_samples - 1) / num_samples;    // i.e., ceil (2048.0 / num_samples)
683 
684         while (passes--)
685             scan_word_pass (wps, samples, num_samples, dir);
686     }
687 }
688 
689