1 // Copyright 2014 Wouter van Oortmerssen. All rights reserved.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14
15 // Wouter's Entropy Coder: apparently I re-invented adaptive Shannon-Fano.
16 //
17 // similar compression performance to huffman, and absolutely tiny code, one function for both
18 // compression and decompression. Adaptive, so should work well even for tiny buffers.
19 // One of the vectors passed in is the input, the other the output (and exactly one of the two
20 // should be empty initially).
21 // not the fastest possible implementation (only 40MB/sec for either compression or decompression
22 // on a modern pc), but should be sufficient for many uses
23 //
24 // uses std::string and std::swap as its only external dependencies for simplicity, but could made
25 // to not rely on them relatively easily.
26
WEntropyCoder(const unsigned char * in,size_t inlen,size_t origlen,string & out)27 template<bool compress> void WEntropyCoder(const unsigned char *in,
28 size_t inlen, // Size of input.
29 size_t origlen, // Uncompressed size.
30 string &out) {
31 const int NSYM = 256; // This depends on the fact we're reading from unsigned chars.
32 int symbol[NSYM]; // The symbol in this slot. Adaptively sorted by frequency.
33 size_t freq[NSYM]; // Its frequency.
34 int sym_idx[NSYM]; // Lookup symbol -> index into the above two arrays.
35 for (int i = 0; i < NSYM; i++) { freq[i] = 1; symbol[i] = sym_idx[i] = i; }
36 size_t compr_idx = 0;
37 unsigned char bits = 0;
38 int nbits = 0;
39 for (size_t i = 0; i < origlen; i++) {
40 int start = 0, range = NSYM;
41 size_t total_freq = i + NSYM;
42 while (range > 1) {
43 size_t acc_freq = 0;
44 int j = start;
45 do acc_freq += freq[j++]; while (acc_freq + freq[j] / 2 < total_freq / 2);
46 unsigned char bit = 0;
47 if (compress) {
48 if (sym_idx[in[i]] < j) bit = 1;
49 bits |= bit << nbits;
50 if (++nbits == 8) { out.push_back(bits); bits = 0; nbits = 0; }
51 } else {
52 if (!nbits) { assert(compr_idx < inlen); bits = in[compr_idx++]; nbits = 8; }
53 bit = bits & 1;
54 bits >>= 1;
55 nbits--;
56 }
57 if (bit) {
58 total_freq = acc_freq; assert(j - start < range); range = j - start;
59 } else {
60 total_freq -= acc_freq; range -= j - start; start = j;
61 }
62 }
63 if (!compress) out.push_back((unsigned char)symbol[start]);
64 assert(range == 1 && (!compress || in[i] == symbol[start]));
65 freq[start]++;
66 while (start && freq[start - 1] < freq[start]) {
67 swap(sym_idx[symbol[start - 1]], sym_idx[symbol[start]]);
68 swap(freq[start - 1], freq[start]);
69 swap(symbol[start - 1], symbol[start]);
70 start--;
71 }
72 }
73 if (compress) { if (nbits) out.push_back(bits); } else { assert(compr_idx == inlen); }
74 (void)inlen;
75 }
76
77