1 // Copyright 2016 The Draco Authors.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //      http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 //
15 #include "draco/compression/entropy/symbol_encoding.h"
16 
17 #include <algorithm>
18 #include <cmath>
19 
20 #include "draco/compression/entropy/rans_symbol_encoder.h"
21 #include "draco/compression/entropy/shannon_entropy.h"
22 #include "draco/core/bit_utils.h"
23 #include "draco/core/macros.h"
24 
25 namespace draco {
26 
27 constexpr int32_t kMaxTagSymbolBitLength = 32;
28 constexpr int kMaxRawEncodingBitLength = 18;
29 constexpr int kDefaultSymbolCodingCompressionLevel = 7;
30 
31 typedef uint64_t TaggedBitLengthFrequencies[kMaxTagSymbolBitLength];
32 
SetSymbolEncodingMethod(Options * options,SymbolCodingMethod method)33 void SetSymbolEncodingMethod(Options *options, SymbolCodingMethod method) {
34   options->SetInt("symbol_encoding_method", method);
35 }
36 
SetSymbolEncodingCompressionLevel(Options * options,int compression_level)37 bool SetSymbolEncodingCompressionLevel(Options *options,
38                                        int compression_level) {
39   if (compression_level < 0 || compression_level > 10) {
40     return false;
41   }
42   options->SetInt("symbol_encoding_compression_level", compression_level);
43   return true;
44 }
45 
46 // Computes bit lengths of the input values. If num_components > 1, the values
47 // are processed in "num_components" sized chunks and the bit length is always
48 // computed for the largest value from the chunk.
ComputeBitLengths(const uint32_t * symbols,int num_values,int num_components,std::vector<uint32_t> * out_bit_lengths,uint32_t * out_max_value)49 static void ComputeBitLengths(const uint32_t *symbols, int num_values,
50                               int num_components,
51                               std::vector<uint32_t> *out_bit_lengths,
52                               uint32_t *out_max_value) {
53   out_bit_lengths->reserve(num_values);
54   *out_max_value = 0;
55   // Maximum integer value across all components.
56   for (int i = 0; i < num_values; i += num_components) {
57     // Get the maximum value for a given entry across all attribute components.
58     uint32_t max_component_value = symbols[i];
59     for (int j = 1; j < num_components; ++j) {
60       if (max_component_value < symbols[i + j]) {
61         max_component_value = symbols[i + j];
62       }
63     }
64     int value_msb_pos = 0;
65     if (max_component_value > 0) {
66       value_msb_pos = MostSignificantBit(max_component_value);
67     }
68     if (max_component_value > *out_max_value) {
69       *out_max_value = max_component_value;
70     }
71     out_bit_lengths->push_back(value_msb_pos + 1);
72   }
73 }
74 
ApproximateTaggedSchemeBits(const std::vector<uint32_t> bit_lengths,int num_components)75 static int64_t ApproximateTaggedSchemeBits(
76     const std::vector<uint32_t> bit_lengths, int num_components) {
77   // Compute the total bit length used by all values (the length of data encode
78   // after tags).
79   uint64_t total_bit_length = 0;
80   for (size_t i = 0; i < bit_lengths.size(); ++i) {
81     total_bit_length += bit_lengths[i];
82   }
83   // Compute the number of entropy bits for tags.
84   int num_unique_symbols;
85   const int64_t tag_bits = ComputeShannonEntropy(
86       bit_lengths.data(), static_cast<int>(bit_lengths.size()), 32,
87       &num_unique_symbols);
88   const int64_t tag_table_bits =
89       ApproximateRAnsFrequencyTableBits(num_unique_symbols, num_unique_symbols);
90   return tag_bits + tag_table_bits + total_bit_length * num_components;
91 }
92 
ApproximateRawSchemeBits(const uint32_t * symbols,int num_symbols,uint32_t max_value,int * out_num_unique_symbols)93 static int64_t ApproximateRawSchemeBits(const uint32_t *symbols,
94                                         int num_symbols, uint32_t max_value,
95                                         int *out_num_unique_symbols) {
96   int num_unique_symbols;
97   const int64_t data_bits = ComputeShannonEntropy(
98       symbols, num_symbols, max_value, &num_unique_symbols);
99   const int64_t table_bits =
100       ApproximateRAnsFrequencyTableBits(max_value, num_unique_symbols);
101   *out_num_unique_symbols = num_unique_symbols;
102   return table_bits + data_bits;
103 }
104 
105 template <template <int> class SymbolEncoderT>
106 bool EncodeTaggedSymbols(const uint32_t *symbols, int num_values,
107                          int num_components,
108                          const std::vector<uint32_t> &bit_lengths,
109                          EncoderBuffer *target_buffer);
110 
111 template <template <int> class SymbolEncoderT>
112 bool EncodeRawSymbols(const uint32_t *symbols, int num_values,
113                       uint32_t max_entry_value, int32_t num_unique_symbols,
114                       const Options *options, EncoderBuffer *target_buffer);
115 
EncodeSymbols(const uint32_t * symbols,int num_values,int num_components,const Options * options,EncoderBuffer * target_buffer)116 bool EncodeSymbols(const uint32_t *symbols, int num_values, int num_components,
117                    const Options *options, EncoderBuffer *target_buffer) {
118   if (num_values < 0) {
119     return false;
120   }
121   if (num_values == 0) {
122     return true;
123   }
124   if (num_components <= 0) {
125     num_components = 1;
126   }
127   std::vector<uint32_t> bit_lengths;
128   uint32_t max_value;
129   ComputeBitLengths(symbols, num_values, num_components, &bit_lengths,
130                     &max_value);
131 
132   // Approximate number of bits needed for storing the symbols using the tagged
133   // scheme.
134   const int64_t tagged_scheme_total_bits =
135       ApproximateTaggedSchemeBits(bit_lengths, num_components);
136 
137   // Approximate number of bits needed for storing the symbols using the raw
138   // scheme.
139   int num_unique_symbols = 0;
140   const int64_t raw_scheme_total_bits = ApproximateRawSchemeBits(
141       symbols, num_values, max_value, &num_unique_symbols);
142 
143   // The maximum bit length of a single entry value that we can encode using
144   // the raw scheme.
145   const int max_value_bit_length =
146       MostSignificantBit(std::max(1u, max_value)) + 1;
147 
148   int method = -1;
149   if (options != nullptr && options->IsOptionSet("symbol_encoding_method")) {
150     method = options->GetInt("symbol_encoding_method");
151   } else {
152     if (tagged_scheme_total_bits < raw_scheme_total_bits ||
153         max_value_bit_length > kMaxRawEncodingBitLength) {
154       method = SYMBOL_CODING_TAGGED;
155     } else {
156       method = SYMBOL_CODING_RAW;
157     }
158   }
159   // Use the tagged scheme.
160   target_buffer->Encode(static_cast<uint8_t>(method));
161   if (method == SYMBOL_CODING_TAGGED) {
162     return EncodeTaggedSymbols<RAnsSymbolEncoder>(
163         symbols, num_values, num_components, bit_lengths, target_buffer);
164   }
165   if (method == SYMBOL_CODING_RAW) {
166     return EncodeRawSymbols<RAnsSymbolEncoder>(symbols, num_values, max_value,
167                                                num_unique_symbols, options,
168                                                target_buffer);
169   }
170   // Unknown method selected.
171   return false;
172 }
173 
174 template <template <int> class SymbolEncoderT>
EncodeTaggedSymbols(const uint32_t * symbols,int num_values,int num_components,const std::vector<uint32_t> & bit_lengths,EncoderBuffer * target_buffer)175 bool EncodeTaggedSymbols(const uint32_t *symbols, int num_values,
176                          int num_components,
177                          const std::vector<uint32_t> &bit_lengths,
178                          EncoderBuffer *target_buffer) {
179   // Create entries for entropy coding. Each entry corresponds to a different
180   // number of bits that are necessary to encode a given value. Every value
181   // has at most 32 bits. Therefore, we need 32 different entries (for
182   // bit_length [1-32]). For each entry we compute the frequency of a given
183   // bit-length in our data set.
184   TaggedBitLengthFrequencies frequencies;
185   // Set frequency for each entry to zero.
186   memset(frequencies, 0, sizeof(frequencies));
187 
188   // Compute the frequencies from input data.
189   // Maximum integer value for the values across all components.
190   for (size_t i = 0; i < bit_lengths.size(); ++i) {
191     // Update the frequency of the associated entry id.
192     ++frequencies[bit_lengths[i]];
193   }
194 
195   // Create one extra buffer to store raw value.
196   EncoderBuffer value_buffer;
197   // Number of expected bits we need to store the values (can be optimized if
198   // needed).
199   const uint64_t value_bits =
200       kMaxTagSymbolBitLength * static_cast<uint64_t>(num_values);
201 
202   // Create encoder for encoding the bit tags.
203   SymbolEncoderT<5> tag_encoder;
204   tag_encoder.Create(frequencies, kMaxTagSymbolBitLength, target_buffer);
205 
206   // Start encoding bit tags.
207   tag_encoder.StartEncoding(target_buffer);
208 
209   // Also start encoding the values.
210   value_buffer.StartBitEncoding(value_bits, false);
211 
212   if (tag_encoder.needs_reverse_encoding()) {
213     // Encoder needs the values to be encoded in the reverse order.
214     for (int i = num_values - num_components; i >= 0; i -= num_components) {
215       const int bit_length = bit_lengths[i / num_components];
216       tag_encoder.EncodeSymbol(bit_length);
217 
218       // Values are always encoded in the normal order
219       const int j = num_values - num_components - i;
220       const int value_bit_length = bit_lengths[j / num_components];
221       for (int c = 0; c < num_components; ++c) {
222         value_buffer.EncodeLeastSignificantBits32(value_bit_length,
223                                                   symbols[j + c]);
224       }
225     }
226   } else {
227     for (int i = 0; i < num_values; i += num_components) {
228       const int bit_length = bit_lengths[i / num_components];
229       // First encode the tag.
230       tag_encoder.EncodeSymbol(bit_length);
231       // Now encode all values using the stored bit_length.
232       for (int j = 0; j < num_components; ++j) {
233         value_buffer.EncodeLeastSignificantBits32(bit_length, symbols[i + j]);
234       }
235     }
236   }
237   tag_encoder.EndEncoding(target_buffer);
238   value_buffer.EndBitEncoding();
239 
240   // Append the values to the end of the target buffer.
241   target_buffer->Encode(value_buffer.data(), value_buffer.size());
242   return true;
243 }
244 
245 template <class SymbolEncoderT>
EncodeRawSymbolsInternal(const uint32_t * symbols,int num_values,uint32_t max_entry_value,EncoderBuffer * target_buffer)246 bool EncodeRawSymbolsInternal(const uint32_t *symbols, int num_values,
247                               uint32_t max_entry_value,
248                               EncoderBuffer *target_buffer) {
249   // Count the frequency of each entry value.
250   std::vector<uint64_t> frequencies(max_entry_value + 1, 0);
251   for (int i = 0; i < num_values; ++i) {
252     ++frequencies[symbols[i]];
253   }
254 
255   SymbolEncoderT encoder;
256   encoder.Create(frequencies.data(), static_cast<int>(frequencies.size()),
257                  target_buffer);
258   encoder.StartEncoding(target_buffer);
259   // Encode all values.
260   if (SymbolEncoderT::needs_reverse_encoding()) {
261     for (int i = num_values - 1; i >= 0; --i) {
262       encoder.EncodeSymbol(symbols[i]);
263     }
264   } else {
265     for (int i = 0; i < num_values; ++i) {
266       encoder.EncodeSymbol(symbols[i]);
267     }
268   }
269   encoder.EndEncoding(target_buffer);
270   return true;
271 }
272 
273 template <template <int> class SymbolEncoderT>
EncodeRawSymbols(const uint32_t * symbols,int num_values,uint32_t max_entry_value,int32_t num_unique_symbols,const Options * options,EncoderBuffer * target_buffer)274 bool EncodeRawSymbols(const uint32_t *symbols, int num_values,
275                       uint32_t max_entry_value, int32_t num_unique_symbols,
276                       const Options *options, EncoderBuffer *target_buffer) {
277   int symbol_bits = 0;
278   if (num_unique_symbols > 0) {
279     symbol_bits = MostSignificantBit(num_unique_symbols);
280   }
281   int unique_symbols_bit_length = symbol_bits + 1;
282   // Currently, we don't support encoding of more than 2^18 unique symbols.
283   if (unique_symbols_bit_length > kMaxRawEncodingBitLength) {
284     return false;
285   }
286   int compression_level = kDefaultSymbolCodingCompressionLevel;
287   if (options != nullptr &&
288       options->IsOptionSet("symbol_encoding_compression_level")) {
289     compression_level = options->GetInt("symbol_encoding_compression_level");
290   }
291 
292   // Adjust the bit_length based on compression level. Lower compression levels
293   // will use fewer bits while higher compression levels use more bits. Note
294   // that this is going to work for all valid bit_lengths because the actual
295   // number of bits allocated for rANS encoding is hard coded as:
296   // std::max(12, 3 * bit_length / 2) , therefore there will be always a
297   // sufficient number of bits available for all symbols.
298   // See ComputeRAnsPrecisionFromUniqueSymbolsBitLength() for the formula.
299   // This hardcoded equation cannot be changed without changing the bitstream.
300   if (compression_level < 4) {
301     unique_symbols_bit_length -= 2;
302   } else if (compression_level < 6) {
303     unique_symbols_bit_length -= 1;
304   } else if (compression_level > 9) {
305     unique_symbols_bit_length += 2;
306   } else if (compression_level > 7) {
307     unique_symbols_bit_length += 1;
308   }
309   // Clamp the bit_length to a valid range.
310   unique_symbols_bit_length = std::min(std::max(1, unique_symbols_bit_length),
311                                        kMaxRawEncodingBitLength);
312   target_buffer->Encode(static_cast<uint8_t>(unique_symbols_bit_length));
313   // Use appropriate symbol encoder based on the maximum symbol bit length.
314   switch (unique_symbols_bit_length) {
315     case 0:
316       FALLTHROUGH_INTENDED;
317     case 1:
318       return EncodeRawSymbolsInternal<SymbolEncoderT<1>>(
319           symbols, num_values, max_entry_value, target_buffer);
320     case 2:
321       return EncodeRawSymbolsInternal<SymbolEncoderT<2>>(
322           symbols, num_values, max_entry_value, target_buffer);
323     case 3:
324       return EncodeRawSymbolsInternal<SymbolEncoderT<3>>(
325           symbols, num_values, max_entry_value, target_buffer);
326     case 4:
327       return EncodeRawSymbolsInternal<SymbolEncoderT<4>>(
328           symbols, num_values, max_entry_value, target_buffer);
329     case 5:
330       return EncodeRawSymbolsInternal<SymbolEncoderT<5>>(
331           symbols, num_values, max_entry_value, target_buffer);
332     case 6:
333       return EncodeRawSymbolsInternal<SymbolEncoderT<6>>(
334           symbols, num_values, max_entry_value, target_buffer);
335     case 7:
336       return EncodeRawSymbolsInternal<SymbolEncoderT<7>>(
337           symbols, num_values, max_entry_value, target_buffer);
338     case 8:
339       return EncodeRawSymbolsInternal<SymbolEncoderT<8>>(
340           symbols, num_values, max_entry_value, target_buffer);
341     case 9:
342       return EncodeRawSymbolsInternal<SymbolEncoderT<9>>(
343           symbols, num_values, max_entry_value, target_buffer);
344     case 10:
345       return EncodeRawSymbolsInternal<SymbolEncoderT<10>>(
346           symbols, num_values, max_entry_value, target_buffer);
347     case 11:
348       return EncodeRawSymbolsInternal<SymbolEncoderT<11>>(
349           symbols, num_values, max_entry_value, target_buffer);
350     case 12:
351       return EncodeRawSymbolsInternal<SymbolEncoderT<12>>(
352           symbols, num_values, max_entry_value, target_buffer);
353     case 13:
354       return EncodeRawSymbolsInternal<SymbolEncoderT<13>>(
355           symbols, num_values, max_entry_value, target_buffer);
356     case 14:
357       return EncodeRawSymbolsInternal<SymbolEncoderT<14>>(
358           symbols, num_values, max_entry_value, target_buffer);
359     case 15:
360       return EncodeRawSymbolsInternal<SymbolEncoderT<15>>(
361           symbols, num_values, max_entry_value, target_buffer);
362     case 16:
363       return EncodeRawSymbolsInternal<SymbolEncoderT<16>>(
364           symbols, num_values, max_entry_value, target_buffer);
365     case 17:
366       return EncodeRawSymbolsInternal<SymbolEncoderT<17>>(
367           symbols, num_values, max_entry_value, target_buffer);
368     case 18:
369       return EncodeRawSymbolsInternal<SymbolEncoderT<18>>(
370           symbols, num_values, max_entry_value, target_buffer);
371     default:
372       return false;
373   }
374 }
375 
376 }  // namespace draco
377