1 // Copyright 2016 The Draco Authors.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 //
15 #include "draco/compression/entropy/symbol_encoding.h"
16
17 #include <algorithm>
18 #include <cmath>
19
20 #include "draco/compression/entropy/rans_symbol_encoder.h"
21 #include "draco/compression/entropy/shannon_entropy.h"
22 #include "draco/core/bit_utils.h"
23 #include "draco/core/macros.h"
24
25 namespace draco {
26
27 constexpr int32_t kMaxTagSymbolBitLength = 32;
28 constexpr int kMaxRawEncodingBitLength = 18;
29 constexpr int kDefaultSymbolCodingCompressionLevel = 7;
30
31 typedef uint64_t TaggedBitLengthFrequencies[kMaxTagSymbolBitLength];
32
SetSymbolEncodingMethod(Options * options,SymbolCodingMethod method)33 void SetSymbolEncodingMethod(Options *options, SymbolCodingMethod method) {
34 options->SetInt("symbol_encoding_method", method);
35 }
36
SetSymbolEncodingCompressionLevel(Options * options,int compression_level)37 bool SetSymbolEncodingCompressionLevel(Options *options,
38 int compression_level) {
39 if (compression_level < 0 || compression_level > 10) {
40 return false;
41 }
42 options->SetInt("symbol_encoding_compression_level", compression_level);
43 return true;
44 }
45
46 // Computes bit lengths of the input values. If num_components > 1, the values
47 // are processed in "num_components" sized chunks and the bit length is always
48 // computed for the largest value from the chunk.
ComputeBitLengths(const uint32_t * symbols,int num_values,int num_components,std::vector<uint32_t> * out_bit_lengths,uint32_t * out_max_value)49 static void ComputeBitLengths(const uint32_t *symbols, int num_values,
50 int num_components,
51 std::vector<uint32_t> *out_bit_lengths,
52 uint32_t *out_max_value) {
53 out_bit_lengths->reserve(num_values);
54 *out_max_value = 0;
55 // Maximum integer value across all components.
56 for (int i = 0; i < num_values; i += num_components) {
57 // Get the maximum value for a given entry across all attribute components.
58 uint32_t max_component_value = symbols[i];
59 for (int j = 1; j < num_components; ++j) {
60 if (max_component_value < symbols[i + j]) {
61 max_component_value = symbols[i + j];
62 }
63 }
64 int value_msb_pos = 0;
65 if (max_component_value > 0) {
66 value_msb_pos = MostSignificantBit(max_component_value);
67 }
68 if (max_component_value > *out_max_value) {
69 *out_max_value = max_component_value;
70 }
71 out_bit_lengths->push_back(value_msb_pos + 1);
72 }
73 }
74
ApproximateTaggedSchemeBits(const std::vector<uint32_t> bit_lengths,int num_components)75 static int64_t ApproximateTaggedSchemeBits(
76 const std::vector<uint32_t> bit_lengths, int num_components) {
77 // Compute the total bit length used by all values (the length of data encode
78 // after tags).
79 uint64_t total_bit_length = 0;
80 for (size_t i = 0; i < bit_lengths.size(); ++i) {
81 total_bit_length += bit_lengths[i];
82 }
83 // Compute the number of entropy bits for tags.
84 int num_unique_symbols;
85 const int64_t tag_bits = ComputeShannonEntropy(
86 bit_lengths.data(), static_cast<int>(bit_lengths.size()), 32,
87 &num_unique_symbols);
88 const int64_t tag_table_bits =
89 ApproximateRAnsFrequencyTableBits(num_unique_symbols, num_unique_symbols);
90 return tag_bits + tag_table_bits + total_bit_length * num_components;
91 }
92
ApproximateRawSchemeBits(const uint32_t * symbols,int num_symbols,uint32_t max_value,int * out_num_unique_symbols)93 static int64_t ApproximateRawSchemeBits(const uint32_t *symbols,
94 int num_symbols, uint32_t max_value,
95 int *out_num_unique_symbols) {
96 int num_unique_symbols;
97 const int64_t data_bits = ComputeShannonEntropy(
98 symbols, num_symbols, max_value, &num_unique_symbols);
99 const int64_t table_bits =
100 ApproximateRAnsFrequencyTableBits(max_value, num_unique_symbols);
101 *out_num_unique_symbols = num_unique_symbols;
102 return table_bits + data_bits;
103 }
104
105 template <template <int> class SymbolEncoderT>
106 bool EncodeTaggedSymbols(const uint32_t *symbols, int num_values,
107 int num_components,
108 const std::vector<uint32_t> &bit_lengths,
109 EncoderBuffer *target_buffer);
110
111 template <template <int> class SymbolEncoderT>
112 bool EncodeRawSymbols(const uint32_t *symbols, int num_values,
113 uint32_t max_entry_value, int32_t num_unique_symbols,
114 const Options *options, EncoderBuffer *target_buffer);
115
EncodeSymbols(const uint32_t * symbols,int num_values,int num_components,const Options * options,EncoderBuffer * target_buffer)116 bool EncodeSymbols(const uint32_t *symbols, int num_values, int num_components,
117 const Options *options, EncoderBuffer *target_buffer) {
118 if (num_values < 0) {
119 return false;
120 }
121 if (num_values == 0) {
122 return true;
123 }
124 if (num_components <= 0) {
125 num_components = 1;
126 }
127 std::vector<uint32_t> bit_lengths;
128 uint32_t max_value;
129 ComputeBitLengths(symbols, num_values, num_components, &bit_lengths,
130 &max_value);
131
132 // Approximate number of bits needed for storing the symbols using the tagged
133 // scheme.
134 const int64_t tagged_scheme_total_bits =
135 ApproximateTaggedSchemeBits(bit_lengths, num_components);
136
137 // Approximate number of bits needed for storing the symbols using the raw
138 // scheme.
139 int num_unique_symbols = 0;
140 const int64_t raw_scheme_total_bits = ApproximateRawSchemeBits(
141 symbols, num_values, max_value, &num_unique_symbols);
142
143 // The maximum bit length of a single entry value that we can encode using
144 // the raw scheme.
145 const int max_value_bit_length =
146 MostSignificantBit(std::max(1u, max_value)) + 1;
147
148 int method = -1;
149 if (options != nullptr && options->IsOptionSet("symbol_encoding_method")) {
150 method = options->GetInt("symbol_encoding_method");
151 } else {
152 if (tagged_scheme_total_bits < raw_scheme_total_bits ||
153 max_value_bit_length > kMaxRawEncodingBitLength) {
154 method = SYMBOL_CODING_TAGGED;
155 } else {
156 method = SYMBOL_CODING_RAW;
157 }
158 }
159 // Use the tagged scheme.
160 target_buffer->Encode(static_cast<uint8_t>(method));
161 if (method == SYMBOL_CODING_TAGGED) {
162 return EncodeTaggedSymbols<RAnsSymbolEncoder>(
163 symbols, num_values, num_components, bit_lengths, target_buffer);
164 }
165 if (method == SYMBOL_CODING_RAW) {
166 return EncodeRawSymbols<RAnsSymbolEncoder>(symbols, num_values, max_value,
167 num_unique_symbols, options,
168 target_buffer);
169 }
170 // Unknown method selected.
171 return false;
172 }
173
174 template <template <int> class SymbolEncoderT>
EncodeTaggedSymbols(const uint32_t * symbols,int num_values,int num_components,const std::vector<uint32_t> & bit_lengths,EncoderBuffer * target_buffer)175 bool EncodeTaggedSymbols(const uint32_t *symbols, int num_values,
176 int num_components,
177 const std::vector<uint32_t> &bit_lengths,
178 EncoderBuffer *target_buffer) {
179 // Create entries for entropy coding. Each entry corresponds to a different
180 // number of bits that are necessary to encode a given value. Every value
181 // has at most 32 bits. Therefore, we need 32 different entries (for
182 // bit_length [1-32]). For each entry we compute the frequency of a given
183 // bit-length in our data set.
184 TaggedBitLengthFrequencies frequencies;
185 // Set frequency for each entry to zero.
186 memset(frequencies, 0, sizeof(frequencies));
187
188 // Compute the frequencies from input data.
189 // Maximum integer value for the values across all components.
190 for (size_t i = 0; i < bit_lengths.size(); ++i) {
191 // Update the frequency of the associated entry id.
192 ++frequencies[bit_lengths[i]];
193 }
194
195 // Create one extra buffer to store raw value.
196 EncoderBuffer value_buffer;
197 // Number of expected bits we need to store the values (can be optimized if
198 // needed).
199 const uint64_t value_bits =
200 kMaxTagSymbolBitLength * static_cast<uint64_t>(num_values);
201
202 // Create encoder for encoding the bit tags.
203 SymbolEncoderT<5> tag_encoder;
204 tag_encoder.Create(frequencies, kMaxTagSymbolBitLength, target_buffer);
205
206 // Start encoding bit tags.
207 tag_encoder.StartEncoding(target_buffer);
208
209 // Also start encoding the values.
210 value_buffer.StartBitEncoding(value_bits, false);
211
212 if (tag_encoder.needs_reverse_encoding()) {
213 // Encoder needs the values to be encoded in the reverse order.
214 for (int i = num_values - num_components; i >= 0; i -= num_components) {
215 const int bit_length = bit_lengths[i / num_components];
216 tag_encoder.EncodeSymbol(bit_length);
217
218 // Values are always encoded in the normal order
219 const int j = num_values - num_components - i;
220 const int value_bit_length = bit_lengths[j / num_components];
221 for (int c = 0; c < num_components; ++c) {
222 value_buffer.EncodeLeastSignificantBits32(value_bit_length,
223 symbols[j + c]);
224 }
225 }
226 } else {
227 for (int i = 0; i < num_values; i += num_components) {
228 const int bit_length = bit_lengths[i / num_components];
229 // First encode the tag.
230 tag_encoder.EncodeSymbol(bit_length);
231 // Now encode all values using the stored bit_length.
232 for (int j = 0; j < num_components; ++j) {
233 value_buffer.EncodeLeastSignificantBits32(bit_length, symbols[i + j]);
234 }
235 }
236 }
237 tag_encoder.EndEncoding(target_buffer);
238 value_buffer.EndBitEncoding();
239
240 // Append the values to the end of the target buffer.
241 target_buffer->Encode(value_buffer.data(), value_buffer.size());
242 return true;
243 }
244
245 template <class SymbolEncoderT>
EncodeRawSymbolsInternal(const uint32_t * symbols,int num_values,uint32_t max_entry_value,EncoderBuffer * target_buffer)246 bool EncodeRawSymbolsInternal(const uint32_t *symbols, int num_values,
247 uint32_t max_entry_value,
248 EncoderBuffer *target_buffer) {
249 // Count the frequency of each entry value.
250 std::vector<uint64_t> frequencies(max_entry_value + 1, 0);
251 for (int i = 0; i < num_values; ++i) {
252 ++frequencies[symbols[i]];
253 }
254
255 SymbolEncoderT encoder;
256 encoder.Create(frequencies.data(), static_cast<int>(frequencies.size()),
257 target_buffer);
258 encoder.StartEncoding(target_buffer);
259 // Encode all values.
260 if (SymbolEncoderT::needs_reverse_encoding()) {
261 for (int i = num_values - 1; i >= 0; --i) {
262 encoder.EncodeSymbol(symbols[i]);
263 }
264 } else {
265 for (int i = 0; i < num_values; ++i) {
266 encoder.EncodeSymbol(symbols[i]);
267 }
268 }
269 encoder.EndEncoding(target_buffer);
270 return true;
271 }
272
273 template <template <int> class SymbolEncoderT>
EncodeRawSymbols(const uint32_t * symbols,int num_values,uint32_t max_entry_value,int32_t num_unique_symbols,const Options * options,EncoderBuffer * target_buffer)274 bool EncodeRawSymbols(const uint32_t *symbols, int num_values,
275 uint32_t max_entry_value, int32_t num_unique_symbols,
276 const Options *options, EncoderBuffer *target_buffer) {
277 int symbol_bits = 0;
278 if (num_unique_symbols > 0) {
279 symbol_bits = MostSignificantBit(num_unique_symbols);
280 }
281 int unique_symbols_bit_length = symbol_bits + 1;
282 // Currently, we don't support encoding of more than 2^18 unique symbols.
283 if (unique_symbols_bit_length > kMaxRawEncodingBitLength) {
284 return false;
285 }
286 int compression_level = kDefaultSymbolCodingCompressionLevel;
287 if (options != nullptr &&
288 options->IsOptionSet("symbol_encoding_compression_level")) {
289 compression_level = options->GetInt("symbol_encoding_compression_level");
290 }
291
292 // Adjust the bit_length based on compression level. Lower compression levels
293 // will use fewer bits while higher compression levels use more bits. Note
294 // that this is going to work for all valid bit_lengths because the actual
295 // number of bits allocated for rANS encoding is hard coded as:
296 // std::max(12, 3 * bit_length / 2) , therefore there will be always a
297 // sufficient number of bits available for all symbols.
298 // See ComputeRAnsPrecisionFromUniqueSymbolsBitLength() for the formula.
299 // This hardcoded equation cannot be changed without changing the bitstream.
300 if (compression_level < 4) {
301 unique_symbols_bit_length -= 2;
302 } else if (compression_level < 6) {
303 unique_symbols_bit_length -= 1;
304 } else if (compression_level > 9) {
305 unique_symbols_bit_length += 2;
306 } else if (compression_level > 7) {
307 unique_symbols_bit_length += 1;
308 }
309 // Clamp the bit_length to a valid range.
310 unique_symbols_bit_length = std::min(std::max(1, unique_symbols_bit_length),
311 kMaxRawEncodingBitLength);
312 target_buffer->Encode(static_cast<uint8_t>(unique_symbols_bit_length));
313 // Use appropriate symbol encoder based on the maximum symbol bit length.
314 switch (unique_symbols_bit_length) {
315 case 0:
316 FALLTHROUGH_INTENDED;
317 case 1:
318 return EncodeRawSymbolsInternal<SymbolEncoderT<1>>(
319 symbols, num_values, max_entry_value, target_buffer);
320 case 2:
321 return EncodeRawSymbolsInternal<SymbolEncoderT<2>>(
322 symbols, num_values, max_entry_value, target_buffer);
323 case 3:
324 return EncodeRawSymbolsInternal<SymbolEncoderT<3>>(
325 symbols, num_values, max_entry_value, target_buffer);
326 case 4:
327 return EncodeRawSymbolsInternal<SymbolEncoderT<4>>(
328 symbols, num_values, max_entry_value, target_buffer);
329 case 5:
330 return EncodeRawSymbolsInternal<SymbolEncoderT<5>>(
331 symbols, num_values, max_entry_value, target_buffer);
332 case 6:
333 return EncodeRawSymbolsInternal<SymbolEncoderT<6>>(
334 symbols, num_values, max_entry_value, target_buffer);
335 case 7:
336 return EncodeRawSymbolsInternal<SymbolEncoderT<7>>(
337 symbols, num_values, max_entry_value, target_buffer);
338 case 8:
339 return EncodeRawSymbolsInternal<SymbolEncoderT<8>>(
340 symbols, num_values, max_entry_value, target_buffer);
341 case 9:
342 return EncodeRawSymbolsInternal<SymbolEncoderT<9>>(
343 symbols, num_values, max_entry_value, target_buffer);
344 case 10:
345 return EncodeRawSymbolsInternal<SymbolEncoderT<10>>(
346 symbols, num_values, max_entry_value, target_buffer);
347 case 11:
348 return EncodeRawSymbolsInternal<SymbolEncoderT<11>>(
349 symbols, num_values, max_entry_value, target_buffer);
350 case 12:
351 return EncodeRawSymbolsInternal<SymbolEncoderT<12>>(
352 symbols, num_values, max_entry_value, target_buffer);
353 case 13:
354 return EncodeRawSymbolsInternal<SymbolEncoderT<13>>(
355 symbols, num_values, max_entry_value, target_buffer);
356 case 14:
357 return EncodeRawSymbolsInternal<SymbolEncoderT<14>>(
358 symbols, num_values, max_entry_value, target_buffer);
359 case 15:
360 return EncodeRawSymbolsInternal<SymbolEncoderT<15>>(
361 symbols, num_values, max_entry_value, target_buffer);
362 case 16:
363 return EncodeRawSymbolsInternal<SymbolEncoderT<16>>(
364 symbols, num_values, max_entry_value, target_buffer);
365 case 17:
366 return EncodeRawSymbolsInternal<SymbolEncoderT<17>>(
367 symbols, num_values, max_entry_value, target_buffer);
368 case 18:
369 return EncodeRawSymbolsInternal<SymbolEncoderT<18>>(
370 symbols, num_values, max_entry_value, target_buffer);
371 default:
372 return false;
373 }
374 }
375
376 } // namespace draco
377