1 // Copyright 2016 The Draco Authors. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 // 15 #ifndef DRACO_COMPRESSION_ENTROPY_SHANNON_ENTROPY_H_ 16 #define DRACO_COMPRESSION_ENTROPY_SHANNON_ENTROPY_H_ 17 18 #include <stdint.h> 19 20 #include <vector> 21 22 namespace draco { 23 24 // Computes an approximate Shannon entropy of symbols stored in the provided 25 // input array |symbols|. The entropy corresponds to the number of bits that is 26 // required to represent/store all the symbols using an optimal entropy coding 27 // algorithm. See for example "A mathematical theory of communication" by 28 // Shannon'48 (http://ieeexplore.ieee.org/document/6773024/). 29 // 30 // |max_value| is a required input that define the maximum value in the input 31 // |symbols| array. 32 // 33 // |out_num_unique_symbols| is an optional output argument that stores the 34 // number of unique symbols contained within the |symbols| array. 35 // TODO(ostava): This should be renamed or the return value should be changed to 36 // return the actual entropy and not the number of bits needed to represent the 37 // input symbols. 38 int64_t ComputeShannonEntropy(const uint32_t *symbols, int num_symbols, 39 int max_value, int *out_num_unique_symbols); 40 41 // Computes the Shannon entropy of |num_values| Boolean entries, where 42 // |num_true_values| are set to true. 43 // Returns entropy between 0-1. 44 double ComputeBinaryShannonEntropy(uint32_t num_values, 45 uint32_t num_true_values); 46 47 // Class that can be used to keep track of the Shannon entropy on streamed data. 48 // As new symbols are pushed to the tracker, the entropy is automatically 49 // recomputed. The class also support recomputing the entropy without actually 50 // pushing the symbols to the tracker through the Peek() method. 51 class ShannonEntropyTracker { 52 public: 53 ShannonEntropyTracker(); 54 55 // Struct for holding entropy data about the symbols added to the tracker. 56 // It can be used to compute the number of bits needed to store the data using 57 // the method: 58 // ShannonEntropyTracker::GetNumberOfDataBits(entropy_data); 59 // or to compute the approximate size of the frequency table needed by the 60 // rans coding using method: 61 // ShannonEntropyTracker::GetNumberOfRAnsTableBits(entropy_data); 62 struct EntropyData { 63 double entropy_norm; 64 int num_values; 65 int max_symbol; 66 int num_unique_symbols; EntropyDataEntropyData67 EntropyData() 68 : entropy_norm(0.0), 69 num_values(0), 70 max_symbol(0), 71 num_unique_symbols(0) {} 72 }; 73 74 // Adds new symbols to the tracker and recomputes the entropy accordingly. 75 EntropyData Push(const uint32_t *symbols, int num_symbols); 76 77 // Returns new entropy data for the tracker as if |symbols| were added to the 78 // tracker without actually changing the status of the tracker. 79 EntropyData Peek(const uint32_t *symbols, int num_symbols); 80 81 // Gets the number of bits needed for encoding symbols added to the tracker. GetNumberOfDataBits()82 int64_t GetNumberOfDataBits() const { 83 return GetNumberOfDataBits(entropy_data_); 84 } 85 86 // Gets the number of bits needed for encoding frequency table using the rans 87 // encoder. GetNumberOfRAnsTableBits()88 int64_t GetNumberOfRAnsTableBits() const { 89 return GetNumberOfRAnsTableBits(entropy_data_); 90 } 91 92 // Gets the number of bits needed for encoding given |entropy_data|. 93 static int64_t GetNumberOfDataBits(const EntropyData &entropy_data); 94 95 // Gets the number of bits needed for encoding frequency table using the rans 96 // encoder for the given |entropy_data|. 97 static int64_t GetNumberOfRAnsTableBits(const EntropyData &entropy_data); 98 99 private: 100 EntropyData UpdateSymbols(const uint32_t *symbols, int num_symbols, 101 bool push_changes); 102 103 std::vector<int32_t> frequencies_; 104 105 EntropyData entropy_data_; 106 }; 107 108 } // namespace draco 109 110 #endif // DRACO_COMPRESSION_ENTROPY_SHANNON_ENTROPY_H_ 111