1 // Copyright 2016 The Draco Authors.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //      http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 //
15 #ifndef DRACO_COMPRESSION_ENTROPY_SHANNON_ENTROPY_H_
16 #define DRACO_COMPRESSION_ENTROPY_SHANNON_ENTROPY_H_
17 
18 #include <stdint.h>
19 
20 #include <vector>
21 
22 namespace draco {
23 
24 // Computes an approximate Shannon entropy of symbols stored in the provided
25 // input array |symbols|. The entropy corresponds to the number of bits that is
26 // required to represent/store all the symbols using an optimal entropy coding
27 // algorithm. See for example "A mathematical theory of communication" by
28 // Shannon'48 (http://ieeexplore.ieee.org/document/6773024/).
29 //
30 // |max_value| is a required input that define the maximum value in the input
31 // |symbols| array.
32 //
33 // |out_num_unique_symbols| is an optional output argument that stores the
34 // number of unique symbols contained within the |symbols| array.
35 // TODO(ostava): This should be renamed or the return value should be changed to
36 // return the actual entropy and not the number of bits needed to represent the
37 // input symbols.
38 int64_t ComputeShannonEntropy(const uint32_t *symbols, int num_symbols,
39                               int max_value, int *out_num_unique_symbols);
40 
41 // Computes the Shannon entropy of |num_values| Boolean entries, where
42 // |num_true_values| are set to true.
43 // Returns entropy between 0-1.
44 double ComputeBinaryShannonEntropy(uint32_t num_values,
45                                    uint32_t num_true_values);
46 
47 // Class that can be used to keep track of the Shannon entropy on streamed data.
48 // As new symbols are pushed to the tracker, the entropy is automatically
49 // recomputed. The class also support recomputing the entropy without actually
50 // pushing the symbols to the tracker through the Peek() method.
51 class ShannonEntropyTracker {
52  public:
53   ShannonEntropyTracker();
54 
55   // Struct for holding entropy data about the symbols added to the tracker.
56   // It can be used to compute the number of bits needed to store the data using
57   // the method:
58   //   ShannonEntropyTracker::GetNumberOfDataBits(entropy_data);
59   // or to compute the approximate size of the frequency table needed by the
60   // rans coding using method:
61   //   ShannonEntropyTracker::GetNumberOfRAnsTableBits(entropy_data);
62   struct EntropyData {
63     double entropy_norm;
64     int num_values;
65     int max_symbol;
66     int num_unique_symbols;
EntropyDataEntropyData67     EntropyData()
68         : entropy_norm(0.0),
69           num_values(0),
70           max_symbol(0),
71           num_unique_symbols(0) {}
72   };
73 
74   // Adds new symbols to the tracker and recomputes the entropy accordingly.
75   EntropyData Push(const uint32_t *symbols, int num_symbols);
76 
77   // Returns new entropy data for the tracker as if |symbols| were added to the
78   // tracker without actually changing the status of the tracker.
79   EntropyData Peek(const uint32_t *symbols, int num_symbols);
80 
81   // Gets the number of bits needed for encoding symbols added to the tracker.
GetNumberOfDataBits()82   int64_t GetNumberOfDataBits() const {
83     return GetNumberOfDataBits(entropy_data_);
84   }
85 
86   // Gets the number of bits needed for encoding frequency table using the rans
87   // encoder.
GetNumberOfRAnsTableBits()88   int64_t GetNumberOfRAnsTableBits() const {
89     return GetNumberOfRAnsTableBits(entropy_data_);
90   }
91 
92   // Gets the number of bits needed for encoding given |entropy_data|.
93   static int64_t GetNumberOfDataBits(const EntropyData &entropy_data);
94 
95   // Gets the number of bits needed for encoding frequency table using the rans
96   // encoder for the given |entropy_data|.
97   static int64_t GetNumberOfRAnsTableBits(const EntropyData &entropy_data);
98 
99  private:
100   EntropyData UpdateSymbols(const uint32_t *symbols, int num_symbols,
101                             bool push_changes);
102 
103   std::vector<int32_t> frequencies_;
104 
105   EntropyData entropy_data_;
106 };
107 
108 }  // namespace draco
109 
110 #endif  // DRACO_COMPRESSION_ENTROPY_SHANNON_ENTROPY_H_
111