1 // Copyright 2018 The Chromium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #ifndef NET_EXTRAS_PRELOAD_DATA_DECODER_H_ 6 #define NET_EXTRAS_PRELOAD_DATA_DECODER_H_ 7 8 #include <stdint.h> 9 10 #include <string> 11 12 #include "base/macros.h" 13 14 namespace net { 15 16 namespace extras { 17 18 // Decodes an entry from preloaded data. 19 // Clients must implement ReadEntry() method to read the specific type of data 20 // they are interested in. 21 class PreloadDecoder { 22 public: 23 // These must match the values in net/tools/huffman_trie/trie/trie_writer.h. 24 enum : char { kEndOfString = 0, kEndOfTable = 127 }; 25 26 // BitReader is a class that allows a bytestring to be read bit-by-bit. 27 class BitReader { 28 public: 29 BitReader(const uint8_t* bytes, size_t num_bits); 30 31 // Next sets |*out| to the next bit from the input. It returns false if no 32 // more bits are available or true otherwise. 33 bool Next(bool* out); 34 35 // Read sets the |num_bits| least-significant bits of |*out| to the value of 36 // the next |num_bits| bits from the input. It returns false if there are 37 // insufficient bits in the input or true otherwise. 38 bool Read(unsigned num_bits, uint32_t* out); 39 40 // Decodes a size_t from the reader, putting the resulting value in |*out|. 41 // Returns false if there are insufficient bits to read and true otherwise. 42 // 43 // This function's inverse is TrieBitBuffer::WriteSize. 44 // 45 // The encoding is a prefix code optimized for small values (less than 4). 46 // It is designed for the lengths of prefixes in the HSTS Preload list trie. 47 // Compared to the unary encoding that was previously used (where the number 48 // of bits used is one plus the value being encoded), this uses one more bit 49 // for encoding 0 and 1, and the same number of bits for encoding 2, and 50 // fewer bits for encoding values greater than 2. At the time of writing, 51 // 35% of the lengths encoded in the trie were 0 or 1, 11% were 2, and the 52 // remaining 54% were greater than 2. 53 // 54 // This encoding scheme uses a variable number of bits to encode each value. 55 // There are fixed values for 0, 1, 2, and 3, and then a simple rule is used 56 // for 4 and greater. 0 uses 2 bits; 1 through 3 use 3 bits. The fixed 57 // values are as follows: 58 // 59 // 0: 0b00 60 // 1: 0b100 61 // 2: 0b101 62 // 3: 0b110 63 // 64 // Note that none of the fixed values are prefixed with 0b01 or 0b111. These 65 // prefixes are used with a unary-like encoding for values 4 and above. 66 // Zero or more 1s, followed by a 0, are appended to one of those prefixes. 67 // Even values use the prefix 0b01, and odd values use the prefix 0b111. The 68 // number of 1s to append is half the value (rounded down) minus 1. 69 bool DecodeSize(size_t* out); 70 71 // Seek sets the current offest in the input to bit number |offset|. It 72 // returns true if |offset| is within the range of the input and false 73 // otherwise. 74 bool Seek(size_t offset); 75 76 private: 77 const uint8_t* const bytes_; 78 const size_t num_bits_; 79 const size_t num_bytes_; 80 // current_byte_index_ contains the current byte offset in |bytes_|. 81 size_t current_byte_index_; 82 // current_byte_ contains the current byte of the input. 83 uint8_t current_byte_; 84 // num_bits_used_ contains the number of bits of |current_byte_| that have 85 // been read. 86 unsigned num_bits_used_; 87 88 DISALLOW_COPY_AND_ASSIGN(BitReader); 89 }; 90 91 // HuffmanDecoder is a very simple Huffman reader. The input Huffman tree is 92 // simply encoded as a series of two-byte structures. The first byte 93 // determines the "0" pointer for that node and the second the "1" pointer. 94 // Each byte either has the MSB set, in which case the bottom 7 bits are the 95 // value for that position, or else the bottom seven bits contain the index of 96 // a node. 97 // 98 // The tree is decoded by walking rather than a table-driven approach. 99 class HuffmanDecoder { 100 public: 101 HuffmanDecoder(const uint8_t* tree, size_t tree_bytes); 102 103 bool Decode(PreloadDecoder::BitReader* reader, char* out) const; 104 105 private: 106 const uint8_t* const tree_; 107 const size_t tree_bytes_; 108 109 DISALLOW_COPY_AND_ASSIGN(HuffmanDecoder); 110 }; 111 112 PreloadDecoder(const uint8_t* huffman_tree, 113 size_t huffman_tree_size, 114 const uint8_t* trie, 115 size_t trie_bits, 116 size_t trie_root_position); 117 virtual ~PreloadDecoder(); 118 119 // Resolves search keyword given by |search| in the preloaded data. Returns 120 // false on internal error and true otherwise. After a successful return, 121 // |*out_found| is true iff a relevant entry has been found. In the case of 122 // HSTS data, |search| is the hostname being searched. 123 // 124 // Although this code should be robust, it never processes attacker-controlled 125 // data -- it only operates on the preloaded data built into the binary. 126 // 127 // The preloaded data is represented as a trie and matches |search| 128 // backwards. Each node in the trie starts with a number of characters, which 129 // must match exactly. After that is a dispatch table which maps the next 130 // character in the search keyword to another node in the trie. 131 // 132 // In the dispatch table, the zero character represents the "end of string" 133 // (which is the *beginning* of the search keyword since we process it 134 // backwards). The value in that case is special -- rather than an offset to 135 // another trie node, it contains the searched entry (for HSTS data, it 136 // contains whether subdomains are included, pinsets etc.). Clients must 137 // implement ReadEntry to read the entry at this location. 138 // 139 // Dispatch tables are always given in order, but the "end of string" (zero) 140 // value always comes before an entry for '.'. 141 bool Decode(const std::string& search, bool* out_found); 142 143 protected: 144 virtual bool ReadEntry(BitReader* reader, 145 const std::string& search, 146 size_t current_search_offset, 147 bool* out_found) = 0; 148 huffman_decoder()149 const HuffmanDecoder& huffman_decoder() const { return huffman_decoder_; } 150 151 private: 152 HuffmanDecoder huffman_decoder_; 153 BitReader bit_reader_; 154 155 const size_t trie_root_position_; 156 157 DISALLOW_COPY_AND_ASSIGN(PreloadDecoder); 158 }; 159 160 } // namespace extras 161 162 } // namespace net 163 164 #endif // NET_EXTRAS_PRELOAD_DATA_DECODER_H_ 165