1 /////////////////////////////////////////////////////////////////////// 2 // File: bitvector.h 3 // Description: Class replacement for BITVECTOR. 4 // Author: Ray Smith 5 // 6 // (C) Copyright 2011, Google Inc. 7 // Licensed under the Apache License, Version 2.0 (the "License"); 8 // you may not use this file except in compliance with the License. 9 // You may obtain a copy of the License at 10 // http://www.apache.org/licenses/LICENSE-2.0 11 // Unless required by applicable law or agreed to in writing, software 12 // distributed under the License is distributed on an "AS IS" BASIS, 13 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 // See the License for the specific language governing permissions and 15 // limitations under the License. 16 // 17 /////////////////////////////////////////////////////////////////////// 18 19 #ifndef TESSERACT_CCUTIL_BITVECTOR_H_ 20 #define TESSERACT_CCUTIL_BITVECTOR_H_ 21 22 #include <tesseract/export.h> 23 24 #include <cassert> 25 #include <cstdint> // for uint8_t 26 #include <cstdio> 27 #include <vector> // for std::vector 28 29 namespace tesseract { 30 31 // Trivial class to encapsulate a fixed-length array of bits, with 32 // Serialize/DeSerialize. Replaces the old macros. 33 class TESS_API BitVector { 34 public: 35 // Fast lookup table to get the first least significant set bit in a byte. 36 // For zero, the table has 255, but since it is a special case, most code 37 // that uses this table will check for zero before looking up lsb_index_. 38 static const uint8_t lsb_index_[256]; 39 // Fast lookup table to get the residual bits after zeroing the least 40 // significant set bit in a byte. 41 static const uint8_t lsb_eroded_[256]; 42 // Fast lookup table to give the number of set bits in a byte. 43 static const int hamming_table_[256]; 44 45 BitVector() = default; 46 // Initializes the array to length * false. BitVector(int length)47 explicit BitVector(int length) : bit_size_(length), array_(WordLength()) { 48 } BitVector(const BitVector & src)49 BitVector(const BitVector &src) : bit_size_(src.bit_size_), array_(src.array_) { 50 } 51 BitVector &operator=(const BitVector &src); 52 ~BitVector() = default; 53 54 // Initializes the array to length * false. 55 void Init(int length); 56 empty()57 int empty() const { 58 return bit_size_ == 0; 59 } 60 61 // Returns the number of bits that are accessible in the vector. size()62 int size() const { 63 return bit_size_; 64 } 65 66 // Writes to the given file. Returns false in case of error. 67 bool Serialize(FILE *fp) const; 68 // Reads from the given file. Returns false in case of error. 69 // If swap is true, assumes a big/little-endian swap is needed. 70 bool DeSerialize(bool swap, FILE *fp); 71 72 void SetAllFalse(); 73 void SetAllTrue(); 74 75 // Accessors to set/reset/get bits. 76 // The range of index is [0, size()-1]. 77 // There is debug-only bounds checking. SetBit(int index)78 void SetBit(int index) { 79 array_[WordIndex(index)] |= BitMask(index); 80 } ResetBit(int index)81 void ResetBit(int index) { 82 array_[WordIndex(index)] &= ~BitMask(index); 83 } SetValue(int index,bool value)84 void SetValue(int index, bool value) { 85 if (value) { 86 SetBit(index); 87 } else { 88 ResetBit(index); 89 } 90 } At(int index)91 bool At(int index) const { 92 return (array_[WordIndex(index)] & BitMask(index)) != 0; 93 } 94 bool operator[](int index) const { 95 return (array_[WordIndex(index)] & BitMask(index)) != 0; 96 } 97 98 // Returns the index of the next set bit after the given index. 99 // Useful for quickly iterating through the set bits in a sparse vector. 100 int NextSetBit(int prev_bit) const; 101 102 // Returns the number of set bits in the vector. 103 int NumSetBits() const; 104 105 // Logical in-place operations on whole bit vectors. Tries to do something 106 // sensible if they aren't the same size, but they should be really. 107 void operator|=(const BitVector &other); 108 void operator&=(const BitVector &other); 109 void operator^=(const BitVector &other); 110 // Set subtraction *this = v1 - v2. 111 void SetSubtract(const BitVector &v1, const BitVector &v2); 112 113 private: 114 // Allocates memory for a vector of the given length. 115 void Alloc(int length); 116 117 // Computes the index to array_ for the given index, with debug range 118 // checking. WordIndex(int index)119 int WordIndex(int index) const { 120 assert(0 <= index && index < bit_size_); 121 return index / kBitFactor; 122 } 123 // Returns a mask to select the appropriate bit for the given index. BitMask(int index)124 uint32_t BitMask(int index) const { 125 return 1 << (index & (kBitFactor - 1)); 126 } 127 // Returns the number of array elements needed to represent the current 128 // bit_size_. WordLength()129 int WordLength() const { 130 return (bit_size_ + kBitFactor - 1) / kBitFactor; 131 } 132 // Returns the number of bytes consumed by the array_. ByteLength()133 int ByteLength() const { 134 return WordLength() * sizeof(array_[0]); 135 } 136 137 // Number of bits in this BitVector. 138 int32_t bit_size_ = 0; 139 // Array of words used to pack the bits. 140 // Bits are stored little-endian by uint32_t word, ie by word first and then 141 // starting with the least significant bit in each word. 142 std::vector<uint32_t> array_; 143 // Number of bits in an array_ element. 144 static const int kBitFactor = sizeof(array_[0]) * 8; 145 }; 146 147 } // namespace tesseract. 148 149 #endif // TESSERACT_CCUTIL_BITVECTOR_H_ 150