1 ///////////////////////////////////////////////////////////////////////
2 // File:        bitvector.h
3 // Description: Class replacement for BITVECTOR.
4 // Author:      Ray Smith
5 //
6 // (C) Copyright 2011, Google Inc.
7 // Licensed under the Apache License, Version 2.0 (the "License");
8 // you may not use this file except in compliance with the License.
9 // You may obtain a copy of the License at
10 // http://www.apache.org/licenses/LICENSE-2.0
11 // Unless required by applicable law or agreed to in writing, software
12 // distributed under the License is distributed on an "AS IS" BASIS,
13 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 // See the License for the specific language governing permissions and
15 // limitations under the License.
16 //
17 ///////////////////////////////////////////////////////////////////////
18 
19 #ifndef TESSERACT_CCUTIL_BITVECTOR_H_
20 #define TESSERACT_CCUTIL_BITVECTOR_H_
21 
22 #include <tesseract/export.h>
23 
24 #include <cassert>
25 #include <cstdint> // for uint8_t
26 #include <cstdio>
27 #include <vector>  // for std::vector
28 
29 namespace tesseract {
30 
31 // Trivial class to encapsulate a fixed-length array of bits, with
32 // Serialize/DeSerialize. Replaces the old macros.
33 class TESS_API BitVector {
34 public:
35   // Fast lookup table to get the first least significant set bit in a byte.
36   // For zero, the table has 255, but since it is a special case, most code
37   // that uses this table will check for zero before looking up lsb_index_.
38   static const uint8_t lsb_index_[256];
39   // Fast lookup table to get the residual bits after zeroing the least
40   // significant set bit in a byte.
41   static const uint8_t lsb_eroded_[256];
42   // Fast lookup table to give the number of set bits in a byte.
43   static const int hamming_table_[256];
44 
45   BitVector() = default;
46   // Initializes the array to length * false.
BitVector(int length)47   explicit BitVector(int length) : bit_size_(length), array_(WordLength()) {
48   }
BitVector(const BitVector & src)49   BitVector(const BitVector &src) : bit_size_(src.bit_size_), array_(src.array_) {
50   }
51   BitVector &operator=(const BitVector &src);
52   ~BitVector() = default;
53 
54   // Initializes the array to length * false.
55   void Init(int length);
56 
empty()57   int empty() const {
58     return bit_size_ == 0;
59   }
60 
61   // Returns the number of bits that are accessible in the vector.
size()62   int size() const {
63     return bit_size_;
64   }
65 
66   // Writes to the given file. Returns false in case of error.
67   bool Serialize(FILE *fp) const;
68   // Reads from the given file. Returns false in case of error.
69   // If swap is true, assumes a big/little-endian swap is needed.
70   bool DeSerialize(bool swap, FILE *fp);
71 
72   void SetAllFalse();
73   void SetAllTrue();
74 
75   // Accessors to set/reset/get bits.
76   // The range of index is [0, size()-1].
77   // There is debug-only bounds checking.
SetBit(int index)78   void SetBit(int index) {
79     array_[WordIndex(index)] |= BitMask(index);
80   }
ResetBit(int index)81   void ResetBit(int index) {
82     array_[WordIndex(index)] &= ~BitMask(index);
83   }
SetValue(int index,bool value)84   void SetValue(int index, bool value) {
85     if (value) {
86       SetBit(index);
87     } else {
88       ResetBit(index);
89     }
90   }
At(int index)91   bool At(int index) const {
92     return (array_[WordIndex(index)] & BitMask(index)) != 0;
93   }
94   bool operator[](int index) const {
95     return (array_[WordIndex(index)] & BitMask(index)) != 0;
96   }
97 
98   // Returns the index of the next set bit after the given index.
99   // Useful for quickly iterating through the set bits in a sparse vector.
100   int NextSetBit(int prev_bit) const;
101 
102   // Returns the number of set bits in the vector.
103   int NumSetBits() const;
104 
105   // Logical in-place operations on whole bit vectors. Tries to do something
106   // sensible if they aren't the same size, but they should be really.
107   void operator|=(const BitVector &other);
108   void operator&=(const BitVector &other);
109   void operator^=(const BitVector &other);
110   // Set subtraction *this = v1 - v2.
111   void SetSubtract(const BitVector &v1, const BitVector &v2);
112 
113 private:
114   // Allocates memory for a vector of the given length.
115   void Alloc(int length);
116 
117   // Computes the index to array_ for the given index, with debug range
118   // checking.
WordIndex(int index)119   int WordIndex(int index) const {
120     assert(0 <= index && index < bit_size_);
121     return index / kBitFactor;
122   }
123   // Returns a mask to select the appropriate bit for the given index.
BitMask(int index)124   uint32_t BitMask(int index) const {
125     return 1 << (index & (kBitFactor - 1));
126   }
127   // Returns the number of array elements needed to represent the current
128   // bit_size_.
WordLength()129   int WordLength() const {
130     return (bit_size_ + kBitFactor - 1) / kBitFactor;
131   }
132   // Returns the number of bytes consumed by the array_.
ByteLength()133   int ByteLength() const {
134     return WordLength() * sizeof(array_[0]);
135   }
136 
137   // Number of bits in this BitVector.
138   int32_t bit_size_ = 0;
139   // Array of words used to pack the bits.
140   // Bits are stored little-endian by uint32_t word, ie by word first and then
141   // starting with the least significant bit in each word.
142   std::vector<uint32_t> array_;
143   // Number of bits in an array_ element.
144   static const int kBitFactor = sizeof(array_[0]) * 8;
145 };
146 
147 } // namespace tesseract.
148 
149 #endif // TESSERACT_CCUTIL_BITVECTOR_H_
150