1 // Copyright 2010-2021 Google LLC
2 // Licensed under the Apache License, Version 2.0 (the "License");
3 // you may not use this file except in compliance with the License.
4 // You may obtain a copy of the License at
5 //
6 // http://www.apache.org/licenses/LICENSE-2.0
7 //
8 // Unless required by applicable law or agreed to in writing, software
9 // distributed under the License is distributed on an "AS IS" BASIS,
10 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11 // See the License for the specific language governing permissions and
12 // limitations under the License.
13
14 // Various utility functions on bitsets.
15
16 #ifndef OR_TOOLS_UTIL_BITSET_H_
17 #define OR_TOOLS_UTIL_BITSET_H_
18
19 #include <string.h>
20
21 #include <algorithm>
22 #include <vector>
23
24 #include "ortools/base/integral_types.h"
25 #include "ortools/base/logging.h"
26 #include "ortools/base/macros.h"
27
28 namespace operations_research {
29
30 // Basic bit operations
31
32 // Useful constants: word and double word will all bits set.
33 static const uint64_t kAllBits64 = uint64_t{0xFFFFFFFFFFFFFFFF};
34 static const uint64_t kAllBitsButLsb64 = uint64_t{0xFFFFFFFFFFFFFFFE};
35 static const uint32_t kAllBits32 = 0xFFFFFFFFU;
36
37 // Returns a word with only bit pos set.
OneBit64(int pos)38 inline uint64_t OneBit64(int pos) { return uint64_t{1} << pos; }
OneBit32(int pos)39 inline uint32_t OneBit32(int pos) { return 1U << pos; }
40
41 // Returns the number of bits set in n.
BitCount64(uint64_t n)42 inline uint64_t BitCount64(uint64_t n) {
43 const uint64_t m1 = uint64_t{0x5555555555555555};
44 const uint64_t m2 = uint64_t{0x3333333333333333};
45 const uint64_t m4 = uint64_t{0x0F0F0F0F0F0F0F0F};
46 const uint64_t h01 = uint64_t{0x0101010101010101};
47 n -= (n >> 1) & m1;
48 n = (n & m2) + ((n >> 2) & m2);
49 n = (n + (n >> 4)) & m4;
50 n = (n * h01) >> 56;
51 return n;
52 }
BitCount32(uint32_t n)53 inline uint32_t BitCount32(uint32_t n) {
54 n -= (n >> 1) & 0x55555555UL;
55 n = (n & 0x33333333) + ((n >> 2) & 0x33333333UL);
56 n = (n + (n >> 4)) & 0x0F0F0F0FUL;
57 n = n + (n >> 8);
58 n = n + (n >> 16);
59 return n & 0x0000003FUL;
60 }
61
62 // Returns a word with only the least significant bit of n set.
LeastSignificantBitWord64(uint64_t n)63 inline uint64_t LeastSignificantBitWord64(uint64_t n) { return n & ~(n - 1); }
LeastSignificantBitWord32(uint32_t n)64 inline uint32_t LeastSignificantBitWord32(uint32_t n) { return n & ~(n - 1); }
65
66 // Returns the least significant bit position in n.
67 // Discussion around lsb computation:
68 // De Bruijn is almost as fast as the bsr/bsf-instruction-based intrinsics.
69 // Both are always much faster than the Default algorithm.
70 #define USE_DEBRUIJN true // if true, use de Bruijn bit forward scanner.
71 #if defined(__GNUC__) || defined(__llvm__)
72 #define USE_FAST_LEAST_SIGNIFICANT_BIT true // if true, use fast lsb.
73 #endif
74
75 #if defined(USE_FAST_LEAST_SIGNIFICANT_BIT)
LeastSignificantBitPosition64Fast(uint64_t n)76 inline int LeastSignificantBitPosition64Fast(uint64_t n) {
77 return n == 0 ? 0 : __builtin_ctzll(n);
78 }
79 #endif
80
LeastSignificantBitPosition64DeBruijn(uint64_t n)81 inline int LeastSignificantBitPosition64DeBruijn(uint64_t n) {
82 static const uint64_t kSeq = uint64_t{0x0218a392dd5fb34f};
83 static const int kTab[64] = {
84 // initialized by 'kTab[(kSeq << i) >> 58] = i
85 0, 1, 2, 7, 3, 13, 8, 19, 4, 25, 14, 28, 9, 52, 20, 58,
86 5, 17, 26, 56, 15, 38, 29, 40, 10, 49, 53, 31, 21, 34, 59, 42,
87 63, 6, 12, 18, 24, 27, 51, 57, 16, 55, 37, 39, 48, 30, 33, 41,
88 62, 11, 23, 50, 54, 36, 47, 32, 61, 22, 35, 46, 60, 45, 44, 43,
89 };
90 return kTab[((n & (~n + 1)) * kSeq) >> 58];
91 }
92
LeastSignificantBitPosition64Default(uint64_t n)93 inline int LeastSignificantBitPosition64Default(uint64_t n) {
94 if (n == 0) return 0;
95 int pos = 63;
96 if (n & 0x00000000FFFFFFFFLL) {
97 pos -= 32;
98 } else {
99 n >>= 32;
100 }
101 if (n & 0x000000000000FFFFLL) {
102 pos -= 16;
103 } else {
104 n >>= 16;
105 }
106 if (n & 0x00000000000000FFLL) {
107 pos -= 8;
108 } else {
109 n >>= 8;
110 }
111 if (n & 0x000000000000000FLL) {
112 pos -= 4;
113 } else {
114 n >>= 4;
115 }
116 if (n & 0x0000000000000003LL) {
117 pos -= 2;
118 } else {
119 n >>= 2;
120 }
121 if (n & 0x0000000000000001LL) {
122 pos -= 1;
123 }
124 return pos;
125 }
126
LeastSignificantBitPosition64(uint64_t n)127 inline int LeastSignificantBitPosition64(uint64_t n) {
128 DCHECK_NE(n, 0);
129 #ifdef USE_FAST_LEAST_SIGNIFICANT_BIT
130 return LeastSignificantBitPosition64Fast(n);
131 #elif defined(USE_DEBRUIJN)
132 return LeastSignificantBitPosition64DeBruijn(n);
133 #else
134 return LeastSignificantBitPosition64Default(n);
135 #endif
136 }
137
138 #if defined(USE_FAST_LEAST_SIGNIFICANT_BIT)
LeastSignificantBitPosition32Fast(uint32_t n)139 inline int LeastSignificantBitPosition32Fast(uint32_t n) {
140 return n == 0 ? 0 : __builtin_ctzl(n);
141 }
142 #endif
143
LeastSignificantBitPosition32DeBruijn(uint32_t n)144 inline int LeastSignificantBitPosition32DeBruijn(uint32_t n) {
145 static const uint32_t kSeq = 0x077CB531U; // de Bruijn sequence
146 static const int kTab[32] = {// initialized by 'kTab[(kSeq << i) >> 27] = i
147 0, 1, 28, 2, 29, 14, 24, 3, 30, 22, 20,
148 15, 25, 17, 4, 8, 31, 27, 13, 23, 21, 19,
149 16, 7, 26, 12, 18, 6, 11, 5, 10, 9};
150 return kTab[((n & (~n + 1)) * kSeq) >> 27];
151 }
152
LeastSignificantBitPosition32Default(uint32_t n)153 inline int LeastSignificantBitPosition32Default(uint32_t n) {
154 if (n == 0) return 0;
155 int pos = 31;
156 if (n & 0x0000FFFFL) {
157 pos -= 16;
158 } else {
159 n >>= 16;
160 }
161 if (n & 0x000000FFL) {
162 pos -= 8;
163 } else {
164 n >>= 8;
165 }
166 if (n & 0x0000000FL) {
167 pos -= 4;
168 } else {
169 n >>= 4;
170 }
171 if (n & 0x00000003L) {
172 pos -= 2;
173 } else {
174 n >>= 2;
175 }
176 if (n & 0x00000001L) {
177 pos -= 1;
178 }
179 return pos;
180 }
181
LeastSignificantBitPosition32(uint32_t n)182 inline int LeastSignificantBitPosition32(uint32_t n) {
183 DCHECK_NE(n, 0);
184 #ifdef USE_FAST_LEAST_SIGNIFICANT_BIT
185 return LeastSignificantBitPosition32Fast(n);
186 #elif defined(USE_DEBRUIJN)
187 return LeastSignificantBitPosition32DeBruijn(n);
188 #else
189 return LeastSignificantBitPosition32Default(n);
190 #endif
191 }
192
193 // Returns the most significant bit position in n.
194 #if USE_FAST_LEAST_SIGNIFICANT_BIT
MostSignificantBitPosition64Fast(uint64_t n)195 inline int MostSignificantBitPosition64Fast(uint64_t n) {
196 // __builtin_clzll(1) should always return 63. There is no penalty in
197 // using offset, and the code looks more like its uint32_t counterpart.
198 const int offset = __builtin_clzll(1);
199 return n == 0 ? 0 : (offset - __builtin_clzll(n));
200 }
201 #endif
202
MostSignificantBitPosition64Default(uint64_t n)203 inline int MostSignificantBitPosition64Default(uint64_t n) {
204 int b = 0;
205 if (0 != (n & (kAllBits64 << (1 << 5)))) {
206 b |= (1 << 5);
207 n >>= (1 << 5);
208 }
209 if (0 != (n & (kAllBits64 << (1 << 4)))) {
210 b |= (1 << 4);
211 n >>= (1 << 4);
212 }
213 if (0 != (n & (kAllBits64 << (1 << 3)))) {
214 b |= (1 << 3);
215 n >>= (1 << 3);
216 }
217 if (0 != (n & (kAllBits64 << (1 << 2)))) {
218 b |= (1 << 2);
219 n >>= (1 << 2);
220 }
221 if (0 != (n & (kAllBits64 << (1 << 1)))) {
222 b |= (1 << 1);
223 n >>= (1 << 1);
224 }
225 if (0 != (n & (kAllBits64 << (1 << 0)))) {
226 b |= (1 << 0);
227 }
228 return b;
229 }
230
MostSignificantBitPosition64(uint64_t n)231 inline int MostSignificantBitPosition64(uint64_t n) {
232 #ifdef USE_FAST_LEAST_SIGNIFICANT_BIT
233 return MostSignificantBitPosition64Fast(n);
234 #else
235 return MostSignificantBitPosition64Default(n);
236 #endif
237 }
238
239 #if USE_FAST_LEAST_SIGNIFICANT_BIT
MostSignificantBitPosition32Fast(uint32_t n)240 inline int MostSignificantBitPosition32Fast(uint32_t n) {
241 // The constant here depends on whether we are on a 32-bit or 64-bit machine.
242 // __builtin_clzl(1) returns 63 on a 64-bit machine and 31 on a 32-bit
243 // machine.
244 const int offset = __builtin_clzl(1);
245 return n == 0 ? 0 : (offset - __builtin_clzl(n));
246 }
247 #endif
248
MostSignificantBitPosition32Default(uint32_t n)249 inline int MostSignificantBitPosition32Default(uint32_t n) {
250 int b = 0;
251 if (0 != (n & (kAllBits32 << (1 << 4)))) {
252 b |= (1 << 4);
253 n >>= (1 << 4);
254 }
255 if (0 != (n & (kAllBits32 << (1 << 3)))) {
256 b |= (1 << 3);
257 n >>= (1 << 3);
258 }
259 if (0 != (n & (kAllBits32 << (1 << 2)))) {
260 b |= (1 << 2);
261 n >>= (1 << 2);
262 }
263 if (0 != (n & (kAllBits32 << (1 << 1)))) {
264 b |= (1 << 1);
265 n >>= (1 << 1);
266 }
267 if (0 != (n & (kAllBits32 << (1 << 0)))) {
268 b |= (1 << 0);
269 }
270 return b;
271 }
272
MostSignificantBitPosition32(uint32_t n)273 inline int MostSignificantBitPosition32(uint32_t n) {
274 #ifdef USE_FAST_LEAST_SIGNIFICANT_BIT
275 return MostSignificantBitPosition32Fast(n);
276 #else
277 return MostSignificantBitPosition32Default(n);
278 #endif
279 }
280
281 #undef USE_DEBRUIJN
282 #undef USE_FAST_LEAST_SIGNIFICANT_BIT
283
284 // Returns a word with bits from s to e set.
OneRange64(uint64_t s,uint64_t e)285 inline uint64_t OneRange64(uint64_t s, uint64_t e) {
286 DCHECK_LE(s, 63);
287 DCHECK_LE(e, 63);
288 DCHECK_LE(s, e);
289 return (kAllBits64 << s) ^ ((kAllBits64 - 1) << e);
290 }
291
OneRange32(uint32_t s,uint32_t e)292 inline uint32_t OneRange32(uint32_t s, uint32_t e) {
293 DCHECK_LE(s, 31);
294 DCHECK_LE(e, 31);
295 DCHECK_LE(s, e);
296 return (kAllBits32 << s) ^ ((kAllBits32 - 1) << e);
297 }
298
299 // Returns a word with s least significant bits unset.
IntervalUp64(uint64_t s)300 inline uint64_t IntervalUp64(uint64_t s) {
301 DCHECK_LE(s, 63);
302 return kAllBits64 << s;
303 }
304
IntervalUp32(uint32_t s)305 inline uint32_t IntervalUp32(uint32_t s) {
306 DCHECK_LE(s, 31);
307 return kAllBits32 << s;
308 }
309
310 // Returns a word with the s most significant bits unset.
IntervalDown64(uint64_t s)311 inline uint64_t IntervalDown64(uint64_t s) {
312 DCHECK_LE(s, 63);
313 return kAllBits64 >> (63 - s);
314 }
315
IntervalDown32(uint32_t s)316 inline uint32_t IntervalDown32(uint32_t s) {
317 DCHECK_LE(s, 31);
318 return kAllBits32 >> (31 - s);
319 }
320
321 // ----- Bitset operators -----
322 // Bitset: array of uint32_t/uint64_t words
323
324 // Bit operators used to manipulates bitsets.
325
326 // Returns the bit number in the word computed by BitOffsetXX,
327 // corresponding to the bit at position pos in the bitset.
328 // Note: '& 63' is faster than '% 64'
329 // TODO(user): rename BitPos and BitOffset to something more understandable.
BitPos64(uint64_t pos)330 inline uint32_t BitPos64(uint64_t pos) { return (pos & 63); }
BitPos32(uint32_t pos)331 inline uint32_t BitPos32(uint32_t pos) { return (pos & 31); }
332
333 // Returns the word number corresponding to bit number pos.
BitOffset64(uint64_t pos)334 inline uint64_t BitOffset64(uint64_t pos) { return (pos >> 6); }
BitOffset32(uint32_t pos)335 inline uint32_t BitOffset32(uint32_t pos) { return (pos >> 5); }
336
337 // Returns the number of words needed to store size bits.
BitLength64(uint64_t size)338 inline uint64_t BitLength64(uint64_t size) { return ((size + 63) >> 6); }
BitLength32(uint32_t size)339 inline uint32_t BitLength32(uint32_t size) { return ((size + 31) >> 5); }
340
341 // Returns the bit number in the bitset of the first bit of word number v.
BitShift64(uint64_t v)342 inline uint64_t BitShift64(uint64_t v) { return v << 6; }
BitShift32(uint32_t v)343 inline uint32_t BitShift32(uint32_t v) { return v << 5; }
344
345 // Returns true if the bit pos is set in bitset.
IsBitSet64(const uint64_t * const bitset,uint64_t pos)346 inline bool IsBitSet64(const uint64_t* const bitset, uint64_t pos) {
347 return (bitset[BitOffset64(pos)] & OneBit64(BitPos64(pos)));
348 }
IsBitSet32(const uint32_t * const bitset,uint32_t pos)349 inline bool IsBitSet32(const uint32_t* const bitset, uint32_t pos) {
350 return (bitset[BitOffset32(pos)] & OneBit32(BitPos32(pos)));
351 }
352
353 // Sets the bit pos to true in bitset.
SetBit64(uint64_t * const bitset,uint64_t pos)354 inline void SetBit64(uint64_t* const bitset, uint64_t pos) {
355 bitset[BitOffset64(pos)] |= OneBit64(BitPos64(pos));
356 }
SetBit32(uint32_t * const bitset,uint32_t pos)357 inline void SetBit32(uint32_t* const bitset, uint32_t pos) {
358 bitset[BitOffset32(pos)] |= OneBit32(BitPos32(pos));
359 }
360
361 // Sets the bit pos to false in bitset.
ClearBit64(uint64_t * const bitset,uint64_t pos)362 inline void ClearBit64(uint64_t* const bitset, uint64_t pos) {
363 bitset[BitOffset64(pos)] &= ~OneBit64(BitPos64(pos));
364 }
ClearBit32(uint32_t * const bitset,uint32_t pos)365 inline void ClearBit32(uint32_t* const bitset, uint32_t pos) {
366 bitset[BitOffset32(pos)] &= ~OneBit32(BitPos32(pos));
367 }
368
369 // Returns the number of bits set in bitset between positions start and end.
370 uint64_t BitCountRange64(const uint64_t* const bitset, uint64_t start,
371 uint64_t end);
372 uint32_t BitCountRange32(const uint32_t* const bitset, uint32_t start,
373 uint32_t end);
374
375 // Returns true if no bits are set in bitset between start and end.
376 bool IsEmptyRange64(const uint64_t* const bitset, uint64_t start, uint64_t end);
377 bool IsEmptyRange32(const uint32_t* const bitset, uint32_t start, uint32_t end);
378
379 // Returns the first bit set in bitset between start and max_bit.
380 int64_t LeastSignificantBitPosition64(const uint64_t* const bitset,
381 uint64_t start, uint64_t end);
382 int LeastSignificantBitPosition32(const uint32_t* const bitset, uint32_t start,
383 uint32_t end);
384
385 // Returns the last bit set in bitset between min_bit and start.
386 int64_t MostSignificantBitPosition64(const uint64_t* const bitset,
387 uint64_t start, uint64_t end);
388 int MostSignificantBitPosition32(const uint32_t* const bitset, uint32_t start,
389 uint32_t end);
390
391 // Unsafe versions of the functions above where respectively end and start
392 // are supposed to be set.
393 int64_t UnsafeLeastSignificantBitPosition64(const uint64_t* const bitset,
394 uint64_t start, uint64_t end);
395 int32_t UnsafeLeastSignificantBitPosition32(const uint32_t* const bitset,
396 uint32_t start, uint32_t end);
397
398 int64_t UnsafeMostSignificantBitPosition64(const uint64_t* const bitset,
399 uint64_t start, uint64_t end);
400 int32_t UnsafeMostSignificantBitPosition32(const uint32_t* const bitset,
401 uint32_t start, uint32_t end);
402
403 // Returns a mask with the bits pos % 64 and (pos ^ 1) % 64 sets.
TwoBitsFromPos64(uint64_t pos)404 inline uint64_t TwoBitsFromPos64(uint64_t pos) {
405 return uint64_t{3} << (pos & 62);
406 }
407
408 // This class is like an ITIVector<IndexType, bool> except that it provides a
409 // more efficient way to iterate over the positions set to true. It achieves
410 // this by caching the current uint64_t bucket in the Iterator and using
411 // LeastSignificantBitPosition64() to iterate over the positions at 1 in this
412 // bucket.
413 template <typename IndexType = int64_t>
414 class Bitset64 {
415 public:
Bitset64()416 Bitset64() : size_(), data_(), end_(*this, /*at_end=*/true) {}
Bitset64(IndexType size)417 explicit Bitset64(IndexType size)
418 : size_(Value(size) > 0 ? size : IndexType(0)),
419 data_(BitLength64(Value(size_))),
420 end_(*this, /*at_end=*/true) {}
421
422 // Returns how many bits this Bitset64 can hold.
size()423 IndexType size() const { return size_; }
424
425 // Appends value at the end of the bitset.
PushBack(bool value)426 void PushBack(bool value) {
427 ++size_;
428 data_.resize(BitLength64(Value(size_)), 0);
429 Set(size_ - 1, value);
430 }
431
432 // Resizes the Bitset64 to the given number of bits. New bits are sets to 0.
Resize(IndexType size)433 void Resize(IndexType size) {
434 DCHECK_GE(Value(size), 0);
435 size_ = Value(size) > 0 ? size : IndexType(0);
436 data_.resize(BitLength64(Value(size_)), 0);
437 }
438
439 // Changes the number of bits the Bitset64 can hold and set all of them to 0.
ClearAndResize(IndexType size)440 void ClearAndResize(IndexType size) {
441 DCHECK_GE(Value(size), 0);
442 size_ = Value(size) > 0 ? size : IndexType(0);
443
444 // Memset is 4x faster than data_.assign() as of 19/03/2014.
445 // TODO(user): Ideally if a realloc happens, we don't need to copy the old
446 // data...
447 const size_t bit_length = static_cast<size_t>(BitLength64(Value(size_)));
448 const size_t to_clear = std::min(data_.size(), bit_length);
449 data_.resize(bit_length, 0);
450 memset(data_.data(), 0, to_clear * sizeof(int64_t));
451 }
452
453 // Sets all bits to 0.
ClearAll()454 void ClearAll() { memset(data_.data(), 0, data_.size() * sizeof(int64_t)); }
455
456 // Sets the bit at position i to 0.
Clear(IndexType i)457 void Clear(IndexType i) {
458 DCHECK_GE(Value(i), 0);
459 DCHECK_LT(Value(i), Value(size_));
460 data_[BitOffset64(Value(i))] &= ~OneBit64(BitPos64(Value(i)));
461 }
462
463 // Sets bucket containing bit i to 0.
ClearBucket(IndexType i)464 void ClearBucket(IndexType i) {
465 DCHECK_GE(Value(i), 0);
466 DCHECK_LT(Value(i), Value(size_));
467 data_[BitOffset64(Value(i))] = 0;
468 }
469
470 // Clears the bits at position i and i ^ 1.
ClearTwoBits(IndexType i)471 void ClearTwoBits(IndexType i) {
472 DCHECK_GE(Value(i), 0);
473 DCHECK_LT(Value(i), Value(size_));
474 data_[BitOffset64(Value(i))] &= ~TwoBitsFromPos64(Value(i));
475 }
476
477 // Returns true if the bit at position i or the one at position i ^ 1 is set.
AreOneOfTwoBitsSet(IndexType i)478 bool AreOneOfTwoBitsSet(IndexType i) const {
479 DCHECK_GE(Value(i), 0);
480 DCHECK_LT(Value(i), Value(size_));
481 return data_[BitOffset64(Value(i))] & TwoBitsFromPos64(Value(i));
482 }
483
484 // Returns true if the bit at position i is set.
IsSet(IndexType i)485 bool IsSet(IndexType i) const {
486 DCHECK_GE(Value(i), 0);
487 DCHECK_LT(Value(i), Value(size_));
488 return data_[BitOffset64(Value(i))] & OneBit64(BitPos64(Value(i)));
489 }
490
491 // Same as IsSet().
492 bool operator[](IndexType i) const { return IsSet(i); }
493
494 // Sets the bit at position i to 1.
Set(IndexType i)495 void Set(IndexType i) {
496 DCHECK_GE(Value(i), 0);
497 DCHECK_LT(Value(i), size_);
498 data_[BitOffset64(Value(i))] |= OneBit64(BitPos64(Value(i)));
499 }
500
501 // If value is true, sets the bit at position i to 1, sets it to 0 otherwise.
Set(IndexType i,bool value)502 void Set(IndexType i, bool value) {
503 if (value) {
504 Set(i);
505 } else {
506 Clear(i);
507 }
508 }
509
510 // Copies bucket containing bit i from "other" to "this".
CopyBucket(const Bitset64<IndexType> & other,IndexType i)511 void CopyBucket(const Bitset64<IndexType>& other, IndexType i) {
512 const uint64_t offset = BitOffset64(Value(i));
513 data_[offset] = other.data_[offset];
514 }
515
516 // Copies "other" to "this". The bitsets do not have to be of the same size.
517 // If "other" is smaller, high order bits are not changed. If "other" is
518 // larger, its high order bits are ignored. In any case "this" is not resized.
519 template <typename OtherIndexType>
SetContentFromBitset(const Bitset64<OtherIndexType> & other)520 void SetContentFromBitset(const Bitset64<OtherIndexType>& other) {
521 const int64_t min_size = std::min(data_.size(), other.data_.size());
522 if (min_size == 0) return;
523 const uint64_t last_common_bucket = data_[min_size - 1];
524 memcpy(data_.data(), other.data_.data(), min_size * sizeof(uint64_t));
525 if (data_.size() >= other.data_.size()) {
526 const uint64_t bitmask = kAllBitsButLsb64
527 << BitPos64(other.Value(other.size() - 1));
528 data_[min_size - 1] &= ~bitmask;
529 data_[min_size - 1] |= (bitmask & last_common_bucket);
530 }
531 }
532
533 // Same as SetContentFromBitset where "this" and "other" have the same size.
534 template <typename OtherIndexType>
SetContentFromBitsetOfSameSize(const Bitset64<OtherIndexType> & other)535 void SetContentFromBitsetOfSameSize(const Bitset64<OtherIndexType>& other) {
536 DCHECK_EQ(Value(size()), other.Value(other.size()));
537 memcpy(data_.data(), other.data_.data(), data_.size() * sizeof(uint64_t));
538 }
539
540 // Sets "this" to be the intersection of "this" and "other". The
541 // bitsets do not have to be the same size. If other is smaller, all
542 // the higher order bits are assumed to be 0.
Intersection(const Bitset64<IndexType> & other)543 void Intersection(const Bitset64<IndexType>& other) {
544 const int min_size = std::min(data_.size(), other.data_.size());
545 for (int i = 0; i < min_size; ++i) {
546 data_[i] &= other.data_[i];
547 }
548 for (int i = min_size; i < data_.size(); ++i) {
549 data_[i] = 0;
550 }
551 }
552
553 // Sets "this" to be the union of "this" and "other". The
554 // bitsets do not have to be the same size. If other is smaller, all
555 // the higher order bits are assumed to be 0.
Union(const Bitset64<IndexType> & other)556 void Union(const Bitset64<IndexType>& other) {
557 const int min_size = std::min(data_.size(), other.data_.size());
558 for (int i = 0; i < min_size; ++i) {
559 data_[i] |= other.data_[i];
560 }
561 }
562
563 // Class to iterate over the bit positions at 1 of a Bitset64.
564 //
565 // IMPORTANT: Because the iterator "caches" the current uint64_t bucket, this
566 // will probably not do what you want if Bitset64 is modified while iterating.
567 class Iterator {
568 public:
Iterator(const Bitset64 & data_)569 explicit Iterator(const Bitset64& data_)
570 : bitset_(data_), index_(0), base_index_(0), current_(0) {
571 if (bitset_.data_.empty()) {
572 index_ = -1;
573 } else {
574 current_ = bitset_.data_[0];
575 Next();
576 }
577 }
578
579 // Returns true if the Iterator is at a valid position.
Ok()580 bool Ok() const { return index_ != -1; }
581
582 // Returns the current position of the iterator.
Index()583 IndexType Index() const {
584 DCHECK(Ok());
585 return IndexType(index_);
586 }
587
588 // Moves the iterator the the next position at 1 of the Bitset64.
Next()589 void Next() {
590 DCHECK(Ok());
591 if (current_ == 0) {
592 int bucket = BitOffset64(base_index_);
593 const int size = bitset_.data_.size();
594 do {
595 bucket++;
596 } while (bucket < size && bitset_.data_[bucket] == 0);
597 if (bucket == size) {
598 index_ = -1;
599 return;
600 }
601 current_ = bitset_.data_[bucket];
602 base_index_ = BitShift64(bucket);
603 }
604
605 // Computes the index and clear the least significant bit of current_.
606 index_ = base_index_ + LeastSignificantBitPosition64(current_);
607 current_ &= current_ - 1;
608 }
609
610 // STL version of the functions above to support range-based "for" loop.
Iterator(const Bitset64 & data_,bool at_end)611 Iterator(const Bitset64& data_, bool at_end)
612 : bitset_(data_), index_(0), base_index_(0), current_(0) {
613 if (at_end || bitset_.data_.empty()) {
614 index_ = -1;
615 } else {
616 current_ = bitset_.data_[0];
617 Next();
618 }
619 }
620 bool operator!=(const Iterator& other) const {
621 return index_ != other.index_;
622 }
623 IndexType operator*() const { return IndexType(index_); }
624 void operator++() { Next(); }
625
626 private:
627 const Bitset64& bitset_;
628 int index_;
629 int base_index_;
630 uint64_t current_;
631 };
632
633 // Allows range-based "for" loop on the non-zero positions:
634 // for (const IndexType index : bitset) {}
635 // instead of:
636 // for (Bitset64<IndexType>::Iterator it(bitset); it.Ok(); it.Next()) {
637 // const IndexType index = it.Index();
begin()638 Iterator begin() const { return Iterator(*this); }
end()639 Iterator end() const { return end_; }
640
641 // Cryptic function! This is just an optimized version of a given piece of
642 // code and has probably little general use.
ConditionalXorOfTwoBits(IndexType i,uint64_t use1,const Bitset64<IndexType> & set1,uint64_t use2,const Bitset64<IndexType> & set2)643 static uint64_t ConditionalXorOfTwoBits(IndexType i, uint64_t use1,
644 const Bitset64<IndexType>& set1,
645 uint64_t use2,
646 const Bitset64<IndexType>& set2) {
647 DCHECK_EQ(set1.data_.size(), set1.data_.size());
648 DCHECK(use1 == 0 || use1 == 1);
649 DCHECK(use2 == 0 || use2 == 1);
650 const int bucket = BitOffset64(Value(i));
651 const int pos = BitPos64(Value(i));
652 return ((use1 << pos) & set1.data_[bucket]) ^
653 ((use2 << pos) & set2.data_[bucket]);
654 }
655
656 // Returns a 0/1 string representing the bitset.
DebugString()657 std::string DebugString() const {
658 std::string output;
659 for (IndexType i(0); i < size(); ++i) {
660 output += IsSet(i) ? "1" : "0";
661 }
662 return output;
663 }
664
665 private:
666 // Returns the value of the index type.
667 // This function is specialized below to work with IntType and int64_t.
668 static int64_t Value(IndexType input);
669
670 IndexType size_;
671 std::vector<uint64_t> data_;
672
673 // It is faster to store the end() Iterator than to recompute it every time.
674 // Note that we cannot do the same for begin().
675 const Iterator end_;
676
677 template <class OtherIndexType>
678 friend class Bitset64;
679 DISALLOW_COPY_AND_ASSIGN(Bitset64);
680 };
681
682 // Specialized version of Bitset64 that allows to query the last bit set more
683 // efficiently.
684 class BitQueue64 {
685 public:
BitQueue64()686 BitQueue64() : size_(), top_(-1), data_() {}
BitQueue64(int size)687 explicit BitQueue64(int size)
688 : size_(size), top_(-1), data_(BitLength64(size), 0) {}
689
IncreaseSize(int size)690 void IncreaseSize(int size) {
691 CHECK_GE(size, size_);
692 size_ = size;
693 data_.resize(BitLength64(size), 0);
694 }
695
ClearAndResize(int size)696 void ClearAndResize(int size) {
697 top_ = -1;
698 size_ = size;
699 data_.assign(BitLength64(size), 0);
700 }
701
Set(int i)702 void Set(int i) {
703 DCHECK_GE(i, 0);
704 DCHECK_LT(i, size_);
705 top_ = std::max(top_, i);
706 data_[BitOffset64(i)] |= OneBit64(BitPos64(i));
707 }
708
709 // Sets all the bits from 0 up to i-1 to 1.
SetAllBefore(int i)710 void SetAllBefore(int i) {
711 DCHECK_GE(i, 0);
712 DCHECK_LT(i, size_);
713 top_ = std::max(top_, i - 1);
714 int bucket_index = static_cast<int>(BitOffset64(i));
715 data_[bucket_index] |= OneBit64(BitPos64(i)) - 1;
716 for (--bucket_index; bucket_index >= 0; --bucket_index) {
717 data_[bucket_index] = kAllBits64;
718 }
719 }
720
721 // Returns the position of the highest bit set in O(1) or -1 if no bit is set.
Top()722 int Top() const { return top_; }
723
724 // Clears the Top() bit and recomputes the position of the next Top().
ClearTop()725 void ClearTop() {
726 DCHECK_NE(top_, -1);
727 int bucket_index = static_cast<int>(BitOffset64(top_));
728 uint64_t bucket = data_[bucket_index] &= ~OneBit64(BitPos64(top_));
729 while (!bucket) {
730 if (bucket_index == 0) {
731 top_ = -1;
732 return;
733 }
734 bucket = data_[--bucket_index];
735 }
736
737 // Note(user): I experimented with reversing the bit order in a bucket to
738 // use LeastSignificantBitPosition64() and it is only slightly faster at the
739 // cost of a lower Set() speed. So I preferred this version.
740 top_ = static_cast<int>(BitShift64(bucket_index) +
741 MostSignificantBitPosition64(bucket));
742 }
743
744 private:
745 int size_;
746 int top_;
747 std::vector<uint64_t> data_;
748 DISALLOW_COPY_AND_ASSIGN(BitQueue64);
749 };
750
751 // The specialization of Value() for IntType and int64_t.
752 template <typename IntType>
Value(IntType input)753 inline int64_t Bitset64<IntType>::Value(IntType input) {
754 DCHECK_GE(input.value(), 0);
755 return input.value();
756 }
757 template <>
Value(int64_t input)758 inline int64_t Bitset64<int64_t>::Value(int64_t input) {
759 DCHECK_GE(input, 0);
760 return input;
761 }
762
763 // A simple utility class to set/unset integer in a range [0, size).
764 // This is optimized for sparsity.
765 template <typename IntegerType = int64_t>
766 class SparseBitset {
767 public:
SparseBitset()768 SparseBitset() {}
SparseBitset(IntegerType size)769 explicit SparseBitset(IntegerType size) : bitset_(size) {}
size()770 IntegerType size() const { return bitset_.size(); }
SparseClearAll()771 void SparseClearAll() {
772 for (const IntegerType i : to_clear_) bitset_.ClearBucket(i);
773 to_clear_.clear();
774 }
ClearAll()775 void ClearAll() {
776 bitset_.ClearAll();
777 to_clear_.clear();
778 }
ClearAndResize(IntegerType size)779 void ClearAndResize(IntegerType size) {
780 // As of 19/03/2014, experiments show that this is a reasonable threshold.
781 const int kSparseThreshold = 300;
782 if (to_clear_.size() * kSparseThreshold < size) {
783 SparseClearAll();
784 bitset_.Resize(size);
785 } else {
786 bitset_.ClearAndResize(size);
787 to_clear_.clear();
788 }
789 }
Resize(IntegerType size)790 void Resize(IntegerType size) {
791 if (size < bitset_.size()) {
792 int new_index = 0;
793 for (IntegerType index : to_clear_) {
794 if (index < size) {
795 to_clear_[new_index] = index;
796 ++new_index;
797 }
798 }
799 to_clear_.resize(new_index);
800 }
801 bitset_.Resize(size);
802 }
803 bool operator[](IntegerType index) const { return bitset_[index]; }
Set(IntegerType index)804 void Set(IntegerType index) {
805 if (!bitset_[index]) {
806 bitset_.Set(index);
807 to_clear_.push_back(index);
808 }
809 }
Clear(IntegerType index)810 void Clear(IntegerType index) { bitset_.Clear(index); }
NumberOfSetCallsWithDifferentArguments()811 int NumberOfSetCallsWithDifferentArguments() const {
812 return to_clear_.size();
813 }
PositionsSetAtLeastOnce()814 const std::vector<IntegerType>& PositionsSetAtLeastOnce() const {
815 return to_clear_;
816 }
817
818 // Tells the class that all its bits are cleared, so it can reset to_clear_
819 // to the empty vector. Note that this call is "unsafe" since the fact that
820 // the class is actually all cleared is only checked in debug mode.
821 //
822 // This is useful to iterate on the "set" positions while clearing them for
823 // instance. This way, after the loop, a client can call this for efficiency.
NotifyAllClear()824 void NotifyAllClear() {
825 if (DEBUG_MODE) {
826 for (IntegerType index : to_clear_) CHECK(!bitset_[index]);
827 }
828 to_clear_.clear();
829 }
830
831 private:
832 Bitset64<IntegerType> bitset_;
833 std::vector<IntegerType> to_clear_;
834 DISALLOW_COPY_AND_ASSIGN(SparseBitset);
835 };
836
837 } // namespace operations_research
838
839 #endif // OR_TOOLS_UTIL_BITSET_H_
840