1 /*
2  * Copyright (c) Facebook, Inc. and its affiliates.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *     http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #pragma once
18 
19 #include <cassert>
20 #include <cstdint>
21 
22 namespace folly {
23 
24 /***
25  *  SparseByteSet
26  *
27  *  A special-purpose data structure representing an insert-only set of bytes.
28  *  May have better performance than std::bitset<256>, depending on workload.
29  *
30  *  Operations:
31  *  - add(byte)
32  *  - contains(byte)
33  *
34  *  Performance:
35  *  - The entire capacity of the set is inline; the set never allocates.
36  *  - The constructor zeros only the first two bytes of the object.
37  *  - add and contains both run in constant time w.r.t. the size of the set.
38  *    Constant time - not amortized constant - and with small constant factor.
39  *
40  *  This data structure is ideal for on-stack use.
41  *
42  *  Aho, Hopcroft, and Ullman refer to this trick in "The Design and Analysis
43  *  of Computer Algorithms" (1974), but the best description is here:
44  *  http://research.swtch.com/sparse
45  *  http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.30.7319
46  */
47 class SparseByteSet {
48  public:
49   //  There are this many possible values:
50   static constexpr uint16_t kCapacity = 256;
51 
52   //  No init of byte-arrays required!
SparseByteSet()53   SparseByteSet() : size_(0) {}
54 
55   /***
56    *  add(byte)
57    *
58    *  O(1), non-amortized.
59    */
add(uint8_t i)60   inline bool add(uint8_t i) {
61     bool r = !contains(i);
62     if (r) {
63       assert(size_ < kCapacity);
64       dense_[size_] = i;
65       sparse_[i] = uint8_t(size_);
66       size_++;
67     }
68     return r;
69   }
70 
71   /***
72    *  contains(byte)
73    *
74    *  O(1), non-amortized.
75    */
contains(uint8_t i)76   inline bool contains(uint8_t i) const {
77     return sparse_[i] < size_ && dense_[sparse_[i]] == i;
78   }
79 
80  private:
81   uint16_t size_; // can't use uint8_t because it would overflow if all
82                   // possible values were inserted.
83   uint8_t sparse_[kCapacity];
84   uint8_t dense_[kCapacity];
85 };
86 
87 } // namespace folly
88