1 // Copyright 2010-2021 Google LLC
2 // Licensed under the Apache License, Version 2.0 (the "License");
3 // you may not use this file except in compliance with the License.
4 // You may obtain a copy of the License at
5 //
6 // http://www.apache.org/licenses/LICENSE-2.0
7 //
8 // Unless required by applicable law or agreed to in writing, software
9 // distributed under the License is distributed on an "AS IS" BASIS,
10 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11 // See the License for the specific language governing permissions and
12 // limitations under the License.
13
14 // Set of integer tuples (fixed-size arrays, all of the same size) with
15 // a basic API.
16 // It supports several types of integer arrays transparently, with an
17 // inherent storage based on int64_t arrays.
18 //
19 // The key feature is the "lazy" copy:
20 // - Copying an IntTupleSet won't actually copy the data right away; we
21 // will just have several IntTupleSet pointing at the same data.
22 // - Modifying an IntTupleSet which shares his data with others
23 // will create a new, modified instance of the data payload, and make
24 // the IntTupleSet point to that new data.
25 // - Modifying an IntTupleSet that doesn't share its data with any other
26 // IntTupleSet will modify the data directly.
27 // Therefore, you don't need to use const IntTupleSet& in methods. Just do:
28 // void MyMethod(IntTupleSet tuple_set) { ... }
29 //
30 // This class is thread hostile as the copy and reference counter are
31 // not protected by a mutex.
32
33 #ifndef OR_TOOLS_UTIL_TUPLE_SET_H_
34 #define OR_TOOLS_UTIL_TUPLE_SET_H_
35
36 #include <algorithm>
37 #include <vector>
38
39 #include "absl/container/flat_hash_map.h"
40 #include "absl/container/flat_hash_set.h"
41 #include "ortools/base/hash.h"
42 #include "ortools/base/integral_types.h"
43 #include "ortools/base/logging.h"
44 #include "ortools/base/macros.h"
45 #include "ortools/base/map_util.h"
46
47 namespace operations_research {
48 // ----- Main IntTupleSet class -----
49 class IntTupleSet {
50 public:
51 // Creates an empty tuple set with a fixed length for all tuples.
52 explicit IntTupleSet(int arity);
53 // Copy constructor (it actually does a lazy copy, see toplevel comment).
54 IntTupleSet(const IntTupleSet& set); // NOLINT
55 ~IntTupleSet();
56
57 // Clears data.
58 void Clear();
59
60 // Inserts the tuple to the set. It does nothing if the tuple is
61 // already in the set. The size of the tuple must be equal to the
62 // arity of the set. It returns the index at which the tuple was
63 // inserted (-1 if it was already present).
64 int Insert(const std::vector<int>& tuple);
65 int Insert(const std::vector<int64_t>& tuple);
66 // Arity fixed version of Insert removing the need for a vector for the user.
67 int Insert2(int64_t v0, int64_t v1);
68 int Insert3(int64_t v0, int64_t v1, int64_t v2);
69 int Insert4(int64_t v0, int64_t v1, int64_t v2, int64_t v3);
70 // Inserts the tuples.
71 void InsertAll(const std::vector<std::vector<int64_t> >& tuples);
72 void InsertAll(const std::vector<std::vector<int> >& tuples);
73
74 // Checks if the tuple is in the set.
75 bool Contains(const std::vector<int>& tuple) const;
76 bool Contains(const std::vector<int64_t>& tuple) const;
77
78 // Returns the number of tuples.
79 int NumTuples() const;
80 // Get the given tuple's value at the given position. The indices
81 // of the tuples correspond to the order in which they were
82 // inserted.
83 int64_t Value(int tuple_index, int pos_in_tuple) const;
84 // Returns the arity of the set.
85 int Arity() const;
86 // Access the raw data, see IntTupleSet::Data::flat_tuples_.
87 const int64_t* RawData() const;
88 // Returns the number of different values in the given column.
89 int NumDifferentValuesInColumn(int col) const;
90 // Return a copy of the set, sorted by the "col"-th value of each
91 // tuples. The sort is stable.
92 IntTupleSet SortedByColumn(int col) const;
93 // Returns a copy of the tuple set lexicographically sorted.
94 IntTupleSet SortedLexicographically() const;
95
96 private:
97 // Class that holds the actual data of an IntTupleSet. It handles
98 // the reference counters, etc.
99 class Data {
100 public:
101 explicit Data(int arity);
102 Data(const Data& data);
103 ~Data();
104 void AddSharedOwner();
105 bool RemovedSharedOwner();
106 Data* CopyIfShared();
107 template <class T>
108 int Insert(const std::vector<T>& tuple);
109 template <class T>
110 bool Contains(const std::vector<T>& candidate) const;
111 template <class T>
112 int64_t Fingerprint(const std::vector<T>& tuple) const;
113 int NumTuples() const;
114 int64_t Value(int index, int pos) const;
115 int Arity() const;
116 const int64_t* RawData() const;
117 void Clear();
118
119 private:
120 const int arity_;
121 int num_owners_;
122 // Concatenation of all tuples ever added.
123 std::vector<int64_t> flat_tuples_;
124 // Maps a tuple's fingerprint to the list of tuples with this
125 // fingerprint, represented by their start index in the
126 // flat_tuples_ vector.
127 absl::flat_hash_map<int64_t, std::vector<int> > tuple_fprint_to_index_;
128 };
129
130 // Used to represent a light representation of a tuple.
131 struct IndexData {
132 int index;
133 IntTupleSet::Data* data;
IndexDataIndexData134 IndexData(int i, IntTupleSet::Data* const d) : index(i), data(d) {}
135 static bool Compare(const IndexData& a, const IndexData& b);
136 };
137
138 struct IndexValue {
139 int index;
140 int64_t value;
IndexValueIndexValue141 IndexValue(int i, int64_t v) : index(i), value(v) {}
142 static bool Compare(const IndexValue& a, const IndexValue& b);
143 };
144
145 mutable Data* data_;
146 };
147
148 // ----- Data -----
Data(int arity)149 inline IntTupleSet::Data::Data(int arity) : arity_(arity), num_owners_(0) {}
150
Data(const Data & data)151 inline IntTupleSet::Data::Data(const Data& data)
152 : arity_(data.arity_),
153 num_owners_(0),
154 flat_tuples_(data.flat_tuples_),
155 tuple_fprint_to_index_(data.tuple_fprint_to_index_) {}
156
~Data()157 inline IntTupleSet::Data::~Data() {}
158
AddSharedOwner()159 inline void IntTupleSet::Data::AddSharedOwner() { num_owners_++; }
160
RemovedSharedOwner()161 inline bool IntTupleSet::Data::RemovedSharedOwner() {
162 return (--num_owners_ == 0);
163 }
164
CopyIfShared()165 inline IntTupleSet::Data* IntTupleSet::Data::CopyIfShared() {
166 if (num_owners_ > 1) { // Copy on write.
167 Data* const new_data = new Data(*this);
168 RemovedSharedOwner();
169 new_data->AddSharedOwner();
170 return new_data;
171 }
172 return this;
173 }
174
175 template <class T>
Insert(const std::vector<T> & tuple)176 int IntTupleSet::Data::Insert(const std::vector<T>& tuple) {
177 DCHECK(arity_ == 0 || flat_tuples_.size() % arity_ == 0);
178 CHECK_EQ(arity_, tuple.size());
179 DCHECK_EQ(1, num_owners_);
180 if (!Contains(tuple)) {
181 const int index = NumTuples();
182 const int offset = flat_tuples_.size();
183 flat_tuples_.resize(offset + arity_);
184 // On mac os X, using this instead of push_back gives a 10x speedup!
185 for (int i = 0; i < arity_; ++i) {
186 flat_tuples_[offset + i] = tuple[i];
187 }
188 const int64_t fingerprint = Fingerprint(tuple);
189 tuple_fprint_to_index_[fingerprint].push_back(index);
190 return index;
191 } else {
192 return -1;
193 }
194 }
195
196 template <class T>
Contains(const std::vector<T> & candidate)197 bool IntTupleSet::Data::Contains(const std::vector<T>& candidate) const {
198 if (candidate.size() != arity_) {
199 return false;
200 }
201 const int64_t fingerprint = Fingerprint(candidate);
202 if (tuple_fprint_to_index_.contains(fingerprint)) {
203 const std::vector<int>& indices =
204 gtl::FindOrDie(tuple_fprint_to_index_, fingerprint);
205 for (int i = 0; i < indices.size(); ++i) {
206 const int tuple_index = indices[i];
207 for (int j = 0; j < arity_; ++j) {
208 if (candidate[j] != flat_tuples_[tuple_index * arity_ + j]) {
209 return false;
210 }
211 }
212 return true;
213 }
214 }
215 return false;
216 }
217
218 template <class T>
Fingerprint(const std::vector<T> & tuple)219 int64_t IntTupleSet::Data::Fingerprint(const std::vector<T>& tuple) const {
220 switch (arity_) {
221 case 0:
222 return 0;
223 case 1:
224 return tuple[0];
225 case 2: {
226 uint64_t x = tuple[0];
227 uint64_t y = uint64_t{0xe08c1d668b756f82};
228 uint64_t z = tuple[1];
229 mix(x, y, z);
230 return z;
231 }
232 default: {
233 uint64_t x = tuple[0];
234 uint64_t y = uint64_t{0xe08c1d668b756f82};
235 for (int i = 1; i < tuple.size(); ++i) {
236 uint64_t z = tuple[i];
237 mix(x, y, z);
238 x = z;
239 }
240 return x;
241 }
242 }
243 }
244
NumTuples()245 inline int IntTupleSet::Data::NumTuples() const {
246 return tuple_fprint_to_index_.size();
247 }
248
Value(int index,int pos)249 inline int64_t IntTupleSet::Data::Value(int index, int pos) const {
250 DCHECK_GE(index, 0);
251 DCHECK_LT(index, flat_tuples_.size() / arity_);
252 DCHECK_GE(pos, 0);
253 DCHECK_LT(pos, arity_);
254 return flat_tuples_[index * arity_ + pos];
255 }
256
Arity()257 inline int IntTupleSet::Data::Arity() const { return arity_; }
258
RawData()259 inline const int64_t* IntTupleSet::Data::RawData() const {
260 return flat_tuples_.data();
261 }
262
Clear()263 inline void IntTupleSet::Data::Clear() {
264 flat_tuples_.clear();
265 tuple_fprint_to_index_.clear();
266 }
267
IntTupleSet(int arity)268 inline IntTupleSet::IntTupleSet(int arity) : data_(new Data(arity)) {
269 CHECK_GE(arity, 0);
270 data_->AddSharedOwner();
271 }
272
IntTupleSet(const IntTupleSet & set)273 inline IntTupleSet::IntTupleSet(const IntTupleSet& set) : data_(set.data_) {
274 data_->AddSharedOwner();
275 }
276
~IntTupleSet()277 inline IntTupleSet::~IntTupleSet() {
278 CHECK(data_ != nullptr);
279 if (data_->RemovedSharedOwner()) {
280 delete data_;
281 }
282 }
283
Clear()284 inline void IntTupleSet::Clear() {
285 data_ = data_->CopyIfShared();
286 data_->Clear();
287 }
288
Insert(const std::vector<int> & tuple)289 inline int IntTupleSet::Insert(const std::vector<int>& tuple) {
290 data_ = data_->CopyIfShared();
291 return data_->Insert(tuple);
292 }
293
Insert(const std::vector<int64_t> & tuple)294 inline int IntTupleSet::Insert(const std::vector<int64_t>& tuple) {
295 data_ = data_->CopyIfShared();
296 return data_->Insert(tuple);
297 }
298
Insert2(int64_t v0,int64_t v1)299 inline int IntTupleSet::Insert2(int64_t v0, int64_t v1) {
300 std::vector<int64_t> tuple(2);
301 tuple[0] = v0;
302 tuple[1] = v1;
303 return Insert(tuple);
304 }
305
Insert3(int64_t v0,int64_t v1,int64_t v2)306 inline int IntTupleSet::Insert3(int64_t v0, int64_t v1, int64_t v2) {
307 std::vector<int64_t> tuple(3);
308 tuple[0] = v0;
309 tuple[1] = v1;
310 tuple[2] = v2;
311 return Insert(tuple);
312 }
313
Insert4(int64_t v0,int64_t v1,int64_t v2,int64_t v3)314 inline int IntTupleSet::Insert4(int64_t v0, int64_t v1, int64_t v2,
315 int64_t v3) {
316 std::vector<int64_t> tuple(4);
317 tuple[0] = v0;
318 tuple[1] = v1;
319 tuple[2] = v2;
320 tuple[3] = v3;
321 return Insert(tuple);
322 }
323
Contains(const std::vector<int> & tuple)324 inline bool IntTupleSet::Contains(const std::vector<int>& tuple) const {
325 return data_->Contains(tuple);
326 }
327
Contains(const std::vector<int64_t> & tuple)328 inline bool IntTupleSet::Contains(const std::vector<int64_t>& tuple) const {
329 return data_->Contains(tuple);
330 }
331
InsertAll(const std::vector<std::vector<int>> & tuples)332 inline void IntTupleSet::InsertAll(
333 const std::vector<std::vector<int> >& tuples) {
334 data_ = data_->CopyIfShared();
335 for (int i = 0; i < tuples.size(); ++i) {
336 Insert(tuples[i]);
337 }
338 }
339
InsertAll(const std::vector<std::vector<int64_t>> & tuples)340 inline void IntTupleSet::InsertAll(
341 const std::vector<std::vector<int64_t> >& tuples) {
342 data_ = data_->CopyIfShared();
343 for (int i = 0; i < tuples.size(); ++i) {
344 Insert(tuples[i]);
345 }
346 }
347
NumTuples()348 inline int IntTupleSet::NumTuples() const { return data_->NumTuples(); }
349
Value(int index,int pos)350 inline int64_t IntTupleSet::Value(int index, int pos) const {
351 return data_->Value(index, pos);
352 }
353
Arity()354 inline int IntTupleSet::Arity() const { return data_->Arity(); }
355
RawData()356 inline const int64_t* IntTupleSet::RawData() const { return data_->RawData(); }
357
NumDifferentValuesInColumn(int col)358 inline int IntTupleSet::NumDifferentValuesInColumn(int col) const {
359 if (col < 0 || col >= data_->Arity()) {
360 return 0;
361 }
362 absl::flat_hash_set<int64_t> values;
363 for (int i = 0; i < data_->NumTuples(); ++i) {
364 values.insert(data_->Value(i, col));
365 }
366 return values.size();
367 }
368
Compare(const IndexValue & a,const IndexValue & b)369 inline bool IntTupleSet::IndexValue::Compare(const IndexValue& a,
370 const IndexValue& b) {
371 return a.value < b.value || (a.value == b.value && a.index < b.index);
372 }
373
SortedByColumn(int col)374 inline IntTupleSet IntTupleSet::SortedByColumn(int col) const {
375 std::vector<IndexValue> keys;
376 keys.reserve(data_->NumTuples());
377 for (int index = 0; index < data_->NumTuples(); ++index) {
378 keys.push_back(IndexValue(index, data_->Value(index, col)));
379 }
380 std::sort(keys.begin(), keys.end(), IntTupleSet::IndexValue::Compare);
381 const int arity = data_->Arity();
382 IntTupleSet sorted(arity);
383 for (int i = 0; i < keys.size(); ++i) {
384 const int64_t* tuple_ptr = data_->RawData() + keys[i].index * arity;
385 sorted.Insert(std::vector<int64_t>(tuple_ptr, tuple_ptr + arity));
386 }
387 return sorted;
388 }
389
Compare(const IndexData & a,const IndexData & b)390 inline bool IntTupleSet::IndexData::Compare(const IndexData& a,
391 const IndexData& b) {
392 const IntTupleSet::Data* const data = a.data;
393 const int arity = data->Arity();
394 for (int i = 0; i < arity; ++i) {
395 const int64_t value1 = data->Value(a.index, i);
396 const int64_t value2 = data->Value(b.index, i);
397 if (value1 < value2) {
398 return true;
399 }
400 if (value1 > value2) {
401 return false;
402 }
403 }
404 return false;
405 }
406
SortedLexicographically()407 inline IntTupleSet IntTupleSet::SortedLexicographically() const {
408 std::vector<IndexData> keys;
409 keys.reserve(data_->NumTuples());
410 for (int index = 0; index < data_->NumTuples(); ++index) {
411 keys.push_back(IndexData(index, data_));
412 }
413 std::sort(keys.begin(), keys.end(), IntTupleSet::IndexData::Compare);
414 const int arity = data_->Arity();
415 IntTupleSet sorted(arity);
416 for (int i = 0; i < keys.size(); ++i) {
417 std::vector<int64_t> tuple(arity);
418 const int64_t* tuple_ptr = data_->RawData() + keys[i].index * arity;
419 sorted.Insert(std::vector<int64_t>(tuple_ptr, tuple_ptr + arity));
420 }
421 return sorted;
422 }
423 } // namespace operations_research
424
425 #endif // OR_TOOLS_UTIL_TUPLE_SET_H_
426