1 // Copyright 2010-2021 Google LLC
2 // Licensed under the Apache License, Version 2.0 (the "License");
3 // you may not use this file except in compliance with the License.
4 // You may obtain a copy of the License at
5 //
6 //     http://www.apache.org/licenses/LICENSE-2.0
7 //
8 // Unless required by applicable law or agreed to in writing, software
9 // distributed under the License is distributed on an "AS IS" BASIS,
10 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11 // See the License for the specific language governing permissions and
12 // limitations under the License.
13 
14 // Set of integer tuples (fixed-size arrays, all of the same size) with
15 // a basic API.
16 // It supports several types of integer arrays transparently, with an
17 // inherent storage based on int64_t arrays.
18 //
19 // The key feature is the "lazy" copy:
20 // - Copying an IntTupleSet won't actually copy the data right away; we
21 //   will just have several IntTupleSet pointing at the same data.
22 // - Modifying an IntTupleSet which shares his data with others
23 //   will create a new, modified instance of the data payload, and make
24 //   the IntTupleSet point to that new data.
25 // - Modifying an IntTupleSet that doesn't share its data with any other
26 //   IntTupleSet will modify the data directly.
27 // Therefore, you don't need to use const IntTupleSet& in methods. Just do:
28 // void MyMethod(IntTupleSet tuple_set) { ... }
29 //
30 // This class is thread hostile as the copy and reference counter are
31 // not protected by a mutex.
32 
33 #ifndef OR_TOOLS_UTIL_TUPLE_SET_H_
34 #define OR_TOOLS_UTIL_TUPLE_SET_H_
35 
36 #include <algorithm>
37 #include <vector>
38 
39 #include "absl/container/flat_hash_map.h"
40 #include "absl/container/flat_hash_set.h"
41 #include "ortools/base/hash.h"
42 #include "ortools/base/integral_types.h"
43 #include "ortools/base/logging.h"
44 #include "ortools/base/macros.h"
45 #include "ortools/base/map_util.h"
46 
47 namespace operations_research {
48 // ----- Main IntTupleSet class -----
49 class IntTupleSet {
50  public:
51   // Creates an empty tuple set with a fixed length for all tuples.
52   explicit IntTupleSet(int arity);
53   // Copy constructor (it actually does a lazy copy, see toplevel comment).
54   IntTupleSet(const IntTupleSet& set);  // NOLINT
55   ~IntTupleSet();
56 
57   // Clears data.
58   void Clear();
59 
60   // Inserts the tuple to the set. It does nothing if the tuple is
61   // already in the set. The size of the tuple must be equal to the
62   // arity of the set. It returns the index at which the tuple was
63   // inserted (-1 if it was already present).
64   int Insert(const std::vector<int>& tuple);
65   int Insert(const std::vector<int64_t>& tuple);
66   // Arity fixed version of Insert removing the need for a vector for the user.
67   int Insert2(int64_t v0, int64_t v1);
68   int Insert3(int64_t v0, int64_t v1, int64_t v2);
69   int Insert4(int64_t v0, int64_t v1, int64_t v2, int64_t v3);
70   // Inserts the tuples.
71   void InsertAll(const std::vector<std::vector<int64_t> >& tuples);
72   void InsertAll(const std::vector<std::vector<int> >& tuples);
73 
74   // Checks if the tuple is in the set.
75   bool Contains(const std::vector<int>& tuple) const;
76   bool Contains(const std::vector<int64_t>& tuple) const;
77 
78   // Returns the number of tuples.
79   int NumTuples() const;
80   // Get the given tuple's value at the given position.  The indices
81   // of the tuples correspond to the order in which they were
82   // inserted.
83   int64_t Value(int tuple_index, int pos_in_tuple) const;
84   // Returns the arity of the set.
85   int Arity() const;
86   // Access the raw data, see IntTupleSet::Data::flat_tuples_.
87   const int64_t* RawData() const;
88   // Returns the number of different values in the given column.
89   int NumDifferentValuesInColumn(int col) const;
90   // Return a copy of the set, sorted by the "col"-th value of each
91   // tuples. The sort is stable.
92   IntTupleSet SortedByColumn(int col) const;
93   // Returns a copy of the tuple set lexicographically sorted.
94   IntTupleSet SortedLexicographically() const;
95 
96  private:
97   // Class that holds the actual data of an IntTupleSet. It handles
98   // the reference counters, etc.
99   class Data {
100    public:
101     explicit Data(int arity);
102     Data(const Data& data);
103     ~Data();
104     void AddSharedOwner();
105     bool RemovedSharedOwner();
106     Data* CopyIfShared();
107     template <class T>
108     int Insert(const std::vector<T>& tuple);
109     template <class T>
110     bool Contains(const std::vector<T>& candidate) const;
111     template <class T>
112     int64_t Fingerprint(const std::vector<T>& tuple) const;
113     int NumTuples() const;
114     int64_t Value(int index, int pos) const;
115     int Arity() const;
116     const int64_t* RawData() const;
117     void Clear();
118 
119    private:
120     const int arity_;
121     int num_owners_;
122     // Concatenation of all tuples ever added.
123     std::vector<int64_t> flat_tuples_;
124     // Maps a tuple's fingerprint to the list of tuples with this
125     // fingerprint, represented by their start index in the
126     // flat_tuples_ vector.
127     absl::flat_hash_map<int64_t, std::vector<int> > tuple_fprint_to_index_;
128   };
129 
130   // Used to represent a light representation of a tuple.
131   struct IndexData {
132     int index;
133     IntTupleSet::Data* data;
IndexDataIndexData134     IndexData(int i, IntTupleSet::Data* const d) : index(i), data(d) {}
135     static bool Compare(const IndexData& a, const IndexData& b);
136   };
137 
138   struct IndexValue {
139     int index;
140     int64_t value;
IndexValueIndexValue141     IndexValue(int i, int64_t v) : index(i), value(v) {}
142     static bool Compare(const IndexValue& a, const IndexValue& b);
143   };
144 
145   mutable Data* data_;
146 };
147 
148 // ----- Data -----
Data(int arity)149 inline IntTupleSet::Data::Data(int arity) : arity_(arity), num_owners_(0) {}
150 
Data(const Data & data)151 inline IntTupleSet::Data::Data(const Data& data)
152     : arity_(data.arity_),
153       num_owners_(0),
154       flat_tuples_(data.flat_tuples_),
155       tuple_fprint_to_index_(data.tuple_fprint_to_index_) {}
156 
~Data()157 inline IntTupleSet::Data::~Data() {}
158 
AddSharedOwner()159 inline void IntTupleSet::Data::AddSharedOwner() { num_owners_++; }
160 
RemovedSharedOwner()161 inline bool IntTupleSet::Data::RemovedSharedOwner() {
162   return (--num_owners_ == 0);
163 }
164 
CopyIfShared()165 inline IntTupleSet::Data* IntTupleSet::Data::CopyIfShared() {
166   if (num_owners_ > 1) {  // Copy on write.
167     Data* const new_data = new Data(*this);
168     RemovedSharedOwner();
169     new_data->AddSharedOwner();
170     return new_data;
171   }
172   return this;
173 }
174 
175 template <class T>
Insert(const std::vector<T> & tuple)176 int IntTupleSet::Data::Insert(const std::vector<T>& tuple) {
177   DCHECK(arity_ == 0 || flat_tuples_.size() % arity_ == 0);
178   CHECK_EQ(arity_, tuple.size());
179   DCHECK_EQ(1, num_owners_);
180   if (!Contains(tuple)) {
181     const int index = NumTuples();
182     const int offset = flat_tuples_.size();
183     flat_tuples_.resize(offset + arity_);
184     // On mac os X, using this instead of push_back gives a 10x speedup!
185     for (int i = 0; i < arity_; ++i) {
186       flat_tuples_[offset + i] = tuple[i];
187     }
188     const int64_t fingerprint = Fingerprint(tuple);
189     tuple_fprint_to_index_[fingerprint].push_back(index);
190     return index;
191   } else {
192     return -1;
193   }
194 }
195 
196 template <class T>
Contains(const std::vector<T> & candidate)197 bool IntTupleSet::Data::Contains(const std::vector<T>& candidate) const {
198   if (candidate.size() != arity_) {
199     return false;
200   }
201   const int64_t fingerprint = Fingerprint(candidate);
202   if (tuple_fprint_to_index_.contains(fingerprint)) {
203     const std::vector<int>& indices =
204         gtl::FindOrDie(tuple_fprint_to_index_, fingerprint);
205     for (int i = 0; i < indices.size(); ++i) {
206       const int tuple_index = indices[i];
207       for (int j = 0; j < arity_; ++j) {
208         if (candidate[j] != flat_tuples_[tuple_index * arity_ + j]) {
209           return false;
210         }
211       }
212       return true;
213     }
214   }
215   return false;
216 }
217 
218 template <class T>
Fingerprint(const std::vector<T> & tuple)219 int64_t IntTupleSet::Data::Fingerprint(const std::vector<T>& tuple) const {
220   switch (arity_) {
221     case 0:
222       return 0;
223     case 1:
224       return tuple[0];
225     case 2: {
226       uint64_t x = tuple[0];
227       uint64_t y = uint64_t{0xe08c1d668b756f82};
228       uint64_t z = tuple[1];
229       mix(x, y, z);
230       return z;
231     }
232     default: {
233       uint64_t x = tuple[0];
234       uint64_t y = uint64_t{0xe08c1d668b756f82};
235       for (int i = 1; i < tuple.size(); ++i) {
236         uint64_t z = tuple[i];
237         mix(x, y, z);
238         x = z;
239       }
240       return x;
241     }
242   }
243 }
244 
NumTuples()245 inline int IntTupleSet::Data::NumTuples() const {
246   return tuple_fprint_to_index_.size();
247 }
248 
Value(int index,int pos)249 inline int64_t IntTupleSet::Data::Value(int index, int pos) const {
250   DCHECK_GE(index, 0);
251   DCHECK_LT(index, flat_tuples_.size() / arity_);
252   DCHECK_GE(pos, 0);
253   DCHECK_LT(pos, arity_);
254   return flat_tuples_[index * arity_ + pos];
255 }
256 
Arity()257 inline int IntTupleSet::Data::Arity() const { return arity_; }
258 
RawData()259 inline const int64_t* IntTupleSet::Data::RawData() const {
260   return flat_tuples_.data();
261 }
262 
Clear()263 inline void IntTupleSet::Data::Clear() {
264   flat_tuples_.clear();
265   tuple_fprint_to_index_.clear();
266 }
267 
IntTupleSet(int arity)268 inline IntTupleSet::IntTupleSet(int arity) : data_(new Data(arity)) {
269   CHECK_GE(arity, 0);
270   data_->AddSharedOwner();
271 }
272 
IntTupleSet(const IntTupleSet & set)273 inline IntTupleSet::IntTupleSet(const IntTupleSet& set) : data_(set.data_) {
274   data_->AddSharedOwner();
275 }
276 
~IntTupleSet()277 inline IntTupleSet::~IntTupleSet() {
278   CHECK(data_ != nullptr);
279   if (data_->RemovedSharedOwner()) {
280     delete data_;
281   }
282 }
283 
Clear()284 inline void IntTupleSet::Clear() {
285   data_ = data_->CopyIfShared();
286   data_->Clear();
287 }
288 
Insert(const std::vector<int> & tuple)289 inline int IntTupleSet::Insert(const std::vector<int>& tuple) {
290   data_ = data_->CopyIfShared();
291   return data_->Insert(tuple);
292 }
293 
Insert(const std::vector<int64_t> & tuple)294 inline int IntTupleSet::Insert(const std::vector<int64_t>& tuple) {
295   data_ = data_->CopyIfShared();
296   return data_->Insert(tuple);
297 }
298 
Insert2(int64_t v0,int64_t v1)299 inline int IntTupleSet::Insert2(int64_t v0, int64_t v1) {
300   std::vector<int64_t> tuple(2);
301   tuple[0] = v0;
302   tuple[1] = v1;
303   return Insert(tuple);
304 }
305 
Insert3(int64_t v0,int64_t v1,int64_t v2)306 inline int IntTupleSet::Insert3(int64_t v0, int64_t v1, int64_t v2) {
307   std::vector<int64_t> tuple(3);
308   tuple[0] = v0;
309   tuple[1] = v1;
310   tuple[2] = v2;
311   return Insert(tuple);
312 }
313 
Insert4(int64_t v0,int64_t v1,int64_t v2,int64_t v3)314 inline int IntTupleSet::Insert4(int64_t v0, int64_t v1, int64_t v2,
315                                 int64_t v3) {
316   std::vector<int64_t> tuple(4);
317   tuple[0] = v0;
318   tuple[1] = v1;
319   tuple[2] = v2;
320   tuple[3] = v3;
321   return Insert(tuple);
322 }
323 
Contains(const std::vector<int> & tuple)324 inline bool IntTupleSet::Contains(const std::vector<int>& tuple) const {
325   return data_->Contains(tuple);
326 }
327 
Contains(const std::vector<int64_t> & tuple)328 inline bool IntTupleSet::Contains(const std::vector<int64_t>& tuple) const {
329   return data_->Contains(tuple);
330 }
331 
InsertAll(const std::vector<std::vector<int>> & tuples)332 inline void IntTupleSet::InsertAll(
333     const std::vector<std::vector<int> >& tuples) {
334   data_ = data_->CopyIfShared();
335   for (int i = 0; i < tuples.size(); ++i) {
336     Insert(tuples[i]);
337   }
338 }
339 
InsertAll(const std::vector<std::vector<int64_t>> & tuples)340 inline void IntTupleSet::InsertAll(
341     const std::vector<std::vector<int64_t> >& tuples) {
342   data_ = data_->CopyIfShared();
343   for (int i = 0; i < tuples.size(); ++i) {
344     Insert(tuples[i]);
345   }
346 }
347 
NumTuples()348 inline int IntTupleSet::NumTuples() const { return data_->NumTuples(); }
349 
Value(int index,int pos)350 inline int64_t IntTupleSet::Value(int index, int pos) const {
351   return data_->Value(index, pos);
352 }
353 
Arity()354 inline int IntTupleSet::Arity() const { return data_->Arity(); }
355 
RawData()356 inline const int64_t* IntTupleSet::RawData() const { return data_->RawData(); }
357 
NumDifferentValuesInColumn(int col)358 inline int IntTupleSet::NumDifferentValuesInColumn(int col) const {
359   if (col < 0 || col >= data_->Arity()) {
360     return 0;
361   }
362   absl::flat_hash_set<int64_t> values;
363   for (int i = 0; i < data_->NumTuples(); ++i) {
364     values.insert(data_->Value(i, col));
365   }
366   return values.size();
367 }
368 
Compare(const IndexValue & a,const IndexValue & b)369 inline bool IntTupleSet::IndexValue::Compare(const IndexValue& a,
370                                              const IndexValue& b) {
371   return a.value < b.value || (a.value == b.value && a.index < b.index);
372 }
373 
SortedByColumn(int col)374 inline IntTupleSet IntTupleSet::SortedByColumn(int col) const {
375   std::vector<IndexValue> keys;
376   keys.reserve(data_->NumTuples());
377   for (int index = 0; index < data_->NumTuples(); ++index) {
378     keys.push_back(IndexValue(index, data_->Value(index, col)));
379   }
380   std::sort(keys.begin(), keys.end(), IntTupleSet::IndexValue::Compare);
381   const int arity = data_->Arity();
382   IntTupleSet sorted(arity);
383   for (int i = 0; i < keys.size(); ++i) {
384     const int64_t* tuple_ptr = data_->RawData() + keys[i].index * arity;
385     sorted.Insert(std::vector<int64_t>(tuple_ptr, tuple_ptr + arity));
386   }
387   return sorted;
388 }
389 
Compare(const IndexData & a,const IndexData & b)390 inline bool IntTupleSet::IndexData::Compare(const IndexData& a,
391                                             const IndexData& b) {
392   const IntTupleSet::Data* const data = a.data;
393   const int arity = data->Arity();
394   for (int i = 0; i < arity; ++i) {
395     const int64_t value1 = data->Value(a.index, i);
396     const int64_t value2 = data->Value(b.index, i);
397     if (value1 < value2) {
398       return true;
399     }
400     if (value1 > value2) {
401       return false;
402     }
403   }
404   return false;
405 }
406 
SortedLexicographically()407 inline IntTupleSet IntTupleSet::SortedLexicographically() const {
408   std::vector<IndexData> keys;
409   keys.reserve(data_->NumTuples());
410   for (int index = 0; index < data_->NumTuples(); ++index) {
411     keys.push_back(IndexData(index, data_));
412   }
413   std::sort(keys.begin(), keys.end(), IntTupleSet::IndexData::Compare);
414   const int arity = data_->Arity();
415   IntTupleSet sorted(arity);
416   for (int i = 0; i < keys.size(); ++i) {
417     std::vector<int64_t> tuple(arity);
418     const int64_t* tuple_ptr = data_->RawData() + keys[i].index * arity;
419     sorted.Insert(std::vector<int64_t>(tuple_ptr, tuple_ptr + arity));
420   }
421   return sorted;
422 }
423 }  // namespace operations_research
424 
425 #endif  // OR_TOOLS_UTIL_TUPLE_SET_H_
426