1 // Copyright 2010-2021 Google LLC
2 // Licensed under the Apache License, Version 2.0 (the "License");
3 // you may not use this file except in compliance with the License.
4 // You may obtain a copy of the License at
5 //
6 //     http://www.apache.org/licenses/LICENSE-2.0
7 //
8 // Unless required by applicable law or agreed to in writing, software
9 // distributed under the License is distributed on an "AS IS" BASIS,
10 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11 // See the License for the specific language governing permissions and
12 // limitations under the License.
13 
14 // TODO(user): refine this toplevel comment when this file settles.
15 //
16 // Two dynamic partition classes: one that incrementally splits a partition
17 // into more and more parts; one that incrementally merges a partition into less
18 // and less parts.
19 //
20 // GLOSSARY:
21 // The partition classes maintain a partition of N integers 0..N-1
22 // (aka "elements") into disjoint equivalence classes (aka "parts").
23 //
24 // SAFETY:
25 // Like vector<int> crashes when used improperly, these classes are not "safe":
26 // most of their methods may crash if called with invalid arguments. The client
27 // code is responsible for using this class properly. A few DCHECKs() will help
28 // catch bugs, though.
29 
30 #ifndef OR_TOOLS_ALGORITHMS_DYNAMIC_PARTITION_H_
31 #define OR_TOOLS_ALGORITHMS_DYNAMIC_PARTITION_H_
32 
33 #include <cstdint>
34 #include <string>
35 #include <vector>
36 
37 #include "ortools/base/logging.h"
38 
39 namespace operations_research {
40 
41 // Partition class that supports incremental splitting, with backtracking.
42 // See http://en.wikipedia.org/wiki/Partition_refinement .
43 // More precisely, the supported edit operations are:
44 // - Refine the partition so that a subset S (typically, |S| <<< N)
45 //   of elements are all considered non-equivalent to any element in ¬S.
46 //   Typically, this should be done in O(|S|).
47 // - Undo the above operations (backtracking).
48 //
49 // TODO(user): rename this to BacktrackableSplittingPartition.
50 class DynamicPartition {
51  public:
52   // Creates a DynamicPartition on n elements, numbered 0..n-1. Start with
53   // the trivial partition (only one subset containing all elements).
54   explicit DynamicPartition(int num_elements);
55 
56   // Ditto, but specify the initial part of each elements. Part indices must
57   // form a dense integer set starting at 0; eg. [2, 1, 0, 1, 1, 3, 0] is valid.
58   explicit DynamicPartition(const std::vector<int>& initial_part_of_element);
59 
60   // Accessors.
NumElements()61   int NumElements() const { return element_.size(); }
NumParts()62   const int NumParts() const { return part_.size(); }
63 
64   // To iterate over the elements in part #i:
65   // for (int element : partition.ElementsInPart(i)) { ... }
66   //
67   // ORDERING OF ELEMENTS INSIDE PARTS: the order of elements within a given
68   // part is volatile, and may change with Refine() or UndoRefine*() operations,
69   // even if the part itself doesn't change.
70   struct IterablePart;
71   IterablePart ElementsInPart(int i) const;
72 
73   int PartOf(int element) const;
74   int SizeOfPart(int part) const;
75   int ParentOfPart(int part) const;
76 
77   // A handy shortcut to ElementsInPart(PartOf(e)). The returned IterablePart
78   // will never be empty, since it contains at least i.
79   IterablePart ElementsInSamePartAs(int i) const;
80 
81   // Returns a fingerprint of the given part. While collisions are possible,
82   // their probability is quite low. Two parts that have the same size and the
83   // same fingerprint are most likely identical.
84   // Also, two parts that have the exact same set of elements will *always*
85   // have the same fingerprint.
86   uint64_t FprintOfPart(int part) const;
87 
88   // Refines the partition such that elements that are in distinguished_subset
89   // never share the same part as elements that aren't in that subset.
90   // This might be a no-op: in that case, NumParts() won't change, but the
91   // order of elements inside each part may change.
92   //
93   // ORDERING OF PARTS:
94   // For each i such that Part #i has a non-trivial intersection with
95   // "distinguished_subset" (neither empty, nor the full Part); Part #i is
96   // stripped out of all elements that are in "distinguished_subset", and
97   // those elements are sent to a newly created part, whose parent_part = i.
98   // The parts newly created by a single Refine() operations are sorted
99   // by parent_part.
100   // Example: a Refine() on a partition with 6 parts causes parts #1, #3 and
101   // #4 to be split: the partition will now contain 3 new parts: part #6 (with
102   // parent_part = 1), part #7 (with parent_part = 3) and part #8 (with
103   // parent_part = 4).
104   //
105   // TODO(user): the graph symmetry finder could probably benefit a lot from
106   // keeping track of one additional bit of information for each part that
107   // remains unchanged by a Refine() operation: was that part entirely *in*
108   // the distinguished subset or entirely *out*?
109   void Refine(const std::vector<int>& distinguished_subset);
110 
111   // Undo one or several Refine() operations, until the number of parts
112   // becomes equal to "original_num_parts".
113   // Prerequisite: NumParts() >= original_num_parts.
114   void UndoRefineUntilNumPartsEqual(int original_num_parts);
115 
116   // Dump the partition to a string. There might be different conventions for
117   // sorting the parts and the elements inside them.
118   enum DebugStringSorting {
119     // Elements are sorted within parts, and parts are then sorted
120     // lexicographically.
121     SORT_LEXICOGRAPHICALLY,
122     // Elements are sorted within parts, and parts are kept in order.
123     SORT_BY_PART,
124   };
125   std::string DebugString(DebugStringSorting sorting) const;
126 
127   // ADVANCED USAGE:
128   // All elements (0..n-1) of the partition, sorted in a way that's compatible
129   // with the hierarchical partitioning:
130   // - All the elements of any given part are contiguous.
131   // - Elements of a part P are always after elements of part Parent(P).
132   // - The order remains identical (and the above property holds) after any
133   //   UndoRefine*() operation.
134   // Note that the order does get changed by Refine() operations.
135   // This is a reference, so it'll only remain valid and constant until the
136   // class is destroyed or until Refine() get called.
ElementsInHierarchicalOrder()137   const std::vector<int>& ElementsInHierarchicalOrder() const {
138     return element_;
139   }
140 
141  private:
142   // A DynamicPartition instance maintains a list of all of its elements,
143   // 'sorted' by partitions: elements of the same subset are contiguous
144   // in that list.
145   std::vector<int> element_;
146 
147   // The reverse of elements_[]: element_[index_of_[i]] = i.
148   std::vector<int> index_of_;
149 
150   // part_of_[i] is the index of the part that contains element i.
151   std::vector<int> part_of_;
152 
153   struct Part {
154     // This part holds elements[start_index .. end_index-1].
155     // INVARIANT: end_index > start_index.
156     int start_index;  // Inclusive
157     int end_index;    // Exclusive
158 
159     // The Part that this part was split out of. See the comment at Refine().
160     // INVARIANT: part[i].parent_part <= i, and the equality holds iff part[i]
161     // has no parent.
162     int parent_part;  // Index into the part[] array.
163 
164     // The part's fingerprint is the XOR of all fingerprints of its elements.
165     // See FprintOfInt32() in the .cc.
166     uint64_t fprint;
167 
PartPart168     Part() : start_index(0), end_index(0), parent_part(0), fprint(0) {}
PartPart169     Part(int start_index, int end_index, int parent_part, uint64_t fprint)
170         : start_index(start_index),
171           end_index(end_index),
172           parent_part(parent_part),
173           fprint(fprint) {}
174   };
175   std::vector<Part> part_;  // The disjoint parts.
176 
177   // Used temporarily and exclusively by Refine(). This prevents Refine()
178   // from being thread-safe.
179   // INVARIANT: tmp_counter_of_part_ contains only 0s before and after Refine().
180   std::vector<int> tmp_counter_of_part_;
181   std::vector<int> tmp_affected_parts_;
182 };
183 
184 struct DynamicPartition::IterablePart {
beginIterablePart185   std::vector<int>::const_iterator begin() const { return begin_; }
endIterablePart186   std::vector<int>::const_iterator end() const { return end_; }
187   std::vector<int>::const_iterator begin_;
188   std::vector<int>::const_iterator end_;
189 
sizeIterablePart190   int size() const { return end_ - begin_; }
191 
IterablePartIterablePart192   IterablePart() {}
IterablePartIterablePart193   IterablePart(const std::vector<int>::const_iterator& b,
194                const std::vector<int>::const_iterator& e)
195       : begin_(b), end_(e) {}
196 
197   // These typedefs allow this iterator to be used within testing::ElementsAre.
198   typedef int value_type;
199   typedef std::vector<int>::const_iterator const_iterator;
200 };
201 
202 // Partition class that supports incremental merging, using the union-find
203 // algorithm (see http://en.wikipedia.org/wiki/Disjoint-set_data_structure).
204 class MergingPartition {
205  public:
206   // At first, all nodes are in their own singleton part.
MergingPartition()207   MergingPartition() { Reset(0); }
MergingPartition(int num_nodes)208   explicit MergingPartition(int num_nodes) { Reset(num_nodes); }
209   void Reset(int num_nodes);
210 
NumNodes()211   int NumNodes() const { return parent_.size(); }
212 
213   // Complexity: amortized O(Ackermann⁻¹(N)) -- which is essentially O(1) --
214   // where N is the number of nodes.
215   //
216   // Return value: If this merge caused a representative node (of either node1
217   // or node2) to stop being a representative (because only one can remain);
218   // this method returns that removed representative. Otherwise it returns -1.
219   //
220   // Details: a smaller part will always be merged onto a larger one.
221   // Upons ties, the smaller representative becomes the overall representative.
222   int MergePartsOf(int node1, int node2);  // The 'union' of the union-find.
223 
224   // Get the representative of "node" (a node in the same equivalence class,
225   // which will also be returned for any other "node" in the same class).
226   // The complexity if the same as MergePartsOf().
227   int GetRootAndCompressPath(int node);
228 
229   // Specialized reader API: prunes "nodes" to only keep at most one node per
230   // part: any node which is in the same part as an earlier node will be pruned.
231   void KeepOnlyOneNodePerPart(std::vector<int>* nodes);
232 
233   // Output the whole partition as node equivalence classes: if there are K
234   // parts and N nodes, node_equivalence_classes[i] will contain the part index
235   // (a number in 0..K-1) of node #i. Parts will be sorted by their first node
236   // (i.e. node 0 will always be in part 0; then the next node that isn't in
237   // part 0 will be in part 1, and so on).
238   // Returns the number K of classes.
239   int FillEquivalenceClasses(std::vector<int>* node_equivalence_classes);
240 
241   // Dump all components, with nodes sorted within each part and parts
242   // sorted lexicographically. Eg. "0 1 3 4 | 2 5 | 6 7 8".
243   std::string DebugString();
244 
245   // Advanced usage: sets 'node' to be in its original singleton. All nodes
246   // who may point to 'node' as a parent will remain in an inconsistent state.
247   // This can be used to reinitialize a MergingPartition that has been sparsely
248   // modified in O(|modifications|).
249   // CRASHES IF USED INCORRECTLY.
250   void ResetNode(int node);
251 
NumNodesInSamePartAs(int node)252   int NumNodesInSamePartAs(int node) {
253     return part_size_[GetRootAndCompressPath(node)];
254   }
255 
256   // FOR DEBUGGING OR SPECIAL "CONST" ACCESS ONLY:
257   // Find the root of the union-find tree with leaf 'node', i.e. its
258   // representative node, but don't use path compression.
259   // The amortized complexity can be as bad as log(N), as opposed to the
260   // version using path compression.
261   int GetRoot(int node) const;
262 
263  private:
264   // Along the upwards path from 'node' to its root, set the parent of all
265   // nodes (including the root) to 'parent'.
266   void SetParentAlongPathToRoot(int node, int parent);
267 
268   std::vector<int> parent_;
269   std::vector<int> part_size_;
270 
271   // Used transiently by KeepOnlyOneNodePerPart().
272   std::vector<bool> tmp_part_bit_;
273 };
274 
275 // *** Implementation of inline methods of the above classes. ***
276 
ElementsInPart(int i)277 inline DynamicPartition::IterablePart DynamicPartition::ElementsInPart(
278     int i) const {
279   DCHECK_GE(i, 0);
280   DCHECK_LT(i, NumParts());
281   return IterablePart(element_.begin() + part_[i].start_index,
282                       element_.begin() + part_[i].end_index);
283 }
284 
PartOf(int element)285 inline int DynamicPartition::PartOf(int element) const {
286   DCHECK_GE(element, 0);
287   DCHECK_LT(element, part_of_.size());
288   return part_of_[element];
289 }
290 
SizeOfPart(int part)291 inline int DynamicPartition::SizeOfPart(int part) const {
292   DCHECK_GE(part, 0);
293   DCHECK_LT(part, part_.size());
294   const Part& p = part_[part];
295   return p.end_index - p.start_index;
296 }
297 
ParentOfPart(int part)298 inline int DynamicPartition::ParentOfPart(int part) const {
299   DCHECK_GE(part, 0);
300   DCHECK_LT(part, part_.size());
301   return part_[part].parent_part;
302 }
303 
ElementsInSamePartAs(int i)304 inline DynamicPartition::IterablePart DynamicPartition::ElementsInSamePartAs(
305     int i) const {
306   return ElementsInPart(PartOf(i));
307 }
308 
FprintOfPart(int part)309 inline uint64_t DynamicPartition::FprintOfPart(int part) const {
310   DCHECK_GE(part, 0);
311   DCHECK_LT(part, part_.size());
312   return part_[part].fprint;
313 }
314 
GetRoot(int node)315 inline int MergingPartition::GetRoot(int node) const {
316   DCHECK_GE(node, 0);
317   DCHECK_LT(node, NumNodes());
318   int child = node;
319   while (true) {
320     const int parent = parent_[child];
321     if (parent == child) return child;
322     child = parent;
323   }
324 }
325 
SetParentAlongPathToRoot(int node,int parent)326 inline void MergingPartition::SetParentAlongPathToRoot(int node, int parent) {
327   DCHECK_GE(node, 0);
328   DCHECK_LT(node, NumNodes());
329   DCHECK_GE(parent, 0);
330   DCHECK_LT(parent, NumNodes());
331   int child = node;
332   while (true) {
333     const int old_parent = parent_[child];
334     parent_[child] = parent;
335     if (old_parent == child) return;
336     child = old_parent;
337   }
338 }
339 
ResetNode(int node)340 inline void MergingPartition::ResetNode(int node) {
341   DCHECK_GE(node, 0);
342   DCHECK_LT(node, NumNodes());
343   parent_[node] = node;
344   part_size_[node] = 1;
345 }
346 
347 }  // namespace operations_research
348 
349 #endif  // OR_TOOLS_ALGORITHMS_DYNAMIC_PARTITION_H_
350