1 // Copyright 2010-2021 Google LLC
2 // Licensed under the Apache License, Version 2.0 (the "License");
3 // you may not use this file except in compliance with the License.
4 // You may obtain a copy of the License at
5 //
6 // http://www.apache.org/licenses/LICENSE-2.0
7 //
8 // Unless required by applicable law or agreed to in writing, software
9 // distributed under the License is distributed on an "AS IS" BASIS,
10 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11 // See the License for the specific language governing permissions and
12 // limitations under the License.
13
14 // TODO(user): refine this toplevel comment when this file settles.
15 //
16 // Two dynamic partition classes: one that incrementally splits a partition
17 // into more and more parts; one that incrementally merges a partition into less
18 // and less parts.
19 //
20 // GLOSSARY:
21 // The partition classes maintain a partition of N integers 0..N-1
22 // (aka "elements") into disjoint equivalence classes (aka "parts").
23 //
24 // SAFETY:
25 // Like vector<int> crashes when used improperly, these classes are not "safe":
26 // most of their methods may crash if called with invalid arguments. The client
27 // code is responsible for using this class properly. A few DCHECKs() will help
28 // catch bugs, though.
29
30 #ifndef OR_TOOLS_ALGORITHMS_DYNAMIC_PARTITION_H_
31 #define OR_TOOLS_ALGORITHMS_DYNAMIC_PARTITION_H_
32
33 #include <cstdint>
34 #include <string>
35 #include <vector>
36
37 #include "ortools/base/logging.h"
38
39 namespace operations_research {
40
41 // Partition class that supports incremental splitting, with backtracking.
42 // See http://en.wikipedia.org/wiki/Partition_refinement .
43 // More precisely, the supported edit operations are:
44 // - Refine the partition so that a subset S (typically, |S| <<< N)
45 // of elements are all considered non-equivalent to any element in ¬S.
46 // Typically, this should be done in O(|S|).
47 // - Undo the above operations (backtracking).
48 //
49 // TODO(user): rename this to BacktrackableSplittingPartition.
50 class DynamicPartition {
51 public:
52 // Creates a DynamicPartition on n elements, numbered 0..n-1. Start with
53 // the trivial partition (only one subset containing all elements).
54 explicit DynamicPartition(int num_elements);
55
56 // Ditto, but specify the initial part of each elements. Part indices must
57 // form a dense integer set starting at 0; eg. [2, 1, 0, 1, 1, 3, 0] is valid.
58 explicit DynamicPartition(const std::vector<int>& initial_part_of_element);
59
60 // Accessors.
NumElements()61 int NumElements() const { return element_.size(); }
NumParts()62 const int NumParts() const { return part_.size(); }
63
64 // To iterate over the elements in part #i:
65 // for (int element : partition.ElementsInPart(i)) { ... }
66 //
67 // ORDERING OF ELEMENTS INSIDE PARTS: the order of elements within a given
68 // part is volatile, and may change with Refine() or UndoRefine*() operations,
69 // even if the part itself doesn't change.
70 struct IterablePart;
71 IterablePart ElementsInPart(int i) const;
72
73 int PartOf(int element) const;
74 int SizeOfPart(int part) const;
75 int ParentOfPart(int part) const;
76
77 // A handy shortcut to ElementsInPart(PartOf(e)). The returned IterablePart
78 // will never be empty, since it contains at least i.
79 IterablePart ElementsInSamePartAs(int i) const;
80
81 // Returns a fingerprint of the given part. While collisions are possible,
82 // their probability is quite low. Two parts that have the same size and the
83 // same fingerprint are most likely identical.
84 // Also, two parts that have the exact same set of elements will *always*
85 // have the same fingerprint.
86 uint64_t FprintOfPart(int part) const;
87
88 // Refines the partition such that elements that are in distinguished_subset
89 // never share the same part as elements that aren't in that subset.
90 // This might be a no-op: in that case, NumParts() won't change, but the
91 // order of elements inside each part may change.
92 //
93 // ORDERING OF PARTS:
94 // For each i such that Part #i has a non-trivial intersection with
95 // "distinguished_subset" (neither empty, nor the full Part); Part #i is
96 // stripped out of all elements that are in "distinguished_subset", and
97 // those elements are sent to a newly created part, whose parent_part = i.
98 // The parts newly created by a single Refine() operations are sorted
99 // by parent_part.
100 // Example: a Refine() on a partition with 6 parts causes parts #1, #3 and
101 // #4 to be split: the partition will now contain 3 new parts: part #6 (with
102 // parent_part = 1), part #7 (with parent_part = 3) and part #8 (with
103 // parent_part = 4).
104 //
105 // TODO(user): the graph symmetry finder could probably benefit a lot from
106 // keeping track of one additional bit of information for each part that
107 // remains unchanged by a Refine() operation: was that part entirely *in*
108 // the distinguished subset or entirely *out*?
109 void Refine(const std::vector<int>& distinguished_subset);
110
111 // Undo one or several Refine() operations, until the number of parts
112 // becomes equal to "original_num_parts".
113 // Prerequisite: NumParts() >= original_num_parts.
114 void UndoRefineUntilNumPartsEqual(int original_num_parts);
115
116 // Dump the partition to a string. There might be different conventions for
117 // sorting the parts and the elements inside them.
118 enum DebugStringSorting {
119 // Elements are sorted within parts, and parts are then sorted
120 // lexicographically.
121 SORT_LEXICOGRAPHICALLY,
122 // Elements are sorted within parts, and parts are kept in order.
123 SORT_BY_PART,
124 };
125 std::string DebugString(DebugStringSorting sorting) const;
126
127 // ADVANCED USAGE:
128 // All elements (0..n-1) of the partition, sorted in a way that's compatible
129 // with the hierarchical partitioning:
130 // - All the elements of any given part are contiguous.
131 // - Elements of a part P are always after elements of part Parent(P).
132 // - The order remains identical (and the above property holds) after any
133 // UndoRefine*() operation.
134 // Note that the order does get changed by Refine() operations.
135 // This is a reference, so it'll only remain valid and constant until the
136 // class is destroyed or until Refine() get called.
ElementsInHierarchicalOrder()137 const std::vector<int>& ElementsInHierarchicalOrder() const {
138 return element_;
139 }
140
141 private:
142 // A DynamicPartition instance maintains a list of all of its elements,
143 // 'sorted' by partitions: elements of the same subset are contiguous
144 // in that list.
145 std::vector<int> element_;
146
147 // The reverse of elements_[]: element_[index_of_[i]] = i.
148 std::vector<int> index_of_;
149
150 // part_of_[i] is the index of the part that contains element i.
151 std::vector<int> part_of_;
152
153 struct Part {
154 // This part holds elements[start_index .. end_index-1].
155 // INVARIANT: end_index > start_index.
156 int start_index; // Inclusive
157 int end_index; // Exclusive
158
159 // The Part that this part was split out of. See the comment at Refine().
160 // INVARIANT: part[i].parent_part <= i, and the equality holds iff part[i]
161 // has no parent.
162 int parent_part; // Index into the part[] array.
163
164 // The part's fingerprint is the XOR of all fingerprints of its elements.
165 // See FprintOfInt32() in the .cc.
166 uint64_t fprint;
167
PartPart168 Part() : start_index(0), end_index(0), parent_part(0), fprint(0) {}
PartPart169 Part(int start_index, int end_index, int parent_part, uint64_t fprint)
170 : start_index(start_index),
171 end_index(end_index),
172 parent_part(parent_part),
173 fprint(fprint) {}
174 };
175 std::vector<Part> part_; // The disjoint parts.
176
177 // Used temporarily and exclusively by Refine(). This prevents Refine()
178 // from being thread-safe.
179 // INVARIANT: tmp_counter_of_part_ contains only 0s before and after Refine().
180 std::vector<int> tmp_counter_of_part_;
181 std::vector<int> tmp_affected_parts_;
182 };
183
184 struct DynamicPartition::IterablePart {
beginIterablePart185 std::vector<int>::const_iterator begin() const { return begin_; }
endIterablePart186 std::vector<int>::const_iterator end() const { return end_; }
187 std::vector<int>::const_iterator begin_;
188 std::vector<int>::const_iterator end_;
189
sizeIterablePart190 int size() const { return end_ - begin_; }
191
IterablePartIterablePart192 IterablePart() {}
IterablePartIterablePart193 IterablePart(const std::vector<int>::const_iterator& b,
194 const std::vector<int>::const_iterator& e)
195 : begin_(b), end_(e) {}
196
197 // These typedefs allow this iterator to be used within testing::ElementsAre.
198 typedef int value_type;
199 typedef std::vector<int>::const_iterator const_iterator;
200 };
201
202 // Partition class that supports incremental merging, using the union-find
203 // algorithm (see http://en.wikipedia.org/wiki/Disjoint-set_data_structure).
204 class MergingPartition {
205 public:
206 // At first, all nodes are in their own singleton part.
MergingPartition()207 MergingPartition() { Reset(0); }
MergingPartition(int num_nodes)208 explicit MergingPartition(int num_nodes) { Reset(num_nodes); }
209 void Reset(int num_nodes);
210
NumNodes()211 int NumNodes() const { return parent_.size(); }
212
213 // Complexity: amortized O(Ackermann⁻¹(N)) -- which is essentially O(1) --
214 // where N is the number of nodes.
215 //
216 // Return value: If this merge caused a representative node (of either node1
217 // or node2) to stop being a representative (because only one can remain);
218 // this method returns that removed representative. Otherwise it returns -1.
219 //
220 // Details: a smaller part will always be merged onto a larger one.
221 // Upons ties, the smaller representative becomes the overall representative.
222 int MergePartsOf(int node1, int node2); // The 'union' of the union-find.
223
224 // Get the representative of "node" (a node in the same equivalence class,
225 // which will also be returned for any other "node" in the same class).
226 // The complexity if the same as MergePartsOf().
227 int GetRootAndCompressPath(int node);
228
229 // Specialized reader API: prunes "nodes" to only keep at most one node per
230 // part: any node which is in the same part as an earlier node will be pruned.
231 void KeepOnlyOneNodePerPart(std::vector<int>* nodes);
232
233 // Output the whole partition as node equivalence classes: if there are K
234 // parts and N nodes, node_equivalence_classes[i] will contain the part index
235 // (a number in 0..K-1) of node #i. Parts will be sorted by their first node
236 // (i.e. node 0 will always be in part 0; then the next node that isn't in
237 // part 0 will be in part 1, and so on).
238 // Returns the number K of classes.
239 int FillEquivalenceClasses(std::vector<int>* node_equivalence_classes);
240
241 // Dump all components, with nodes sorted within each part and parts
242 // sorted lexicographically. Eg. "0 1 3 4 | 2 5 | 6 7 8".
243 std::string DebugString();
244
245 // Advanced usage: sets 'node' to be in its original singleton. All nodes
246 // who may point to 'node' as a parent will remain in an inconsistent state.
247 // This can be used to reinitialize a MergingPartition that has been sparsely
248 // modified in O(|modifications|).
249 // CRASHES IF USED INCORRECTLY.
250 void ResetNode(int node);
251
NumNodesInSamePartAs(int node)252 int NumNodesInSamePartAs(int node) {
253 return part_size_[GetRootAndCompressPath(node)];
254 }
255
256 // FOR DEBUGGING OR SPECIAL "CONST" ACCESS ONLY:
257 // Find the root of the union-find tree with leaf 'node', i.e. its
258 // representative node, but don't use path compression.
259 // The amortized complexity can be as bad as log(N), as opposed to the
260 // version using path compression.
261 int GetRoot(int node) const;
262
263 private:
264 // Along the upwards path from 'node' to its root, set the parent of all
265 // nodes (including the root) to 'parent'.
266 void SetParentAlongPathToRoot(int node, int parent);
267
268 std::vector<int> parent_;
269 std::vector<int> part_size_;
270
271 // Used transiently by KeepOnlyOneNodePerPart().
272 std::vector<bool> tmp_part_bit_;
273 };
274
275 // *** Implementation of inline methods of the above classes. ***
276
ElementsInPart(int i)277 inline DynamicPartition::IterablePart DynamicPartition::ElementsInPart(
278 int i) const {
279 DCHECK_GE(i, 0);
280 DCHECK_LT(i, NumParts());
281 return IterablePart(element_.begin() + part_[i].start_index,
282 element_.begin() + part_[i].end_index);
283 }
284
PartOf(int element)285 inline int DynamicPartition::PartOf(int element) const {
286 DCHECK_GE(element, 0);
287 DCHECK_LT(element, part_of_.size());
288 return part_of_[element];
289 }
290
SizeOfPart(int part)291 inline int DynamicPartition::SizeOfPart(int part) const {
292 DCHECK_GE(part, 0);
293 DCHECK_LT(part, part_.size());
294 const Part& p = part_[part];
295 return p.end_index - p.start_index;
296 }
297
ParentOfPart(int part)298 inline int DynamicPartition::ParentOfPart(int part) const {
299 DCHECK_GE(part, 0);
300 DCHECK_LT(part, part_.size());
301 return part_[part].parent_part;
302 }
303
ElementsInSamePartAs(int i)304 inline DynamicPartition::IterablePart DynamicPartition::ElementsInSamePartAs(
305 int i) const {
306 return ElementsInPart(PartOf(i));
307 }
308
FprintOfPart(int part)309 inline uint64_t DynamicPartition::FprintOfPart(int part) const {
310 DCHECK_GE(part, 0);
311 DCHECK_LT(part, part_.size());
312 return part_[part].fprint;
313 }
314
GetRoot(int node)315 inline int MergingPartition::GetRoot(int node) const {
316 DCHECK_GE(node, 0);
317 DCHECK_LT(node, NumNodes());
318 int child = node;
319 while (true) {
320 const int parent = parent_[child];
321 if (parent == child) return child;
322 child = parent;
323 }
324 }
325
SetParentAlongPathToRoot(int node,int parent)326 inline void MergingPartition::SetParentAlongPathToRoot(int node, int parent) {
327 DCHECK_GE(node, 0);
328 DCHECK_LT(node, NumNodes());
329 DCHECK_GE(parent, 0);
330 DCHECK_LT(parent, NumNodes());
331 int child = node;
332 while (true) {
333 const int old_parent = parent_[child];
334 parent_[child] = parent;
335 if (old_parent == child) return;
336 child = old_parent;
337 }
338 }
339
ResetNode(int node)340 inline void MergingPartition::ResetNode(int node) {
341 DCHECK_GE(node, 0);
342 DCHECK_LT(node, NumNodes());
343 parent_[node] = node;
344 part_size_[node] = 1;
345 }
346
347 } // namespace operations_research
348
349 #endif // OR_TOOLS_ALGORITHMS_DYNAMIC_PARTITION_H_
350