1 /** \file
2 * \brief Implementation of a clustering algorithm (by Auber, Chiricota,
3 * Melancon).
4 *
5 * \author Karsten Klein
6 *
7 * \par License:
8 * This file is part of the Open Graph Drawing Framework (OGDF).
9 *
10 * \par
11 * Copyright (C)<br>
12 * See README.md in the OGDF root directory for details.
13 *
14 * \par
15 * This program is free software; you can redistribute it and/or
16 * modify it under the terms of the GNU General Public License
17 * Version 2 or 3 as published by the Free Software Foundation;
18 * see the file LICENSE.txt included in the packaging of this file
19 * for details.
20 *
21 * \par
22 * This program is distributed in the hope that it will be useful,
23 * but WITHOUT ANY WARRANTY; without even the implied warranty of
24 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
25 * GNU General Public License for more details.
26 *
27 * \par
28 * You should have received a copy of the GNU General Public
29 * License along with this program; if not, see
30 * http://www.gnu.org/copyleft/gpl.html
31 */
32
33 #include <ogdf/graphalg/Clusterer.h>
34 #include <ogdf/basic/GraphCopy.h>
35
36
37 namespace ogdf {
38
Clusterer(const Graph & G)39 Clusterer::Clusterer(const Graph &G) :
40 ClustererModule(G),
41 m_recursive(true),
42 m_autoThreshNum(0)
43 {
44 //use automatic thresholds
45 #if 0
46 m_thresholds.pushFront(3.0);
47 m_thresholds.pushFront(1.4);
48 #endif
49 m_defaultThresholds.pushFront(1.6);
50 m_defaultThresholds.pushBack(3.2);
51 m_defaultThresholds.pushBack(4.5);
52 m_stopIndex = 0.7;
53 }
54
55
56 //computes a list of SimpleCluster pointers, where each SimpleCluster
57 //models a cluster and has a parent (0 if root)
58 //list of cluster structures will be parameter
computeClustering(SList<SimpleCluster * > & clustering)59 void Clusterer::computeClustering(SList<SimpleCluster*> &clustering)
60 {
61 //save to which cluster the vertices belong
62 //0 is root cluster
63 SimpleCluster *root = new SimpleCluster();
64 root->m_size = m_pGraph->numberOfNodes();
65
66 //we push root to the front afterwards
67 #if 0
68 clustering.pushFront(root);
69 #endif
70 NodeArray<SimpleCluster*> vCluster(*m_pGraph, root);
71
72 //we delete edges and therefore work on a copy
73 GraphCopy GC(*m_pGraph);
74 //multiedges are not used for the computation
75 makeSimple(GC);
76 //based on strengths, we compute a clustering
77 //Case 1:with depth one (or #thresholds)
78 //Case 2: Recursively
79
80 ListIterator<double> it;
81
82 if (m_thresholds.size() > 0)
83 it = m_thresholds.begin();
84 else
85 {
86 OGDF_ASSERT(m_defaultThresholds.size() > 0);
87 it = m_defaultThresholds.begin();
88 }
89
90 //are there more cases than (not) rec.?
91 int fall = (m_recursive ? 2 : 1);
92 #if 0
93 int fall = 2;
94 #endif
95 if (fall == 2)
96 {
97 //we just use the first value of auto/default/threshs
98 #if 0
99 OGDF_ASSERT(m_thresholds.size() == 1);
100 #endif
101 //Case2:
102 //we compute the edge strengths on the components recursively
103 //we therefore only need a single threshold
104
105 //we compute the edge strengths
106 EdgeArray<double> strengths(GC,0.0);
107
108 //we need a criterion to stop the process
109 //we stop if there are no edges deleted or
110 //the average node clustering index in our copy rises
111 //above m_stopIndex
112 bool edgesDeleted = true;
113
114 while (edgesDeleted && (averageCIndex(GC)<m_stopIndex))
115 {
116 computeEdgeStrengths(GC, strengths);
117 if (m_autoThreshNum > 0)
118 {
119 OGDF_ASSERT(m_autoThresholds.size() > 0);
120 it = m_autoThresholds.begin();
121 }
122 if (it.valid())
123 {
124 //collect edges that will be deleted
125 List<edge> le;
126 for(edge e : GC.edges)
127 {
128 if (strengths[e] < *it)
129 {
130 le.pushFront(e);
131 }
132 }
133 //stop criterion
134 if (le.size() == 0)
135 {
136 edgesDeleted = false;
137 continue;
138 }
139 //delete edges
140 for(edge e : le)
141 GC.delEdge(e);
142
143 //gather cluster information
144 //vertices within a connected component are always part
145 //of the same cluster which then will be parent of the new clusters
146
147 ArrayBuffer<node> S;
148 NodeArray<bool> done(GC, false);
149 for(node v : GC.nodes)
150 {
151 if (done[v]) continue;
152 done[v] = true;
153 S.push(v);
154 SimpleCluster* parent = vCluster[GC.original(v)];
155 SList<node> theCluster;
156
157 List<node> compNodes; //nodes in a component
158
159 while(!S.empty())
160 {
161 node w = S.popRet();
162 compNodes.pushFront(w);
163 for(adjEntry adj : w->adjEntries) {
164 node x = adj->twinNode();
165 if (!(done[x]))
166 {
167 done[x] = true;
168 S.push(x);
169 }
170 }
171 }
172 //run over nodes and set cluster info
173 //do not construct trivial clusters
174 if ( (parent->m_size > compNodes.size()) &&
175 (compNodes.size()>2))
176 {
177 SimpleCluster* s = new SimpleCluster();
178 s->setParent(parent);
179
180 parent->pushBackChild(s);
181 clustering.pushFront(s);
182 for (node vi : compNodes)
183 {
184 vCluster[GC.original(vi)] = s;
185 //vertex leaves parent to s
186 parent->m_size--;
187 s->m_size++;
188 }
189 }
190 }
191 }
192 }
193 } else { // case 2
194 //we compute the edge strengths
195 EdgeArray<double> strengths(*m_pGraph,0.0);
196 computeEdgeStrengths(strengths);
197 if (m_autoThreshNum > 0)
198 {
199 OGDF_ASSERT(m_autoThresholds.size() > 0);
200 it = m_autoThresholds.begin();
201 }
202 //Case1:
203
204 while (it.valid())
205 {
206 //collect edges that will be deleted
207 List<edge> le;
208 for(edge e : GC.edges)
209 {
210 if (strengths[e] < *it)
211 {
212 le.pushFront(e);
213 }
214 }
215 //delete edges
216 for(edge e : le)
217 GC.delEdge(e);
218
219 //gather cluster information
220 //vertices within a connected component are always part
221 //of the same cluster which then will be parent of the new clusters
222
223 ArrayBuffer<node> S;
224 NodeArray<bool> done(GC, false);
225 for(node v : GC.nodes)
226 {
227 if (done[v]) continue;
228 done[v] = true;
229 S.push(v);
230 SimpleCluster* parent = vCluster[GC.original(v)];
231 SList<node> theCluster;
232
233 List<node> compNodes; //nodes in a component
234
235 //do not immediately construct a cluster, first gather and store
236 //vertices, then test if the cluster fits some constraints, e.g. size
237 while(!S.empty())
238 {
239 node w = S.popRet();
240 compNodes.pushFront(w);
241 for(adjEntry adj : w->adjEntries) {
242 node x = adj->twinNode();
243 if (!(done[x]))
244 {
245 done[x] = true;
246 S.push(x);
247 }
248 }
249 }
250
251 //run over nodes and set cluster info
252 //do not construct trivial clusters
253 if ( (parent->m_size > compNodes.size()) &&
254 (compNodes.size()>2))
255 {
256 SimpleCluster* s = new SimpleCluster;
257 s->setParent(parent);
258 #if 0
259 s->m_index = -1;
260 #endif
261 parent->pushBackChild(s);
262 clustering.pushFront(s);
263 for(node vi : compNodes)
264 {
265 vCluster[GC.original(vi)] = s;
266 //vertex leaves parent to s
267 parent->m_size--;
268 s->m_size++;
269 }
270 }
271 }
272
273 ++it;
274 }
275 }
276
277 clustering.pushFront(root);
278 //we now have the clustering and know for each vertex, to which cluster
279 //it was assigned in the last iteration (vCluster)
280 //we update the lists of children
281 for(node v : m_pGraph->nodes)
282 {
283 vCluster[v]->pushBackVertex(v);
284 }
285 }
286
computeEdgeStrengths(EdgeArray<double> & strength)287 void Clusterer::computeEdgeStrengths(EdgeArray<double> &strength)
288 {
289 const Graph &G = *m_pGraph;
290 computeEdgeStrengths(G, strength);
291 }
292
computeEdgeStrengths(const Graph & G,EdgeArray<double> & strength)293 void Clusterer::computeEdgeStrengths(const Graph &G, EdgeArray<double> &strength)
294 {
295 strength.init(G, 0.0);
296 double minStrength = 5.0, maxStrength = 0.0; //used to derive automatic thresholds
297 //5 is the maximum possible value (sum of five values 0-1)
298
299 //A Kompromiss: Entweder immer Nachbarn der Nachbarn oder einmal berechnen und speichern (gut
300 //wenn haeufig benoetigt (hoher Grad), braucht aber viel Platz
301 //B Was ist schneller: Listen nachher nochmal durchlaufen und loeschen, wenn Doppelnachbar oder
302 //gleich beim Auftreten loeschen ueber Iterator
303
304 //First, compute the sets Mw, Mv, Wwv. Then check their connectivity.
305 //Use a list for the vertices that are solely connected to w
306 for(edge e : G.edges)
307 {
308 List<node> vNb;
309 List<node> wNb;
310 List<node> bNb; //neighbour to both vertices
311 EdgeArray<bool> processed(G, false); //edge was processed
312 NodeArray<int> nba(G, 0); //neighbours of v and w: 1v, 2both, 3w
313
314 node v = e->source();
315 node w = e->target();
316
317 //neighbourhood sizes
318 int sizeMv = 0;
319 int sizeMw = 0;
320 int sizeWvw = 0;
321 //neighbourhood links
322 #if 0
323 int rMv = 0; //within MV
324 int rMw = 0;
325 #endif
326 int rWvw = 0;
327
328 int rMvMw = 0; //from Mv to Mw
329 int rMvWvw = 0;
330 int rMwWvw = 0;
331
332 //Compute neighbourhood
333 //Muss man selfloops gesondert beruecksichtigen
334 for(adjEntry adjE : v->adjEntries)
335 {
336 node u = adjE->twinNode();
337 if (u == v) continue;
338 if ( u != w )
339 {
340 nba[u] = 1;
341 }
342 }
343 for(adjEntry adjE : w->adjEntries)
344 {
345 node u = adjE->twinNode();
346 if (u == w) continue;
347 if ( u != v )
348 {
349 if (nba[u] == 1)
350 {
351 nba[u] = 2;
352 }
353 else
354 {
355 if (nba[u] != 2) nba[u] = 3;
356
357 sizeMw++;
358 wNb.pushFront(u);
359 }
360 }
361 }
362
363 //Problem in der Laufzeit ist die paarweise Bewertung der Nachbarschaft
364 //ohne Nutzung vorheriger Informationen
365
366 //We know the neighbourhood of v and w and have to compute the connectivity
367 for(adjEntry adjE : v->adjEntries)
368 {
369 node u = adjE->twinNode();
370
371 if ( u != w )
372 {
373 //check if u is in Mv
374 if (nba[u] == 1)
375 {
376 //vertex in Mv
377 sizeMv++;
378 //check links within Mv, to Mw and Wvw
379 for(adjEntry adjE2 : u->adjEntries)
380 {
381 processed[adjE2->theEdge()] = true;
382 node t = adjE2->twinNode();
383 //test links to other sets
384 switch (nba[t]) {
385 #if 0
386 case 1: rMv++; break;
387 #endif
388 case 2: rMvWvw++; break;
389 case 3: rMvMw++; break;
390 }
391 }
392 } else {
393 //vertex in Wvw, nba == 2
394 OGDF_ASSERT(nba[u] == 2);
395 sizeWvw++;
396 for(adjEntry adjE2 : u->adjEntries)
397 {
398 node t = adjE2->twinNode();
399 //processed testen?
400 //test links to other sets
401 switch (nba[t]) {
402 #if 0
403 case 1: rMv++; break;
404 #endif
405 case 2: rWvw++; break;
406 case 3: rMwWvw++; break;
407 }
408 }
409 }
410 }
411 }
412
413 //Now compute the ratio of existing edges to maximal number
414 //(complete graph)
415
416 double sMvWvw = 0.0;
417 double sMwWvw = 0.0;
418 double sWvw = 0.0;
419 double sMvMw = 0.0;
420 //we have to cope with special cases
421 int smult = sizeMv*sizeWvw;
422 if (smult != 0) sMvWvw = (double)rMvWvw/smult;
423 smult = sizeMw*sizeWvw;
424 if (smult != 0) sMwWvw = (double)rMwWvw/smult;
425 smult = (sizeMv*sizeMw);
426 if (smult != 0) sMvMw = (double)rMvMw/smult;
427
428
429 if (sizeWvw > 1)
430 sWvw = 2.0*rWvw/(sizeWvw*(sizeWvw-1));
431 else if (sizeWvw == 1) sWvw = 1.0;
432 //Ratio of cycles of size 3 and 4
433 double cycleProportion = ((double)sizeWvw/(sizeWvw+sizeMv+sizeMw));
434 double edgeStrength = sMvWvw+sMwWvw+sWvw+sMvMw+cycleProportion;
435
436 if (m_autoThreshNum > 0)
437 {
438 if (minStrength > edgeStrength) minStrength = edgeStrength;
439 if (maxStrength < edgeStrength) maxStrength = edgeStrength;
440 }
441
442 #if 0
443 std::cout<<"sWerte: "<<sMvWvw<<"/"<<sMwWvw<<"/"<<sWvw<<"/"<<sMvMw<<"\n";
444 std::cout << "CycleProportion "<<cycleProportion<<"\n";
445 std::cout << "EdgeStrength "<<edgeStrength<<"\n";
446 #endif
447 strength[e] = edgeStrength;
448
449 //true neighbours are not adjacent to v
450 #if 0
451 std::cout << "Checking true neighbours of w\n";
452 #endif
453
454 }
455 if (m_autoThreshNum > 0)
456 {
457 if (m_autoThresholds.size()>0) m_autoThresholds.clear();
458 if (maxStrength > minStrength)
459 {
460 #if 0
461 std::cout << "Max: "<<maxStrength<< " Min: "<<minStrength<<"\n";
462 #endif
463 double step = (maxStrength-minStrength)/((double)m_autoThreshNum+1.0);
464 double val = minStrength+step;
465 for (int i = 0; i<m_autoThreshNum; i++)
466 {
467 m_autoThresholds.pushBack(val);
468 val += step;
469 }
470 } else {
471 m_autoThresholds.pushBack(maxStrength); // Stops computation
472 }
473 }
474 }
475
476 //computes clustering and translates the computed structure into C
createClusterGraph(ClusterGraph & C)477 void Clusterer::createClusterGraph(ClusterGraph &C)
478 {
479 OGDF_ASSERT(&(C.constGraph()) == m_pGraph);
480 //clear existing entries
481 C.clear();
482
483 //we compute the edge strengths
484 EdgeArray<double> strengths(*m_pGraph,0.0);
485 computeEdgeStrengths(strengths);
486
487 //based on strengths, we compute a clustering
488 //Case 1:with depth one (or #thresholds)
489 //Case 2: Recursively
490
491 //Case1:
492 GraphCopy GC(*m_pGraph);
493 ListIterator<double> it = m_thresholds.begin();
494
495 while (it.valid())
496 {
497 //collect edges that will be deleted
498 List<edge> le;
499 for(edge e : GC.edges)
500 {
501 if (strengths[e] < *it)
502 {
503 le.pushFront(e);
504 }
505 }
506 //delete edges
507 for (edge e : le)
508 GC.delEdge(e);
509
510 //gather cluster information
511 //vertices within a connected component are always part
512 //of the same cluster which then will be parent of the new clusters
513
514 ArrayBuffer<node> S;
515 NodeArray<bool> done(GC, false);
516 for(node v : GC.nodes)
517 {
518 if (done[v]) continue;
519 done[v] = true;
520 S.push(v);
521 cluster parent = C.clusterOf(GC.original(v));
522 SList<node> theCluster;
523
524 while(!S.empty())
525 {
526 node w = S.popRet();
527 theCluster.pushFront(GC.original(w));
528 for(adjEntry adj : w->adjEntries) {
529 node x = adj->twinNode();
530 if (!(done[x]))
531 {
532 done[x] = true;
533 S.push(x);
534 }
535 }
536 }
537 //create the cluster
538 C.createCluster(theCluster, parent);
539 }
540
541 ++it;
542 }
543 }
544
setClusteringThresholds(const List<double> & threshs)545 void Clusterer::setClusteringThresholds(const List<double> &threshs)
546 {
547 //we copy the values, should be a low number
548 m_thresholds.clear();
549
550 for (double x : threshs)
551 m_thresholds.pushFront(x);
552 }
553
554 }
555