1 /** \file
2  * \brief Implementation of a clustering algorithm (by Auber, Chiricota,
3  * Melancon).
4  *
5  * \author Karsten Klein
6  *
7  * \par License:
8  * This file is part of the Open Graph Drawing Framework (OGDF).
9  *
10  * \par
11  * Copyright (C)<br>
12  * See README.md in the OGDF root directory for details.
13  *
14  * \par
15  * This program is free software; you can redistribute it and/or
16  * modify it under the terms of the GNU General Public License
17  * Version 2 or 3 as published by the Free Software Foundation;
18  * see the file LICENSE.txt included in the packaging of this file
19  * for details.
20  *
21  * \par
22  * This program is distributed in the hope that it will be useful,
23  * but WITHOUT ANY WARRANTY; without even the implied warranty of
24  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
25  * GNU General Public License for more details.
26  *
27  * \par
28  * You should have received a copy of the GNU General Public
29  * License along with this program; if not, see
30  * http://www.gnu.org/copyleft/gpl.html
31  */
32 
33 #include <ogdf/graphalg/Clusterer.h>
34 #include <ogdf/basic/GraphCopy.h>
35 
36 
37 namespace ogdf {
38 
Clusterer(const Graph & G)39 Clusterer::Clusterer(const Graph &G) :
40 	ClustererModule(G),
41 	m_recursive(true),
42 	m_autoThreshNum(0)
43 {
44 	//use automatic thresholds
45 #if 0
46 	m_thresholds.pushFront(3.0);
47 	m_thresholds.pushFront(1.4);
48 #endif
49 	m_defaultThresholds.pushFront(1.6);
50 	m_defaultThresholds.pushBack(3.2);
51 	m_defaultThresholds.pushBack(4.5);
52 	m_stopIndex = 0.7;
53 }
54 
55 
56 //computes a list of SimpleCluster pointers, where each SimpleCluster
57 //models a cluster and has a parent (0 if root)
58 //list of cluster structures will be parameter
computeClustering(SList<SimpleCluster * > & clustering)59 void Clusterer::computeClustering(SList<SimpleCluster*> &clustering)
60 {
61 	//save to which cluster the vertices belong
62 	//0 is root cluster
63 	SimpleCluster *root = new SimpleCluster();
64 	root->m_size = m_pGraph->numberOfNodes();
65 
66 	//we push root to the front afterwards
67 #if 0
68 	clustering.pushFront(root);
69 #endif
70 	NodeArray<SimpleCluster*> vCluster(*m_pGraph, root);
71 
72 	//we delete edges and therefore work on a copy
73 	GraphCopy GC(*m_pGraph);
74 	//multiedges are not used for the computation
75 	makeSimple(GC);
76 	//based on strengths, we compute a clustering
77 	//Case 1:with depth one (or #thresholds)
78 	//Case 2: Recursively
79 
80 	ListIterator<double> it;
81 
82 	if (m_thresholds.size() > 0)
83 		it = m_thresholds.begin();
84 	else
85 	{
86 		OGDF_ASSERT(m_defaultThresholds.size() > 0);
87 		it = m_defaultThresholds.begin();
88 	}
89 
90 	//are there more cases than (not) rec.?
91 	int fall = (m_recursive ? 2 : 1);
92 #if 0
93 	int fall = 2;
94 #endif
95 	if (fall == 2)
96 	{
97 		//we just use the first value of auto/default/threshs
98 #if 0
99 		OGDF_ASSERT(m_thresholds.size() == 1);
100 #endif
101 		//Case2:
102 		//we compute the edge strengths on the components recursively
103 		//we therefore only need a single threshold
104 
105 		//we compute the edge strengths
106 		EdgeArray<double> strengths(GC,0.0);
107 
108 		//we need a criterion to stop the process
109 		//we stop if there are no edges deleted or
110 		//the average node clustering index in our copy rises
111 		//above m_stopIndex
112 		bool edgesDeleted = true;
113 
114 		while (edgesDeleted && (averageCIndex(GC)<m_stopIndex))
115 		{
116 			computeEdgeStrengths(GC, strengths);
117 			if (m_autoThreshNum > 0)
118 			{
119 				OGDF_ASSERT(m_autoThresholds.size() > 0);
120 				it = m_autoThresholds.begin();
121 			}
122 			if (it.valid())
123 			{
124 				//collect edges that will be deleted
125 				List<edge> le;
126 				for(edge e : GC.edges)
127 				{
128 					if (strengths[e] < *it)
129 					{
130 						le.pushFront(e);
131 					}
132 				}
133 				//stop criterion
134 				if (le.size() == 0)
135 				{
136 					edgesDeleted = false;
137 					continue;
138 				}
139 				//delete edges
140 				for(edge e : le)
141 					GC.delEdge(e);
142 
143 				//gather cluster information
144 				//vertices within a connected component are always part
145 				//of the same cluster which then will be parent of the new clusters
146 
147 				ArrayBuffer<node> S;
148 				NodeArray<bool> done(GC, false);
149 				for(node v : GC.nodes)
150 				{
151 					if (done[v]) continue;
152 					done[v] = true;
153 					S.push(v);
154 					SimpleCluster* parent = vCluster[GC.original(v)];
155 					SList<node> theCluster;
156 
157 					List<node> compNodes; //nodes in a component
158 
159 					while(!S.empty())
160 					{
161 						node w = S.popRet();
162 						compNodes.pushFront(w);
163 						for(adjEntry adj : w->adjEntries) {
164 							node x = adj->twinNode();
165 							if (!(done[x]))
166 							{
167 								done[x] = true;
168 								S.push(x);
169 							}
170 						}
171 					}
172 					//run over nodes and set cluster info
173 					//do not construct trivial clusters
174 					if ( (parent->m_size > compNodes.size()) &&
175 						(compNodes.size()>2))
176 					{
177 						SimpleCluster* s = new SimpleCluster();
178 						s->setParent(parent);
179 
180 						parent->pushBackChild(s);
181 						clustering.pushFront(s);
182 						for (node vi : compNodes)
183 						{
184 							vCluster[GC.original(vi)] = s;
185 							//vertex leaves parent to s
186 							parent->m_size--;
187 							s->m_size++;
188 						}
189 					}
190 				}
191 			}
192 		}
193 	} else { // case 2
194 		//we compute the edge strengths
195 		EdgeArray<double> strengths(*m_pGraph,0.0);
196 		computeEdgeStrengths(strengths);
197 		if (m_autoThreshNum > 0)
198 		{
199 			OGDF_ASSERT(m_autoThresholds.size() > 0);
200 			it = m_autoThresholds.begin();
201 		}
202 		//Case1:
203 
204 		while (it.valid())
205 		{
206 			//collect edges that will be deleted
207 			List<edge> le;
208 			for(edge e : GC.edges)
209 			{
210 				if (strengths[e] < *it)
211 				{
212 					le.pushFront(e);
213 				}
214 			}
215 			//delete edges
216 			for(edge e : le)
217 				GC.delEdge(e);
218 
219 			//gather cluster information
220 			//vertices within a connected component are always part
221 			//of the same cluster which then will be parent of the new clusters
222 
223 			ArrayBuffer<node> S;
224 			NodeArray<bool> done(GC, false);
225 			for(node v : GC.nodes)
226 			{
227 				if (done[v]) continue;
228 				done[v] = true;
229 				S.push(v);
230 				SimpleCluster* parent = vCluster[GC.original(v)];
231 				SList<node> theCluster;
232 
233 				List<node> compNodes; //nodes in a component
234 
235 				//do not immediately construct a cluster, first gather and store
236 				//vertices, then test if the cluster fits some constraints, e.g. size
237 				while(!S.empty())
238 				{
239 					node w = S.popRet();
240 					compNodes.pushFront(w);
241 					for(adjEntry adj : w->adjEntries) {
242 						node x = adj->twinNode();
243 						if (!(done[x]))
244 						{
245 							done[x] = true;
246 							S.push(x);
247 						}
248 					}
249 				}
250 
251 				//run over nodes and set cluster info
252 				//do not construct trivial clusters
253 				if ( (parent->m_size > compNodes.size()) &&
254 					(compNodes.size()>2))
255 				{
256 					SimpleCluster* s = new SimpleCluster;
257 					s->setParent(parent);
258 #if 0
259 					s->m_index = -1;
260 #endif
261 					parent->pushBackChild(s);
262 					clustering.pushFront(s);
263 					for(node vi : compNodes)
264 					{
265 						vCluster[GC.original(vi)] = s;
266 						//vertex leaves parent to s
267 						parent->m_size--;
268 						s->m_size++;
269 					}
270 				}
271 			}
272 
273 			++it;
274 		}
275 	}
276 
277 	clustering.pushFront(root);
278 	//we now have the clustering and know for each vertex, to which cluster
279 	//it was assigned in the last iteration (vCluster)
280 	//we update the lists of children
281 	for(node v : m_pGraph->nodes)
282 	{
283 		vCluster[v]->pushBackVertex(v);
284 	}
285 }
286 
computeEdgeStrengths(EdgeArray<double> & strength)287 void Clusterer::computeEdgeStrengths(EdgeArray<double> &strength)
288 {
289 	const Graph &G = *m_pGraph;
290 	computeEdgeStrengths(G, strength);
291 }
292 
computeEdgeStrengths(const Graph & G,EdgeArray<double> & strength)293 void Clusterer::computeEdgeStrengths(const Graph &G, EdgeArray<double> &strength)
294 {
295 	strength.init(G, 0.0);
296 	double minStrength = 5.0, maxStrength = 0.0; //used to derive automatic thresholds
297 	//5 is the maximum possible value (sum of five values 0-1)
298 
299 	//A Kompromiss: Entweder immer Nachbarn der Nachbarn oder einmal berechnen und speichern (gut
300 	//wenn haeufig benoetigt (hoher Grad), braucht aber viel Platz
301 	//B Was ist schneller: Listen nachher nochmal durchlaufen und loeschen, wenn Doppelnachbar oder
302 	//gleich beim Auftreten loeschen ueber Iterator
303 
304 	//First, compute the sets Mw, Mv, Wwv. Then check their connectivity.
305 	//Use a list for the vertices that are solely connected to w
306 	for(edge e : G.edges)
307 	{
308 		List<node> vNb;
309 		List<node> wNb;
310 		List<node> bNb; //neighbour to both vertices
311 		EdgeArray<bool> processed(G, false); //edge was processed
312 		NodeArray<int> nba(G, 0); //neighbours of v and w: 1v, 2both, 3w
313 
314 		node v = e->source();
315 		node w = e->target();
316 
317 		//neighbourhood sizes
318 		int sizeMv = 0;
319 		int sizeMw = 0;
320 		int sizeWvw = 0;
321 		//neighbourhood links
322 #if 0
323 		int rMv = 0; //within MV
324 		int rMw = 0;
325 #endif
326 		int rWvw = 0;
327 
328 		int rMvMw = 0; //from Mv to Mw
329 		int rMvWvw = 0;
330 		int rMwWvw = 0;
331 
332 		//Compute neighbourhood
333 		//Muss man selfloops gesondert beruecksichtigen
334 		for(adjEntry adjE : v->adjEntries)
335 		{
336 			node u = adjE->twinNode();
337 			if (u == v) continue;
338 			if ( u != w )
339 			{
340 				nba[u] = 1;
341 			}
342 		}
343 		for(adjEntry adjE : w->adjEntries)
344 		{
345 			node u = adjE->twinNode();
346 			if (u == w) continue;
347 			if ( u != v )
348 			{
349 				if (nba[u] == 1)
350 				{
351 					nba[u] = 2;
352 				}
353 				else
354 				{
355 					if (nba[u] != 2) nba[u] = 3;
356 
357 					sizeMw++;
358 					wNb.pushFront(u);
359 				}
360 			}
361 		}
362 
363 		//Problem in der Laufzeit ist die paarweise Bewertung der Nachbarschaft
364 		//ohne Nutzung vorheriger Informationen
365 
366 		//We know the neighbourhood of v and w and have to compute the connectivity
367 		for(adjEntry adjE : v->adjEntries)
368 		{
369 			node u = adjE->twinNode();
370 
371 			if ( u != w )
372 			{
373 				//check if u is in Mv
374 				if (nba[u] == 1)
375 				{
376 					//vertex in Mv
377 					sizeMv++;
378 					//check links within Mv, to Mw and Wvw
379 					for(adjEntry adjE2 : u->adjEntries)
380 					{
381 						processed[adjE2->theEdge()] = true;
382 						node t = adjE2->twinNode();
383 						//test links to other sets
384 						switch (nba[t]) {
385 #if 0
386 						case 1: rMv++; break;
387 #endif
388 						case 2: rMvWvw++; break;
389 						case 3: rMvMw++; break;
390 						}
391 					}
392 				} else {
393 					//vertex in Wvw, nba == 2
394 					OGDF_ASSERT(nba[u] == 2);
395 					sizeWvw++;
396 					for(adjEntry adjE2 : u->adjEntries)
397 					{
398 						node t = adjE2->twinNode();
399 						//processed testen?
400 						//test links to other sets
401 						switch (nba[t]) {
402 #if 0
403 						case 1: rMv++; break;
404 #endif
405 						case 2: rWvw++; break;
406 						case 3: rMwWvw++; break;
407 						}
408 					}
409 				}
410 			}
411 		}
412 
413 		//Now compute the ratio of existing edges to maximal number
414 		//(complete graph)
415 
416 		double sMvWvw = 0.0;
417 		double sMwWvw = 0.0;
418 		double sWvw = 0.0;
419 		double sMvMw = 0.0;
420 		//we have to cope with special cases
421 		int smult = sizeMv*sizeWvw;
422 		if (smult != 0) sMvWvw = (double)rMvWvw/smult;
423 		smult = sizeMw*sizeWvw;
424 		if (smult != 0) sMwWvw = (double)rMwWvw/smult;
425 		smult = (sizeMv*sizeMw);
426 		if (smult != 0) sMvMw = (double)rMvMw/smult;
427 
428 
429 		if (sizeWvw > 1)
430 			sWvw   = 2.0*rWvw/(sizeWvw*(sizeWvw-1));
431 		else if (sizeWvw == 1) sWvw = 1.0;
432 		//Ratio of cycles of size 3 and 4
433 		double cycleProportion = ((double)sizeWvw/(sizeWvw+sizeMv+sizeMw));
434 		double edgeStrength = sMvWvw+sMwWvw+sWvw+sMvMw+cycleProportion;
435 
436 		if (m_autoThreshNum > 0)
437 		{
438 			if (minStrength > edgeStrength) minStrength = edgeStrength;
439 			if (maxStrength < edgeStrength) maxStrength = edgeStrength;
440 		}
441 
442 #if 0
443 		std::cout<<"sWerte: "<<sMvWvw<<"/"<<sMwWvw<<"/"<<sWvw<<"/"<<sMvMw<<"\n";
444 		std::cout << "CycleProportion "<<cycleProportion<<"\n";
445 		std::cout << "EdgeStrength "<<edgeStrength<<"\n";
446 #endif
447 		strength[e] = edgeStrength;
448 
449 		//true neighbours are not adjacent to v
450 #if 0
451 		std::cout << "Checking true neighbours of w\n";
452 #endif
453 
454 	}
455 	if (m_autoThreshNum > 0)
456 	{
457 		if (m_autoThresholds.size()>0) m_autoThresholds.clear();
458 		if (maxStrength > minStrength)
459 		{
460 #if 0
461 			std::cout << "Max: "<<maxStrength<< " Min: "<<minStrength<<"\n";
462 #endif
463 			double step = (maxStrength-minStrength)/((double)m_autoThreshNum+1.0);
464 			double val = minStrength+step;
465 			for (int i = 0; i<m_autoThreshNum; i++)
466 			{
467 				m_autoThresholds.pushBack(val);
468 				val += step;
469 			}
470 		} else {
471 			m_autoThresholds.pushBack(maxStrength); // Stops computation
472 		}
473 	}
474 }
475 
476 //computes clustering and translates the computed structure into C
createClusterGraph(ClusterGraph & C)477 void Clusterer::createClusterGraph(ClusterGraph &C)
478 {
479 	OGDF_ASSERT(&(C.constGraph()) == m_pGraph);
480 	//clear existing entries
481 	C.clear();
482 
483 	//we compute the edge strengths
484 	EdgeArray<double> strengths(*m_pGraph,0.0);
485 	computeEdgeStrengths(strengths);
486 
487 	//based on strengths, we compute a clustering
488 	//Case 1:with depth one (or #thresholds)
489 	//Case 2: Recursively
490 
491 	//Case1:
492 	GraphCopy GC(*m_pGraph);
493 	ListIterator<double> it = m_thresholds.begin();
494 
495 	while (it.valid())
496 	{
497 		//collect edges that will be deleted
498 		List<edge> le;
499 		for(edge e : GC.edges)
500 		{
501 			if (strengths[e] < *it)
502 			{
503 				le.pushFront(e);
504 			}
505 		}
506 		//delete edges
507 		for (edge e : le)
508 			GC.delEdge(e);
509 
510 		//gather cluster information
511 		//vertices within a connected component are always part
512 		//of the same cluster which then will be parent of the new clusters
513 
514 		ArrayBuffer<node> S;
515 		NodeArray<bool> done(GC, false);
516 		for(node v : GC.nodes)
517 		{
518 			if (done[v]) continue;
519 			done[v] = true;
520 			S.push(v);
521 			cluster parent = C.clusterOf(GC.original(v));
522 			SList<node> theCluster;
523 
524 			while(!S.empty())
525 			{
526 				node w = S.popRet();
527 				theCluster.pushFront(GC.original(w));
528 				for(adjEntry adj : w->adjEntries) {
529 					node x = adj->twinNode();
530 					if (!(done[x]))
531 					{
532 						done[x] = true;
533 						S.push(x);
534 					}
535 				}
536 			}
537 			//create the cluster
538 			C.createCluster(theCluster, parent);
539 		}
540 
541 		++it;
542 	}
543 }
544 
setClusteringThresholds(const List<double> & threshs)545 void Clusterer::setClusteringThresholds(const List<double> &threshs)
546 {
547 	//we copy the values, should be a low number
548 	m_thresholds.clear();
549 
550 	for (double x : threshs)
551 		m_thresholds.pushFront(x);
552 }
553 
554 }
555