1 // ==========================================================================
2 // SeqAn - The Library for Sequence Analysis
3 // ==========================================================================
4 // Copyright (c) 2006-2015, Knut Reinert, FU Berlin
5 // All rights reserved.
6 //
7 // Redistribution and use in source and binary forms, with or without
8 // modification, are permitted provided that the following conditions are met:
9 //
10 // * Redistributions of source code must retain the above copyright
11 // notice, this list of conditions and the following disclaimer.
12 // * Redistributions in binary form must reproduce the above copyright
13 // notice, this list of conditions and the following disclaimer in the
14 // documentation and/or other materials provided with the distribution.
15 // * Neither the name of Knut Reinert or the FU Berlin nor the names of
16 // its contributors may be used to endorse or promote products derived
17 // from this software without specific prior written permission.
18 //
19 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20 // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 // ARE DISCLAIMED. IN NO EVENT SHALL KNUT REINERT OR THE FU BERLIN BE LIABLE
23 // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
25 // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
26 // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 // LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28 // OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
29 // DAMAGE.
30 //
31 // ==========================================================================
32 // Author: Manuel Holtgrewe <manuel.holtgrewe@fu-berlin.de>
33 // ==========================================================================
34 // Implementation of Weakly-Connected-Components algorithm.
35 // ==========================================================================
36
37 #ifndef INCLUDE_SEQAN_GRAPH_ALGORITHMS_WEAKLY_CONNECTED_COMPONENTS_H_
38 #define INCLUDE_SEQAN_GRAPH_ALGORITHMS_WEAKLY_CONNECTED_COMPONENTS_H_
39
40 namespace seqan {
41
42 // ============================================================================
43 // Forwards
44 // ============================================================================
45
46 // ============================================================================
47 // Tags, Classes, Enums
48 // ============================================================================
49
50 // ============================================================================
51 // Metafunctions
52 // ============================================================================
53
54 // ============================================================================
55 // Functions
56 // ============================================================================
57
58 // ----------------------------------------------------------------------------
59 // Function weaklyConnectedComponents()
60 // ----------------------------------------------------------------------------
61
62 /*!
63 * @fn weaklyConnectedComponents
64 * @headerfile <seqan/graph_algorithms.h>
65 * @brief Compute weakly connected components of a directed graph.
66 *
67 * @signature TSize weaklyConnectedComponents(components, g);
68 *
69 * @param[out] components
70 * A property map. Each vertex is mapped to a component id. If two vertices share the same id they
71 * are in the same component.
72 * @param[in] g A @link DirectedGraph @endlink to use for the input.
73 *
74 * @return TSize The number of weakly connected components (Metafunction: @link Graph#Size @endlink of the type
75 * of <tt>g</tt>).
76 *
77 * The running time is <tt>O(n a(n, n))</tt> where <tt>a</tt> is the inverse Ackermann function and thus almost linear.
78 * The union find data structure is used since the graph implementations do not allow the efficient iteration of
79 * in-edges.
80 */
81 template <typename TSpec, typename TComponents>
82 typename Size<Graph<TSpec> >::Type
weaklyConnectedComponents(TComponents & components,Graph<TSpec> const & g)83 weaklyConnectedComponents(TComponents & components,
84 Graph<TSpec> const & g)
85
86 {
87 typedef Graph<TSpec> TGraph;
88 typedef typename Size<TGraph>::Type TSize;
89 typedef typename Iterator<TGraph, EdgeIterator>::Type TEdgeIterator;
90 typedef typename Iterator<TGraph, VertexIterator>::Type TVertexIterator;
91 typedef typename VertexDescriptor<TGraph>::Type TVertexDescriptor;
92
93 // Initialization.
94 UnionFind<TVertexDescriptor> unionFind;
95 resizeVertexMap(unionFind, g);
96
97 // Iterate over all edges, joining weakly connected components.
98 for (TEdgeIterator itE(g); !atEnd(itE); goNext(itE))
99 joinSets(unionFind, findSet(unionFind, sourceVertex(itE)), findSet(unionFind, targetVertex(itE)));
100
101 // Count number of sets.
102 TSize setCount = 0;
103 for (TVertexIterator itV(g); !atEnd(itV); goNext(itV))
104 setCount += (findSet(unionFind, *itV) == *itV);
105
106 // Build a map from graph vertex descriptor to component id.
107 TSize nextId = 0;
108 clear(components);
109 resizeVertexMap(components, g, setCount); // setCount is sentinel value
110 for (TVertexIterator itV(g); !atEnd(itV); goNext(itV)) {
111 if (getProperty(components, findSet(unionFind, *itV)) == setCount)
112 assignProperty(components, findSet(unionFind, *itV), nextId++);
113 assignProperty(components, *itV, getProperty(components, findSet(unionFind, *itV)));
114 }
115
116 return setCount;
117 }
118
119 } // namespace seqan
120
121 #endif // #ifndef INCLUDE_SEQAN_GRAPH_ALGORITHMS_WEAKLY_CONNECTED_COMPONENTS_H_
122