1 // ==========================================================================
2 //                 SeqAn - The Library for Sequence Analysis
3 // ==========================================================================
4 // Copyright (c) 2006-2015, Knut Reinert, FU Berlin
5 // All rights reserved.
6 //
7 // Redistribution and use in source and binary forms, with or without
8 // modification, are permitted provided that the following conditions are met:
9 //
10 //     * Redistributions of source code must retain the above copyright
11 //       notice, this list of conditions and the following disclaimer.
12 //     * Redistributions in binary form must reproduce the above copyright
13 //       notice, this list of conditions and the following disclaimer in the
14 //       documentation and/or other materials provided with the distribution.
15 //     * Neither the name of Knut Reinert or the FU Berlin nor the names of
16 //       its contributors may be used to endorse or promote products derived
17 //       from this software without specific prior written permission.
18 //
19 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20 // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 // ARE DISCLAIMED. IN NO EVENT SHALL KNUT REINERT OR THE FU BERLIN BE LIABLE
23 // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
25 // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
26 // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 // LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28 // OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
29 // DAMAGE.
30 //
31 // ==========================================================================
32 // Author: Manuel Holtgrewe <manuel.holtgrewe@fu-berlin.de>
33 // ==========================================================================
34 // Implementation of Weakly-Connected-Components algorithm.
35 // ==========================================================================
36 
37 #ifndef INCLUDE_SEQAN_GRAPH_ALGORITHMS_WEAKLY_CONNECTED_COMPONENTS_H_
38 #define INCLUDE_SEQAN_GRAPH_ALGORITHMS_WEAKLY_CONNECTED_COMPONENTS_H_
39 
40 namespace seqan {
41 
42 // ============================================================================
43 // Forwards
44 // ============================================================================
45 
46 // ============================================================================
47 // Tags, Classes, Enums
48 // ============================================================================
49 
50 // ============================================================================
51 // Metafunctions
52 // ============================================================================
53 
54 // ============================================================================
55 // Functions
56 // ============================================================================
57 
58 // ----------------------------------------------------------------------------
59 // Function weaklyConnectedComponents()
60 // ----------------------------------------------------------------------------
61 
62 /*!
63  * @fn weaklyConnectedComponents
64  * @headerfile <seqan/graph_algorithms.h>
65  * @brief Compute weakly connected components of a directed graph.
66  *
67  * @signature TSize weaklyConnectedComponents(components, g);
68  *
69  * @param[out] components
70  *               A property map.  Each vertex is mapped to a component id.  If two vertices share the same id they
71  *               are in the same component.
72  * @param[in]  g A @link DirectedGraph @endlink to use for the input.
73  *
74  * @return TSize The number of weakly connected components  (Metafunction: @link Graph#Size @endlink of the type
75  *               of <tt>g</tt>).
76  *
77  * The running time is <tt>O(n a(n, n))</tt> where <tt>a</tt> is the inverse Ackermann function and thus almost linear.
78  * The union find data structure is used since the graph implementations do not allow the efficient iteration of
79  * in-edges.
80  */
81 template <typename TSpec, typename TComponents>
82 typename Size<Graph<TSpec> >::Type
weaklyConnectedComponents(TComponents & components,Graph<TSpec> const & g)83 weaklyConnectedComponents(TComponents & components,
84                           Graph<TSpec> const & g)
85 
86 {
87     typedef Graph<TSpec> TGraph;
88     typedef typename Size<TGraph>::Type TSize;
89     typedef typename Iterator<TGraph, EdgeIterator>::Type TEdgeIterator;
90     typedef typename Iterator<TGraph, VertexIterator>::Type TVertexIterator;
91     typedef typename VertexDescriptor<TGraph>::Type TVertexDescriptor;
92 
93     // Initialization.
94     UnionFind<TVertexDescriptor> unionFind;
95     resizeVertexMap(unionFind, g);
96 
97     // Iterate over all edges, joining weakly connected components.
98     for (TEdgeIterator itE(g); !atEnd(itE); goNext(itE))
99         joinSets(unionFind, findSet(unionFind, sourceVertex(itE)), findSet(unionFind, targetVertex(itE)));
100 
101     // Count number of sets.
102     TSize setCount = 0;
103     for (TVertexIterator itV(g); !atEnd(itV); goNext(itV))
104         setCount += (findSet(unionFind, *itV) == *itV);
105 
106     // Build a map from graph vertex descriptor to component id.
107     TSize nextId = 0;
108     clear(components);
109     resizeVertexMap(components, g, setCount);  // setCount is sentinel value
110     for (TVertexIterator itV(g); !atEnd(itV); goNext(itV)) {
111         if (getProperty(components, findSet(unionFind, *itV)) == setCount)
112             assignProperty(components, findSet(unionFind, *itV), nextId++);
113         assignProperty(components, *itV, getProperty(components, findSet(unionFind, *itV)));
114     }
115 
116     return setCount;
117 }
118 
119 }  // namespace seqan
120 
121 #endif  // #ifndef INCLUDE_SEQAN_GRAPH_ALGORITHMS_WEAKLY_CONNECTED_COMPONENTS_H_
122