1 // ==========================================================================
2 // SeqAn - The Library for Sequence Analysis
3 // ==========================================================================
4 // Copyright (c) 2006-2015, Knut Reinert, FU Berlin
5 // All rights reserved.
6 //
7 // Redistribution and use in source and binary forms, with or without
8 // modification, are permitted provided that the following conditions are met:
9 //
10 // * Redistributions of source code must retain the above copyright
11 // notice, this list of conditions and the following disclaimer.
12 // * Redistributions in binary form must reproduce the above copyright
13 // notice, this list of conditions and the following disclaimer in the
14 // documentation and/or other materials provided with the distribution.
15 // * Neither the name of Knut Reinert or the FU Berlin nor the names of
16 // its contributors may be used to endorse or promote products derived
17 // from this software without specific prior written permission.
18 //
19 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20 // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 // ARE DISCLAIMED. IN NO EVENT SHALL KNUT REINERT OR THE FU BERLIN BE LIABLE
23 // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
25 // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
26 // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 // LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28 // OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
29 // DAMAGE.
30 //
31 // ==========================================================================
32
33 #ifndef SEQAN_HEADER_GRAPH_ALIGN_TCOFFEE_DISTANCE_H
34 #define SEQAN_HEADER_GRAPH_ALIGN_TCOFFEE_DISTANCE_H
35
36 namespace SEQAN_NAMESPACE_MAIN
37 {
38
39 //////////////////////////////////////////////////////////////////////////////
40 // Distance matrix calculation
41 //////////////////////////////////////////////////////////////////////////////
42
43 /*!
44 * @defgroup DistanceCalculationTags
45 * @brief Tags for specifying how to calculate distance matrices.
46 *
47 *
48 * @tag DistanceCalculationTags#LibraryDistance
49 * @headerfile <seqan/graph_msa.h>
50 * @brief Using the library itself and heaviest common subsequence to determine a distance matrix.
51 *
52 * @signature typedef Tag<LibraryDistance_> const LibraryDistance;
53 *
54 *
55 * @tag DistanceCalculationTags#KmerDistance
56 * @headerfile <seqan/graph_msa.h>
57 * @brief Using a simple kmer count to determine a distance matrix.
58 *
59 * @signature typedef Tag<LibraryDistance_> const KmerDistance;
60 */
61
62 struct LibraryDistance_;
63 typedef Tag<LibraryDistance_> const LibraryDistance;
64
65
66 struct KmerDistance_;
67 typedef Tag<KmerDistance_> const KmerDistance;
68
69
70
71
72
73
74 //////////////////////////////////////////////////////////////////////////////
75 // LibraryDistance
76 //////////////////////////////////////////////////////////////////////////////
77
78 //////////////////////////////////////////////////////////////////////////////
79
80 template<typename TStringSet, typename TCargo, typename TSpec, typename TMatrix>
81 inline void
getDistanceMatrix(Graph<Alignment<TStringSet,TCargo,TSpec>> & g,TMatrix & distanceMatrix,LibraryDistance)82 getDistanceMatrix(Graph<Alignment<TStringSet, TCargo, TSpec> >& g,
83 TMatrix& distanceMatrix,
84 LibraryDistance)
85 {
86 typedef Graph<Alignment<TStringSet, TCargo, TSpec> > TGraph;
87 typedef typename Size<TGraph>::Type TSize;
88 //typedef typename Id<TGraph>::Type TId;
89 typedef typename VertexDescriptor<TGraph>::Type TVertexDescriptor;
90 //typedef typename EdgeDescriptor<TGraph>::Type TEdgeDescriptor;
91 //typedef typename Iterator<TGraph, EdgeIterator>::Type TEdgeIterator;
92 typedef typename Value<TMatrix>::Type TValue;
93
94 // Initialization
95 clear(distanceMatrix);
96 TStringSet& str = stringSet(g);
97 TSize nseq = length(str);
98 resize(distanceMatrix, nseq * nseq, 0);
99
100 // All pairwise alignments
101 typedef String<String<TVertexDescriptor> > TSegmentString;
102 TValue maxScore = 0;
103 for(TSize i=0; i<nseq; ++i) {
104 TSegmentString seq1;
105 TSize len1 = length(str[i]);
106 _buildLeafString(g, i, seq1);
107 for(TSize j=i+1; j<nseq; ++j) {
108 // Align the 2 strings
109 TSegmentString seq2;
110 TSize len2 = length(str[j]);
111 _buildLeafString(g, j, seq2);
112 TSegmentString alignSeq;
113 TValue score = heaviestCommonSubsequence(g,seq1,seq2,alignSeq);
114
115 // Normalize by distance
116 if (len1 > len2) score /= len1;
117 else score /= len2;
118 if (score > maxScore) maxScore = score;
119
120 // Remember the value
121 distanceMatrix[i*nseq+j] = score;
122 }
123 }
124
125 // Normalize values
126 for(TSize i=0; i<nseq; ++i)
127 for(TSize j=i+1; j<nseq; ++j)
128 distanceMatrix[i*nseq+j] = SEQAN_DISTANCE_UNITY - ((distanceMatrix[i*nseq+j] * SEQAN_DISTANCE_UNITY) / maxScore );
129 }
130
131
132 //////////////////////////////////////////////////////////////////////////////
133 // KmerDistance
134 //////////////////////////////////////////////////////////////////////////////
135
136 //////////////////////////////////////////////////////////////////////////////
137
138
139 template<typename TStringSet, typename TCargo, typename TSpec, typename TMatrix, typename TSize, typename TAlphabet>
140 inline void
getDistanceMatrix(Graph<Alignment<TStringSet,TCargo,TSpec>> & g,TMatrix & distanceMatrix,TSize ktup,TAlphabet,KmerDistance)141 getDistanceMatrix(Graph<Alignment<TStringSet, TCargo, TSpec> >& g,
142 TMatrix& distanceMatrix,
143 TSize ktup,
144 TAlphabet,
145 KmerDistance)
146 {
147 //typedef typename Value<TMatrix>::Type TValue;
148 typedef typename Iterator<TMatrix, Standard>::Type TMatrixIterator;
149
150 getKmerSimilarityMatrix(stringSet(g), distanceMatrix, ktup, TAlphabet());
151
152 // Similarity to distance conversion
153 TMatrixIterator matIt = begin(distanceMatrix, Standard());
154 TMatrixIterator endMatIt = end(distanceMatrix, Standard());
155 for(;matIt != endMatIt;++matIt)
156 *matIt = SEQAN_DISTANCE_UNITY - (*matIt);
157 }
158
159 //////////////////////////////////////////////////////////////////////////////
160
161 template<typename TStringSet, typename TCargo, typename TSpec, typename TMatrix, typename TSize>
162 inline void
getDistanceMatrix(Graph<Alignment<TStringSet,TCargo,TSpec>> & g,TMatrix & distanceMatrix,TSize ktup,KmerDistance)163 getDistanceMatrix(Graph<Alignment<TStringSet, TCargo, TSpec> >& g,
164 TMatrix& distanceMatrix,
165 TSize ktup,
166 KmerDistance)
167 {
168 SEQAN_CHECKPOINT
169 getDistanceMatrix(g, distanceMatrix, ktup, typename Value<typename Value<TStringSet>::Type>::Type(), KmerDistance() );
170 }
171
172 //////////////////////////////////////////////////////////////////////////////
173
174 template<typename TStringSet, typename TCargo, typename TSpec, typename TMatrix>
175 inline void
getDistanceMatrix(Graph<Alignment<TStringSet,TCargo,TSpec>> & g,TMatrix & distanceMatrix,KmerDistance)176 getDistanceMatrix(Graph<Alignment<TStringSet, TCargo, TSpec> >& g,
177 TMatrix& distanceMatrix,
178 KmerDistance)
179 {
180 SEQAN_CHECKPOINT
181 getDistanceMatrix(g, distanceMatrix, 3, KmerDistance() );
182 }
183
184
185 //////////////////////////////////////////////////////////////////////////////
186
187 /*!
188 * @fn AlignmentGraph#getDistanceMatrix
189 * @headerfile <seqan/graph_msa.h>
190 * @brief Computes a pairwise distance matrix from an @link AlignmentGraph @endlink.
191 *
192 * @signature void getDistanceMtarix(graph, mat[, tag]);
193 * @signature void getDistanceMtarix(graph, mat[, kTup][, alphabet], KmerDistance);
194 *
195 * @param[in] graph An @link AlignmentGraph @endlink containing the sequences and possible alignment edges.
196 * @param[out] mat Pairwise distance matrix.
197 * @param[in] kTup For KMerDistance, the length of the k-mers.
198 * @param[in] alphabet For KMerDistance, the alphabet to use for k-mer counting (e.g. compressed alphabets).
199 * @param[in] tag See @link DistanceCalculationTags @endlink. Default: <tt>KMerDistance</tt>.
200 */
201
202
203 template<typename TStringSet, typename TCargo, typename TSpec, typename TMatrix>
204 inline void
getDistanceMatrix(Graph<Alignment<TStringSet,TCargo,TSpec>> & g,TMatrix & distanceMatrix)205 getDistanceMatrix(Graph<Alignment<TStringSet, TCargo, TSpec> >& g,
206 TMatrix& distanceMatrix)
207 {
208 SEQAN_CHECKPOINT
209 getDistanceMatrix(g, distanceMatrix, KmerDistance() );
210 }
211
212
213
214 }// namespace SEQAN_NAMESPACE_MAIN
215
216 #endif //#ifndef SEQAN_HEADER_...
217