1 // ==========================================================================
2 // SeqAn - The Library for Sequence Analysis
3 // ==========================================================================
4 // Copyright (c) 2006-2010, Knut Reinert, FU Berlin
5 // All rights reserved.
6 //
7 // Redistribution and use in source and binary forms, with or without
8 // modification, are permitted provided that the following conditions are met:
9 //
10 // * Redistributions of source code must retain the above copyright
11 // notice, this list of conditions and the following disclaimer.
12 // * Redistributions in binary form must reproduce the above copyright
13 // notice, this list of conditions and the following disclaimer in the
14 // documentation and/or other materials provided with the distribution.
15 // * Neither the name of Knut Reinert or the FU Berlin nor the names of
16 // its contributors may be used to endorse or promote products derived
17 // from this software without specific prior written permission.
18 //
19 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20 // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 // ARE DISCLAIMED. IN NO EVENT SHALL KNUT REINERT OR THE FU BERLIN BE LIABLE
23 // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
25 // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
26 // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 // LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28 // OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
29 // DAMAGE.
30 //
31 // ==========================================================================
32
33 #ifndef SEQAN_HEADER_GRAPH_ALIGN_TCOFFEE_BASE_H
34 #define SEQAN_HEADER_GRAPH_ALIGN_TCOFFEE_BASE_H
35
36 namespace SEQAN_NAMESPACE_MAIN
37 {
38
39 //////////////////////////////////////////////////////////////////////////////
40 // Distance unity
41 //////////////////////////////////////////////////////////////////////////////
42
43 //static const int SEQAN_DISTANCE_UNITY = 1 << 20;
44 static const int SEQAN_DISTANCE_UNITY = 1;
45
46
47 //////////////////////////////////////////////////////////////////////////////
48 // Tags
49 //////////////////////////////////////////////////////////////////////////////
50
51 //////////////////////////////////////////////////////////////////////////////
52
53 /**
54 .Tag.Alignment Graph Combination:
55 ..summary:A tag to specify how to combine alignment graphs.
56 ..include:seqan/graph_msa.h
57 */
58
59 /**
60 .Tag.Alignment Graph Combination.value.FractionalScore:
61 Rescores matches with the appropriate fractional score.
62 ..include:seqan/graph_msa.h
63 */
64 struct FractionalScore_;
65 typedef Tag<FractionalScore_> const FractionalScore;
66
67
68 /**
69 .Tag.Alignment Graph Combination.value.FrequencyCount:
70 Rescores matches with the frequency count for this edge.
71 ..include:seqan/graph_msa.h
72 */
73 struct FrequencyCounting_;
74 typedef Tag<FrequencyCounting_> const FrequencyCounting;
75
76 /**
77 .Tag.Alignment Graph Combination.value.ReScore:
78 Rescores the matches after segment-match refinement.
79 ..include:seqan/graph_msa.h
80 */
81 struct ReScore_;
82 typedef Tag<ReScore_> const ReScore;
83
84
85 //////////////////////////////////////////////////////////////////////////////
86 // Generating an alignment graph from segment matches
87 //////////////////////////////////////////////////////////////////////////////
88
89
90 //////////////////////////////////////////////////////////////////////////////
91
92 template<typename TFragment, typename TSpec1, typename TScoreValue, typename TSpec2, typename TStringSet, typename TCargo, typename TSpec>
93 inline void
buildAlignmentGraph(String<TFragment,TSpec1> & matches,String<TScoreValue,TSpec2> & scores,Graph<Alignment<TStringSet,TCargo,TSpec>> & outGraph,FractionalScore)94 buildAlignmentGraph(String<TFragment, TSpec1>& matches,
95 String<TScoreValue, TSpec2>& scores,
96 Graph<Alignment<TStringSet, TCargo, TSpec> >& outGraph,
97 FractionalScore)
98 {
99 SEQAN_CHECKPOINT
100 typedef String<TFragment, TSpec1> TFragmentString;
101 typedef typename Iterator<TFragmentString, Standard>::Type TFragmentStringIter;
102 typedef String<TScoreValue, TSpec2> TScoreValues;
103 typedef typename Iterator<TScoreValues, Standard>::Type TScoreValuesIter;
104 typedef Graph<Alignment<TStringSet, TCargo, TSpec> > TOutGraph;
105 typedef typename Size<TFragmentString>::Type TSize;
106 typedef typename Id<TOutGraph>::Type TId;
107 typedef typename EdgeDescriptor<TOutGraph>::Type TEdgeDescriptor;
108 typedef typename VertexDescriptor<TOutGraph>::Type TVertexDescriptor;
109
110 // Initialization
111 clearVertices(outGraph);
112 TStringSet& strSet = stringSet(outGraph);
113
114 // Segment-match refinement
115 matchRefinement(matches,strSet,outGraph);
116
117 // Clear edge-weights
118 typedef typename Iterator<TOutGraph, EdgeIterator>::Type TEdgeIterator;
119 TEdgeIterator itE(outGraph);
120 for(;!atEnd(itE);goNext(itE)) cargo(value(itE)) = 1;
121
122 // Adapt the scores
123 TFragmentStringIter it = begin(matches, Standard() );
124 TFragmentStringIter endIt = end(matches, Standard() );
125 TScoreValuesIter scoreIt = begin(scores, Standard() );
126 for(; it != endIt; ++it, ++scoreIt) {
127 TId id1 = sequenceId(*it,0);
128 TId id2 = sequenceId(*it,1);
129 TSize pos1 = fragmentBegin(*it, id1);
130 TSize pos2 = fragmentBegin(*it, id2);
131 TSize fragLen = fragmentLength(*it, id1);
132 TSize end1 = pos1 + fragLen;
133 while(pos1 < end1) {
134 TVertexDescriptor p1 = findVertex(outGraph, id1, pos1);
135 TVertexDescriptor p2 = findVertex(outGraph, id2, pos2);
136 TSize vertexLen = fragmentLength(outGraph, p1);
137 cargo(findEdge(outGraph, p1, p2)) += (TCargo) ((vertexLen * (*scoreIt)) / fragLen);
138 pos1 += vertexLen;
139 pos2 += vertexLen;
140 }
141 }
142 }
143
144
145 //////////////////////////////////////////////////////////////////////////////
146
147 template<typename TFragment, typename TSpec1, typename TStringSet, typename TCargo, typename TSpec>
148 inline void
buildAlignmentGraph(String<TFragment,TSpec1> & matches,Graph<Alignment<TStringSet,TCargo,TSpec>> & outGraph,FrequencyCounting)149 buildAlignmentGraph(String<TFragment, TSpec1>& matches,
150 Graph<Alignment<TStringSet, TCargo, TSpec> >& outGraph,
151 FrequencyCounting)
152 {
153 SEQAN_CHECKPOINT
154 typedef String<TFragment, TSpec1> TFragmentString;
155 typedef Graph<Alignment<TStringSet, TCargo, TSpec> > TOutGraph;
156 typedef typename Size<TFragmentString>::Type TSize;
157
158 // Initialization
159 clearVertices(outGraph);
160 TStringSet& strSet = stringSet(outGraph);
161
162 // Segment-match refinement
163 matchRefinement(matches,strSet,outGraph);
164 }
165
166 //////////////////////////////////////////////////////////////////////////////
167
168 template<typename TFragment, typename TSpec1, typename TScoreValue, typename TSpec2, typename TStringSet, typename TCargo, typename TSpec>
169 inline void
buildAlignmentGraph(String<TFragment,TSpec1> & matches,String<TScoreValue,TSpec2> &,Graph<Alignment<TStringSet,TCargo,TSpec>> & outGraph,FrequencyCounting)170 buildAlignmentGraph(String<TFragment, TSpec1>& matches,
171 String<TScoreValue, TSpec2>&,
172 Graph<Alignment<TStringSet, TCargo, TSpec> >& outGraph,
173 FrequencyCounting)
174 {
175 buildAlignmentGraph(matches, outGraph, FrequencyCounting() );
176 }
177
178 //////////////////////////////////////////////////////////////////////////////
179
180 template<typename TString, typename TSpec, typename TScoreType, typename TSize, typename TSpec2, typename TScoreString, typename TScoreValue>
181 inline void
_scoreMatches(StringSet<TString,TSpec> const & seqSet,TScoreType const & scType,String<Fragment<TSize,ExactFragment<>>,TSpec2> const & matches,TScoreString & scores,TScoreValue offset)182 _scoreMatches(StringSet<TString, TSpec> const& seqSet,
183 TScoreType const& scType,
184 String<Fragment<TSize, ExactFragment<> >, TSpec2> const& matches,
185 TScoreString& scores,
186 TScoreValue offset)
187 {
188 SEQAN_CHECKPOINT
189 typedef String<Fragment<TSize, ExactFragment<> >, TSpec2> TFragmentString;
190 typedef typename Id<typename Value<TFragmentString>::Type>::Type TId;
191 typedef typename Iterator<TFragmentString, Standard>::Type TFragmentStringIter;
192 typedef typename Iterator<TString, Standard>::Type TStringIter;
193 typedef typename Iterator<TScoreString, Standard>::Type TScoreStringIter;
194 resize(scores, length(matches));
195
196 // Get the scores
197 TFragmentStringIter itF = begin(matches, Standard() );
198 TFragmentStringIter itFEnd = end(matches, Standard() );
199 TScoreStringIter itSc = begin(scores, Standard() );
200 TId id1 = 0; TId id2 = 0;
201 TSize pos1 = 0; TSize pos2 = 0; TSize fragLen = 0;
202 for(; itF != itFEnd; ++itF, ++itSc) {
203 id1 = sequenceId(*itF,0);
204 id2 = sequenceId(*itF,1);
205 pos1 = fragmentBegin(*itF, id1);
206 pos2 = fragmentBegin(*itF, id2);
207 fragLen = fragmentLength(*itF, id1);
208 TStringIter itS1 = begin(seqSet[idToPosition(seqSet, id1)], Standard() );
209 itS1 += pos1;
210 TStringIter itS2 = begin(seqSet[idToPosition(seqSet, id2)], Standard() );
211 itS2 += pos2;
212 *itSc = 0;
213 for(TSize i = 0; i<fragLen; ++i, ++itS1, ++itS2)
214 *itSc += offset + score(scType, *itS1, *itS2);
215 }
216 }
217
218 //////////////////////////////////////////////////////////////////////////////
219
220 template<typename TString, typename TSpec, typename TScoreType, typename TFragment, typename TSpec2, typename TScoreString>
221 inline void
_scoreMatches(StringSet<TString,TSpec> const & seqSet,TScoreType const & scType,String<TFragment,TSpec2> const & matches,TScoreString & scores)222 _scoreMatches(StringSet<TString, TSpec> const& seqSet,
223 TScoreType const& scType,
224 String<TFragment, TSpec2> const& matches,
225 TScoreString& scores)
226 {
227 SEQAN_CHECKPOINT
228 _scoreMatches(seqSet, scType, matches, scores, (typename Value<TScoreString>::Type) 10);
229 }
230
231
232 //////////////////////////////////////////////////////////////////////////////
233
234 template<typename TFragment, typename TSpec1, typename TScoreValue, typename TSpec2, typename TStringSet, typename TCargo, typename TSpec, typename TScore>
235 inline void
buildAlignmentGraph(String<TFragment,TSpec1> & matches,String<TScoreValue,TSpec2> & scores,Graph<Alignment<TStringSet,TCargo,TSpec>> & outGraph,TScore const & scType,ReScore)236 buildAlignmentGraph(String<TFragment, TSpec1>& matches,
237 String<TScoreValue, TSpec2>& scores,
238 Graph<Alignment<TStringSet, TCargo, TSpec> >& outGraph,
239 TScore const& scType,
240 ReScore)
241 {
242 // ReScore
243 _scoreMatches(stringSet(outGraph), scType, matches, scores);
244
245 // Use fractinal score
246 buildAlignmentGraph(matches, scores, outGraph, FractionalScore() );
247 }
248
249
250 //////////////////////////////////////////////////////////////////////////////
251 // Consistency: Triplet extension
252 //////////////////////////////////////////////////////////////////////////////
253
254 //////////////////////////////////////////////////////////////////////////////
255
256 //////////////////////////////////////////////////////////////////////////////
257
258
259
260 template<typename TVertexDescriptor, typename TCargo>
261 struct MsaEdgeCargo_ {
262 public:
263 TVertexDescriptor v1;
264 TVertexDescriptor v2;
265 TCargo c;
266
MsaEdgeCargo_MsaEdgeCargo_267 MsaEdgeCargo_() {}
268
269
MsaEdgeCargo_MsaEdgeCargo_270 MsaEdgeCargo_(TVertexDescriptor vert1, TVertexDescriptor vert2, TCargo carg) :
271 v1(vert1), v2(vert2), c(carg) {}
272 };
273
274 //////////////////////////////////////////////////////////////////////////////
275
276 template<typename TVertexDescriptor, typename TCargo>
277 struct LessMsaEdgeCargo_ :
278 public ::std::binary_function<TVertexDescriptor, TCargo, bool>
279 {
280 inline bool
operatorLessMsaEdgeCargo_281 operator() (MsaEdgeCargo_<TVertexDescriptor, TCargo> const& a1,
282 MsaEdgeCargo_<TVertexDescriptor, TCargo> const& a2) const
283 {
284 return (a1.v1 == a2.v1) ? (a1.v2 < a2.v2) : (a1.v1 < a2.v1);
285 }
286 };
287
288 //////////////////////////////////////////////////////////////////////////////
289
290 /**
291 .Function.tripletLibraryExtension:
292 ..summary:Performs a full or group-based consistency extension.
293 ..cat:Graph
294 ..signature:
295 tripletLibraryExtension(graph, [,guideTree, minMembers])
296 ..param.graph:An alignment graph.
297 ...type:Spec.Alignment Graph
298 ..param.guideTree:A guide tree.
299 ..param.minMembers:Minimum number of sequences per group.
300 ...remarks:If a guide tree and a minimum number of memebers is given, the triplet extension is limited to groups of sequences.
301 ..returns:void
302 ..include:seqan/graph_msa.h
303 */
304 template<typename TStringSet, typename TCargo, typename TSpec>
305 inline void
tripletLibraryExtension(Graph<Alignment<TStringSet,TCargo,TSpec>> & g)306 tripletLibraryExtension(Graph<Alignment<TStringSet, TCargo, TSpec> >& g)
307 {
308 SEQAN_CHECKPOINT
309 typedef Graph<Alignment<TStringSet, TCargo, TSpec> > TGraph;
310 typedef typename Size<TGraph>::Type TSize;
311 typedef typename VertexDescriptor<TGraph>::Type TVertexDescriptor;
312 typedef typename Iterator<TGraph, EdgeIterator>::Type TEdgeIterator;
313
314 // Store all edges
315 typedef std::pair<TVertexDescriptor, TVertexDescriptor> TNewEdge;
316 typedef std::map<TNewEdge, TCargo> TEdgeMap;
317 typedef typename TEdgeMap::iterator TEdgeMapIter;
318 TEdgeMap newEMap;
319 typedef MsaEdgeCargo_<TVertexDescriptor, TCargo> TEdge;
320 typedef String<TEdge> TEdgeString;
321 TEdgeString fullEdges;
322 TEdgeIterator itE(g);
323 for(;!atEnd(itE);goNext(itE)) {
324 TVertexDescriptor sV = sourceVertex(itE);
325 TVertexDescriptor tV = targetVertex(itE);
326 TCargo c = cargo(*itE);
327 appendValue(fullEdges, TEdge(sV, tV, c), Generous());
328 appendValue(fullEdges, TEdge(tV, sV, c), Generous());
329 newEMap.insert(std::make_pair(TNewEdge(sV, tV), c));
330 }
331 clearEdges(g);
332 ::std::sort(begin(fullEdges, Standard()), end(fullEdges, Standard()), LessMsaEdgeCargo_<TVertexDescriptor, TCargo>() );
333
334 // Perform triplet extension
335 typedef typename Iterator<TEdgeString, Standard>::Type TEdgeIter;
336 TEdgeIter itEdges1 = begin(fullEdges, Standard());
337 TEdgeIter itEdgesEnd = end(fullEdges, Standard());
338 for(; itEdges1 != itEdgesEnd; ++itEdges1) {
339 for(TEdgeIter itEdges2 = itEdges1; ++itEdges2 != itEdgesEnd;) {
340 if ((*itEdges1).v1 != (*itEdges2).v1) break;
341 if (sequenceId(g, (*itEdges1).v2) != sequenceId(g, (*itEdges2).v2)) {
342 TCargo weight = ((*itEdges1).c < (*itEdges2).c) ? (*itEdges1).c : (*itEdges2).c;
343 if ((*itEdges1).v2 < (*itEdges2).v2) {
344 TEdgeMapIter pos = newEMap.find(TNewEdge((*itEdges1).v2, (*itEdges2).v2));
345 if (pos != newEMap.end()) (*pos).second += weight;
346 else newEMap.insert(std::make_pair(TNewEdge((*itEdges1).v2, (*itEdges2).v2), weight));
347 } else {
348 TEdgeMapIter pos = newEMap.find(TNewEdge((*itEdges2).v2, (*itEdges1).v2));
349 if (pos != newEMap.end()) (*pos).second += weight;
350 else newEMap.insert(std::make_pair(TNewEdge((*itEdges2).v2, (*itEdges1).v2), weight));
351 }
352 }
353 }
354 }
355 clear(fullEdges);
356
357 // Insert edges
358 for(TEdgeMapIter itE = newEMap.begin(); itE != newEMap.end(); ++itE)
359 addEdge(g, (*itE).first.first, (*itE).first.second, (*itE).second);
360 }
361
362 //////////////////////////////////////////////////////////////////////////////
363
364 template<typename TGuideTree, typename TSeqGroups, typename TGroupRoot, typename TSize>
365 inline void
_subTreeSearch(TGuideTree & guideTree,TSeqGroups & seqGroups,TGroupRoot & groupRoot,TSize minMembers)366 _subTreeSearch(TGuideTree& guideTree,
367 TSeqGroups& seqGroups,
368 TGroupRoot& groupRoot,
369 TSize minMembers)
370 {
371 typedef typename Value<TSeqGroups>::Type TSeqGroup;
372 typedef typename VertexDescriptor<TGuideTree>::Type TVertexDescriptor;
373
374 // Initialization
375 TVertexDescriptor rootVertex = getRoot(guideTree);
376 TSize nVertices = numVertices(guideTree);
377 TSize nSeq = (nVertices / 2) + 1;
378
379 // Number of subsequent leaves for each node
380 typedef typename Iterator<TGuideTree, BfsIterator>::Type TBfsIterator;
381 String<TSize> numLeaves;
382 resizeVertexMap(guideTree, numLeaves);
383
384 // All vertices in reversed bfs order
385 typedef String<TVertexDescriptor> TVertexString;
386 TVertexString vertices;
387 resize(vertices, nVertices);
388
389 // Walk through the tree in bfs order
390 TBfsIterator bfsIt(guideTree, rootVertex);
391 TSize pos = length(vertices) - 1;
392 for(;!atEnd(bfsIt);goNext(bfsIt), --pos) {
393 if (isLeaf(guideTree, *bfsIt)) property(numLeaves, *bfsIt) = 1;
394 else property(numLeaves, *bfsIt) = 0;
395 value(vertices, pos) = *bfsIt;
396 }
397
398 // Count the number of leaves for each internal node
399 typedef typename Iterator<TVertexString, Standard>::Type TVertexIter;
400 TVertexIter itVert = begin(vertices, Standard());
401 TVertexIter itVertEnd = end(vertices, Standard());
402 for(;itVert != itVertEnd; ++itVert) {
403 if (!isLeaf(guideTree, *itVert)) {
404 typedef typename Iterator<TGuideTree, AdjacencyIterator>::Type TAdjacencyIterator;
405 TAdjacencyIterator adjIt(guideTree, *itVert);
406 for(;!atEnd(adjIt);goNext(adjIt)) property(numLeaves, *itVert) += property(numLeaves, *adjIt);
407 }
408 }
409
410 // Delineate the groups
411 itVert = begin(vertices, Standard());
412 for(;itVert != itVertEnd; ++itVert) {
413 if (property(numLeaves, *itVert) >= minMembers) {
414 appendValue(seqGroups, TSeqGroup(), Generous());
415 appendValue(groupRoot, *itVert, Generous());
416 TSize elem = length(seqGroups) - 1;
417 collectLeaves(guideTree, *itVert, seqGroups[elem]);
418 property(numLeaves, *itVert) = 0;
419 // Do not take any parent of the group root
420 if (*itVert != rootVertex) {
421 TVertexDescriptor pVert = parentVertex(guideTree, *itVert);
422 while(pVert != rootVertex) {
423 property(numLeaves, pVert) = 0;
424 pVert = parentVertex(guideTree, pVert);
425 }
426 property(numLeaves, pVert) = 0;
427 }
428 }
429 }
430 if (!length(seqGroups)) {
431 appendValue(seqGroups, TSeqGroup());
432 appendValue(groupRoot, rootVertex);
433 collectLeaves(guideTree, rootVertex, seqGroups[0]);
434 }
435
436 // Label all internal vertices with the closest root node
437 typedef Pair<TSize, TSize> TDistGroup; // Distance, group index
438 String<TDistGroup> closestRoot;
439 resize(closestRoot, getIdUpperBound(_getVertexIdManager(guideTree)), TDistGroup(0,0), Exact());
440 for(TSize i=0; i< (TSize) length(groupRoot); ++i) {
441 TVertexDescriptor v = groupRoot[i];
442 TSize dist = 0;
443 while(v != rootVertex) {
444 ++dist;
445 v = parentVertex(guideTree, v);
446 if ((property(closestRoot,v).i1 == 0) ||
447 (property(closestRoot,v).i1 > dist)) {
448 property(closestRoot, v) = TDistGroup(dist,i);
449 }
450 }
451 }
452
453 // Find ungrouped vertices
454 typedef typename Iterator<TSeqGroup, Standard>::Type TSeqGroupIter;
455 TSeqGroup allGroupedLeaves;
456 for(TSize i=0; i< (TSize) length(seqGroups); ++i) {
457 TSeqGroupIter itSeqGroup = begin(seqGroups[i], Standard());
458 TSeqGroupIter itSeqGroupEnd = end(seqGroups[i], Standard());
459 for(;itSeqGroup != itSeqGroupEnd; ++itSeqGroup)
460 appendValue(allGroupedLeaves, *itSeqGroup, Generous());
461 }
462 appendValue(allGroupedLeaves, nSeq);
463 ::std::sort(begin(allGroupedLeaves, Standard()), end(allGroupedLeaves, Standard()));
464 TSeqGroupIter itSeqGroup = begin(allGroupedLeaves, Standard());
465 TSeqGroupIter itSeqGroupEnd = end(allGroupedLeaves, Standard());
466 TSize leftover = 0;
467 TSeqGroup ungroupedLeaves;
468 for(;itSeqGroup != itSeqGroupEnd; ++itSeqGroup, ++leftover) {
469 while (leftover<*itSeqGroup) {
470 appendValue(ungroupedLeaves, leftover, Generous());
471 ++leftover;
472 }
473 }
474
475
476 //std::cout << guideTree << std::endl;
477 //std::cout << nSeq << std::endl;
478 //std::cout << length(groupRoot) << std::endl;
479 //std::cout << length(allGroupedLeaves) << std::endl;
480 //std::cout << length(ungroupedLeaves) << std::endl;
481
482 // Group the ungrouped vertices to the closest group
483 clear(allGroupedLeaves);
484 itSeqGroup = begin(ungroupedLeaves, Standard());
485 itSeqGroupEnd = end(ungroupedLeaves, Standard());
486 for(; itSeqGroup != itSeqGroupEnd; ++itSeqGroup) {
487 TVertexDescriptor v = *itSeqGroup;
488 while(v != rootVertex) {
489 v = parentVertex(guideTree, v);
490 if (property(closestRoot,v).i1 != 0) {
491 appendValue(seqGroups[property(closestRoot,v).i2], *itSeqGroup, Generous());
492 break;
493 }
494 }
495 }
496 }
497
498 //////////////////////////////////////////////////////////////////////////////
499
500 template<typename TStringSet, typename TCargo, typename TSpec, typename TGuideTree, typename TSize>
501 inline void
tripletLibraryExtension(Graph<Alignment<TStringSet,TCargo,TSpec>> & g,TGuideTree & guideTree,TSize minMembers)502 tripletLibraryExtension(Graph<Alignment<TStringSet, TCargo, TSpec> >& g,
503 TGuideTree& guideTree,
504 TSize minMembers)
505 {
506 SEQAN_CHECKPOINT
507 typedef Graph<Alignment<TStringSet, TCargo, TSpec> > TGraph;
508 typedef typename VertexDescriptor<TGraph>::Type TVertexDescriptor;
509 typedef typename Iterator<TGraph, EdgeIterator>::Type TEdgeIterator;
510 typedef typename VertexDescriptor<TGuideTree>::Type TTreeVertex;
511 TStringSet& strSet = stringSet(g);
512 TSize nSeq = length(strSet);
513
514 // Identify large subtrees
515 typedef String<TSize> TSequenceGroups;
516 String<TSequenceGroups> seqGroup;
517 typedef String<TTreeVertex> TGroupRoot;
518 TGroupRoot groupRoot;
519 _subTreeSearch(guideTree, seqGroup, groupRoot, minMembers);
520
521 // Label the subtree sequences
522 String<TSize> seqLabels;
523 resize(seqLabels, nSeq);
524 typedef typename Iterator<TSequenceGroups, Standard>::Type TSeqSetIter;
525 for(TSize i=0; i< (TSize) length(seqGroup); ++i) {
526 TSeqSetIter itSeqGroup = begin(seqGroup[i], Standard());
527 TSeqSetIter itSeqGroupEnd = end(seqGroup[i], Standard());
528 for(;itSeqGroup != itSeqGroupEnd; ++itSeqGroup)
529 seqLabels[*itSeqGroup] = i;
530 }
531
532 // Store all edges
533 typedef std::pair<TVertexDescriptor, TVertexDescriptor> TNewEdge;
534 typedef std::map<TNewEdge, TCargo> TEdgeMap;
535 typedef typename TEdgeMap::iterator TEdgeMapIter;
536 TEdgeMap newEMap;
537 typedef MsaEdgeCargo_<TVertexDescriptor, TCargo> TEdge;
538 typedef String<TEdge> TEdgeString;
539 TEdgeString initialEdges;
540 TEdgeIterator itE(g);
541 for(;!atEnd(itE);goNext(itE)) {
542 TVertexDescriptor sV = sourceVertex(itE);
543 TVertexDescriptor tV = targetVertex(itE);
544 TCargo c = cargo(*itE);
545 appendValue(initialEdges, TEdge(sV, tV, c), Generous());
546 newEMap.insert(std::make_pair(TNewEdge(sV, tV), c));
547 }
548 clearEdges(g);
549
550
551 // Perform triplet extension
552 typedef typename Iterator<TEdgeString, Standard>::Type TEdgeIter;
553 TEdgeString fullEdges;
554 for(TSize i=0; i< (TSize) length(seqGroup); ++i) {
555 TEdgeIter itInitial = begin(initialEdges, Standard());
556 TEdgeIter itInitialEnd = end(initialEdges, Standard());
557 for(; itInitial != itInitialEnd; ++itInitial) {
558 TSize seq1 = idToPosition(strSet, sequenceId(g, (*itInitial).v1));
559 TSize seq2 = idToPosition(strSet, sequenceId(g, (*itInitial).v2));
560 if ((seqLabels[seq1] == i) && (seqLabels[seq2] == i)) {
561 appendValue(fullEdges, *itInitial, Generous());
562 appendValue(fullEdges, TEdge((*itInitial).v2, (*itInitial).v1, (*itInitial).c), Generous());
563 }
564 }
565 ::std::sort(begin(fullEdges, Standard()), end(fullEdges, Standard()), LessMsaEdgeCargo_<TVertexDescriptor, TCargo>() );
566 typedef typename Iterator<TEdgeString, Standard>::Type TEdgeIter;
567 TEdgeIter itEdges1 = begin(fullEdges, Standard());
568 TEdgeIter itEdgesEnd = end(fullEdges, Standard());
569 for(; itEdges1 != itEdgesEnd; ++itEdges1) {
570 for(TEdgeIter itEdges2 = itEdges1; ++itEdges2 != itEdgesEnd;) {
571 if ((*itEdges1).v1 != (*itEdges2).v1) break;
572 if (sequenceId(g, (*itEdges1).v2) != sequenceId(g, (*itEdges2).v2)) {
573 TCargo weight = ((*itEdges1).c < (*itEdges2).c) ? (*itEdges1).c : (*itEdges2).c;
574 if ((*itEdges1).v2 < (*itEdges2).v2) {
575 TEdgeMapIter pos = newEMap.find(TNewEdge((*itEdges1).v2, (*itEdges2).v2));
576 if (pos != newEMap.end()) (*pos).second += weight;
577 else newEMap.insert(std::make_pair(TNewEdge((*itEdges1).v2, (*itEdges2).v2), weight));
578 } else {
579 TEdgeMapIter pos = newEMap.find(TNewEdge((*itEdges2).v2, (*itEdges1).v2));
580 if (pos != newEMap.end()) (*pos).second += weight;
581 else newEMap.insert(std::make_pair(TNewEdge((*itEdges2).v2, (*itEdges1).v2), weight));
582 }
583 }
584 }
585 }
586 clear(fullEdges);
587 }
588
589 // Insert edges
590 for(TEdgeMapIter itE = newEMap.begin(); itE != newEMap.end(); ++itE)
591 addEdge(g, (*itE).first.first, (*itE).first.second, (*itE).second);
592 }
593
594 //////////////////////////////////////////////////////////////////////////////
595
596 template<typename TStringSet, typename TCargo, typename TSpec>
597 inline void
graphBasedTripletLibraryExtension(Graph<Alignment<TStringSet,TCargo,TSpec>> & g)598 graphBasedTripletLibraryExtension(Graph<Alignment<TStringSet, TCargo, TSpec> >& g)
599 {
600 SEQAN_CHECKPOINT
601 typedef Graph<Alignment<TStringSet, TCargo, TSpec> > TGraph;
602 typedef typename VertexDescriptor<TGraph>::Type TVertexDescriptor;
603
604 // Two tasks:
605 // 1) Add edges for the case that a and c is aligned, b and c is aligned, but a and b are not, give these edges the appropriate weight
606 // 2) Augment all existing edges
607 String<TCargo> newCargoMap;
608 typedef String<TVertexDescriptor> TVertexString;
609 typedef String<TCargo> TCargoString;
610 TVertexString edges_vertices;
611 TCargoString edges_cargo;
612
613 // Triplet Extension
614 typedef typename EdgeDescriptor<TGraph>::Type TEdgeDescriptor;
615 typedef typename Iterator<TGraph, VertexIterator>::Type TVertexIterator;
616 typedef typename Iterator<TGraph, EdgeIterator>::Type TEdgeIterator;
617 typedef typename Iterator<TGraph, OutEdgeIterator>::Type TOutEdgeIterator;
618
619 // Remember the old cargo
620 resize(newCargoMap, getIdUpperBound(_getEdgeIdManager(g)), Exact());
621 TEdgeIterator it(g);
622 for(;!atEnd(it);++it)
623 property(newCargoMap, *it) = cargo(*it);
624
625 // Iterate over all vertices
626 for(TVertexIterator itVertex(g);!atEnd(itVertex);++itVertex) {
627 TOutEdgeIterator outIt1(g, *itVertex);
628 while (!atEnd(outIt1)) {
629 TOutEdgeIterator outIt2 = outIt1;
630 goNext(outIt2);
631 // Consider always 2 neighbors
632 while (!atEnd(outIt2)) {
633 TVertexDescriptor tV1 = targetVertex(outIt1);
634 TVertexDescriptor tV2 = targetVertex(outIt2);
635 if (sequenceId(g, tV1) != sequenceId(g,tV2)) {
636 TEdgeDescriptor e = findEdge(g, tV1, tV2);
637 if (e == 0) {
638 // New edge
639 TCargo val = (cargo(*outIt1) < cargo(*outIt2)) ? cargo(*outIt1) : cargo(*outIt2);
640
641 // Remember the edge with cargo
642 appendValue(edges_vertices, tV1, Generous());
643 appendValue(edges_vertices, tV2, Generous());
644 appendValue(edges_cargo, val, Generous());
645 } else {
646 // Increase weight of existing edge
647 if (cargo(*outIt2) > cargo(*outIt1)) property(newCargoMap, e) += cargo(*outIt1);
648 else property(newCargoMap, e) += cargo(*outIt2);
649 }
650 }
651 goNext(outIt2);
652 }
653 goNext(outIt1);
654 }
655 }
656
657
658 // Assign the new weights and clean-up the cargo map
659 TEdgeIterator itE(g);
660 for(;!atEnd(itE);++itE) cargo(*itE) = property(newCargoMap, *itE);
661 clear(newCargoMap);
662
663 // Add edges
664 typedef typename EdgeDescriptor<TGraph>::Type TEdgeDescriptor;
665 typedef typename Iterator<TVertexString, Standard>::Type TVertexStringIter;
666 typedef typename Iterator<TCargoString, Standard>::Type TCargoStringIter;
667
668 // Finally add the new edges created by the triplet approach
669 TVertexStringIter itV = begin(edges_vertices, Standard());
670 TVertexStringIter itVNext = begin(edges_vertices, Standard());
671 ++itVNext;
672 TVertexStringIter endIt = end(edges_vertices, Standard());
673 TCargoStringIter itC = begin(edges_cargo, Standard());
674 for(;itV != endIt; itV += 2, itVNext +=2, ++itC) {
675 // The same edge could have been created multiple times, so check if it exists
676 TEdgeDescriptor e = findEdge(g, *itV, *itVNext);
677 if (e == 0) addEdge(g, *itV, *itVNext, *itC);
678 else cargo(e) += *itC;
679 }
680 SEQAN_ASSERT_TRUE(itC == end(edges_cargo));
681 }
682
683 ////////////////////////////////////////////////////////////////////////////////
684 //
685 //template<typename TStringSet, typename TCargo, typename TSpec>
686 //inline void
687 //reducedTripletLibraryExtension(Graph<Alignment<TStringSet, TCargo, TSpec> >& g)
688 //{
689 // SEQAN_CHECKPOINT
690 // typedef Graph<Alignment<TStringSet, TCargo, TSpec> > TGraph;
691 // typedef typename VertexDescriptor<TGraph>::Type TVertexDescriptor;
692 // typedef typename EdgeDescriptor<TGraph>::Type TEdgeDescriptor;
693 // typedef typename Iterator<TGraph, VertexIterator>::Type TVertexIterator;
694 // typedef typename Iterator<TGraph, EdgeIterator>::Type TEdgeIterator;
695 // typedef typename Iterator<TGraph, OutEdgeIterator>::Type TOutEdgeIterator;
696 //
697 //
698 // // Just augment existing edges
699 // String<TCargo> newCargoMap;
700 // resize(newCargoMap, getIdUpperBound(_getEdgeIdManager(g)), Exact());
701 // TEdgeIterator it(g);
702 // for(;!atEnd(it);++it) assignProperty(newCargoMap, *it, cargo(*it));
703 //
704 // // Iterate over all vertices
705 // for(TVertexIterator itVertex(g);!atEnd(itVertex);++itVertex) {
706 // TOutEdgeIterator outIt1(g, *itVertex);
707 // while (!atEnd(outIt1)) {
708 // TOutEdgeIterator outIt2 = outIt1;
709 // goNext(outIt2);
710 // // Consider always 2 neighbors
711 // while (!atEnd(outIt2)) {
712 // TVertexDescriptor tV1 = targetVertex(outIt1);
713 // TVertexDescriptor tV2 = targetVertex(outIt2);
714 // if (sequenceId(g, tV1) != sequenceId(g,tV2)) {
715 // TEdgeDescriptor e = findEdge(g, tV1, tV2);
716 // if (e != 0) {
717 // // Increase weight of existing edge
718 // if (getCargo(*outIt2) > getCargo(*outIt1)) property(newCargoMap, e) += getCargo(*outIt1);
719 // else property(newCargoMap, e) += getCargo(*outIt2);
720 // }
721 // }
722 // goNext(outIt2);
723 // }
724 // goNext(outIt1);
725 // }
726 // }
727 //
728 // // Assign the new weights and clean-up the cargo map
729 // TEdgeIterator itE(g);
730 // for(;!atEnd(itE);goNext(itE)) cargo(value(itE)) = property(newCargoMap, value(itE));
731 // clear(newCargoMap);
732 //}
733
734 //////////////////////////////////////////////////////////////////////////////
735 // Sum of Pairs Scoring
736 //////////////////////////////////////////////////////////////////////////////
737
738
739 //////////////////////////////////////////////////////////////////////////////
740 // This version is sensitive to gap openings
741
742 /**
743 .Function.sumOfPairsScore:
744 ..summary:Given a multiple alignment, this function calculates the sum-of-pairs score.
745 ..cat:Graph
746 ..signature:
747 sumOfPairsScore(graph, score_type)
748 ..param.graph:An alignment graph.
749 ...type:Spec.Alignment Graph
750 ..param.score_type:A score object.
751 ...type:Class.Score
752 ..remarks:This function does NOT assume independent columns.
753 That is, gap openings are properly scored.
754 If you want the fast version assuming independ columns use sumOfPairsScoreInd.
755 ..returns:void
756 ..include:seqan/graph_msa.h
757 */
758 template<typename TStringSet, typename TCargo, typename TSpec, typename TScore>
759 inline typename Value<TScore>::Type
sumOfPairsScore(Graph<Alignment<TStringSet,TCargo,TSpec>> const & g,TScore const & score_type)760 sumOfPairsScore(Graph<Alignment<TStringSet, TCargo, TSpec> > const& g,
761 TScore const& score_type)
762 {
763 SEQAN_CHECKPOINT
764 typedef Graph<Alignment<TStringSet, TCargo, TSpec> > TGraph;
765 typedef typename Size<TGraph>::Type TSize;
766 typedef typename Value<TScore>::Type TScoreValue;
767 typedef typename Value<typename Value<TStringSet>::Type>::Type TAlphabet;
768
769 // Convert the graph
770 String<char> mat;
771 convertAlignment(g, mat);
772 char gapChar = gapValue<char>();
773
774 TScoreValue gap = scoreGapExtend(score_type);
775 TScoreValue gapOpen = scoreGapOpen(score_type);
776 TSize nseq = length(stringSet(g));
777 TSize len = length(mat) / nseq;
778
779 bool gapOpeni = false;
780 bool gapOpenj = false;
781 TScoreValue totalScore = 0;
782 for(TSize i = 0; i<nseq-1; ++i) {
783 for(TSize j=i+1; j<nseq; ++j) {
784 for(TSize k=0;k<len; ++k) {
785 if (value(mat, i*len+k) != gapChar) {
786 if (value(mat, j*len + k) != gapChar) {
787 gapOpeni = false;
788 gapOpenj = false;
789 totalScore += score(const_cast<TScore&>(score_type), TAlphabet(value(mat, i*len+k)), TAlphabet(value(mat, j*len + k)));
790 } else {
791 if (gapOpenj) {
792 totalScore += gap;
793 } else {
794 gapOpenj = true;
795 totalScore += gapOpen;
796 }
797 }
798 } else if (value(mat, j*len + k) != gapChar) {
799 if (gapOpeni) {
800 totalScore += gap;
801 } else {
802 gapOpeni = true;
803 totalScore += gapOpen;
804 }
805 }
806 }
807 }
808 }
809 return totalScore;
810 }
811
812 //////////////////////////////////////////////////////////////////////////////
813 // This version is insensitive to gap openings, assumes independent columns
814 template<typename TStringSet, typename TCargo, typename TSpec, typename TScore>
815 inline typename Value<TScore>::Type
sumOfPairsScoreInd(Graph<Alignment<TStringSet,TCargo,TSpec>> const & g,TScore const & score_type)816 sumOfPairsScoreInd(Graph<Alignment<TStringSet, TCargo, TSpec> > const& g,
817 TScore const& score_type)
818 {
819 SEQAN_CHECKPOINT
820 typedef Graph<Alignment<TStringSet, TCargo, TSpec> > TGraph;
821 typedef typename Size<TGraph>::Type TSize;
822 typedef typename Value<TScore>::Type TScoreValue;
823 typedef typename Value<typename Value<TStringSet>::Type>::Type TAlphabet;
824
825 // Convert the graph
826 String<char> mat;
827 convertAlignment(g, mat);
828 char gapChar = gapValue<char>();
829
830 TScoreValue gap = scoreGapExtend(score_type);
831 TSize nseq = length(stringSet(g));
832 TSize len = length(mat) / nseq;
833
834 TScoreValue totalScore = 0;
835 for(TSize k=0;k<len; ++k) {
836 for(TSize i = 0; i<nseq-1; ++i) {
837 for(TSize j=i+1; j<nseq; ++j) {
838 if (value(mat, i*len+k) != gapChar) {
839 if (value(mat, j*len + k) != gapChar) {
840 totalScore += score(const_cast<TScore&>(score_type), TAlphabet(value(mat, i*len+k)), TAlphabet(value(mat, j*len + k)));
841 } else totalScore += gap;
842 } else if (value(mat, j*len + k) != gapChar) {
843 totalScore += gap;
844 }
845 }
846 }
847 }
848 return totalScore;
849 }
850
851
852 //////////////////////////////////////////////////////////////////////////////
853
854 /**
855 .Function.alignmentEvaluation:
856 ..summary:Given a multiple alignment, this function calculates all kinds of alignment statistics.
857 ..cat:Graph
858 ..signature:
859 alignmentEvaluation(graph, score_type, gapExCount, gapCount, pairCount, numPairs, len)
860 ..param.graph:An alignment graph.
861 ...type:Spec.Alignment Graph
862 ..param.score_type:A score object.
863 ...type:Class.Score
864 ..param.gapExCount:Number of gap extensions.
865 ..param.gapCount:Number of gaps.
866 ..param.pairCount:Number of aligned pairs.
867 ..param.numPairs:Counter for each pair.
868 ..param.len:Alignment length.
869 ..returns:Score of the alignment.
870 ..include:seqan/graph_msa.h
871 */
872 template<typename TStringSet, typename TCargo, typename TSpec, typename TScore, typename TSize>
873 inline typename Value<TScore>::Type
alignmentEvaluation(Graph<Alignment<TStringSet,TCargo,TSpec>> const & g,TScore const & score_type,TSize & gapExCount,TSize & gapCount,TSize & pairCount,String<TSize> & numPairs,TSize & len)874 alignmentEvaluation(Graph<Alignment<TStringSet, TCargo, TSpec> > const& g,
875 TScore const& score_type,
876 TSize& gapExCount,
877 TSize& gapCount,
878 TSize& pairCount,
879 String<TSize>& numPairs,
880 TSize& len)
881 {
882 SEQAN_CHECKPOINT
883 typedef Graph<Alignment<TStringSet, TCargo, TSpec> > TGraph;
884 typedef typename Value<TScore>::Type TScoreValue;
885 typedef typename Value<typename Value<TStringSet>::Type>::Type TAlphabet;
886 TSize alphSize = ValueSize<TAlphabet>::VALUE;
887
888 // Initialization;
889 gapExCount = 0;
890 gapCount = 0;
891 pairCount = 0;
892 clear(numPairs);
893
894 // Convert the graph
895 String<char> mat;
896 convertAlignment(g, mat);
897 char gapChar = gapValue<char>();
898
899 TScoreValue gap = scoreGapExtend(score_type);
900 TScoreValue gapOpen = scoreGapOpen(score_type);
901 TSize nseq = length(stringSet(g));
902 len = length(mat) / nseq;
903
904 bool gapOpeni = false;
905 bool gapOpenj = false;
906 TScoreValue totalScore = 0;
907 resize(numPairs, alphSize * alphSize, 0);
908 for(TSize i = 0; i<nseq-1; ++i) {
909 for(TSize j=i+1; j<nseq; ++j) {
910 for(TSize k=0;k<len; ++k) {
911 if (value(mat, i*len+k) != gapChar) {
912 if (value(mat, j*len + k) != gapChar) {
913 gapOpeni = false;
914 gapOpenj = false;
915 ++pairCount;
916 TSize index1 = ordValue(TAlphabet(value(mat, i*len+k)));
917 TSize index2 = ordValue(TAlphabet(value(mat, j*len + k)));
918 value(numPairs, index1 * alphSize + index2) += 1;
919 totalScore += score(const_cast<TScore&>(score_type), TAlphabet(value(mat, i*len+k)), TAlphabet(value(mat, j*len + k)));
920 } else {
921 if (gapOpenj) {
922 ++gapExCount;
923 totalScore += gap;
924 } else {
925 gapOpenj = true;
926 ++gapCount;
927 totalScore += gapOpen;
928 }
929 }
930 } else if (value(mat, j*len + k) != gapChar) {
931 if (gapOpeni) {
932 ++gapExCount;
933 totalScore += gap;
934 } else {
935 ++gapCount;
936 gapOpeni = true;
937 totalScore += gapOpen;
938 }
939 }
940 }
941 }
942 }
943 return totalScore;
944 }
945
946 //////////////////////////////////////////////////////////////////////////////
947
948 template<typename TStringSet, typename TCargo, typename TSpec, typename TSource, typename TSpec2>
949 inline bool
convertAlignment(Graph<Alignment<TStringSet,TCargo,TSpec>> const & gAlign,Align<TSource,TSpec2> & align)950 convertAlignment(Graph<Alignment<TStringSet, TCargo, TSpec> > const& gAlign,
951 Align<TSource, TSpec2>& align)
952 {
953 // Pipe into Align data structure
954 String<char> mat;
955 if (convertAlignment(gAlign, mat)) {
956 typedef Align<TSource, TSpec2> TAlign;
957 typedef typename Size<TAlign>::Type TSize;
958 typedef typename Row<TAlign>::Type TRow;
959 typedef typename Iterator<TRow>::Type TRowIterator;
960 TStringSet& sourceSet = stringSet(gAlign);
961 TSize nseq = length(sourceSet);
962 clearGaps(align);
963 if (empty(rows(align))) {
964 resize(rows(align), nseq);
965 for(TSize i = 0; i<nseq; ++i) assignSource(row(align, i), sourceSet[i]);
966 }
967 String<TRowIterator> rowIter;
968 resize(rowIter, nseq);
969 for(TSize i = 0; i<nseq; ++i) value(rowIter, i) = begin(row(align, i));
970 TSize lenMat = length(mat);
971 TSize colLen = lenMat / nseq;
972 TSize gapCount = 0;
973 char gapChar = gapValue<char>();
974 for(TSize alignRow = 0; alignRow < nseq; ++alignRow) {
975 for(TSize pos = alignRow * colLen; pos < (alignRow + 1) * colLen; ++pos) {
976 if (value(mat, pos) != gapChar) {
977 if (gapCount) {
978 insertGaps(value(rowIter, alignRow), gapCount);
979 goFurther(value(rowIter, alignRow), gapCount);
980 gapCount = 0;
981 }
982 goNext(value(rowIter,alignRow));
983 } else ++gapCount;
984 }
985 if (gapCount) {
986 insertGaps(value(rowIter, alignRow), gapCount);
987 goFurther(value(rowIter, alignRow), gapCount);
988 gapCount = 0;
989 }
990 }
991 } else return false;
992 return true;
993 }
994
995
996 //////////////////////////////////////////////////////////////////////////////
997
998 template<typename TSource, typename TSpec2, typename TStringSet, typename TCargo, typename TSpec>
999 inline bool
convertAlignment(Align<TSource,TSpec2> const & align,Graph<Alignment<TStringSet,TCargo,TSpec>> & gAlign)1000 convertAlignment(Align<TSource, TSpec2> const& align,
1001 Graph<Alignment<TStringSet, TCargo, TSpec> >& gAlign)
1002 {
1003 typedef Align<TSource, TSpec2> const TAlign;
1004 typedef typename Value<TSource>::Type TAlphabet;
1005 typedef typename Size<TAlign>::Type TSize;
1006 typedef typename Row<TAlign>::Type TRow;
1007 typedef typename Iterator<TRow, Standard>::Type TRowIterator;
1008 clearVertices(gAlign);
1009 if (!length(stringSet(gAlign))) {
1010 TStringSet sourceSet = stringSet(const_cast<Align<TSource, TSpec2>&>(align));
1011 assignStringSet(gAlign, sourceSet);
1012 }
1013 TSize nseq = length(rows(align));
1014 String<TRowIterator> rowIter;
1015 String<TRowIterator> rowIterEnd;
1016 resize(rowIter, nseq);
1017 resize(rowIterEnd, nseq);
1018 for(TSize i = 0; i<nseq; ++i) {
1019 value(rowIter, i) = begin(row(align, i), Standard());
1020 value(rowIterEnd, i) = end(row(align, i), Standard());
1021 }
1022 String<Fragment<> > matches;
1023 for(TSize alignRow1 = 0; alignRow1 < nseq; ++alignRow1) {
1024 for(TSize alignRow2 = alignRow1 + 1; alignRow2 < nseq; ++alignRow2) {
1025 TRowIterator pos1 = value(rowIter,alignRow1);
1026 TRowIterator pos2 = value(rowIter,alignRow2);
1027 TSize alignPos = 0;
1028 TSize length = 0;
1029 TSize offset1 = 0;
1030 TSize offset2 = 0;
1031 for(;pos1 != value(rowIterEnd, alignRow1); ++pos1, ++pos2, ++alignPos) {
1032 if ((isGap(pos1)) || (isGap(pos2))) {
1033 if (length) {
1034 appendValue(matches, Fragment<>(alignRow1, alignPos - offset1 - length, alignRow2, alignPos - offset2 - length, length));
1035 length = 0;
1036 }
1037 if (isGap(pos1)) ++offset1;
1038 if (isGap(pos2)) ++offset2;
1039 } else ++length;
1040 }
1041 if (length) appendValue(matches, Fragment<>(alignRow1, alignPos - offset1 - length, alignRow2, alignPos - offset2 - length, length));
1042 }
1043 }
1044 matchRefinement(matches,stringSet(gAlign),gAlign);
1045 return true;
1046 }
1047
1048 }// namespace SEQAN_NAMESPACE_MAIN
1049
1050 #endif //#ifndef SEQAN_HEADER_...
1051