1 // ==========================================================================
2 //                 SeqAn - The Library for Sequence Analysis
3 // ==========================================================================
4 // Copyright (c) 2006-2010, Knut Reinert, FU Berlin
5 // All rights reserved.
6 //
7 // Redistribution and use in source and binary forms, with or without
8 // modification, are permitted provided that the following conditions are met:
9 //
10 //     * Redistributions of source code must retain the above copyright
11 //       notice, this list of conditions and the following disclaimer.
12 //     * Redistributions in binary form must reproduce the above copyright
13 //       notice, this list of conditions and the following disclaimer in the
14 //       documentation and/or other materials provided with the distribution.
15 //     * Neither the name of Knut Reinert or the FU Berlin nor the names of
16 //       its contributors may be used to endorse or promote products derived
17 //       from this software without specific prior written permission.
18 //
19 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20 // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 // ARE DISCLAIMED. IN NO EVENT SHALL KNUT REINERT OR THE FU BERLIN BE LIABLE
23 // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
25 // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
26 // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 // LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28 // OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
29 // DAMAGE.
30 //
31 // ==========================================================================
32 
33 #ifndef SEQAN_HEADER_GRAPH_ALIGN_TCOFFEE_BASE_H
34 #define SEQAN_HEADER_GRAPH_ALIGN_TCOFFEE_BASE_H
35 
36 namespace SEQAN_NAMESPACE_MAIN
37 {
38 
39 //////////////////////////////////////////////////////////////////////////////
40 // Distance unity
41 //////////////////////////////////////////////////////////////////////////////
42 
43 //static const int SEQAN_DISTANCE_UNITY = 1 << 20;
44 static const int SEQAN_DISTANCE_UNITY = 1;
45 
46 
47 //////////////////////////////////////////////////////////////////////////////
48 // Tags
49 //////////////////////////////////////////////////////////////////////////////
50 
51 //////////////////////////////////////////////////////////////////////////////
52 
53 /**
54 .Tag.Alignment Graph Combination:
55 ..summary:A tag to specify how to combine alignment graphs.
56 ..include:seqan/graph_msa.h
57 */
58 
59 /**
60 .Tag.Alignment Graph Combination.value.FractionalScore:
61 	Rescores matches with the appropriate fractional score.
62 ..include:seqan/graph_msa.h
63 */
64 struct FractionalScore_;
65 typedef Tag<FractionalScore_> const FractionalScore;
66 
67 
68 /**
69 .Tag.Alignment Graph Combination.value.FrequencyCount:
70 	Rescores matches with the frequency count for this edge.
71 ..include:seqan/graph_msa.h
72 */
73 struct FrequencyCounting_;
74 typedef Tag<FrequencyCounting_> const FrequencyCounting;
75 
76 /**
77 .Tag.Alignment Graph Combination.value.ReScore:
78 	Rescores the matches after segment-match refinement.
79 ..include:seqan/graph_msa.h
80 */
81 struct ReScore_;
82 typedef Tag<ReScore_> const ReScore;
83 
84 
85 //////////////////////////////////////////////////////////////////////////////
86 // Generating an alignment graph from segment matches
87 //////////////////////////////////////////////////////////////////////////////
88 
89 
90 //////////////////////////////////////////////////////////////////////////////
91 
92 template<typename TFragment, typename TSpec1, typename TScoreValue, typename TSpec2, typename TStringSet, typename TCargo, typename TSpec>
93 inline void
buildAlignmentGraph(String<TFragment,TSpec1> & matches,String<TScoreValue,TSpec2> & scores,Graph<Alignment<TStringSet,TCargo,TSpec>> & outGraph,FractionalScore)94 buildAlignmentGraph(String<TFragment, TSpec1>& matches,
95 					String<TScoreValue, TSpec2>& scores,
96 					Graph<Alignment<TStringSet, TCargo, TSpec> >& outGraph,
97 					FractionalScore)
98 {
99 	SEQAN_CHECKPOINT
100 	typedef String<TFragment, TSpec1> TFragmentString;
101 	typedef typename Iterator<TFragmentString, Standard>::Type TFragmentStringIter;
102 	typedef String<TScoreValue, TSpec2> TScoreValues;
103 	typedef typename Iterator<TScoreValues, Standard>::Type TScoreValuesIter;
104 	typedef Graph<Alignment<TStringSet, TCargo, TSpec> > TOutGraph;
105 	typedef typename Size<TFragmentString>::Type TSize;
106 	typedef typename Id<TOutGraph>::Type TId;
107 	typedef typename EdgeDescriptor<TOutGraph>::Type TEdgeDescriptor;
108 	typedef typename VertexDescriptor<TOutGraph>::Type TVertexDescriptor;
109 
110 	// Initialization
111 	clearVertices(outGraph);
112 	TStringSet& strSet = stringSet(outGraph);
113 
114 	// Segment-match refinement
115 	matchRefinement(matches,strSet,outGraph);
116 
117 	// Clear edge-weights
118 	typedef typename Iterator<TOutGraph, EdgeIterator>::Type TEdgeIterator;
119 	TEdgeIterator itE(outGraph);
120 	for(;!atEnd(itE);goNext(itE)) cargo(value(itE)) = 1;
121 
122 	// Adapt the scores
123 	TFragmentStringIter it = begin(matches, Standard() );
124 	TFragmentStringIter endIt = end(matches, Standard() );
125 	TScoreValuesIter scoreIt = begin(scores, Standard() );
126 	for(; it != endIt; ++it, ++scoreIt) {
127 		TId id1 = sequenceId(*it,0);
128 		TId id2 = sequenceId(*it,1);
129 		TSize pos1 = fragmentBegin(*it, id1);
130 		TSize pos2 = fragmentBegin(*it, id2);
131 		TSize fragLen = fragmentLength(*it, id1);
132 		TSize end1 = pos1 + fragLen;
133 		while(pos1 < end1) {
134 			TVertexDescriptor p1 = findVertex(outGraph, id1, pos1);
135 			TVertexDescriptor p2 = findVertex(outGraph, id2, pos2);
136 			TSize vertexLen = fragmentLength(outGraph, p1);
137 			cargo(findEdge(outGraph, p1, p2)) += (TCargo) ((vertexLen * (*scoreIt)) / fragLen);
138 			pos1 += vertexLen;
139 			pos2 += vertexLen;
140 		}
141 	}
142 }
143 
144 
145 //////////////////////////////////////////////////////////////////////////////
146 
147 template<typename TFragment, typename TSpec1, typename TStringSet, typename TCargo, typename TSpec>
148 inline void
buildAlignmentGraph(String<TFragment,TSpec1> & matches,Graph<Alignment<TStringSet,TCargo,TSpec>> & outGraph,FrequencyCounting)149 buildAlignmentGraph(String<TFragment, TSpec1>& matches,
150 					Graph<Alignment<TStringSet, TCargo, TSpec> >& outGraph,
151 					FrequencyCounting)
152 {
153 	SEQAN_CHECKPOINT
154 	typedef String<TFragment, TSpec1> TFragmentString;
155 	typedef Graph<Alignment<TStringSet, TCargo, TSpec> > TOutGraph;
156 	typedef typename Size<TFragmentString>::Type TSize;
157 
158 	// Initialization
159 	clearVertices(outGraph);
160 	TStringSet& strSet = stringSet(outGraph);
161 
162 	// Segment-match refinement
163 	matchRefinement(matches,strSet,outGraph);
164 }
165 
166 //////////////////////////////////////////////////////////////////////////////
167 
168 template<typename TFragment, typename TSpec1, typename TScoreValue, typename TSpec2, typename TStringSet, typename TCargo, typename TSpec>
169 inline void
buildAlignmentGraph(String<TFragment,TSpec1> & matches,String<TScoreValue,TSpec2> &,Graph<Alignment<TStringSet,TCargo,TSpec>> & outGraph,FrequencyCounting)170 buildAlignmentGraph(String<TFragment, TSpec1>& matches,
171 					String<TScoreValue, TSpec2>&,
172 					Graph<Alignment<TStringSet, TCargo, TSpec> >& outGraph,
173 					FrequencyCounting)
174 {
175 	buildAlignmentGraph(matches, outGraph, FrequencyCounting() );
176 }
177 
178 //////////////////////////////////////////////////////////////////////////////
179 
180 template<typename TString, typename TSpec, typename TScoreType, typename TSize, typename TSpec2, typename TScoreString, typename TScoreValue>
181 inline void
_scoreMatches(StringSet<TString,TSpec> const & seqSet,TScoreType const & scType,String<Fragment<TSize,ExactFragment<>>,TSpec2> const & matches,TScoreString & scores,TScoreValue offset)182 _scoreMatches(StringSet<TString, TSpec> const& seqSet,
183 			  TScoreType const& scType,
184 			  String<Fragment<TSize, ExactFragment<> >, TSpec2> const& matches,
185 			  TScoreString& scores,
186 			  TScoreValue offset)
187 {
188 	SEQAN_CHECKPOINT
189 	typedef String<Fragment<TSize, ExactFragment<> >, TSpec2> TFragmentString;
190 	typedef typename Id<typename Value<TFragmentString>::Type>::Type TId;
191 	typedef typename Iterator<TFragmentString, Standard>::Type TFragmentStringIter;
192 	typedef typename Iterator<TString, Standard>::Type TStringIter;
193 	typedef typename Iterator<TScoreString, Standard>::Type TScoreStringIter;
194 	resize(scores, length(matches));
195 
196 	// Get the scores
197 	TFragmentStringIter itF = begin(matches, Standard() );
198 	TFragmentStringIter itFEnd = end(matches, Standard() );
199 	TScoreStringIter itSc = begin(scores, Standard() );
200 	TId id1 = 0; TId id2 = 0;
201 	TSize pos1 = 0; TSize pos2 = 0;	TSize fragLen = 0;
202 	for(; itF != itFEnd; ++itF, ++itSc) {
203 		id1 = sequenceId(*itF,0);
204 		id2 = sequenceId(*itF,1);
205 		pos1 = fragmentBegin(*itF, id1);
206 		pos2 = fragmentBegin(*itF, id2);
207 		fragLen = fragmentLength(*itF, id1);
208 		TStringIter itS1 = begin(seqSet[idToPosition(seqSet, id1)], Standard() );
209 		itS1 += pos1;
210 		TStringIter itS2 = begin(seqSet[idToPosition(seqSet, id2)], Standard() );
211 		itS2 += pos2;
212 		*itSc = 0;
213 		for(TSize i = 0; i<fragLen; ++i, ++itS1, ++itS2)
214 			*itSc += offset + score(scType, *itS1, *itS2);
215 	}
216 }
217 
218 //////////////////////////////////////////////////////////////////////////////
219 
220 template<typename TString, typename TSpec, typename TScoreType, typename TFragment, typename TSpec2, typename TScoreString>
221 inline void
_scoreMatches(StringSet<TString,TSpec> const & seqSet,TScoreType const & scType,String<TFragment,TSpec2> const & matches,TScoreString & scores)222 _scoreMatches(StringSet<TString, TSpec> const& seqSet,
223 			  TScoreType const& scType,
224 			  String<TFragment, TSpec2> const& matches,
225 			  TScoreString& scores)
226 {
227 	SEQAN_CHECKPOINT
228 	_scoreMatches(seqSet, scType, matches, scores, (typename Value<TScoreString>::Type) 10);
229 }
230 
231 
232 //////////////////////////////////////////////////////////////////////////////
233 
234 template<typename TFragment, typename TSpec1, typename TScoreValue, typename TSpec2, typename TStringSet, typename TCargo, typename TSpec, typename TScore>
235 inline void
buildAlignmentGraph(String<TFragment,TSpec1> & matches,String<TScoreValue,TSpec2> & scores,Graph<Alignment<TStringSet,TCargo,TSpec>> & outGraph,TScore const & scType,ReScore)236 buildAlignmentGraph(String<TFragment, TSpec1>& matches,
237 					String<TScoreValue, TSpec2>& scores,
238 					Graph<Alignment<TStringSet, TCargo, TSpec> >& outGraph,
239 					TScore const& scType,
240 					ReScore)
241 {
242 	// ReScore
243 	_scoreMatches(stringSet(outGraph), scType, matches, scores);
244 
245 	// Use fractinal score
246 	buildAlignmentGraph(matches, scores, outGraph, FractionalScore() );
247 }
248 
249 
250 //////////////////////////////////////////////////////////////////////////////
251 // Consistency: Triplet extension
252 //////////////////////////////////////////////////////////////////////////////
253 
254 //////////////////////////////////////////////////////////////////////////////
255 
256 //////////////////////////////////////////////////////////////////////////////
257 
258 
259 
260 template<typename TVertexDescriptor, typename TCargo>
261 struct MsaEdgeCargo_ {
262  public:
263 	 TVertexDescriptor v1;
264 	 TVertexDescriptor v2;
265 	 TCargo c;
266 
MsaEdgeCargo_MsaEdgeCargo_267 	 MsaEdgeCargo_() {}
268 
269 
MsaEdgeCargo_MsaEdgeCargo_270 	 MsaEdgeCargo_(TVertexDescriptor vert1, TVertexDescriptor vert2, TCargo carg) :
271 	 v1(vert1), v2(vert2), c(carg) {}
272 };
273 
274 //////////////////////////////////////////////////////////////////////////////
275 
276 template<typename TVertexDescriptor, typename TCargo>
277 struct LessMsaEdgeCargo_ :
278 	public ::std::binary_function<TVertexDescriptor, TCargo, bool>
279 {
280 	inline bool
operatorLessMsaEdgeCargo_281 	operator() (MsaEdgeCargo_<TVertexDescriptor, TCargo> const& a1,
282 				MsaEdgeCargo_<TVertexDescriptor, TCargo> const& a2) const
283 	{
284 		return (a1.v1 == a2.v1) ? (a1.v2 < a2.v2) : (a1.v1 < a2.v1);
285 	}
286 };
287 
288 //////////////////////////////////////////////////////////////////////////////
289 
290 /**
291 .Function.tripletLibraryExtension:
292 ..summary:Performs a full or group-based consistency extension.
293 ..cat:Graph
294 ..signature:
295 tripletLibraryExtension(graph, [,guideTree, minMembers])
296 ..param.graph:An alignment graph.
297 ...type:Spec.Alignment Graph
298 ..param.guideTree:A guide tree.
299 ..param.minMembers:Minimum number of sequences per group.
300 ...remarks:If a guide tree and a minimum number of memebers is given, the triplet extension is limited to groups of sequences.
301 ..returns:void
302 ..include:seqan/graph_msa.h
303 */
304 template<typename TStringSet, typename TCargo, typename TSpec>
305 inline void
tripletLibraryExtension(Graph<Alignment<TStringSet,TCargo,TSpec>> & g)306 tripletLibraryExtension(Graph<Alignment<TStringSet, TCargo, TSpec> >& g)
307 {
308 	SEQAN_CHECKPOINT
309 	typedef Graph<Alignment<TStringSet, TCargo, TSpec> > TGraph;
310 	typedef typename Size<TGraph>::Type TSize;
311 	typedef typename VertexDescriptor<TGraph>::Type TVertexDescriptor;
312 	typedef typename Iterator<TGraph, EdgeIterator>::Type TEdgeIterator;
313 
314 	// Store all edges
315 	typedef std::pair<TVertexDescriptor, TVertexDescriptor> TNewEdge;
316 	typedef std::map<TNewEdge, TCargo> TEdgeMap;
317 	typedef typename TEdgeMap::iterator TEdgeMapIter;
318 	TEdgeMap newEMap;
319 	typedef MsaEdgeCargo_<TVertexDescriptor, TCargo> TEdge;
320 	typedef String<TEdge> TEdgeString;
321 	TEdgeString fullEdges;
322 	TEdgeIterator itE(g);
323 	for(;!atEnd(itE);goNext(itE)) {
324 		TVertexDescriptor sV = sourceVertex(itE);
325 		TVertexDescriptor tV = targetVertex(itE);
326 		TCargo c = cargo(*itE);
327 		appendValue(fullEdges, TEdge(sV, tV, c), Generous());
328 		appendValue(fullEdges, TEdge(tV, sV, c), Generous());
329 		newEMap.insert(std::make_pair(TNewEdge(sV, tV), c));
330 	}
331 	clearEdges(g);
332 	::std::sort(begin(fullEdges, Standard()), end(fullEdges, Standard()), LessMsaEdgeCargo_<TVertexDescriptor, TCargo>() );
333 
334 	// Perform triplet extension
335 	typedef typename Iterator<TEdgeString, Standard>::Type TEdgeIter;
336 	TEdgeIter itEdges1 = begin(fullEdges, Standard());
337 	TEdgeIter itEdgesEnd = end(fullEdges, Standard());
338 	for(; itEdges1 != itEdgesEnd; ++itEdges1) {
339 		for(TEdgeIter itEdges2 = itEdges1; ++itEdges2 != itEdgesEnd;) {
340 			if ((*itEdges1).v1 != (*itEdges2).v1) break;
341 			if (sequenceId(g, (*itEdges1).v2) != sequenceId(g, (*itEdges2).v2)) {
342 				TCargo weight = ((*itEdges1).c < (*itEdges2).c) ? (*itEdges1).c : (*itEdges2).c;
343 				if ((*itEdges1).v2 < (*itEdges2).v2) {
344 					TEdgeMapIter pos = newEMap.find(TNewEdge((*itEdges1).v2, (*itEdges2).v2));
345 					if (pos != newEMap.end()) (*pos).second += weight;
346 					else newEMap.insert(std::make_pair(TNewEdge((*itEdges1).v2, (*itEdges2).v2), weight));
347 				} else {
348 					TEdgeMapIter pos = newEMap.find(TNewEdge((*itEdges2).v2, (*itEdges1).v2));
349 					if (pos != newEMap.end()) (*pos).second += weight;
350 					else newEMap.insert(std::make_pair(TNewEdge((*itEdges2).v2, (*itEdges1).v2), weight));
351 				}
352 			}
353 		}
354 	}
355 	clear(fullEdges);
356 
357 	// Insert edges
358 	for(TEdgeMapIter itE = newEMap.begin(); itE != newEMap.end(); ++itE)
359 		addEdge(g, (*itE).first.first, (*itE).first.second, (*itE).second);
360 }
361 
362 //////////////////////////////////////////////////////////////////////////////
363 
364 template<typename TGuideTree, typename TSeqGroups, typename TGroupRoot, typename TSize>
365 inline void
_subTreeSearch(TGuideTree & guideTree,TSeqGroups & seqGroups,TGroupRoot & groupRoot,TSize minMembers)366 _subTreeSearch(TGuideTree& guideTree,
367 			   TSeqGroups& seqGroups,
368 			   TGroupRoot& groupRoot,
369 			   TSize minMembers)
370 {
371 	typedef typename Value<TSeqGroups>::Type TSeqGroup;
372 	typedef typename VertexDescriptor<TGuideTree>::Type TVertexDescriptor;
373 
374 	// Initialization
375 	TVertexDescriptor rootVertex = getRoot(guideTree);
376 	TSize nVertices = numVertices(guideTree);
377 	TSize nSeq = (nVertices / 2) + 1;
378 
379 	// Number of subsequent leaves for each node
380 	typedef typename Iterator<TGuideTree, BfsIterator>::Type TBfsIterator;
381 	String<TSize> numLeaves;
382 	resizeVertexMap(guideTree, numLeaves);
383 
384 	// All vertices in reversed bfs order
385 	typedef String<TVertexDescriptor> TVertexString;
386 	TVertexString vertices;
387 	resize(vertices, nVertices);
388 
389 	// Walk through the tree in bfs order
390 	TBfsIterator bfsIt(guideTree, rootVertex);
391 	TSize pos = length(vertices) - 1;
392 	for(;!atEnd(bfsIt);goNext(bfsIt), --pos) {
393 		if (isLeaf(guideTree, *bfsIt)) property(numLeaves, *bfsIt) = 1;
394 		else property(numLeaves, *bfsIt) = 0;
395 		value(vertices, pos) = *bfsIt;
396 	}
397 
398 	// Count the number of leaves for each internal node
399 	typedef typename Iterator<TVertexString, Standard>::Type TVertexIter;
400 	TVertexIter itVert = begin(vertices, Standard());
401 	TVertexIter itVertEnd = end(vertices, Standard());
402 	for(;itVert != itVertEnd; ++itVert) {
403 		if (!isLeaf(guideTree, *itVert)) {
404 			typedef typename Iterator<TGuideTree, AdjacencyIterator>::Type TAdjacencyIterator;
405 			TAdjacencyIterator adjIt(guideTree, *itVert);
406 			for(;!atEnd(adjIt);goNext(adjIt)) property(numLeaves, *itVert) += property(numLeaves, *adjIt);
407 		}
408 	}
409 
410 	// Delineate the groups
411 	itVert = begin(vertices, Standard());
412 	for(;itVert != itVertEnd; ++itVert) {
413 		if (property(numLeaves, *itVert) >= minMembers) {
414 			appendValue(seqGroups, TSeqGroup(), Generous());
415 			appendValue(groupRoot, *itVert, Generous());
416 			TSize elem = length(seqGroups) - 1;
417 			collectLeaves(guideTree, *itVert, seqGroups[elem]);
418 			property(numLeaves, *itVert) = 0;
419 			// Do not take any parent of the group root
420 			if (*itVert != rootVertex) {
421 				TVertexDescriptor pVert = parentVertex(guideTree, *itVert);
422 				while(pVert != rootVertex) {
423 					property(numLeaves, pVert) = 0;
424 					pVert = parentVertex(guideTree, pVert);
425 				}
426 				property(numLeaves, pVert) = 0;
427 			}
428 		}
429 	}
430 	if (!length(seqGroups)) {
431 		appendValue(seqGroups, TSeqGroup());
432 		appendValue(groupRoot, rootVertex);
433 		collectLeaves(guideTree, rootVertex, seqGroups[0]);
434 	}
435 
436 	// Label all internal vertices with the closest root node
437 	typedef Pair<TSize, TSize> TDistGroup; // Distance, group index
438 	String<TDistGroup> closestRoot;
439 	resize(closestRoot, getIdUpperBound(_getVertexIdManager(guideTree)), TDistGroup(0,0), Exact());
440 	for(TSize i=0; i< (TSize) length(groupRoot); ++i) {
441 		TVertexDescriptor v = groupRoot[i];
442 		TSize dist = 0;
443 		while(v != rootVertex) {
444 			++dist;
445 			v = parentVertex(guideTree, v);
446 			if ((property(closestRoot,v).i1 == 0) ||
447 				(property(closestRoot,v).i1 > dist)) {
448 				property(closestRoot, v) = TDistGroup(dist,i);
449 			}
450 		}
451 	}
452 
453 	// Find ungrouped vertices
454 	typedef typename Iterator<TSeqGroup, Standard>::Type TSeqGroupIter;
455 	TSeqGroup allGroupedLeaves;
456 	for(TSize i=0; i< (TSize) length(seqGroups); ++i) {
457 		TSeqGroupIter itSeqGroup = begin(seqGroups[i], Standard());
458 		TSeqGroupIter itSeqGroupEnd = end(seqGroups[i], Standard());
459 		for(;itSeqGroup != itSeqGroupEnd; ++itSeqGroup)
460 			appendValue(allGroupedLeaves, *itSeqGroup, Generous());
461 	}
462 	appendValue(allGroupedLeaves, nSeq);
463 	::std::sort(begin(allGroupedLeaves, Standard()), end(allGroupedLeaves, Standard()));
464 	TSeqGroupIter itSeqGroup = begin(allGroupedLeaves, Standard());
465 	TSeqGroupIter itSeqGroupEnd = end(allGroupedLeaves, Standard());
466 	TSize leftover = 0;
467 	TSeqGroup ungroupedLeaves;
468 	for(;itSeqGroup != itSeqGroupEnd; ++itSeqGroup, ++leftover) {
469 		while (leftover<*itSeqGroup) {
470 			appendValue(ungroupedLeaves, leftover, Generous());
471 			++leftover;
472 		}
473 	}
474 
475 
476 	//std::cout << guideTree << std::endl;
477 	//std::cout << nSeq << std::endl;
478 	//std::cout << length(groupRoot) << std::endl;
479 	//std::cout << length(allGroupedLeaves) << std::endl;
480 	//std::cout << length(ungroupedLeaves) << std::endl;
481 
482 	// Group the ungrouped vertices to the closest group
483 	clear(allGroupedLeaves);
484 	itSeqGroup = begin(ungroupedLeaves, Standard());
485 	itSeqGroupEnd = end(ungroupedLeaves, Standard());
486 	for(; itSeqGroup != itSeqGroupEnd; ++itSeqGroup) {
487 		TVertexDescriptor v = *itSeqGroup;
488 		while(v != rootVertex) {
489 			v = parentVertex(guideTree, v);
490 			if (property(closestRoot,v).i1 != 0) {
491 				appendValue(seqGroups[property(closestRoot,v).i2], *itSeqGroup, Generous());
492 				break;
493 			}
494 		}
495 	}
496 }
497 
498 //////////////////////////////////////////////////////////////////////////////
499 
500 template<typename TStringSet, typename TCargo, typename TSpec, typename TGuideTree, typename TSize>
501 inline void
tripletLibraryExtension(Graph<Alignment<TStringSet,TCargo,TSpec>> & g,TGuideTree & guideTree,TSize minMembers)502 tripletLibraryExtension(Graph<Alignment<TStringSet, TCargo, TSpec> >& g,
503 						TGuideTree& guideTree,
504 						TSize minMembers)
505 {
506 	SEQAN_CHECKPOINT
507 	typedef Graph<Alignment<TStringSet, TCargo, TSpec> > TGraph;
508 	typedef typename VertexDescriptor<TGraph>::Type TVertexDescriptor;
509 	typedef typename Iterator<TGraph, EdgeIterator>::Type TEdgeIterator;
510 	typedef typename VertexDescriptor<TGuideTree>::Type TTreeVertex;
511 	TStringSet& strSet = stringSet(g);
512 	TSize nSeq = length(strSet);
513 
514 	// Identify large subtrees
515 	typedef String<TSize> TSequenceGroups;
516 	String<TSequenceGroups> seqGroup;
517 	typedef String<TTreeVertex> TGroupRoot;
518 	TGroupRoot groupRoot;
519 	_subTreeSearch(guideTree, seqGroup, groupRoot, minMembers);
520 
521 	// Label the subtree sequences
522 	String<TSize> seqLabels;
523 	resize(seqLabels, nSeq);
524 	typedef typename Iterator<TSequenceGroups, Standard>::Type TSeqSetIter;
525 	for(TSize i=0; i< (TSize) length(seqGroup); ++i) {
526 		TSeqSetIter itSeqGroup = begin(seqGroup[i], Standard());
527 		TSeqSetIter itSeqGroupEnd = end(seqGroup[i], Standard());
528 		for(;itSeqGroup != itSeqGroupEnd; ++itSeqGroup)
529 			seqLabels[*itSeqGroup] = i;
530 	}
531 
532 	// Store all edges
533 	typedef std::pair<TVertexDescriptor, TVertexDescriptor> TNewEdge;
534 	typedef std::map<TNewEdge, TCargo> TEdgeMap;
535 	typedef typename TEdgeMap::iterator TEdgeMapIter;
536 	TEdgeMap newEMap;
537 	typedef MsaEdgeCargo_<TVertexDescriptor, TCargo> TEdge;
538 	typedef String<TEdge> TEdgeString;
539 	TEdgeString initialEdges;
540 	TEdgeIterator itE(g);
541 	for(;!atEnd(itE);goNext(itE)) {
542 		TVertexDescriptor sV = sourceVertex(itE);
543 		TVertexDescriptor tV = targetVertex(itE);
544 		TCargo c = cargo(*itE);
545 		appendValue(initialEdges, TEdge(sV, tV, c), Generous());
546 		newEMap.insert(std::make_pair(TNewEdge(sV, tV), c));
547 	}
548 	clearEdges(g);
549 
550 
551 	// Perform triplet extension
552 	typedef typename Iterator<TEdgeString, Standard>::Type TEdgeIter;
553 	TEdgeString fullEdges;
554 	for(TSize i=0; i< (TSize) length(seqGroup); ++i) {
555 		TEdgeIter itInitial = begin(initialEdges, Standard());
556 		TEdgeIter itInitialEnd = end(initialEdges, Standard());
557 		for(; itInitial != itInitialEnd; ++itInitial) {
558 			TSize seq1 = idToPosition(strSet, sequenceId(g, (*itInitial).v1));
559 			TSize seq2 = idToPosition(strSet, sequenceId(g, (*itInitial).v2));
560 			if ((seqLabels[seq1] == i) && (seqLabels[seq2] == i)) {
561 				appendValue(fullEdges, *itInitial, Generous());
562 				appendValue(fullEdges, TEdge((*itInitial).v2, (*itInitial).v1, (*itInitial).c), Generous());
563 			}
564 		}
565 		::std::sort(begin(fullEdges, Standard()), end(fullEdges, Standard()), LessMsaEdgeCargo_<TVertexDescriptor, TCargo>() );
566 		typedef typename Iterator<TEdgeString, Standard>::Type TEdgeIter;
567 		TEdgeIter itEdges1 = begin(fullEdges, Standard());
568 		TEdgeIter itEdgesEnd = end(fullEdges, Standard());
569 		for(; itEdges1 != itEdgesEnd; ++itEdges1) {
570 			for(TEdgeIter itEdges2 = itEdges1; ++itEdges2 != itEdgesEnd;) {
571 				if ((*itEdges1).v1 != (*itEdges2).v1) break;
572 				if (sequenceId(g, (*itEdges1).v2) != sequenceId(g, (*itEdges2).v2)) {
573 					TCargo weight = ((*itEdges1).c < (*itEdges2).c) ? (*itEdges1).c : (*itEdges2).c;
574 					if ((*itEdges1).v2 < (*itEdges2).v2) {
575 						TEdgeMapIter pos = newEMap.find(TNewEdge((*itEdges1).v2, (*itEdges2).v2));
576 						if (pos != newEMap.end()) (*pos).second += weight;
577 						else newEMap.insert(std::make_pair(TNewEdge((*itEdges1).v2, (*itEdges2).v2), weight));
578 					} else {
579 						TEdgeMapIter pos = newEMap.find(TNewEdge((*itEdges2).v2, (*itEdges1).v2));
580 						if (pos != newEMap.end()) (*pos).second += weight;
581 						else newEMap.insert(std::make_pair(TNewEdge((*itEdges2).v2, (*itEdges1).v2), weight));
582 					}
583 				}
584 			}
585 		}
586 		clear(fullEdges);
587 	}
588 
589 	// Insert edges
590 	for(TEdgeMapIter itE = newEMap.begin(); itE != newEMap.end(); ++itE)
591 		addEdge(g, (*itE).first.first, (*itE).first.second, (*itE).second);
592 }
593 
594 //////////////////////////////////////////////////////////////////////////////
595 
596 template<typename TStringSet, typename TCargo, typename TSpec>
597 inline void
graphBasedTripletLibraryExtension(Graph<Alignment<TStringSet,TCargo,TSpec>> & g)598 graphBasedTripletLibraryExtension(Graph<Alignment<TStringSet, TCargo, TSpec> >& g)
599 {
600 	SEQAN_CHECKPOINT
601 	typedef Graph<Alignment<TStringSet, TCargo, TSpec> > TGraph;
602 	typedef typename VertexDescriptor<TGraph>::Type TVertexDescriptor;
603 
604 	// Two tasks:
605 	// 1) Add edges for the case that a and c is aligned, b and c is aligned, but a and b are not, give these edges the appropriate weight
606 	// 2) Augment all existing edges
607 	String<TCargo> newCargoMap;
608 	typedef String<TVertexDescriptor> TVertexString;
609 	typedef String<TCargo> TCargoString;
610 	TVertexString edges_vertices;
611 	TCargoString edges_cargo;
612 
613 	// Triplet Extension
614 	typedef typename EdgeDescriptor<TGraph>::Type TEdgeDescriptor;
615 	typedef typename Iterator<TGraph, VertexIterator>::Type TVertexIterator;
616 	typedef typename Iterator<TGraph, EdgeIterator>::Type TEdgeIterator;
617 	typedef typename Iterator<TGraph, OutEdgeIterator>::Type TOutEdgeIterator;
618 
619 	// Remember the old cargo
620 	resize(newCargoMap, getIdUpperBound(_getEdgeIdManager(g)), Exact());
621 	TEdgeIterator it(g);
622 	for(;!atEnd(it);++it)
623 		property(newCargoMap, *it) = cargo(*it);
624 
625 	// Iterate over all vertices
626 	for(TVertexIterator itVertex(g);!atEnd(itVertex);++itVertex) {
627 		TOutEdgeIterator outIt1(g, *itVertex);
628 		while (!atEnd(outIt1)) {
629 			TOutEdgeIterator outIt2 = outIt1;
630 			goNext(outIt2);
631 			// Consider always 2 neighbors
632 			while (!atEnd(outIt2)) {
633 				TVertexDescriptor tV1 = targetVertex(outIt1);
634 				TVertexDescriptor tV2 = targetVertex(outIt2);
635 				if (sequenceId(g, tV1) != sequenceId(g,tV2)) {
636 					TEdgeDescriptor e = findEdge(g, tV1, tV2);
637 					if (e == 0) {
638 						// New edge
639 						TCargo val = (cargo(*outIt1) < cargo(*outIt2)) ? cargo(*outIt1) : cargo(*outIt2);
640 
641 						// Remember the edge with cargo
642 						appendValue(edges_vertices, tV1, Generous());
643 						appendValue(edges_vertices, tV2, Generous());
644 						appendValue(edges_cargo, val, Generous());
645 					} else {
646 						// Increase weight of existing edge
647 						if (cargo(*outIt2) > cargo(*outIt1)) property(newCargoMap, e) += cargo(*outIt1);
648 						else property(newCargoMap, e) += cargo(*outIt2);
649 					}
650 				}
651 				goNext(outIt2);
652 			}
653 			goNext(outIt1);
654 		}
655 	}
656 
657 
658 	// Assign the new weights and clean-up the cargo map
659 	TEdgeIterator itE(g);
660 	for(;!atEnd(itE);++itE) cargo(*itE) = property(newCargoMap, *itE);
661 	clear(newCargoMap);
662 
663 	// Add edges
664 	typedef typename EdgeDescriptor<TGraph>::Type TEdgeDescriptor;
665 	typedef typename Iterator<TVertexString, Standard>::Type TVertexStringIter;
666 	typedef typename Iterator<TCargoString, Standard>::Type TCargoStringIter;
667 
668 	// Finally add the new edges created by the triplet approach
669 	TVertexStringIter itV = begin(edges_vertices, Standard());
670 	TVertexStringIter itVNext = begin(edges_vertices, Standard());
671 	++itVNext;
672 	TVertexStringIter endIt = end(edges_vertices, Standard());
673 	TCargoStringIter itC = begin(edges_cargo, Standard());
674 	for(;itV != endIt; itV += 2, itVNext +=2, ++itC) {
675 		// The same edge could have been created multiple times, so check if it exists
676 		TEdgeDescriptor e = findEdge(g, *itV, *itVNext);
677 		if (e == 0) addEdge(g, *itV, *itVNext, *itC);
678 		else cargo(e) += *itC;
679 	}
680 	SEQAN_ASSERT_TRUE(itC == end(edges_cargo));
681 }
682 
683 ////////////////////////////////////////////////////////////////////////////////
684 //
685 //template<typename TStringSet, typename TCargo, typename TSpec>
686 //inline void
687 //reducedTripletLibraryExtension(Graph<Alignment<TStringSet, TCargo, TSpec> >& g)
688 //{
689 //	SEQAN_CHECKPOINT
690 //	typedef Graph<Alignment<TStringSet, TCargo, TSpec> > TGraph;
691 //	typedef typename VertexDescriptor<TGraph>::Type TVertexDescriptor;
692 //	typedef typename EdgeDescriptor<TGraph>::Type TEdgeDescriptor;
693 //	typedef typename Iterator<TGraph, VertexIterator>::Type TVertexIterator;
694 //	typedef typename Iterator<TGraph, EdgeIterator>::Type TEdgeIterator;
695 //	typedef typename Iterator<TGraph, OutEdgeIterator>::Type TOutEdgeIterator;
696 //
697 //
698 //	// Just augment existing edges
699 //	String<TCargo> newCargoMap;
700 //	resize(newCargoMap, getIdUpperBound(_getEdgeIdManager(g)), Exact());
701 //	TEdgeIterator it(g);
702 //	for(;!atEnd(it);++it) assignProperty(newCargoMap, *it, cargo(*it));
703 //
704 //	// Iterate over all vertices
705 //	for(TVertexIterator itVertex(g);!atEnd(itVertex);++itVertex) {
706 //		TOutEdgeIterator outIt1(g, *itVertex);
707 //		while (!atEnd(outIt1)) {
708 //			TOutEdgeIterator outIt2 = outIt1;
709 //			goNext(outIt2);
710 //			// Consider always 2 neighbors
711 //			while (!atEnd(outIt2)) {
712 //				TVertexDescriptor tV1 = targetVertex(outIt1);
713 //				TVertexDescriptor tV2 = targetVertex(outIt2);
714 //				if (sequenceId(g, tV1) != sequenceId(g,tV2)) {
715 //					TEdgeDescriptor e = findEdge(g, tV1, tV2);
716 //					if (e != 0) {
717 //						// Increase weight of existing edge
718 //						if (getCargo(*outIt2) > getCargo(*outIt1)) property(newCargoMap, e) += getCargo(*outIt1);
719 //						else property(newCargoMap, e) += getCargo(*outIt2);
720 //					}
721 //				}
722 //				goNext(outIt2);
723 //			}
724 //			goNext(outIt1);
725 //		}
726 //	}
727 //
728 //	// Assign the new weights and clean-up the cargo map
729 //	TEdgeIterator itE(g);
730 //	for(;!atEnd(itE);goNext(itE)) cargo(value(itE)) = property(newCargoMap, value(itE));
731 //	clear(newCargoMap);
732 //}
733 
734 //////////////////////////////////////////////////////////////////////////////
735 // Sum of Pairs Scoring
736 //////////////////////////////////////////////////////////////////////////////
737 
738 
739 //////////////////////////////////////////////////////////////////////////////
740 // This version is sensitive to gap openings
741 
742 /**
743 .Function.sumOfPairsScore:
744 ..summary:Given a multiple alignment, this function calculates the sum-of-pairs score.
745 ..cat:Graph
746 ..signature:
747 sumOfPairsScore(graph, score_type)
748 ..param.graph:An alignment graph.
749 ...type:Spec.Alignment Graph
750 ..param.score_type:A score object.
751 ...type:Class.Score
752 ..remarks:This function does NOT assume independent columns.
753 That is, gap openings are properly scored.
754 If you want the fast version assuming independ columns use sumOfPairsScoreInd.
755 ..returns:void
756 ..include:seqan/graph_msa.h
757 */
758 template<typename TStringSet, typename TCargo, typename TSpec, typename TScore>
759 inline typename Value<TScore>::Type
sumOfPairsScore(Graph<Alignment<TStringSet,TCargo,TSpec>> const & g,TScore const & score_type)760 sumOfPairsScore(Graph<Alignment<TStringSet, TCargo, TSpec> > const& g,
761 				TScore const& score_type)
762 {
763 	SEQAN_CHECKPOINT
764 	typedef Graph<Alignment<TStringSet, TCargo, TSpec> > TGraph;
765 	typedef typename Size<TGraph>::Type TSize;
766 	typedef typename Value<TScore>::Type TScoreValue;
767 	typedef typename Value<typename Value<TStringSet>::Type>::Type TAlphabet;
768 
769 	// Convert the graph
770 	String<char> mat;
771 	convertAlignment(g, mat);
772 	char gapChar = gapValue<char>();
773 
774 	TScoreValue gap = scoreGapExtend(score_type);
775 	TScoreValue gapOpen = scoreGapOpen(score_type);
776 	TSize nseq = length(stringSet(g));
777 	TSize len = length(mat) / nseq;
778 
779 	bool gapOpeni = false;
780 	bool gapOpenj = false;
781 	TScoreValue totalScore = 0;
782 	for(TSize i = 0; i<nseq-1; ++i) {
783 		for(TSize j=i+1; j<nseq; ++j) {
784 			for(TSize k=0;k<len; ++k) {
785 				if (value(mat, i*len+k) != gapChar) {
786 					if (value(mat, j*len + k) != gapChar) {
787 						gapOpeni = false;
788 						gapOpenj = false;
789 						totalScore += score(const_cast<TScore&>(score_type), TAlphabet(value(mat, i*len+k)), TAlphabet(value(mat, j*len + k)));
790 					} else {
791 						if (gapOpenj) {
792 							totalScore += gap;
793 						} else {
794 							gapOpenj = true;
795 							totalScore += gapOpen;
796 						}
797 					}
798 				} else if (value(mat, j*len + k) != gapChar) {
799 						if (gapOpeni) {
800 							totalScore += gap;
801 						} else {
802 							gapOpeni = true;
803 							totalScore += gapOpen;
804 						}
805 				}
806 			}
807 		}
808 	}
809 	return totalScore;
810 }
811 
812 //////////////////////////////////////////////////////////////////////////////
813 // This version is insensitive to gap openings, assumes independent columns
814 template<typename TStringSet, typename TCargo, typename TSpec, typename TScore>
815 inline typename Value<TScore>::Type
sumOfPairsScoreInd(Graph<Alignment<TStringSet,TCargo,TSpec>> const & g,TScore const & score_type)816 sumOfPairsScoreInd(Graph<Alignment<TStringSet, TCargo, TSpec> > const& g,
817 				   TScore const& score_type)
818 {
819 	SEQAN_CHECKPOINT
820 	typedef Graph<Alignment<TStringSet, TCargo, TSpec> > TGraph;
821 	typedef typename Size<TGraph>::Type TSize;
822 	typedef typename Value<TScore>::Type TScoreValue;
823 	typedef typename Value<typename Value<TStringSet>::Type>::Type TAlphabet;
824 
825 	// Convert the graph
826 	String<char> mat;
827 	convertAlignment(g, mat);
828 	char gapChar = gapValue<char>();
829 
830 	TScoreValue gap = scoreGapExtend(score_type);
831 	TSize nseq = length(stringSet(g));
832 	TSize len = length(mat) / nseq;
833 
834 	TScoreValue totalScore = 0;
835 	for(TSize k=0;k<len; ++k) {
836 		for(TSize i = 0; i<nseq-1; ++i) {
837 			for(TSize j=i+1; j<nseq; ++j) {
838 				if (value(mat, i*len+k) != gapChar) {
839 					if (value(mat, j*len + k) != gapChar) {
840 						totalScore += score(const_cast<TScore&>(score_type), TAlphabet(value(mat, i*len+k)), TAlphabet(value(mat, j*len + k)));
841 					} else totalScore += gap;
842 				} else if (value(mat, j*len + k) != gapChar) {
843 						totalScore += gap;
844 				}
845 			}
846 		}
847 	}
848 	return totalScore;
849 }
850 
851 
852 //////////////////////////////////////////////////////////////////////////////
853 
854 /**
855 .Function.alignmentEvaluation:
856 ..summary:Given a multiple alignment, this function calculates all kinds of alignment statistics.
857 ..cat:Graph
858 ..signature:
859 alignmentEvaluation(graph, score_type, gapExCount, gapCount, pairCount, numPairs, len)
860 ..param.graph:An alignment graph.
861 ...type:Spec.Alignment Graph
862 ..param.score_type:A score object.
863 ...type:Class.Score
864 ..param.gapExCount:Number of gap extensions.
865 ..param.gapCount:Number of gaps.
866 ..param.pairCount:Number of aligned pairs.
867 ..param.numPairs:Counter for each pair.
868 ..param.len:Alignment length.
869 ..returns:Score of the alignment.
870 ..include:seqan/graph_msa.h
871 */
872 template<typename TStringSet, typename TCargo, typename TSpec, typename TScore, typename TSize>
873 inline typename Value<TScore>::Type
alignmentEvaluation(Graph<Alignment<TStringSet,TCargo,TSpec>> const & g,TScore const & score_type,TSize & gapExCount,TSize & gapCount,TSize & pairCount,String<TSize> & numPairs,TSize & len)874 alignmentEvaluation(Graph<Alignment<TStringSet, TCargo, TSpec> > const& g,
875 					TScore const& score_type,
876 					TSize& gapExCount,
877 					TSize& gapCount,
878 					TSize& pairCount,
879 					String<TSize>& numPairs,
880 					TSize& len)
881 {
882 	SEQAN_CHECKPOINT
883 	typedef Graph<Alignment<TStringSet, TCargo, TSpec> > TGraph;
884 	typedef typename Value<TScore>::Type TScoreValue;
885 	typedef typename Value<typename Value<TStringSet>::Type>::Type TAlphabet;
886 	TSize alphSize = ValueSize<TAlphabet>::VALUE;
887 
888 	// Initialization;
889 	gapExCount = 0;
890 	gapCount = 0;
891 	pairCount = 0;
892 	clear(numPairs);
893 
894 	// Convert the graph
895 	String<char> mat;
896 	convertAlignment(g, mat);
897 	char gapChar = gapValue<char>();
898 
899 	TScoreValue gap = scoreGapExtend(score_type);
900 	TScoreValue gapOpen = scoreGapOpen(score_type);
901 	TSize nseq = length(stringSet(g));
902 	len = length(mat) / nseq;
903 
904 	bool gapOpeni = false;
905 	bool gapOpenj = false;
906 	TScoreValue totalScore = 0;
907 	resize(numPairs, alphSize * alphSize, 0);
908 	for(TSize i = 0; i<nseq-1; ++i) {
909 		for(TSize j=i+1; j<nseq; ++j) {
910 			for(TSize k=0;k<len; ++k) {
911 				if (value(mat, i*len+k) != gapChar) {
912 					if (value(mat, j*len + k) != gapChar) {
913 						gapOpeni = false;
914 						gapOpenj = false;
915 						++pairCount;
916 						TSize index1 = ordValue(TAlphabet(value(mat, i*len+k)));
917 						TSize index2 = ordValue(TAlphabet(value(mat, j*len + k)));
918 						value(numPairs, index1 * alphSize + index2) += 1;
919 						totalScore += score(const_cast<TScore&>(score_type), TAlphabet(value(mat, i*len+k)), TAlphabet(value(mat, j*len + k)));
920 					} else {
921 						if (gapOpenj) {
922 							++gapExCount;
923 							totalScore += gap;
924 						} else {
925 							gapOpenj = true;
926 							++gapCount;
927 							totalScore += gapOpen;
928 						}
929 					}
930 				} else if (value(mat, j*len + k) != gapChar) {
931 						if (gapOpeni) {
932 							++gapExCount;
933 							totalScore += gap;
934 						} else {
935 							++gapCount;
936 							gapOpeni = true;
937 							totalScore += gapOpen;
938 						}
939 				}
940 			}
941 		}
942 	}
943 	return totalScore;
944 }
945 
946 //////////////////////////////////////////////////////////////////////////////
947 
948 template<typename TStringSet, typename TCargo, typename TSpec, typename TSource, typename TSpec2>
949 inline bool
convertAlignment(Graph<Alignment<TStringSet,TCargo,TSpec>> const & gAlign,Align<TSource,TSpec2> & align)950 convertAlignment(Graph<Alignment<TStringSet, TCargo, TSpec> > const& gAlign,
951 				 Align<TSource, TSpec2>& align)
952 {
953 	// Pipe into Align data structure
954 	String<char> mat;
955 	if (convertAlignment(gAlign, mat)) {
956 		typedef Align<TSource, TSpec2> TAlign;
957 		typedef typename Size<TAlign>::Type TSize;
958 		typedef typename Row<TAlign>::Type TRow;
959 		typedef typename Iterator<TRow>::Type TRowIterator;
960 		TStringSet& sourceSet = stringSet(gAlign);
961 		TSize nseq = length(sourceSet);
962 		clearGaps(align);
963 		if (empty(rows(align))) {
964 			resize(rows(align), nseq);
965 			for(TSize i = 0; i<nseq; ++i) assignSource(row(align, i), sourceSet[i]);
966 		}
967 		String<TRowIterator> rowIter;
968 		resize(rowIter, nseq);
969 		for(TSize i = 0; i<nseq; ++i) value(rowIter, i) = begin(row(align, i));
970 		TSize lenMat = length(mat);
971 		TSize colLen = lenMat / nseq;
972 		TSize gapCount = 0;
973 		char gapChar = gapValue<char>();
974 		for(TSize alignRow = 0; alignRow < nseq; ++alignRow) {
975 			for(TSize pos = alignRow * colLen; pos < (alignRow + 1) * colLen; ++pos) {
976 				if (value(mat, pos) != gapChar) {
977 					if (gapCount) {
978 						insertGaps(value(rowIter, alignRow), gapCount);
979 						goFurther(value(rowIter, alignRow), gapCount);
980 						gapCount = 0;
981 					}
982 					goNext(value(rowIter,alignRow));
983 				} else ++gapCount;
984 			}
985 			if (gapCount) {
986 				insertGaps(value(rowIter, alignRow), gapCount);
987 				goFurther(value(rowIter, alignRow), gapCount);
988 				gapCount = 0;
989 			}
990 		}
991 	} else return false;
992 	return true;
993 }
994 
995 
996 //////////////////////////////////////////////////////////////////////////////
997 
998 template<typename TSource, typename TSpec2, typename TStringSet, typename TCargo, typename TSpec>
999 inline bool
convertAlignment(Align<TSource,TSpec2> const & align,Graph<Alignment<TStringSet,TCargo,TSpec>> & gAlign)1000 convertAlignment(Align<TSource, TSpec2> const& align,
1001 				 Graph<Alignment<TStringSet, TCargo, TSpec> >& gAlign)
1002 {
1003 	typedef Align<TSource, TSpec2> const TAlign;
1004 	typedef typename Value<TSource>::Type TAlphabet;
1005 	typedef typename Size<TAlign>::Type TSize;
1006 	typedef typename Row<TAlign>::Type TRow;
1007 	typedef typename Iterator<TRow, Standard>::Type TRowIterator;
1008 	clearVertices(gAlign);
1009 	if (!length(stringSet(gAlign))) {
1010 		TStringSet sourceSet = stringSet(const_cast<Align<TSource, TSpec2>&>(align));
1011 		assignStringSet(gAlign, sourceSet);
1012 	}
1013 	TSize nseq = length(rows(align));
1014 	String<TRowIterator> rowIter;
1015 	String<TRowIterator> rowIterEnd;
1016 	resize(rowIter, nseq);
1017 	resize(rowIterEnd, nseq);
1018 	for(TSize i = 0; i<nseq; ++i) {
1019 		value(rowIter, i) = begin(row(align, i), Standard());
1020 		value(rowIterEnd, i) = end(row(align, i), Standard());
1021 	}
1022 	String<Fragment<> > matches;
1023 	for(TSize alignRow1 = 0; alignRow1 < nseq; ++alignRow1) {
1024 		for(TSize alignRow2 = alignRow1 + 1; alignRow2 < nseq; ++alignRow2) {
1025 			TRowIterator pos1 = value(rowIter,alignRow1);
1026 			TRowIterator pos2 = value(rowIter,alignRow2);
1027 			TSize alignPos = 0;
1028 			TSize length = 0;
1029 			TSize offset1 = 0;
1030 			TSize offset2 = 0;
1031 			for(;pos1 != value(rowIterEnd, alignRow1); ++pos1, ++pos2, ++alignPos) {
1032 				if ((isGap(pos1)) || (isGap(pos2))) {
1033 					if (length) {
1034 						appendValue(matches, Fragment<>(alignRow1, alignPos - offset1 - length, alignRow2, alignPos - offset2 - length, length));
1035 						length = 0;
1036 					}
1037 					if (isGap(pos1)) ++offset1;
1038 					if (isGap(pos2)) ++offset2;
1039 				} else ++length;
1040 			}
1041 			if (length) appendValue(matches, Fragment<>(alignRow1, alignPos - offset1 - length, alignRow2, alignPos - offset2 - length, length));
1042 		}
1043 	}
1044 	matchRefinement(matches,stringSet(gAlign),gAlign);
1045 	return true;
1046 }
1047 
1048 }// namespace SEQAN_NAMESPACE_MAIN
1049 
1050 #endif //#ifndef SEQAN_HEADER_...
1051