1 // ==========================================================================
2 //                 SeqAn - The Library for Sequence Analysis
3 // ==========================================================================
4 // Copyright (c) 2006-2015, Knut Reinert, FU Berlin
5 // All rights reserved.
6 //
7 // Redistribution and use in source and binary forms, with or without
8 // modification, are permitted provided that the following conditions are met:
9 //
10 //     * Redistributions of source code must retain the above copyright
11 //       notice, this list of conditions and the following disclaimer.
12 //     * Redistributions in binary form must reproduce the above copyright
13 //       notice, this list of conditions and the following disclaimer in the
14 //       documentation and/or other materials provided with the distribution.
15 //     * Neither the name of Knut Reinert or the FU Berlin nor the names of
16 //       its contributors may be used to endorse or promote products derived
17 //       from this software without specific prior written permission.
18 //
19 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20 // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 // ARE DISCLAIMED. IN NO EVENT SHALL KNUT REINERT OR THE FU BERLIN BE LIABLE
23 // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
25 // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
26 // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 // LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28 // OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
29 // DAMAGE.
30 //
31 // ==========================================================================
32 // Author: Tobias Rausch <rausch@embl.de>
33 // Author: Anne-Katrin Emde <anne-katrin.emde@fu-berlin.de>
34 // ==========================================================================
35 
36 #ifndef SEQAN_INCLUDE_SEQAN_GRAPH_ALIGN_GRAPH_IMPL_ALIGN_H_
37 #define SEQAN_INCLUDE_SEQAN_GRAPH_ALIGN_GRAPH_IMPL_ALIGN_H_
38 
39 namespace seqan {
40 
41 //////////////////////////////////////////////////////////////////////////////
42 // Alignment Graph
43 //////////////////////////////////////////////////////////////////////////////
44 
45 //////////////////////////////////////////////////////////////////////////////
46 
47 
48 
49 
50 
51 
52 //////////////////////////////////////////////////////////////////////////////
53 // Alignment Graph Output Tags
54 //////////////////////////////////////////////////////////////////////////////
55 
56 /*!
57  * @defgroup AlignmentGraphFormatTags Alignment Graph Formats
58  * @brief File formats to write an @link AlignmentGraph @endlink.
59  *
60  * @tag AlignmentGraphFormatTags#MsfFormat
61  * @headerfile <seqan/graph_align.h>
62  * @brief MSF Format to write an @link AlignmentGraph @endlink.
63  *
64  * @signature typedef Tag<MsfFormat_> const MsfFormat;
65  *
66  * @tag AlignmentGraphFormatTags#FastaFormat
67  * @headerfile <seqan/graph_align.h>
68  * @brief Fasta Format to write an @link AlignmentGraph @endlink.
69  *
70  * @signature typedef Tag<FastaFormat_> const FastaFormat;
71  *
72  * @tag AlignmentGraphFormatTags#CgVizFormat
73  * @headerfile <seqan/graph_align.h>
74  * @brief CgViz Format to write an @link AlignmentGraph @endlink.
75  *
76  * @signature typedef Tag<CgVizFormat_> const CgVizFormat;
77  */
78 
79 //////////////////////////////////////////////////////////////////////////////
80 
81 //////////////////////////////////////////////////////////////////////////////
82 
83 struct MsfFormat_;
84 typedef Tag<MsfFormat_> const MsfFormat;
85 
86 //////////////////////////////////////////////////////////////////////////////
87 
88 struct FastaFormat_;
89 typedef Tag<FastaFormat_> const FastaFormat;
90 
91 //////////////////////////////////////////////////////////////////////////////
92 
93 struct CgVizFormat_;
94 typedef Tag<CgVizFormat_> const CgVizFormat;
95 
96 
97 
98 //////////////////////////////////////////////////////////////////////////////
99 // Default Alignment Graph
100 //////////////////////////////////////////////////////////////////////////////
101 
102 template<typename TStringSet, typename TCargo = unsigned int, typename TSpec = Default>
103 struct Alignment;
104 
105 
106 
107 
108 
109 //////////////////////////////////////////////////////////////////////////////
110 // Metafunctions
111 //////////////////////////////////////////////////////////////////////////////
112 
113 //////////////////////////////////////////////////////////////////////////////
114 
115 template<typename TStringSet, typename TCargo, typename TSpec>
116 struct EdgeType<Graph<Alignment<TStringSet, TCargo, TSpec> > const> {
117     typedef typename EdgeType<Graph<Undirected<TCargo, TSpec> > const>::Type Type;
118 };
119 
120 //////////////////////////////////////////////////////////////////////////////
121 
122 template<typename TStringSet, typename TCargo, typename TSpec>
123 struct EdgeType<Graph<Alignment<TStringSet, TCargo, TSpec> > > {
124     typedef typename EdgeType<Graph<Undirected<TCargo, TSpec> > >::Type Type;
125 };
126 
127 
128 //////////////////////////////////////////////////////////////////////////////
129 
130 template<typename TStringSet, typename TCargo, typename TSpec>
131 struct Host<Graph<Alignment<TStringSet, TCargo, TSpec> > > {
132     typedef TStringSet Type;
133 };
134 
135 //////////////////////////////////////////////////////////////////////////////
136 
137 template<typename TStringSet, typename TCargo, typename TSpec>
138 struct Host<Graph<Alignment<TStringSet, TCargo, TSpec> > const> {
139     typedef TStringSet const Type;
140 };
141 
142 
143 
144 
145 
146 
147 //////////////////////////////////////////////////////////////////////////////
148 // Actual Alignment Graph Class
149 //////////////////////////////////////////////////////////////////////////////
150 
151 
152 //////////////////////////////////////////////////////////////////////////////
153 
154 template<typename TId = unsigned int, typename TSize = unsigned int>
155 class FragmentInfo {
156 public:
157     TId data_seq_id;
158     TSize data_begin;
159     TSize data_length;
160 
161     FragmentInfo() :
162         data_seq_id(0),
163         data_begin(0),
164         data_length(0)
165     {
166     }
167 
168     FragmentInfo(TId id, TSize beg, TSize len) :
169         data_seq_id(id),
170         data_begin(beg),
171         data_length(len)
172     {
173     }
174 
175 };
176 
177 //////////////////////////////////////////////////////////////////////////////
178 
179 /*!
180  * @class AlignmentGraph
181  * @extends Graph
182  * @headerfile <seqan/graph_align.h>
183  * @brief Alignment graph type.
184  *
185  * <img src="alignmentGraph.png" tite="An alignment graph with 3 sequences." />
186  *
187  * @signature template <typename TStringSet[, typename TCargo[, typename TSpec]]>
188  *            class Graph<Alignment<TStringSet, TCargo, TSpec> >;
189  *
190  * @tparam TStringSet The type of the @link StringSet @endlink containing the sequence information.  <b>Must be a
191  *                    @link DependentStringSet @endlink.</b>
192  * @tparam TCargo     The cargo type that can be attached to edges.  Default: <tt>unsigned</tt>.
193  * @tparam TSpec      The specializing type.  Default: <tt>Default</tt>.  Use <tt>WithoutEdgeId</tt> here to omit
194  *                    edge ids.  NB: if edges do not store ids then external property maps do not work.
195  *
196  * @section Example
197  *
198  * @include demos/graph_align/graph_align.cpp
199  *
200  * @code{.console}
201  * Score = -8
202  * Alignment matrix:
203  *       0     .    :
204  *         ATCGAATGCGGA
205  *         |     |||  |
206  *         ACTCGTTGC--A
207  * @endcode
208  */
209 
210 template<typename TString, typename TSpecial, typename TCargo, typename TSpec>
211 class Graph<Alignment<StringSet<TString, Dependent<TSpecial> >, TCargo, TSpec> >
212 {
213     public:
214         typedef typename Id<Graph>::Type TIdType_;
215         typedef typename VertexDescriptor<Graph>::Type TVertexDescriptor_;
216         typedef typename Size<Graph>::Type TSize_;
217         typedef std::pair<TIdType_, TSize_> TKey_;
218         typedef std::map<TKey_, TVertexDescriptor_> TPosToVertexMap_;
219         typedef FragmentInfo<TIdType_, TSize_> TFragmentInfo_;
220 
221         // Alignment graph
222         Graph<Undirected<TCargo, TSpec> > data_align;
223 
224         // Sequences
225         Holder<StringSet<TString, Dependent<TSpecial> > > data_sequence;
226 
227         // Alignment specific members
228         String<TFragmentInfo_> data_fragment;
229 
230         // STL Map to retrieve a vertex given SeqId, Position
231         TPosToVertexMap_ data_pvMap;
232 
233 
234         Graph() {
235         }
236 
237 
238         template <typename TDefault>
239         Graph(StringSet<TString, Dependent<TDefault> > const& sSet) {
240             SEQAN_CHECKPOINT
241             data_sequence = sSet;
242 
243             // Cover all sequences with nil vertices
244             TVertexDescriptor_ nilVertex = getNil<TVertexDescriptor_>();
245             TSize_ lenSet = length(sSet);
246             for(TSize_ k=0; k<lenSet;++k)
247                 data_pvMap.insert(std::make_pair(TKey_(positionToId(sSet,k), length(sSet[k])), nilVertex));
248         }
249 
250         template <typename TDefault>
251         Graph(StringSet<TString, Owner<TDefault> > const& sSet) {
252             SEQAN_CHECKPOINT
253             StringSet<TString, Dependent<> > depStr(sSet);
254             data_sequence = depStr;
255 
256             // Cover all sequences with nil vertices
257             TVertexDescriptor_ nilVertex = getNil<TVertexDescriptor_>();
258             TSize_ lenSet = length(sSet);
259             for(TSize_ k=0; k<lenSet;++k)
260                 data_pvMap.insert(std::make_pair(TKey_(positionToId(const_cast<StringSet<TString, Owner<TDefault> >&>(sSet),k), length(sSet[k])), nilVertex));
261         }
262 
263 
264         ~Graph() {
265             SEQAN_CHECKPOINT
266             clear(*this);
267         }
268 
269         Graph(Graph const & _other)
270         {
271             SEQAN_CHECKPOINT
272             _copyGraph(_other, *this);
273         }
274 
275         Graph const& operator = (Graph const & _other) {
276             SEQAN_CHECKPOINT
277             if (this == &_other) return *this;
278             clear(*this);
279             _copyGraph(_other, *this);
280             return *this;
281         }
282 };
283 
284 //////////////////////////////////////////////////////////////////////////////
285 // INTERNAL FUNCTIONS
286 //////////////////////////////////////////////////////////////////////////////
287 
288 /////////////////////////////////////////////////////////////////////////////
289 
290 template<typename TStringSet, typename TCargo, typename TSpec>
291 inline String<typename EdgeType<Graph<Alignment<TStringSet, TCargo, TSpec> > >::Type*>&
292 _getVertexString(Graph<Alignment<TStringSet, TCargo, TSpec> > const& g) {
293     SEQAN_CHECKPOINT
294     typedef Graph<Alignment<TStringSet, TCargo, TSpec> > TGraph;
295     typedef typename EdgeType<TGraph>::Type TEdgeStump;
296     return const_cast<String<TEdgeStump*>&>(g.data_align.data_vertex);
297 }
298 
299 /////////////////////////////////////////////////////////////////////////////
300 
301 template<typename TStringSet, typename TCargo, typename TSpec>
302 inline typename VertexIdHandler<Graph<Alignment<TStringSet, TCargo, TSpec> > >::Type&
303 _getVertexIdManager(Graph<Alignment<TStringSet, TCargo, TSpec> > const& g) {
304     SEQAN_CHECKPOINT
305     typedef Graph<Alignment<TStringSet, TCargo, TSpec> > TGraph;
306     typedef typename VertexIdHandler<TGraph>::Type TVertexIdManager;
307     return const_cast<TVertexIdManager&>(g.data_align.data_id_managerV);
308 }
309 
310 //////////////////////////////////////////////////////////////////////////////
311 
312 template<typename TStringSet, typename TCargo, typename TSpec>
313 inline typename EdgeIdHandler<Graph<Alignment<TStringSet, TCargo, TSpec> > >::Type&
314 _getEdgeIdManager(Graph<Alignment<TStringSet, TCargo, TSpec> > const& g) {
315     SEQAN_CHECKPOINT
316     typedef Graph<Alignment<TStringSet, TCargo, TSpec> > TGraph;
317     typedef typename EdgeIdHandler<TGraph>::Type TEdgeIdManager;
318     return const_cast<TEdgeIdManager&>(g.data_align.data_id_managerE);
319 }
320 
321 //////////////////////////////////////////////////////////////////////////////
322 
323 template<typename TStringSet, typename TCargo, typename TSpec>
324 inline void
325 _copyGraph(Graph<Alignment<TStringSet, TCargo, TSpec> > const& source,
326            Graph<Alignment<TStringSet, TCargo, TSpec> >& dest,
327            bool)
328 {
329     SEQAN_CHECKPOINT
330     clear(dest);
331     dest.data_align = source.data_align;
332     dest.data_sequence = source.data_sequence;
333     dest.data_fragment = source.data_fragment;
334     dest.data_pvMap = source.data_pvMap;
335 }
336 
337 //////////////////////////////////////////////////////////////////////////////
338 
339 template<typename TStringSet, typename TCargo, typename TSpec>
340 inline void
341 _copyGraph(Graph<Alignment<TStringSet, TCargo, TSpec> > const& source,
342            Graph<Alignment<TStringSet, TCargo, TSpec> >& dest)
343 {
344     _copyGraph(source, dest, false); // Never transpose, underlying graph is undirected
345 }
346 
347 
348 //////////////////////////////////////////////////////////////////////////////
349 // FUNCTIONS
350 //////////////////////////////////////////////////////////////////////////////
351 
352 
353 //////////////////////////////////////////////////////////////////////////////
354 
355 template<typename TStringSet, typename TCargo, typename TSpec>
356 inline void
357 transpose(Graph<Alignment<TStringSet, TCargo, TSpec> > const& source,
358           Graph<Alignment<TStringSet, TCargo, TSpec> >& dest)
359 {
360     SEQAN_CHECKPOINT
361     // Alignment graph, no transpose just copy
362     _copyGraph(source, dest, false);
363 }
364 
365 //////////////////////////////////////////////////////////////////////////////
366 
367 template<typename TStringSet, typename TCargo, typename TSpec>
368 inline void
369 transpose(Graph<Alignment<TStringSet, TCargo, TSpec> >&)
370 {
371     SEQAN_CHECKPOINT
372     // Nothing to do in an alignment graph
373 }
374 
375 //////////////////////////////////////////////////////////////////////////////
376 
377 template<typename TStringSet, typename TCargo, typename TSpec>
378 inline typename Size<Graph<Alignment<TStringSet, TCargo, TSpec> > >::Type
379 numEdges(Graph<Alignment<TStringSet, TCargo, TSpec> > const& g)
380 {
381     SEQAN_CHECKPOINT
382     return numEdges(g.data_align);
383 }
384 
385 //////////////////////////////////////////////////////////////////////////////
386 
387 template<typename TStringSet, typename TCargo, typename TSpec>
388 inline typename Size<Graph<Alignment<TStringSet, TCargo, TSpec> > >::Type
389 numVertices(Graph<Alignment<TStringSet, TCargo, TSpec> > const& g)
390 {
391     SEQAN_CHECKPOINT
392     return numVertices(g.data_align);
393 }
394 
395 //////////////////////////////////////////////////////////////////////////////
396 
397 template<typename TStringSet, typename TCargo, typename TSpec>
398 inline bool
399 empty(Graph<Alignment<TStringSet, TCargo, TSpec> > const& g)
400 {
401     SEQAN_CHECKPOINT
402     return empty(g.data_align);
403 }
404 
405 //////////////////////////////////////////////////////////////////////////////
406 
407 template<typename TStringSet, typename TCargo, typename TSpec>
408 inline void
409 clearEdges(Graph<Alignment<TStringSet, TCargo, TSpec> >& g)
410 {
411     SEQAN_CHECKPOINT
412     clearEdges(g.data_align);
413 }
414 
415 //////////////////////////////////////////////////////////////////////////////
416 
417 template<typename TStringSet, typename TCargo, typename TSpec>
418 inline void
419 clearVertices(Graph<Alignment<TStringSet, TCargo, TSpec> >& g)
420 {
421     SEQAN_CHECKPOINT
422     typedef Graph<Alignment<TStringSet, TCargo, TSpec> > TGraph;
423     typedef typename Size<TStringSet>::Type TSize;
424     typedef typename VertexDescriptor<TGraph>::Type TVertexDescriptor;
425     typedef typename TGraph::TKey_ TKey;
426 
427     clear(g.data_fragment);
428     g.data_pvMap.clear();
429     clearVertices(g.data_align);
430 
431 
432     // Don't forget to cover the sequences with nil vertices again
433     TVertexDescriptor nilVertex = getNil<TVertexDescriptor>();
434     if(!empty(value(g.data_sequence))) {
435         TSize lenSet = length(stringSet(g));
436         for(TSize k=0; k<lenSet;++k)
437             g.data_pvMap.insert(std::make_pair(TKey(positionToId(stringSet(g),k), length(stringSet(g)[k])), nilVertex));
438     }
439 }
440 
441 //////////////////////////////////////////////////////////////////////////////
442 
443 template<typename TStringSet, typename TCargo, typename TSpec>
444 inline void
445 clear(Graph<Alignment<TStringSet, TCargo, TSpec> >& g)
446 {
447     SEQAN_CHECKPOINT
448     // Only clear also removes the sequences
449     clear(value(g.data_sequence));
450     clearVertices(g);
451 }
452 
453 //////////////////////////////////////////////////////////////////////////////
454 
455 template<typename TStringSet, typename TCargo, typename TSpec, typename TVertexDescriptor>
456 inline typename Size<Graph<Alignment<TStringSet, TCargo, TSpec> > >::Type
457 outDegree(Graph<Alignment<TStringSet, TCargo, TSpec> > const& g,
458           TVertexDescriptor const vertex)
459 {
460     SEQAN_CHECKPOINT
461     return outDegree(g.data_align, vertex);
462 }
463 
464 //////////////////////////////////////////////////////////////////////////////
465 
466 template<typename TStringSet, typename TCargo, typename TSpec, typename TVertexDescriptor>
467 inline typename Size<Graph<Alignment<TStringSet, TCargo, TSpec> > >::Type
468 inDegree(Graph<Alignment<TStringSet, TCargo, TSpec> > const& g,
469          TVertexDescriptor const vertex)
470 {
471     SEQAN_CHECKPOINT
472     return inDegree(g.data_align, vertex);
473 }
474 
475 //////////////////////////////////////////////////////////////////////////////
476 
477 template<typename TStringSet, typename TCargo, typename TSpec, typename TVertexDescriptor>
478 inline typename Size<Graph<Alignment<TStringSet, TCargo, TSpec> > >::Type
479 degree(Graph<Alignment<TStringSet, TCargo, TSpec> > const& g,
480        TVertexDescriptor const vertex)
481 {
482     SEQAN_CHECKPOINT
483     return degree(g.data_align, vertex);
484 }
485 
486 //////////////////////////////////////////////////////////////////////////////
487 
488 template<typename TStringSet, typename TCargo, typename TSpec, typename TId, typename TPos, typename TLength>
489 inline typename VertexDescriptor<Graph<Alignment<TStringSet, TCargo, TSpec> > >::Type
490 addVertex(Graph<Alignment<TStringSet, TCargo, TSpec> >& g,
491           TId id,
492           TPos begin,
493           TLength len)
494 {
495     SEQAN_CHECKPOINT
496     typedef Graph<Alignment<TStringSet, TCargo, TSpec> > TGraph;
497     typedef typename VertexDescriptor<TGraph>::Type TVertexDescriptor;
498     typedef typename Id<TGraph>::Type TIdType;
499     typedef typename Size<TGraph>::Type TSize;
500     typedef typename TGraph::TFragmentInfo_ TFragmentInfo;
501     typedef typename TGraph::TKey_ TKey;
502     typedef typename TGraph::TPosToVertexMap_ TPosToVertexMap;
503     //typedef typename TPosToVertexMap::key_type TKey;
504 
505     //for(TPosToVertexMap::const_iterator p = g.data_pvMap.begin(); p != g.data_pvMap.end(); ++p) {
506     //    std::cout << p->first.first << ',' << p->first.second << ':' << p->second << std::endl;
507     //}
508 
509     TVertexDescriptor nilVertex = getNil<TVertexDescriptor>();
510 
511     // Store the new fragment
512     typename TPosToVertexMap::iterator interval = g.data_pvMap.lower_bound(TKey((TIdType)id, (TSize)begin + (TSize)len));
513     // Segment does not belong to Sequence anymore
514     SEQAN_ASSERT(interval != g.data_pvMap.end());
515     // Segment end must be assigned to nil so far
516     SEQAN_ASSERT(interval->second == nilVertex);
517     // Segment must belong to the whole old interval
518     SEQAN_ASSERT(*interval == *g.data_pvMap.upper_bound(TKey((TIdType)id, (TSize)begin)));
519 
520     // Insert new vertex
521     TVertexDescriptor vd = addVertex(g.data_align);
522     if (length(g.data_fragment) <= vd) resize(g.data_fragment, vd + 1, Generous());
523     assignProperty(g.data_fragment, vd, TFragmentInfo(id, begin, len));
524 
525     // Update position to vertex map
526     // Does the end of the new fragment coincides with the end of the interval?
527     if ( (TSize) begin + len == (TSize) interval->first.second) {
528         // Does the beginning of the new fragment coincides with the beginning of the interval?
529         if ((begin == 0) ||
530             (g.data_pvMap.find(TKey((TIdType)id, (TSize)begin)) != g.data_pvMap.end())) {
531             // Replace interval
532             interval->second = vd;
533         } else {
534             // Split interval once
535             g.data_pvMap.insert(std::make_pair(TKey(interval->first.first,(TSize)begin), interval->second));
536             g.data_pvMap.erase(interval);
537             g.data_pvMap.insert(std::make_pair(TKey((TIdType)id,(TSize)begin+len), vd));
538         }
539     } else {
540         // Does the beginning of the new fragment coincides with the beginning of the interval?
541         if ((begin == 0) ||
542             (g.data_pvMap.find(TKey((TIdType)id, (TSize)begin)) != g.data_pvMap.end())) {
543             // Split interval once
544             // Just insert here because we store interval ends
545             g.data_pvMap.insert(std::make_pair(TKey((TIdType)id,(TSize)begin+len), vd));
546         } else {
547             // Split interval twice
548             TSize tmp = interval->first.second;
549             g.data_pvMap.insert(std::make_pair(TKey(interval->first.first,begin), interval->second));
550             g.data_pvMap.erase(interval);
551             g.data_pvMap.insert(std::make_pair(TKey((TIdType)id,(TSize)begin+len), vd));
552             g.data_pvMap.insert(std::make_pair(TKey((TIdType)id,tmp), nilVertex));
553         }
554     }
555     return vd;
556 }
557 
558 //////////////////////////////////////////////////////////////////////////////
559 
560 template<typename TStringSet, typename TCargo, typename TSpec, typename TVD>
561 inline void
562 removeVertex(Graph<Alignment<TStringSet, TCargo, TSpec> >& g,
563              TVD const v)
564 {
565     typedef Graph<Alignment<TStringSet, TCargo, TSpec> > TGraph;
566     typedef typename VertexDescriptor<TGraph>::Type TVertexDescriptor;
567     typedef typename TGraph::TKey_ TKey;
568     typedef typename TGraph::TPosToVertexMap_ TPosToVertexMap;
569     typename TPosToVertexMap::iterator interval = (g.data_pvMap.lower_bound(TKey(sequenceId(g,v), fragmentBegin(g,v) + fragmentLength(g,v))));
570 
571     // Clear the interval
572     interval->second = getNil<TVertexDescriptor>();
573     typename TPosToVertexMap::iterator interval_iter = interval;
574     if (interval_iter != g.data_pvMap.begin()) {
575         --interval_iter;
576         if ((interval_iter->second == getNil<TVertexDescriptor>()) &&
577             (interval_iter->first.first == interval->first.first)) {
578             g.data_pvMap.erase(interval_iter);
579         }
580     }
581     interval_iter = interval;
582     ++interval_iter;
583     if (interval_iter != g.data_pvMap.end()) {
584         if ((interval_iter->second == getNil<TVertexDescriptor>()) &&
585             (interval_iter->first.first == interval->first.first)) {
586             g.data_pvMap.erase(interval);
587         }
588     }
589 
590     // Remove the vertex
591     removeVertex(g.data_align,v);
592 }
593 
594 //////////////////////////////////////////////////////////////////////////////
595 
596 template<typename TStringSet, typename TCargo, typename TSpec, typename TVertexDescriptor>
597 inline typename EdgeDescriptor<Graph<Alignment<TStringSet, TCargo, TSpec> > >::Type
598 addEdge(Graph<Alignment<TStringSet, TCargo, TSpec> >& g,
599         TVertexDescriptor const source,
600         TVertexDescriptor const target)
601 {
602     SEQAN_CHECKPOINT
603     return addEdge(g.data_align, source, target);
604 }
605 
606 //////////////////////////////////////////////////////////////////////////////
607 
608 template<typename TStringSet, typename TCargo, typename TSpec, typename TVertexDescriptor, typename TCargo2>
609 inline typename EdgeDescriptor<Graph<Alignment<TStringSet, TCargo, TSpec> > >::Type
610 addEdge(Graph<Alignment<TStringSet, TCargo, TSpec> >& g,
611         TVertexDescriptor const source,
612         TVertexDescriptor const target,
613         TCargo2 const cargo)
614 {
615     SEQAN_CHECKPOINT
616     return addEdge(g.data_align, source, target, cargo);
617 }
618 
619 //////////////////////////////////////////////////////////////////////////////
620 
621 template<typename TStringSet, typename TCargo, typename TSpec, typename TVertexDescriptor>
622 inline void
623 removeEdge(Graph<Alignment<TStringSet, TCargo, TSpec> >& g,
624            TVertexDescriptor const source,
625            TVertexDescriptor const target)
626 {
627     SEQAN_CHECKPOINT
628     removeEdge(g.data_align, source, target);
629 }
630 
631 //////////////////////////////////////////////////////////////////////////////
632 
633 template<typename TStringSet, typename TCargo, typename TSpec, typename TEdgeDescriptor>
634 inline void
635 removeEdge(Graph<Alignment<TStringSet, TCargo, TSpec> >& g,
636            TEdgeDescriptor const edge)
637 {
638     SEQAN_CHECKPOINT
639     removeEdge(g.data_align, sourceVertex(g.data_align,edge), targetVertex(g.data_align,edge));
640 }
641 
642 //////////////////////////////////////////////////////////////////////////////
643 
644 template<typename TStringSet, typename TCargo, typename TSpec, typename TVertexDescriptor>
645 inline void
646 removeOutEdges(Graph<Alignment<TStringSet, TCargo, TSpec> >& g,
647                TVertexDescriptor const v)
648 {
649     SEQAN_CHECKPOINT
650     removeOutEdges(g.data_align, v);
651 }
652 
653 //////////////////////////////////////////////////////////////////////////////
654 
655 template<typename TStringSet, typename TCargo, typename TSpec, typename TVertexDescriptor>
656 inline void
657 removeInEdges(Graph<Alignment<TStringSet, TCargo, TSpec> >& g,
658               TVertexDescriptor const v)
659 {
660     SEQAN_CHECKPOINT
661     removeInEdges(g.data_align,v);
662 }
663 
664 //////////////////////////////////////////////////////////////////////////////
665 
666 template<typename TStringSet, typename TCargo, typename TSpec, typename TEdgeDescriptor>
667 inline typename VertexDescriptor<Graph<Alignment<TStringSet, TCargo, TSpec> > >::Type
668 targetVertex(Graph<Alignment<TStringSet, TCargo, TSpec> > const& g,
669              TEdgeDescriptor const edge)
670 {
671     SEQAN_CHECKPOINT
672     return targetVertex(g.data_align, edge);
673 }
674 
675 //////////////////////////////////////////////////////////////////////////////
676 
677 template<typename TStringSet, typename TCargo, typename TSpec, typename TEdgeDescriptor>
678 inline typename VertexDescriptor<Graph<Alignment<TStringSet, TCargo, TSpec> > >::Type
679 sourceVertex(Graph<Alignment<TStringSet, TCargo, TSpec> > const& g,
680              TEdgeDescriptor const edge)
681 {
682     SEQAN_CHECKPOINT
683     return sourceVertex(g.data_align, edge);
684 }
685 
686 //////////////////////////////////////////////////////////////////////////////
687 
688 template<typename TStringSet, typename TCargo, typename TSpec, typename TMatrix>
689 inline void
690 getAdjacencyMatrix(Graph<Alignment<TStringSet, TCargo, TSpec> > const& g,
691                    TMatrix& mat)
692 {
693     SEQAN_CHECKPOINT
694     getAdjacencyMatrix(g.data_align, mat);
695 }
696 
697 //////////////////////////////////////////////////////////////////////////////
698 
699 template<typename TStringSet, typename TCargo, typename TSpec, typename TVertexDescriptor>
700 inline typename EdgeDescriptor<Graph<Alignment<TStringSet, TCargo, TSpec> > >::Type
701 findEdge(Graph<Alignment<TStringSet, TCargo, TSpec> > const& g,
702          TVertexDescriptor const v,
703          TVertexDescriptor const w)
704 {
705     SEQAN_CHECKPOINT
706     return findEdge(g.data_align, v, w);
707 }
708 
709 //////////////////////////////////////////////////////////////////////////////
710 
711 template <typename TFile, typename TStringSet, typename TCargo, typename TSpec>
712 inline void
713 write(TFile & target,
714       Graph<Alignment<TStringSet, TCargo, TSpec> > const& g)
715 {
716 //IOREV _nodoc_ specialization not documented (at least not obviously)
717     typedef Graph<Alignment<TStringSet, TCargo, TSpec> > TGraph;
718     typedef typename Size<TGraph>::Type TSize;
719     typedef typename TGraph::TFragmentInfo_ TSegment;
720     typedef typename VertexDescriptor<TGraph>::Type TVertexDescriptor;
721     typedef typename EdgeType<TGraph>::Type TEdgeStump;
722     typedef typename Iterator<String<TEdgeStump*> const, Standard>::Type TIterConst;
723 
724     String<char> align;
725     if (!convertAlignment(g, align)) {
726         write(target,"Adjacency list:\n");
727         TIterConst it = begin(g.data_align.data_vertex, Standard());
728         TIterConst itEnd = end(g.data_align.data_vertex, Standard());
729         TVertexDescriptor pos = 0;
730         for(;it!=itEnd; ++it, ++pos) {
731             if (!idInUse(g.data_align.data_id_managerV, pos)) continue;
732             TVertexDescriptor sourceV = pos;
733             appendNumber(target, (int)sourceV);
734             TSegment seg = getProperty(g.data_fragment, sourceV);
735             write(target," (SeqId:");
736             appendNumber(target, (int)seg.data_seq_id);
737             write(target," ,Begin:");
738             appendNumber(target, (int)seg.data_begin);
739             write(target," ,Length:");
740             appendNumber(target, (int)seg.data_length);
741             write(target,") -> ");
742             TEdgeStump* current = *it;
743             while(current!=0) {
744                 TVertexDescriptor adjV = getTarget(current);
745                 if (adjV != sourceV) {
746                     appendNumber(target, (int)adjV);
747                     writeValue(target, ',');
748                     current=getNextS(current);
749                 } else {
750                     adjV = getSource(current);
751                     appendNumber(target, (int)adjV);
752                     writeValue(target, ',');
753                     current=getNextT(current);
754                 }
755             }
756             writeValue(target, '\n');
757         }
758         write(target,"Edge list:\n");
759         it = begin(g.data_align.data_vertex, Standard());
760         pos = 0;
761         for(;it!=itEnd; ++it, ++pos) {
762             TVertexDescriptor sourceV = pos;
763             TEdgeStump* current = *it;
764             while(current!=0) {
765                 TVertexDescriptor targetV = getTarget(current);
766                 if (sourceV != targetV) {
767                     write(target,"Source: ");
768                     appendNumber(target, (int)sourceV);
769                     write(target, ",Target: ");
770                     appendNumber(target, (int)targetV);
771                     write(target, " (Id: ");
772                     appendNumber(target, (int)_getId(current));
773                     write(target, ")\n");
774                     current=getNextS(current);
775                 } else {
776                     current=getNextT(current);
777                 }
778             }
779         }
780     } else {
781         TSize nseq = length(stringSet(g));
782         TSize colLen = length(align) / nseq;
783 
784         TSize baseCount=0;
785         TSize leftSpace=6;
786         TSize xPos = 0;
787         write(target,"Alignment matrix:\n");
788         while (xPos < colLen) {
789             TSize windowSize = 50;
790             if ((xPos + windowSize)>colLen) windowSize = colLen - xPos;
791 
792             // Print header line
793             TSize offset=0;
794             // Larger numbers need to be further left
795             if (baseCount != 0) offset = (unsigned int) floor(log((double)baseCount) / log((double)10));
796             for(TSize j = 0;j<leftSpace-offset;++j) writeValue(target, ' ');
797             appendNumber(target, (int)baseCount);
798             baseCount+=windowSize;
799             writeValue(target, ' ');
800             for(TSize col = 1;col<=windowSize;++col) {
801                 if ((col % 10)==0) writeValue(target, ':');
802                 else if ((col % 5)==0) writeValue(target, '.');
803                 else writeValue(target, ' ');
804             }
805             write(target, " \n");
806 
807             // Print sequences
808             for(TSize row=0;row<2*nseq-1;++row) {
809                 for(TSize col = 0;col<leftSpace+2;++col)
810                     writeValue(target, ' ');
811                 if ((row % 2)==0) {
812                     for(TSize col = xPos;col<xPos+windowSize;++col)
813                         writeValue(target, align[(row/2)*colLen+col]);
814                 } else {
815                     for(TSize col = xPos;col<xPos+windowSize;++col) {
816                         if ((align[((row-1)/2)*colLen + col] != gapValue<char>()) &&
817                             (align[((row+1)/2)*colLen + col] != gapValue<char>()) &&
818                             (align[((row-1)/2)*colLen + col] == align[((row+1)/2)*colLen + col]))
819                             writeValue(target, '|');
820                         else
821                             writeValue(target, ' ');
822                     }
823                 }
824                 writeValue(target, '\n');
825             }
826             writeValue(target, '\n');
827             xPos+=windowSize;
828         }
829         writeValue(target, '\n');
830     }
831 }
832 
833 //////////////////////////////////////////////////////////////////////////////
834 
835 template <typename TFile, typename TSpec, typename TNames>
836 inline void
837 write(TFile & file,
838       Graph<TSpec> const& g,
839       TNames const& names,
840       FastaFormat)
841 {
842 //IOREV _nodoc_ specialization not documented
843     SEQAN_CHECKPOINT
844     typedef Graph<TSpec> TGraph;
845     typedef typename Size<TGraph>::Type TSize;
846 
847     typename DirectionIterator<TFile, Output>::Type target = directionIterator(file, Output());
848 
849     String<char> align;
850     if (convertAlignment(g, align)) {
851         TSize nseq = length(stringSet(g));
852         TSize colLen = length(align) / nseq;
853         typedef typename Iterator<String<char>, Standard>::Type TIter;
854         TIter it = begin(align, Standard());
855         for(TSize i = 0; i<nseq; ++i) {
856             writeValue(target, '>');
857             write(target,names[i]);
858             writeValue(target, '\n');
859             TSize col = 0;
860             while(col < colLen) {
861                 TSize max = ((colLen - col) < 60) ? colLen - col : 60;
862                 for(TSize finger = 0; finger<max; ++finger, ++col, ++it)
863                     writeValue(target, *it);
864                 writeValue(target, '\n');
865             }
866         }
867     }
868 }
869 
870 
871 
872 //////////////////////////////////////////////////////////////////////////////
873 
874 template <typename TFile, typename TSpec, typename TNames>
875 inline void
876 write(TFile & file,
877       Graph<TSpec> const& g,
878       TNames const& names,
879       MsfFormat)
880 {
881 //IOREV _nodoc_ specialization not documented
882     SEQAN_CHECKPOINT
883     typedef Graph<TSpec> TGraph;
884     typedef typename Size<TGraph>::Type TSize;
885 
886     typename DirectionIterator<TFile, Output>::Type target = directionIterator(file, Output());
887 
888     String<char> align;
889     if (convertAlignment(g, align)) {
890         TSize nseq = length(stringSet(g));
891         TSize colLen = length(align) / nseq;
892 
893         write(target,"PileUp\n");
894         writeValue(target, '\n');
895         write(target," MSF: ");
896         appendNumber(target, (unsigned)colLen);
897         write(target," Type: P Check: 0 ..\n\n");
898         TSize offset = 0;
899         for(TSize i = 0; i<nseq; ++i) {
900             write(target," Name: ");
901             write(target,names[i]);
902             write(target," oo  Len:  ");
903             TSize len = length(names[i]);
904             if (len > offset) offset = len;
905             appendNumber(target, (unsigned)colLen);
906             write(target," Check: 0");
907             write(target," Weight: 1.00");
908             writeValue(target, '\n');
909         }
910         offset += 5;
911         writeValue(target, '\n');
912         write(target, "//\n");
913         writeValue(target, '\n');
914         writeValue(target, '\n');
915         TSize col = 0;
916         while(col < colLen) {
917             TSize max = 0;
918             for(TSize i = 0; i<nseq; ++i) {
919                 max = ((colLen - col) < 50) ? colLen - col : 50;
920                 write(target,names[i]);
921                 for(TSize j = 0; j<offset - length(names[i]); ++j)
922                     writeValue(target, ' ');
923                 for(TSize finger = col; finger<col+max; ++finger) {
924                     if ((finger - col) % 10 == 0) writeValue(target, ' ');
925                     if (align[i*colLen + finger] == '-') writeValue(target, '.');
926                     else writeValue(target, align[i*colLen + finger]);
927                 }
928                 writeValue(target, '\n');
929             }
930             col += max;
931             writeValue(target, '\n');
932             writeValue(target, '\n');
933         }
934     }
935 }
936 
937 
938 //////////////////////////////////////////////////////////////////////////////
939 template <typename TFile, typename TStringSet, typename TSpec, typename TEdge>
940 inline void
941 _writeCargo(TFile & file,
942              Graph<Alignment<TStringSet, void, TSpec> > const&,
943              TEdge const&)
944 {
945     appendNumber(file, 0);
946 }
947 
948 //////////////////////////////////////////////////////////////////////////////
949 template <typename TFile, typename TStringSet, typename TCargo, typename TSpec, typename TEdge>
950 inline void
951 _writeCargo(TFile & file,
952              Graph<Alignment<TStringSet, TCargo, TSpec> > const&,
953              TEdge const& edge)
954 {
955     appendNumber(file, (int)getCargo(edge));
956 }
957 
958 //////////////////////////////////////////////////////////////////////////////
959 
960 
961 template <typename TFile, typename TStringSet, typename TCargo, typename TSpec, typename TNames>
962 inline void
963 write(TFile & file,
964       Graph<Alignment<TStringSet, TCargo, TSpec> > const& g,
965       TNames const& names,
966       CgVizFormat)
967 {
968 //IOREV _nodoc_ specialization not documented
969     typedef Graph<Alignment<TStringSet, TCargo, TSpec> > TGraph;
970     typedef typename Size<TGraph>::Type TSize;
971     typedef typename Id<TGraph>::Type TId;
972     typedef typename VertexDescriptor<TGraph>::Type TVertexDescriptor;
973     typedef typename EdgeType<TGraph>::Type TEdgeStump;
974     typedef typename Iterator<String<TEdgeStump*> const, Rooted>::Type TIterConst;
975 
976     typename DirectionIterator<TFile, Output>::Type target = directionIterator(file, Output());
977 
978     TStringSet& str = stringSet(g);
979     TSize nseq = length(str);
980 
981     write(target, "{DATA Data\n");
982     write(target, "\t[__GLOBAL__] tracks=");
983     appendNumber(target, nseq);
984     writeValue(target, '\n');
985     for(TSize i = 0; i<nseq; ++i) {
986         write(target, "\tfasta_id=\"");
987         write(target,names[i]);
988         write(target, "\" sequence=\"");
989         write(target,str[i]);
990         write(target, "\" track=");
991         appendNumber(target, i);
992         write(target, " type=\"DNA\": 0 ");
993         appendNumber(target, length(str[i])- 1);
994         writeValue(target, '\n');
995     }
996     write(target, "}\n");
997     for(TSize i = 0; i<nseq; ++i) {
998         write(target, "{DATA ");
999         appendNumber(target, i);
1000         //write(target,names[i]);
1001         write(target, "-seqlen\n\t[__GLOBAL__]\n\tlength=");
1002         appendNumber(target, length(str[i]));
1003         write(target, ":\t0 ");
1004         appendNumber(target, length(str[i])- 1);
1005         write(target, "\n}\n");
1006     }
1007     for(TSize i=0; i<nseq; ++i) {
1008         for(TSize j=i+1; j<nseq; ++j) {
1009             write(target, "{DATA ");
1010             appendNumber(target, i);
1011             //write(target,names[i]);
1012             write(target, "-vs-");
1013             appendNumber(target, j);
1014             //write(target,names[j]);
1015             writeValue(target, '\n');
1016             write(target, "\t[__GLOBAL__]\n");
1017             for(TIterConst it = begin(g.data_align.data_vertex);!atEnd(it);goNext(it)) {
1018                 TVertexDescriptor sourceV = position(it);
1019                 TId id1 = sequenceId(g, sourceV);
1020                 if ((positionToId(str, id1) != i) &&
1021                     (positionToId(str, id1) != j)) continue;
1022                 TEdgeStump* current = getValue(it);
1023                 while(current!=0) {
1024                     TVertexDescriptor targetV = getTarget(current);
1025                     TId id2 = sequenceId(g, targetV);
1026                     if (sourceV != targetV) {
1027                         if ((positionToId(str, id2) != i) &&
1028                             (positionToId(str, id2) != j)) {
1029                                 current=getNextS(current);
1030                                 continue;
1031                         }
1032                         write(target, "\t");
1033                         write(target, "source=");
1034                         appendNumber(target, (int)sourceV);
1035                         writeValue(target, ' ');
1036                         write(target, "target=");
1037                         appendNumber(target, (int)targetV);
1038                         writeValue(target, ' ');
1039                         write(target, "edgeId=");
1040                         appendNumber(target, (int)_getId(current));
1041                         writeValue(target, ' ');
1042                         write(target, "cargo=");
1043                         _writeCargo(target,g,current);
1044                         writeValue(target, ' ');
1045                         write(target, "label=");
1046                         write(target,label(g,sourceV));
1047                         writeValue(target, ' ');
1048                         write(target, "labelOpp=");
1049                         write(target,label(g,targetV));
1050                         writeValue(target, ':');
1051                         writeValue(target, '\t');
1052                         appendNumber(target, (int)fragmentBegin(g, sourceV));
1053                         writeValue(target, ' ');
1054                         appendNumber(target, (int)fragmentBegin(g, targetV));
1055                         writeValue(target, ' ');
1056                         appendNumber(target, (int)(fragmentBegin(g, sourceV) + fragmentLength(g, sourceV)));
1057                         writeValue(target, ' ');
1058                         appendNumber(target, (int)(fragmentBegin(g, targetV) + fragmentLength(g, targetV)));
1059                         writeValue(target, '\n');
1060                         current=getNextS(current);
1061                     } else {
1062                         current=getNextT(current);
1063                     }
1064                 }
1065             }
1066             write(target, "}\n");
1067         }
1068     }
1069 }
1070 
1071 
1072 //////////////////////////////////////////////////////////////////////////////
1073 
1074 /*!
1075  * @fn AlignmentGraph#assignStringSet
1076  * @brief Assigns a new StringSet to an AlignmentGraph.
1077  *
1078  * @signature void assignStringSet(g, stringSet);
1079  *
1080  * @param[in,out] g         An AlignmentGraph.
1081  * @param[in]     stringSet The @link StringSet @endlink to assign to.
1082  *
1083  * @see AlignmentGraph#getStringSet
1084  * @see AlignmentGraph#stringSet
1085  */
1086 
1087 template<typename TString, typename TDefault, typename TCargo, typename TSpec, typename TDefault2>
1088 inline void
1089 assignStringSet(Graph<Alignment<StringSet<TString, Dependent<TDefault> >, TCargo, TSpec> >& g,
1090                 StringSet<TString, Dependent<TDefault2> > const& sStr)
1091 {
1092     SEQAN_CHECKPOINT
1093     typedef Graph<Alignment<StringSet<TString, Dependent<TDefault> >, TCargo, TSpec> > TGraph;
1094     typedef typename VertexDescriptor<TGraph>::Type TVertexDescriptor;
1095     typedef typename TGraph::TKey_ TKey;
1096     typedef typename Size<TGraph>::Type TSize;
1097 
1098     TVertexDescriptor nilVertex = getNil<TVertexDescriptor>();
1099     clear(g);
1100     g.data_sequence = (StringSet<TString, Dependent<TDefault> >) sStr;
1101     TSize lenSet = length(sStr);
1102     for(TSize k=0; k<lenSet;++k)
1103         g.data_pvMap.insert(std::make_pair(TKey(positionToId(sStr,k), length(sStr[k])), nilVertex));
1104 }
1105 
1106 //////////////////////////////////////////////////////////////////////////////
1107 
1108 template<typename TString, typename TDefault, typename TCargo, typename TSpec, typename TDefault2>
1109 inline void
1110 assignStringSet(Graph<Alignment<StringSet<TString, Dependent<TDefault> >, TCargo, TSpec> >& g,
1111                 StringSet<TString, Owner<TDefault2> > const& sStr)
1112 {
1113     SEQAN_CHECKPOINT
1114     StringSet<TString, Dependent<> > depStr(sStr);
1115     assignStringSet(g, depStr);
1116 }
1117 
1118 
1119 //////////////////////////////////////////////////////////////////////////////
1120 
1121 /*!
1122  * @fn AlignmentGraph#getStringSet
1123  * @brief Gets the StringSet of an AlignmentGraph.
1124  *
1125  * @signature THost getStringSet(g);
1126  *
1127  * @param[in,out] g An AlignmentGraph.
1128  *
1129  * @return THost A const reference to the StringSet's host.
1130  *
1131  * @see AlignmentGraph#assignStringSet
1132  * @see AlignmentGraph#stringSet
1133  */
1134 
1135 template<typename TStringSet, typename TCargo, typename TSpec>
1136 inline typename Host<Graph<Alignment<TStringSet, TCargo, TSpec> > const>::Type&
1137 getStringSet(Graph<Alignment<TStringSet, TCargo, TSpec> > const& g)
1138 {
1139     SEQAN_CHECKPOINT
1140     return value(g.data_sequence);
1141 }
1142 
1143 
1144 //////////////////////////////////////////////////////////////////////////////
1145 
1146 /*!
1147  * @fn AlignmentGraph#stringSet
1148  * @brief Return reference to the StringSet of an AlignmentGraph.
1149  *
1150  * @signature THostRef stringSet(g);
1151  *
1152  * @param[in,out] g An AlignmentGraph.
1153  *
1154  * @return THost A reference to the StringSet's host.
1155  *
1156  * @see AlignmentGraph#assignStringSet
1157  * @see AlignmentGraph#getStringSet
1158  */
1159 
1160 template <typename T>
1161 struct StringSetType;
1162 
1163 template <typename TStringSet, typename TCargo, typename TSpec>
1164 struct StringSetType<Graph<Alignment<TStringSet, TCargo, TSpec> > >
1165 {
1166     typedef typename Host<Graph<Alignment<TStringSet, TCargo, TSpec> > >::Type & Type;
1167 };
1168 
1169 
1170 template<typename TStringSet, typename TCargo, typename TSpec>
1171 inline typename StringSetType<Graph<Alignment<TStringSet, TCargo, TSpec> > >::Type
1172 stringSet(Graph<Alignment<TStringSet, TCargo, TSpec> > const& g)
1173 {
1174     SEQAN_CHECKPOINT
1175     return const_cast<TStringSet&>(value(g.data_sequence));
1176 }
1177 
1178 //////////////////////////////////////////////////////////////////////////////
1179 
1180 /*!
1181  * @fn AlignmentGraph#label
1182  * @brief Get the label associated with this vertex descriptor or the sequence that is associated with a fragment.
1183  *
1184  * @signature TInfix label(ag, v);
1185  *
1186  * @param[in] ag The AlignmentGraph to query.
1187  * @param[in] v  The vertex descriptor to query.
1188  *
1189  * @return TInfix An infix representing the sequence label.
1190  */
1191 
1192 template<typename TStringSet, typename TCargo, typename TSpec, typename TVertexDescriptor>
1193 inline typename Infix<typename Value<TStringSet>::Type>::Type
1194 label(Graph<Alignment<TStringSet, TCargo, TSpec> > const& g,
1195       TVertexDescriptor const v)
1196 {
1197     SEQAN_CHECKPOINT
1198     typedef Graph<Alignment<TStringSet, TCargo, TSpec> > TGraph;
1199     typedef typename TGraph::TFragmentInfo_ TSegment;
1200     TSegment seg = getProperty(g.data_fragment, v);
1201     //std::cout << seg.data_seq_id << ",";
1202     //std::cout << seg.data_begin << ",";
1203     //std::cout << seg.data_length << ",";
1204     //std::cout << getValueById(value(g.data_sequence), seg.data_seq_id) << std::endl;
1205     //std::cout << infix(getValueById(value(g.data_sequence), seg.data_seq_id), seg.data_begin, seg.data_begin + seg.data_length) << std::endl;
1206     return infix(getValueById(value(g.data_sequence), seg.data_seq_id), seg.data_begin, seg.data_begin + seg.data_length);
1207 }
1208 
1209 //////////////////////////////////////////////////////////////////////////////
1210 
1211 /*!
1212  * @fn AlignmentGraph#sequenceId
1213  * @brief Gets the sequence id that is associated with this vertex descriptor or with a sequence of a fragment.
1214  *
1215  * @signature TId sequenceId(ag, v);
1216  *
1217  * @param[in] ag The AlignmentGraph.
1218  * @param[in] v  Vertex descriptor of the vertex to retrieve sequence id for.
1219  *
1220  * @return TId The sequence id.
1221  */
1222 
1223 template<typename TStringSet, typename TCargo, typename TSpec, typename TVertexDescriptor>
1224 inline typename Id<Graph<Alignment<TStringSet, TCargo, TSpec> > >::Type&
1225 sequenceId(Graph<Alignment<TStringSet, TCargo, TSpec> > const& g,
1226            TVertexDescriptor const v)
1227 {
1228     SEQAN_CHECKPOINT
1229     return const_cast<typename Id<Graph<Alignment<TStringSet, TCargo, TSpec> > >::Type&>(g.data_fragment[v].data_seq_id);
1230 }
1231 
1232 //////////////////////////////////////////////////////////////////////////////
1233 
1234 /*!
1235  * @fn AlignmentGraph#fragmentBegin
1236  * @brief Gets the begin position for this vertex descriptor in the sequence.
1237  *
1238  * @signature TPos fragmentBegin(ag, v);
1239  *
1240  * @param[in] ag The AlignmentGraph to query.
1241  * @param[in] v  The vertex descriptor of the vertex.
1242  *
1243  * @return TPos The begin position.
1244  */
1245 
1246 template<typename TStringSet, typename TCargo, typename TSpec, typename TVertexDescriptor>
1247 inline typename Position<Graph<Alignment<TStringSet, TCargo, TSpec> > >::Type&
1248 fragmentBegin(Graph<Alignment<TStringSet, TCargo, TSpec> > const& g,
1249               TVertexDescriptor const v)
1250 {
1251     SEQAN_CHECKPOINT
1252     return const_cast<typename Position<Graph<Alignment<TStringSet, TCargo, TSpec> > >::Type&>(g.data_fragment[v].data_begin);
1253 }
1254 
1255 //////////////////////////////////////////////////////////////////////////////
1256 
1257 /*!
1258  * @fn AlignmentGraph#fragmentLength
1259  * @brief Gets the length of the fragment for this vertex descriptor in the sequence.
1260  *
1261  * @signature TSize fragmentLength(ag, v);
1262  *
1263  * @param[in] ag The AlignmentGraph to query.
1264  * @param[in] v  The vertex descriptor of the vertex.
1265  *
1266  * @return TPos The fragment size.
1267  */
1268 
1269 template<typename TStringSet, typename TCargo, typename TSpec, typename TVertexDescriptor>
1270 inline typename Size<Graph<Alignment<TStringSet, TCargo, TSpec> > >::Type&
1271 fragmentLength(Graph<Alignment<TStringSet, TCargo, TSpec> > const& g,
1272                TVertexDescriptor const v)
1273 {
1274     SEQAN_CHECKPOINT
1275     return const_cast<typename Size<Graph<Alignment<TStringSet, TCargo, TSpec> > >::Type&>(g.data_fragment[v].data_length);
1276 }
1277 
1278 //////////////////////////////////////////////////////////////////////////////
1279 
1280 /*!
1281  * @fn AlignmentGraph#findVertex
1282  * @brief Finds a vertx given a sequence id and a position.
1283  *
1284  * @signature TVertexDescriptor findVertex(ag, id, pos);
1285  *
1286  * @param[in] ag  The AlignmentGraph to search in.
1287  * @param[in] id  The sequence id to search for.
1288  * @param[in] pos The position to search for.
1289  *
1290  * @return TVertexDescriptor The vertex covering the given position on the specified sequence,
1291  * <tt>getNil&lt;TVertexDescriptor&gt;()</tt> if none could be found.
1292  */
1293 
1294 template<typename TStringSet, typename TCargo, typename TSpec, typename TSeqId, typename TPos>
1295 inline typename VertexDescriptor<Graph<Alignment<TStringSet, TCargo, TSpec> > >::Type
1296 findVertex(Graph<Alignment<TStringSet, TCargo, TSpec> >& g,
1297            TSeqId id,
1298            TPos pos)
1299 {
1300     SEQAN_CHECKPOINT
1301     typedef Graph<Alignment<TStringSet, TCargo, TSpec> > TGraph;
1302     typedef typename TGraph::TKey_ TKey;
1303     typedef typename Size<TGraph>::Type TSize;
1304     typedef typename Id<TGraph>::Type TIdType;
1305     typedef typename VertexDescriptor<TGraph>::Type TVertexDescriptor;
1306 
1307     return (pos >= (TPos) length(getValueById(stringSet(g),id))) ? getNil<TVertexDescriptor>() : g.data_pvMap.upper_bound(TKey((TIdType)id, (TSize)pos))->second;
1308 }
1309 
1310 //////////////////////////////////////////////////////////////////////////////
1311 
1312 /*!
1313  * @fn AlignmentGraph#getProjectedPosition
1314  * @brief Projects a position of one sequence taking part in a pairwise match onto the other sequence.
1315  *
1316  * @signature getProjectedPosition(ag, seqId, pos, seqId2, pos2);
1317  *
1318  * @param[in]  ag     The @link AlignmentGraph @endlink to query.
1319  * @param[in]  seqId  The sequenceId.
1320  * @param[in]  pos    The position to project from.
1321  * @param[out] seqId2 The resulting id of the sequence that was projected onto.
1322  * @param[out] pos2   The reuslting position of the sequence was was projected onto.
1323  */
1324 
1325 template<typename TStringSet, typename TCargo, typename TSpec, typename TSeqId, typename TPosition, typename TSeqId2, typename TPosition2>
1326 inline void
1327 getProjectedPosition(Graph<Alignment<TStringSet, TCargo, TSpec> >& g,
1328                      TSeqId const id1,
1329                      TPosition const pos1,
1330                      TSeqId2& id2,
1331                      TPosition2& pos2)
1332 {
1333     SEQAN_ASSERT(length(stringSet(g)) == 2);
1334 
1335     typedef Graph<Alignment<TStringSet, TCargo, TSpec> > TGraph;
1336     typedef typename VertexDescriptor<TGraph>::Type TVertexDescriptor;
1337     typedef typename EdgeType<TGraph>::Type TEdgeStump;
1338     TStringSet& str = stringSet(g);
1339     TVertexDescriptor sV = findVertex(g, id1, pos1);
1340 
1341     // Case 1: No projection possible
1342     if (sV == getNil<TVertexDescriptor>()) {
1343         if ( (TSeqId) positionToId(str, 0) == id1) id2 = (TSeqId2) positionToId(str,1);
1344         else id2 = (TSeqId2) positionToId(str,0);
1345         pos2 = 0;
1346         return;
1347     }
1348 
1349     // Case 2: Projection is possible
1350     TEdgeStump* current = getValue(g.data_align.data_vertex, sV);
1351     if(current != (TEdgeStump*) 0) {
1352         TVertexDescriptor tV = target(current);
1353         if (tV == sV) tV = source(current);
1354         pos2 = (TPosition2) (fragmentBegin(g,tV) + (pos1 - fragmentBegin(g, sV)));
1355         id2 = (TSeqId2) sequenceId(g, tV);
1356         return;
1357     } else {
1358         // If no out-going edge, get the preceding or following vertex
1359         if (fragmentBegin(g, sV) == 0) {
1360             getProjectedPosition(g, id1, fragmentBegin(g,sV) + fragmentLength(g, sV), id2, pos2);
1361             return;
1362         } else {
1363             getProjectedPosition(g, id1, fragmentBegin(g,sV) - 1, id2, pos2);
1364             return;
1365         }
1366     }
1367 }
1368 
1369 //////////////////////////////////////////////////////////////////////////////
1370 
1371 template<typename TStringSet, typename TValue, typename TCargo, typename TSpec, typename TSeqId, typename TPosition, typename TSeqId2, typename TPosition2>
1372 inline void
1373 getProjectedPosition(Graph<Alignment<TStringSet, TCargo, TSpec> >& g,
1374                      TValue seg_num,
1375                      TSeqId const id1,
1376                      TPosition const pos1,
1377                      TSeqId2& id2,
1378                      TPosition2& pos2)
1379 {
1380     SEQAN_ASSERT(length(stringSet(g)) == 2);
1381 
1382     typedef Graph<Alignment<TStringSet, TCargo, TSpec> > TGraph;
1383     typedef typename VertexDescriptor<TGraph>::Type TVertexDescriptor;
1384     typedef typename EdgeType<TGraph>::Type TEdgeStump;
1385     TStringSet& str = stringSet(g);
1386     TVertexDescriptor sV = findVertex(g, id1, pos1);
1387 
1388     // Case 1: No projection possible
1389     if (sV == getNil<TVertexDescriptor>()) {
1390         if ( (TSeqId) positionToId(str, 0) == id1) id2 = (TSeqId2) positionToId(str,1);
1391         else id2 = (TSeqId2) positionToId(str,0);
1392         pos2 = 0;
1393         return;
1394     }
1395 
1396     // Case 2: Projection is possible
1397     TEdgeStump* current = getValue(g.data_align.data_vertex, sV);
1398     if(current != (TEdgeStump*) 0) {
1399         TVertexDescriptor tV = target(current);
1400         if (seg_num == 0) tV = source(current); // segnum 0 is defined as the sourceVertex, segnum 1 is targetVertex
1401         pos2 = (TPosition2) (fragmentBegin(g,tV) + (pos1 - fragmentBegin(g, sV)));
1402         id2 = (TSeqId2) sequenceId(g, tV);
1403         return;
1404     } else {
1405         // If no out-going edge, get the preceding or following vertex
1406         if (fragmentBegin(g, sV) == 0) {
1407             getProjectedPosition(g, seg_num, id1, fragmentBegin(g,sV) + fragmentLength(g, sV), id2, pos2);
1408             return;
1409         } else {
1410             getProjectedPosition(g, seg_num, id1, fragmentBegin(g,sV) - 1, id2, pos2);
1411             return;
1412         }
1413     }
1414 }
1415 
1416 //////////////////////////////////////////////////////////////////////////////
1417 
1418 /*!
1419  * @fn AlignmentGraph#getFirstCoveredPosition
1420  * @brief Finds the first position in a sequence that is not assigned to a nil vertex.
1421  *
1422  * @signature TPos getFirstCoveredPosition(ag, id);
1423  *
1424  * @param[in] ag An @link AlignmentGraph @endlink.
1425  * @param[in] id A sequence id.
1426  *
1427  * @return TPos A sequence position.
1428  *
1429  * @see AlignmentGraph#getLastCoveredPosition
1430  */
1431 
1432 template<typename TStringSet, typename TCargo, typename TSpec, typename TSeqId>
1433 inline typename Position<Graph<Alignment<TStringSet, TCargo, TSpec> > >::Type
1434 getFirstCoveredPosition(Graph<Alignment<TStringSet, TCargo, TSpec> > const& g,
1435                         TSeqId const id)
1436 {
1437     typedef Graph<Alignment<TStringSet, TCargo, TSpec> > TGraph;
1438     typedef typename VertexDescriptor<TGraph>::Type TVertexDescriptor;
1439     typedef typename TGraph::TPosToVertexMap_ TPosToVertexMap;
1440 
1441 
1442     typename TPosToVertexMap::const_iterator it = g.data_pvMap.upper_bound(std::make_pair(id, 0));
1443 
1444     // Case 1: id is not covered
1445     if (it == g.data_pvMap.end()) return length(getValueById(stringSet(g), id));
1446 
1447     // Case 2: We found a nil vertex, go one forward
1448     if (it->second == getNil<TVertexDescriptor>()) {
1449         ++it;
1450         if (it == g.data_pvMap.end()) return length(getValueById(stringSet(g), id));
1451     }
1452 
1453     // Now we have the right vertex, return the beginning if the sequence id still fits
1454     if (it->first.first != id) return length(getValueById(stringSet(g), id));
1455     else return fragmentBegin(g, it->second);
1456 }
1457 
1458 //////////////////////////////////////////////////////////////////////////////
1459 
1460 /*!
1461  * @fn AlignmentGraph#getLastCoveredPosition
1462  * @brief Finds the last position in a sequence that is not assigned to a nil vertex.
1463  *
1464  * @signature TPos getLastCoveredPosition(ag, id);
1465  *
1466  * @param[in] ag An @link AlignmentGraph @endlink.
1467  * @param[in] id A sequence id.
1468  *
1469  * @return TPos A sequence position.
1470  *
1471  * @see AlignmentGraph#getFirstCoveredPosition
1472  */
1473 
1474 template<typename TStringSet, typename TCargo, typename TSpec, typename TSeqId>
1475 inline typename Position<Graph<Alignment<TStringSet, TCargo, TSpec> > >::Type
1476 getLastCoveredPosition(Graph<Alignment<TStringSet, TCargo, TSpec> >& g,
1477                        TSeqId id)
1478 {
1479     typedef Graph<Alignment<TStringSet, TCargo, TSpec> > TGraph;
1480     typedef typename VertexDescriptor<TGraph>::Type TVertexDescriptor;
1481     typedef typename TGraph::TPosToVertexMap_ TPosToVertexMap;
1482 
1483     TVertexDescriptor nilVertex = getNil<TVertexDescriptor>();
1484     typename TPosToVertexMap::const_iterator it = g.data_pvMap.lower_bound(std::make_pair(id, length(getValueById(stringSet(g), id))));
1485 
1486     // Case 1: No last position (all nil)
1487     if ((it == g.data_pvMap.begin()) && (it->second == nilVertex)) return 0;
1488 
1489     // Case 2: Found a nil position but there is a vertex before
1490     if (it->second == nilVertex) {
1491         --it;
1492     }
1493 
1494     // If the sequence id still matches return the position behind the last position belonging to this vertex
1495     if (it->first.first != id) return 0;
1496     return fragmentBegin(g, it->second) + fragmentLength(g, it->second);
1497 }
1498 
1499 
1500 //////////////////////////////////////////////////////////////////////////////
1501 
1502 /*!
1503  * @fn AlignmentGraph#convertAlignment
1504  * @brief Converts an alignment graph into an alignment matrix.
1505  *
1506  * @signature bool convertAlignment(g, component, order, compLength);
1507  * @signature bool convertAlignment(g, matrix);
1508  *
1509  * @param[in]  g          AlignmentGraph to convert.  An alignment graph.
1510  * @param[out] component  Vertex to component mapping.
1511  * @param[out] order      The order of the component graph when sorting topologically.
1512  * @param[out] compLength Component sizes.
1513  * @param[out] matrix     A string that represents an alignment matrix.
1514  *
1515  * @return bool true iff the alignment graph is a valid alignment and false otherwise.
1516  *
1517  * @section Remarks
1518  *
1519  * The variant with <tt>component</tt> and <tt>order</tt> computes a topological sorting of connected components.
1520  */
1521 
1522 template<typename TStringSet, typename TCargo, typename TSpec, typename TComponentMap, typename TOrderMap, typename TComponentLength>
1523 inline bool
1524 convertAlignment(Graph<Alignment<TStringSet, TCargo, TSpec> > const& g,
1525                  TComponentMap& component,
1526                  TOrderMap& order,
1527                  TComponentLength& compLength)
1528 {
1529     SEQAN_CHECKPOINT
1530     typedef Graph<Alignment<TStringSet, TCargo, TSpec> > TGraph;
1531     typedef typename VertexDescriptor<TGraph>::Type TVertexDescriptor;
1532     typedef typename Id<TGraph>::Type TIdType;
1533     typedef typename Size<TGraph>::Type TSize;
1534     typedef typename Value<TComponentMap>::Type TComponent;
1535     typedef typename TGraph::TPosToVertexMap_ TPosToVertexMap;
1536     typedef typename TComponentLength::mapped_type TMappedType;
1537     typedef typename TComponentLength::key_type TKeyType;
1538     TVertexDescriptor nilVertex = getNil<TVertexDescriptor>();
1539 
1540     // Check for empty graph
1541     if (empty(g)) return false;
1542 
1543     // Connected Components
1544     TSize numComponents = connectedComponents(component, g);
1545 
1546     // Make a directed graph to represent the ordering of the components
1547     // Note: Multiple vertices might have the same component
1548     Graph<Directed<void, WithoutEdgeId> > componentGraph;
1549     reserve(_getVertexString(componentGraph), numComponents);
1550     for(TSize i = 0; i<numComponents;++i) addVertex(componentGraph);
1551 
1552     TSize nseq = length(value(g.data_sequence));
1553     String<std::set<TComponent> > componentsPerSeq;
1554     typedef String<String<TComponent> > TOrderedComponents;
1555     TOrderedComponents orderedComponentsPerSeq;
1556     resize(componentsPerSeq, nseq);
1557     resize(orderedComponentsPerSeq, nseq);
1558     typename TPosToVertexMap::const_iterator it1 = g.data_pvMap.begin();
1559     typename TPosToVertexMap::const_iterator it1End = g.data_pvMap.end();
1560     for(;it1!=it1End;++it1) {
1561         // If sections are not assigned to a vertex -> no alignment
1562         if (it1->second == nilVertex) return false;
1563 
1564         // Remember the sequence that component belongs to
1565         TSize currentSeq = idToPosition(value(g.data_sequence), it1->first.first);
1566 
1567         // Append component
1568         TComponent c = getProperty(component, it1->second);
1569         if ((value(componentsPerSeq,currentSeq)).empty()) {
1570             String<TComponent> tmp;
1571             value(orderedComponentsPerSeq, currentSeq) = tmp;
1572         }
1573         appendValue(value(orderedComponentsPerSeq, currentSeq), c, Generous());
1574         // If two components appear twice in the same sequence -> no alignment
1575         if (!((value(componentsPerSeq,currentSeq)).insert(c)).second) return false;
1576     }
1577     clear(componentsPerSeq);
1578 
1579     // Draw edges for the components within a sequence
1580     typedef typename Iterator<TOrderedComponents>::Type TIterTOrderedComponents;
1581     TIterTOrderedComponents itBegin = begin(orderedComponentsPerSeq);
1582     TIterTOrderedComponents itEnd = end(orderedComponentsPerSeq);
1583     for(;itBegin != itEnd; ++itBegin) {
1584         TSize n = length(*itBegin);
1585         for(TSize i = 0; i<n-1; ++i) {
1586             addEdge(componentGraph, value((*itBegin), i), value((*itBegin), i+1));
1587         }
1588     }
1589 
1590     // Make a topological sort of the component graph
1591     topologicalSort(order, componentGraph);
1592 
1593     //// Debug code
1594     //std::cout << "Topological sort: " << std::endl;
1595     //for(TSize i = 0; i<length(order);++i) {
1596     //    std::cout << order[i] << ',';
1597     //}
1598     //std::cout << std::endl;
1599 
1600     // Walk through all sequences and check the component order
1601     unsigned int compIndex = 0;
1602     unsigned int compIndexLen = length(order);
1603     typename TPosToVertexMap::const_iterator it = g.data_pvMap.begin();
1604     TIdType currentSeq = it->first.first;
1605     for(; it != g.data_pvMap.end(); ++it) {
1606         if (it->first.first != currentSeq) {
1607             compIndex = 0;
1608             currentSeq = it->first.first;
1609         }
1610         TKeyType c = (TKeyType) getProperty(component, it->second);
1611         if(!compLength.insert(std::make_pair(c, (TMappedType) fragmentLength(g, it->second))).second) {
1612             compLength[c] = _max(compLength[c], (TMappedType) fragmentLength(g, it->second));
1613         }
1614         while ((compIndex < compIndexLen) && (order[compIndex] != c)) ++compIndex;
1615         // Crossing components -> no alignment
1616         if (compIndex >= compIndexLen) return false;
1617         // Next component
1618         ++compIndex;
1619     }
1620 
1621     return true;
1622 }
1623 
1624 
1625 //////////////////////////////////////////////////////////////////////////////
1626 
1627 template<typename TStringSet, typename TCargo, typename TSpec, typename TMatrix>
1628 inline bool
1629 convertAlignment(Graph<Alignment<TStringSet, TCargo, TSpec> > const& g,
1630                  TMatrix& mat)
1631 {
1632     SEQAN_CHECKPOINT
1633     typedef Graph<Alignment<TStringSet, TCargo, TSpec> > TGraph;
1634     typedef typename Value<TMatrix>::Type TValue;
1635     typedef typename Size<TGraph>::Type TSize;
1636     typedef typename Id<TGraph>::Type TIdType;
1637     typedef typename TGraph::TPosToVertexMap_ TPosToVertexMap;
1638     typedef std::map<unsigned int, unsigned int> TComponentLength;
1639 
1640     // Strongly Connected Components, topological sort, and length of each component
1641     String<unsigned int> component;
1642     String<unsigned int> order;
1643     TComponentLength compLength;
1644 
1645     if (!convertAlignment(g, component, order, compLength)) return false;
1646 
1647     // Create the matrix
1648     TSize len = 0;
1649     TSize nseq = length(stringSet(g));
1650     for(TComponentLength::iterator cIt=compLength.begin(); cIt != compLength.end(); ++cIt) len+=cIt->second;
1651     char gapChar = gapValue<char>();
1652     resize(mat, len * nseq, gapChar);
1653 
1654     // Fill the matrix
1655     TSize row = 0;
1656     TSize col = 0;
1657     typename TPosToVertexMap::const_iterator it = g.data_pvMap.begin();
1658     unsigned int compIndex = 0;
1659     unsigned int compIndexLen = length(order);
1660     TIdType currentSeq = it->first.first;
1661     for(; it != g.data_pvMap.end(); ++it) {
1662         if (it->first.first != currentSeq) {
1663             SEQAN_ASSERT(col <= len);
1664             //std::cout << std::endl;
1665             ++row;col=0;
1666             compIndex = 0;
1667             currentSeq = it->first.first;
1668         }
1669         unsigned int c = getProperty(component, it->second);
1670         while ((compIndex < compIndexLen) && (order[compIndex] != c)) {
1671             for(TSize i=0;i<compLength[order[compIndex]];++i, ++col)
1672                 mat[row*len + col] = gapValue<char>();
1673             ++compIndex;
1674         }
1675         typedef typename Value<TStringSet>::Type TStringSetStr;
1676         TStringSetStr str = label(g,it->second);
1677         typedef typename Iterator<TStringSetStr, Standard>::Type TStringSetStrIter;
1678         TStringSetStrIter itStr = begin(str, Standard());
1679         TStringSetStrIter itStrEnd = end(str, Standard());
1680         for(;itStr != itStrEnd; goNext(itStr), ++col)
1681             mat[row*len + col] = (TValue) (*itStr);
1682         for(TSize i = length(str); i < compLength[order[compIndex]]; ++i, ++col)
1683             mat[row*len + col] = gapValue<char>();
1684         ++compIndex;
1685     }
1686     SEQAN_ASSERT(row + 1 == nseq);
1687     //std::cout << std::endl;
1688 
1689     return true;
1690 }
1691 
1692 
1693 
1694 //////////////////////////////////////////////////////////////////////////////
1695 
1696 template<typename TValue, typename TSpec1, typename TStringSet, typename TCargo, typename TSpec2>
1697 inline bool
1698 convertAlignment(String<TValue, TSpec1> const& mat,
1699                  Graph<Alignment<TStringSet, TCargo, TSpec2> >& g)
1700 {
1701     typedef String<TValue, TSpec1> TMatrix;
1702     typedef Graph<Alignment<TStringSet, TCargo, TSpec2> > TGraph;
1703     typedef typename Size<TGraph>::Type TSize;
1704     typedef typename Iterator<TMatrix, Standard>::Type TMatIter;
1705     clearVertices(g);
1706     TValue gapChar = gapValue<TValue>();
1707     TSize nseq = length(stringSet(g));
1708     TSize alignLen = length(mat) / nseq;
1709     String<Fragment<> > matches;
1710     for(TSize seq1 = 0; seq1<nseq; ++seq1) {
1711         for(TSize seq2 = seq1 + 1; seq2<nseq; ++seq2) {
1712             TMatIter seq1It = begin(mat);
1713             seq1It += seq1 * alignLen;
1714             TMatIter seq2It = begin(mat);
1715             seq2It += seq2 * alignLen;
1716             TSize alignPos = 0;
1717             TSize length = 0;
1718             TSize offset1 = 0;
1719             TSize offset2 = 0;
1720             for(TSize col = 0; col<alignLen; ++col, ++seq1It, ++seq2It, ++alignPos) {
1721                 if ((*seq1It == gapChar) || (*seq2It == gapChar)) {
1722                     if (length) {
1723                         appendValue(matches, Fragment<>(seq1, alignPos - offset1 - length, seq2, alignPos - offset2 - length, length));
1724                         length = 0;
1725                     }
1726                     if (*seq1It == gapChar) ++offset1;
1727                     if (*seq2It == gapChar) ++offset2;
1728                 } else ++length;
1729             }
1730             if (length) appendValue(matches, Fragment<>(seq1, alignPos - offset1 - length, seq2, alignPos - offset2 - length, length));
1731         }
1732     }
1733     //_debugMatches(stringSet(g), matches);
1734     matchRefinement(matches,stringSet(g),g);
1735     return true;
1736 }
1737 
1738 
1739 
1740 //////////////////////////////////////////////////////////////////////////////
1741 
1742 
1743 template<typename TStringSet, typename TCargo, typename TSpec>
1744 inline void
1745 rebuildGraph(Graph<Alignment<TStringSet, TCargo, TSpec> >& g)
1746 {
1747     typedef Graph<Alignment<TStringSet, TCargo, TSpec> > TGraph;
1748     typedef typename VertexDescriptor<TGraph>::Type TVertexDescriptor;
1749     typedef typename Iterator<TGraph, EdgeIterator>::Type TEdgeIterator;
1750     typedef typename Size<TGraph>::Type TSize;
1751 
1752     // Initialization
1753     typedef Fragment<> TFragment;
1754     typedef String<TFragment> TFragmentString;
1755     TFragmentString matches;
1756     TSize nseq = length(stringSet(g));
1757 
1758     // Collect all character pairs
1759     typedef std::pair<TSize, TSize> TResiduePair;
1760     typedef std::set<TResiduePair> TResiduePairSet;
1761     String<TResiduePairSet> resPair;
1762     resize(resPair, nseq * nseq);
1763     TEdgeIterator itE(g);
1764     for(;!atEnd(itE);++itE) {
1765         TVertexDescriptor sV = sourceVertex(itE);
1766         TVertexDescriptor tV = targetVertex(itE);
1767         TSize seq1 = idToPosition(stringSet(g), sequenceId(g, sV));
1768         TSize seq2 = idToPosition(stringSet(g), sequenceId(g, tV));
1769         TSize index = 0;
1770         TSize pos1 = 0;
1771         TSize pos2 = 0;
1772         if (seq1 < seq2) {
1773             index = seq1 * nseq + seq2;
1774             pos1 = fragmentBegin(g, sV);
1775             pos2 = fragmentBegin(g, tV);
1776         } else {
1777             index = seq2 * nseq + seq1;
1778             pos1 = fragmentBegin(g, tV);
1779             pos2 = fragmentBegin(g, sV);
1780         }
1781         for(TSize i = 0; i<fragmentLength(g, sV); ++i) {
1782             resPair[index].insert(std::make_pair(pos1 + i, pos2 + i));
1783         }
1784     }
1785 
1786     // Rebuild the graph with maximal segments
1787     for(TSize i = 0; i<length(resPair); ++i) {
1788         if (resPair[i].empty()) continue;
1789         TSize seq1 = i / nseq;
1790         TSize seq2 = i % nseq;
1791         typename TResiduePairSet::const_iterator pos = resPair[i].begin();
1792         typename TResiduePairSet::const_iterator posEnd = resPair[i].end();
1793         TSize startMatch1 = pos->first;
1794         TSize startMatch2 = pos->second;
1795         TSize len = 1;
1796         ++pos;
1797         while(pos != posEnd) {
1798             if ((startMatch1 + len == pos->first) && (startMatch2 + len == pos->second)) ++len;
1799             else {
1800                 appendValue(matches, TFragment(seq1, startMatch1, seq2, startMatch2, len), Generous());
1801                 startMatch1 = pos->first;
1802                 startMatch2 = pos->second;
1803                 len = 1;
1804             }
1805             ++pos;
1806         }
1807         appendValue(matches, TFragment(seq1, startMatch1, seq2, startMatch2, len), Generous());
1808     }
1809     clearVertices(g);
1810     matchRefinement(matches,stringSet(g),g);
1811 }
1812 
1813 
1814 
1815 
1816 
1817 //////////////////////////////////////////////////////////////////////////////
1818 // Heaviest Common Subsequence adaptation
1819 //////////////////////////////////////////////////////////////////////////////
1820 
1821 //////////////////////////////////////////////////////////////////////////////
1822 
1823 template<typename TStringSet, typename TCargo, typename TSpec, typename TSize2, typename TSpec2, typename TPositions, typename TSize, typename TVertexDescriptor, typename TString>
1824 inline void
1825 _heaviestCommonSubsequence(Graph<Alignment<TStringSet, TCargo, TSpec> > const&,
1826                             String<TSize2, TSpec2> const& /*slotToPos*/,
1827                             TPositions const&,
1828                             TSize const,
1829                             TSize const,
1830                             TVertexDescriptor const,
1831                             TString const&,
1832                             TString const&,
1833                             Nothing&)
1834 {
1835     SEQAN_CHECKPOINT
1836 }
1837 
1838 //////////////////////////////////////////////////////////////////////////////
1839 
1840 template<typename TStringSet, typename TCargo, typename TSpec, typename TSize2, typename TSpec2, typename TPositions, typename TSize, typename TString, typename TOutString>
1841 inline void
1842 _heaviestCommonSubsequence(Graph<Alignment<TStringSet, TCargo, TSpec> > const& g,
1843                             String<TSize2, TSpec2> const& slotToPos,
1844                             TPositions const& positions,
1845                             TSize const m,
1846                             TSize const n,
1847                             TString const& str1,
1848                             TString const& str2,
1849                             TOutString& align)
1850 {
1851     SEQAN_CHECKPOINT
1852     typedef typename Value<TString>::Type TVertexSet;
1853     typedef typename Iterator<TString const, Standard>::Type TStringIter;
1854     typedef typename Iterator<TString, Standard>::Type TSIter;
1855     typedef typename Iterator<TVertexSet const, Standard>::Type TVertexSetIter;
1856 
1857     // Create the alignment sequence
1858     TSize numMatches = length(positions);
1859     TSize alignLength = numMatches + (n - numMatches) + (m - numMatches);
1860     clear(align);
1861     resize(align, alignLength, TVertexSet(), Exact() );
1862     TSIter pointerAlign = begin(align, Standard());
1863     TSIter pointerAlignEnd = end(align, Standard());
1864     TStringIter pointerStr1 = begin(str1, Standard());
1865     TSize posStr1 = 0;
1866     TSize posStr2 = 0;
1867     TStringIter pointerStr2 = begin(str2, Standard());
1868     int p = length(positions) - 1;
1869     while(pointerAlign != pointerAlignEnd) {
1870         TSize i = m;
1871         TSize j = n;
1872         if (p>=0) {
1873             i = (TSize) (slotToPos[positions[p]] / (TSize) n);   // Get the index in str1
1874             j = n - 1 - (TSize) (slotToPos[positions[p]] % (TSize) n); // Get the index in str2
1875         };
1876 
1877         // In what order do we insert gaps? -> Only important at the beginning and at the end, not between matches
1878         bool firstI = true;
1879         if ((i != posStr1) && (j != posStr2))
1880         {
1881             if ((posStr1 == 0) && (posStr2 == 0)) {
1882                 TStringIter tmpPointerStr1 = pointerStr1;
1883                 TStringIter tmpPointerStr2 = pointerStr2;
1884                 TSize tmpPosStr1 = posStr1;
1885                 TSize tmpPosStr2 = posStr2;
1886                 TSize len1 = 0;
1887                 TSize len2 = 0;
1888                 for(;i != tmpPosStr1; ++tmpPosStr1, ++tmpPointerStr1) len1 += fragmentLength(g, value(*tmpPointerStr1, 0));
1889                 for(;j != tmpPosStr2; ++tmpPosStr2, ++tmpPointerStr2) len2 += fragmentLength(g, value(*tmpPointerStr2, 0));
1890                 if (len1 > len2) firstI = false;
1891             } else if ((i == m) && (i == n)) {
1892                 TStringIter tmpPointerStr1 = pointerStr1;
1893                 TStringIter tmpPointerStr2 = pointerStr2;
1894                 TSize tmpPosStr1 = posStr1;
1895                 TSize tmpPosStr2 = posStr2;
1896                 TSize len1 = 0;
1897                 TSize len2 = 0;
1898                 for(;i != tmpPosStr1; ++tmpPosStr1, ++tmpPointerStr1) len1 += fragmentLength(g, value(*tmpPointerStr1, 0));
1899                 for(;j != tmpPosStr2; ++tmpPosStr2, ++tmpPointerStr2) len2 += fragmentLength(g, value(*tmpPointerStr2, 0));
1900                 if (len1 < len2) firstI = false;
1901             }
1902         }
1903         if (firstI) {
1904             // Gaps in seq 2
1905             while (i != posStr1) {
1906                 TVertexSetIter itV = begin(*pointerStr1, Standard());
1907                 TVertexSetIter itVEnd = end(*pointerStr1, Standard());
1908                 for(;itV != itVEnd;++itV) appendValue(*pointerAlign, *itV, Generous());
1909                 ++pointerAlign;
1910                 ++pointerStr1; ++posStr1;
1911             }
1912             // Gaps in seq 1
1913             while (j != posStr2) {
1914                 TVertexSetIter itV = begin(*pointerStr2, Standard());
1915                 TVertexSetIter itVEnd = end(*pointerStr2, Standard());
1916                 for(;itV != itVEnd;++itV) appendValue(*pointerAlign, *itV, Generous());
1917                 ++pointerAlign;
1918                 ++pointerStr2; ++posStr2;
1919             }
1920         } else {
1921             // Gaps in seq 1
1922             while (j != posStr2) {
1923                 TVertexSetIter itV = begin(*pointerStr2, Standard());
1924                 TVertexSetIter itVEnd = end(*pointerStr2, Standard());
1925                 for(;itV != itVEnd;++itV) appendValue(*pointerAlign, *itV, Generous());
1926                 ++pointerAlign;
1927                 ++pointerStr2; ++posStr2;
1928             }
1929             // Gaps in seq 2
1930             while (i != posStr1) {
1931                 TVertexSetIter itV = begin(*pointerStr1, Standard());
1932                 TVertexSetIter itVEnd = end(*pointerStr1, Standard());
1933                 for(;itV != itVEnd;++itV) appendValue(*pointerAlign, *itV, Generous());
1934                 ++pointerAlign;
1935                 ++pointerStr1; ++posStr1;
1936             }
1937         }
1938 
1939         // Matches
1940         if (p>=0) {
1941             TVertexSetIter itV = begin(*pointerStr1, Standard());
1942             TVertexSetIter itVEnd = end(*pointerStr1, Standard());
1943             for(;itV != itVEnd;++itV) appendValue(*pointerAlign, *itV, Generous());
1944             TVertexSetIter itV2 = begin(*pointerStr2, Standard());
1945             TVertexSetIter itVEnd2 = end(*pointerStr2, Standard());
1946             for(;itV2 != itVEnd2;++itV2) appendValue(*pointerAlign, *itV2, Generous());
1947             ++pointerAlign;
1948             ++pointerStr1; ++posStr1;
1949             ++pointerStr2; ++posStr2;
1950             --p;
1951         }
1952     }
1953 }
1954 
1955 
1956 
1957 
1958 //////////////////////////////////////////////////////////////////////////////
1959 
1960 // TODO(holtgrew): Where does this belong? This is also there in graph_algorithms.h...
1961 
1962 template<typename TStringSet, typename TCargo, typename TSpec, typename TString, typename TOutString>
1963 inline TCargo
1964 heaviestCommonSubsequence(Graph<Alignment<TStringSet, TCargo, TSpec> > const& g,
1965                           TString const& str1,
1966                           TString const& str2,
1967                           TOutString& align)
1968 {
1969     SEQAN_CHECKPOINT
1970     typedef Graph<Alignment<TStringSet, TCargo, TSpec> > TGraph;
1971     typedef typename Size<TGraph>::Type TSize;
1972     typedef typename Iterator<TGraph, OutEdgeIterator>::Type TOutEdgeIterator;
1973     typedef typename Value<TString>::Type TVertexSet;
1974 
1975     TSize m = length(str1);  // How many sets of vertex descriptors in seq1
1976     TSize n = length(str2);  // How many sets of vertex descriptors in seq2
1977 
1978     // Size of the sequences
1979     // Note for profile alignments every member of the sequence is a String!!! of vertex descriptors
1980 
1981     // Fill the vertex to position map for str1
1982     // Remember for each vertex descriptor the position in the sequence
1983     typedef String<TSize> TMapVertexPos;
1984     TMapVertexPos map;
1985     resize(map, getIdUpperBound(_getVertexIdManager(g)), MaxValue<TSize>::VALUE);
1986     typedef typename Iterator<TString const, Standard>::Type TStringIterConst;
1987     typedef typename Iterator<TVertexSet const, Standard>::Type TVertexSetIterConst;
1988     TStringIterConst itStr1 = begin(str1, Standard());
1989     TStringIterConst itStrEnd1 = end(str1, Standard());
1990     TSize pos = 0;
1991     TVertexSetIterConst itV;
1992     TVertexSetIterConst itVEnd;
1993     for(;itStr1 != itStrEnd1;++itStr1, ++pos) {
1994         itV = begin(*itStr1, Standard());
1995         itVEnd = end(*itStr1, Standard());
1996         for(;itV != itVEnd;++itV) map[*itV] = pos;
1997     }
1998 
1999     // We could create the full graph -> too expensive
2000     // Remember which edges are actually present
2001     typedef String<TSize> TOccupiedPositions;
2002     typedef typename Iterator<TOccupiedPositions, Standard>::Type TOccIter;
2003     TOccupiedPositions occupiedPositions;
2004     TStringIterConst itStr2 = begin(str2, Standard());
2005     TStringIterConst itStrEnd2 = end(str2, Standard());
2006     TSize posItStr2 = 0;
2007     TSize pPos = 0;
2008     for(;itStr2 != itStrEnd2;++itStr2, ++posItStr2) {
2009         itV = begin(*itStr2, Standard());
2010         itVEnd = end(*itStr2, Standard());
2011         for(;itV != itVEnd;++itV) {
2012             TOutEdgeIterator itOut(g, *itV);
2013             for(;!atEnd(itOut); ++itOut) {
2014                 // Target vertex must be in the map
2015                 pPos = map[targetVertex(itOut)];
2016                 if (pPos != MaxValue<TSize>::VALUE)
2017                     appendValue(occupiedPositions, pPos * n + (TSize) (n - posItStr2 - 1), Generous());
2018             }
2019         }
2020     }
2021     std::sort(begin(occupiedPositions, Standard()), end(occupiedPositions, Standard()));
2022     // Get all occupied positions
2023     typedef String<TSize> TSlotToPos;
2024     typedef typename Iterator<TSlotToPos, Standard>::Type TSlotToPosIter;
2025     TSlotToPos slotToPos;
2026     TSize counter = 0;
2027     TSize oldVal = MaxValue<TSize>::VALUE;
2028     TOccIter occIt = begin(occupiedPositions, Standard());
2029     TOccIter occItEnd = end(occupiedPositions, Standard());
2030     for(;occIt != occItEnd; ++occIt) {
2031         if (oldVal != *occIt) {
2032             appendValue(slotToPos, *occIt, Generous());
2033             oldVal = *occIt;
2034             ++counter;
2035         }
2036     }
2037     clear(occupiedPositions);
2038 
2039     // Walk through str2 and fill in the weights of the actual edges
2040     typedef String<TCargo> TWeights;
2041     TWeights weights;
2042     resize(weights, length(slotToPos), 0);
2043     itStr2 = begin(str2, Standard());
2044     posItStr2 = 0;
2045     for(;itStr2 != itStrEnd2;++itStr2, ++posItStr2) {
2046         itV = begin(*itStr2, Standard());
2047         itVEnd = end(*itStr2, Standard());
2048         for(;itV != itVEnd;++itV) {
2049             TOutEdgeIterator itOut(g, *itV);
2050             for(;!atEnd(itOut); ++itOut) {
2051                 // Target vertex must be in the map
2052                 pPos = map[targetVertex(itOut)];
2053                 if ( pPos != MaxValue<TSize>::VALUE)
2054                     weights[std::distance(begin(slotToPos, Standard()), std::lower_bound(begin(slotToPos, Standard()), end(slotToPos, Standard()), pPos * n + (TSize) (n - posItStr2 - 1)))] += (TCargo) cargo(*itOut);
2055             }
2056         }
2057     }
2058     clear(map);
2059 
2060     // Now the tough part: Find the right number for a given position
2061     typedef String<TSize> TSequenceString;
2062     typedef typename Iterator<TSequenceString, Standard>::Type TSeqIter;
2063     TSequenceString seq;
2064     resize(seq, length(slotToPos));
2065     TSeqIter itSeq = begin(seq, Standard());
2066     TSlotToPosIter itSlotPos = begin(slotToPos, Standard());
2067     TSlotToPosIter itSlotPosEnd = end(slotToPos, Standard());
2068     for(;itSlotPos != itSlotPosEnd; ++itSlotPos, ++itSeq)
2069         *itSeq = n - 1 - (*itSlotPos % n);
2070 
2071     // Calculate the heaviest increasing subsequence
2072     String<TSize> positions;
2073     TCargo score = (TCargo) heaviestIncreasingSubsequence(seq, weights, positions);
2074 
2075     // Retrieve the alignment sequence
2076     _heaviestCommonSubsequence(g, slotToPos, positions, m, n, str1, str2, align);
2077 
2078     return score;
2079 
2080 }
2081 
2082 //////////////////////////////////////////////////////////////////////////////
2083 
2084 template<typename TStringSet, typename TCargo, typename TSpec, typename TString>
2085 inline TCargo
2086 heaviestCommonSubsequence(Graph<Alignment<TStringSet, TCargo, TSpec> > const& g,
2087                           TString const& str1,
2088                           TString const& str2)
2089 {
2090     SEQAN_CHECKPOINT
2091     Nothing noth;
2092     return heaviestCommonSubsequence(g, str1, str2, noth);
2093 }
2094 
2095 template <typename TStringSet, typename TCargo, typename TSpec, typename TSequenceH, typename TSequenceV, typename TId, typename TPos, typename TTraceValue>
2096 inline void
2097 _alignTracePrint(Graph<Alignment<TStringSet, TCargo, TSpec> >& g,
2098                  TSequenceH const &,
2099                  TSequenceV const &,
2100                  TId const id1,
2101                  TPos const pos1,
2102                  TId const id2,
2103                  TPos const pos2,
2104                  TPos const segLen,
2105                  TTraceValue const tv)
2106 {
2107     // TraceBack values
2108     TTraceValue Diagonal = 0; TTraceValue Horizontal = 1; TTraceValue Vertical = 2;
2109 
2110     if (segLen == 0) return;
2111 
2112     if (tv == Horizontal)
2113         addVertex(g, id1, pos1, segLen);
2114     else if (tv == Vertical)
2115         addVertex(g, id2, pos2, segLen);
2116     else if (tv == Diagonal)
2117         addEdge(g, addVertex(g, id1, pos1, segLen), addVertex(g, id2, pos2, segLen));
2118 }
2119 
2120 // ----------------------------------------------------------------------------
2121 // Function _alignTracePrint()                            [String<Fragment<> >]
2122 // ----------------------------------------------------------------------------
2123 
2124 // TODO(holtgrew): Are TSequenceH and TSequenceV used *anywhere*.
2125 
2126 template <typename TFragment, typename TSequenceH, typename TSequenceV, typename TId, typename TPos, typename TTraceValue>
2127 inline void
2128 _alignTracePrint(String<TFragment>& matches,
2129                  TSequenceH const &,
2130                  TSequenceV const &,
2131                  TId const id1,
2132                  TPos const pos1,
2133                  TId const id2,
2134                  TPos const pos2,
2135                  TPos const seqLen,
2136                  TTraceValue const tv)
2137 {
2138     // Only the diagonal case
2139     if ((seqLen) && (tv == 0))
2140         appendValue(matches, TFragment(id1, pos1, id2, pos2, seqLen), Generous());
2141 }
2142 
2143 }  // namespace seqan
2144 
2145 #endif  // #ifndef SEQAN_INCLUDE_SEQAN_GRAPH_ALIGN_GRAPH_IMPL_ALIGN_H_
2146