1 // ========================================================================== 2 // SeqAn - The Library for Sequence Analysis 3 // ========================================================================== 4 // Copyright (c) 2006-2015, Knut Reinert, FU Berlin 5 // All rights reserved. 6 // 7 // Redistribution and use in source and binary forms, with or without 8 // modification, are permitted provided that the following conditions are met: 9 // 10 // * Redistributions of source code must retain the above copyright 11 // notice, this list of conditions and the following disclaimer. 12 // * Redistributions in binary form must reproduce the above copyright 13 // notice, this list of conditions and the following disclaimer in the 14 // documentation and/or other materials provided with the distribution. 15 // * Neither the name of Knut Reinert or the FU Berlin nor the names of 16 // its contributors may be used to endorse or promote products derived 17 // from this software without specific prior written permission. 18 // 19 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 20 // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 22 // ARE DISCLAIMED. IN NO EVENT SHALL KNUT REINERT OR THE FU BERLIN BE LIABLE 23 // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 25 // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 26 // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 27 // LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 28 // OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH 29 // DAMAGE. 30 // 31 // ========================================================================== 32 // Author: Tobias Rausch <rausch@embl.de> 33 // Author: Anne-Katrin Emde <anne-katrin.emde@fu-berlin.de> 34 // ========================================================================== 35 36 #ifndef SEQAN_INCLUDE_SEQAN_GRAPH_ALIGN_GRAPH_IMPL_ALIGN_H_ 37 #define SEQAN_INCLUDE_SEQAN_GRAPH_ALIGN_GRAPH_IMPL_ALIGN_H_ 38 39 namespace seqan { 40 41 ////////////////////////////////////////////////////////////////////////////// 42 // Alignment Graph 43 ////////////////////////////////////////////////////////////////////////////// 44 45 ////////////////////////////////////////////////////////////////////////////// 46 47 48 49 50 51 52 ////////////////////////////////////////////////////////////////////////////// 53 // Alignment Graph Output Tags 54 ////////////////////////////////////////////////////////////////////////////// 55 56 /*! 57 * @defgroup AlignmentGraphFormatTags Alignment Graph Formats 58 * @brief File formats to write an @link AlignmentGraph @endlink. 59 * 60 * @tag AlignmentGraphFormatTags#MsfFormat 61 * @headerfile <seqan/graph_align.h> 62 * @brief MSF Format to write an @link AlignmentGraph @endlink. 63 * 64 * @signature typedef Tag<MsfFormat_> const MsfFormat; 65 * 66 * @tag AlignmentGraphFormatTags#FastaFormat 67 * @headerfile <seqan/graph_align.h> 68 * @brief Fasta Format to write an @link AlignmentGraph @endlink. 69 * 70 * @signature typedef Tag<FastaFormat_> const FastaFormat; 71 * 72 * @tag AlignmentGraphFormatTags#CgVizFormat 73 * @headerfile <seqan/graph_align.h> 74 * @brief CgViz Format to write an @link AlignmentGraph @endlink. 75 * 76 * @signature typedef Tag<CgVizFormat_> const CgVizFormat; 77 */ 78 79 ////////////////////////////////////////////////////////////////////////////// 80 81 ////////////////////////////////////////////////////////////////////////////// 82 83 struct MsfFormat_; 84 typedef Tag<MsfFormat_> const MsfFormat; 85 86 ////////////////////////////////////////////////////////////////////////////// 87 88 struct FastaFormat_; 89 typedef Tag<FastaFormat_> const FastaFormat; 90 91 ////////////////////////////////////////////////////////////////////////////// 92 93 struct CgVizFormat_; 94 typedef Tag<CgVizFormat_> const CgVizFormat; 95 96 97 98 ////////////////////////////////////////////////////////////////////////////// 99 // Default Alignment Graph 100 ////////////////////////////////////////////////////////////////////////////// 101 102 template<typename TStringSet, typename TCargo = unsigned int, typename TSpec = Default> 103 struct Alignment; 104 105 106 107 108 109 ////////////////////////////////////////////////////////////////////////////// 110 // Metafunctions 111 ////////////////////////////////////////////////////////////////////////////// 112 113 ////////////////////////////////////////////////////////////////////////////// 114 115 template<typename TStringSet, typename TCargo, typename TSpec> 116 struct EdgeType<Graph<Alignment<TStringSet, TCargo, TSpec> > const> { 117 typedef typename EdgeType<Graph<Undirected<TCargo, TSpec> > const>::Type Type; 118 }; 119 120 ////////////////////////////////////////////////////////////////////////////// 121 122 template<typename TStringSet, typename TCargo, typename TSpec> 123 struct EdgeType<Graph<Alignment<TStringSet, TCargo, TSpec> > > { 124 typedef typename EdgeType<Graph<Undirected<TCargo, TSpec> > >::Type Type; 125 }; 126 127 128 ////////////////////////////////////////////////////////////////////////////// 129 130 template<typename TStringSet, typename TCargo, typename TSpec> 131 struct Host<Graph<Alignment<TStringSet, TCargo, TSpec> > > { 132 typedef TStringSet Type; 133 }; 134 135 ////////////////////////////////////////////////////////////////////////////// 136 137 template<typename TStringSet, typename TCargo, typename TSpec> 138 struct Host<Graph<Alignment<TStringSet, TCargo, TSpec> > const> { 139 typedef TStringSet const Type; 140 }; 141 142 143 144 145 146 147 ////////////////////////////////////////////////////////////////////////////// 148 // Actual Alignment Graph Class 149 ////////////////////////////////////////////////////////////////////////////// 150 151 152 ////////////////////////////////////////////////////////////////////////////// 153 154 template<typename TId = unsigned int, typename TSize = unsigned int> 155 class FragmentInfo { 156 public: 157 TId data_seq_id; 158 TSize data_begin; 159 TSize data_length; 160 161 FragmentInfo() : 162 data_seq_id(0), 163 data_begin(0), 164 data_length(0) 165 { 166 } 167 168 FragmentInfo(TId id, TSize beg, TSize len) : 169 data_seq_id(id), 170 data_begin(beg), 171 data_length(len) 172 { 173 } 174 175 }; 176 177 ////////////////////////////////////////////////////////////////////////////// 178 179 /*! 180 * @class AlignmentGraph 181 * @extends Graph 182 * @headerfile <seqan/graph_align.h> 183 * @brief Alignment graph type. 184 * 185 * <img src="alignmentGraph.png" tite="An alignment graph with 3 sequences." /> 186 * 187 * @signature template <typename TStringSet[, typename TCargo[, typename TSpec]]> 188 * class Graph<Alignment<TStringSet, TCargo, TSpec> >; 189 * 190 * @tparam TStringSet The type of the @link StringSet @endlink containing the sequence information. <b>Must be a 191 * @link DependentStringSet @endlink.</b> 192 * @tparam TCargo The cargo type that can be attached to edges. Default: <tt>unsigned</tt>. 193 * @tparam TSpec The specializing type. Default: <tt>Default</tt>. Use <tt>WithoutEdgeId</tt> here to omit 194 * edge ids. NB: if edges do not store ids then external property maps do not work. 195 * 196 * @section Example 197 * 198 * @include demos/graph_align/graph_align.cpp 199 * 200 * @code{.console} 201 * Score = -8 202 * Alignment matrix: 203 * 0 . : 204 * ATCGAATGCGGA 205 * | ||| | 206 * ACTCGTTGC--A 207 * @endcode 208 */ 209 210 template<typename TString, typename TSpecial, typename TCargo, typename TSpec> 211 class Graph<Alignment<StringSet<TString, Dependent<TSpecial> >, TCargo, TSpec> > 212 { 213 public: 214 typedef typename Id<Graph>::Type TIdType_; 215 typedef typename VertexDescriptor<Graph>::Type TVertexDescriptor_; 216 typedef typename Size<Graph>::Type TSize_; 217 typedef std::pair<TIdType_, TSize_> TKey_; 218 typedef std::map<TKey_, TVertexDescriptor_> TPosToVertexMap_; 219 typedef FragmentInfo<TIdType_, TSize_> TFragmentInfo_; 220 221 // Alignment graph 222 Graph<Undirected<TCargo, TSpec> > data_align; 223 224 // Sequences 225 Holder<StringSet<TString, Dependent<TSpecial> > > data_sequence; 226 227 // Alignment specific members 228 String<TFragmentInfo_> data_fragment; 229 230 // STL Map to retrieve a vertex given SeqId, Position 231 TPosToVertexMap_ data_pvMap; 232 233 234 Graph() { 235 } 236 237 238 template <typename TDefault> 239 Graph(StringSet<TString, Dependent<TDefault> > const& sSet) { 240 SEQAN_CHECKPOINT 241 data_sequence = sSet; 242 243 // Cover all sequences with nil vertices 244 TVertexDescriptor_ nilVertex = getNil<TVertexDescriptor_>(); 245 TSize_ lenSet = length(sSet); 246 for(TSize_ k=0; k<lenSet;++k) 247 data_pvMap.insert(std::make_pair(TKey_(positionToId(sSet,k), length(sSet[k])), nilVertex)); 248 } 249 250 template <typename TDefault> 251 Graph(StringSet<TString, Owner<TDefault> > const& sSet) { 252 SEQAN_CHECKPOINT 253 StringSet<TString, Dependent<> > depStr(sSet); 254 data_sequence = depStr; 255 256 // Cover all sequences with nil vertices 257 TVertexDescriptor_ nilVertex = getNil<TVertexDescriptor_>(); 258 TSize_ lenSet = length(sSet); 259 for(TSize_ k=0; k<lenSet;++k) 260 data_pvMap.insert(std::make_pair(TKey_(positionToId(const_cast<StringSet<TString, Owner<TDefault> >&>(sSet),k), length(sSet[k])), nilVertex)); 261 } 262 263 264 ~Graph() { 265 SEQAN_CHECKPOINT 266 clear(*this); 267 } 268 269 Graph(Graph const & _other) 270 { 271 SEQAN_CHECKPOINT 272 _copyGraph(_other, *this); 273 } 274 275 Graph const& operator = (Graph const & _other) { 276 SEQAN_CHECKPOINT 277 if (this == &_other) return *this; 278 clear(*this); 279 _copyGraph(_other, *this); 280 return *this; 281 } 282 }; 283 284 ////////////////////////////////////////////////////////////////////////////// 285 // INTERNAL FUNCTIONS 286 ////////////////////////////////////////////////////////////////////////////// 287 288 ///////////////////////////////////////////////////////////////////////////// 289 290 template<typename TStringSet, typename TCargo, typename TSpec> 291 inline String<typename EdgeType<Graph<Alignment<TStringSet, TCargo, TSpec> > >::Type*>& 292 _getVertexString(Graph<Alignment<TStringSet, TCargo, TSpec> > const& g) { 293 SEQAN_CHECKPOINT 294 typedef Graph<Alignment<TStringSet, TCargo, TSpec> > TGraph; 295 typedef typename EdgeType<TGraph>::Type TEdgeStump; 296 return const_cast<String<TEdgeStump*>&>(g.data_align.data_vertex); 297 } 298 299 ///////////////////////////////////////////////////////////////////////////// 300 301 template<typename TStringSet, typename TCargo, typename TSpec> 302 inline typename VertexIdHandler<Graph<Alignment<TStringSet, TCargo, TSpec> > >::Type& 303 _getVertexIdManager(Graph<Alignment<TStringSet, TCargo, TSpec> > const& g) { 304 SEQAN_CHECKPOINT 305 typedef Graph<Alignment<TStringSet, TCargo, TSpec> > TGraph; 306 typedef typename VertexIdHandler<TGraph>::Type TVertexIdManager; 307 return const_cast<TVertexIdManager&>(g.data_align.data_id_managerV); 308 } 309 310 ////////////////////////////////////////////////////////////////////////////// 311 312 template<typename TStringSet, typename TCargo, typename TSpec> 313 inline typename EdgeIdHandler<Graph<Alignment<TStringSet, TCargo, TSpec> > >::Type& 314 _getEdgeIdManager(Graph<Alignment<TStringSet, TCargo, TSpec> > const& g) { 315 SEQAN_CHECKPOINT 316 typedef Graph<Alignment<TStringSet, TCargo, TSpec> > TGraph; 317 typedef typename EdgeIdHandler<TGraph>::Type TEdgeIdManager; 318 return const_cast<TEdgeIdManager&>(g.data_align.data_id_managerE); 319 } 320 321 ////////////////////////////////////////////////////////////////////////////// 322 323 template<typename TStringSet, typename TCargo, typename TSpec> 324 inline void 325 _copyGraph(Graph<Alignment<TStringSet, TCargo, TSpec> > const& source, 326 Graph<Alignment<TStringSet, TCargo, TSpec> >& dest, 327 bool) 328 { 329 SEQAN_CHECKPOINT 330 clear(dest); 331 dest.data_align = source.data_align; 332 dest.data_sequence = source.data_sequence; 333 dest.data_fragment = source.data_fragment; 334 dest.data_pvMap = source.data_pvMap; 335 } 336 337 ////////////////////////////////////////////////////////////////////////////// 338 339 template<typename TStringSet, typename TCargo, typename TSpec> 340 inline void 341 _copyGraph(Graph<Alignment<TStringSet, TCargo, TSpec> > const& source, 342 Graph<Alignment<TStringSet, TCargo, TSpec> >& dest) 343 { 344 _copyGraph(source, dest, false); // Never transpose, underlying graph is undirected 345 } 346 347 348 ////////////////////////////////////////////////////////////////////////////// 349 // FUNCTIONS 350 ////////////////////////////////////////////////////////////////////////////// 351 352 353 ////////////////////////////////////////////////////////////////////////////// 354 355 template<typename TStringSet, typename TCargo, typename TSpec> 356 inline void 357 transpose(Graph<Alignment<TStringSet, TCargo, TSpec> > const& source, 358 Graph<Alignment<TStringSet, TCargo, TSpec> >& dest) 359 { 360 SEQAN_CHECKPOINT 361 // Alignment graph, no transpose just copy 362 _copyGraph(source, dest, false); 363 } 364 365 ////////////////////////////////////////////////////////////////////////////// 366 367 template<typename TStringSet, typename TCargo, typename TSpec> 368 inline void 369 transpose(Graph<Alignment<TStringSet, TCargo, TSpec> >&) 370 { 371 SEQAN_CHECKPOINT 372 // Nothing to do in an alignment graph 373 } 374 375 ////////////////////////////////////////////////////////////////////////////// 376 377 template<typename TStringSet, typename TCargo, typename TSpec> 378 inline typename Size<Graph<Alignment<TStringSet, TCargo, TSpec> > >::Type 379 numEdges(Graph<Alignment<TStringSet, TCargo, TSpec> > const& g) 380 { 381 SEQAN_CHECKPOINT 382 return numEdges(g.data_align); 383 } 384 385 ////////////////////////////////////////////////////////////////////////////// 386 387 template<typename TStringSet, typename TCargo, typename TSpec> 388 inline typename Size<Graph<Alignment<TStringSet, TCargo, TSpec> > >::Type 389 numVertices(Graph<Alignment<TStringSet, TCargo, TSpec> > const& g) 390 { 391 SEQAN_CHECKPOINT 392 return numVertices(g.data_align); 393 } 394 395 ////////////////////////////////////////////////////////////////////////////// 396 397 template<typename TStringSet, typename TCargo, typename TSpec> 398 inline bool 399 empty(Graph<Alignment<TStringSet, TCargo, TSpec> > const& g) 400 { 401 SEQAN_CHECKPOINT 402 return empty(g.data_align); 403 } 404 405 ////////////////////////////////////////////////////////////////////////////// 406 407 template<typename TStringSet, typename TCargo, typename TSpec> 408 inline void 409 clearEdges(Graph<Alignment<TStringSet, TCargo, TSpec> >& g) 410 { 411 SEQAN_CHECKPOINT 412 clearEdges(g.data_align); 413 } 414 415 ////////////////////////////////////////////////////////////////////////////// 416 417 template<typename TStringSet, typename TCargo, typename TSpec> 418 inline void 419 clearVertices(Graph<Alignment<TStringSet, TCargo, TSpec> >& g) 420 { 421 SEQAN_CHECKPOINT 422 typedef Graph<Alignment<TStringSet, TCargo, TSpec> > TGraph; 423 typedef typename Size<TStringSet>::Type TSize; 424 typedef typename VertexDescriptor<TGraph>::Type TVertexDescriptor; 425 typedef typename TGraph::TKey_ TKey; 426 427 clear(g.data_fragment); 428 g.data_pvMap.clear(); 429 clearVertices(g.data_align); 430 431 432 // Don't forget to cover the sequences with nil vertices again 433 TVertexDescriptor nilVertex = getNil<TVertexDescriptor>(); 434 if(!empty(value(g.data_sequence))) { 435 TSize lenSet = length(stringSet(g)); 436 for(TSize k=0; k<lenSet;++k) 437 g.data_pvMap.insert(std::make_pair(TKey(positionToId(stringSet(g),k), length(stringSet(g)[k])), nilVertex)); 438 } 439 } 440 441 ////////////////////////////////////////////////////////////////////////////// 442 443 template<typename TStringSet, typename TCargo, typename TSpec> 444 inline void 445 clear(Graph<Alignment<TStringSet, TCargo, TSpec> >& g) 446 { 447 SEQAN_CHECKPOINT 448 // Only clear also removes the sequences 449 clear(value(g.data_sequence)); 450 clearVertices(g); 451 } 452 453 ////////////////////////////////////////////////////////////////////////////// 454 455 template<typename TStringSet, typename TCargo, typename TSpec, typename TVertexDescriptor> 456 inline typename Size<Graph<Alignment<TStringSet, TCargo, TSpec> > >::Type 457 outDegree(Graph<Alignment<TStringSet, TCargo, TSpec> > const& g, 458 TVertexDescriptor const vertex) 459 { 460 SEQAN_CHECKPOINT 461 return outDegree(g.data_align, vertex); 462 } 463 464 ////////////////////////////////////////////////////////////////////////////// 465 466 template<typename TStringSet, typename TCargo, typename TSpec, typename TVertexDescriptor> 467 inline typename Size<Graph<Alignment<TStringSet, TCargo, TSpec> > >::Type 468 inDegree(Graph<Alignment<TStringSet, TCargo, TSpec> > const& g, 469 TVertexDescriptor const vertex) 470 { 471 SEQAN_CHECKPOINT 472 return inDegree(g.data_align, vertex); 473 } 474 475 ////////////////////////////////////////////////////////////////////////////// 476 477 template<typename TStringSet, typename TCargo, typename TSpec, typename TVertexDescriptor> 478 inline typename Size<Graph<Alignment<TStringSet, TCargo, TSpec> > >::Type 479 degree(Graph<Alignment<TStringSet, TCargo, TSpec> > const& g, 480 TVertexDescriptor const vertex) 481 { 482 SEQAN_CHECKPOINT 483 return degree(g.data_align, vertex); 484 } 485 486 ////////////////////////////////////////////////////////////////////////////// 487 488 template<typename TStringSet, typename TCargo, typename TSpec, typename TId, typename TPos, typename TLength> 489 inline typename VertexDescriptor<Graph<Alignment<TStringSet, TCargo, TSpec> > >::Type 490 addVertex(Graph<Alignment<TStringSet, TCargo, TSpec> >& g, 491 TId id, 492 TPos begin, 493 TLength len) 494 { 495 SEQAN_CHECKPOINT 496 typedef Graph<Alignment<TStringSet, TCargo, TSpec> > TGraph; 497 typedef typename VertexDescriptor<TGraph>::Type TVertexDescriptor; 498 typedef typename Id<TGraph>::Type TIdType; 499 typedef typename Size<TGraph>::Type TSize; 500 typedef typename TGraph::TFragmentInfo_ TFragmentInfo; 501 typedef typename TGraph::TKey_ TKey; 502 typedef typename TGraph::TPosToVertexMap_ TPosToVertexMap; 503 //typedef typename TPosToVertexMap::key_type TKey; 504 505 //for(TPosToVertexMap::const_iterator p = g.data_pvMap.begin(); p != g.data_pvMap.end(); ++p) { 506 // std::cout << p->first.first << ',' << p->first.second << ':' << p->second << std::endl; 507 //} 508 509 TVertexDescriptor nilVertex = getNil<TVertexDescriptor>(); 510 511 // Store the new fragment 512 typename TPosToVertexMap::iterator interval = g.data_pvMap.lower_bound(TKey((TIdType)id, (TSize)begin + (TSize)len)); 513 // Segment does not belong to Sequence anymore 514 SEQAN_ASSERT(interval != g.data_pvMap.end()); 515 // Segment end must be assigned to nil so far 516 SEQAN_ASSERT(interval->second == nilVertex); 517 // Segment must belong to the whole old interval 518 SEQAN_ASSERT(*interval == *g.data_pvMap.upper_bound(TKey((TIdType)id, (TSize)begin))); 519 520 // Insert new vertex 521 TVertexDescriptor vd = addVertex(g.data_align); 522 if (length(g.data_fragment) <= vd) resize(g.data_fragment, vd + 1, Generous()); 523 assignProperty(g.data_fragment, vd, TFragmentInfo(id, begin, len)); 524 525 // Update position to vertex map 526 // Does the end of the new fragment coincides with the end of the interval? 527 if ( (TSize) begin + len == (TSize) interval->first.second) { 528 // Does the beginning of the new fragment coincides with the beginning of the interval? 529 if ((begin == 0) || 530 (g.data_pvMap.find(TKey((TIdType)id, (TSize)begin)) != g.data_pvMap.end())) { 531 // Replace interval 532 interval->second = vd; 533 } else { 534 // Split interval once 535 g.data_pvMap.insert(std::make_pair(TKey(interval->first.first,(TSize)begin), interval->second)); 536 g.data_pvMap.erase(interval); 537 g.data_pvMap.insert(std::make_pair(TKey((TIdType)id,(TSize)begin+len), vd)); 538 } 539 } else { 540 // Does the beginning of the new fragment coincides with the beginning of the interval? 541 if ((begin == 0) || 542 (g.data_pvMap.find(TKey((TIdType)id, (TSize)begin)) != g.data_pvMap.end())) { 543 // Split interval once 544 // Just insert here because we store interval ends 545 g.data_pvMap.insert(std::make_pair(TKey((TIdType)id,(TSize)begin+len), vd)); 546 } else { 547 // Split interval twice 548 TSize tmp = interval->first.second; 549 g.data_pvMap.insert(std::make_pair(TKey(interval->first.first,begin), interval->second)); 550 g.data_pvMap.erase(interval); 551 g.data_pvMap.insert(std::make_pair(TKey((TIdType)id,(TSize)begin+len), vd)); 552 g.data_pvMap.insert(std::make_pair(TKey((TIdType)id,tmp), nilVertex)); 553 } 554 } 555 return vd; 556 } 557 558 ////////////////////////////////////////////////////////////////////////////// 559 560 template<typename TStringSet, typename TCargo, typename TSpec, typename TVD> 561 inline void 562 removeVertex(Graph<Alignment<TStringSet, TCargo, TSpec> >& g, 563 TVD const v) 564 { 565 typedef Graph<Alignment<TStringSet, TCargo, TSpec> > TGraph; 566 typedef typename VertexDescriptor<TGraph>::Type TVertexDescriptor; 567 typedef typename TGraph::TKey_ TKey; 568 typedef typename TGraph::TPosToVertexMap_ TPosToVertexMap; 569 typename TPosToVertexMap::iterator interval = (g.data_pvMap.lower_bound(TKey(sequenceId(g,v), fragmentBegin(g,v) + fragmentLength(g,v)))); 570 571 // Clear the interval 572 interval->second = getNil<TVertexDescriptor>(); 573 typename TPosToVertexMap::iterator interval_iter = interval; 574 if (interval_iter != g.data_pvMap.begin()) { 575 --interval_iter; 576 if ((interval_iter->second == getNil<TVertexDescriptor>()) && 577 (interval_iter->first.first == interval->first.first)) { 578 g.data_pvMap.erase(interval_iter); 579 } 580 } 581 interval_iter = interval; 582 ++interval_iter; 583 if (interval_iter != g.data_pvMap.end()) { 584 if ((interval_iter->second == getNil<TVertexDescriptor>()) && 585 (interval_iter->first.first == interval->first.first)) { 586 g.data_pvMap.erase(interval); 587 } 588 } 589 590 // Remove the vertex 591 removeVertex(g.data_align,v); 592 } 593 594 ////////////////////////////////////////////////////////////////////////////// 595 596 template<typename TStringSet, typename TCargo, typename TSpec, typename TVertexDescriptor> 597 inline typename EdgeDescriptor<Graph<Alignment<TStringSet, TCargo, TSpec> > >::Type 598 addEdge(Graph<Alignment<TStringSet, TCargo, TSpec> >& g, 599 TVertexDescriptor const source, 600 TVertexDescriptor const target) 601 { 602 SEQAN_CHECKPOINT 603 return addEdge(g.data_align, source, target); 604 } 605 606 ////////////////////////////////////////////////////////////////////////////// 607 608 template<typename TStringSet, typename TCargo, typename TSpec, typename TVertexDescriptor, typename TCargo2> 609 inline typename EdgeDescriptor<Graph<Alignment<TStringSet, TCargo, TSpec> > >::Type 610 addEdge(Graph<Alignment<TStringSet, TCargo, TSpec> >& g, 611 TVertexDescriptor const source, 612 TVertexDescriptor const target, 613 TCargo2 const cargo) 614 { 615 SEQAN_CHECKPOINT 616 return addEdge(g.data_align, source, target, cargo); 617 } 618 619 ////////////////////////////////////////////////////////////////////////////// 620 621 template<typename TStringSet, typename TCargo, typename TSpec, typename TVertexDescriptor> 622 inline void 623 removeEdge(Graph<Alignment<TStringSet, TCargo, TSpec> >& g, 624 TVertexDescriptor const source, 625 TVertexDescriptor const target) 626 { 627 SEQAN_CHECKPOINT 628 removeEdge(g.data_align, source, target); 629 } 630 631 ////////////////////////////////////////////////////////////////////////////// 632 633 template<typename TStringSet, typename TCargo, typename TSpec, typename TEdgeDescriptor> 634 inline void 635 removeEdge(Graph<Alignment<TStringSet, TCargo, TSpec> >& g, 636 TEdgeDescriptor const edge) 637 { 638 SEQAN_CHECKPOINT 639 removeEdge(g.data_align, sourceVertex(g.data_align,edge), targetVertex(g.data_align,edge)); 640 } 641 642 ////////////////////////////////////////////////////////////////////////////// 643 644 template<typename TStringSet, typename TCargo, typename TSpec, typename TVertexDescriptor> 645 inline void 646 removeOutEdges(Graph<Alignment<TStringSet, TCargo, TSpec> >& g, 647 TVertexDescriptor const v) 648 { 649 SEQAN_CHECKPOINT 650 removeOutEdges(g.data_align, v); 651 } 652 653 ////////////////////////////////////////////////////////////////////////////// 654 655 template<typename TStringSet, typename TCargo, typename TSpec, typename TVertexDescriptor> 656 inline void 657 removeInEdges(Graph<Alignment<TStringSet, TCargo, TSpec> >& g, 658 TVertexDescriptor const v) 659 { 660 SEQAN_CHECKPOINT 661 removeInEdges(g.data_align,v); 662 } 663 664 ////////////////////////////////////////////////////////////////////////////// 665 666 template<typename TStringSet, typename TCargo, typename TSpec, typename TEdgeDescriptor> 667 inline typename VertexDescriptor<Graph<Alignment<TStringSet, TCargo, TSpec> > >::Type 668 targetVertex(Graph<Alignment<TStringSet, TCargo, TSpec> > const& g, 669 TEdgeDescriptor const edge) 670 { 671 SEQAN_CHECKPOINT 672 return targetVertex(g.data_align, edge); 673 } 674 675 ////////////////////////////////////////////////////////////////////////////// 676 677 template<typename TStringSet, typename TCargo, typename TSpec, typename TEdgeDescriptor> 678 inline typename VertexDescriptor<Graph<Alignment<TStringSet, TCargo, TSpec> > >::Type 679 sourceVertex(Graph<Alignment<TStringSet, TCargo, TSpec> > const& g, 680 TEdgeDescriptor const edge) 681 { 682 SEQAN_CHECKPOINT 683 return sourceVertex(g.data_align, edge); 684 } 685 686 ////////////////////////////////////////////////////////////////////////////// 687 688 template<typename TStringSet, typename TCargo, typename TSpec, typename TMatrix> 689 inline void 690 getAdjacencyMatrix(Graph<Alignment<TStringSet, TCargo, TSpec> > const& g, 691 TMatrix& mat) 692 { 693 SEQAN_CHECKPOINT 694 getAdjacencyMatrix(g.data_align, mat); 695 } 696 697 ////////////////////////////////////////////////////////////////////////////// 698 699 template<typename TStringSet, typename TCargo, typename TSpec, typename TVertexDescriptor> 700 inline typename EdgeDescriptor<Graph<Alignment<TStringSet, TCargo, TSpec> > >::Type 701 findEdge(Graph<Alignment<TStringSet, TCargo, TSpec> > const& g, 702 TVertexDescriptor const v, 703 TVertexDescriptor const w) 704 { 705 SEQAN_CHECKPOINT 706 return findEdge(g.data_align, v, w); 707 } 708 709 ////////////////////////////////////////////////////////////////////////////// 710 711 template <typename TFile, typename TStringSet, typename TCargo, typename TSpec> 712 inline void 713 write(TFile & target, 714 Graph<Alignment<TStringSet, TCargo, TSpec> > const& g) 715 { 716 //IOREV _nodoc_ specialization not documented (at least not obviously) 717 typedef Graph<Alignment<TStringSet, TCargo, TSpec> > TGraph; 718 typedef typename Size<TGraph>::Type TSize; 719 typedef typename TGraph::TFragmentInfo_ TSegment; 720 typedef typename VertexDescriptor<TGraph>::Type TVertexDescriptor; 721 typedef typename EdgeType<TGraph>::Type TEdgeStump; 722 typedef typename Iterator<String<TEdgeStump*> const, Standard>::Type TIterConst; 723 724 String<char> align; 725 if (!convertAlignment(g, align)) { 726 write(target,"Adjacency list:\n"); 727 TIterConst it = begin(g.data_align.data_vertex, Standard()); 728 TIterConst itEnd = end(g.data_align.data_vertex, Standard()); 729 TVertexDescriptor pos = 0; 730 for(;it!=itEnd; ++it, ++pos) { 731 if (!idInUse(g.data_align.data_id_managerV, pos)) continue; 732 TVertexDescriptor sourceV = pos; 733 appendNumber(target, (int)sourceV); 734 TSegment seg = getProperty(g.data_fragment, sourceV); 735 write(target," (SeqId:"); 736 appendNumber(target, (int)seg.data_seq_id); 737 write(target," ,Begin:"); 738 appendNumber(target, (int)seg.data_begin); 739 write(target," ,Length:"); 740 appendNumber(target, (int)seg.data_length); 741 write(target,") -> "); 742 TEdgeStump* current = *it; 743 while(current!=0) { 744 TVertexDescriptor adjV = getTarget(current); 745 if (adjV != sourceV) { 746 appendNumber(target, (int)adjV); 747 writeValue(target, ','); 748 current=getNextS(current); 749 } else { 750 adjV = getSource(current); 751 appendNumber(target, (int)adjV); 752 writeValue(target, ','); 753 current=getNextT(current); 754 } 755 } 756 writeValue(target, '\n'); 757 } 758 write(target,"Edge list:\n"); 759 it = begin(g.data_align.data_vertex, Standard()); 760 pos = 0; 761 for(;it!=itEnd; ++it, ++pos) { 762 TVertexDescriptor sourceV = pos; 763 TEdgeStump* current = *it; 764 while(current!=0) { 765 TVertexDescriptor targetV = getTarget(current); 766 if (sourceV != targetV) { 767 write(target,"Source: "); 768 appendNumber(target, (int)sourceV); 769 write(target, ",Target: "); 770 appendNumber(target, (int)targetV); 771 write(target, " (Id: "); 772 appendNumber(target, (int)_getId(current)); 773 write(target, ")\n"); 774 current=getNextS(current); 775 } else { 776 current=getNextT(current); 777 } 778 } 779 } 780 } else { 781 TSize nseq = length(stringSet(g)); 782 TSize colLen = length(align) / nseq; 783 784 TSize baseCount=0; 785 TSize leftSpace=6; 786 TSize xPos = 0; 787 write(target,"Alignment matrix:\n"); 788 while (xPos < colLen) { 789 TSize windowSize = 50; 790 if ((xPos + windowSize)>colLen) windowSize = colLen - xPos; 791 792 // Print header line 793 TSize offset=0; 794 // Larger numbers need to be further left 795 if (baseCount != 0) offset = (unsigned int) floor(log((double)baseCount) / log((double)10)); 796 for(TSize j = 0;j<leftSpace-offset;++j) writeValue(target, ' '); 797 appendNumber(target, (int)baseCount); 798 baseCount+=windowSize; 799 writeValue(target, ' '); 800 for(TSize col = 1;col<=windowSize;++col) { 801 if ((col % 10)==0) writeValue(target, ':'); 802 else if ((col % 5)==0) writeValue(target, '.'); 803 else writeValue(target, ' '); 804 } 805 write(target, " \n"); 806 807 // Print sequences 808 for(TSize row=0;row<2*nseq-1;++row) { 809 for(TSize col = 0;col<leftSpace+2;++col) 810 writeValue(target, ' '); 811 if ((row % 2)==0) { 812 for(TSize col = xPos;col<xPos+windowSize;++col) 813 writeValue(target, align[(row/2)*colLen+col]); 814 } else { 815 for(TSize col = xPos;col<xPos+windowSize;++col) { 816 if ((align[((row-1)/2)*colLen + col] != gapValue<char>()) && 817 (align[((row+1)/2)*colLen + col] != gapValue<char>()) && 818 (align[((row-1)/2)*colLen + col] == align[((row+1)/2)*colLen + col])) 819 writeValue(target, '|'); 820 else 821 writeValue(target, ' '); 822 } 823 } 824 writeValue(target, '\n'); 825 } 826 writeValue(target, '\n'); 827 xPos+=windowSize; 828 } 829 writeValue(target, '\n'); 830 } 831 } 832 833 ////////////////////////////////////////////////////////////////////////////// 834 835 template <typename TFile, typename TSpec, typename TNames> 836 inline void 837 write(TFile & file, 838 Graph<TSpec> const& g, 839 TNames const& names, 840 FastaFormat) 841 { 842 //IOREV _nodoc_ specialization not documented 843 SEQAN_CHECKPOINT 844 typedef Graph<TSpec> TGraph; 845 typedef typename Size<TGraph>::Type TSize; 846 847 typename DirectionIterator<TFile, Output>::Type target = directionIterator(file, Output()); 848 849 String<char> align; 850 if (convertAlignment(g, align)) { 851 TSize nseq = length(stringSet(g)); 852 TSize colLen = length(align) / nseq; 853 typedef typename Iterator<String<char>, Standard>::Type TIter; 854 TIter it = begin(align, Standard()); 855 for(TSize i = 0; i<nseq; ++i) { 856 writeValue(target, '>'); 857 write(target,names[i]); 858 writeValue(target, '\n'); 859 TSize col = 0; 860 while(col < colLen) { 861 TSize max = ((colLen - col) < 60) ? colLen - col : 60; 862 for(TSize finger = 0; finger<max; ++finger, ++col, ++it) 863 writeValue(target, *it); 864 writeValue(target, '\n'); 865 } 866 } 867 } 868 } 869 870 871 872 ////////////////////////////////////////////////////////////////////////////// 873 874 template <typename TFile, typename TSpec, typename TNames> 875 inline void 876 write(TFile & file, 877 Graph<TSpec> const& g, 878 TNames const& names, 879 MsfFormat) 880 { 881 //IOREV _nodoc_ specialization not documented 882 SEQAN_CHECKPOINT 883 typedef Graph<TSpec> TGraph; 884 typedef typename Size<TGraph>::Type TSize; 885 886 typename DirectionIterator<TFile, Output>::Type target = directionIterator(file, Output()); 887 888 String<char> align; 889 if (convertAlignment(g, align)) { 890 TSize nseq = length(stringSet(g)); 891 TSize colLen = length(align) / nseq; 892 893 write(target,"PileUp\n"); 894 writeValue(target, '\n'); 895 write(target," MSF: "); 896 appendNumber(target, (unsigned)colLen); 897 write(target," Type: P Check: 0 ..\n\n"); 898 TSize offset = 0; 899 for(TSize i = 0; i<nseq; ++i) { 900 write(target," Name: "); 901 write(target,names[i]); 902 write(target," oo Len: "); 903 TSize len = length(names[i]); 904 if (len > offset) offset = len; 905 appendNumber(target, (unsigned)colLen); 906 write(target," Check: 0"); 907 write(target," Weight: 1.00"); 908 writeValue(target, '\n'); 909 } 910 offset += 5; 911 writeValue(target, '\n'); 912 write(target, "//\n"); 913 writeValue(target, '\n'); 914 writeValue(target, '\n'); 915 TSize col = 0; 916 while(col < colLen) { 917 TSize max = 0; 918 for(TSize i = 0; i<nseq; ++i) { 919 max = ((colLen - col) < 50) ? colLen - col : 50; 920 write(target,names[i]); 921 for(TSize j = 0; j<offset - length(names[i]); ++j) 922 writeValue(target, ' '); 923 for(TSize finger = col; finger<col+max; ++finger) { 924 if ((finger - col) % 10 == 0) writeValue(target, ' '); 925 if (align[i*colLen + finger] == '-') writeValue(target, '.'); 926 else writeValue(target, align[i*colLen + finger]); 927 } 928 writeValue(target, '\n'); 929 } 930 col += max; 931 writeValue(target, '\n'); 932 writeValue(target, '\n'); 933 } 934 } 935 } 936 937 938 ////////////////////////////////////////////////////////////////////////////// 939 template <typename TFile, typename TStringSet, typename TSpec, typename TEdge> 940 inline void 941 _writeCargo(TFile & file, 942 Graph<Alignment<TStringSet, void, TSpec> > const&, 943 TEdge const&) 944 { 945 appendNumber(file, 0); 946 } 947 948 ////////////////////////////////////////////////////////////////////////////// 949 template <typename TFile, typename TStringSet, typename TCargo, typename TSpec, typename TEdge> 950 inline void 951 _writeCargo(TFile & file, 952 Graph<Alignment<TStringSet, TCargo, TSpec> > const&, 953 TEdge const& edge) 954 { 955 appendNumber(file, (int)getCargo(edge)); 956 } 957 958 ////////////////////////////////////////////////////////////////////////////// 959 960 961 template <typename TFile, typename TStringSet, typename TCargo, typename TSpec, typename TNames> 962 inline void 963 write(TFile & file, 964 Graph<Alignment<TStringSet, TCargo, TSpec> > const& g, 965 TNames const& names, 966 CgVizFormat) 967 { 968 //IOREV _nodoc_ specialization not documented 969 typedef Graph<Alignment<TStringSet, TCargo, TSpec> > TGraph; 970 typedef typename Size<TGraph>::Type TSize; 971 typedef typename Id<TGraph>::Type TId; 972 typedef typename VertexDescriptor<TGraph>::Type TVertexDescriptor; 973 typedef typename EdgeType<TGraph>::Type TEdgeStump; 974 typedef typename Iterator<String<TEdgeStump*> const, Rooted>::Type TIterConst; 975 976 typename DirectionIterator<TFile, Output>::Type target = directionIterator(file, Output()); 977 978 TStringSet& str = stringSet(g); 979 TSize nseq = length(str); 980 981 write(target, "{DATA Data\n"); 982 write(target, "\t[__GLOBAL__] tracks="); 983 appendNumber(target, nseq); 984 writeValue(target, '\n'); 985 for(TSize i = 0; i<nseq; ++i) { 986 write(target, "\tfasta_id=\""); 987 write(target,names[i]); 988 write(target, "\" sequence=\""); 989 write(target,str[i]); 990 write(target, "\" track="); 991 appendNumber(target, i); 992 write(target, " type=\"DNA\": 0 "); 993 appendNumber(target, length(str[i])- 1); 994 writeValue(target, '\n'); 995 } 996 write(target, "}\n"); 997 for(TSize i = 0; i<nseq; ++i) { 998 write(target, "{DATA "); 999 appendNumber(target, i); 1000 //write(target,names[i]); 1001 write(target, "-seqlen\n\t[__GLOBAL__]\n\tlength="); 1002 appendNumber(target, length(str[i])); 1003 write(target, ":\t0 "); 1004 appendNumber(target, length(str[i])- 1); 1005 write(target, "\n}\n"); 1006 } 1007 for(TSize i=0; i<nseq; ++i) { 1008 for(TSize j=i+1; j<nseq; ++j) { 1009 write(target, "{DATA "); 1010 appendNumber(target, i); 1011 //write(target,names[i]); 1012 write(target, "-vs-"); 1013 appendNumber(target, j); 1014 //write(target,names[j]); 1015 writeValue(target, '\n'); 1016 write(target, "\t[__GLOBAL__]\n"); 1017 for(TIterConst it = begin(g.data_align.data_vertex);!atEnd(it);goNext(it)) { 1018 TVertexDescriptor sourceV = position(it); 1019 TId id1 = sequenceId(g, sourceV); 1020 if ((positionToId(str, id1) != i) && 1021 (positionToId(str, id1) != j)) continue; 1022 TEdgeStump* current = getValue(it); 1023 while(current!=0) { 1024 TVertexDescriptor targetV = getTarget(current); 1025 TId id2 = sequenceId(g, targetV); 1026 if (sourceV != targetV) { 1027 if ((positionToId(str, id2) != i) && 1028 (positionToId(str, id2) != j)) { 1029 current=getNextS(current); 1030 continue; 1031 } 1032 write(target, "\t"); 1033 write(target, "source="); 1034 appendNumber(target, (int)sourceV); 1035 writeValue(target, ' '); 1036 write(target, "target="); 1037 appendNumber(target, (int)targetV); 1038 writeValue(target, ' '); 1039 write(target, "edgeId="); 1040 appendNumber(target, (int)_getId(current)); 1041 writeValue(target, ' '); 1042 write(target, "cargo="); 1043 _writeCargo(target,g,current); 1044 writeValue(target, ' '); 1045 write(target, "label="); 1046 write(target,label(g,sourceV)); 1047 writeValue(target, ' '); 1048 write(target, "labelOpp="); 1049 write(target,label(g,targetV)); 1050 writeValue(target, ':'); 1051 writeValue(target, '\t'); 1052 appendNumber(target, (int)fragmentBegin(g, sourceV)); 1053 writeValue(target, ' '); 1054 appendNumber(target, (int)fragmentBegin(g, targetV)); 1055 writeValue(target, ' '); 1056 appendNumber(target, (int)(fragmentBegin(g, sourceV) + fragmentLength(g, sourceV))); 1057 writeValue(target, ' '); 1058 appendNumber(target, (int)(fragmentBegin(g, targetV) + fragmentLength(g, targetV))); 1059 writeValue(target, '\n'); 1060 current=getNextS(current); 1061 } else { 1062 current=getNextT(current); 1063 } 1064 } 1065 } 1066 write(target, "}\n"); 1067 } 1068 } 1069 } 1070 1071 1072 ////////////////////////////////////////////////////////////////////////////// 1073 1074 /*! 1075 * @fn AlignmentGraph#assignStringSet 1076 * @brief Assigns a new StringSet to an AlignmentGraph. 1077 * 1078 * @signature void assignStringSet(g, stringSet); 1079 * 1080 * @param[in,out] g An AlignmentGraph. 1081 * @param[in] stringSet The @link StringSet @endlink to assign to. 1082 * 1083 * @see AlignmentGraph#getStringSet 1084 * @see AlignmentGraph#stringSet 1085 */ 1086 1087 template<typename TString, typename TDefault, typename TCargo, typename TSpec, typename TDefault2> 1088 inline void 1089 assignStringSet(Graph<Alignment<StringSet<TString, Dependent<TDefault> >, TCargo, TSpec> >& g, 1090 StringSet<TString, Dependent<TDefault2> > const& sStr) 1091 { 1092 SEQAN_CHECKPOINT 1093 typedef Graph<Alignment<StringSet<TString, Dependent<TDefault> >, TCargo, TSpec> > TGraph; 1094 typedef typename VertexDescriptor<TGraph>::Type TVertexDescriptor; 1095 typedef typename TGraph::TKey_ TKey; 1096 typedef typename Size<TGraph>::Type TSize; 1097 1098 TVertexDescriptor nilVertex = getNil<TVertexDescriptor>(); 1099 clear(g); 1100 g.data_sequence = (StringSet<TString, Dependent<TDefault> >) sStr; 1101 TSize lenSet = length(sStr); 1102 for(TSize k=0; k<lenSet;++k) 1103 g.data_pvMap.insert(std::make_pair(TKey(positionToId(sStr,k), length(sStr[k])), nilVertex)); 1104 } 1105 1106 ////////////////////////////////////////////////////////////////////////////// 1107 1108 template<typename TString, typename TDefault, typename TCargo, typename TSpec, typename TDefault2> 1109 inline void 1110 assignStringSet(Graph<Alignment<StringSet<TString, Dependent<TDefault> >, TCargo, TSpec> >& g, 1111 StringSet<TString, Owner<TDefault2> > const& sStr) 1112 { 1113 SEQAN_CHECKPOINT 1114 StringSet<TString, Dependent<> > depStr(sStr); 1115 assignStringSet(g, depStr); 1116 } 1117 1118 1119 ////////////////////////////////////////////////////////////////////////////// 1120 1121 /*! 1122 * @fn AlignmentGraph#getStringSet 1123 * @brief Gets the StringSet of an AlignmentGraph. 1124 * 1125 * @signature THost getStringSet(g); 1126 * 1127 * @param[in,out] g An AlignmentGraph. 1128 * 1129 * @return THost A const reference to the StringSet's host. 1130 * 1131 * @see AlignmentGraph#assignStringSet 1132 * @see AlignmentGraph#stringSet 1133 */ 1134 1135 template<typename TStringSet, typename TCargo, typename TSpec> 1136 inline typename Host<Graph<Alignment<TStringSet, TCargo, TSpec> > const>::Type& 1137 getStringSet(Graph<Alignment<TStringSet, TCargo, TSpec> > const& g) 1138 { 1139 SEQAN_CHECKPOINT 1140 return value(g.data_sequence); 1141 } 1142 1143 1144 ////////////////////////////////////////////////////////////////////////////// 1145 1146 /*! 1147 * @fn AlignmentGraph#stringSet 1148 * @brief Return reference to the StringSet of an AlignmentGraph. 1149 * 1150 * @signature THostRef stringSet(g); 1151 * 1152 * @param[in,out] g An AlignmentGraph. 1153 * 1154 * @return THost A reference to the StringSet's host. 1155 * 1156 * @see AlignmentGraph#assignStringSet 1157 * @see AlignmentGraph#getStringSet 1158 */ 1159 1160 template <typename T> 1161 struct StringSetType; 1162 1163 template <typename TStringSet, typename TCargo, typename TSpec> 1164 struct StringSetType<Graph<Alignment<TStringSet, TCargo, TSpec> > > 1165 { 1166 typedef typename Host<Graph<Alignment<TStringSet, TCargo, TSpec> > >::Type & Type; 1167 }; 1168 1169 1170 template<typename TStringSet, typename TCargo, typename TSpec> 1171 inline typename StringSetType<Graph<Alignment<TStringSet, TCargo, TSpec> > >::Type 1172 stringSet(Graph<Alignment<TStringSet, TCargo, TSpec> > const& g) 1173 { 1174 SEQAN_CHECKPOINT 1175 return const_cast<TStringSet&>(value(g.data_sequence)); 1176 } 1177 1178 ////////////////////////////////////////////////////////////////////////////// 1179 1180 /*! 1181 * @fn AlignmentGraph#label 1182 * @brief Get the label associated with this vertex descriptor or the sequence that is associated with a fragment. 1183 * 1184 * @signature TInfix label(ag, v); 1185 * 1186 * @param[in] ag The AlignmentGraph to query. 1187 * @param[in] v The vertex descriptor to query. 1188 * 1189 * @return TInfix An infix representing the sequence label. 1190 */ 1191 1192 template<typename TStringSet, typename TCargo, typename TSpec, typename TVertexDescriptor> 1193 inline typename Infix<typename Value<TStringSet>::Type>::Type 1194 label(Graph<Alignment<TStringSet, TCargo, TSpec> > const& g, 1195 TVertexDescriptor const v) 1196 { 1197 SEQAN_CHECKPOINT 1198 typedef Graph<Alignment<TStringSet, TCargo, TSpec> > TGraph; 1199 typedef typename TGraph::TFragmentInfo_ TSegment; 1200 TSegment seg = getProperty(g.data_fragment, v); 1201 //std::cout << seg.data_seq_id << ","; 1202 //std::cout << seg.data_begin << ","; 1203 //std::cout << seg.data_length << ","; 1204 //std::cout << getValueById(value(g.data_sequence), seg.data_seq_id) << std::endl; 1205 //std::cout << infix(getValueById(value(g.data_sequence), seg.data_seq_id), seg.data_begin, seg.data_begin + seg.data_length) << std::endl; 1206 return infix(getValueById(value(g.data_sequence), seg.data_seq_id), seg.data_begin, seg.data_begin + seg.data_length); 1207 } 1208 1209 ////////////////////////////////////////////////////////////////////////////// 1210 1211 /*! 1212 * @fn AlignmentGraph#sequenceId 1213 * @brief Gets the sequence id that is associated with this vertex descriptor or with a sequence of a fragment. 1214 * 1215 * @signature TId sequenceId(ag, v); 1216 * 1217 * @param[in] ag The AlignmentGraph. 1218 * @param[in] v Vertex descriptor of the vertex to retrieve sequence id for. 1219 * 1220 * @return TId The sequence id. 1221 */ 1222 1223 template<typename TStringSet, typename TCargo, typename TSpec, typename TVertexDescriptor> 1224 inline typename Id<Graph<Alignment<TStringSet, TCargo, TSpec> > >::Type& 1225 sequenceId(Graph<Alignment<TStringSet, TCargo, TSpec> > const& g, 1226 TVertexDescriptor const v) 1227 { 1228 SEQAN_CHECKPOINT 1229 return const_cast<typename Id<Graph<Alignment<TStringSet, TCargo, TSpec> > >::Type&>(g.data_fragment[v].data_seq_id); 1230 } 1231 1232 ////////////////////////////////////////////////////////////////////////////// 1233 1234 /*! 1235 * @fn AlignmentGraph#fragmentBegin 1236 * @brief Gets the begin position for this vertex descriptor in the sequence. 1237 * 1238 * @signature TPos fragmentBegin(ag, v); 1239 * 1240 * @param[in] ag The AlignmentGraph to query. 1241 * @param[in] v The vertex descriptor of the vertex. 1242 * 1243 * @return TPos The begin position. 1244 */ 1245 1246 template<typename TStringSet, typename TCargo, typename TSpec, typename TVertexDescriptor> 1247 inline typename Position<Graph<Alignment<TStringSet, TCargo, TSpec> > >::Type& 1248 fragmentBegin(Graph<Alignment<TStringSet, TCargo, TSpec> > const& g, 1249 TVertexDescriptor const v) 1250 { 1251 SEQAN_CHECKPOINT 1252 return const_cast<typename Position<Graph<Alignment<TStringSet, TCargo, TSpec> > >::Type&>(g.data_fragment[v].data_begin); 1253 } 1254 1255 ////////////////////////////////////////////////////////////////////////////// 1256 1257 /*! 1258 * @fn AlignmentGraph#fragmentLength 1259 * @brief Gets the length of the fragment for this vertex descriptor in the sequence. 1260 * 1261 * @signature TSize fragmentLength(ag, v); 1262 * 1263 * @param[in] ag The AlignmentGraph to query. 1264 * @param[in] v The vertex descriptor of the vertex. 1265 * 1266 * @return TPos The fragment size. 1267 */ 1268 1269 template<typename TStringSet, typename TCargo, typename TSpec, typename TVertexDescriptor> 1270 inline typename Size<Graph<Alignment<TStringSet, TCargo, TSpec> > >::Type& 1271 fragmentLength(Graph<Alignment<TStringSet, TCargo, TSpec> > const& g, 1272 TVertexDescriptor const v) 1273 { 1274 SEQAN_CHECKPOINT 1275 return const_cast<typename Size<Graph<Alignment<TStringSet, TCargo, TSpec> > >::Type&>(g.data_fragment[v].data_length); 1276 } 1277 1278 ////////////////////////////////////////////////////////////////////////////// 1279 1280 /*! 1281 * @fn AlignmentGraph#findVertex 1282 * @brief Finds a vertx given a sequence id and a position. 1283 * 1284 * @signature TVertexDescriptor findVertex(ag, id, pos); 1285 * 1286 * @param[in] ag The AlignmentGraph to search in. 1287 * @param[in] id The sequence id to search for. 1288 * @param[in] pos The position to search for. 1289 * 1290 * @return TVertexDescriptor The vertex covering the given position on the specified sequence, 1291 * <tt>getNil<TVertexDescriptor>()</tt> if none could be found. 1292 */ 1293 1294 template<typename TStringSet, typename TCargo, typename TSpec, typename TSeqId, typename TPos> 1295 inline typename VertexDescriptor<Graph<Alignment<TStringSet, TCargo, TSpec> > >::Type 1296 findVertex(Graph<Alignment<TStringSet, TCargo, TSpec> >& g, 1297 TSeqId id, 1298 TPos pos) 1299 { 1300 SEQAN_CHECKPOINT 1301 typedef Graph<Alignment<TStringSet, TCargo, TSpec> > TGraph; 1302 typedef typename TGraph::TKey_ TKey; 1303 typedef typename Size<TGraph>::Type TSize; 1304 typedef typename Id<TGraph>::Type TIdType; 1305 typedef typename VertexDescriptor<TGraph>::Type TVertexDescriptor; 1306 1307 return (pos >= (TPos) length(getValueById(stringSet(g),id))) ? getNil<TVertexDescriptor>() : g.data_pvMap.upper_bound(TKey((TIdType)id, (TSize)pos))->second; 1308 } 1309 1310 ////////////////////////////////////////////////////////////////////////////// 1311 1312 /*! 1313 * @fn AlignmentGraph#getProjectedPosition 1314 * @brief Projects a position of one sequence taking part in a pairwise match onto the other sequence. 1315 * 1316 * @signature getProjectedPosition(ag, seqId, pos, seqId2, pos2); 1317 * 1318 * @param[in] ag The @link AlignmentGraph @endlink to query. 1319 * @param[in] seqId The sequenceId. 1320 * @param[in] pos The position to project from. 1321 * @param[out] seqId2 The resulting id of the sequence that was projected onto. 1322 * @param[out] pos2 The reuslting position of the sequence was was projected onto. 1323 */ 1324 1325 template<typename TStringSet, typename TCargo, typename TSpec, typename TSeqId, typename TPosition, typename TSeqId2, typename TPosition2> 1326 inline void 1327 getProjectedPosition(Graph<Alignment<TStringSet, TCargo, TSpec> >& g, 1328 TSeqId const id1, 1329 TPosition const pos1, 1330 TSeqId2& id2, 1331 TPosition2& pos2) 1332 { 1333 SEQAN_ASSERT(length(stringSet(g)) == 2); 1334 1335 typedef Graph<Alignment<TStringSet, TCargo, TSpec> > TGraph; 1336 typedef typename VertexDescriptor<TGraph>::Type TVertexDescriptor; 1337 typedef typename EdgeType<TGraph>::Type TEdgeStump; 1338 TStringSet& str = stringSet(g); 1339 TVertexDescriptor sV = findVertex(g, id1, pos1); 1340 1341 // Case 1: No projection possible 1342 if (sV == getNil<TVertexDescriptor>()) { 1343 if ( (TSeqId) positionToId(str, 0) == id1) id2 = (TSeqId2) positionToId(str,1); 1344 else id2 = (TSeqId2) positionToId(str,0); 1345 pos2 = 0; 1346 return; 1347 } 1348 1349 // Case 2: Projection is possible 1350 TEdgeStump* current = getValue(g.data_align.data_vertex, sV); 1351 if(current != (TEdgeStump*) 0) { 1352 TVertexDescriptor tV = target(current); 1353 if (tV == sV) tV = source(current); 1354 pos2 = (TPosition2) (fragmentBegin(g,tV) + (pos1 - fragmentBegin(g, sV))); 1355 id2 = (TSeqId2) sequenceId(g, tV); 1356 return; 1357 } else { 1358 // If no out-going edge, get the preceding or following vertex 1359 if (fragmentBegin(g, sV) == 0) { 1360 getProjectedPosition(g, id1, fragmentBegin(g,sV) + fragmentLength(g, sV), id2, pos2); 1361 return; 1362 } else { 1363 getProjectedPosition(g, id1, fragmentBegin(g,sV) - 1, id2, pos2); 1364 return; 1365 } 1366 } 1367 } 1368 1369 ////////////////////////////////////////////////////////////////////////////// 1370 1371 template<typename TStringSet, typename TValue, typename TCargo, typename TSpec, typename TSeqId, typename TPosition, typename TSeqId2, typename TPosition2> 1372 inline void 1373 getProjectedPosition(Graph<Alignment<TStringSet, TCargo, TSpec> >& g, 1374 TValue seg_num, 1375 TSeqId const id1, 1376 TPosition const pos1, 1377 TSeqId2& id2, 1378 TPosition2& pos2) 1379 { 1380 SEQAN_ASSERT(length(stringSet(g)) == 2); 1381 1382 typedef Graph<Alignment<TStringSet, TCargo, TSpec> > TGraph; 1383 typedef typename VertexDescriptor<TGraph>::Type TVertexDescriptor; 1384 typedef typename EdgeType<TGraph>::Type TEdgeStump; 1385 TStringSet& str = stringSet(g); 1386 TVertexDescriptor sV = findVertex(g, id1, pos1); 1387 1388 // Case 1: No projection possible 1389 if (sV == getNil<TVertexDescriptor>()) { 1390 if ( (TSeqId) positionToId(str, 0) == id1) id2 = (TSeqId2) positionToId(str,1); 1391 else id2 = (TSeqId2) positionToId(str,0); 1392 pos2 = 0; 1393 return; 1394 } 1395 1396 // Case 2: Projection is possible 1397 TEdgeStump* current = getValue(g.data_align.data_vertex, sV); 1398 if(current != (TEdgeStump*) 0) { 1399 TVertexDescriptor tV = target(current); 1400 if (seg_num == 0) tV = source(current); // segnum 0 is defined as the sourceVertex, segnum 1 is targetVertex 1401 pos2 = (TPosition2) (fragmentBegin(g,tV) + (pos1 - fragmentBegin(g, sV))); 1402 id2 = (TSeqId2) sequenceId(g, tV); 1403 return; 1404 } else { 1405 // If no out-going edge, get the preceding or following vertex 1406 if (fragmentBegin(g, sV) == 0) { 1407 getProjectedPosition(g, seg_num, id1, fragmentBegin(g,sV) + fragmentLength(g, sV), id2, pos2); 1408 return; 1409 } else { 1410 getProjectedPosition(g, seg_num, id1, fragmentBegin(g,sV) - 1, id2, pos2); 1411 return; 1412 } 1413 } 1414 } 1415 1416 ////////////////////////////////////////////////////////////////////////////// 1417 1418 /*! 1419 * @fn AlignmentGraph#getFirstCoveredPosition 1420 * @brief Finds the first position in a sequence that is not assigned to a nil vertex. 1421 * 1422 * @signature TPos getFirstCoveredPosition(ag, id); 1423 * 1424 * @param[in] ag An @link AlignmentGraph @endlink. 1425 * @param[in] id A sequence id. 1426 * 1427 * @return TPos A sequence position. 1428 * 1429 * @see AlignmentGraph#getLastCoveredPosition 1430 */ 1431 1432 template<typename TStringSet, typename TCargo, typename TSpec, typename TSeqId> 1433 inline typename Position<Graph<Alignment<TStringSet, TCargo, TSpec> > >::Type 1434 getFirstCoveredPosition(Graph<Alignment<TStringSet, TCargo, TSpec> > const& g, 1435 TSeqId const id) 1436 { 1437 typedef Graph<Alignment<TStringSet, TCargo, TSpec> > TGraph; 1438 typedef typename VertexDescriptor<TGraph>::Type TVertexDescriptor; 1439 typedef typename TGraph::TPosToVertexMap_ TPosToVertexMap; 1440 1441 1442 typename TPosToVertexMap::const_iterator it = g.data_pvMap.upper_bound(std::make_pair(id, 0)); 1443 1444 // Case 1: id is not covered 1445 if (it == g.data_pvMap.end()) return length(getValueById(stringSet(g), id)); 1446 1447 // Case 2: We found a nil vertex, go one forward 1448 if (it->second == getNil<TVertexDescriptor>()) { 1449 ++it; 1450 if (it == g.data_pvMap.end()) return length(getValueById(stringSet(g), id)); 1451 } 1452 1453 // Now we have the right vertex, return the beginning if the sequence id still fits 1454 if (it->first.first != id) return length(getValueById(stringSet(g), id)); 1455 else return fragmentBegin(g, it->second); 1456 } 1457 1458 ////////////////////////////////////////////////////////////////////////////// 1459 1460 /*! 1461 * @fn AlignmentGraph#getLastCoveredPosition 1462 * @brief Finds the last position in a sequence that is not assigned to a nil vertex. 1463 * 1464 * @signature TPos getLastCoveredPosition(ag, id); 1465 * 1466 * @param[in] ag An @link AlignmentGraph @endlink. 1467 * @param[in] id A sequence id. 1468 * 1469 * @return TPos A sequence position. 1470 * 1471 * @see AlignmentGraph#getFirstCoveredPosition 1472 */ 1473 1474 template<typename TStringSet, typename TCargo, typename TSpec, typename TSeqId> 1475 inline typename Position<Graph<Alignment<TStringSet, TCargo, TSpec> > >::Type 1476 getLastCoveredPosition(Graph<Alignment<TStringSet, TCargo, TSpec> >& g, 1477 TSeqId id) 1478 { 1479 typedef Graph<Alignment<TStringSet, TCargo, TSpec> > TGraph; 1480 typedef typename VertexDescriptor<TGraph>::Type TVertexDescriptor; 1481 typedef typename TGraph::TPosToVertexMap_ TPosToVertexMap; 1482 1483 TVertexDescriptor nilVertex = getNil<TVertexDescriptor>(); 1484 typename TPosToVertexMap::const_iterator it = g.data_pvMap.lower_bound(std::make_pair(id, length(getValueById(stringSet(g), id)))); 1485 1486 // Case 1: No last position (all nil) 1487 if ((it == g.data_pvMap.begin()) && (it->second == nilVertex)) return 0; 1488 1489 // Case 2: Found a nil position but there is a vertex before 1490 if (it->second == nilVertex) { 1491 --it; 1492 } 1493 1494 // If the sequence id still matches return the position behind the last position belonging to this vertex 1495 if (it->first.first != id) return 0; 1496 return fragmentBegin(g, it->second) + fragmentLength(g, it->second); 1497 } 1498 1499 1500 ////////////////////////////////////////////////////////////////////////////// 1501 1502 /*! 1503 * @fn AlignmentGraph#convertAlignment 1504 * @brief Converts an alignment graph into an alignment matrix. 1505 * 1506 * @signature bool convertAlignment(g, component, order, compLength); 1507 * @signature bool convertAlignment(g, matrix); 1508 * 1509 * @param[in] g AlignmentGraph to convert. An alignment graph. 1510 * @param[out] component Vertex to component mapping. 1511 * @param[out] order The order of the component graph when sorting topologically. 1512 * @param[out] compLength Component sizes. 1513 * @param[out] matrix A string that represents an alignment matrix. 1514 * 1515 * @return bool true iff the alignment graph is a valid alignment and false otherwise. 1516 * 1517 * @section Remarks 1518 * 1519 * The variant with <tt>component</tt> and <tt>order</tt> computes a topological sorting of connected components. 1520 */ 1521 1522 template<typename TStringSet, typename TCargo, typename TSpec, typename TComponentMap, typename TOrderMap, typename TComponentLength> 1523 inline bool 1524 convertAlignment(Graph<Alignment<TStringSet, TCargo, TSpec> > const& g, 1525 TComponentMap& component, 1526 TOrderMap& order, 1527 TComponentLength& compLength) 1528 { 1529 SEQAN_CHECKPOINT 1530 typedef Graph<Alignment<TStringSet, TCargo, TSpec> > TGraph; 1531 typedef typename VertexDescriptor<TGraph>::Type TVertexDescriptor; 1532 typedef typename Id<TGraph>::Type TIdType; 1533 typedef typename Size<TGraph>::Type TSize; 1534 typedef typename Value<TComponentMap>::Type TComponent; 1535 typedef typename TGraph::TPosToVertexMap_ TPosToVertexMap; 1536 typedef typename TComponentLength::mapped_type TMappedType; 1537 typedef typename TComponentLength::key_type TKeyType; 1538 TVertexDescriptor nilVertex = getNil<TVertexDescriptor>(); 1539 1540 // Check for empty graph 1541 if (empty(g)) return false; 1542 1543 // Connected Components 1544 TSize numComponents = connectedComponents(component, g); 1545 1546 // Make a directed graph to represent the ordering of the components 1547 // Note: Multiple vertices might have the same component 1548 Graph<Directed<void, WithoutEdgeId> > componentGraph; 1549 reserve(_getVertexString(componentGraph), numComponents); 1550 for(TSize i = 0; i<numComponents;++i) addVertex(componentGraph); 1551 1552 TSize nseq = length(value(g.data_sequence)); 1553 String<std::set<TComponent> > componentsPerSeq; 1554 typedef String<String<TComponent> > TOrderedComponents; 1555 TOrderedComponents orderedComponentsPerSeq; 1556 resize(componentsPerSeq, nseq); 1557 resize(orderedComponentsPerSeq, nseq); 1558 typename TPosToVertexMap::const_iterator it1 = g.data_pvMap.begin(); 1559 typename TPosToVertexMap::const_iterator it1End = g.data_pvMap.end(); 1560 for(;it1!=it1End;++it1) { 1561 // If sections are not assigned to a vertex -> no alignment 1562 if (it1->second == nilVertex) return false; 1563 1564 // Remember the sequence that component belongs to 1565 TSize currentSeq = idToPosition(value(g.data_sequence), it1->first.first); 1566 1567 // Append component 1568 TComponent c = getProperty(component, it1->second); 1569 if ((value(componentsPerSeq,currentSeq)).empty()) { 1570 String<TComponent> tmp; 1571 value(orderedComponentsPerSeq, currentSeq) = tmp; 1572 } 1573 appendValue(value(orderedComponentsPerSeq, currentSeq), c, Generous()); 1574 // If two components appear twice in the same sequence -> no alignment 1575 if (!((value(componentsPerSeq,currentSeq)).insert(c)).second) return false; 1576 } 1577 clear(componentsPerSeq); 1578 1579 // Draw edges for the components within a sequence 1580 typedef typename Iterator<TOrderedComponents>::Type TIterTOrderedComponents; 1581 TIterTOrderedComponents itBegin = begin(orderedComponentsPerSeq); 1582 TIterTOrderedComponents itEnd = end(orderedComponentsPerSeq); 1583 for(;itBegin != itEnd; ++itBegin) { 1584 TSize n = length(*itBegin); 1585 for(TSize i = 0; i<n-1; ++i) { 1586 addEdge(componentGraph, value((*itBegin), i), value((*itBegin), i+1)); 1587 } 1588 } 1589 1590 // Make a topological sort of the component graph 1591 topologicalSort(order, componentGraph); 1592 1593 //// Debug code 1594 //std::cout << "Topological sort: " << std::endl; 1595 //for(TSize i = 0; i<length(order);++i) { 1596 // std::cout << order[i] << ','; 1597 //} 1598 //std::cout << std::endl; 1599 1600 // Walk through all sequences and check the component order 1601 unsigned int compIndex = 0; 1602 unsigned int compIndexLen = length(order); 1603 typename TPosToVertexMap::const_iterator it = g.data_pvMap.begin(); 1604 TIdType currentSeq = it->first.first; 1605 for(; it != g.data_pvMap.end(); ++it) { 1606 if (it->first.first != currentSeq) { 1607 compIndex = 0; 1608 currentSeq = it->first.first; 1609 } 1610 TKeyType c = (TKeyType) getProperty(component, it->second); 1611 if(!compLength.insert(std::make_pair(c, (TMappedType) fragmentLength(g, it->second))).second) { 1612 compLength[c] = _max(compLength[c], (TMappedType) fragmentLength(g, it->second)); 1613 } 1614 while ((compIndex < compIndexLen) && (order[compIndex] != c)) ++compIndex; 1615 // Crossing components -> no alignment 1616 if (compIndex >= compIndexLen) return false; 1617 // Next component 1618 ++compIndex; 1619 } 1620 1621 return true; 1622 } 1623 1624 1625 ////////////////////////////////////////////////////////////////////////////// 1626 1627 template<typename TStringSet, typename TCargo, typename TSpec, typename TMatrix> 1628 inline bool 1629 convertAlignment(Graph<Alignment<TStringSet, TCargo, TSpec> > const& g, 1630 TMatrix& mat) 1631 { 1632 SEQAN_CHECKPOINT 1633 typedef Graph<Alignment<TStringSet, TCargo, TSpec> > TGraph; 1634 typedef typename Value<TMatrix>::Type TValue; 1635 typedef typename Size<TGraph>::Type TSize; 1636 typedef typename Id<TGraph>::Type TIdType; 1637 typedef typename TGraph::TPosToVertexMap_ TPosToVertexMap; 1638 typedef std::map<unsigned int, unsigned int> TComponentLength; 1639 1640 // Strongly Connected Components, topological sort, and length of each component 1641 String<unsigned int> component; 1642 String<unsigned int> order; 1643 TComponentLength compLength; 1644 1645 if (!convertAlignment(g, component, order, compLength)) return false; 1646 1647 // Create the matrix 1648 TSize len = 0; 1649 TSize nseq = length(stringSet(g)); 1650 for(TComponentLength::iterator cIt=compLength.begin(); cIt != compLength.end(); ++cIt) len+=cIt->second; 1651 char gapChar = gapValue<char>(); 1652 resize(mat, len * nseq, gapChar); 1653 1654 // Fill the matrix 1655 TSize row = 0; 1656 TSize col = 0; 1657 typename TPosToVertexMap::const_iterator it = g.data_pvMap.begin(); 1658 unsigned int compIndex = 0; 1659 unsigned int compIndexLen = length(order); 1660 TIdType currentSeq = it->first.first; 1661 for(; it != g.data_pvMap.end(); ++it) { 1662 if (it->first.first != currentSeq) { 1663 SEQAN_ASSERT(col <= len); 1664 //std::cout << std::endl; 1665 ++row;col=0; 1666 compIndex = 0; 1667 currentSeq = it->first.first; 1668 } 1669 unsigned int c = getProperty(component, it->second); 1670 while ((compIndex < compIndexLen) && (order[compIndex] != c)) { 1671 for(TSize i=0;i<compLength[order[compIndex]];++i, ++col) 1672 mat[row*len + col] = gapValue<char>(); 1673 ++compIndex; 1674 } 1675 typedef typename Value<TStringSet>::Type TStringSetStr; 1676 TStringSetStr str = label(g,it->second); 1677 typedef typename Iterator<TStringSetStr, Standard>::Type TStringSetStrIter; 1678 TStringSetStrIter itStr = begin(str, Standard()); 1679 TStringSetStrIter itStrEnd = end(str, Standard()); 1680 for(;itStr != itStrEnd; goNext(itStr), ++col) 1681 mat[row*len + col] = (TValue) (*itStr); 1682 for(TSize i = length(str); i < compLength[order[compIndex]]; ++i, ++col) 1683 mat[row*len + col] = gapValue<char>(); 1684 ++compIndex; 1685 } 1686 SEQAN_ASSERT(row + 1 == nseq); 1687 //std::cout << std::endl; 1688 1689 return true; 1690 } 1691 1692 1693 1694 ////////////////////////////////////////////////////////////////////////////// 1695 1696 template<typename TValue, typename TSpec1, typename TStringSet, typename TCargo, typename TSpec2> 1697 inline bool 1698 convertAlignment(String<TValue, TSpec1> const& mat, 1699 Graph<Alignment<TStringSet, TCargo, TSpec2> >& g) 1700 { 1701 typedef String<TValue, TSpec1> TMatrix; 1702 typedef Graph<Alignment<TStringSet, TCargo, TSpec2> > TGraph; 1703 typedef typename Size<TGraph>::Type TSize; 1704 typedef typename Iterator<TMatrix, Standard>::Type TMatIter; 1705 clearVertices(g); 1706 TValue gapChar = gapValue<TValue>(); 1707 TSize nseq = length(stringSet(g)); 1708 TSize alignLen = length(mat) / nseq; 1709 String<Fragment<> > matches; 1710 for(TSize seq1 = 0; seq1<nseq; ++seq1) { 1711 for(TSize seq2 = seq1 + 1; seq2<nseq; ++seq2) { 1712 TMatIter seq1It = begin(mat); 1713 seq1It += seq1 * alignLen; 1714 TMatIter seq2It = begin(mat); 1715 seq2It += seq2 * alignLen; 1716 TSize alignPos = 0; 1717 TSize length = 0; 1718 TSize offset1 = 0; 1719 TSize offset2 = 0; 1720 for(TSize col = 0; col<alignLen; ++col, ++seq1It, ++seq2It, ++alignPos) { 1721 if ((*seq1It == gapChar) || (*seq2It == gapChar)) { 1722 if (length) { 1723 appendValue(matches, Fragment<>(seq1, alignPos - offset1 - length, seq2, alignPos - offset2 - length, length)); 1724 length = 0; 1725 } 1726 if (*seq1It == gapChar) ++offset1; 1727 if (*seq2It == gapChar) ++offset2; 1728 } else ++length; 1729 } 1730 if (length) appendValue(matches, Fragment<>(seq1, alignPos - offset1 - length, seq2, alignPos - offset2 - length, length)); 1731 } 1732 } 1733 //_debugMatches(stringSet(g), matches); 1734 matchRefinement(matches,stringSet(g),g); 1735 return true; 1736 } 1737 1738 1739 1740 ////////////////////////////////////////////////////////////////////////////// 1741 1742 1743 template<typename TStringSet, typename TCargo, typename TSpec> 1744 inline void 1745 rebuildGraph(Graph<Alignment<TStringSet, TCargo, TSpec> >& g) 1746 { 1747 typedef Graph<Alignment<TStringSet, TCargo, TSpec> > TGraph; 1748 typedef typename VertexDescriptor<TGraph>::Type TVertexDescriptor; 1749 typedef typename Iterator<TGraph, EdgeIterator>::Type TEdgeIterator; 1750 typedef typename Size<TGraph>::Type TSize; 1751 1752 // Initialization 1753 typedef Fragment<> TFragment; 1754 typedef String<TFragment> TFragmentString; 1755 TFragmentString matches; 1756 TSize nseq = length(stringSet(g)); 1757 1758 // Collect all character pairs 1759 typedef std::pair<TSize, TSize> TResiduePair; 1760 typedef std::set<TResiduePair> TResiduePairSet; 1761 String<TResiduePairSet> resPair; 1762 resize(resPair, nseq * nseq); 1763 TEdgeIterator itE(g); 1764 for(;!atEnd(itE);++itE) { 1765 TVertexDescriptor sV = sourceVertex(itE); 1766 TVertexDescriptor tV = targetVertex(itE); 1767 TSize seq1 = idToPosition(stringSet(g), sequenceId(g, sV)); 1768 TSize seq2 = idToPosition(stringSet(g), sequenceId(g, tV)); 1769 TSize index = 0; 1770 TSize pos1 = 0; 1771 TSize pos2 = 0; 1772 if (seq1 < seq2) { 1773 index = seq1 * nseq + seq2; 1774 pos1 = fragmentBegin(g, sV); 1775 pos2 = fragmentBegin(g, tV); 1776 } else { 1777 index = seq2 * nseq + seq1; 1778 pos1 = fragmentBegin(g, tV); 1779 pos2 = fragmentBegin(g, sV); 1780 } 1781 for(TSize i = 0; i<fragmentLength(g, sV); ++i) { 1782 resPair[index].insert(std::make_pair(pos1 + i, pos2 + i)); 1783 } 1784 } 1785 1786 // Rebuild the graph with maximal segments 1787 for(TSize i = 0; i<length(resPair); ++i) { 1788 if (resPair[i].empty()) continue; 1789 TSize seq1 = i / nseq; 1790 TSize seq2 = i % nseq; 1791 typename TResiduePairSet::const_iterator pos = resPair[i].begin(); 1792 typename TResiduePairSet::const_iterator posEnd = resPair[i].end(); 1793 TSize startMatch1 = pos->first; 1794 TSize startMatch2 = pos->second; 1795 TSize len = 1; 1796 ++pos; 1797 while(pos != posEnd) { 1798 if ((startMatch1 + len == pos->first) && (startMatch2 + len == pos->second)) ++len; 1799 else { 1800 appendValue(matches, TFragment(seq1, startMatch1, seq2, startMatch2, len), Generous()); 1801 startMatch1 = pos->first; 1802 startMatch2 = pos->second; 1803 len = 1; 1804 } 1805 ++pos; 1806 } 1807 appendValue(matches, TFragment(seq1, startMatch1, seq2, startMatch2, len), Generous()); 1808 } 1809 clearVertices(g); 1810 matchRefinement(matches,stringSet(g),g); 1811 } 1812 1813 1814 1815 1816 1817 ////////////////////////////////////////////////////////////////////////////// 1818 // Heaviest Common Subsequence adaptation 1819 ////////////////////////////////////////////////////////////////////////////// 1820 1821 ////////////////////////////////////////////////////////////////////////////// 1822 1823 template<typename TStringSet, typename TCargo, typename TSpec, typename TSize2, typename TSpec2, typename TPositions, typename TSize, typename TVertexDescriptor, typename TString> 1824 inline void 1825 _heaviestCommonSubsequence(Graph<Alignment<TStringSet, TCargo, TSpec> > const&, 1826 String<TSize2, TSpec2> const& /*slotToPos*/, 1827 TPositions const&, 1828 TSize const, 1829 TSize const, 1830 TVertexDescriptor const, 1831 TString const&, 1832 TString const&, 1833 Nothing&) 1834 { 1835 SEQAN_CHECKPOINT 1836 } 1837 1838 ////////////////////////////////////////////////////////////////////////////// 1839 1840 template<typename TStringSet, typename TCargo, typename TSpec, typename TSize2, typename TSpec2, typename TPositions, typename TSize, typename TString, typename TOutString> 1841 inline void 1842 _heaviestCommonSubsequence(Graph<Alignment<TStringSet, TCargo, TSpec> > const& g, 1843 String<TSize2, TSpec2> const& slotToPos, 1844 TPositions const& positions, 1845 TSize const m, 1846 TSize const n, 1847 TString const& str1, 1848 TString const& str2, 1849 TOutString& align) 1850 { 1851 SEQAN_CHECKPOINT 1852 typedef typename Value<TString>::Type TVertexSet; 1853 typedef typename Iterator<TString const, Standard>::Type TStringIter; 1854 typedef typename Iterator<TString, Standard>::Type TSIter; 1855 typedef typename Iterator<TVertexSet const, Standard>::Type TVertexSetIter; 1856 1857 // Create the alignment sequence 1858 TSize numMatches = length(positions); 1859 TSize alignLength = numMatches + (n - numMatches) + (m - numMatches); 1860 clear(align); 1861 resize(align, alignLength, TVertexSet(), Exact() ); 1862 TSIter pointerAlign = begin(align, Standard()); 1863 TSIter pointerAlignEnd = end(align, Standard()); 1864 TStringIter pointerStr1 = begin(str1, Standard()); 1865 TSize posStr1 = 0; 1866 TSize posStr2 = 0; 1867 TStringIter pointerStr2 = begin(str2, Standard()); 1868 int p = length(positions) - 1; 1869 while(pointerAlign != pointerAlignEnd) { 1870 TSize i = m; 1871 TSize j = n; 1872 if (p>=0) { 1873 i = (TSize) (slotToPos[positions[p]] / (TSize) n); // Get the index in str1 1874 j = n - 1 - (TSize) (slotToPos[positions[p]] % (TSize) n); // Get the index in str2 1875 }; 1876 1877 // In what order do we insert gaps? -> Only important at the beginning and at the end, not between matches 1878 bool firstI = true; 1879 if ((i != posStr1) && (j != posStr2)) 1880 { 1881 if ((posStr1 == 0) && (posStr2 == 0)) { 1882 TStringIter tmpPointerStr1 = pointerStr1; 1883 TStringIter tmpPointerStr2 = pointerStr2; 1884 TSize tmpPosStr1 = posStr1; 1885 TSize tmpPosStr2 = posStr2; 1886 TSize len1 = 0; 1887 TSize len2 = 0; 1888 for(;i != tmpPosStr1; ++tmpPosStr1, ++tmpPointerStr1) len1 += fragmentLength(g, value(*tmpPointerStr1, 0)); 1889 for(;j != tmpPosStr2; ++tmpPosStr2, ++tmpPointerStr2) len2 += fragmentLength(g, value(*tmpPointerStr2, 0)); 1890 if (len1 > len2) firstI = false; 1891 } else if ((i == m) && (i == n)) { 1892 TStringIter tmpPointerStr1 = pointerStr1; 1893 TStringIter tmpPointerStr2 = pointerStr2; 1894 TSize tmpPosStr1 = posStr1; 1895 TSize tmpPosStr2 = posStr2; 1896 TSize len1 = 0; 1897 TSize len2 = 0; 1898 for(;i != tmpPosStr1; ++tmpPosStr1, ++tmpPointerStr1) len1 += fragmentLength(g, value(*tmpPointerStr1, 0)); 1899 for(;j != tmpPosStr2; ++tmpPosStr2, ++tmpPointerStr2) len2 += fragmentLength(g, value(*tmpPointerStr2, 0)); 1900 if (len1 < len2) firstI = false; 1901 } 1902 } 1903 if (firstI) { 1904 // Gaps in seq 2 1905 while (i != posStr1) { 1906 TVertexSetIter itV = begin(*pointerStr1, Standard()); 1907 TVertexSetIter itVEnd = end(*pointerStr1, Standard()); 1908 for(;itV != itVEnd;++itV) appendValue(*pointerAlign, *itV, Generous()); 1909 ++pointerAlign; 1910 ++pointerStr1; ++posStr1; 1911 } 1912 // Gaps in seq 1 1913 while (j != posStr2) { 1914 TVertexSetIter itV = begin(*pointerStr2, Standard()); 1915 TVertexSetIter itVEnd = end(*pointerStr2, Standard()); 1916 for(;itV != itVEnd;++itV) appendValue(*pointerAlign, *itV, Generous()); 1917 ++pointerAlign; 1918 ++pointerStr2; ++posStr2; 1919 } 1920 } else { 1921 // Gaps in seq 1 1922 while (j != posStr2) { 1923 TVertexSetIter itV = begin(*pointerStr2, Standard()); 1924 TVertexSetIter itVEnd = end(*pointerStr2, Standard()); 1925 for(;itV != itVEnd;++itV) appendValue(*pointerAlign, *itV, Generous()); 1926 ++pointerAlign; 1927 ++pointerStr2; ++posStr2; 1928 } 1929 // Gaps in seq 2 1930 while (i != posStr1) { 1931 TVertexSetIter itV = begin(*pointerStr1, Standard()); 1932 TVertexSetIter itVEnd = end(*pointerStr1, Standard()); 1933 for(;itV != itVEnd;++itV) appendValue(*pointerAlign, *itV, Generous()); 1934 ++pointerAlign; 1935 ++pointerStr1; ++posStr1; 1936 } 1937 } 1938 1939 // Matches 1940 if (p>=0) { 1941 TVertexSetIter itV = begin(*pointerStr1, Standard()); 1942 TVertexSetIter itVEnd = end(*pointerStr1, Standard()); 1943 for(;itV != itVEnd;++itV) appendValue(*pointerAlign, *itV, Generous()); 1944 TVertexSetIter itV2 = begin(*pointerStr2, Standard()); 1945 TVertexSetIter itVEnd2 = end(*pointerStr2, Standard()); 1946 for(;itV2 != itVEnd2;++itV2) appendValue(*pointerAlign, *itV2, Generous()); 1947 ++pointerAlign; 1948 ++pointerStr1; ++posStr1; 1949 ++pointerStr2; ++posStr2; 1950 --p; 1951 } 1952 } 1953 } 1954 1955 1956 1957 1958 ////////////////////////////////////////////////////////////////////////////// 1959 1960 // TODO(holtgrew): Where does this belong? This is also there in graph_algorithms.h... 1961 1962 template<typename TStringSet, typename TCargo, typename TSpec, typename TString, typename TOutString> 1963 inline TCargo 1964 heaviestCommonSubsequence(Graph<Alignment<TStringSet, TCargo, TSpec> > const& g, 1965 TString const& str1, 1966 TString const& str2, 1967 TOutString& align) 1968 { 1969 SEQAN_CHECKPOINT 1970 typedef Graph<Alignment<TStringSet, TCargo, TSpec> > TGraph; 1971 typedef typename Size<TGraph>::Type TSize; 1972 typedef typename Iterator<TGraph, OutEdgeIterator>::Type TOutEdgeIterator; 1973 typedef typename Value<TString>::Type TVertexSet; 1974 1975 TSize m = length(str1); // How many sets of vertex descriptors in seq1 1976 TSize n = length(str2); // How many sets of vertex descriptors in seq2 1977 1978 // Size of the sequences 1979 // Note for profile alignments every member of the sequence is a String!!! of vertex descriptors 1980 1981 // Fill the vertex to position map for str1 1982 // Remember for each vertex descriptor the position in the sequence 1983 typedef String<TSize> TMapVertexPos; 1984 TMapVertexPos map; 1985 resize(map, getIdUpperBound(_getVertexIdManager(g)), MaxValue<TSize>::VALUE); 1986 typedef typename Iterator<TString const, Standard>::Type TStringIterConst; 1987 typedef typename Iterator<TVertexSet const, Standard>::Type TVertexSetIterConst; 1988 TStringIterConst itStr1 = begin(str1, Standard()); 1989 TStringIterConst itStrEnd1 = end(str1, Standard()); 1990 TSize pos = 0; 1991 TVertexSetIterConst itV; 1992 TVertexSetIterConst itVEnd; 1993 for(;itStr1 != itStrEnd1;++itStr1, ++pos) { 1994 itV = begin(*itStr1, Standard()); 1995 itVEnd = end(*itStr1, Standard()); 1996 for(;itV != itVEnd;++itV) map[*itV] = pos; 1997 } 1998 1999 // We could create the full graph -> too expensive 2000 // Remember which edges are actually present 2001 typedef String<TSize> TOccupiedPositions; 2002 typedef typename Iterator<TOccupiedPositions, Standard>::Type TOccIter; 2003 TOccupiedPositions occupiedPositions; 2004 TStringIterConst itStr2 = begin(str2, Standard()); 2005 TStringIterConst itStrEnd2 = end(str2, Standard()); 2006 TSize posItStr2 = 0; 2007 TSize pPos = 0; 2008 for(;itStr2 != itStrEnd2;++itStr2, ++posItStr2) { 2009 itV = begin(*itStr2, Standard()); 2010 itVEnd = end(*itStr2, Standard()); 2011 for(;itV != itVEnd;++itV) { 2012 TOutEdgeIterator itOut(g, *itV); 2013 for(;!atEnd(itOut); ++itOut) { 2014 // Target vertex must be in the map 2015 pPos = map[targetVertex(itOut)]; 2016 if (pPos != MaxValue<TSize>::VALUE) 2017 appendValue(occupiedPositions, pPos * n + (TSize) (n - posItStr2 - 1), Generous()); 2018 } 2019 } 2020 } 2021 std::sort(begin(occupiedPositions, Standard()), end(occupiedPositions, Standard())); 2022 // Get all occupied positions 2023 typedef String<TSize> TSlotToPos; 2024 typedef typename Iterator<TSlotToPos, Standard>::Type TSlotToPosIter; 2025 TSlotToPos slotToPos; 2026 TSize counter = 0; 2027 TSize oldVal = MaxValue<TSize>::VALUE; 2028 TOccIter occIt = begin(occupiedPositions, Standard()); 2029 TOccIter occItEnd = end(occupiedPositions, Standard()); 2030 for(;occIt != occItEnd; ++occIt) { 2031 if (oldVal != *occIt) { 2032 appendValue(slotToPos, *occIt, Generous()); 2033 oldVal = *occIt; 2034 ++counter; 2035 } 2036 } 2037 clear(occupiedPositions); 2038 2039 // Walk through str2 and fill in the weights of the actual edges 2040 typedef String<TCargo> TWeights; 2041 TWeights weights; 2042 resize(weights, length(slotToPos), 0); 2043 itStr2 = begin(str2, Standard()); 2044 posItStr2 = 0; 2045 for(;itStr2 != itStrEnd2;++itStr2, ++posItStr2) { 2046 itV = begin(*itStr2, Standard()); 2047 itVEnd = end(*itStr2, Standard()); 2048 for(;itV != itVEnd;++itV) { 2049 TOutEdgeIterator itOut(g, *itV); 2050 for(;!atEnd(itOut); ++itOut) { 2051 // Target vertex must be in the map 2052 pPos = map[targetVertex(itOut)]; 2053 if ( pPos != MaxValue<TSize>::VALUE) 2054 weights[std::distance(begin(slotToPos, Standard()), std::lower_bound(begin(slotToPos, Standard()), end(slotToPos, Standard()), pPos * n + (TSize) (n - posItStr2 - 1)))] += (TCargo) cargo(*itOut); 2055 } 2056 } 2057 } 2058 clear(map); 2059 2060 // Now the tough part: Find the right number for a given position 2061 typedef String<TSize> TSequenceString; 2062 typedef typename Iterator<TSequenceString, Standard>::Type TSeqIter; 2063 TSequenceString seq; 2064 resize(seq, length(slotToPos)); 2065 TSeqIter itSeq = begin(seq, Standard()); 2066 TSlotToPosIter itSlotPos = begin(slotToPos, Standard()); 2067 TSlotToPosIter itSlotPosEnd = end(slotToPos, Standard()); 2068 for(;itSlotPos != itSlotPosEnd; ++itSlotPos, ++itSeq) 2069 *itSeq = n - 1 - (*itSlotPos % n); 2070 2071 // Calculate the heaviest increasing subsequence 2072 String<TSize> positions; 2073 TCargo score = (TCargo) heaviestIncreasingSubsequence(seq, weights, positions); 2074 2075 // Retrieve the alignment sequence 2076 _heaviestCommonSubsequence(g, slotToPos, positions, m, n, str1, str2, align); 2077 2078 return score; 2079 2080 } 2081 2082 ////////////////////////////////////////////////////////////////////////////// 2083 2084 template<typename TStringSet, typename TCargo, typename TSpec, typename TString> 2085 inline TCargo 2086 heaviestCommonSubsequence(Graph<Alignment<TStringSet, TCargo, TSpec> > const& g, 2087 TString const& str1, 2088 TString const& str2) 2089 { 2090 SEQAN_CHECKPOINT 2091 Nothing noth; 2092 return heaviestCommonSubsequence(g, str1, str2, noth); 2093 } 2094 2095 template <typename TStringSet, typename TCargo, typename TSpec, typename TSequenceH, typename TSequenceV, typename TId, typename TPos, typename TTraceValue> 2096 inline void 2097 _alignTracePrint(Graph<Alignment<TStringSet, TCargo, TSpec> >& g, 2098 TSequenceH const &, 2099 TSequenceV const &, 2100 TId const id1, 2101 TPos const pos1, 2102 TId const id2, 2103 TPos const pos2, 2104 TPos const segLen, 2105 TTraceValue const tv) 2106 { 2107 // TraceBack values 2108 TTraceValue Diagonal = 0; TTraceValue Horizontal = 1; TTraceValue Vertical = 2; 2109 2110 if (segLen == 0) return; 2111 2112 if (tv == Horizontal) 2113 addVertex(g, id1, pos1, segLen); 2114 else if (tv == Vertical) 2115 addVertex(g, id2, pos2, segLen); 2116 else if (tv == Diagonal) 2117 addEdge(g, addVertex(g, id1, pos1, segLen), addVertex(g, id2, pos2, segLen)); 2118 } 2119 2120 // ---------------------------------------------------------------------------- 2121 // Function _alignTracePrint() [String<Fragment<> >] 2122 // ---------------------------------------------------------------------------- 2123 2124 // TODO(holtgrew): Are TSequenceH and TSequenceV used *anywhere*. 2125 2126 template <typename TFragment, typename TSequenceH, typename TSequenceV, typename TId, typename TPos, typename TTraceValue> 2127 inline void 2128 _alignTracePrint(String<TFragment>& matches, 2129 TSequenceH const &, 2130 TSequenceV const &, 2131 TId const id1, 2132 TPos const pos1, 2133 TId const id2, 2134 TPos const pos2, 2135 TPos const seqLen, 2136 TTraceValue const tv) 2137 { 2138 // Only the diagonal case 2139 if ((seqLen) && (tv == 0)) 2140 appendValue(matches, TFragment(id1, pos1, id2, pos2, seqLen), Generous()); 2141 } 2142 2143 } // namespace seqan 2144 2145 #endif // #ifndef SEQAN_INCLUDE_SEQAN_GRAPH_ALIGN_GRAPH_IMPL_ALIGN_H_ 2146