1 // ========================================================================== 2 // SeqAn - The Library for Sequence Analysis 3 // ========================================================================== 4 // Copyright (c) 2006-2018, Knut Reinert, FU Berlin 5 // All rights reserved. 6 // 7 // Redistribution and use in source and binary forms, with or without 8 // modification, are permitted provided that the following conditions are met: 9 // 10 // * Redistributions of source code must retain the above copyright 11 // notice, this list of conditions and the following disclaimer. 12 // * Redistributions in binary form must reproduce the above copyright 13 // notice, this list of conditions and the following disclaimer in the 14 // documentation and/or other materials provided with the distribution. 15 // * Neither the name of Knut Reinert or the FU Berlin nor the names of 16 // its contributors may be used to endorse or promote products derived 17 // from this software without specific prior written permission. 18 // 19 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 20 // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 22 // ARE DISCLAIMED. IN NO EVENT SHALL KNUT REINERT OR THE FU BERLIN BE LIABLE 23 // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 25 // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 26 // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 27 // LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 28 // OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH 29 // DAMAGE. 30 // 31 // ========================================================================== 32 // Author: Tobias Rausch <rausch@embl.de> 33 // Author: Anne-Katrin Emde <anne-katrin.emde@fu-berlin.de> 34 // ========================================================================== 35 36 #ifndef SEQAN_INCLUDE_SEQAN_GRAPH_ALIGN_GRAPH_IMPL_ALIGN_H_ 37 #define SEQAN_INCLUDE_SEQAN_GRAPH_ALIGN_GRAPH_IMPL_ALIGN_H_ 38 39 namespace seqan { 40 41 ////////////////////////////////////////////////////////////////////////////// 42 // Alignment Graph 43 ////////////////////////////////////////////////////////////////////////////// 44 45 ////////////////////////////////////////////////////////////////////////////// 46 47 48 49 50 51 52 ////////////////////////////////////////////////////////////////////////////// 53 // Alignment Graph Output Tags 54 ////////////////////////////////////////////////////////////////////////////// 55 56 /*! 57 * @defgroup AlignmentGraphFormatTags Alignment Graph Formats 58 * @brief File formats to write an @link AlignmentGraph @endlink. 59 * 60 * @tag AlignmentGraphFormatTags#MsfFormat 61 * @headerfile <seqan/graph_align.h> 62 * @brief MSF Format to write an @link AlignmentGraph @endlink. 63 * 64 * @signature typedef Tag<MsfFormat_> const MsfFormat; 65 * 66 * @tag AlignmentGraphFormatTags#FastaFormat 67 * @headerfile <seqan/graph_align.h> 68 * @brief Fasta Format to write an @link AlignmentGraph @endlink. 69 * 70 * @signature typedef Tag<FastaFormat_> const FastaFormat; 71 * 72 * @tag AlignmentGraphFormatTags#CgVizFormat 73 * @headerfile <seqan/graph_align.h> 74 * @brief CgViz Format to write an @link AlignmentGraph @endlink. 75 * 76 * @signature typedef Tag<CgVizFormat_> const CgVizFormat; 77 */ 78 79 ////////////////////////////////////////////////////////////////////////////// 80 81 ////////////////////////////////////////////////////////////////////////////// 82 83 struct MsfFormat_; 84 typedef Tag<MsfFormat_> const MsfFormat; 85 86 ////////////////////////////////////////////////////////////////////////////// 87 88 struct FastaFormat_; 89 typedef Tag<FastaFormat_> const FastaFormat; 90 91 ////////////////////////////////////////////////////////////////////////////// 92 93 struct CgVizFormat_; 94 typedef Tag<CgVizFormat_> const CgVizFormat; 95 96 97 98 ////////////////////////////////////////////////////////////////////////////// 99 // Default Alignment Graph 100 ////////////////////////////////////////////////////////////////////////////// 101 102 template<typename TStringSet, typename TCargo = unsigned int, typename TSpec = Default> 103 struct Alignment; 104 105 106 107 108 109 ////////////////////////////////////////////////////////////////////////////// 110 // Metafunctions 111 ////////////////////////////////////////////////////////////////////////////// 112 113 ////////////////////////////////////////////////////////////////////////////// 114 115 template<typename TStringSet, typename TCargo, typename TSpec> 116 struct EdgeType<Graph<Alignment<TStringSet, TCargo, TSpec> > const> { 117 typedef typename EdgeType<Graph<Undirected<TCargo, TSpec> > const>::Type Type; 118 }; 119 120 ////////////////////////////////////////////////////////////////////////////// 121 122 template<typename TStringSet, typename TCargo, typename TSpec> 123 struct EdgeType<Graph<Alignment<TStringSet, TCargo, TSpec> > > { 124 typedef typename EdgeType<Graph<Undirected<TCargo, TSpec> > >::Type Type; 125 }; 126 127 128 ////////////////////////////////////////////////////////////////////////////// 129 130 template<typename TStringSet, typename TCargo, typename TSpec> 131 struct Host<Graph<Alignment<TStringSet, TCargo, TSpec> > > { 132 typedef TStringSet Type; 133 }; 134 135 ////////////////////////////////////////////////////////////////////////////// 136 137 template<typename TStringSet, typename TCargo, typename TSpec> 138 struct Host<Graph<Alignment<TStringSet, TCargo, TSpec> > const> { 139 typedef TStringSet const Type; 140 }; 141 142 143 144 145 146 147 ////////////////////////////////////////////////////////////////////////////// 148 // Actual Alignment Graph Class 149 ////////////////////////////////////////////////////////////////////////////// 150 151 152 ////////////////////////////////////////////////////////////////////////////// 153 154 template<typename TId = unsigned int, typename TSize = unsigned int> 155 class FragmentInfo { 156 public: 157 TId data_seq_id; 158 TSize data_begin; 159 TSize data_length; 160 161 FragmentInfo() : 162 data_seq_id(0), 163 data_begin(0), 164 data_length(0) 165 { 166 } 167 168 FragmentInfo(TId id, TSize beg, TSize len) : 169 data_seq_id(id), 170 data_begin(beg), 171 data_length(len) 172 { 173 } 174 175 }; 176 177 ////////////////////////////////////////////////////////////////////////////// 178 179 /*! 180 * @class AlignmentGraph 181 * @extends Graph 182 * @headerfile <seqan/graph_align.h> 183 * @brief Alignment graph type. 184 * 185 * <img src="alignmentGraph.png" tite="An alignment graph with 3 sequences." /> 186 * 187 * @signature template <typename TStringSet[, typename TCargo[, typename TSpec]]> 188 * class Graph<Alignment<TStringSet, TCargo, TSpec> >; 189 * 190 * @tparam TStringSet The type of the @link StringSet @endlink containing the sequence information. <b>Must be a 191 * @link DependentStringSet @endlink.</b> 192 * @tparam TCargo The cargo type that can be attached to edges. Default: <tt>unsigned</tt>. 193 * @tparam TSpec The specializing type. Default: <tt>Default</tt>. Use <tt>WithoutEdgeId</tt> here to omit 194 * edge ids. NB: if edges do not store ids then external property maps do not work. 195 * 196 * @section Example 197 * 198 * @include demos/dox/graph_align/graph_align.cpp 199 * 200 * @code{.console} 201 * Score = -8 202 * Alignment matrix: 203 * 0 . : 204 * ATCGAATGCGGA 205 * | ||| | 206 * ACTCGTTGC--A 207 * @endcode 208 */ 209 210 template<typename TString, typename TSpecial, typename TCargo, typename TSpec> 211 class Graph<Alignment<StringSet<TString, Dependent<TSpecial> >, TCargo, TSpec> > 212 { 213 public: 214 typedef typename Id<Graph>::Type TIdType_; 215 typedef typename VertexDescriptor<Graph>::Type TVertexDescriptor_; 216 typedef typename Size<Graph>::Type TSize_; 217 typedef std::pair<TIdType_, TSize_> TKey_; 218 typedef std::map<TKey_, TVertexDescriptor_> TPosToVertexMap_; 219 typedef FragmentInfo<TIdType_, TSize_> TFragmentInfo_; 220 221 // Alignment graph 222 Graph<Undirected<TCargo, TSpec> > data_align; 223 224 // Sequences 225 Holder<StringSet<TString, Dependent<TSpecial> > > data_sequence; 226 227 // Alignment specific members 228 String<TFragmentInfo_> data_fragment; 229 230 // STL Map to retrieve a vertex given SeqId, Position 231 TPosToVertexMap_ data_pvMap; 232 233 234 Graph() { 235 } 236 237 238 template <typename TDefault> 239 Graph(StringSet<TString, Dependent<TDefault> > const& sSet) { 240 data_sequence = sSet; 241 242 // Cover all sequences with nil vertices 243 TVertexDescriptor_ nilVertex = getNil<TVertexDescriptor_>(); 244 TSize_ lenSet = length(sSet); 245 for(TSize_ k=0; k<lenSet;++k) 246 data_pvMap.insert(std::make_pair(TKey_(positionToId(sSet,k), length(sSet[k])), nilVertex)); 247 } 248 249 template <typename TDefault> 250 Graph(StringSet<TString, Owner<TDefault> > const& sSet) { 251 StringSet<TString, Dependent<> > depStr(sSet); 252 data_sequence = depStr; 253 254 // Cover all sequences with nil vertices 255 TVertexDescriptor_ nilVertex = getNil<TVertexDescriptor_>(); 256 TSize_ lenSet = length(sSet); 257 for(TSize_ k=0; k<lenSet;++k) 258 data_pvMap.insert(std::make_pair(TKey_(positionToId(const_cast<StringSet<TString, Owner<TDefault> >&>(sSet),k), length(sSet[k])), nilVertex)); 259 } 260 261 262 ~Graph() { 263 clear(*this); 264 } 265 266 Graph(Graph const & _other) 267 { 268 _copyGraph(_other, *this); 269 } 270 271 Graph const& operator = (Graph const & _other) { 272 if (this == &_other) return *this; 273 clear(*this); 274 _copyGraph(_other, *this); 275 return *this; 276 } 277 }; 278 279 ////////////////////////////////////////////////////////////////////////////// 280 // INTERNAL FUNCTIONS 281 ////////////////////////////////////////////////////////////////////////////// 282 283 ///////////////////////////////////////////////////////////////////////////// 284 285 template<typename TStringSet, typename TCargo, typename TSpec> 286 inline String<typename EdgeType<Graph<Alignment<TStringSet, TCargo, TSpec> > >::Type*>& 287 _getVertexString(Graph<Alignment<TStringSet, TCargo, TSpec> > const& g) { 288 typedef Graph<Alignment<TStringSet, TCargo, TSpec> > TGraph; 289 typedef typename EdgeType<TGraph>::Type TEdgeStump; 290 return const_cast<String<TEdgeStump*>&>(g.data_align.data_vertex); 291 } 292 293 ///////////////////////////////////////////////////////////////////////////// 294 295 template<typename TStringSet, typename TCargo, typename TSpec> 296 inline typename VertexIdHandler<Graph<Alignment<TStringSet, TCargo, TSpec> > >::Type& 297 _getVertexIdManager(Graph<Alignment<TStringSet, TCargo, TSpec> > const& g) { 298 typedef Graph<Alignment<TStringSet, TCargo, TSpec> > TGraph; 299 typedef typename VertexIdHandler<TGraph>::Type TVertexIdManager; 300 return const_cast<TVertexIdManager&>(g.data_align.data_id_managerV); 301 } 302 303 ////////////////////////////////////////////////////////////////////////////// 304 305 template<typename TStringSet, typename TCargo, typename TSpec> 306 inline typename EdgeIdHandler<Graph<Alignment<TStringSet, TCargo, TSpec> > >::Type& 307 _getEdgeIdManager(Graph<Alignment<TStringSet, TCargo, TSpec> > const& g) { 308 typedef Graph<Alignment<TStringSet, TCargo, TSpec> > TGraph; 309 typedef typename EdgeIdHandler<TGraph>::Type TEdgeIdManager; 310 return const_cast<TEdgeIdManager&>(g.data_align.data_id_managerE); 311 } 312 313 ////////////////////////////////////////////////////////////////////////////// 314 315 template<typename TStringSet, typename TCargo, typename TSpec> 316 inline void 317 _copyGraph(Graph<Alignment<TStringSet, TCargo, TSpec> > const& source, 318 Graph<Alignment<TStringSet, TCargo, TSpec> >& dest, 319 bool) 320 { 321 clear(dest); 322 dest.data_align = source.data_align; 323 dest.data_sequence = source.data_sequence; 324 dest.data_fragment = source.data_fragment; 325 dest.data_pvMap = source.data_pvMap; 326 } 327 328 ////////////////////////////////////////////////////////////////////////////// 329 330 template<typename TStringSet, typename TCargo, typename TSpec> 331 inline void 332 _copyGraph(Graph<Alignment<TStringSet, TCargo, TSpec> > const& source, 333 Graph<Alignment<TStringSet, TCargo, TSpec> >& dest) 334 { 335 _copyGraph(source, dest, false); // Never transpose, underlying graph is undirected 336 } 337 338 339 ////////////////////////////////////////////////////////////////////////////// 340 // FUNCTIONS 341 ////////////////////////////////////////////////////////////////////////////// 342 343 344 ////////////////////////////////////////////////////////////////////////////// 345 346 template<typename TStringSet, typename TCargo, typename TSpec> 347 inline void 348 transpose(Graph<Alignment<TStringSet, TCargo, TSpec> > const& source, 349 Graph<Alignment<TStringSet, TCargo, TSpec> >& dest) 350 { 351 // Alignment graph, no transpose just copy 352 _copyGraph(source, dest, false); 353 } 354 355 ////////////////////////////////////////////////////////////////////////////// 356 357 template<typename TStringSet, typename TCargo, typename TSpec> 358 inline void 359 transpose(Graph<Alignment<TStringSet, TCargo, TSpec> >&) 360 { 361 // Nothing to do in an alignment graph 362 } 363 364 ////////////////////////////////////////////////////////////////////////////// 365 366 template<typename TStringSet, typename TCargo, typename TSpec> 367 inline typename Size<Graph<Alignment<TStringSet, TCargo, TSpec> > >::Type 368 numEdges(Graph<Alignment<TStringSet, TCargo, TSpec> > const& g) 369 { 370 return numEdges(g.data_align); 371 } 372 373 ////////////////////////////////////////////////////////////////////////////// 374 375 template<typename TStringSet, typename TCargo, typename TSpec> 376 inline typename Size<Graph<Alignment<TStringSet, TCargo, TSpec> > >::Type 377 numVertices(Graph<Alignment<TStringSet, TCargo, TSpec> > const& g) 378 { 379 return numVertices(g.data_align); 380 } 381 382 ////////////////////////////////////////////////////////////////////////////// 383 384 template<typename TStringSet, typename TCargo, typename TSpec> 385 inline bool 386 empty(Graph<Alignment<TStringSet, TCargo, TSpec> > const& g) 387 { 388 return empty(g.data_align); 389 } 390 391 ////////////////////////////////////////////////////////////////////////////// 392 393 template<typename TStringSet, typename TCargo, typename TSpec> 394 inline void 395 clearEdges(Graph<Alignment<TStringSet, TCargo, TSpec> >& g) 396 { 397 clearEdges(g.data_align); 398 } 399 400 ////////////////////////////////////////////////////////////////////////////// 401 402 template<typename TStringSet, typename TCargo, typename TSpec> 403 inline void 404 clearVertices(Graph<Alignment<TStringSet, TCargo, TSpec> >& g) 405 { 406 typedef Graph<Alignment<TStringSet, TCargo, TSpec> > TGraph; 407 typedef typename Size<TStringSet>::Type TSize; 408 typedef typename VertexDescriptor<TGraph>::Type TVertexDescriptor; 409 typedef typename TGraph::TKey_ TKey; 410 411 clear(g.data_fragment); 412 g.data_pvMap.clear(); 413 clearVertices(g.data_align); 414 415 416 // Don't forget to cover the sequences with nil vertices again 417 TVertexDescriptor nilVertex = getNil<TVertexDescriptor>(); 418 if(!empty(value(g.data_sequence))) { 419 TSize lenSet = length(stringSet(g)); 420 for(TSize k=0; k<lenSet;++k) 421 g.data_pvMap.insert(std::make_pair(TKey(positionToId(stringSet(g),k), length(stringSet(g)[k])), nilVertex)); 422 } 423 } 424 425 ////////////////////////////////////////////////////////////////////////////// 426 427 template<typename TStringSet, typename TCargo, typename TSpec> 428 inline void 429 clear(Graph<Alignment<TStringSet, TCargo, TSpec> >& g) 430 { 431 // Only clear also removes the sequences 432 clear(value(g.data_sequence)); 433 clearVertices(g); 434 } 435 436 ////////////////////////////////////////////////////////////////////////////// 437 438 template<typename TStringSet, typename TCargo, typename TSpec, typename TVertexDescriptor> 439 inline typename Size<Graph<Alignment<TStringSet, TCargo, TSpec> > >::Type 440 outDegree(Graph<Alignment<TStringSet, TCargo, TSpec> > const& g, 441 TVertexDescriptor const vertex) 442 { 443 return outDegree(g.data_align, vertex); 444 } 445 446 ////////////////////////////////////////////////////////////////////////////// 447 448 template<typename TStringSet, typename TCargo, typename TSpec, typename TVertexDescriptor> 449 inline typename Size<Graph<Alignment<TStringSet, TCargo, TSpec> > >::Type 450 inDegree(Graph<Alignment<TStringSet, TCargo, TSpec> > const& g, 451 TVertexDescriptor const vertex) 452 { 453 return inDegree(g.data_align, vertex); 454 } 455 456 ////////////////////////////////////////////////////////////////////////////// 457 458 template<typename TStringSet, typename TCargo, typename TSpec, typename TVertexDescriptor> 459 inline typename Size<Graph<Alignment<TStringSet, TCargo, TSpec> > >::Type 460 degree(Graph<Alignment<TStringSet, TCargo, TSpec> > const& g, 461 TVertexDescriptor const vertex) 462 { 463 return degree(g.data_align, vertex); 464 } 465 466 ////////////////////////////////////////////////////////////////////////////// 467 468 template<typename TStringSet, typename TCargo, typename TSpec, typename TId, typename TPos, typename TLength> 469 inline typename VertexDescriptor<Graph<Alignment<TStringSet, TCargo, TSpec> > >::Type 470 addVertex(Graph<Alignment<TStringSet, TCargo, TSpec> >& g, 471 TId id, 472 TPos begin, 473 TLength len) 474 { 475 typedef Graph<Alignment<TStringSet, TCargo, TSpec> > TGraph; 476 typedef typename VertexDescriptor<TGraph>::Type TVertexDescriptor; 477 typedef typename Id<TGraph>::Type TIdType; 478 typedef typename Size<TGraph>::Type TSize; 479 typedef typename TGraph::TFragmentInfo_ TFragmentInfo; 480 typedef typename TGraph::TKey_ TKey; 481 typedef typename TGraph::TPosToVertexMap_ TPosToVertexMap; 482 //typedef typename TPosToVertexMap::key_type TKey; 483 484 //for(TPosToVertexMap::const_iterator p = g.data_pvMap.begin(); p != g.data_pvMap.end(); ++p) { 485 // std::cout << p->first.first << ',' << p->first.second << ':' << p->second << std::endl; 486 //} 487 488 TVertexDescriptor nilVertex = getNil<TVertexDescriptor>(); 489 490 // Store the new fragment 491 typename TPosToVertexMap::iterator interval = g.data_pvMap.lower_bound(TKey((TIdType)id, (TSize)begin + (TSize)len)); 492 // Segment does not belong to Sequence anymore 493 SEQAN_ASSERT(interval != g.data_pvMap.end()); 494 // Segment end must be assigned to nil so far 495 SEQAN_ASSERT(interval->second == nilVertex); 496 // Segment must belong to the whole old interval 497 SEQAN_ASSERT(*interval == *g.data_pvMap.upper_bound(TKey((TIdType)id, (TSize)begin))); 498 499 // Insert new vertex 500 TVertexDescriptor vd = addVertex(g.data_align); 501 if (length(g.data_fragment) <= vd) resize(g.data_fragment, vd + 1, Generous()); 502 assignProperty(g.data_fragment, vd, TFragmentInfo(id, begin, len)); 503 504 // Update position to vertex map 505 // Does the end of the new fragment coincides with the end of the interval? 506 if ( (TSize) begin + len == (TSize) interval->first.second) { 507 // Does the beginning of the new fragment coincides with the beginning of the interval? 508 if ((begin == 0) || 509 (g.data_pvMap.find(TKey((TIdType)id, (TSize)begin)) != g.data_pvMap.end())) { 510 // Replace interval 511 interval->second = vd; 512 } else { 513 // Split interval once 514 g.data_pvMap.insert(std::make_pair(TKey(interval->first.first,(TSize)begin), interval->second)); 515 g.data_pvMap.erase(interval); 516 g.data_pvMap.insert(std::make_pair(TKey((TIdType)id,(TSize)begin+len), vd)); 517 } 518 } else { 519 // Does the beginning of the new fragment coincides with the beginning of the interval? 520 if ((begin == 0) || 521 (g.data_pvMap.find(TKey((TIdType)id, (TSize)begin)) != g.data_pvMap.end())) { 522 // Split interval once 523 // Just insert here because we store interval ends 524 g.data_pvMap.insert(std::make_pair(TKey((TIdType)id,(TSize)begin+len), vd)); 525 } else { 526 // Split interval twice 527 TSize tmp = interval->first.second; 528 g.data_pvMap.insert(std::make_pair(TKey(interval->first.first,begin), interval->second)); 529 g.data_pvMap.erase(interval); 530 g.data_pvMap.insert(std::make_pair(TKey((TIdType)id,(TSize)begin+len), vd)); 531 g.data_pvMap.insert(std::make_pair(TKey((TIdType)id,tmp), nilVertex)); 532 } 533 } 534 return vd; 535 } 536 537 ////////////////////////////////////////////////////////////////////////////// 538 539 template<typename TStringSet, typename TCargo, typename TSpec, typename TVD> 540 inline void 541 removeVertex(Graph<Alignment<TStringSet, TCargo, TSpec> >& g, 542 TVD const v) 543 { 544 typedef Graph<Alignment<TStringSet, TCargo, TSpec> > TGraph; 545 typedef typename VertexDescriptor<TGraph>::Type TVertexDescriptor; 546 typedef typename TGraph::TKey_ TKey; 547 typedef typename TGraph::TPosToVertexMap_ TPosToVertexMap; 548 typename TPosToVertexMap::iterator interval = (g.data_pvMap.lower_bound(TKey(sequenceId(g,v), fragmentBegin(g,v) + fragmentLength(g,v)))); 549 550 // Clear the interval 551 interval->second = getNil<TVertexDescriptor>(); 552 typename TPosToVertexMap::iterator interval_iter = interval; 553 if (interval_iter != g.data_pvMap.begin()) { 554 --interval_iter; 555 if ((interval_iter->second == getNil<TVertexDescriptor>()) && 556 (interval_iter->first.first == interval->first.first)) { 557 g.data_pvMap.erase(interval_iter); 558 } 559 } 560 interval_iter = interval; 561 ++interval_iter; 562 if (interval_iter != g.data_pvMap.end()) { 563 if ((interval_iter->second == getNil<TVertexDescriptor>()) && 564 (interval_iter->first.first == interval->first.first)) { 565 g.data_pvMap.erase(interval); 566 } 567 } 568 569 // Remove the vertex 570 removeVertex(g.data_align,v); 571 } 572 573 ////////////////////////////////////////////////////////////////////////////// 574 575 template<typename TStringSet, typename TCargo, typename TSpec, typename TVertexDescriptor> 576 inline typename EdgeDescriptor<Graph<Alignment<TStringSet, TCargo, TSpec> > >::Type 577 addEdge(Graph<Alignment<TStringSet, TCargo, TSpec> >& g, 578 TVertexDescriptor const source, 579 TVertexDescriptor const target) 580 { 581 return addEdge(g.data_align, source, target); 582 } 583 584 ////////////////////////////////////////////////////////////////////////////// 585 586 template<typename TStringSet, typename TCargo, typename TSpec, typename TVertexDescriptor, typename TCargo2> 587 inline typename EdgeDescriptor<Graph<Alignment<TStringSet, TCargo, TSpec> > >::Type 588 addEdge(Graph<Alignment<TStringSet, TCargo, TSpec> >& g, 589 TVertexDescriptor const source, 590 TVertexDescriptor const target, 591 TCargo2 const cargo) 592 { 593 return addEdge(g.data_align, source, target, cargo); 594 } 595 596 ////////////////////////////////////////////////////////////////////////////// 597 598 template<typename TStringSet, typename TCargo, typename TSpec, typename TVertexDescriptor> 599 inline void 600 removeEdge(Graph<Alignment<TStringSet, TCargo, TSpec> >& g, 601 TVertexDescriptor const source, 602 TVertexDescriptor const target) 603 { 604 removeEdge(g.data_align, source, target); 605 } 606 607 ////////////////////////////////////////////////////////////////////////////// 608 609 template<typename TStringSet, typename TCargo, typename TSpec, typename TEdgeDescriptor> 610 inline void 611 removeEdge(Graph<Alignment<TStringSet, TCargo, TSpec> >& g, 612 TEdgeDescriptor const edge) 613 { 614 removeEdge(g.data_align, sourceVertex(g.data_align,edge), targetVertex(g.data_align,edge)); 615 } 616 617 ////////////////////////////////////////////////////////////////////////////// 618 619 template<typename TStringSet, typename TCargo, typename TSpec, typename TVertexDescriptor> 620 inline void 621 removeOutEdges(Graph<Alignment<TStringSet, TCargo, TSpec> >& g, 622 TVertexDescriptor const v) 623 { 624 removeOutEdges(g.data_align, v); 625 } 626 627 ////////////////////////////////////////////////////////////////////////////// 628 629 template<typename TStringSet, typename TCargo, typename TSpec, typename TVertexDescriptor> 630 inline void 631 removeInEdges(Graph<Alignment<TStringSet, TCargo, TSpec> >& g, 632 TVertexDescriptor const v) 633 { 634 removeInEdges(g.data_align,v); 635 } 636 637 ////////////////////////////////////////////////////////////////////////////// 638 639 template<typename TStringSet, typename TCargo, typename TSpec, typename TEdgeDescriptor> 640 inline typename VertexDescriptor<Graph<Alignment<TStringSet, TCargo, TSpec> > >::Type 641 targetVertex(Graph<Alignment<TStringSet, TCargo, TSpec> > const& g, 642 TEdgeDescriptor const edge) 643 { 644 return targetVertex(g.data_align, edge); 645 } 646 647 ////////////////////////////////////////////////////////////////////////////// 648 649 template<typename TStringSet, typename TCargo, typename TSpec, typename TEdgeDescriptor> 650 inline typename VertexDescriptor<Graph<Alignment<TStringSet, TCargo, TSpec> > >::Type 651 sourceVertex(Graph<Alignment<TStringSet, TCargo, TSpec> > const& g, 652 TEdgeDescriptor const edge) 653 { 654 return sourceVertex(g.data_align, edge); 655 } 656 657 ////////////////////////////////////////////////////////////////////////////// 658 659 template<typename TStringSet, typename TCargo, typename TSpec, typename TMatrix> 660 inline void 661 getAdjacencyMatrix(Graph<Alignment<TStringSet, TCargo, TSpec> > const& g, 662 TMatrix& mat) 663 { 664 getAdjacencyMatrix(g.data_align, mat); 665 } 666 667 ////////////////////////////////////////////////////////////////////////////// 668 669 template<typename TVector, typename TStringSet, typename TCargo, typename TSpec, typename TVertex> 670 inline void 671 getVertexAdjacencyVector(TVector & vectIn, 672 TVector & vectOut, 673 Graph<Alignment<TStringSet, TCargo, TSpec> > const & g, 674 TVertex const & vertex) 675 { 676 getVertexAdjacencyVector(vectIn, vectOut, g.data_align, vertex); 677 } 678 679 ////////////////////////////////////////////////////////////////////////////// 680 681 template<typename TStringSet, typename TCargo, typename TSpec, typename TVertexDescriptor> 682 inline typename EdgeDescriptor<Graph<Alignment<TStringSet, TCargo, TSpec> > >::Type 683 findEdge(Graph<Alignment<TStringSet, TCargo, TSpec> > const& g, 684 TVertexDescriptor const v, 685 TVertexDescriptor const w) 686 { 687 return findEdge(g.data_align, v, w); 688 } 689 690 ////////////////////////////////////////////////////////////////////////////// 691 692 template <typename TFile, typename TStringSet, typename TCargo, typename TSpec> 693 inline void 694 write(TFile & target, 695 Graph<Alignment<TStringSet, TCargo, TSpec> > const& g) 696 { 697 //IOREV _nodoc_ specialization not documented (at least not obviously) 698 typedef Graph<Alignment<TStringSet, TCargo, TSpec> > TGraph; 699 typedef typename Size<TGraph>::Type TSize; 700 typedef typename TGraph::TFragmentInfo_ TSegment; 701 typedef typename VertexDescriptor<TGraph>::Type TVertexDescriptor; 702 typedef typename EdgeType<TGraph>::Type TEdgeStump; 703 typedef typename Iterator<String<TEdgeStump*> const, Standard>::Type TIterConst; 704 705 String<char> align; 706 if (!convertAlignment(g, align)) { 707 write(target,"Adjacency list:\n"); 708 TIterConst it = begin(g.data_align.data_vertex, Standard()); 709 TIterConst itEnd = end(g.data_align.data_vertex, Standard()); 710 TVertexDescriptor pos = 0; 711 for(;it!=itEnd; ++it, ++pos) { 712 if (!idInUse(g.data_align.data_id_managerV, pos)) continue; 713 TVertexDescriptor sourceV = pos; 714 appendNumber(target, (int)sourceV); 715 TSegment seg = getProperty(g.data_fragment, sourceV); 716 write(target," (SeqId:"); 717 appendNumber(target, (int)seg.data_seq_id); 718 write(target," ,Begin:"); 719 appendNumber(target, (int)seg.data_begin); 720 write(target," ,Length:"); 721 appendNumber(target, (int)seg.data_length); 722 write(target,") -> "); 723 TEdgeStump* current = *it; 724 while(current!=0) { 725 TVertexDescriptor adjV = getTarget(current); 726 if (adjV != sourceV) { 727 appendNumber(target, (int)adjV); 728 writeValue(target, ','); 729 current=getNextS(current); 730 } else { 731 adjV = getSource(current); 732 appendNumber(target, (int)adjV); 733 writeValue(target, ','); 734 current=getNextT(current); 735 } 736 } 737 writeValue(target, '\n'); 738 } 739 write(target,"Edge list:\n"); 740 it = begin(g.data_align.data_vertex, Standard()); 741 pos = 0; 742 for(;it!=itEnd; ++it, ++pos) { 743 TVertexDescriptor sourceV = pos; 744 TEdgeStump* current = *it; 745 while(current!=0) { 746 TVertexDescriptor targetV = getTarget(current); 747 if (sourceV != targetV) { 748 write(target,"Source: "); 749 appendNumber(target, (int)sourceV); 750 write(target, ",Target: "); 751 appendNumber(target, (int)targetV); 752 write(target, " (Id: "); 753 appendNumber(target, (int)_getId(current)); 754 write(target, ")\n"); 755 current=getNextS(current); 756 } else { 757 current=getNextT(current); 758 } 759 } 760 } 761 } else { 762 TSize nseq = length(stringSet(g)); 763 TSize colLen = length(align) / nseq; 764 765 TSize baseCount=0; 766 TSize leftSpace=6; 767 TSize xPos = 0; 768 write(target,"Alignment matrix:\n"); 769 while (xPos < colLen) { 770 TSize windowSize = 50; 771 if ((xPos + windowSize)>colLen) windowSize = colLen - xPos; 772 773 // Print header line 774 TSize offset=0; 775 // Larger numbers need to be further left 776 if (baseCount != 0) offset = (unsigned int) floor(log((double)baseCount) / log((double)10)); 777 for(TSize j = 0;j<leftSpace-offset;++j) writeValue(target, ' '); 778 appendNumber(target, (int)baseCount); 779 baseCount+=windowSize; 780 writeValue(target, ' '); 781 for(TSize col = 1;col<=windowSize;++col) { 782 if ((col % 10)==0) writeValue(target, ':'); 783 else if ((col % 5)==0) writeValue(target, '.'); 784 else writeValue(target, ' '); 785 } 786 write(target, " \n"); 787 788 // Print sequences 789 for(TSize row=0;row<2*nseq-1;++row) { 790 for(TSize col = 0;col<leftSpace+2;++col) 791 writeValue(target, ' '); 792 if ((row % 2)==0) { 793 for(TSize col = xPos;col<xPos+windowSize;++col) 794 writeValue(target, align[(row/2)*colLen+col]); 795 } else { 796 for(TSize col = xPos;col<xPos+windowSize;++col) { 797 if ((align[((row-1)/2)*colLen + col] != gapValue<char>()) && 798 (align[((row+1)/2)*colLen + col] != gapValue<char>()) && 799 (align[((row-1)/2)*colLen + col] == align[((row+1)/2)*colLen + col])) 800 writeValue(target, '|'); 801 else 802 writeValue(target, ' '); 803 } 804 } 805 writeValue(target, '\n'); 806 } 807 writeValue(target, '\n'); 808 xPos+=windowSize; 809 } 810 writeValue(target, '\n'); 811 } 812 } 813 814 ////////////////////////////////////////////////////////////////////////////// 815 816 template <typename TFile, typename TSpec, typename TNames> 817 inline void 818 write(TFile & file, 819 Graph<TSpec> const& g, 820 TNames const& names, 821 FastaFormat) 822 { 823 //IOREV _nodoc_ specialization not documented 824 typedef Graph<TSpec> TGraph; 825 typedef typename Size<TGraph>::Type TSize; 826 827 typename DirectionIterator<TFile, Output>::Type target = directionIterator(file, Output()); 828 829 String<char> align; 830 if (convertAlignment(g, align)) { 831 TSize nseq = length(stringSet(g)); 832 TSize colLen = length(align) / nseq; 833 typedef typename Iterator<String<char>, Standard>::Type TIter; 834 TIter it = begin(align, Standard()); 835 for(TSize i = 0; i<nseq; ++i) { 836 writeValue(target, '>'); 837 write(target,names[i]); 838 writeValue(target, '\n'); 839 TSize col = 0; 840 while(col < colLen) { 841 TSize max = ((colLen - col) < 60) ? colLen - col : 60; 842 for(TSize finger = 0; finger<max; ++finger, ++col, ++it) 843 writeValue(target, *it); 844 writeValue(target, '\n'); 845 } 846 } 847 } 848 } 849 850 851 852 ////////////////////////////////////////////////////////////////////////////// 853 854 template <typename TFile, typename TSpec, typename TNames> 855 inline void 856 write(TFile & file, 857 Graph<TSpec> const& g, 858 TNames const& names, 859 MsfFormat) 860 { 861 //IOREV _nodoc_ specialization not documented 862 typedef Graph<TSpec> TGraph; 863 typedef typename Size<TGraph>::Type TSize; 864 865 typename DirectionIterator<TFile, Output>::Type target = directionIterator(file, Output()); 866 867 String<char> align; 868 if (convertAlignment(g, align)) { 869 TSize nseq = length(stringSet(g)); 870 TSize colLen = length(align) / nseq; 871 872 write(target,"PileUp\n"); 873 writeValue(target, '\n'); 874 write(target," MSF: "); 875 appendNumber(target, (unsigned)colLen); 876 write(target," Type: P Check: 0 ..\n\n"); 877 TSize offset = 0; 878 for(TSize i = 0; i<nseq; ++i) { 879 write(target," Name: "); 880 write(target,names[i]); 881 write(target," oo Len: "); 882 TSize len = length(names[i]); 883 if (len > offset) offset = len; 884 appendNumber(target, (unsigned)colLen); 885 write(target," Check: 0"); 886 write(target," Weight: 1.00"); 887 writeValue(target, '\n'); 888 } 889 offset += 5; 890 writeValue(target, '\n'); 891 write(target, "//\n"); 892 writeValue(target, '\n'); 893 writeValue(target, '\n'); 894 TSize col = 0; 895 while(col < colLen) { 896 TSize max = 0; 897 for(TSize i = 0; i<nseq; ++i) { 898 max = ((colLen - col) < 50) ? colLen - col : 50; 899 write(target,names[i]); 900 for(TSize j = 0; j<offset - length(names[i]); ++j) 901 writeValue(target, ' '); 902 for(TSize finger = col; finger<col+max; ++finger) { 903 if ((finger - col) % 10 == 0) writeValue(target, ' '); 904 if (align[i*colLen + finger] == '-') writeValue(target, '.'); 905 else writeValue(target, align[i*colLen + finger]); 906 } 907 writeValue(target, '\n'); 908 } 909 col += max; 910 writeValue(target, '\n'); 911 writeValue(target, '\n'); 912 } 913 } 914 } 915 916 917 ////////////////////////////////////////////////////////////////////////////// 918 template <typename TFile, typename TStringSet, typename TSpec, typename TEdge> 919 inline void 920 _writeCargo(TFile & file, 921 Graph<Alignment<TStringSet, void, TSpec> > const&, 922 TEdge const&) 923 { 924 appendNumber(file, 0); 925 } 926 927 ////////////////////////////////////////////////////////////////////////////// 928 template <typename TFile, typename TStringSet, typename TCargo, typename TSpec, typename TEdge> 929 inline void 930 _writeCargo(TFile & file, 931 Graph<Alignment<TStringSet, TCargo, TSpec> > const&, 932 TEdge const& edge) 933 { 934 appendNumber(file, (int)getCargo(edge)); 935 } 936 937 ////////////////////////////////////////////////////////////////////////////// 938 939 940 template <typename TFile, typename TStringSet, typename TCargo, typename TSpec, typename TNames> 941 inline void 942 write(TFile & file, 943 Graph<Alignment<TStringSet, TCargo, TSpec> > const& g, 944 TNames const& names, 945 CgVizFormat) 946 { 947 //IOREV _nodoc_ specialization not documented 948 typedef Graph<Alignment<TStringSet, TCargo, TSpec> > TGraph; 949 typedef typename Size<TGraph>::Type TSize; 950 typedef typename Id<TGraph>::Type TId; 951 typedef typename VertexDescriptor<TGraph>::Type TVertexDescriptor; 952 typedef typename EdgeType<TGraph>::Type TEdgeStump; 953 typedef typename Iterator<String<TEdgeStump*> const, Rooted>::Type TIterConst; 954 955 typename DirectionIterator<TFile, Output>::Type target = directionIterator(file, Output()); 956 957 TStringSet& str = stringSet(g); 958 TSize nseq = length(str); 959 960 write(target, "{DATA Data\n"); 961 write(target, "\t[__GLOBAL__] tracks="); 962 appendNumber(target, nseq); 963 writeValue(target, '\n'); 964 for(TSize i = 0; i<nseq; ++i) { 965 write(target, "\tfasta_id=\""); 966 write(target,names[i]); 967 write(target, "\" sequence=\""); 968 write(target,str[i]); 969 write(target, "\" track="); 970 appendNumber(target, i); 971 write(target, " type=\"DNA\": 0 "); 972 appendNumber(target, length(str[i])- 1); 973 writeValue(target, '\n'); 974 } 975 write(target, "}\n"); 976 for(TSize i = 0; i<nseq; ++i) { 977 write(target, "{DATA "); 978 appendNumber(target, i); 979 //write(target,names[i]); 980 write(target, "-seqlen\n\t[__GLOBAL__]\n\tlength="); 981 appendNumber(target, length(str[i])); 982 write(target, ":\t0 "); 983 appendNumber(target, length(str[i])- 1); 984 write(target, "\n}\n"); 985 } 986 for(TSize i=0; i<nseq; ++i) { 987 for(TSize j=i+1; j<nseq; ++j) { 988 write(target, "{DATA "); 989 appendNumber(target, i); 990 //write(target,names[i]); 991 write(target, "-vs-"); 992 appendNumber(target, j); 993 //write(target,names[j]); 994 writeValue(target, '\n'); 995 write(target, "\t[__GLOBAL__]\n"); 996 for(TIterConst it = begin(g.data_align.data_vertex);!atEnd(it);goNext(it)) { 997 TVertexDescriptor sourceV = position(it); 998 TId id1 = sequenceId(g, sourceV); 999 if ((positionToId(str, id1) != i) && 1000 (positionToId(str, id1) != j)) continue; 1001 TEdgeStump* current = getValue(it); 1002 while(current!=0) { 1003 TVertexDescriptor targetV = getTarget(current); 1004 TId id2 = sequenceId(g, targetV); 1005 if (sourceV != targetV) { 1006 if ((positionToId(str, id2) != i) && 1007 (positionToId(str, id2) != j)) { 1008 current=getNextS(current); 1009 continue; 1010 } 1011 write(target, "\t"); 1012 write(target, "source="); 1013 appendNumber(target, (int)sourceV); 1014 writeValue(target, ' '); 1015 write(target, "target="); 1016 appendNumber(target, (int)targetV); 1017 writeValue(target, ' '); 1018 write(target, "edgeId="); 1019 appendNumber(target, (int)_getId(current)); 1020 writeValue(target, ' '); 1021 write(target, "cargo="); 1022 _writeCargo(target,g,current); 1023 writeValue(target, ' '); 1024 write(target, "label="); 1025 write(target,label(g,sourceV)); 1026 writeValue(target, ' '); 1027 write(target, "labelOpp="); 1028 write(target,label(g,targetV)); 1029 writeValue(target, ':'); 1030 writeValue(target, '\t'); 1031 appendNumber(target, (int)fragmentBegin(g, sourceV)); 1032 writeValue(target, ' '); 1033 appendNumber(target, (int)fragmentBegin(g, targetV)); 1034 writeValue(target, ' '); 1035 appendNumber(target, (int)(fragmentBegin(g, sourceV) + fragmentLength(g, sourceV))); 1036 writeValue(target, ' '); 1037 appendNumber(target, (int)(fragmentBegin(g, targetV) + fragmentLength(g, targetV))); 1038 writeValue(target, '\n'); 1039 current=getNextS(current); 1040 } else { 1041 current=getNextT(current); 1042 } 1043 } 1044 } 1045 write(target, "}\n"); 1046 } 1047 } 1048 } 1049 1050 1051 ////////////////////////////////////////////////////////////////////////////// 1052 1053 /*! 1054 * @fn AlignmentGraph#assignStringSet 1055 * @brief Assigns a new StringSet to an AlignmentGraph. 1056 * 1057 * @signature void assignStringSet(g, stringSet); 1058 * 1059 * @param[in,out] g An AlignmentGraph. 1060 * @param[in] stringSet The @link StringSet @endlink to assign to. 1061 * 1062 * @see AlignmentGraph#getStringSet 1063 * @see AlignmentGraph#stringSet 1064 */ 1065 1066 template<typename TString, typename TDefault, typename TCargo, typename TSpec, typename TDefault2> 1067 inline void 1068 assignStringSet(Graph<Alignment<StringSet<TString, Dependent<TDefault> >, TCargo, TSpec> >& g, 1069 StringSet<TString, Dependent<TDefault2> > const& sStr) 1070 { 1071 typedef Graph<Alignment<StringSet<TString, Dependent<TDefault> >, TCargo, TSpec> > TGraph; 1072 typedef typename VertexDescriptor<TGraph>::Type TVertexDescriptor; 1073 typedef typename TGraph::TKey_ TKey; 1074 typedef typename Size<TGraph>::Type TSize; 1075 1076 TVertexDescriptor nilVertex = getNil<TVertexDescriptor>(); 1077 clear(g); 1078 g.data_sequence = (StringSet<TString, Dependent<TDefault> >) sStr; 1079 TSize lenSet = length(sStr); 1080 for(TSize k=0; k<lenSet;++k) 1081 g.data_pvMap.insert(std::make_pair(TKey(positionToId(sStr,k), length(sStr[k])), nilVertex)); 1082 } 1083 1084 ////////////////////////////////////////////////////////////////////////////// 1085 1086 template<typename TString, typename TDefault, typename TCargo, typename TSpec, typename TDefault2> 1087 inline void 1088 assignStringSet(Graph<Alignment<StringSet<TString, Dependent<TDefault> >, TCargo, TSpec> >& g, 1089 StringSet<TString, Owner<TDefault2> > const& sStr) 1090 { 1091 StringSet<TString, Dependent<> > depStr(sStr); 1092 assignStringSet(g, depStr); 1093 } 1094 1095 1096 ////////////////////////////////////////////////////////////////////////////// 1097 1098 /*! 1099 * @fn AlignmentGraph#getStringSet 1100 * @brief Gets the StringSet of an AlignmentGraph. 1101 * 1102 * @signature THost getStringSet(g); 1103 * 1104 * @param[in,out] g An AlignmentGraph. 1105 * 1106 * @return THost A const reference to the StringSet's host. 1107 * 1108 * @see AlignmentGraph#assignStringSet 1109 * @see AlignmentGraph#stringSet 1110 */ 1111 1112 template<typename TStringSet, typename TCargo, typename TSpec> 1113 inline typename Host<Graph<Alignment<TStringSet, TCargo, TSpec> > const>::Type& 1114 getStringSet(Graph<Alignment<TStringSet, TCargo, TSpec> > const& g) 1115 { 1116 return value(g.data_sequence); 1117 } 1118 1119 1120 ////////////////////////////////////////////////////////////////////////////// 1121 1122 /*! 1123 * @fn AlignmentGraph#stringSet 1124 * @brief Return reference to the StringSet of an AlignmentGraph. 1125 * 1126 * @signature THostRef stringSet(g); 1127 * 1128 * @param[in,out] g An AlignmentGraph. 1129 * 1130 * @return THost A reference to the StringSet's host. 1131 * 1132 * @see AlignmentGraph#assignStringSet 1133 * @see AlignmentGraph#getStringSet 1134 */ 1135 1136 template <typename T> 1137 struct StringSetType; 1138 1139 template <typename TStringSet, typename TCargo, typename TSpec> 1140 struct StringSetType<Graph<Alignment<TStringSet, TCargo, TSpec> > > 1141 { 1142 typedef typename Host<Graph<Alignment<TStringSet, TCargo, TSpec> > >::Type & Type; 1143 }; 1144 1145 1146 template<typename TStringSet, typename TCargo, typename TSpec> 1147 inline typename StringSetType<Graph<Alignment<TStringSet, TCargo, TSpec> > >::Type 1148 stringSet(Graph<Alignment<TStringSet, TCargo, TSpec> > const& g) 1149 { 1150 return const_cast<TStringSet&>(value(g.data_sequence)); 1151 } 1152 1153 ////////////////////////////////////////////////////////////////////////////// 1154 1155 /*! 1156 * @fn AlignmentGraph#label 1157 * @brief Get the label associated with this vertex descriptor or the sequence that is associated with a fragment. 1158 * 1159 * @signature TInfix label(ag, v); 1160 * 1161 * @param[in] ag The AlignmentGraph to query. 1162 * @param[in] v The vertex descriptor to query. 1163 * 1164 * @return TInfix An infix representing the sequence label. 1165 */ 1166 1167 template<typename TStringSet, typename TCargo, typename TSpec, typename TVertexDescriptor> 1168 inline typename Infix<typename Value<TStringSet>::Type>::Type 1169 label(Graph<Alignment<TStringSet, TCargo, TSpec> > const& g, 1170 TVertexDescriptor const v) 1171 { 1172 typedef Graph<Alignment<TStringSet, TCargo, TSpec> > TGraph; 1173 typedef typename TGraph::TFragmentInfo_ TSegment; 1174 TSegment seg = getProperty(g.data_fragment, v); 1175 //std::cout << seg.data_seq_id << ","; 1176 //std::cout << seg.data_begin << ","; 1177 //std::cout << seg.data_length << ","; 1178 //std::cout << getValueById(value(g.data_sequence), seg.data_seq_id) << std::endl; 1179 //std::cout << infix(getValueById(value(g.data_sequence), seg.data_seq_id), seg.data_begin, seg.data_begin + seg.data_length) << std::endl; 1180 return infix(getValueById(value(g.data_sequence), seg.data_seq_id), seg.data_begin, seg.data_begin + seg.data_length); 1181 } 1182 1183 ////////////////////////////////////////////////////////////////////////////// 1184 1185 /*! 1186 * @fn AlignmentGraph#sequenceId 1187 * @brief Gets the sequence id that is associated with this vertex descriptor or with a sequence of a fragment. 1188 * 1189 * @signature TId sequenceId(ag, v); 1190 * 1191 * @param[in] ag The AlignmentGraph. 1192 * @param[in] v Vertex descriptor of the vertex to retrieve sequence id for. 1193 * 1194 * @return TId The sequence id. 1195 */ 1196 1197 template<typename TStringSet, typename TCargo, typename TSpec, typename TVertexDescriptor> 1198 inline typename Id<Graph<Alignment<TStringSet, TCargo, TSpec> > >::Type& 1199 sequenceId(Graph<Alignment<TStringSet, TCargo, TSpec> > const& g, 1200 TVertexDescriptor const v) 1201 { 1202 return const_cast<typename Id<Graph<Alignment<TStringSet, TCargo, TSpec> > >::Type&>(g.data_fragment[v].data_seq_id); 1203 } 1204 1205 ////////////////////////////////////////////////////////////////////////////// 1206 1207 /*! 1208 * @fn AlignmentGraph#fragmentBegin 1209 * @brief Gets the begin position for this vertex descriptor in the sequence. 1210 * 1211 * @signature TPos fragmentBegin(ag, v); 1212 * 1213 * @param[in] ag The AlignmentGraph to query. 1214 * @param[in] v The vertex descriptor of the vertex. 1215 * 1216 * @return TPos The begin position. 1217 */ 1218 1219 template<typename TStringSet, typename TCargo, typename TSpec, typename TVertexDescriptor> 1220 inline typename Position<Graph<Alignment<TStringSet, TCargo, TSpec> > >::Type& 1221 fragmentBegin(Graph<Alignment<TStringSet, TCargo, TSpec> > const& g, 1222 TVertexDescriptor const v) 1223 { 1224 return const_cast<typename Position<Graph<Alignment<TStringSet, TCargo, TSpec> > >::Type&>(g.data_fragment[v].data_begin); 1225 } 1226 1227 ////////////////////////////////////////////////////////////////////////////// 1228 1229 /*! 1230 * @fn AlignmentGraph#fragmentLength 1231 * @brief Gets the length of the fragment for this vertex descriptor in the sequence. 1232 * 1233 * @signature TSize fragmentLength(ag, v); 1234 * 1235 * @param[in] ag The AlignmentGraph to query. 1236 * @param[in] v The vertex descriptor of the vertex. 1237 * 1238 * @return TPos The fragment size. 1239 */ 1240 1241 template<typename TStringSet, typename TCargo, typename TSpec, typename TVertexDescriptor> 1242 inline typename Size<Graph<Alignment<TStringSet, TCargo, TSpec> > >::Type& 1243 fragmentLength(Graph<Alignment<TStringSet, TCargo, TSpec> > const& g, 1244 TVertexDescriptor const v) 1245 { 1246 return const_cast<typename Size<Graph<Alignment<TStringSet, TCargo, TSpec> > >::Type&>(g.data_fragment[v].data_length); 1247 } 1248 1249 ////////////////////////////////////////////////////////////////////////////// 1250 1251 /*! 1252 * @fn AlignmentGraph#findVertex 1253 * @brief Finds a vertx given a sequence id and a position. 1254 * 1255 * @signature TVertexDescriptor findVertex(ag, id, pos); 1256 * 1257 * @param[in] ag The AlignmentGraph to search in. 1258 * @param[in] id The sequence id to search for. 1259 * @param[in] pos The position to search for. 1260 * 1261 * @return TVertexDescriptor The vertex covering the given position on the specified sequence, 1262 * <tt>getNil<TVertexDescriptor>()</tt> if none could be found. 1263 */ 1264 1265 template<typename TStringSet, typename TCargo, typename TSpec, typename TSeqId, typename TPos> 1266 inline typename VertexDescriptor<Graph<Alignment<TStringSet, TCargo, TSpec> > >::Type 1267 findVertex(Graph<Alignment<TStringSet, TCargo, TSpec> >& g, 1268 TSeqId id, 1269 TPos pos) 1270 { 1271 typedef Graph<Alignment<TStringSet, TCargo, TSpec> > TGraph; 1272 typedef typename TGraph::TKey_ TKey; 1273 typedef typename Size<TGraph>::Type TSize; 1274 typedef typename Id<TGraph>::Type TIdType; 1275 typedef typename VertexDescriptor<TGraph>::Type TVertexDescriptor; 1276 1277 return (pos >= (TPos) length(getValueById(stringSet(g),id))) ? getNil<TVertexDescriptor>() : g.data_pvMap.upper_bound(TKey((TIdType)id, (TSize)pos))->second; 1278 } 1279 1280 ////////////////////////////////////////////////////////////////////////////// 1281 1282 /*! 1283 * @fn AlignmentGraph#getProjectedPosition 1284 * @brief Projects a position of one sequence taking part in a pairwise match onto the other sequence. 1285 * 1286 * @signature getProjectedPosition(ag, seqId, pos, seqId2, pos2); 1287 * 1288 * @param[in] ag The @link AlignmentGraph @endlink to query. 1289 * @param[in] seqId The sequenceId. 1290 * @param[in] pos The position to project from. 1291 * @param[out] seqId2 The resulting id of the sequence that was projected onto. 1292 * @param[out] pos2 The reuslting position of the sequence was was projected onto. 1293 */ 1294 1295 template<typename TStringSet, typename TCargo, typename TSpec, typename TSeqId, typename TPosition, typename TSeqId2, typename TPosition2> 1296 inline void 1297 getProjectedPosition(Graph<Alignment<TStringSet, TCargo, TSpec> >& g, 1298 TSeqId const id1, 1299 TPosition const pos1, 1300 TSeqId2& id2, 1301 TPosition2& pos2) 1302 { 1303 SEQAN_ASSERT(length(stringSet(g)) == 2); 1304 1305 typedef Graph<Alignment<TStringSet, TCargo, TSpec> > TGraph; 1306 typedef typename VertexDescriptor<TGraph>::Type TVertexDescriptor; 1307 typedef typename EdgeType<TGraph>::Type TEdgeStump; 1308 TStringSet& str = stringSet(g); 1309 TVertexDescriptor sV = findVertex(g, id1, pos1); 1310 1311 // Case 1: No projection possible 1312 if (sV == getNil<TVertexDescriptor>()) { 1313 if ( (TSeqId) positionToId(str, 0) == id1) id2 = (TSeqId2) positionToId(str,1); 1314 else id2 = (TSeqId2) positionToId(str,0); 1315 pos2 = 0; 1316 return; 1317 } 1318 1319 // Case 2: Projection is possible 1320 TEdgeStump* current = getValue(g.data_align.data_vertex, sV); 1321 if(current != (TEdgeStump*) 0) { 1322 TVertexDescriptor tV = target(current); 1323 if (tV == sV) tV = source(current); 1324 pos2 = (TPosition2) (fragmentBegin(g,tV) + (pos1 - fragmentBegin(g, sV))); 1325 id2 = (TSeqId2) sequenceId(g, tV); 1326 return; 1327 } else { 1328 // If no out-going edge, get the preceding or following vertex 1329 if (fragmentBegin(g, sV) == 0) { 1330 getProjectedPosition(g, id1, fragmentBegin(g,sV) + fragmentLength(g, sV), id2, pos2); 1331 return; 1332 } else { 1333 getProjectedPosition(g, id1, fragmentBegin(g,sV) - 1, id2, pos2); 1334 return; 1335 } 1336 } 1337 } 1338 1339 ////////////////////////////////////////////////////////////////////////////// 1340 1341 template<typename TStringSet, typename TValue, typename TCargo, typename TSpec, typename TSeqId, typename TPosition, typename TSeqId2, typename TPosition2> 1342 inline void 1343 getProjectedPosition(Graph<Alignment<TStringSet, TCargo, TSpec> >& g, 1344 TValue seg_num, 1345 TSeqId const id1, 1346 TPosition const pos1, 1347 TSeqId2& id2, 1348 TPosition2& pos2) 1349 { 1350 SEQAN_ASSERT(length(stringSet(g)) == 2); 1351 1352 typedef Graph<Alignment<TStringSet, TCargo, TSpec> > TGraph; 1353 typedef typename VertexDescriptor<TGraph>::Type TVertexDescriptor; 1354 typedef typename EdgeType<TGraph>::Type TEdgeStump; 1355 TStringSet& str = stringSet(g); 1356 TVertexDescriptor sV = findVertex(g, id1, pos1); 1357 1358 // Case 1: No projection possible 1359 if (sV == getNil<TVertexDescriptor>()) { 1360 if ( (TSeqId) positionToId(str, 0) == id1) id2 = (TSeqId2) positionToId(str,1); 1361 else id2 = (TSeqId2) positionToId(str,0); 1362 pos2 = 0; 1363 return; 1364 } 1365 1366 // Case 2: Projection is possible 1367 TEdgeStump* current = getValue(g.data_align.data_vertex, sV); 1368 if(current != (TEdgeStump*) 0) { 1369 TVertexDescriptor tV = target(current); 1370 if (seg_num == 0) tV = source(current); // segnum 0 is defined as the sourceVertex, segnum 1 is targetVertex 1371 pos2 = (TPosition2) (fragmentBegin(g,tV) + (pos1 - fragmentBegin(g, sV))); 1372 id2 = (TSeqId2) sequenceId(g, tV); 1373 return; 1374 } else { 1375 // If no out-going edge, get the preceding or following vertex 1376 if (fragmentBegin(g, sV) == 0) { 1377 getProjectedPosition(g, seg_num, id1, fragmentBegin(g,sV) + fragmentLength(g, sV), id2, pos2); 1378 return; 1379 } else { 1380 getProjectedPosition(g, seg_num, id1, fragmentBegin(g,sV) - 1, id2, pos2); 1381 return; 1382 } 1383 } 1384 } 1385 1386 ////////////////////////////////////////////////////////////////////////////// 1387 1388 /*! 1389 * @fn AlignmentGraph#getFirstCoveredPosition 1390 * @brief Finds the first position in a sequence that is not assigned to a nil vertex. 1391 * 1392 * @signature TPos getFirstCoveredPosition(ag, id); 1393 * 1394 * @param[in] ag An @link AlignmentGraph @endlink. 1395 * @param[in] id A sequence id. 1396 * 1397 * @return TPos A sequence position. 1398 * 1399 * @see AlignmentGraph#getLastCoveredPosition 1400 */ 1401 1402 template<typename TStringSet, typename TCargo, typename TSpec, typename TSeqId> 1403 inline typename Position<Graph<Alignment<TStringSet, TCargo, TSpec> > >::Type 1404 getFirstCoveredPosition(Graph<Alignment<TStringSet, TCargo, TSpec> > const& g, 1405 TSeqId const id) 1406 { 1407 typedef Graph<Alignment<TStringSet, TCargo, TSpec> > TGraph; 1408 typedef typename VertexDescriptor<TGraph>::Type TVertexDescriptor; 1409 typedef typename TGraph::TPosToVertexMap_ TPosToVertexMap; 1410 1411 1412 typename TPosToVertexMap::const_iterator it = g.data_pvMap.upper_bound(std::make_pair(id, 0)); 1413 1414 // Case 1: id is not covered 1415 if (it == g.data_pvMap.end()) return length(getValueById(stringSet(g), id)); 1416 1417 // Case 2: We found a nil vertex, go one forward 1418 if (it->second == getNil<TVertexDescriptor>()) { 1419 ++it; 1420 if (it == g.data_pvMap.end()) return length(getValueById(stringSet(g), id)); 1421 } 1422 1423 // Now we have the right vertex, return the beginning if the sequence id still fits 1424 if (it->first.first != id) return length(getValueById(stringSet(g), id)); 1425 else return fragmentBegin(g, it->second); 1426 } 1427 1428 ////////////////////////////////////////////////////////////////////////////// 1429 1430 /*! 1431 * @fn AlignmentGraph#getLastCoveredPosition 1432 * @brief Finds the last position in a sequence that is not assigned to a nil vertex. 1433 * 1434 * @signature TPos getLastCoveredPosition(ag, id); 1435 * 1436 * @param[in] ag An @link AlignmentGraph @endlink. 1437 * @param[in] id A sequence id. 1438 * 1439 * @return TPos A sequence position. 1440 * 1441 * @see AlignmentGraph#getFirstCoveredPosition 1442 */ 1443 1444 template<typename TStringSet, typename TCargo, typename TSpec, typename TSeqId> 1445 inline typename Position<Graph<Alignment<TStringSet, TCargo, TSpec> > >::Type 1446 getLastCoveredPosition(Graph<Alignment<TStringSet, TCargo, TSpec> >& g, 1447 TSeqId id) 1448 { 1449 typedef Graph<Alignment<TStringSet, TCargo, TSpec> > TGraph; 1450 typedef typename VertexDescriptor<TGraph>::Type TVertexDescriptor; 1451 typedef typename TGraph::TPosToVertexMap_ TPosToVertexMap; 1452 1453 TVertexDescriptor nilVertex = getNil<TVertexDescriptor>(); 1454 typename TPosToVertexMap::const_iterator it = g.data_pvMap.lower_bound(std::make_pair(id, length(getValueById(stringSet(g), id)))); 1455 1456 // Case 1: No last position (all nil) 1457 if ((it == g.data_pvMap.begin()) && (it->second == nilVertex)) return 0; 1458 1459 // Case 2: Found a nil position but there is a vertex before 1460 if (it->second == nilVertex) { 1461 --it; 1462 } 1463 1464 // If the sequence id still matches return the position behind the last position belonging to this vertex 1465 if (it->first.first != id) return 0; 1466 return fragmentBegin(g, it->second) + fragmentLength(g, it->second); 1467 } 1468 1469 1470 ////////////////////////////////////////////////////////////////////////////// 1471 1472 /*! 1473 * @fn AlignmentGraph#convertAlignment 1474 * @brief Converts an alignment graph into an alignment matrix. 1475 * 1476 * @signature bool convertAlignment(g, component, order, compLength); 1477 * @signature bool convertAlignment(g, matrix); 1478 * 1479 * @param[in] g AlignmentGraph to convert. An alignment graph. 1480 * @param[out] component Vertex to component mapping. 1481 * @param[out] order The order of the component graph when sorting topologically. 1482 * @param[out] compLength Component sizes. 1483 * @param[out] matrix A string that represents an alignment matrix. 1484 * 1485 * @return bool true iff the alignment graph is a valid alignment and false otherwise. 1486 * 1487 * @section Remarks 1488 * 1489 * The variant with <tt>component</tt> and <tt>order</tt> computes a topological sorting of connected components. 1490 */ 1491 1492 template<typename TStringSet, typename TCargo, typename TSpec, typename TComponentMap, typename TOrderMap, typename TComponentLength> 1493 inline bool 1494 convertAlignment(Graph<Alignment<TStringSet, TCargo, TSpec> > const& g, 1495 TComponentMap& component, 1496 TOrderMap& order, 1497 TComponentLength& compLength) 1498 { 1499 typedef Graph<Alignment<TStringSet, TCargo, TSpec> > TGraph; 1500 typedef typename VertexDescriptor<TGraph>::Type TVertexDescriptor; 1501 typedef typename Id<TGraph>::Type TIdType; 1502 typedef typename Size<TGraph>::Type TSize; 1503 typedef typename Value<TComponentMap>::Type TComponent; 1504 typedef typename TGraph::TPosToVertexMap_ TPosToVertexMap; 1505 typedef typename TComponentLength::mapped_type TMappedType; 1506 typedef typename TComponentLength::key_type TKeyType; 1507 TVertexDescriptor nilVertex = getNil<TVertexDescriptor>(); 1508 1509 // Check for empty graph 1510 if (empty(g)) return false; 1511 1512 // Connected Components 1513 TSize numComponents = connectedComponents(component, g); 1514 1515 // Make a directed graph to represent the ordering of the components 1516 // Note: Multiple vertices might have the same component 1517 Graph<Directed<void, WithoutEdgeId> > componentGraph; 1518 reserve(_getVertexString(componentGraph), numComponents); 1519 for(TSize i = 0; i<numComponents;++i) addVertex(componentGraph); 1520 1521 TSize nseq = length(value(g.data_sequence)); 1522 String<std::set<TComponent> > componentsPerSeq; 1523 typedef String<String<TComponent> > TOrderedComponents; 1524 TOrderedComponents orderedComponentsPerSeq; 1525 resize(componentsPerSeq, nseq); 1526 resize(orderedComponentsPerSeq, nseq); 1527 typename TPosToVertexMap::const_iterator it1 = g.data_pvMap.begin(); 1528 typename TPosToVertexMap::const_iterator it1End = g.data_pvMap.end(); 1529 for(;it1!=it1End;++it1) { 1530 // If sections are not assigned to a vertex -> no alignment 1531 if (it1->second == nilVertex) return false; 1532 1533 // Remember the sequence that component belongs to 1534 TSize currentSeq = idToPosition(value(g.data_sequence), it1->first.first); 1535 1536 // Append component 1537 TComponent c = getProperty(component, it1->second); 1538 if ((value(componentsPerSeq,currentSeq)).empty()) { 1539 String<TComponent> tmp; 1540 value(orderedComponentsPerSeq, currentSeq) = tmp; 1541 } 1542 appendValue(value(orderedComponentsPerSeq, currentSeq), c, Generous()); 1543 // If two components appear twice in the same sequence -> no alignment 1544 if (!((value(componentsPerSeq,currentSeq)).insert(c)).second) return false; 1545 } 1546 clear(componentsPerSeq); 1547 1548 // Draw edges for the components within a sequence 1549 typedef typename Iterator<TOrderedComponents>::Type TIterTOrderedComponents; 1550 TIterTOrderedComponents itBegin = begin(orderedComponentsPerSeq); 1551 TIterTOrderedComponents itEnd = end(orderedComponentsPerSeq); 1552 for(;itBegin != itEnd; ++itBegin) { 1553 TSize n = length(*itBegin); 1554 for(TSize i = 0; i<n-1; ++i) { 1555 addEdge(componentGraph, value((*itBegin), i), value((*itBegin), i+1)); 1556 } 1557 } 1558 1559 // Make a topological sort of the component graph 1560 topologicalSort(order, componentGraph); 1561 1562 //// Debug code 1563 //std::cout << "Topological sort: " << std::endl; 1564 //for(TSize i = 0; i<length(order);++i) { 1565 // std::cout << order[i] << ','; 1566 //} 1567 //std::cout << std::endl; 1568 1569 // Walk through all sequences and check the component order 1570 unsigned int compIndex = 0; 1571 unsigned int compIndexLen = length(order); 1572 typename TPosToVertexMap::const_iterator it = g.data_pvMap.begin(); 1573 TIdType currentSeq = it->first.first; 1574 for(; it != g.data_pvMap.end(); ++it) { 1575 if (it->first.first != currentSeq) { 1576 compIndex = 0; 1577 currentSeq = it->first.first; 1578 } 1579 TKeyType c = (TKeyType) getProperty(component, it->second); 1580 if(!compLength.insert(std::make_pair(c, (TMappedType) fragmentLength(g, it->second))).second) { 1581 compLength[c] = _max(compLength[c], (TMappedType) fragmentLength(g, it->second)); 1582 } 1583 while ((compIndex < compIndexLen) && (order[compIndex] != c)) ++compIndex; 1584 // Crossing components -> no alignment 1585 if (compIndex >= compIndexLen) return false; 1586 // Next component 1587 ++compIndex; 1588 } 1589 1590 return true; 1591 } 1592 1593 1594 ////////////////////////////////////////////////////////////////////////////// 1595 1596 template<typename TStringSet, typename TCargo, typename TSpec, typename TMatrix> 1597 inline bool 1598 convertAlignment(Graph<Alignment<TStringSet, TCargo, TSpec> > const& g, 1599 TMatrix& mat) 1600 { 1601 typedef Graph<Alignment<TStringSet, TCargo, TSpec> > TGraph; 1602 typedef typename Value<TMatrix>::Type TValue; 1603 typedef typename Size<TGraph>::Type TSize; 1604 typedef typename Id<TGraph>::Type TIdType; 1605 typedef typename TGraph::TPosToVertexMap_ TPosToVertexMap; 1606 typedef std::map<unsigned int, unsigned int> TComponentLength; 1607 1608 // Strongly Connected Components, topological sort, and length of each component 1609 String<unsigned int> component; 1610 String<unsigned int> order; 1611 TComponentLength compLength; 1612 1613 if (!convertAlignment(g, component, order, compLength)) return false; 1614 1615 // Create the matrix 1616 TSize len = 0; 1617 TSize nseq = length(stringSet(g)); 1618 for(TComponentLength::iterator cIt=compLength.begin(); cIt != compLength.end(); ++cIt) len+=cIt->second; 1619 char gapChar = gapValue<char>(); 1620 resize(mat, len * nseq, gapChar); 1621 1622 // Fill the matrix 1623 TSize row = 0; 1624 TSize col = 0; 1625 typename TPosToVertexMap::const_iterator it = g.data_pvMap.begin(); 1626 unsigned int compIndex = 0; 1627 unsigned int compIndexLen = length(order); 1628 TIdType currentSeq = it->first.first; 1629 for(; it != g.data_pvMap.end(); ++it) { 1630 if (it->first.first != currentSeq) { 1631 SEQAN_ASSERT(col <= len); 1632 //std::cout << std::endl; 1633 ++row;col=0; 1634 compIndex = 0; 1635 currentSeq = it->first.first; 1636 } 1637 unsigned int c = getProperty(component, it->second); 1638 while ((compIndex < compIndexLen) && (order[compIndex] != c)) { 1639 for(TSize i=0;i<compLength[order[compIndex]];++i, ++col) 1640 mat[row*len + col] = gapValue<char>(); 1641 ++compIndex; 1642 } 1643 typedef typename Value<TStringSet>::Type TStringSetStr; 1644 TStringSetStr str = label(g,it->second); 1645 typedef typename Iterator<TStringSetStr, Standard>::Type TStringSetStrIter; 1646 TStringSetStrIter itStr = begin(str, Standard()); 1647 TStringSetStrIter itStrEnd = end(str, Standard()); 1648 for(;itStr != itStrEnd; goNext(itStr), ++col) 1649 mat[row*len + col] = (TValue) (*itStr); 1650 for(TSize i = length(str); i < compLength[order[compIndex]]; ++i, ++col) 1651 mat[row*len + col] = gapValue<char>(); 1652 ++compIndex; 1653 } 1654 SEQAN_ASSERT(row + 1 == nseq); 1655 //std::cout << std::endl; 1656 1657 return true; 1658 } 1659 1660 1661 1662 ////////////////////////////////////////////////////////////////////////////// 1663 1664 template<typename TValue, typename TSpec1, typename TStringSet, typename TCargo, typename TSpec2> 1665 inline bool 1666 convertAlignment(String<TValue, TSpec1> const& mat, 1667 Graph<Alignment<TStringSet, TCargo, TSpec2> >& g) 1668 { 1669 typedef String<TValue, TSpec1> TMatrix; 1670 typedef Graph<Alignment<TStringSet, TCargo, TSpec2> > TGraph; 1671 typedef typename Size<TGraph>::Type TSize; 1672 typedef typename Iterator<TMatrix, Standard>::Type TMatIter; 1673 clearVertices(g); 1674 TValue gapChar = gapValue<TValue>(); 1675 TSize nseq = length(stringSet(g)); 1676 TSize alignLen = length(mat) / nseq; 1677 String<Fragment<> > matches; 1678 for(TSize seq1 = 0; seq1<nseq; ++seq1) { 1679 for(TSize seq2 = seq1 + 1; seq2<nseq; ++seq2) { 1680 TMatIter seq1It = begin(mat); 1681 seq1It += seq1 * alignLen; 1682 TMatIter seq2It = begin(mat); 1683 seq2It += seq2 * alignLen; 1684 TSize alignPos = 0; 1685 TSize length = 0; 1686 TSize offset1 = 0; 1687 TSize offset2 = 0; 1688 for(TSize col = 0; col<alignLen; ++col, ++seq1It, ++seq2It, ++alignPos) { 1689 if ((*seq1It == gapChar) || (*seq2It == gapChar)) { 1690 if (length) { 1691 appendValue(matches, Fragment<>(seq1, alignPos - offset1 - length, seq2, alignPos - offset2 - length, length)); 1692 length = 0; 1693 } 1694 if (*seq1It == gapChar) ++offset1; 1695 if (*seq2It == gapChar) ++offset2; 1696 } else ++length; 1697 } 1698 if (length) appendValue(matches, Fragment<>(seq1, alignPos - offset1 - length, seq2, alignPos - offset2 - length, length)); 1699 } 1700 } 1701 //_debugMatches(stringSet(g), matches); 1702 matchRefinement(matches,stringSet(g),g); 1703 return true; 1704 } 1705 1706 1707 1708 ////////////////////////////////////////////////////////////////////////////// 1709 1710 1711 template<typename TStringSet, typename TCargo, typename TSpec> 1712 inline void 1713 rebuildGraph(Graph<Alignment<TStringSet, TCargo, TSpec> >& g) 1714 { 1715 typedef Graph<Alignment<TStringSet, TCargo, TSpec> > TGraph; 1716 typedef typename VertexDescriptor<TGraph>::Type TVertexDescriptor; 1717 typedef typename Iterator<TGraph, EdgeIterator>::Type TEdgeIterator; 1718 typedef typename Size<TGraph>::Type TSize; 1719 1720 // Initialization 1721 typedef Fragment<> TFragment; 1722 typedef String<TFragment> TFragmentString; 1723 TFragmentString matches; 1724 TSize nseq = length(stringSet(g)); 1725 1726 // Collect all character pairs 1727 typedef std::pair<TSize, TSize> TResiduePair; 1728 typedef std::set<TResiduePair> TResiduePairSet; 1729 String<TResiduePairSet> resPair; 1730 resize(resPair, nseq * nseq); 1731 TEdgeIterator itE(g); 1732 for(;!atEnd(itE);++itE) { 1733 TVertexDescriptor sV = sourceVertex(itE); 1734 TVertexDescriptor tV = targetVertex(itE); 1735 TSize seq1 = idToPosition(stringSet(g), sequenceId(g, sV)); 1736 TSize seq2 = idToPosition(stringSet(g), sequenceId(g, tV)); 1737 TSize index = 0; 1738 TSize pos1 = 0; 1739 TSize pos2 = 0; 1740 if (seq1 < seq2) { 1741 index = seq1 * nseq + seq2; 1742 pos1 = fragmentBegin(g, sV); 1743 pos2 = fragmentBegin(g, tV); 1744 } else { 1745 index = seq2 * nseq + seq1; 1746 pos1 = fragmentBegin(g, tV); 1747 pos2 = fragmentBegin(g, sV); 1748 } 1749 for(TSize i = 0; i<fragmentLength(g, sV); ++i) { 1750 resPair[index].insert(std::make_pair(pos1 + i, pos2 + i)); 1751 } 1752 } 1753 1754 // Rebuild the graph with maximal segments 1755 for(TSize i = 0; i<length(resPair); ++i) { 1756 if (resPair[i].empty()) continue; 1757 TSize seq1 = i / nseq; 1758 TSize seq2 = i % nseq; 1759 typename TResiduePairSet::const_iterator pos = resPair[i].begin(); 1760 typename TResiduePairSet::const_iterator posEnd = resPair[i].end(); 1761 TSize startMatch1 = pos->first; 1762 TSize startMatch2 = pos->second; 1763 TSize len = 1; 1764 ++pos; 1765 while(pos != posEnd) { 1766 if ((startMatch1 + len == pos->first) && (startMatch2 + len == pos->second)) ++len; 1767 else { 1768 appendValue(matches, TFragment(seq1, startMatch1, seq2, startMatch2, len), Generous()); 1769 startMatch1 = pos->first; 1770 startMatch2 = pos->second; 1771 len = 1; 1772 } 1773 ++pos; 1774 } 1775 appendValue(matches, TFragment(seq1, startMatch1, seq2, startMatch2, len), Generous()); 1776 } 1777 clearVertices(g); 1778 matchRefinement(matches,stringSet(g),g); 1779 } 1780 1781 1782 1783 1784 1785 ////////////////////////////////////////////////////////////////////////////// 1786 // Heaviest Common Subsequence adaptation 1787 ////////////////////////////////////////////////////////////////////////////// 1788 1789 ////////////////////////////////////////////////////////////////////////////// 1790 1791 template<typename TStringSet, typename TCargo, typename TSpec, typename TSize2, typename TSpec2, typename TPositions, typename TSize, typename TVertexDescriptor, typename TString> 1792 inline void 1793 _heaviestCommonSubsequence(Graph<Alignment<TStringSet, TCargo, TSpec> > const&, 1794 String<TSize2, TSpec2> const& /*slotToPos*/, 1795 TPositions const&, 1796 TSize const, 1797 TSize const, 1798 TVertexDescriptor const, 1799 TString const&, 1800 TString const&, 1801 Nothing&) 1802 { 1803 } 1804 1805 ////////////////////////////////////////////////////////////////////////////// 1806 1807 template<typename TStringSet, typename TCargo, typename TSpec, typename TSize2, typename TSpec2, typename TPositions, typename TSize, typename TString, typename TOutString> 1808 inline void 1809 _heaviestCommonSubsequence(Graph<Alignment<TStringSet, TCargo, TSpec> > const& g, 1810 String<TSize2, TSpec2> const& slotToPos, 1811 TPositions const& positions, 1812 TSize const m, 1813 TSize const n, 1814 TString const& str1, 1815 TString const& str2, 1816 TOutString& align) 1817 { 1818 typedef typename Value<TString>::Type TVertexSet; 1819 typedef typename Iterator<TString const, Standard>::Type TStringIter; 1820 typedef typename Iterator<TString, Standard>::Type TSIter; 1821 typedef typename Iterator<TVertexSet const, Standard>::Type TVertexSetIter; 1822 1823 // Create the alignment sequence 1824 TSize numMatches = length(positions); 1825 TSize alignLength = numMatches + (n - numMatches) + (m - numMatches); 1826 clear(align); 1827 resize(align, alignLength, TVertexSet(), Exact() ); 1828 TSIter pointerAlign = begin(align, Standard()); 1829 TSIter pointerAlignEnd = end(align, Standard()); 1830 TStringIter pointerStr1 = begin(str1, Standard()); 1831 TSize posStr1 = 0; 1832 TSize posStr2 = 0; 1833 TStringIter pointerStr2 = begin(str2, Standard()); 1834 int p = length(positions) - 1; 1835 while(pointerAlign != pointerAlignEnd) { 1836 TSize i = m; 1837 TSize j = n; 1838 if (p>=0) { 1839 i = (TSize) (slotToPos[positions[p]] / (TSize) n); // Get the index in str1 1840 j = n - 1 - (TSize) (slotToPos[positions[p]] % (TSize) n); // Get the index in str2 1841 }; 1842 1843 // In what order do we insert gaps? -> Only important at the beginning and at the end, not between matches 1844 bool firstI = true; 1845 if ((i != posStr1) && (j != posStr2)) 1846 { 1847 if ((posStr1 == 0) && (posStr2 == 0)) { 1848 TStringIter tmpPointerStr1 = pointerStr1; 1849 TStringIter tmpPointerStr2 = pointerStr2; 1850 TSize tmpPosStr1 = posStr1; 1851 TSize tmpPosStr2 = posStr2; 1852 TSize len1 = 0; 1853 TSize len2 = 0; 1854 for(;i != tmpPosStr1; ++tmpPosStr1, ++tmpPointerStr1) len1 += fragmentLength(g, value(*tmpPointerStr1, 0)); 1855 for(;j != tmpPosStr2; ++tmpPosStr2, ++tmpPointerStr2) len2 += fragmentLength(g, value(*tmpPointerStr2, 0)); 1856 if (len1 > len2) firstI = false; 1857 } else if ((i == m) && (i == n)) { 1858 TStringIter tmpPointerStr1 = pointerStr1; 1859 TStringIter tmpPointerStr2 = pointerStr2; 1860 TSize tmpPosStr1 = posStr1; 1861 TSize tmpPosStr2 = posStr2; 1862 TSize len1 = 0; 1863 TSize len2 = 0; 1864 for(;i != tmpPosStr1; ++tmpPosStr1, ++tmpPointerStr1) len1 += fragmentLength(g, value(*tmpPointerStr1, 0)); 1865 for(;j != tmpPosStr2; ++tmpPosStr2, ++tmpPointerStr2) len2 += fragmentLength(g, value(*tmpPointerStr2, 0)); 1866 if (len1 < len2) firstI = false; 1867 } 1868 } 1869 if (firstI) { 1870 // Gaps in seq 2 1871 while (i != posStr1) { 1872 TVertexSetIter itV = begin(*pointerStr1, Standard()); 1873 TVertexSetIter itVEnd = end(*pointerStr1, Standard()); 1874 for(;itV != itVEnd;++itV) appendValue(*pointerAlign, *itV, Generous()); 1875 ++pointerAlign; 1876 ++pointerStr1; ++posStr1; 1877 } 1878 // Gaps in seq 1 1879 while (j != posStr2) { 1880 TVertexSetIter itV = begin(*pointerStr2, Standard()); 1881 TVertexSetIter itVEnd = end(*pointerStr2, Standard()); 1882 for(;itV != itVEnd;++itV) appendValue(*pointerAlign, *itV, Generous()); 1883 ++pointerAlign; 1884 ++pointerStr2; ++posStr2; 1885 } 1886 } else { 1887 // Gaps in seq 1 1888 while (j != posStr2) { 1889 TVertexSetIter itV = begin(*pointerStr2, Standard()); 1890 TVertexSetIter itVEnd = end(*pointerStr2, Standard()); 1891 for(;itV != itVEnd;++itV) appendValue(*pointerAlign, *itV, Generous()); 1892 ++pointerAlign; 1893 ++pointerStr2; ++posStr2; 1894 } 1895 // Gaps in seq 2 1896 while (i != posStr1) { 1897 TVertexSetIter itV = begin(*pointerStr1, Standard()); 1898 TVertexSetIter itVEnd = end(*pointerStr1, Standard()); 1899 for(;itV != itVEnd;++itV) appendValue(*pointerAlign, *itV, Generous()); 1900 ++pointerAlign; 1901 ++pointerStr1; ++posStr1; 1902 } 1903 } 1904 1905 // Matches 1906 if (p>=0) { 1907 TVertexSetIter itV = begin(*pointerStr1, Standard()); 1908 TVertexSetIter itVEnd = end(*pointerStr1, Standard()); 1909 for(;itV != itVEnd;++itV) appendValue(*pointerAlign, *itV, Generous()); 1910 TVertexSetIter itV2 = begin(*pointerStr2, Standard()); 1911 TVertexSetIter itVEnd2 = end(*pointerStr2, Standard()); 1912 for(;itV2 != itVEnd2;++itV2) appendValue(*pointerAlign, *itV2, Generous()); 1913 ++pointerAlign; 1914 ++pointerStr1; ++posStr1; 1915 ++pointerStr2; ++posStr2; 1916 --p; 1917 } 1918 } 1919 } 1920 1921 1922 1923 1924 ////////////////////////////////////////////////////////////////////////////// 1925 1926 // TODO(holtgrew): Where does this belong? This is also there in graph_algorithms.h... 1927 1928 template<typename TStringSet, typename TCargo, typename TSpec, typename TString, typename TOutString> 1929 inline TCargo 1930 heaviestCommonSubsequence(Graph<Alignment<TStringSet, TCargo, TSpec> > const& g, 1931 TString const& str1, 1932 TString const& str2, 1933 TOutString& align) 1934 { 1935 typedef Graph<Alignment<TStringSet, TCargo, TSpec> > TGraph; 1936 typedef typename Size<TGraph>::Type TSize; 1937 typedef typename Iterator<TGraph, OutEdgeIterator>::Type TOutEdgeIterator; 1938 typedef typename Value<TString>::Type TVertexSet; 1939 1940 TSize m = length(str1); // How many sets of vertex descriptors in seq1 1941 TSize n = length(str2); // How many sets of vertex descriptors in seq2 1942 1943 // Size of the sequences 1944 // Note for profile alignments every member of the sequence is a String!!! of vertex descriptors 1945 1946 // Fill the vertex to position map for str1 1947 // Remember for each vertex descriptor the position in the sequence 1948 typedef String<TSize> TMapVertexPos; 1949 TMapVertexPos map; 1950 resize(map, getIdUpperBound(_getVertexIdManager(g)), std::numeric_limits<TSize>::max()); 1951 typedef typename Iterator<TString const, Standard>::Type TStringIterConst; 1952 typedef typename Iterator<TVertexSet const, Standard>::Type TVertexSetIterConst; 1953 TStringIterConst itStr1 = begin(str1, Standard()); 1954 TStringIterConst itStrEnd1 = end(str1, Standard()); 1955 TSize pos = 0; 1956 TVertexSetIterConst itV; 1957 TVertexSetIterConst itVEnd; 1958 for(;itStr1 != itStrEnd1;++itStr1, ++pos) { 1959 itV = begin(*itStr1, Standard()); 1960 itVEnd = end(*itStr1, Standard()); 1961 for(;itV != itVEnd;++itV) map[*itV] = pos; 1962 } 1963 1964 // We could create the full graph -> too expensive 1965 // Remember which edges are actually present 1966 typedef String<TSize> TOccupiedPositions; 1967 typedef typename Iterator<TOccupiedPositions, Standard>::Type TOccIter; 1968 TOccupiedPositions occupiedPositions; 1969 TStringIterConst itStr2 = begin(str2, Standard()); 1970 TStringIterConst itStrEnd2 = end(str2, Standard()); 1971 TSize posItStr2 = 0; 1972 TSize pPos = 0; 1973 for(;itStr2 != itStrEnd2;++itStr2, ++posItStr2) { 1974 itV = begin(*itStr2, Standard()); 1975 itVEnd = end(*itStr2, Standard()); 1976 for(;itV != itVEnd;++itV) { 1977 TOutEdgeIterator itOut(g, *itV); 1978 for(;!atEnd(itOut); ++itOut) { 1979 // Target vertex must be in the map 1980 pPos = map[targetVertex(itOut)]; 1981 if (pPos != std::numeric_limits<TSize>::max()) 1982 appendValue(occupiedPositions, pPos * n + (TSize) (n - posItStr2 - 1), Generous()); 1983 } 1984 } 1985 } 1986 std::sort(begin(occupiedPositions, Standard()), end(occupiedPositions, Standard())); 1987 // Get all occupied positions 1988 typedef String<TSize> TSlotToPos; 1989 typedef typename Iterator<TSlotToPos, Standard>::Type TSlotToPosIter; 1990 TSlotToPos slotToPos; 1991 TSize counter = 0; 1992 TSize oldVal = std::numeric_limits<TSize>::max(); 1993 TOccIter occIt = begin(occupiedPositions, Standard()); 1994 TOccIter occItEnd = end(occupiedPositions, Standard()); 1995 for(;occIt != occItEnd; ++occIt) { 1996 if (oldVal != *occIt) { 1997 appendValue(slotToPos, *occIt, Generous()); 1998 oldVal = *occIt; 1999 ++counter; 2000 } 2001 } 2002 clear(occupiedPositions); 2003 2004 // Walk through str2 and fill in the weights of the actual edges 2005 typedef String<TCargo> TWeights; 2006 TWeights weights; 2007 resize(weights, length(slotToPos), 0); 2008 itStr2 = begin(str2, Standard()); 2009 posItStr2 = 0; 2010 for(;itStr2 != itStrEnd2;++itStr2, ++posItStr2) { 2011 itV = begin(*itStr2, Standard()); 2012 itVEnd = end(*itStr2, Standard()); 2013 for(;itV != itVEnd;++itV) { 2014 TOutEdgeIterator itOut(g, *itV); 2015 for(;!atEnd(itOut); ++itOut) { 2016 // Target vertex must be in the map 2017 pPos = map[targetVertex(itOut)]; 2018 if ( pPos != std::numeric_limits<TSize>::max()) 2019 weights[std::distance(begin(slotToPos, Standard()), std::lower_bound(begin(slotToPos, Standard()), end(slotToPos, Standard()), pPos * n + (TSize) (n - posItStr2 - 1)))] += (TCargo) cargo(*itOut); 2020 } 2021 } 2022 } 2023 clear(map); 2024 2025 // Now the tough part: Find the right number for a given position 2026 typedef String<TSize> TSequenceString; 2027 typedef typename Iterator<TSequenceString, Standard>::Type TSeqIter; 2028 TSequenceString seq; 2029 resize(seq, length(slotToPos)); 2030 TSeqIter itSeq = begin(seq, Standard()); 2031 TSlotToPosIter itSlotPos = begin(slotToPos, Standard()); 2032 TSlotToPosIter itSlotPosEnd = end(slotToPos, Standard()); 2033 for(;itSlotPos != itSlotPosEnd; ++itSlotPos, ++itSeq) 2034 *itSeq = n - 1 - (*itSlotPos % n); 2035 2036 // Calculate the heaviest increasing subsequence 2037 String<TSize> positions; 2038 TCargo score = (TCargo) heaviestIncreasingSubsequence(seq, weights, positions); 2039 2040 // Retrieve the alignment sequence 2041 _heaviestCommonSubsequence(g, slotToPos, positions, m, n, str1, str2, align); 2042 2043 return score; 2044 2045 } 2046 2047 ////////////////////////////////////////////////////////////////////////////// 2048 2049 template<typename TStringSet, typename TCargo, typename TSpec, typename TString> 2050 inline TCargo 2051 heaviestCommonSubsequence(Graph<Alignment<TStringSet, TCargo, TSpec> > const& g, 2052 TString const& str1, 2053 TString const& str2) 2054 { 2055 Nothing noth; 2056 return heaviestCommonSubsequence(g, str1, str2, noth); 2057 } 2058 2059 template <typename TStringSet, typename TCargo, typename TSpec, typename TSequenceH, typename TSequenceV, typename TId, typename TPos, typename TTraceValue> 2060 inline void 2061 _alignTracePrint(Graph<Alignment<TStringSet, TCargo, TSpec> >& g, 2062 TSequenceH const &, 2063 TSequenceV const &, 2064 TId const id1, 2065 TPos const pos1, 2066 TId const id2, 2067 TPos const pos2, 2068 TPos const segLen, 2069 TTraceValue const tv) 2070 { 2071 // TraceBack values 2072 TTraceValue Diagonal = 0; TTraceValue Horizontal = 1; TTraceValue Vertical = 2; 2073 2074 if (segLen == 0) return; 2075 2076 if (tv == Horizontal) 2077 addVertex(g, id1, pos1, segLen); 2078 else if (tv == Vertical) 2079 addVertex(g, id2, pos2, segLen); 2080 else if (tv == Diagonal) 2081 addEdge(g, addVertex(g, id1, pos1, segLen), addVertex(g, id2, pos2, segLen)); 2082 } 2083 2084 // ---------------------------------------------------------------------------- 2085 // Function _alignTracePrint() [String<Fragment<> >] 2086 // ---------------------------------------------------------------------------- 2087 2088 // TODO(holtgrew): Are TSequenceH and TSequenceV used *anywhere*. 2089 2090 template <typename TFragment, typename TSequenceH, typename TSequenceV, typename TId, typename TPos, typename TTraceValue> 2091 inline void 2092 _alignTracePrint(String<TFragment>& matches, 2093 TSequenceH const &, 2094 TSequenceV const &, 2095 TId const id1, 2096 TPos const pos1, 2097 TId const id2, 2098 TPos const pos2, 2099 TPos const seqLen, 2100 TTraceValue const tv) 2101 { 2102 // Only the diagonal case 2103 if ((seqLen) && (tv == 0)) 2104 appendValue(matches, TFragment(id1, pos1, id2, pos2, seqLen), Generous()); 2105 } 2106 2107 } // namespace seqan 2108 2109 #endif // #ifndef SEQAN_INCLUDE_SEQAN_GRAPH_ALIGN_GRAPH_IMPL_ALIGN_H_ 2110