// ==========================================================================
//                 SeqAn - The Library for Sequence Analysis
// ==========================================================================
// Copyright (c) 2006-2015, Knut Reinert, FU Berlin
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
//     * Redistributions of source code must retain the above copyright
//       notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above copyright
//       notice, this list of conditions and the following disclaimer in the
//       documentation and/or other materials provided with the distribution.
//     * Neither the name of Knut Reinert or the FU Berlin nor the names of
//       its contributors may be used to endorse or promote products derived
//       from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
// ARE DISCLAIMED. IN NO EVENT SHALL KNUT REINERT OR THE FU BERLIN BE LIABLE
// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
// OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
// DAMAGE.
30 // 31 // ========================================================================== 32 // Author: Andreas Gogol-Doering <andreas.doering@mdc-berlin.de> 33 // ========================================================================== 34 // The Align class provides tabular alignments. It contains multiple 35 // Gap objects, one for each row of the alignment. 36 // ========================================================================== 37 38 #ifndef SEQAN_INCLUDE_SEQAN_ALIGN_ALIGN_BASE_H_ 39 #define SEQAN_INCLUDE_SEQAN_ALIGN_ALIGN_BASE_H_ 40 41 namespace seqan { 42 43 // ============================================================================ 44 // Forwards 45 // ============================================================================ 46 47 // ============================================================================ 48 // Tags, Classes, Enums 49 // ============================================================================ 50 51 // ---------------------------------------------------------------------------- 52 // Class Align 53 // ---------------------------------------------------------------------------- 54 55 /*! 56 * @class Align 57 * @implements EqualityComparableConcept 58 * @headerfile <seqan/align.h> 59 * @brief Tabular alignment of same-type sequences. 60 * 61 * @signature template <typename TSequence, typename TGapSpec> 62 * class Align; 63 * 64 * @tparam TSequence Type of the underlying sequence. 65 * @tparam TGapSpec Tag for selecting the @link Gaps @endlink specialization. 66 * 67 * The Alignment class is for storing tabular alignments of sequences having the same type. They do so by being a 68 * container of @link Gaps @endlink objects. The most common use case is storing pairwise alignments that are 69 * generated by one of the dynamic programming alignment algorithms. 70 * 71 * @section Example 72 * 73 * Here is an example of using an Align object with @link globalAlignment @endlink. 
74 * 75 * @include demos/align/align.cpp 76 * 77 * The output is as follows: 78 * 79 * @include demos/align/align.cpp.stdout 80 * 81 * @see globalAlignment 82 * @see localAlignment 83 * @see Gaps 84 */ 85 86 template <typename TSource, typename TSpec = ArrayGaps> 87 class Align 88 { 89 public: 90 typedef Gaps<TSource, TSpec> TGaps; 91 typedef String<TGaps> TRows; 92 typedef typename Size<TRows>::Type TRowsSize; 93 94 TRows data_rows; 95 Align()96 Align() {} 97 98 template <typename TString, typename TStringsetSpec> Align(StringSet<TString,TStringsetSpec> & stringset)99 Align(StringSet<TString, TStringsetSpec> & stringset) 100 { 101 setStrings(*this, stringset); 102 } 103 104 Align & operator=(Align const & other) 105 { 106 data_rows = other.data_rows; 107 return *this; 108 } 109 }; 110 111 // ============================================================================ 112 // Metafunctions 113 // ============================================================================ 114 115 // ---------------------------------------------------------------------------- 116 // Metafunction Cols 117 // ---------------------------------------------------------------------------- 118 119 /*! 120 * @mfn Align#Cols 121 * @brief Return the type representing a column in an Align object. 122 * 123 * @signature Cols<TAlign>::Type 124 * 125 * @tparam TAlign The Align object to get the column type for. 126 * 127 * @return Type The resulting column type. 128 */ 129 130 template <typename TSource, typename TSpec> 131 struct Cols<Align<TSource, TSpec> > 132 { 133 typedef AlignCols<Align<TSource, TSpec> > Type; 134 }; 135 136 template <typename TSource, typename TSpec> 137 struct Cols<Align<TSource, TSpec> const> 138 { 139 typedef AlignCols<Align<TSource, TSpec> const> Type; 140 }; 141 142 // ---------------------------------------------------------------------------- 143 // Metafunction Value 144 // ---------------------------------------------------------------------------- 145 146 /*! 
147 * @mfn Align#Value 148 * @brief Return the value type for an Align object. 149 * 150 * @signature Value<TAlign>::Type 151 * 152 * @tparam TAlign The Align object to get the value type for. 153 * 154 * @return Type The resulting value type. 155 */ 156 157 template <typename TSource, typename TSpec> 158 struct Value<Align<TSource, TSpec> >: 159 Value<Gaps<TSource, TSpec> > 160 {}; 161 template <typename TSource, typename TSpec> 162 struct Value<Align<TSource, TSpec> const>: 163 Value<Gaps<TSource, TSpec> const> 164 {}; 165 166 // ---------------------------------------------------------------------------- 167 // Metafunction GetValue 168 // ---------------------------------------------------------------------------- 169 170 /*! 171 * @mfn Align#GetValue 172 * @brief Return the get-value type for an Align object. 173 * 174 * @signature GetValue<TAlign>::Type 175 * 176 * @tparam TAlign The Align object to get the get-value type for. 177 * 178 * @return Type The resulting get-value type. 179 */ 180 181 template <typename TSource, typename TSpec> 182 struct GetValue<Align<TSource, TSpec> >: 183 GetValue<Gaps<TSource, TSpec> > 184 {}; 185 template <typename TSource, typename TSpec> 186 struct GetValue<Align<TSource, TSpec> const>: 187 GetValue<Gaps<TSource, TSpec> const> 188 {}; 189 190 // ---------------------------------------------------------------------------- 191 // Metafunction Reference 192 // ---------------------------------------------------------------------------- 193 194 /*! 195 * @mfn Align#Reference 196 * @brief Return the reference type for an Align object. 197 * 198 * @signature GetValue<TAlign>::Type 199 * 200 * @tparam TAlign The Align object to get the reference type for. 201 * 202 * @return Type The resulting reference type. 
203 */ 204 205 template <typename TSource, typename TSpec> 206 struct Reference<Align<TSource, TSpec> >: 207 Reference<Gaps<TSource, TSpec> > 208 {}; 209 template <typename TSource, typename TSpec> 210 struct Reference<Align<TSource, TSpec> const>: 211 Reference<Gaps<TSource, TSpec> const> 212 {}; 213 214 // ---------------------------------------------------------------------------- 215 // Metafunction Rows 216 // ---------------------------------------------------------------------------- 217 218 /*! 219 * @mfn Align#Row 220 * @brief Return the row type (@link Gaps @endlink specialization). 221 * 222 * @signature Row<TAlign>::Type 223 * 224 * @tparam TAlign The Align object to get the row type for. 225 * 226 * @return Type The resulting row type. 227 */ 228 229 /*! 230 * @mfn Align#Rows 231 * @brief Return the type used for rows in an Align object (a Gaps specialization). 232 * 233 * @signature Rows<TAlign>::Type 234 * 235 * @tparam TAlign The Align object to get the rows type for. 236 * 237 * @return Type The resulting rows type. 238 */ 239 240 template <typename TSource, typename TSpec> 241 struct Rows<Align<TSource, TSpec> > 242 { 243 typedef String<Gaps<TSource, TSpec> > Type; 244 }; 245 template <typename TSource, typename TSpec> 246 struct Rows<Align<TSource, TSpec> const> 247 { 248 typedef String<Gaps<TSource, TSpec> > const Type; 249 }; 250 251 // ---------------------------------------------------------------------------- 252 // Metafunction Source 253 // ---------------------------------------------------------------------------- 254 255 /*! 256 * @mfn Align#Source 257 * @brief Return the type of the underlying sequence. 258 * 259 * @signature Rows<TAlign>::Type; 260 * 261 * @tparam TAlign The Align object to get the underlying sequence type for. 262 * 263 * @return Type The resulting sequence type. 
264 */ 265 266 template <typename TSource, typename TSpec> 267 struct Source<Align<TSource, TSpec> > 268 { 269 typedef TSource Type; 270 }; 271 template <typename TSource, typename TSpec> 272 struct Source<Align<TSource, TSpec> const> 273 { 274 typedef TSource Type; 275 }; 276 277 // ---------------------------------------------------------------------------- 278 // Metafunction StringSetType 279 // ---------------------------------------------------------------------------- 280 281 /*! 282 * @mfn Align#StringSetType 283 * @brief Return the type that would be used for a string set of the sources. 284 * 285 * @signature Rows<TAlign>::Type 286 * 287 * @tparam TAlign The Align object to get the string set type for. 288 * 289 * @return Type The resulting string set type. 290 */ 291 292 template <typename TSource, typename TSpec> 293 struct StringSetType<Align<TSource, TSpec> > 294 { 295 typedef StringSet<TSource, Dependent<> > Type; 296 }; 297 298 template <typename TSource, typename TSpec> 299 struct StringSetType<Align<TSource, TSpec> const> 300 { 301 typedef StringSet<TSource, Dependent<> > Type; 302 }; 303 304 // ============================================================================ 305 // Functions 306 // ============================================================================ 307 308 // ---------------------------------------------------------------------------- 309 // Function move() 310 // ---------------------------------------------------------------------------- 311 312 template <typename TSource, typename TSpec> 313 inline 314 void move(Align<TSource, TSpec> & target, Align<TSource, TSpec> & source) 315 { 316 move(target.data_rows, source.data_rows); 317 } 318 319 // ---------------------------------------------------------------------------- 320 // Function rows() 321 // ---------------------------------------------------------------------------- 322 323 /*! 324 * @fn Align#rows 325 * @brief Returns the rows of an Align object. 
326 * 327 * @signature TRows rows(align); 328 * 329 * @param[in] align The Align object to get the rows for. 330 * 331 * @return TRows A container with the Gaps of the Align object. 332 */ 333 334 template <typename TSource, typename TSpec> 335 inline typename Rows<Align<TSource, TSpec> >::Type & 336 rows(Align<TSource, TSpec> & me) 337 { 338 return me.data_rows; 339 } 340 341 template <typename TSource, typename TSpec> 342 inline typename Rows<Align<TSource, TSpec> const>::Type & 343 rows(Align<TSource, TSpec> const & me) 344 { 345 return me.data_rows; 346 } 347 348 // ---------------------------------------------------------------------------- 349 // Function row() 350 // ---------------------------------------------------------------------------- 351 352 /*! 353 * @fn Align#row 354 * @brief Returns a single row of an Align object. 355 * 356 * @signature TRow row(align, pos); 357 * 358 * @param[in] align The Align object to get the row of. 359 * @param[in] pos The number of the row to get. 360 * 361 * @return TRow Reference to the given row of align (Metafunction: @link Align#Row @endlink). 362 */ 363 364 template <typename TSource, typename TSpec, typename TPosition> 365 inline typename Row<Align<TSource, TSpec> >::Type & 366 row(Align<TSource, TSpec> & me, 367 TPosition _pos) 368 { 369 return value(rows(me), _pos); 370 } 371 372 template <typename TSource, typename TSpec, typename TPosition> 373 inline typename Row<Align<TSource, TSpec> const>::Type & 374 row(Align<TSource, TSpec> const & me, 375 TPosition _pos) 376 { 377 return value(rows(me), _pos); 378 } 379 380 // ---------------------------------------------------------------------------- 381 // Function cols() 382 // ---------------------------------------------------------------------------- 383 384 /*! 385 * @fn Align#cols 386 * @brief Returns the columns of an Align object. 387 * 388 * @signature TCols cols(align); 389 * 390 * @param[in] align The Align object to get the cols of. 
391 * 392 * @return TCols The columns of the Align object (Metafunction: @link Align#Cols @endlink). 393 */ 394 395 template <typename TSource, typename TSpec> 396 inline typename Cols<Align<TSource, TSpec> >::Type 397 cols(Align<TSource, TSpec> & me) 398 { 399 return typename Cols<Align<TSource, TSpec> >::Type(me); 400 } 401 402 template <typename TSource, typename TSpec> 403 inline typename Cols<Align<TSource, TSpec> const>::Type 404 cols(Align<TSource, TSpec> const & me) 405 { 406 return typename Cols<Align<TSource, TSpec> const>::Type(me); 407 } 408 409 // ---------------------------------------------------------------------------- 410 // Function col() 411 // ---------------------------------------------------------------------------- 412 413 /*! 414 * @fn Align#col 415 * @brief Returns the columns of an Align object. 416 * 417 * @signature TCol col(align); 418 * 419 * @param[in] align The Align object to get the cols of. 420 * 421 * @return TCol The column of the Align object (Metafunction: @link Align#Col @endlink). 
422 */ 423 424 template <typename TSource, typename TSpec, typename TPosition> 425 inline typename Col<Align<TSource, TSpec> >::Type 426 col(Align<TSource, TSpec> & me, 427 TPosition _pos) 428 { 429 return value(cols(me), _pos); 430 } 431 432 template <typename TSource, typename TSpec, typename TPosition> 433 inline typename Col<Align<TSource, TSpec> const>::Type 434 col(Align<TSource, TSpec> const & me, 435 TPosition _pos) 436 { 437 return value(cols(me), _pos); 438 } 439 440 // ---------------------------------------------------------------------------- 441 // Function detach() 442 // ---------------------------------------------------------------------------- 443 444 template <typename TSource, typename TSpec> 445 inline void 446 detach(Align<TSource, TSpec> & me) 447 { 448 typedef Align<TSource, TSpec> TAlign; 449 typedef typename Rows<TAlign>::Type TRows; 450 typedef typename Iterator<TRows, Standard>::Type TRowsIterator; 451 452 TRowsIterator it = begin(rows(me)); 453 TRowsIterator it_end = end(rows(me)); 454 455 while (it != it_end) 456 { 457 detach(*it); 458 ++it; 459 } 460 } 461 462 // ---------------------------------------------------------------------------- 463 // Function write() 464 // ---------------------------------------------------------------------------- 465 466 /*! 467 * @fn Align#write 468 * @deprecated Old-style I/O. 469 * @brief Writing of Gaps to Streams in human-readable format. 470 * 471 * @signature void write(stream, align); 472 * 473 * @param[in,out] stream The Stream to write to. 474 * @param[in] align The Align object to write out. 
475 */ 476 477 template <typename TFile, typename TSource, typename TSpec> 478 inline void 479 write(TFile & target, 480 Align<TSource, TSpec> const & source) 481 { 482 typedef Align<TSource, TSpec> const TAlign; 483 typedef typename Row<TAlign>::Type TRow; 484 typedef typename Position<typename Rows<TAlign>::Type>::Type TRowsPosition; 485 typedef typename Position<TAlign>::Type TPosition; 486 487 TRowsPosition row_count = length(rows(source)); 488 TPosition begin_ = 0; 489 TPosition end_ = std::min(length(row(source, 0)), length(row(source, 1))); 490 491 unsigned int baseCount = 0; 492 unsigned int leftSpace = 6; 493 while (begin_ < end_) 494 { 495 unsigned int windowSize_ = 50; 496 if ((begin_ + windowSize_) > end_) 497 windowSize_ = end_ - begin_; 498 499 // Print header line 500 char buffer[20]; 501 int len = sprintf(buffer, "%7u", (unsigned)baseCount); 502 write(target, buffer, len); 503 baseCount += windowSize_; 504 writeValue(target, ' '); 505 for (TPosition i = 1; i <= windowSize_; ++i) 506 { 507 if ((i % 10) == 0) 508 writeValue(target, ':'); 509 else if ((i % 5) == 0) 510 writeValue(target, '.'); 511 else 512 writeValue(target, ' '); 513 } 514 writeValue(target, ' '); 515 writeValue(target, '\n'); 516 517 // Print sequences 518 for (TRowsPosition i = 0; i < 2 * row_count - 1; ++i) 519 { 520 for (unsigned int j = 0; j < leftSpace + 2; ++j) 521 writeValue(target, ' '); 522 if ((i % 2) == 0) 523 { 524 TRow & row_ = row(source, i / 2); 525 typedef typename Iterator<typename Row<TAlign>::Type const, Standard>::Type TIter; 526 TIter begin1_ = iter(row_, begin_); 527 TIter end1_ = iter(row_, begin_ + windowSize_); 528 for (; begin1_ != end1_; ++begin1_) 529 { 530 if (isGap(begin1_)) 531 writeValue(target, gapValue<char>()); 532 else 533 writeValue(target, getValue(begin1_)); 534 } 535 } 536 else 537 { 538 for (unsigned int j = 0; j < windowSize_; ++j) 539 { 540 if ((!isGap(row(source, (i - 1) / 2), begin_ + j)) && 541 (!isGap(row(source, (i + 1) / 2), begin_ + 
j)) && 542 (row(source, (i - 1) / 2)[begin_ + j] == row(source, (i + 1) / 2)[begin_ + j])) 543 { 544 writeValue(target, '|'); 545 } 546 else 547 { 548 writeValue(target, ' '); 549 } 550 } 551 } 552 writeValue(target, '\n'); 553 } 554 writeValue(target, '\n'); 555 begin_ += 50; 556 } 557 writeValue(target, '\n'); 558 } 559 560 // ---------------------------------------------------------------------------- 561 // Function clearClipping() 562 // ---------------------------------------------------------------------------- 563 564 /*! 565 * @fn Align#clearClipping 566 * @brief Clear clipping on all rows. 567 * 568 * @signature void clearClipping(align); 569 * 570 * @param[in,out] align Align object to clear clippings of. 571 */ 572 573 // TODO(holtgrew): Undocumented. 574 575 template <typename TSource, typename TSpec> 576 inline void 577 clearClipping(Align<TSource, TSpec> & align_) 578 { 579 typedef typename Rows<Align<TSource, TSpec> >::Type TRows; 580 typedef typename Iterator<TRows>::Type TRowsIterator; 581 582 for (TRowsIterator it = begin(rows(align_)); it != end(rows(align_)); goNext(it)) 583 clearClipping(*it); 584 } 585 586 // ---------------------------------------------------------------------------- 587 // Function operator<<() 588 // ---------------------------------------------------------------------------- 589 590 /*! 591 * @fn Align#operator<< 592 * @brief Stream-output for Align objects. 593 * 594 * @signature TStream operator<<(stream, align); 595 * 596 * @param[in,out] stream <tt>std::ostream</tt> to write to. 597 * @param[in] align Align object to write out. 598 * 599 * @return TStream Reference to stream after output of align. 
600 */ 601 602 // stream operators 603 604 template <typename TStream, typename TSource, typename TSpec> 605 inline TStream & 606 operator<<(TStream & target, 607 Align<TSource, TSpec> const & source) 608 { 609 typename DirectionIterator<TStream, Output>::Type it = directionIterator(target, Output()); 610 write(it, source); 611 return target; 612 } 613 614 // ---------------------------------------------------------------------------- 615 // Function setStrings() 616 // ---------------------------------------------------------------------------- 617 618 /*! 619 * @fn Align#setStrings 620 * @brief Loads the sequences of a string set into an alignment. 621 * 622 * @signature void setStrings(align, stringSet); 623 * 624 * @param[in,out] align Align object to set underlying sequence of. 625 * @param[in] stringSet The @link StringSet @endlink with the data. 626 */ 627 628 template <typename TSource, typename TSpec, typename TSpec2> 629 inline void 630 setStrings(Align<TSource, TSpec> & me, 631 StringSet<TSource, TSpec2> & stringset) 632 { 633 typedef Align<TSource, TSpec> TAlign; 634 typedef StringSet<TSource, TSpec2> TStringset; 635 636 typedef typename Rows<TAlign>::Type TRows; 637 typedef typename Iterator<TRows>::Type TRowsIterator; 638 typedef typename Size<TStringset>::Type TStringsetSize; 639 640 clear(me.data_rows); 641 resize(me.data_rows, length(stringset)); 642 TRowsIterator it = begin(rows(me)); 643 TStringsetSize stringset_length = length(stringset); 644 for (TStringsetSize i = 0; i < stringset_length; ++i) 645 { 646 setSource(*it, value(stringset, i)); 647 ++it; 648 } 649 } 650 651 // ---------------------------------------------------------------------------- 652 // Function clearGaps() 653 // ---------------------------------------------------------------------------- 654 655 /*! 656 * @fn Align#clearGaps 657 * @brief Clear gaps of all Align rows. 
658 * 659 * @signature void clearGaps(align); 660 * 661 * @param[in] align The Align object to clear all all gaps from. 662 */ 663 664 template <typename TSource, typename TSpec> 665 inline void 666 clearGaps(Align<TSource, TSpec> & me) 667 { 668 typedef Align<TSource, TSpec> TAlign; 669 typedef typename Rows<TAlign>::Type TRows; 670 typedef typename Iterator<TRows>::Type TRowsIterator; 671 672 for (TRowsIterator it = begin(rows(me)); it != end(rows(me)); goNext(it)) 673 clearGaps(*it); 674 } 675 676 // ---------------------------------------------------------------------------- 677 // Function stringSet() 678 // ---------------------------------------------------------------------------- 679 680 /*! 681 * @fn Align#stringSet 682 * @brief Return string set with all ungapped sequences. 683 * 684 * @signature TStringSet stringSet(align); 685 * 686 * @param[in] align Align object to get sequences of. 687 * 688 * @return TStringSet The set of ungapped sequences (Metafunction: @link Align#StringSetType @endlink). 
689 */ 690 691 template <typename TSource, typename TSpec> 692 inline typename StringSetType<Align<TSource, TSpec> >::Type 693 stringSet(Align<TSource, TSpec> & me) 694 { 695 typedef Align<TSource, TSpec> TAlign; 696 typedef typename StringSetType<TAlign>::Type TStringSet; 697 698 typedef typename Rows<TAlign>::Type TRows; 699 typedef typename Iterator<TRows>::Type TRowsIterator; 700 701 TStringSet ss; 702 703 for (TRowsIterator it = begin(rows(me)); it != end(rows(me)); goNext(it)) 704 appendValue(ss, source(*it)); 705 return ss; 706 } 707 708 // ---------------------------------------------------------------------------- 709 // Function operator==() 710 // ---------------------------------------------------------------------------- 711 712 template <typename TSource, typename TSpec> 713 inline bool operator==(Align<TSource, TSpec> const & lhs, 714 Align<TSource, TSpec> const & rhs) 715 { 716 if (length(lhs.data_rows) != length(rhs.data_rows)) 717 return false; 718 typedef typename Align<TSource, TSpec>::TGaps TGaps; 719 typedef typename Iterator<TGaps const, Rooted>::Type TIter; 720 for (unsigned i = 0; i < length(rhs.data_rows); ++i) 721 for (TIter itL = begin(lhs.data_rows[i], Rooted()), itR = begin(rhs.data_rows[i], Rooted()); !atEnd(itL); goNext(itL), goNext(itR)) 722 if (*itL != *itR) 723 return false; 724 return true; 725 } 726 727 // ---------------------------------------------------------------------------- 728 // Function operator!=() 729 // ---------------------------------------------------------------------------- 730 731 template <typename TSource, typename TSpec> 732 inline bool operator!=(Align<TSource, TSpec> const & lhs, 733 Align<TSource, TSpec> const & rhs) 734 { 735 return !(lhs == rhs); 736 } 737 738 } // namespace seqan 739 740 #endif // #ifndef SEQAN_INCLUDE_SEQAN_ALIGN_ALIGN_BASE_H_ 741