1 // ========================================================================== 2 // SeqAn - The Library for Sequence Analysis 3 // ========================================================================== 4 // Copyright (c) 2006-2018, Knut Reinert, FU Berlin 5 // All rights reserved. 6 // 7 // Redistribution and use in source and binary forms, with or without 8 // modification, are permitted provided that the following conditions are met: 9 // 10 // * Redistributions of source code must retain the above copyright 11 // notice, this list of conditions and the following disclaimer. 12 // * Redistributions in binary form must reproduce the above copyright 13 // notice, this list of conditions and the following disclaimer in the 14 // documentation and/or other materials provided with the distribution. 15 // * Neither the name of Knut Reinert or the FU Berlin nor the names of 16 // its contributors may be used to endorse or promote products derived 17 // from this software without specific prior written permission. 18 // 19 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 20 // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 22 // ARE DISCLAIMED. IN NO EVENT SHALL KNUT REINERT OR THE FU BERLIN BE LIABLE 23 // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 25 // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 26 // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 27 // LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 28 // OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH 29 // DAMAGE. 
30 // 31 // ========================================================================== 32 // Author: Andreas Gogol-Doering <andreas.doering@mdc-berlin.de> 33 // ========================================================================== 34 // Implementation of the biological SimpleType specializations Dna, Dna5, 35 // DnaQ, Dna5Q, Rna, Rna5, Iupac, and AminoAcid. The conversion tables are 36 // in alphabet_residue_tabs.h. 37 // 38 // This header's structure is an exception to the standard. Because 39 // splitting into one header for each specialization is a bit too much, we 40 // define all types in one header. We define the classes, metafunctions and 41 // functions in one section, one subsection for each type to make the whole 42 // thing more readable. Conversion through assignment is defined in the 43 // Function section. 44 // ========================================================================== 45 46 // TODO(holtgrew): Add RnaQ and Rna5Q? Can we create a tag/type for Dna and Rna that is then differentiated with one additional tag? 47 48 #ifndef SEQAN_INCLUDE_SEQAN_BASIC_BASIC_ALPHABET_RESIDUE_H_ 49 #define SEQAN_INCLUDE_SEQAN_BASIC_BASIC_ALPHABET_RESIDUE_H_ 50 51 namespace seqan { 52 53 // ============================================================================ 54 // Forwards 55 // ============================================================================ 56 57 template <typename T> struct BaseAlphabet; 58 59 // ============================================================================ 60 // Classes, Metafunctions, Functions 61 // ============================================================================ 62 63 // Also see comment at the top for more information on this exceptional 64 // structure. 65 // 66 // We define the SimpleType specializations, the metafunctions ValueSize and 67 // BitsPerValue and the functions unknownValueImpl() for each specialization in 68 // this section. 
69 70 // ---------------------------------------------------------------------------- 71 // Specialization Dna 72 // ---------------------------------------------------------------------------- 73 74 /*! 75 * @class Dna 76 * @extends SimpleType 77 * @headerfile <seqan/basic.h> 78 * @brief Alphabet for DNA. 79 * 80 * @signature typedef SimpleType<unsigned char, Dna_> Dna; 81 * 82 * The ValueSize of <tt>Dna</tt> is 4. The nucleotides are enumerated this way: <tt>'A' = 0, 'C' = 1, 'G' = 2, 'T' = 83 * 3</tt>. 84 * 85 * Objects of type <tt>Dna</tt> can be converted to various other types and vice versa. An object that has a value not 86 * in <tt>{'A', 'C', 'G', 'T'}</tt> is converted to <tt>'A'</tt>. 87 * 88 * @see Dna5 89 * @see DnaString 90 * @see DnaIterator 91 */ 92 93 struct Dna_ {}; 94 typedef SimpleType<unsigned char, Dna_> Dna; 95 96 template <> 97 struct ValueSize<Dna> 98 { 99 typedef uint8_t Type; 100 static const Type VALUE = 4; 101 }; 102 103 template <> 104 struct BitsPerValue< Dna > 105 { 106 typedef uint8_t Type; 107 static const Type VALUE = 2; 108 }; 109 110 // ---------------------------------------------------------------------------- 111 // Specialization Dna5 112 // ---------------------------------------------------------------------------- 113 114 /*! 115 * @class Dna5 116 * @extends SimpleType 117 * @headerfile <seqan/basic.h> 118 * @brief Alphabet for DNA including 'N' character. 119 * 120 * @signature typedef SimpleType<unsigned char, Dna5_> Dna5; 121 * 122 * The @link FiniteOrderedAlphabetConcept#ValueSize @endlink of <tt>Dna5</tt> is 5. The nucleotides are enumerated this 123 * way: <tt>'A' = 0, 'C' = 1, 'G' = 2, 'T' = 3</tt>. The 'N' character ("unkown nucleotide") is encoded by 4. 124 * 125 * Objects of type <tt>Dna5</tt> can be converted to various other types and vice versa. An object that has a value not 126 * in <tt>{'A', 'C', 'G', 'T'}</tt> is converted to <tt>'N'</tt>. 
127 * 128 * @see Dna5Iterator 129 * @see Dna5String 130 * @see Dna 131 */ 132 133 struct Dna5_ {}; 134 typedef SimpleType<unsigned char, Dna5_> Dna5; 135 136 template <> 137 struct ValueSize<Dna5> 138 { 139 typedef uint8_t Type; 140 static const Type VALUE = 5; 141 }; 142 143 template <> 144 struct BitsPerValue<Dna5> 145 { 146 typedef uint8_t Type; 147 static const Type VALUE = 3; 148 }; 149 150 inline Dna5 151 unknownValueImpl(Dna5 *) 152 { 153 static const Dna5 _result = Dna5('N'); 154 return _result; 155 } 156 157 // ---------------------------------------------------------------------------- 158 // Specialization DnaQ 159 // ---------------------------------------------------------------------------- 160 161 /*! 162 * @class DnaQ 163 * @extends SimpleType 164 * @headerfile <seqan/basic.h> 165 * @implements AlphabetWithQualitiesConcept 166 * @brief Alphabet for DNA plus PHRED quality. 167 * 168 * @signature typedef SimpleType<unsigned char, DnaQ_> DnaQ; 169 * 170 * The ValueSize of <tt>DnaQ</tt> is 4. The nucleotides are enumerated this way: <tt>'A' = 0, 'C' = 1, 'G' = 2, 'T' = 171 * 3</tt>. 172 * 173 * Objects of type <tt>DnaQ</tt> can be converted to various other types and vice versa. 174 * 175 * Note that the default quality value is set to 60. 176 * 177 * @see Dna5Q 178 * @see Dna 179 */ 180 181 #ifndef SEQAN_DEFAULT_QUALITY 182 #define SEQAN_DEFAULT_QUALITY 40 183 #endif 184 185 struct DnaQ_ {}; 186 typedef SimpleType <unsigned char, DnaQ_> DnaQ; 187 188 template <> struct ValueSize<DnaQ> 189 { 190 typedef uint8_t Type; 191 static const ValueSize<DnaQ>::Type VALUE = 4; // Considering nucleotides. 192 }; 193 194 template <> struct InternalValueSize_<DnaQ> 195 { 196 enum { VALUE = 252 }; // Considering nucleotides x Quality 0..62. 
197 }; 198 199 template <> struct BitsPerValue<DnaQ> 200 { 201 enum { VALUE = 8 }; 202 typedef uint8_t Type; 203 }; 204 205 template <> struct HasQualities<DnaQ> 206 { 207 typedef True Type; 208 static const bool VALUE = true; 209 }; 210 211 template <> 212 struct BaseAlphabet<DnaQ> 213 { 214 typedef Dna Type; 215 }; 216 217 template <> 218 struct QualityValueSize<DnaQ> 219 { 220 enum { VALUE = 63 }; // 64 - 1 (N) 221 }; 222 223 template <typename TValue> 224 inline int getQualityValue(TValue const &) 225 { 226 return 0; 227 } 228 229 inline int getQualityValue(DnaQ const & c) 230 { 231 return c.value >> 2; 232 } 233 234 inline 235 void assignQualityValue(DnaQ & c, int q) 236 { 237 if (q < 0) q = 0; 238 if (q >= QualityValueSize<DnaQ>::VALUE) 239 q = QualityValueSize<DnaQ>::VALUE - 1; 240 c.value = (c.value & 3) | (q << 2); 241 } 242 243 inline 244 void assignQualityValue(DnaQ & c, char q) 245 { 246 int q1 = static_cast<int>(q - '!'); 247 if (q1 < 0) q1 = 0; 248 if (q1 >= QualityValueSize<DnaQ>::VALUE) 249 q1 = QualityValueSize<DnaQ>::VALUE - 1; 250 assignQualityValue(c, q1); 251 } 252 253 inline 254 void assignQualityValue(char & q, DnaQ c) 255 { 256 q = '!' + getQualityValue(c); 257 } 258 259 260 // ---------------------------------------------------------------------------- 261 // Specialization Dna5Q 262 // ---------------------------------------------------------------------------- 263 264 /*! 265 * @class Dna5Q 266 * @extends SimpleType 267 * @headerfile <seqan/basic.h> 268 * @implements AlphabetWithQualitiesConcept 269 * @brief Alphabet for DNA plus PHRED quality including 'N' character. 270 * 271 * @signature typedef SimpleType<unsigned char, Dna5Q_> Dna5Q; 272 * 273 * The ValueSize of <tt>Dna5Q</tt> is 5. The nucleotides are enumerated this way: <tt>'A' = 0, 'C' = 1, 'G' = 2, 'T' = 274 * 3</tt>. The 'N' character ("unknown nucleotide") is encoded by 4. 275 * 276 * Objects of type <tt>Dna5</tt> can be converted to various other types and vice versa. 
277 * 278 * Note that the default quality value is set to 40. 279 * 280 * @see Dna5 281 * @see DnaQ 282 */ 283 284 struct Dna5Q_ {}; 285 typedef SimpleType <unsigned char, Dna5Q_> Dna5Q; 286 287 static const unsigned char Dna5QValueN_ = 252; // value representing N 288 289 template <> struct ValueSize<Dna5Q> 290 { 291 typedef uint8_t Type; 292 static const Type VALUE = 5; // Considering nucleotides + N. 293 }; 294 295 template <> struct InternalValueSize_<Dna5Q> 296 { 297 enum { VALUE = 253 }; // Considering (nucleotides x Quality 0..62) + N. 298 }; 299 300 template <> struct BitsPerValue<Dna5Q> 301 { 302 enum { VALUE = 8 }; 303 typedef uint8_t Type; 304 }; 305 306 template <> struct HasQualities<Dna5Q> 307 { 308 typedef True Type; 309 static const bool VALUE = true; 310 }; 311 312 template <> 313 struct BaseAlphabet<Dna5Q> 314 { 315 typedef Dna5 Type; 316 }; 317 318 template <> struct 319 QualityValueSize<Dna5Q> 320 { 321 enum { VALUE = 63 }; 322 }; 323 324 inline Dna5Q 325 unknownValueImpl(Dna5Q *) 326 { 327 static const Dna5Q _result = Dna5Q('N'); 328 return _result; 329 } 330 331 inline int getQualityValue(Dna5Q const &c) 332 { 333 // We use a lookup table to extract the qualities from DNA5Q. The lookup 334 // table based code is equivalent to the following line: 335 // return (c.value == Dna5QValueN_)? 
0: c.value >> 2; 336 337 static const unsigned table[] = { 338 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 4, 339 4, 4, 4, 5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, 8, 8, 340 8, 8, 9, 9, 9, 9, 10, 10, 10, 10, 11, 11, 11, 11, 12, 12, 12, 341 12, 13, 13, 13, 13, 14, 14, 14, 14, 15, 15, 15, 15, 16, 16, 16, 16, 342 17, 17, 17, 17, 18, 18, 18, 18, 19, 19, 19, 19, 20, 20, 20, 20, 21, 343 21, 21, 21, 22, 22, 22, 22, 23, 23, 23, 23, 24, 24, 24, 24, 25, 25, 344 25, 25, 26, 26, 26, 26, 27, 27, 27, 27, 28, 28, 28, 28, 29, 29, 29, 345 29, 30, 30, 30, 30, 31, 31, 31, 31, 32, 32, 32, 32, 33, 33, 33, 33, 346 34, 34, 34, 34, 35, 35, 35, 35, 36, 36, 36, 36, 37, 37, 37, 37, 38, 347 38, 38, 38, 39, 39, 39, 39, 40, 40, 40, 40, 41, 41, 41, 41, 42, 42, 348 42, 42, 43, 43, 43, 43, 44, 44, 44, 44, 45, 45, 45, 45, 46, 46, 46, 349 46, 47, 47, 47, 47, 48, 48, 48, 48, 49, 49, 49, 49, 50, 50, 50, 50, 350 51, 51, 51, 51, 52, 52, 52, 52, 53, 53, 53, 53, 54, 54, 54, 54, 55, 351 55, 55, 55, 56, 56, 56, 56, 57, 57, 57, 57, 58, 58, 58, 58, 59, 59, 352 59, 59, 60, 60, 60, 60, 61, 61, 61, 61, 62, 62, 62, 62, 353 0, 0, 0, 0}; 354 return table[c.value]; 355 } 356 357 inline 358 void assignQualityValue(Dna5Q &c, int q) 359 { 360 if (q < 0) q = 0; 361 if (q >= QualityValueSize<Dna5Q>::VALUE) 362 q = QualityValueSize<Dna5Q>::VALUE - 1; 363 if (c.value != Dna5QValueN_) 364 c.value = (c.value & 3) | (q << 2); 365 } 366 367 inline 368 void assignQualityValue(Dna5Q &c, char q) 369 { 370 int q1 = static_cast<int>(q - '!'); 371 if (q1 < 0) q1 = 0; 372 if (q1 >= QualityValueSize<Dna5Q>::VALUE) 373 q1 = QualityValueSize<Dna5Q>::VALUE - 1; 374 assignQualityValue(c, q1); 375 } 376 377 inline 378 void assignQualityValue(char & q, Dna5Q c) 379 { 380 q = '!' + getQualityValue(c); 381 } 382 383 // ---------------------------------------------------------------------------- 384 // Specialization Rna 385 // ---------------------------------------------------------------------------- 386 387 /*! 
388 * @class Rna 389 * @extends SimpleType 390 * @headerfile <seqan/basic.h> 391 * @brief Alphabet for RNA. 392 * 393 * @signature typedef SimpleType<unsigned char, Rna_> Rna; 394 * 395 * The ValueSize of <tt>Rna</tt> is 4. The nucleotides are enumerated this way: <tt>'A' = 0, 'C' = 1, 'G' = 2, 'U' = 396 * 3</tt>. 397 * 398 * Objects of type <tt>Rna</tt> can be converted to various other types and vice versa. An object that has a value not 399 * in <tt>{'A', 'C', 'G', 'U'}</tt> is converted to <tt>'A'</tt>. 400 * 401 * <tt>Rna</tt> is typedef for <tt>SimpleType<char,Rna_></tt>, while <tt>Rna_</tt> is a helper specialization tag class. 402 * 403 * @see Rna5 404 * @see RnaString 405 * @see RnaIterator 406 */ 407 408 struct Rna_ {}; 409 typedef SimpleType<unsigned char, Rna_> Rna; 410 411 template <> 412 struct ValueSize<Rna> 413 { 414 typedef uint8_t Type; 415 static const Type VALUE = 4; 416 }; 417 418 template <> 419 struct BitsPerValue<Rna> 420 { 421 typedef uint8_t Type; 422 static const Type VALUE = 2; 423 }; 424 425 // ---------------------------------------------------------------------------- 426 // Specialization Rna5 427 // ---------------------------------------------------------------------------- 428 429 /*! 430 * @class Rna5 431 * @extends SimpleType 432 * @headerfile <seqan/basic.h> 433 * @brief Alphabet for RNA including 'N' character. 434 * 435 * @signature typedef SimpleType<unsigned char, Rna5_> Rna5; 436 * 437 * The ValueSize of <tt>Rna5</tt> is 5. The nucleotides are enumerated this way: <tt>'A' = 0, 'C' = 1, 'G' = 2, 'U' = 438 * 3</tt>. The 'N' character ("unkown nucleotide") is encoded by 4. 439 * 440 * Objects of type <tt>Rna5</tt> can be converted to various other types and vice versa. An object that has a value not 441 * in <tt>{'A', 'C', 'G', 'U'}</tt> is converted to <tt>'N'</tt>. 
442 * 443 * @see Rna5Iterator 444 * @see Rna5String 445 * @see Rna 446 */ 447 448 struct Rna5_ {}; 449 typedef SimpleType<unsigned char, Rna5_> Rna5; 450 451 template <> 452 struct ValueSize<Rna5> 453 { 454 typedef uint8_t Type; 455 static const Type VALUE = 5; 456 }; 457 458 template <> struct BitsPerValue<Rna5> 459 { 460 typedef uint8_t Type; 461 static const Type VALUE = 3; 462 }; 463 464 inline Rna5 465 unknownValueImpl(Rna5 *) 466 { 467 static const Rna5 _result = Rna5('N'); 468 return _result; 469 } 470 471 // ---------------------------------------------------------------------------- 472 // Specialization Iupac 473 // ---------------------------------------------------------------------------- 474 475 // TODO(holtgrew): We should support retrieval of nucleotides represented by a IUPAC char. 476 477 /*! 478 * @class Iupac 479 * @extends SimpleType 480 * @headerfile <seqan/basic.h> 481 * @brief Iupac code for DNA. 482 * 483 * @signature typedef SimpleType<unsigned char, Iupac_> Iupac; 484 * 485 * The ValueSize of <tt>Iupac</tt> is 16. The nucleotides are enumerated from 0 to 19 in this order: 'U'=0, 'T', 'A', 486 * 'W', 'C', 'Y', 'M', 'H', 'G', 'K', 'R', 'D', 'S', 'B', 'V', 'N'=15. 487 * 488 * Objects of type <tt>Iupac</tt> can be converted to various other types and vice versa. Unknown values are converted 489 * to <tt>'N'</tt>. 
490 * 491 * @see IupacString 492 * @see IupacIterator 493 */ 494 495 struct Iupac_ {}; 496 typedef SimpleType<unsigned char, Iupac_> Iupac; 497 498 template <> struct ValueSize<Iupac> 499 { 500 typedef uint8_t Type; 501 static const Type VALUE = 16; 502 }; 503 504 template <> struct BitsPerValue<Iupac> 505 { 506 typedef uint8_t Type; 507 static const Type VALUE = 4; 508 }; 509 510 inline Iupac 511 unknownValueImpl(Iupac *) 512 { 513 static const Iupac _result = Iupac('N'); 514 return _result; 515 } 516 517 // ---------------------------------------------------------------------------- 518 // Specialization AminoAcid 519 // ---------------------------------------------------------------------------- 520 521 /*! 522 * @class AminoAcid 523 * @extends SimpleType 524 * @headerfile <seqan/basic.h> 525 * @brief IUPAC code for amino acids. 526 * @signature typedef SingleType<unsigned char, AminoAcid_> AminoAcid; 527 * 528 * The ValueSize of <tt>AminoAcid</tt> is 27. 529 * 530 * The amino acid symbols are as follows, i.e. they are sorted alphabetically 531 * up until the last two symbols: 532 * 533 * 'A' = 0, 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'Y', 'Z', 'X'. '*' 534 * 535 * Of these 'B' is a wildcard for (Aspartic Acid, Asparagine), 536 * 'J' for (Leucine, Isoleucine), 'Z' for (Glutamic Acid, Glutamine) and 537 * 'X' for "any amino acid". 538 * 539 * 'O' refers to the rare Pyrrolysine, 'U' refers to the rare Selenocysteine and '*' to the terminator tRNA. 540 * 541 * Objects of type <tt>AminoAcid</tt> can be converted to <tt>char</tt> and vice versa. Unknown values are converted to 542 * <tt>'X'</tt>. 
543 * 544 * @see FiniteOrderedAlphabetConcept#ValueSize 545 * @see PeptideIterator 546 * @see Peptide 547 */ 548 549 struct AminoAcid_ {}; 550 typedef SimpleType<unsigned char, AminoAcid_> AminoAcid; 551 552 template <> struct ValueSize<AminoAcid> 553 { 554 typedef uint8_t Type; 555 static const Type VALUE = 27; 556 }; 557 558 template <> struct BitsPerValue<AminoAcid> 559 { 560 typedef uint8_t Type; 561 static const Type VALUE = 5; 562 }; 563 564 inline AminoAcid 565 unknownValueImpl(AminoAcid *) 566 { 567 static const AminoAcid _result = AminoAcid('X'); 568 return _result; 569 } 570 571 // ---------------------------------------------------------------------------- 572 // Specialization Finite 573 // ---------------------------------------------------------------------------- 574 575 /*! 576 * @class Finite 577 * @extends SimpleType 578 * @headerfile <seqan/basic.h> 579 * 580 * @brief A finite alphabet of a fixed size. 581 * 582 * @signature template <typename TValue, unsigned SIZE> 583 * class SimpleType<TValue, Finite<SIZE> >; 584 * 585 * @tparam TValue The type that is use to store the values. 586 * @tparam SIZE The ValueSize of the alphabet. 
587 */ 588 589 template <unsigned SIZE> 590 struct Finite; 591 592 template <typename TValue, unsigned SIZE> 593 struct ValueSize<SimpleType<TValue, Finite<SIZE> > > 594 { 595 typedef unsigned Type; 596 static const Type VALUE = SIZE; 597 }; 598 599 template <typename TValue, unsigned SIZE> 600 struct BitsPerValue<SimpleType<TValue, Finite<SIZE> > > 601 { 602 typedef uint8_t Type; 603 static const Type VALUE = Log2<SIZE>::VALUE; 604 }; 605 606 // ============================================================================ 607 // Assignment / Conversion Functions 608 // ============================================================================ 609 610 // ---------------------------------------------------------------------------- 611 // char 612 // ---------------------------------------------------------------------------- 613 614 inline void assign(char & c_target, 615 Dna const & source) 616 { 617 c_target = TranslateTableDna5ToChar_<>::VALUE[source.value]; 618 } 619 620 inline void assign(char & c_target, 621 Dna5 const & source) 622 { 623 c_target = TranslateTableDna5ToChar_<>::VALUE[source.value]; 624 } 625 626 inline void assign(char & c_target, 627 Rna const & source) 628 { 629 c_target = TranslateTableRna5ToChar_<>::VALUE[source.value]; 630 } 631 632 inline void assign(char & c_target, 633 Rna5 const & source) 634 { 635 c_target = TranslateTableRna5ToChar_<>::VALUE[source.value]; 636 } 637 638 inline void assign(char & c_target, Iupac const & source) 639 { 640 c_target = TranslateTableIupacToChar_<>::VALUE[source.value]; 641 } 642 643 inline void assign(char & c_target, AminoAcid const & source) 644 { 645 c_target = TranslateTableAAToChar_<>::VALUE[source.value]; 646 } 647 648 // ---------------------------------------------------------------------------- 649 // Dna 650 // ---------------------------------------------------------------------------- 651 652 template <> 653 struct CompareTypeImpl<Dna, uint8_t> 654 { 655 typedef Dna Type; 656 }; 657 658 
inline void assign(Dna & target, uint8_t c_source) 659 { 660 target.value = TranslateTableByteToDna_<>::VALUE[c_source]; 661 } 662 663 template <> 664 struct CompareTypeImpl<Dna, char> 665 { 666 typedef Dna Type; 667 }; 668 669 inline void assign(Dna & target, char c_source) 670 { 671 target.value = TranslateTableCharToDna_<>::VALUE[(unsigned char) c_source]; 672 } 673 674 template <> 675 struct CompareTypeImpl<Dna, Dna5> 676 { 677 typedef Dna Type; 678 }; 679 680 inline void assign(Dna & target, Dna5 const & c_source) 681 { 682 target.value = c_source.value & 0x03; 683 } 684 685 template <> 686 struct CompareTypeImpl<Dna, Iupac> 687 { 688 typedef Dna Type; 689 }; 690 691 inline void assign(Dna & target, Iupac const & source) 692 { 693 target.value = TranslateTableIupacToDna_<>::VALUE[source.value]; 694 } 695 696 // ---------------------------------------------------------------------------- 697 // Dna5 698 // ---------------------------------------------------------------------------- 699 700 template <> 701 struct CompareTypeImpl<Dna5, uint8_t> 702 { 703 typedef Dna5 Type; 704 }; 705 706 inline void assign(Dna5 & target, uint8_t c_source) 707 { 708 target.value = TranslateTableByteToDna5_<>::VALUE[c_source]; 709 } 710 711 template <> 712 struct CompareTypeImpl<Dna5, char> 713 { 714 typedef Dna5 Type; 715 }; 716 717 inline void assign(Dna5 & target, char c_source) 718 { 719 target.value = TranslateTableCharToDna5_<>::VALUE[(unsigned char) c_source]; 720 } 721 722 template <> 723 struct CompareTypeImpl<Dna5, Iupac> 724 { 725 typedef Dna5 Type; 726 }; 727 728 inline void assign(Dna5 & target, Iupac const & source) 729 { 730 target.value = TranslateTableIupacToDna5_<>::VALUE[source.value]; 731 } 732 733 template <> 734 struct CompareTypeImpl<Dna5, Dna> 735 { 736 typedef Dna Type; 737 }; 738 739 inline void assign(Dna5 & target, Dna const & c_source) 740 { 741 target.value = c_source.value; 742 } 743 744 // 
---------------------------------------------------------------------------- 745 // Rna 746 // ---------------------------------------------------------------------------- 747 748 template <> 749 struct CompareTypeImpl<Rna, uint8_t> 750 { 751 typedef Rna Type; 752 }; 753 754 inline void assign(Rna & target, uint8_t c_source) 755 { 756 target.value = TranslateTableByteToDna_<>::VALUE[c_source]; 757 } 758 759 template <> 760 struct CompareTypeImpl<Rna, char> 761 { 762 typedef Rna Type; 763 }; 764 765 inline void assign(Rna & target, char c_source) 766 { 767 target.value = TranslateTableCharToDna_<>::VALUE[(unsigned char)c_source]; 768 } 769 770 template <> 771 struct CompareTypeImpl<Rna5, Iupac> 772 { 773 typedef Rna5 Type; 774 }; 775 776 inline void assign(Rna5 & target, Iupac const & source) 777 { 778 target.value = TranslateTableIupacToDna5_<>::VALUE[source.value]; 779 } 780 781 template <> 782 struct CompareTypeImpl<Rna, Rna5> 783 { 784 typedef Rna Type; 785 }; 786 787 inline void assign(Rna & target, Rna5 const & c_source) 788 { 789 target.value = c_source.value & 0x03; 790 } 791 792 // --------------------------------------------------------------------------- 793 // Rna5 794 // --------------------------------------------------------------------------- 795 796 template <> 797 struct CompareTypeImpl<Rna5, uint8_t> 798 { 799 typedef Rna5 Type; 800 }; 801 802 inline void assign(Rna5 & target, uint8_t c_source) 803 { 804 target.value = TranslateTableByteToDna5_<>::VALUE[c_source]; 805 } 806 807 template <> 808 struct CompareTypeImpl<Rna5, char> 809 { 810 typedef Rna5 Type; 811 }; 812 813 inline void assign(Rna5 & target, char c_source) 814 { 815 target.value = TranslateTableCharToDna5_<>::VALUE[(unsigned char)c_source]; 816 } 817 818 template <> 819 struct CompareTypeImpl<Rna5, Rna> 820 { 821 typedef Dna Type; 822 }; 823 824 inline void assign(Rna5 & target, Rna const & c_source) 825 { 826 target.value = c_source.value; 827 } 828 829 // 
--------------------------------------------------------------------------- 830 // Iupac 831 // --------------------------------------------------------------------------- 832 833 template <> 834 struct CompareTypeImpl<Iupac, uint8_t> 835 { 836 typedef Iupac Type; 837 }; 838 839 inline void assign(Iupac & target, uint8_t c_source) 840 { 841 target.value = TranslateTableByteToIupac_<>::VALUE[c_source]; 842 } 843 844 template <> 845 struct CompareTypeImpl<Iupac, char> 846 { 847 typedef Iupac Type; 848 }; 849 850 inline void assign(Iupac & target, char c_source) 851 { 852 target.value = TranslateTableCharToIupac_<>::VALUE[(unsigned char) c_source]; 853 } 854 855 inline void assign(Iupac & target, Dna const & source) 856 { 857 target.value = TranslateTableDna5ToIupac_<>::VALUE[source.value]; 858 } 859 860 inline void assign(Iupac & target, Dna5 const & source) 861 { 862 target.value = TranslateTableDna5ToIupac_<>::VALUE[source.value]; 863 } 864 865 // --------------------------------------------------------------------------- 866 // Amino Acid 867 // --------------------------------------------------------------------------- 868 869 template <> 870 struct CompareTypeImpl<AminoAcid, uint8_t> 871 { 872 typedef AminoAcid Type; 873 }; 874 875 inline void assign(AminoAcid & target, uint8_t c_source) 876 { 877 target.value = TranslateTableByteToAA_<>::VALUE[c_source]; 878 } 879 880 template <> 881 struct CompareTypeImpl<AminoAcid, char> 882 { 883 typedef AminoAcid Type; 884 }; 885 886 inline void assign(AminoAcid & target, char c_source) 887 { 888 target.value = TranslateTableCharToAA_<>::VALUE[(unsigned char) c_source]; 889 } 890 891 // --------------------------------------------------------------------------- 892 // DnaQ 893 // --------------------------------------------------------------------------- 894 895 // template <typename TValue, typename TValue2> 896 // struct CompareTypeImpl<SimpleType<TValue,DnaQ_>, SimpleType<TValue2,Dna_> > 897 // { 898 // typedef 
SimpleType<TValue2,Dna_> Type; 899 // }; 900 // 901 // template <typename TValue, typename TValue2> 902 // struct CompareTypeImpl<SimpleType<TValue,Dna_>, SimpleType<TValue2,DnaQ_> > 903 // { 904 // typedef SimpleType<TValue,Dna_> Type; 905 // }; 906 907 template <> 908 struct CompareTypeImpl<DnaQ, DnaQ> 909 { 910 typedef Dna Type; 911 }; 912 913 template <> 914 struct CompareTypeImpl<DnaQ, Dna> 915 { 916 typedef Dna Type; 917 }; 918 919 inline void assign(DnaQ & target, Dna const & source) 920 { 921 target.value = source.value | (SEQAN_DEFAULT_QUALITY << 2); 922 } 923 924 template <> 925 struct CompareTypeImpl<Dna, DnaQ> 926 { 927 typedef Dna Type; 928 }; 929 930 inline void assign(Dna & target, DnaQ const & source) 931 { 932 target.value = source.value & 3; 933 } 934 935 template <> 936 struct CompareTypeImpl<DnaQ, Iupac> 937 { 938 typedef Iupac Type; 939 }; 940 941 inline void assign(DnaQ & target, Iupac const & source) 942 { 943 assign(target, (Dna) source); 944 } 945 946 template <> 947 struct CompareTypeImpl<Iupac, DnaQ> 948 { 949 typedef Iupac Type; 950 }; 951 952 inline void assign(Iupac & target, DnaQ const & source) 953 { 954 assign(target, (Dna) source); 955 } 956 957 template <> 958 struct CompareTypeImpl<DnaQ, Dna5> 959 { 960 typedef Dna Type; 961 }; 962 963 inline void assign(DnaQ & target, Dna5 const & source) 964 { 965 assign(target, (Dna) source); 966 } 967 968 template <> 969 struct CompareTypeImpl<DnaQ, uint8_t> 970 { 971 typedef Dna Type; 972 }; 973 974 inline void assign(DnaQ & target, uint8_t c_source) 975 { 976 assign(target, (Dna) c_source); 977 } 978 979 template <> 980 struct CompareTypeImpl<DnaQ, char> 981 { 982 typedef Dna Type; 983 }; 984 985 inline void assign(DnaQ & target, char c_source) 986 { 987 assign(target, (Dna) c_source); 988 } 989 990 inline void 991 assign(DnaQ & target, DnaQ const & source) 992 { 993 target.value = source.value; 994 } 995 996 template <typename TSource> 997 inline void 998 assign(DnaQ & target, TSource 
const & source) 999 { 1000 target.value = (Dna)source; 1001 } 1002 1003 inline void 1004 assign(int64_t & c_target, 1005 DnaQ & source) 1006 { 1007 c_target = Dna(source); 1008 } 1009 1010 inline void 1011 assign(int64_t & c_target, 1012 DnaQ const & source) 1013 { 1014 c_target = Dna(source); 1015 } 1016 1017 // uint64_t 1018 1019 inline void 1020 assign(uint64_t & c_target, 1021 DnaQ & source) 1022 { 1023 c_target = Dna(source); 1024 } 1025 1026 inline void 1027 assign(uint64_t & c_target, 1028 DnaQ const & source) 1029 { 1030 c_target = Dna(source); 1031 } 1032 1033 // int 1034 1035 inline void 1036 assign(int & c_target, 1037 DnaQ & source) 1038 { 1039 c_target = Dna(source); 1040 } 1041 1042 inline void 1043 assign(int & c_target, 1044 DnaQ const & source) 1045 { 1046 c_target = Dna(source); 1047 } 1048 1049 // unsigned int 1050 1051 inline void 1052 assign(unsigned int & c_target, 1053 DnaQ & source) 1054 { 1055 c_target = Dna(source); 1056 } 1057 1058 inline void 1059 assign(unsigned int & c_target, 1060 DnaQ const & source) 1061 { 1062 c_target = Dna(source); 1063 } 1064 1065 // short 1066 1067 inline void 1068 assign(short & c_target, 1069 DnaQ & source) 1070 { 1071 c_target = Dna(source); 1072 } 1073 1074 inline void 1075 assign(short & c_target, 1076 DnaQ const & source) 1077 { 1078 c_target = Dna(source); 1079 } 1080 1081 // unsigned short 1082 1083 inline void 1084 assign(unsigned short & c_target, 1085 DnaQ & source) 1086 { 1087 c_target = Dna(source); 1088 } 1089 1090 inline void 1091 assign(unsigned short & c_target, 1092 DnaQ const & source) 1093 { 1094 c_target = Dna(source); 1095 } 1096 1097 // char 1098 1099 inline void 1100 assign(char & c_target, 1101 DnaQ & source) 1102 { 1103 c_target = Dna(source); 1104 } 1105 1106 inline void 1107 assign(char & c_target, 1108 DnaQ const & source) 1109 { 1110 c_target = Dna(source); 1111 } 1112 1113 // signed char 1114 1115 inline void 1116 assign(signed char & c_target, 1117 DnaQ & source) 1118 { 1119 
c_target = Dna(source); 1120 } 1121 1122 inline void 1123 assign(signed char & c_target, 1124 DnaQ const & source) 1125 { 1126 c_target = Dna(source); 1127 } 1128 1129 // unsigned char 1130 1131 inline void 1132 assign(unsigned char & c_target, 1133 DnaQ & source) 1134 { 1135 c_target = Dna(source); 1136 } 1137 1138 inline void 1139 assign(unsigned char & c_target, 1140 DnaQ const & source) 1141 { 1142 c_target = Dna(source); 1143 } 1144 1145 // --------------------------------------------------------------------------- 1146 // Dna5Q 1147 // --------------------------------------------------------------------------- 1148 1149 // template <typename TValue, typename TValue2> 1150 // struct CompareTypeImpl<SimpleType<TValue,Dna5Q_>, SimpleType<TValue2,Dna5_> > 1151 // { 1152 // typedef SimpleType<TValue2,Dna5_> Type; 1153 // }; 1154 // 1155 // template <typename TValue, typename TValue2> 1156 // struct CompareTypeImpl<SimpleType<TValue,Dna5_>, SimpleType<TValue2,Dna5Q_> > 1157 // { 1158 // typedef SimpleType<TValue,Dna5_> Type; 1159 // }; 1160 1161 1162 template <> 1163 struct CompareTypeImpl<Dna5Q, Dna5Q> 1164 { 1165 typedef Dna5 Type; 1166 }; 1167 1168 template <> 1169 struct CompareTypeImpl<DnaQ, Dna5Q> 1170 { 1171 typedef Dna Type; 1172 }; 1173 1174 inline void assign(DnaQ & target, Dna5Q const & source) 1175 { 1176 // We perform the converstion from DNA5 to DNA5 with qualities by a simple 1177 // table lookup. The lookup below is equivalent to the following line: 1178 // 1179 // target.value = (source.value == Dna5QValueN_)? 
0: source.value; 1180 1181 static const unsigned table[] = { 1182 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 1183 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 1184 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 1185 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 1186 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 1187 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 1188 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 1189 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 1190 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 1191 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 1192 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 1193 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 1194 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 1195 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 1196 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 1197 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 0, 0, 0, 0 1198 }; 1199 target.value = table[source.value]; 1200 } 1201 1202 template <> 1203 struct CompareTypeImpl<Dna5Q, DnaQ> 1204 { 1205 typedef Dna Type; 1206 }; 1207 1208 inline void assign(Dna5Q & target, DnaQ const & source) 1209 { 1210 target.value = source.value; 1211 } 1212 1213 1214 template <> 1215 struct CompareTypeImpl<Dna5, Dna5Q> 1216 { 1217 typedef Dna5 Type; 1218 }; 1219 1220 inline void assign(Dna5 & target, Dna5Q const & source) 1221 { 1222 1223 // We perform the conversion from DNA5 to DNA5 with qualities by a simple 1224 // table lookup. The lookup below is equivalent to the following line: 1225 // 1226 // target.value = (source.value == Dna5QValueN_)? 
4: source.value & 3; 1227 1228 static const unsigned table[] = { 1229 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 1230 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 1231 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 1232 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 1233 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 1234 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 1235 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 1236 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 1237 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 1238 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 1239 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 1240 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 1241 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 4, 4, 4, 4 // <-- note the 4 1242 }; 1243 target.value = table[source.value]; 1244 } 1245 1246 template <> 1247 struct CompareTypeImpl<Dna5Q, Dna5> 1248 { 1249 typedef Dna5 Type; 1250 }; 1251 1252 inline void assign(Dna5Q & target, Dna5 const & source) 1253 { 1254 1255 // We perform the conversion from DNA5 with qualities to DNA5 by a simple 1256 // table lookup. The lookup below is equivalent to the following line: 1257 // 1258 // target.value = (source.value == 4)? 
Dna5QValueN_ : source.value | (40 << 2); 1259 1260 static const unsigned table[] = { 1261 (SEQAN_DEFAULT_QUALITY << 2) + 0, (SEQAN_DEFAULT_QUALITY << 2) + 1, 1262 (SEQAN_DEFAULT_QUALITY << 2) + 2, (SEQAN_DEFAULT_QUALITY << 2) + 3, Dna5QValueN_ 1263 }; 1264 target.value = table[source.value]; 1265 } 1266 1267 template <> 1268 struct CompareTypeImpl<Dna5Q, Dna> 1269 { 1270 typedef Dna Type; 1271 }; 1272 1273 inline void assign(Dna5Q & target, Dna const & source) 1274 { 1275 assign(target, (DnaQ) source); 1276 } 1277 1278 template <> 1279 struct CompareTypeImpl<Dna, Dna5Q> 1280 { 1281 typedef Dna Type; 1282 }; 1283 1284 inline void assign(Dna & target, Dna5Q const & source) 1285 { 1286 assign(target, (Dna5)source); 1287 } 1288 1289 template <> 1290 struct CompareTypeImpl<Dna5, DnaQ> 1291 { 1292 typedef Dna5 Type; 1293 }; 1294 1295 inline void assign(Dna5 & target, DnaQ const & source) 1296 { 1297 assign(target, (Dna5Q)source); 1298 } 1299 1300 template <> 1301 struct CompareTypeImpl<Dna5Q, uint8_t> 1302 { 1303 typedef Dna5 Type; 1304 }; 1305 1306 inline void assign(Dna5Q & target, uint8_t c_source) 1307 { 1308 assign(target, (Dna5)c_source); 1309 } 1310 1311 template <> 1312 struct CompareTypeImpl<Dna5Q, char> 1313 { 1314 typedef Dna5 Type; 1315 }; 1316 1317 inline void assign(Dna5Q & target, char c_source) 1318 { 1319 assign(target, (Dna5)c_source); 1320 } 1321 1322 template <> 1323 struct CompareTypeImpl<Dna5Q, Iupac> 1324 { 1325 typedef Iupac Type; 1326 }; 1327 1328 inline void assign(Dna5Q & target, Iupac const & source) 1329 { 1330 assign(target, (Dna5)source); 1331 } 1332 1333 template <> 1334 struct CompareTypeImpl<Iupac, Dna5Q> 1335 { 1336 typedef Iupac Type; 1337 }; 1338 1339 inline void assign(Iupac & target, Dna5Q const & source) 1340 { 1341 assign(target, (Dna5)source); 1342 } 1343 1344 inline void 1345 assign(Dna5Q & target, Dna5Q const & source) 1346 { 1347 target.value = source.value; 1348 } 1349 1350 template <typename TSource> 1351 inline void 1352 
assign(Dna5Q & target, TSource const & source) 1353 { 1354 assign(target, (Dna5)source); 1355 } 1356 1357 // int64_t 1358 1359 inline void 1360 assign(int64_t & c_target, 1361 Dna5Q & source) 1362 { 1363 c_target = Dna5(source); 1364 } 1365 1366 inline void 1367 assign(int64_t & c_target, 1368 Dna5Q const & source) 1369 { 1370 c_target = Dna5(source); 1371 } 1372 1373 // uint64_t 1374 1375 inline void 1376 assign(uint64_t & c_target, 1377 Dna5Q & source) 1378 { 1379 c_target = Dna5(source); 1380 } 1381 1382 inline void 1383 assign(uint64_t & c_target, 1384 Dna5Q const & source) 1385 { 1386 c_target = Dna5(source); 1387 } 1388 1389 // int 1390 1391 inline void 1392 assign(int & c_target, 1393 Dna5Q & source) 1394 { 1395 c_target = Dna5(source); 1396 } 1397 1398 inline void 1399 assign(int & c_target, 1400 Dna5Q const & source) 1401 { 1402 c_target = Dna5(source); 1403 } 1404 1405 // unsigned int 1406 1407 inline void 1408 assign(unsigned int & c_target, 1409 Dna5Q & source) 1410 { 1411 c_target = Dna5(source); 1412 } 1413 1414 inline void 1415 assign(unsigned int & c_target, 1416 Dna5Q const & source) 1417 { 1418 c_target = Dna5(source); 1419 } 1420 1421 1422 //short 1423 1424 inline void 1425 assign(short & c_target, 1426 Dna5Q & source) 1427 { 1428 c_target = Dna5(source); 1429 } 1430 1431 inline void 1432 assign(short & c_target, 1433 Dna5Q const & source) 1434 { 1435 c_target = Dna5(source); 1436 } 1437 1438 //unsigned short 1439 1440 inline void 1441 assign(unsigned short & c_target, 1442 Dna5Q & source) 1443 { 1444 c_target = Dna5(source); 1445 } 1446 1447 inline void 1448 assign(unsigned short & c_target, 1449 Dna5Q const & source) 1450 { 1451 c_target = Dna5(source); 1452 } 1453 1454 // char 1455 1456 inline void 1457 assign(char & c_target, 1458 Dna5Q & source) 1459 { 1460 c_target = Dna5(source); 1461 } 1462 1463 inline void 1464 assign(char & c_target, 1465 Dna5Q const & source) 1466 { 1467 c_target = Dna5(source); 1468 } 1469 1470 // signed char 1471 
1472 inline void 1473 assign(signed char & c_target, 1474 Dna5Q & source) 1475 { 1476 c_target = Dna5(source); 1477 } 1478 1479 inline void 1480 assign(signed char & c_target, 1481 Dna5Q const & source) 1482 { 1483 c_target = Dna5(source); 1484 } 1485 1486 // unsigned char 1487 1488 inline void 1489 assign(unsigned char & c_target, 1490 Dna5Q & source) 1491 { 1492 c_target = Dna5(source); 1493 } 1494 1495 inline void 1496 assign(unsigned char & c_target, 1497 Dna5Q const & source) 1498 { 1499 c_target = Dna5(source); 1500 } 1501 1502 } // namespace seqan 1503 1504 #endif // #ifndef SEQAN_INCLUDE_SEQAN_BASIC_BASIC_ALPHABET_RESIDUE_H_ 1505