1 // File Description 2 /// \file BamRecord.h 3 /// \brief Defines the BamRecord class. 4 // 5 // Author: Derek Barnett 6 7 #ifndef BAMRECORD_H 8 #define BAMRECORD_H 9 10 #include <cstddef> 11 #include <cstdint> 12 #include <memory> 13 #include <string> 14 #include <utility> 15 #include <vector> 16 17 #include "pbbam/Accuracy.h" 18 #include "pbbam/BamHeader.h" 19 #include "pbbam/BamRecordImpl.h" 20 #include "pbbam/ClipType.h" 21 #include "pbbam/FrameEncodingType.h" 22 #include "pbbam/Frames.h" 23 #include "pbbam/LocalContextFlags.h" 24 #include "pbbam/Orientation.h" 25 #include "pbbam/PulseBehavior.h" 26 #include "pbbam/PulseExclusionReason.h" 27 #include "pbbam/QualityValues.h" 28 #include "pbbam/ReadGroupInfo.h" 29 #include "pbbam/RecordType.h" 30 #include "pbbam/Strand.h" 31 #include "pbbam/ZmwType.h" 32 #include "pbbam/virtual/VirtualRegionType.h" 33 34 namespace PacBio { 35 namespace BAM { 36 37 namespace internal { 38 39 class BamRecordMemory; 40 class Pulse2BaseCache; 41 42 } // namespace internal 43 44 /// \brief The BamRecord class represents a %PacBio %BAM record. 45 /// 46 /// %PacBio %BAM records are extensions of normal SAM/BAM records. Thus in 47 /// addition to normal fields like bases, qualities, mapping coordinates, etc., 48 /// tags are used extensively to annotate records with additional 49 /// PacBio-specific data. 50 /// 51 /// Mapping and clipping APIs are provided as well to ensure that such 52 /// operations "trickle down" to all data fields properly. 53 /// 54 /// \sa https://samtools.github.io/hts-specs/SAMv1.pdf 55 /// for more information on standard %BAM data, and 56 /// https://github.com/PacificBiosciences/PacBioFileFormats/blob/3.0/BAM.rst 57 /// for more information on %PacBio %BAM fields. 58 /// 59 class PBBAM_EXPORT BamRecord 60 { 61 public: 62 /// \name Constructors & Related Methods 63 /// \{ 64 65 BamRecord(); 66 BamRecord(BamHeader header); 67 BamRecord(BamRecordImpl impl); 68 BamRecord(const BamRecord& other); 69 BamRecord(BamRecord&& other); 70 BamRecord& operator=(const BamRecord& other); 71 BamRecord& operator=(BamRecord&& other); 72 virtual ~BamRecord(); 73 74 /// \} 75 76 public: 77 /// \name General Data 78 /// \{ 79 80 /// \returns this record's full name 81 /// \sa BamRecordImpl::Name 82 /// 83 std::string FullName() const; 84 85 /// \returns shared pointer to this record's associated BamHeader 86 BamHeader Header() const; 87 88 /// \returns ZMW hole number 89 /// \throws if missing zm tag & record name does not contain hole number 90 /// 91 int32_t HoleNumber() const; 92 93 /// \returns this record's LocalContextFlags 94 PacBio::BAM::LocalContextFlags LocalContextFlags() const; 95 96 /// \returns this record's movie name 97 std::string MovieName() const; 98 99 /// \returns "number of complete passes of the insert" 100 int32_t NumPasses() const; 101 102 /// \returns the record's query end position, or Sequence().length() if not 103 /// stored 104 /// \note QueryEnd is in polymerase read coordinates, NOT genomic 105 /// coordinates. 106 /// 107 Position QueryEnd() const; 108 109 /// \returns the record's query start position, or 0 if not stored 110 /// 111 /// \note QueryStart is in polymerase read coordinates, NOT genomic 112 /// coordinates. 113 /// 114 Position QueryStart() const; 115 116 /// \returns this record's expected read accuracy [0, 1000] 117 Accuracy ReadAccuracy() const; 118 119 /// \returns ReadGroupInfo object for this record 120 ReadGroupInfo ReadGroup() const; 121 122 /// \returns string ID of this record's read group 123 /// \sa ReadGroupInfo::Id 124 /// 125 std::string ReadGroupId() const; 126 127 /// \returns integer value for this record's read group ID 128 int32_t ReadGroupNumericId() const; 129 130 /// \returns this scrap record's scrap region type 131 VirtualRegionType ScrapRegionType() const; 132 133 /// \returns this scrap record's scrap ZMW type 134 ZmwType ScrapZmwType() const; 135 136 /// \returns this record's average signal-to-noise for each of A, C, G, 137 /// and T 138 /// 139 std::vector<float> SignalToNoise() const; 140 141 /// \returns this record's type 142 /// \sa RecordType 143 RecordType Type() const; 144 145 /// \} 146 147 public: 148 /// \name Mapping Data 149 /// \{ 150 151 /// \returns the record's aligned end position 152 /// 153 /// \note AlignedEnd is in polymerase read coordinates, NOT genomic 154 /// coordinates. 155 /// 156 Position AlignedEnd() const; 157 158 /// \returns the record's aligned start position 159 /// 160 /// \note AlignedStart is in polymerase read coordinates, NOT genomic 161 /// coordinates. 162 /// 163 Position AlignedStart() const; 164 165 /// \returns the record's strand as a Strand enum value 166 Strand AlignedStrand() const; 167 168 /// \returns the record's CIGAR data as a Cigar object 169 /// 170 /// \param[in] exciseAllClips if true, remove all clipping operations 171 /// (hard & soft) [default:false] 172 /// 173 Cigar CigarData(bool exciseAllClips = false) const; 174 175 /// \returns true if this record was mapped by aligner 176 bool IsMapped() const; 177 178 /// \returns this record's mapping quality. A value of 255 indicates 179 /// "unknown" 180 /// 181 uint8_t MapQuality() const; 182 183 /// \returns the number of deleted bases (relative to reference) 184 size_t NumDeletedBases() const; 185 186 /// \returns the number of inserted bases (relative to reference) 187 size_t NumInsertedBases() const; 188 189 /// \returns the number of matching bases (sum of '=' CIGAR op lengths) 190 size_t NumMatches() const; 191 192 /// \returns a tuple containing NumMatches (first) and NumMismatches 193 /// (second) 194 /// 195 std::pair<size_t, size_t> NumMatchesAndMismatches() const; 196 197 /// \returns the number of mismatching bases (sum of 'X' CIGAR op lengths) 198 size_t NumMismatches() const; 199 200 /// \returns this record's reference ID, or -1 if unmapped. 201 /// 202 /// \note This is only a valid identifier within this %BAM file 203 /// 204 int32_t ReferenceId() const; 205 206 /// \returns this record's reference name. 207 /// 208 /// \throws an exception if unmapped record. 209 /// 210 std::string ReferenceName() const; 211 212 /// \returns the record's reference end position, or UnmappedPosition if 213 /// unmapped 214 /// 215 /// \note ReferenceEnd is in reference coordinates, NOT polymerase read 216 /// coordinates. 217 /// 218 Position ReferenceEnd() const; 219 220 /// \returns the record's reference start position, or UnmappedPosition if 221 /// unmapped 222 /// 223 /// \note ReferenceStart is in reference coordinates, NOT polymerase read 224 /// coordinates. 225 /// 226 Position ReferenceStart() const; 227 228 /// \} 229 230 public: 231 /// \name Barcode Data 232 /// \{ 233 234 /// \returns forward barcode id 235 /// 236 /// \throws std::runtime_error if barcode data is absent or malformed. 237 /// \sa HasBarcodes 238 /// 239 int16_t BarcodeForward() const; 240 241 /// \returns barcode call confidence (Phred-scaled posterior probability 242 /// of correct barcode call) 243 /// 244 /// \sa HasBarcodeQuality 245 /// 246 uint8_t BarcodeQuality() const; 247 248 /// \returns reverse barcode id 249 /// 250 /// \throws std::runtime_error if barcode data is absent or malformed. 251 /// \sa HasBarcodes 252 /// 253 int16_t BarcodeReverse() const; 254 255 /// \returns the forward and reverse barcode ids 256 /// 257 /// \throws std::runtime_error if barcode data is absent or malformed. 258 /// \sa HasBarcodes 259 /// 260 std::pair<int16_t, int16_t> Barcodes() const; 261 262 /// \} 263 264 public: 265 /// \name Auxiliary Data Queries 266 /// \{ 267 268 /// \returns true if this record has AltLabelQV data 269 bool HasAltLabelQV() const; 270 271 /// \returns true if this record has AltLabelTag data 272 bool HasAltLabelTag() const; 273 274 /// \returns true if this record has Barcode data 275 bool HasBarcodes() const; 276 277 /// \returns true is this record has BarcodeQuality data 278 bool HasBarcodeQuality() const; 279 280 /// \returns true if this record has DeletionQV data 281 bool HasDeletionQV() const; 282 283 /// \returns true if this record has DeletionTag data 284 bool HasDeletionTag() const; 285 286 /// \returns true if this record has a HoleNumber 287 bool HasHoleNumber() const; 288 289 /// \returns true if this record has InsertionQV data 290 bool HasInsertionQV() const; 291 292 /// \returns true if this record has IPD data 293 bool HasIPD() const; 294 295 /// \returns true if this record has LabelQV data 296 bool HasLabelQV() const; 297 298 /// \returns true if this record has LocalContextFlags (absent in CCS) 299 bool HasLocalContextFlags() const; 300 301 /// \returns true if this record has MergeQV data 302 bool HasMergeQV() const; 303 304 /// \returns true if this record has NumPasses data 305 bool HasNumPasses() const; 306 307 /// \returns true if this record has Pkmean data 308 bool HasPkmean() const; 309 310 /// \returns true if this record has Pkmid data 311 bool HasPkmid() const; 312 313 /// \returns true if this record has Pkmean2 data 314 bool HasPkmean2() const; 315 316 /// \returns true if this record has Pkmid2 data 317 bool HasPkmid2() const; 318 319 /// \returns true if this record has PreBaseFrames aka IPD data 320 bool HasPreBaseFrames() const; 321 322 /// \returns true if this record has PrePulseFrames data 323 bool HasPrePulseFrames() const; 324 325 /// \returns true if this record has PulseCall data 326 bool HasPulseCall() const; 327 328 /// \returns true if this record has PulseCallWidth data 329 bool HasPulseCallWidth() const; 330 331 /// \returns true if this record has PulseExclusion data 332 bool HasPulseExclusion(void) const; 333 334 /// \returns true if this record has PulseMergeQV data 335 bool HasPulseMergeQV() const; 336 337 /// \returns true if this record has PulseWidth data 338 bool HasPulseWidth() const; 339 340 /// \returns true if this record has ReadAccuracyTag data 341 bool HasReadAccuracy() const; 342 343 /// \returns true if this record has QueryEnd data 344 bool HasQueryEnd() const; 345 346 /// \returns true if this record has QueryStart data 347 bool HasQueryStart() const; 348 349 /// \returns true if this record has ScrapRegionType data (only in SCRAP) 350 bool HasScrapRegionType() const; 351 352 /// \returns true if this record has scrap ZMW type data (only in SCRAP) 353 bool HasScrapZmwType() const; 354 355 /// \returns true if this record has signal-to-noise data (absent in 356 /// POLYMERASE) 357 /// 358 bool HasSignalToNoise() const; 359 360 /// \returns true if this record has StartFrame data 361 bool HasStartFrame() const; 362 363 /// \returns true if this record has SubstitutionQV data 364 bool HasSubstitutionQV() const; 365 366 /// \returns true if this record has SubstitutionTag data 367 bool HasSubstitutionTag() const; 368 369 /// \} 370 371 public: 372 /// \name Sequence & Tag Data 373 /// \{ 374 375 /// \brief Fetches this record's AltLabelTag values ("pt" tag). 376 /// 377 /// \note If \p aligned is true, and gaps/padding need to be inserted, the 378 /// new gap chars will be '-' and padding chars will be '*'. 379 /// 380 /// \param[in] orientation Orientation of output. 381 /// 382 /// \returns AltLabelTags string 383 /// 384 std::string AltLabelTag(Orientation orientation = Orientation::NATIVE, bool aligned = false, 385 bool exciseSoftClips = false, 386 PulseBehavior pulseBehavior = PulseBehavior::ALL) const; 387 388 /// \brief Fetches this record's DeletionTag values ("dt" tag). 389 /// 390 /// \note If \p aligned is true, and gaps/padding need to be inserted, the 391 /// new gap chars will be '-' and padding chars will be '*'. 392 /// 393 /// \param[in] orientation Orientation of output. 394 /// \param[in] aligned if true, gaps/padding will be inserted, per 395 /// Cigar info. 396 /// \param[in] exciseSoftClips if true, any soft-clipped positions will be 397 /// removed from query ends 398 /// 399 /// \returns DeletionTag string 400 /// 401 std::string DeletionTag(Orientation orientation = Orientation::NATIVE, bool aligned = false, 402 bool exciseSoftClips = false) const; 403 404 /// \brief Fetches this record's DNA sequence (SEQ field). 405 /// 406 /// \note If \p aligned is true, and gaps/padding need to be inserted, the 407 /// new gap chars will be '-' and padding chars will be '*'. 408 /// 409 /// \param[in] orientation Orientation of output. 410 /// \param[in] aligned if true, gaps/padding will be inserted, per 411 /// Cigar info. 412 /// \param[in] exciseSoftClips if true, any soft-clipped positions will be 413 /// removed from query ends 414 /// 415 /// \returns sequence string 416 /// 417 std::string Sequence(const Orientation orientation = Orientation::NATIVE, bool aligned = false, 418 bool exciseSoftClips = false) const; 419 420 /// \brief Fetches this record's SubstitutionTag values ("st" tag). 421 /// 422 /// \note If \p aligned is true, and gaps/padding need to be inserted, the 423 /// new gap chars will be '-' and padding chars will be '*'. 424 /// 425 /// \param[in] orientation Orientation of output. 426 /// \param[in] aligned if true, gaps/padding will be inserted, per 427 /// Cigar info. 428 /// \param[in] exciseSoftClips if true, any soft-clipped positions will be 429 /// removed from query ends 430 /// 431 /// \returns SubstitutionTags string 432 /// 433 std::string SubstitutionTag(Orientation orientation = Orientation::NATIVE, bool aligned = false, 434 bool exciseSoftClips = false) const; 435 436 /// \} 437 438 public: 439 /// \name Quality Data 440 /// \{ 441 442 /// \brief Fetches this record's AltLabelQV values ("pv" tag). 443 /// 444 /// \note If \p aligned is true, and gaps/padding need to be inserted, the 445 /// new QVs will have a value of 0. 446 /// 447 /// \param[in] orientation Orientation of output. 448 /// 449 /// \returns AltLabelQV as QualityValues object 450 /// 451 QualityValues AltLabelQV(Orientation orientation = Orientation::NATIVE, bool aligned = false, 452 bool exciseSoftClips = false, 453 PulseBehavior pulseBehavior = PulseBehavior::ALL) const; 454 455 /// \brief Fetches this record's DeletionQV values ("dq" tag). 456 /// 457 /// \note If \p aligned is true, and gaps/padding need to be inserted, the 458 /// new QVs will have a value of 0. 459 /// 460 /// \param[in] orientation Orientation of output. 461 /// \param[in] aligned if true, gaps/padding will be inserted, per 462 /// Cigar info. 463 /// \param[in] exciseSoftClips if true, any soft-clipped positions will be 464 /// removed from query ends 465 /// 466 /// \returns DeletionQV as QualityValues object 467 /// 468 QualityValues DeletionQV(Orientation orientation = Orientation::NATIVE, bool aligned = false, 469 bool exciseSoftClips = false) const; 470 471 /// \brief Fetches this record's InsertionQV values ("iq" tag). 472 /// 473 /// \note If \p aligned is true, and gaps/padding need to be inserted, the 474 /// new QVs will have a value of 0. 475 /// 476 /// \param[in] orientation Orientation of output. 477 /// \param[in] aligned if true, gaps/padding will be inserted, per 478 /// Cigar info. 479 /// \param[in] exciseSoftClips if true, any soft-clipped positions will be 480 /// removed from query ends 481 /// 482 /// \returns InsertionQVs as QualityValues object 483 /// 484 QualityValues InsertionQV(Orientation orientation = Orientation::NATIVE, bool aligned = false, 485 bool exciseSoftClips = false) const; 486 487 /// \brief Fetches this record's LabelQV values ("pq" tag). 488 /// 489 /// \note If \p aligned is true, and gaps/padding need to be inserted, the 490 /// new QVs will have a value of 0. 491 /// 492 /// \param[in] orientation Orientation of output. 493 /// 494 /// \returns LabelQV as QualityValues object 495 /// 496 QualityValues LabelQV(Orientation orientation = Orientation::NATIVE, bool aligned = false, 497 bool exciseSoftClips = false, 498 PulseBehavior pulseBehavior = PulseBehavior::ALL) const; 499 500 /// \brief Fetches this record's MergeQV values ("mq" tag). 501 /// 502 /// \note If \p aligned is true, and gaps/padding need to be inserted, the 503 /// new QVs will have a value of 0. 504 /// 505 /// \param[in] orientation Orientation of output. 506 /// \param[in] aligned if true, gaps/padding will be inserted, per 507 /// Cigar info. 508 /// \param[in] exciseSoftClips if true, any soft-clipped positions will be 509 /// removed from query ends 510 /// 511 /// \returns MergeQV as QualityValues object 512 /// 513 QualityValues MergeQV(Orientation orientation = Orientation::NATIVE, bool aligned = false, 514 bool exciseSoftClips = false) const; 515 516 /// \brief Fetches this record's %BAM quality values (QUAL field). 517 /// 518 /// \note If \p aligned is true, and gaps/padding need to be inserted, the 519 /// new QVs will have a value of 0. 520 /// 521 /// \param[in] orientation Orientation of output. 522 /// \param[in] aligned if true, gaps/padding will be inserted, per 523 /// Cigar info. 524 /// \param[in] exciseSoftClips if true, any soft-clipped positions will be 525 /// removed from query ends 526 /// 527 /// \returns %BAM qualities as QualityValues object 528 /// 529 QualityValues Qualities(Orientation orientation = Orientation::NATIVE, bool aligned = false, 530 bool exciseSoftClips = false) const; 531 532 /// \brief Fetches this record's SubstitutionQV values ("sq" tag). 533 /// 534 /// \note If \p aligned is true, and gaps/padding need to be inserted, the 535 /// new QVs will have a value of 0. 536 /// 537 /// \param[in] orientation Orientation of output. 538 /// \param[in] aligned if true, gaps/padding will be inserted, per 539 /// Cigar info. 540 /// \param[in] exciseSoftClips if true, any soft-clipped positions will be 541 /// removed from query ends 542 /// 543 /// \returns SubstitutionQV as QualityValues object 544 /// 545 QualityValues SubstitutionQV(Orientation orientation = Orientation::NATIVE, 546 bool aligned = false, bool exciseSoftClips = false) const; 547 548 /// \} 549 550 public: 551 /// \name Pulse Data 552 /// \{ 553 554 /// \brief Fetches this record's IPD values ("ip" tag). 555 /// 556 /// \note If \p aligned is true, and gaps/padding need to be inserted, the 557 /// new frames will have a value of 0; 558 /// 559 /// \param[in] orientation Orientation of output. 560 /// \param[in] aligned if true, gaps/padding will be inserted, per 561 /// Cigar info. 562 /// \param[in] exciseSoftClips if true, any soft-clipped positions will be 563 /// removed from query ends 564 /// 565 /// \returns IPD as Frames object 566 /// 567 Frames IPD(Orientation orientation = Orientation::NATIVE, bool aligned = false, 568 bool exciseSoftClips = false) const; 569 570 /// \brief Fetches this record's IPD values ("ip" tag), but does not upscale. 571 /// 572 /// \param[in] orientation Orientation of output. 573 /// \returns IPD as Frames object 574 /// 575 Frames IPDRaw(Orientation orientation = Orientation::NATIVE) const; 576 577 /// \brief Fetches this record's Pkmean values ("pa" tag). 578 /// 579 /// \param[in] orientation Orientation of output. 580 /// \returns Pkmean as vector<float> object 581 /// 582 std::vector<float> Pkmean(Orientation orientation = Orientation::NATIVE, bool aligned = false, 583 bool exciseSoftClips = false, 584 PulseBehavior pulseBehavior = PulseBehavior::ALL) const; 585 586 /// \brief Fetches this record's Pkmid values ("pm" tag). 587 /// 588 /// \param[in] orientation Orientation of output. 589 /// \returns Pkmid as vector<float> object 590 /// 591 std::vector<float> Pkmid(Orientation orientation = Orientation::NATIVE, bool aligned = false, 592 bool exciseSoftClips = false, 593 PulseBehavior pulseBehavior = PulseBehavior::ALL) const; 594 595 /// \brief Fetches this record's Pkmean2 values ("pi" tag). 596 /// 597 /// \param[in] orientation Orientation of output. 598 /// \returns Pkmean as vector<float> object 599 /// 600 std::vector<float> Pkmean2(Orientation orientation = Orientation::NATIVE, bool aligned = false, 601 bool exciseSoftClips = false, 602 PulseBehavior pulseBehavior = PulseBehavior::ALL) const; 603 604 /// \brief Fetches this record's Pkmid2 values ("ps" tag). 605 /// 606 /// \param[in] orientation Orientation of output. 607 /// \returns Pkmid as vector<float> object 608 /// 609 std::vector<float> Pkmid2(Orientation orientation = Orientation::NATIVE, bool aligned = false, 610 bool exciseSoftClips = false, 611 PulseBehavior pulseBehavior = PulseBehavior::ALL) const; 612 613 /// \brief Fetches this record's PreBaseFrames aka IPD values ("ip" tag). 614 /// 615 /// \note If \p aligned is true, and gaps/padding need to be inserted, the 616 /// new frames will have a value of 0; 617 /// 618 /// \param[in] orientation Orientation of output. 619 /// \param[in] aligned if true, gaps/padding will be inserted, per 620 /// Cigar info. 621 /// \param[in] exciseSoftClips if true, any soft-clipped positions will be 622 /// removed from query ends 623 /// 624 /// \returns IPD as Frames object 625 /// 626 Frames PreBaseFrames(Orientation orientation = Orientation::NATIVE, bool aligned = false, 627 bool exciseSoftClips = false) const; 628 629 /// \brief Fetches this record's PrePulseFrames values ("pd" tag). 630 /// 631 /// \param[in] orientation Orientation of output. 632 /// \returns PrePulseFrames as Frames object 633 /// 634 Frames PrePulseFrames(Orientation orientation = Orientation::NATIVE, bool aligned = false, 635 bool exciseSoftClips = false, 636 PulseBehavior pulseBehavior = PulseBehavior::ALL) const; 637 638 /// \brief Fetches this record's PulseCall values ("pc" tag). 639 /// 640 /// \param[in] orientation Orientation of output. 641 /// \returns PulseCalls string 642 /// 643 std::string PulseCall(Orientation orientation = Orientation::NATIVE, bool aligned = false, 644 bool exciseSoftClips = false, 645 PulseBehavior pulseBehavior = PulseBehavior::ALL) const; 646 647 /// \brief Fetches this record's PulseCallWidth values ("px" tag). 648 /// 649 /// \param[in] orientation Orientation of output. 650 /// \returns PulseCallWidth as Frames object 651 /// 652 Frames PulseCallWidth(Orientation orientation = Orientation::NATIVE, bool aligned = false, 653 bool exciseSoftClips = false, 654 PulseBehavior pulseBehavior = PulseBehavior::ALL) const; 655 656 /// \brief Fetches this record's PulseExclusionReason values ("pe" tag). 657 /// 658 /// \returns vector of pulse exclusion reason value 659 /// 660 std::vector<PacBio::BAM::PulseExclusionReason> PulseExclusionReason( 661 Orientation orientation = Orientation::NATIVE, bool aligned = false, 662 bool exciseSoftClips = false, PulseBehavior pulseBehavior = PulseBehavior::ALL) const; 663 664 /// \brief Fetch this record's PulseMergeQV values ("pg" tag). 665 /// 666 /// \param[in] orientation Orientation of output. 667 /// \returns PulseMergeQV as QualityValues object 668 /// 669 QualityValues PulseMergeQV(Orientation orientation = Orientation::NATIVE, bool aligned = false, 670 bool exciseSoftClips = false, 671 PulseBehavior pulseBehavior = PulseBehavior::ALL) const; 672 673 /// \brief Fetches this record's PulseWidth values ("pw" tag). 674 /// 675 /// \note If \p aligned is true, and gaps/padding need to be inserted, the 676 /// new frames will have a value of 0. 677 /// 678 /// \param[in] orientation Orientation of output. 679 /// \param[in] aligned if true, gaps/padding will be inserted, per 680 /// Cigar info. 681 /// \param[in] exciseSoftClips if true, any soft-clipped positions will be 682 /// removed from query ends 683 /// 684 /// \returns PulseWidths as Frames object 685 /// 686 Frames PulseWidth(Orientation orientation = Orientation::NATIVE, bool aligned = false, 687 bool exciseSoftClips = false) const; 688 689 /// \brief Fetches this record's PulseWidth values ("pw" tag), but does not 690 /// upscale. 691 /// 692 /// \param[in] orientation Orientation of output. 693 /// \returns PulseWidth as Frames object 694 /// 695 Frames PulseWidthRaw(Orientation orientation = Orientation::NATIVE, bool aligned = false, 696 bool exciseSoftClips = false) const; 697 698 /// \brief Fetches this record's StartFrame values ("sf" tag). 699 /// 700 /// \param[in] orientation Orientation of output 701 /// 702 /// \returns StartFrame as uint32_t vector 703 /// 704 std::vector<uint32_t> StartFrame(Orientation orientation = Orientation::NATIVE, 705 bool aligned = false, bool exciseSoftClips = false, 706 PulseBehavior pulseBehavior = PulseBehavior::ALL) const; 707 708 /// \} 709 710 public: 711 /// \name Low-Level Access & Operations 712 /// \{ 713 714 /// \warning This method should be considered temporary and avoided as much 715 /// as possible. Direct access to the internal object is likely to 716 /// disappear as BamRecord interface matures. 717 /// 718 /// \returns const reference to underlying BamRecordImpl object 719 /// 720 const BamRecordImpl& Impl() const; 721 722 /// \warning This method should be considered temporary and avoided as much 723 /// as possible. Direct access to the internal object is likely to 724 /// disappear as BamRecord interface matures. 725 /// 726 /// \returns reference to underlying BamRecordImpl object 727 /// 728 BamRecordImpl& Impl(); 729 730 /// \} 731 732 public: 733 /// \name General Data 734 /// \{ 735 736 /// \brief Sets this record's ZMW hole number. 737 /// 738 /// \param[in] holeNumber 739 /// \returns reference to this record 740 /// 741 BamRecord& HoleNumber(const int32_t holeNumber); 742 743 /// \brief Sets this record's local context flags 744 /// 745 /// \param[in] flags 746 /// \returns reference to this record 747 /// 748 BamRecord& LocalContextFlags(const PacBio::BAM::LocalContextFlags flags); 749 750 /// \brief Sets this record's "number of complete passes of the insert". 751 /// 752 /// \param[in] numPasses 753 /// \returns reference to this record 754 /// 755 BamRecord& NumPasses(const int32_t numPasses); 756 757 /// \brief Sets this record's query end position. 758 /// 759 /// \note Changing this will modify the name of non-CCS records. 760 /// 761 /// \param[in] pos 762 /// \returns reference to this record 763 /// 764 BamRecord& QueryEnd(const PacBio::BAM::Position pos); 765 766 /// \brief Sets this record's query start position. 767 /// 768 /// \note Changing this will modify the name of non-CCS records. 769 /// 770 /// \param[in] pos 771 /// \returns reference to this record 772 /// 773 BamRecord& QueryStart(const PacBio::BAM::Position pos); 774 775 /// \brief Sets this record's expected read accuracy [0, 1000] 776 /// 777 /// \param[in] accuracy 778 /// \returns reference to this record 779 /// 780 BamRecord& ReadAccuracy(const Accuracy& accuracy); 781 782 /// \brief Attaches this record to the provided read group, changing the 783 /// record name & 'RG' tag. 784 /// 785 /// \param[in] rg 786 /// \returns reference to this record 787 /// 788 BamRecord& ReadGroup(const ReadGroupInfo& rg); 789 790 /// \brief Attaches this record to the provided read group, changing the 791 /// record name & 'RG' tag. 792 /// 793 /// \param[in] id 794 /// \returns reference to this record 795 /// 796 BamRecord& ReadGroupId(const std::string& id); 797 798 /// \brief Sets this scrap record's ScrapRegionType 799 /// 800 /// \param[in] type 801 /// \returns reference to this record 802 /// 803 BamRecord& ScrapRegionType(const VirtualRegionType type); 804 805 /// \brief Sets this scrap record's ScrapRegionType 806 /// 807 /// \param[in] type character equivalent of VirtualRegionType 808 /// \returns reference to this record 809 /// 810 BamRecord& ScrapRegionType(const char type); 811 812 /// \brief Sets this scrap record's ScrapZmwType 813 /// 814 /// \param[in] type 815 /// \returns reference to this record 816 /// 817 BamRecord& ScrapZmwType(const ZmwType type); 818 819 /// \brief Sets this scrap record's ScrapZmwType 820 /// 821 /// \param[in] type character equivalent of ZmwType 822 /// \returns reference to this record 823 /// 824 BamRecord& ScrapZmwType(const char type); 825 826 /// \brief Sets this record's average signal-to-noise in each of A, C, G, 827 /// and T 828 /// 829 /// \param[in] snr average signal-to-noise of A, C, G, and T (in this order) 830 /// \returns reference to this record 831 /// 832 BamRecord& SignalToNoise(const std::vector<float>& snr); 833 834 /// \} 835 836 public: 837 /// \name Barcode Data 838 /// \{ 839 840 /// \brief Sets this record's barcode IDs ('bc' tag) 841 /// 842 /// \param[in] barcodeIds 843 /// \returns reference to this record 844 /// 845 BamRecord& Barcodes(const std::pair<int16_t, int16_t>& barcodeIds); 846 847 /// \brief Sets this record's barcode quality ('bq' tag) 848 /// 849 /// \param[in] quality Phred-scaled confidence call 850 /// \returns reference to this record 851 /// 852 BamRecord& BarcodeQuality(const uint8_t quality); 853 854 /// \} 855 856 public: 857 /// \name Sequence & Tag Data 858 /// \{ 859 860 /// \brief Sets this record's AltLabelTag values ("at" tag). 861 /// 862 /// \param[in] tags 863 /// \returns reference to this record 864 /// 865 BamRecord& AltLabelTag(const std::string& tags); 866 867 /// \brief Sets this record's DeletionTag values ("dt" tag). 868 /// 869 /// \param[in] tags 870 /// \returns reference to this record 871 /// 872 BamRecord& DeletionTag(const std::string& tags); 873 874 /// \brief Sets this record's SubstitutionTag values ("st" tag). 875 /// 876 /// \param[in] tags 877 /// \returns reference to this record 878 /// 879 BamRecord& SubstitutionTag(const std::string& tags); 880 881 /// \} 882 883 public: 884 /// \name Quality Data 885 /// \{ 886 887 /// \brief Sets this record's AltLabelQV values ("pv" tag). 888 /// 889 /// \param[in] altLabelQVs 890 /// \returns reference to this record 891 /// 892 BamRecord& AltLabelQV(const QualityValues& altLabelQVs); 893 894 /// \brief Sets this record's DeletionQV values ("dq" tag). 895 /// 896 /// \param[in] deletionQVs 897 /// \returns reference to this record 898 /// 899 BamRecord& DeletionQV(const QualityValues& deletionQVs); 900 901 /// \brief Sets this record's InsertionQV values ("iq" tag). 902 /// 903 /// \param[in] insertionQVs 904 /// \returns reference to this record 905 /// 906 BamRecord& InsertionQV(const QualityValues& insertionQVs); 907 908 /// \brief Sets this record's LabelQV values ("pq" tag). 909 /// 910 /// \param[in] labelQVs 911 /// \returns reference to this record 912 /// 913 BamRecord& LabelQV(const QualityValues& labelQVs); 914 915 /// \brief Sets this record's MergeQV values ("mq" tag). 916 /// 917 /// \param[in] mergeQVs 918 /// \returns reference to this record 919 /// 920 BamRecord& MergeQV(const QualityValues& mergeQVs); 921 922 /// \brief Sets this record's SubstitutionQV values ("sq" tag). 923 /// 924 /// \param[in] substitutionQVs 925 /// \returns reference to this record 926 /// 927 BamRecord& SubstitutionQV(const QualityValues& substitutionQVs); 928 929 /// \} 930 931 public: 932 /// \name Pulse Data 933 /// \{ 934 935 /// \brief Sets this record's IPD values ("ip" tag). 936 /// 937 /// \param[in] frames 938 /// \param[in] encoding specify how to encode the data (8-bit lossy, or 939 /// 16-bit lossless) 940 /// \returns reference to this record 941 /// 942 BamRecord& IPD(const Frames& frames, const FrameEncodingType encoding); 943 944 /// \brief Sets this record's Pkmean values ("pm" tag). 945 /// 946 /// \param[in] photons 947 /// \returns reference to this record 948 /// 949 BamRecord& Pkmean(const std::vector<float>& photons); 950 951 /// \brief Sets this record's Pkmean values ("pm" tag). 952 /// 953 /// \param[in] encodedPhotons 954 /// \returns reference to this record 955 /// 956 BamRecord& Pkmean(const std::vector<uint16_t>& encodedPhotons); 957 958 /// \brief Sets this record's Pkmid values ("pa" tag). 959 /// 960 /// \param[in] photons 961 /// \returns reference to this record 962 /// 963 BamRecord& Pkmid(const std::vector<float>& photons); 964 965 /// \brief Sets this record's Pkmid values ("pa" tag). 966 /// 967 /// \param[in] encodedPhotons 968 /// \returns reference to this record 969 /// 970 BamRecord& Pkmid(const std::vector<uint16_t>& encodedPhotons); 971 972 /// \brief Sets this record's Pkmean2 values ("ps" tag). 973 /// 974 /// \param[in] photons 975 /// \returns reference to this record 976 /// 977 BamRecord& Pkmean2(const std::vector<float>& photons); 978 979 /// \brief Sets this record's Pkmean2 values ("ps" tag). 980 /// 981 /// \param[in] encodedPhotons 982 /// \returns reference to this record 983 /// 984 BamRecord& Pkmean2(const std::vector<uint16_t>& encodedPhotons); 985 986 /// \brief Sets this record's Pkmid2 values ("pi" tag). 987 /// 988 /// \param[in] photons 989 /// \returns reference to this record 990 /// 991 BamRecord& Pkmid2(const std::vector<float>& photons); 992 993 /// \brief Sets this record's Pkmid2 values ("pi" tag). 994 /// 995 /// \param[in] encodedPhotons 996 /// \returns reference to this record 997 /// 998 BamRecord& Pkmid2(const std::vector<uint16_t>& encodedPhotons); 999 1000 /// \brief Sets this record's PreBaseFrames aka IPD values ("ip" tag). 1001 /// 1002 /// \param[in] frames 1003 /// \param[in] encoding specify how to encode the data (8-bit lossy, or 1004 /// 16-bit lossless) 1005 /// \returns reference to this record 1006 /// 1007 BamRecord& PreBaseFrames(const Frames& frames, const FrameEncodingType encoding); 1008 1009 /// \brief Sets this record's PrePulseFrames values ("pd" tag). 1010 /// 1011 /// \param[in] frames 1012 /// \param[in] encoding specify how to encode the data (8-bit lossy, or 1013 /// 16-bit lossless) 1014 /// \returns reference to this record 1015 /// 1016 BamRecord& PrePulseFrames(const Frames& frames, const FrameEncodingType encoding); 1017 1018 /// \brief Sets this record's PulseCall values ("pc" tag). 1019 /// 1020 /// \param[in] tags 1021 /// \returns reference to this record 1022 /// 1023 BamRecord& PulseCall(const std::string& tags); 1024 1025 /// \brief Sets this record's PulseCallWidth values ("px" tag). 1026 /// 1027 /// \param[in] frames 1028 /// \param[in] encoding specify how to encode the data (8-bit lossy, or 1029 /// 16-bit lossless) 1030 /// \returns reference to this record 1031 /// 1032 BamRecord& PulseCallWidth(const Frames& frames, const FrameEncodingType encoding); 1033 1034 /// 1035 /// \\brief Sets this record's PulseExclusionReason values ("pe" tag). 1036 /// \param[in] reasons 1037 /// \return reference to this record 1038 /// 1039 BamRecord& PulseExclusionReason(const std::vector<PacBio::BAM::PulseExclusionReason>& reasons); 1040 1041 /// \brief Sets this record's PulseMergeQV values ("pg" tag). 1042 /// 1043 /// \param[in] pulseMergeQVs 1044 /// \returns reference to this record 1045 /// 1046 BamRecord& PulseMergeQV(const QualityValues& pulseMergeQVs); 1047 1048 /// \brief Sets this record's PulseWidth values ("pw" tag). 1049 /// 1050 /// \param[in] frames 1051 /// \param[in] encoding specify how to encode the data (8-bit lossy, or 1052 /// 16-bit lossless) 1053 /// \returns reference to this record 1054 /// 1055 BamRecord& PulseWidth(const Frames& frames, const FrameEncodingType encoding); 1056 1057 /// \brief Sets this record's StartFrame values ("sf" tag). 1058 /// 1059 /// \param[in] startFrame 1060 /// \returns reference to this record 1061 /// 1062 BamRecord& StartFrame(const std::vector<uint32_t>& startFrame); 1063 1064 /// \} 1065 1066 public: 1067 /// \name Low-Level Access & Operations 1068 /// \{ 1069 1070 /// \brief Resets cached aligned start/end. 1071 /// 1072 /// \note This method should not be needed in most client code. It exists 1073 /// primarily as a hook for internal reading loops (queries, index 1074 /// build, etc.) It's essentially a workaround and will likely be 1075 /// removed from the API. 1076 /// 1077 void ResetCachedPositions() const; 1078 1079 /// \brief Resets cached aligned start/end. 1080 /// 1081 /// \note This method should not be needed in most client code. It exists 1082 /// primarily as a hook for internal reading loops (queries, index 1083 /// build, etc.) It's essentially a workaround and will likely be 1084 /// removed from the API. 1085 /// 1086 void ResetCachedPositions(); 1087 1088 /// \brief Updates the record's name (BamRecord::FullName) to reflect 1089 /// modifications to name components (movie name, ZMW hole number, 1090 /// etc.) 1091 /// 1092 void UpdateName(); 1093 1094 /// \} 1095 1096 public: 1097 /// \name Pulse Data 1098 /// \{ 1099 1100 static const float photonFactor; 1101 1102 static std::vector<uint16_t> EncodePhotons(const std::vector<float>& data); 1103 1104 /// \} 1105 1106 public: 1107 /// \name Clipping & Mapping 1108 /// \{ 1109 1110 /// Creates a copied record from input, with clipping applied 1111 static BamRecord Clipped(const BamRecord& input, const ClipType clipType, 1112 const PacBio::BAM::Position start, const PacBio::BAM::Position end); 1113 1114 /// Creates a copied record from input, with mapping applied 1115 static BamRecord Mapped(const BamRecord& input, const int32_t referenceId, 1116 const Position refStart, const Strand strand, const Cigar& cigar, 1117 const uint8_t mappingQuality); 1118 1119 /// Applies clipping to this record 1120 BamRecord& Clip(const ClipType clipType, const PacBio::BAM::Position start, 1121 const PacBio::BAM::Position end); 1122 1123 /// Creates a copied record from this one, with clipping applied 1124 BamRecord Clipped(const ClipType clipType, const PacBio::BAM::Position start, 1125 const PacBio::BAM::Position end) const; 1126 1127 /// Applies mapping to this record 1128 BamRecord& Map(const int32_t referenceId, const Position refStart, const Strand strand, 1129 const Cigar& cigar, const uint8_t mappingQuality); 1130 1131 /// Creates a copied record from this one, with mapping applied 1132 BamRecord Mapped(const int32_t referenceId, const Position refStart, const Strand strand, 1133 const Cigar& cigar, const uint8_t mappingQuality) const; 1134 /// \} 1135 1136 private: 1137 BamRecordImpl impl_; 1138 1139 public: 1140 /// public & mutable so that queries can directly set the header info, 1141 /// even on a record that is const from client code's perspective 1142 mutable BamHeader header_; 1143 1144 private: 1145 /// \internal 1146 /// cached positions (mutable to allow lazy-calc in const methods) 1147 mutable Position alignedStart_; 1148 mutable Position alignedEnd_; 1149 1150 private: 1151 /// \internal 1152 /// pulse to bam mapping cache 1153 mutable std::unique_ptr<internal::Pulse2BaseCache> p2bCache_; 1154 1155 public: 1156 /// clips the PacBio tags to a specified length 1157 void ClipTags(const size_t clipPos, const size_t clipLength); 1158 1159 private: 1160 ///\internal 1161 /// clipping methods 1162 1163 void ClipFields(const size_t clipPos, const size_t clipLength); 1164 1165 BamRecord& ClipToQuery(const PacBio::BAM::Position start, const PacBio::BAM::Position end); 1166 BamRecord& ClipToReference(const PacBio::BAM::Position start, const PacBio::BAM::Position end); 1167 BamRecord& ClipToReferenceForward(const PacBio::BAM::Position start, 1168 const PacBio::BAM::Position end); 1169 BamRecord& ClipToReferenceReverse(const PacBio::BAM::Position start, 1170 const PacBio::BAM::Position end); 1171 1172 private: 1173 ///\internal 1174 /// raw tag data fetching 1175 1176 // sequence tags 1177 std::string FetchBasesRaw(const BamRecordTag tag) const; 1178 std::string FetchBases(const BamRecordTag tag, 1179 const Orientation orientation = Orientation::NATIVE, 1180 const bool aligned = false, const bool exciseSoftClips = false, 1181 const PulseBehavior pulseBehavior = PulseBehavior::ALL) const; 1182 1183 // frame tags 1184 Frames FetchFramesRaw(const BamRecordTag tag) const; 1185 Frames FetchFrames(const BamRecordTag tag, const Orientation orientation = Orientation::NATIVE, 1186 const bool aligned = false, const bool exciseSoftClips = false, 1187 const PulseBehavior pulseBehavior = PulseBehavior::ALL) const; 1188 1189 // pulse tags 1190 std::vector<float> FetchPhotonsRaw(const BamRecordTag tag) const; 1191 std::vector<float> FetchPhotons(const BamRecordTag tag, 1192 const Orientation orientation = Orientation::NATIVE, 1193 const bool aligned = false, const bool exciseSoftClips = false, 1194 const PulseBehavior pulseBehavior = PulseBehavior::ALL) const; 1195 1196 // QV tags 1197 QualityValues FetchQualitiesRaw(const BamRecordTag tag) const; 1198 QualityValues FetchQualities(const BamRecordTag tag, 1199 const Orientation orientation = Orientation::NATIVE, 1200 const bool aligned = false, const bool exciseSoftClips = false, 1201 const PulseBehavior pulseBehavior = PulseBehavior::ALL) const; 1202 1203 // UInt tags (e.g. start frame) 1204 // 1205 // TODO (DB): clean this up w.r.t FetchUInt8s 1206 // 1207 std::vector<uint32_t> FetchUInt32sRaw(const BamRecordTag tag) const; 1208 std::vector<uint32_t> FetchUInt32s( 1209 const BamRecordTag tag, const Orientation orientation = Orientation::NATIVE, 1210 const bool aligned = false, const bool exciseSoftClips = false, 1211 const PulseBehavior pulseBehavior = PulseBehavior::ALL) const; 1212 1213 // UInt tags (e.g. pulse exclusion) 1214 // 1215 // ODO (DB): clean this up w.r.t FetchUInt32s 1216 // 1217 std::vector<uint8_t> FetchUInt8sRaw(const BamRecordTag tag) const; 1218 std::vector<uint8_t> FetchUInt8s(const BamRecordTag tag, 1219 const Orientation orientation = Orientation::NATIVE, 1220 const bool aligned = false, const bool exciseSoftClips = false, 1221 const PulseBehavior pulseBehavior = PulseBehavior::ALL) const; 1222 1223 private: 1224 ///\internal 1225 /// marked const to allow calling from const methods 1226 /// but updates our mutable cached values 1227 void CalculateAlignedPositions() const; 1228 void CalculatePulse2BaseCache() const; 1229 1230 friend class internal::BamRecordMemory; 1231 }; 1232 1233 } // namespace BAM 1234 } // namespace PacBio 1235 1236 #include "pbbam/internal/BamRecord.inl" 1237 1238 #endif // BAMRECORD_H 1239