1 // File Description
2 /// \file BamRecord.h
3 /// \brief Defines the BamRecord class.
4 //
5 // Author: Derek Barnett
6 
7 #ifndef BAMRECORD_H
8 #define BAMRECORD_H
9 
10 #include <cstddef>
11 #include <cstdint>
12 #include <memory>
13 #include <string>
14 #include <utility>
15 #include <vector>
16 
17 #include "pbbam/Accuracy.h"
18 #include "pbbam/BamHeader.h"
19 #include "pbbam/BamRecordImpl.h"
20 #include "pbbam/ClipType.h"
21 #include "pbbam/FrameEncodingType.h"
22 #include "pbbam/Frames.h"
23 #include "pbbam/LocalContextFlags.h"
24 #include "pbbam/Orientation.h"
25 #include "pbbam/PulseBehavior.h"
26 #include "pbbam/PulseExclusionReason.h"
27 #include "pbbam/QualityValues.h"
28 #include "pbbam/ReadGroupInfo.h"
29 #include "pbbam/RecordType.h"
30 #include "pbbam/Strand.h"
31 #include "pbbam/ZmwType.h"
32 #include "pbbam/virtual/VirtualRegionType.h"
33 
34 namespace PacBio {
35 namespace BAM {
36 
37 namespace internal {
38 
39 class BamRecordMemory;
40 class Pulse2BaseCache;
41 
42 }  // namespace internal
43 
44 /// \brief The BamRecord class represents a %PacBio %BAM record.
45 ///
46 /// %PacBio %BAM records are extensions of normal SAM/BAM records. Thus in
47 /// addition to normal fields like bases, qualities, mapping coordinates, etc.,
48 /// tags are used extensively to annotate records with additional
49 /// PacBio-specific data.
50 ///
51 /// Mapping and clipping APIs are provided as well to ensure that such
52 /// operations "trickle down" to all data fields properly.
53 ///
54 /// \sa https://samtools.github.io/hts-specs/SAMv1.pdf
55 ///     for more information on standard %BAM data, and
56 ///     https://github.com/PacificBiosciences/PacBioFileFormats/blob/3.0/BAM.rst
57 ///     for more information on %PacBio %BAM fields.
58 ///
59 class PBBAM_EXPORT BamRecord
60 {
61 public:
62     /// \name Constructors & Related Methods
63     /// \{
64 
65     BamRecord();
66     BamRecord(BamHeader header);
67     BamRecord(BamRecordImpl impl);
68     BamRecord(const BamRecord& other);
69     BamRecord(BamRecord&& other);
70     BamRecord& operator=(const BamRecord& other);
71     BamRecord& operator=(BamRecord&& other);
72     virtual ~BamRecord();
73 
74     /// \}
75 
76 public:
77     /// \name General Data
78     /// \{
79 
80     /// \returns this record's full name
81     /// \sa BamRecordImpl::Name
82     ///
83     std::string FullName() const;
84 
85     /// \returns shared pointer to this record's associated BamHeader
86     BamHeader Header() const;
87 
88     /// \returns ZMW hole number
89     /// \throws if missing zm tag & record name does not contain hole number
90     ///
91     int32_t HoleNumber() const;
92 
93     /// \returns this record's LocalContextFlags
94     PacBio::BAM::LocalContextFlags LocalContextFlags() const;
95 
96     /// \returns this record's movie name
97     std::string MovieName() const;
98 
99     /// \returns "number of complete passes of the insert"
100     int32_t NumPasses() const;
101 
102     /// \returns the record's query end position, or Sequence().length() if not
103     ///          stored
104     /// \note QueryEnd is in polymerase read coordinates, NOT genomic
105     ///       coordinates.
106     ///
107     Position QueryEnd() const;
108 
109     /// \returns the record's query start position, or 0 if not stored
110     ///
111     /// \note QueryStart is in polymerase read coordinates, NOT genomic
112     ///       coordinates.
113     ///
114     Position QueryStart() const;
115 
116     /// \returns this record's expected read accuracy [0, 1000]
117     Accuracy ReadAccuracy() const;
118 
119     /// \returns ReadGroupInfo object for this record
120     ReadGroupInfo ReadGroup() const;
121 
122     /// \returns string ID of this record's read group
123     /// \sa ReadGroupInfo::Id
124     ///
125     std::string ReadGroupId() const;
126 
127     /// \returns integer value for this record's read group ID
128     int32_t ReadGroupNumericId() const;
129 
130     /// \returns this scrap record's scrap region type
131     VirtualRegionType ScrapRegionType() const;
132 
133     /// \returns this scrap record's scrap ZMW type
134     ZmwType ScrapZmwType() const;
135 
136     /// \returns this record's average signal-to-noise for each of A, C, G,
137     ///          and T
138     ///
139     std::vector<float> SignalToNoise() const;
140 
141     /// \returns this record's type
142     /// \sa RecordType
143     RecordType Type() const;
144 
145     /// \}
146 
147 public:
148     /// \name Mapping Data
149     /// \{
150 
151     /// \returns the record's aligned end position
152     ///
153     /// \note AlignedEnd is in polymerase read coordinates, NOT genomic
154     ///       coordinates.
155     ///
156     Position AlignedEnd() const;
157 
158     /// \returns the record's aligned start position
159     ///
160     /// \note AlignedStart is in polymerase read coordinates, NOT genomic
161     ///       coordinates.
162     ///
163     Position AlignedStart() const;
164 
165     /// \returns the record's strand as a Strand enum value
166     Strand AlignedStrand() const;
167 
168     /// \returns the record's CIGAR data as a Cigar object
169     ///
170     /// \param[in] exciseAllClips   if true, remove all clipping operations
171     ///                             (hard & soft) [default:false]
172     ///
173     Cigar CigarData(bool exciseAllClips = false) const;
174 
175     /// \returns true if this record was mapped by aligner
176     bool IsMapped() const;
177 
178     /// \returns this record's mapping quality. A value of 255 indicates
179     ///          "unknown"
180     ///
181     uint8_t MapQuality() const;
182 
183     /// \returns the number of deleted bases (relative to reference)
184     size_t NumDeletedBases() const;
185 
186     /// \returns the number of inserted bases (relative to reference)
187     size_t NumInsertedBases() const;
188 
189     /// \returns the number of matching bases (sum of '=' CIGAR op lengths)
190     size_t NumMatches() const;
191 
192     /// \returns a tuple containing NumMatches (first) and NumMismatches
193     ///         (second)
194     ///
195     std::pair<size_t, size_t> NumMatchesAndMismatches() const;
196 
197     /// \returns the number of mismatching bases (sum of 'X' CIGAR op lengths)
198     size_t NumMismatches() const;
199 
200     /// \returns this record's reference ID, or -1 if unmapped.
201     ///
202     /// \note This is only a valid identifier within this %BAM file
203     ///
204     int32_t ReferenceId() const;
205 
206     /// \returns this record's reference name.
207     ///
208     /// \throws an exception if unmapped record.
209     ///
210     std::string ReferenceName() const;
211 
212     /// \returns the record's reference end position, or UnmappedPosition if
213     ///          unmapped
214     ///
215     /// \note ReferenceEnd is in reference coordinates, NOT polymerase read
216     ///       coordinates.
217     ///
218     Position ReferenceEnd() const;
219 
220     /// \returns the record's reference start position, or UnmappedPosition if
221     ///          unmapped
222     ///
223     /// \note ReferenceStart is in reference coordinates, NOT polymerase read
224     ///       coordinates.
225     ///
226     Position ReferenceStart() const;
227 
228     /// \}
229 
230 public:
231     /// \name Barcode Data
232     /// \{
233 
234     /// \returns forward barcode id
235     ///
236     /// \throws std::runtime_error if barcode data is absent or malformed.
237     /// \sa HasBarcodes
238     ///
239     int16_t BarcodeForward() const;
240 
241     /// \returns barcode call confidence (Phred-scaled posterior probability
242     ///          of correct barcode call)
243     ///
244     /// \sa HasBarcodeQuality
245     ///
246     uint8_t BarcodeQuality() const;
247 
248     /// \returns reverse barcode id
249     ///
250     /// \throws std::runtime_error if barcode data is absent or malformed.
251     /// \sa HasBarcodes
252     ///
253     int16_t BarcodeReverse() const;
254 
255     /// \returns the forward and reverse barcode ids
256     ///
257     /// \throws std::runtime_error if barcode data is absent or malformed.
258     /// \sa HasBarcodes
259     ///
260     std::pair<int16_t, int16_t> Barcodes() const;
261 
262     /// \}
263 
264 public:
265     /// \name Auxiliary Data Queries
266     /// \{
267 
268     /// \returns true if this record has AltLabelQV data
269     bool HasAltLabelQV() const;
270 
271     /// \returns true if this record has AltLabelTag data
272     bool HasAltLabelTag() const;
273 
274     /// \returns true if this record has Barcode data
275     bool HasBarcodes() const;
276 
277     /// \returns true is this record has BarcodeQuality data
278     bool HasBarcodeQuality() const;
279 
280     /// \returns true if this record has DeletionQV data
281     bool HasDeletionQV() const;
282 
283     /// \returns true if this record has DeletionTag data
284     bool HasDeletionTag() const;
285 
286     /// \returns true if this record has a HoleNumber
287     bool HasHoleNumber() const;
288 
289     /// \returns true if this record has InsertionQV data
290     bool HasInsertionQV() const;
291 
292     /// \returns true if this record has IPD data
293     bool HasIPD() const;
294 
295     /// \returns true if this record has LabelQV data
296     bool HasLabelQV() const;
297 
298     /// \returns true if this record has LocalContextFlags (absent in CCS)
299     bool HasLocalContextFlags() const;
300 
301     /// \returns true if this record has MergeQV data
302     bool HasMergeQV() const;
303 
304     /// \returns true if this record has NumPasses data
305     bool HasNumPasses() const;
306 
307     /// \returns true if this record has Pkmean data
308     bool HasPkmean() const;
309 
310     /// \returns true if this record has Pkmid data
311     bool HasPkmid() const;
312 
313     /// \returns true if this record has Pkmean2 data
314     bool HasPkmean2() const;
315 
316     /// \returns true if this record has Pkmid2 data
317     bool HasPkmid2() const;
318 
319     /// \returns true if this record has PreBaseFrames aka IPD data
320     bool HasPreBaseFrames() const;
321 
322     /// \returns true if this record has PrePulseFrames data
323     bool HasPrePulseFrames() const;
324 
325     /// \returns true if this record has PulseCall data
326     bool HasPulseCall() const;
327 
328     /// \returns true if this record has PulseCallWidth data
329     bool HasPulseCallWidth() const;
330 
331     /// \returns true if this record has PulseExclusion data
332     bool HasPulseExclusion(void) const;
333 
334     /// \returns true if this record has PulseMergeQV data
335     bool HasPulseMergeQV() const;
336 
337     /// \returns true if this record has PulseWidth data
338     bool HasPulseWidth() const;
339 
340     /// \returns true if this record has ReadAccuracyTag data
341     bool HasReadAccuracy() const;
342 
343     /// \returns true if this record has QueryEnd data
344     bool HasQueryEnd() const;
345 
346     /// \returns true if this record has QueryStart data
347     bool HasQueryStart() const;
348 
349     /// \returns true if this record has ScrapRegionType data (only in SCRAP)
350     bool HasScrapRegionType() const;
351 
352     /// \returns true if this record has scrap ZMW type data (only in SCRAP)
353     bool HasScrapZmwType() const;
354 
355     /// \returns true if this record has signal-to-noise data (absent in
356     ///          POLYMERASE)
357     ///
358     bool HasSignalToNoise() const;
359 
360     /// \returns true if this record has StartFrame data
361     bool HasStartFrame() const;
362 
363     /// \returns true if this record has SubstitutionQV data
364     bool HasSubstitutionQV() const;
365 
366     /// \returns true if this record has SubstitutionTag data
367     bool HasSubstitutionTag() const;
368 
369     /// \}
370 
371 public:
372     /// \name Sequence & Tag Data
373     /// \{
374 
375     /// \brief Fetches this record's AltLabelTag values ("pt" tag).
376     ///
377     /// \note If \p aligned is true, and gaps/padding need to be inserted, the
378     ///       new gap chars will be '-' and padding chars will be '*'.
379     ///
380     /// \param[in] orientation      Orientation of output.
381     ///
382     /// \returns AltLabelTags string
383     ///
384     std::string AltLabelTag(Orientation orientation = Orientation::NATIVE, bool aligned = false,
385                             bool exciseSoftClips = false,
386                             PulseBehavior pulseBehavior = PulseBehavior::ALL) const;
387 
388     /// \brief Fetches this record's DeletionTag values ("dt" tag).
389     ///
390     /// \note If \p aligned is true, and gaps/padding need to be inserted, the
391     ///       new gap chars will be '-' and padding chars will be '*'.
392     ///
393     /// \param[in] orientation      Orientation of output.
394     /// \param[in] aligned          if true, gaps/padding will be inserted, per
395     ///                             Cigar info.
396     /// \param[in] exciseSoftClips  if true, any soft-clipped positions will be
397     ///                             removed from query ends
398     ///
399     /// \returns DeletionTag string
400     ///
401     std::string DeletionTag(Orientation orientation = Orientation::NATIVE, bool aligned = false,
402                             bool exciseSoftClips = false) const;
403 
404     /// \brief Fetches this record's DNA sequence (SEQ field).
405     ///
406     /// \note If \p aligned is true, and gaps/padding need to be inserted, the
407     ///       new gap chars will be '-' and padding chars will be '*'.
408     ///
409     /// \param[in] orientation      Orientation of output.
410     /// \param[in] aligned          if true, gaps/padding will be inserted, per
411     ///                             Cigar info.
412     /// \param[in] exciseSoftClips  if true, any soft-clipped positions will be
413     ///                             removed from query ends
414     ///
415     /// \returns sequence string
416     ///
417     std::string Sequence(const Orientation orientation = Orientation::NATIVE, bool aligned = false,
418                          bool exciseSoftClips = false) const;
419 
420     /// \brief Fetches this record's SubstitutionTag values ("st" tag).
421     ///
422     /// \note If \p aligned is true, and gaps/padding need to be inserted, the
423     ///       new gap chars will be '-' and padding chars will be '*'.
424     ///
425     /// \param[in] orientation      Orientation of output.
426     /// \param[in] aligned          if true, gaps/padding will be inserted, per
427     ///                             Cigar info.
428     /// \param[in] exciseSoftClips  if true, any soft-clipped positions will be
429     ///                             removed from query ends
430     ///
431     /// \returns SubstitutionTags string
432     ///
433     std::string SubstitutionTag(Orientation orientation = Orientation::NATIVE, bool aligned = false,
434                                 bool exciseSoftClips = false) const;
435 
436     /// \}
437 
438 public:
439     /// \name Quality Data
440     /// \{
441 
442     /// \brief Fetches this record's AltLabelQV values ("pv" tag).
443     ///
444     /// \note If \p aligned is true, and gaps/padding need to be inserted, the
445     ///       new QVs will have a value of 0.
446     ///
447     /// \param[in] orientation     Orientation of output.
448     ///
449     /// \returns AltLabelQV as QualityValues object
450     ///
451     QualityValues AltLabelQV(Orientation orientation = Orientation::NATIVE, bool aligned = false,
452                              bool exciseSoftClips = false,
453                              PulseBehavior pulseBehavior = PulseBehavior::ALL) const;
454 
455     /// \brief Fetches this record's DeletionQV values ("dq" tag).
456     ///
457     /// \note If \p aligned is true, and gaps/padding need to be inserted, the
458     ///       new QVs will have a value of 0.
459     ///
460     /// \param[in] orientation      Orientation of output.
461     /// \param[in] aligned          if true, gaps/padding will be inserted, per
462     ///                             Cigar info.
463     /// \param[in] exciseSoftClips  if true, any soft-clipped positions will be
464     ///                             removed from query ends
465     ///
466     /// \returns DeletionQV as QualityValues object
467     ///
468     QualityValues DeletionQV(Orientation orientation = Orientation::NATIVE, bool aligned = false,
469                              bool exciseSoftClips = false) const;
470 
471     /// \brief Fetches this record's InsertionQV values ("iq" tag).
472     ///
473     /// \note If \p aligned is true, and gaps/padding need to be inserted, the
474     ///       new QVs will have a value of 0.
475     ///
476     /// \param[in] orientation      Orientation of output.
477     /// \param[in] aligned          if true, gaps/padding will be inserted, per
478     ///                             Cigar info.
479     /// \param[in] exciseSoftClips  if true, any soft-clipped positions will be
480     ///                             removed from query ends
481     ///
482     /// \returns InsertionQVs as QualityValues object
483     ///
484     QualityValues InsertionQV(Orientation orientation = Orientation::NATIVE, bool aligned = false,
485                               bool exciseSoftClips = false) const;
486 
487     /// \brief Fetches this record's LabelQV values ("pq" tag).
488     ///
489     /// \note If \p aligned is true, and gaps/padding need to be inserted, the
490     ///       new QVs will have a value of 0.
491     ///
492     /// \param[in] orientation     Orientation of output.
493     ///
494     /// \returns LabelQV as QualityValues object
495     ///
496     QualityValues LabelQV(Orientation orientation = Orientation::NATIVE, bool aligned = false,
497                           bool exciseSoftClips = false,
498                           PulseBehavior pulseBehavior = PulseBehavior::ALL) const;
499 
500     /// \brief Fetches this record's MergeQV values ("mq" tag).
501     ///
502     /// \note If \p aligned is true, and gaps/padding need to be inserted, the
503     ///       new QVs will have a value of 0.
504     ///
505     /// \param[in] orientation      Orientation of output.
506     /// \param[in] aligned          if true, gaps/padding will be inserted, per
507     ///                             Cigar info.
508     /// \param[in] exciseSoftClips  if true, any soft-clipped positions will be
509     ///                             removed from query ends
510     ///
511     /// \returns MergeQV as QualityValues object
512     ///
513     QualityValues MergeQV(Orientation orientation = Orientation::NATIVE, bool aligned = false,
514                           bool exciseSoftClips = false) const;
515 
516     /// \brief Fetches  this record's %BAM quality values (QUAL field).
517     ///
518     /// \note If \p aligned is true, and gaps/padding need to be inserted, the
519     ///       new QVs will have a value of 0.
520     ///
521     /// \param[in] orientation      Orientation of output.
522     /// \param[in] aligned          if true, gaps/padding will be inserted, per
523     ///                             Cigar info.
524     /// \param[in] exciseSoftClips  if true, any soft-clipped positions will be
525     ///                             removed from query ends
526     ///
527     /// \returns %BAM qualities as QualityValues object
528     ///
529     QualityValues Qualities(Orientation orientation = Orientation::NATIVE, bool aligned = false,
530                             bool exciseSoftClips = false) const;
531 
532     /// \brief Fetches this record's SubstitutionQV values ("sq" tag).
533     ///
534     /// \note If \p aligned is true, and gaps/padding need to be inserted, the
535     ///       new QVs will have a value of 0.
536     ///
537     /// \param[in] orientation      Orientation of output.
538     /// \param[in] aligned          if true, gaps/padding will be inserted, per
539     ///                             Cigar info.
540     /// \param[in] exciseSoftClips  if true, any soft-clipped positions will be
541     ///                             removed from query ends
542     ///
543     /// \returns SubstitutionQV as QualityValues object
544     ///
545     QualityValues SubstitutionQV(Orientation orientation = Orientation::NATIVE,
546                                  bool aligned = false, bool exciseSoftClips = false) const;
547 
548     /// \}
549 
550 public:
551     /// \name Pulse Data
552     /// \{
553 
554     /// \brief Fetches this record's IPD values ("ip" tag).
555     ///
556     /// \note If \p aligned is true, and gaps/padding need to be inserted, the
557     ///       new frames will have a value of 0;
558     ///
559     /// \param[in] orientation      Orientation of output.
560     /// \param[in] aligned          if true, gaps/padding will be inserted, per
561     ///                             Cigar info.
562     /// \param[in] exciseSoftClips  if true, any soft-clipped positions will be
563     ///                             removed from query ends
564     ///
565     /// \returns IPD as Frames object
566     ///
567     Frames IPD(Orientation orientation = Orientation::NATIVE, bool aligned = false,
568                bool exciseSoftClips = false) const;
569 
570     /// \brief Fetches this record's IPD values ("ip" tag), but does not upscale.
571     ///
572     /// \param[in] orientation     Orientation of output.
573     /// \returns IPD as Frames object
574     ///
575     Frames IPDRaw(Orientation orientation = Orientation::NATIVE) const;
576 
577     /// \brief Fetches this record's Pkmean values ("pa" tag).
578     ///
579     /// \param[in] orientation     Orientation of output.
580     /// \returns Pkmean as vector<float> object
581     ///
582     std::vector<float> Pkmean(Orientation orientation = Orientation::NATIVE, bool aligned = false,
583                               bool exciseSoftClips = false,
584                               PulseBehavior pulseBehavior = PulseBehavior::ALL) const;
585 
586     /// \brief Fetches this record's Pkmid values ("pm" tag).
587     ///
588     /// \param[in] orientation     Orientation of output.
589     /// \returns Pkmid as vector<float> object
590     ///
591     std::vector<float> Pkmid(Orientation orientation = Orientation::NATIVE, bool aligned = false,
592                              bool exciseSoftClips = false,
593                              PulseBehavior pulseBehavior = PulseBehavior::ALL) const;
594 
595     /// \brief Fetches this record's Pkmean2 values ("pi" tag).
596     ///
597     /// \param[in] orientation     Orientation of output.
598     /// \returns Pkmean as vector<float> object
599     ///
600     std::vector<float> Pkmean2(Orientation orientation = Orientation::NATIVE, bool aligned = false,
601                                bool exciseSoftClips = false,
602                                PulseBehavior pulseBehavior = PulseBehavior::ALL) const;
603 
604     /// \brief Fetches this record's Pkmid2 values ("ps" tag).
605     ///
606     /// \param[in] orientation     Orientation of output.
607     /// \returns Pkmid as vector<float> object
608     ///
609     std::vector<float> Pkmid2(Orientation orientation = Orientation::NATIVE, bool aligned = false,
610                               bool exciseSoftClips = false,
611                               PulseBehavior pulseBehavior = PulseBehavior::ALL) const;
612 
613     /// \brief Fetches this record's PreBaseFrames aka IPD values ("ip" tag).
614     ///
615     /// \note If \p aligned is true, and gaps/padding need to be inserted, the
616     ///       new frames will have a value of 0;
617     ///
618     /// \param[in] orientation      Orientation of output.
619     /// \param[in] aligned          if true, gaps/padding will be inserted, per
620     ///                             Cigar info.
621     /// \param[in] exciseSoftClips  if true, any soft-clipped positions will be
622     ///                             removed from query ends
623     ///
624     /// \returns IPD as Frames object
625     ///
626     Frames PreBaseFrames(Orientation orientation = Orientation::NATIVE, bool aligned = false,
627                          bool exciseSoftClips = false) const;
628 
629     /// \brief Fetches this record's PrePulseFrames values ("pd" tag).
630     ///
631     /// \param[in] orientation     Orientation of output.
632     /// \returns PrePulseFrames as Frames object
633     ///
634     Frames PrePulseFrames(Orientation orientation = Orientation::NATIVE, bool aligned = false,
635                           bool exciseSoftClips = false,
636                           PulseBehavior pulseBehavior = PulseBehavior::ALL) const;
637 
638     /// \brief Fetches this record's PulseCall values ("pc" tag).
639     ///
640     /// \param[in] orientation     Orientation of output.
641     /// \returns PulseCalls string
642     ///
643     std::string PulseCall(Orientation orientation = Orientation::NATIVE, bool aligned = false,
644                           bool exciseSoftClips = false,
645                           PulseBehavior pulseBehavior = PulseBehavior::ALL) const;
646 
647     /// \brief Fetches this record's PulseCallWidth values ("px" tag).
648     ///
649     /// \param[in] orientation     Orientation of output.
650     /// \returns PulseCallWidth as Frames object
651     ///
652     Frames PulseCallWidth(Orientation orientation = Orientation::NATIVE, bool aligned = false,
653                           bool exciseSoftClips = false,
654                           PulseBehavior pulseBehavior = PulseBehavior::ALL) const;
655 
656     /// \brief Fetches this record's PulseExclusionReason values ("pe" tag).
657     ///
658     /// \returns vector of pulse exclusion reason value
659     ///
660     std::vector<PacBio::BAM::PulseExclusionReason> PulseExclusionReason(
661         Orientation orientation = Orientation::NATIVE, bool aligned = false,
662         bool exciseSoftClips = false, PulseBehavior pulseBehavior = PulseBehavior::ALL) const;
663 
664     /// \brief Fetch this record's PulseMergeQV values ("pg" tag).
665     ///
666     /// \param[in] orientation     Orientation of output.
667     /// \returns PulseMergeQV as QualityValues object
668     ///
669     QualityValues PulseMergeQV(Orientation orientation = Orientation::NATIVE, bool aligned = false,
670                                bool exciseSoftClips = false,
671                                PulseBehavior pulseBehavior = PulseBehavior::ALL) const;
672 
673     /// \brief Fetches this record's PulseWidth values ("pw" tag).
674     ///
675     /// \note If \p aligned is true, and gaps/padding need to be inserted, the
676     ///       new frames will have a value of 0.
677     ///
678     /// \param[in] orientation      Orientation of output.
679     /// \param[in] aligned          if true, gaps/padding will be inserted, per
680     ///                             Cigar info.
681     /// \param[in] exciseSoftClips  if true, any soft-clipped positions will be
682     ///                             removed from query ends
683     ///
684     /// \returns PulseWidths as Frames object
685     ///
686     Frames PulseWidth(Orientation orientation = Orientation::NATIVE, bool aligned = false,
687                       bool exciseSoftClips = false) const;
688 
689     /// \brief Fetches this record's PulseWidth values ("pw" tag), but does not
690     ///        upscale.
691     ///
692     /// \param[in] orientation     Orientation of output.
693     /// \returns PulseWidth as Frames object
694     ///
695     Frames PulseWidthRaw(Orientation orientation = Orientation::NATIVE, bool aligned = false,
696                          bool exciseSoftClips = false) const;
697 
698     /// \brief Fetches this record's StartFrame values ("sf" tag).
699     ///
700     /// \param[in] orientation     Orientation of output
701     ///
702     /// \returns StartFrame as uint32_t vector
703     ///
704     std::vector<uint32_t> StartFrame(Orientation orientation = Orientation::NATIVE,
705                                      bool aligned = false, bool exciseSoftClips = false,
706                                      PulseBehavior pulseBehavior = PulseBehavior::ALL) const;
707 
708     /// \}
709 
710 public:
711     /// \name Low-Level Access & Operations
712     /// \{
713 
714     /// \warning This method should be considered temporary and avoided as much
715     ///          as possible. Direct access to the internal object is likely to
716     ///          disappear as BamRecord interface matures.
717     ///
718     /// \returns const reference to underlying BamRecordImpl object
719     ///
720     const BamRecordImpl& Impl() const;
721 
722     /// \warning This method should be considered temporary and avoided as much
723     ///          as possible. Direct access to the internal object is likely to
724     ///          disappear as BamRecord interface matures.
725     ///
726     /// \returns reference to underlying BamRecordImpl object
727     ///
728     BamRecordImpl& Impl();
729 
730     /// \}
731 
732 public:
733     /// \name General Data
734     /// \{
735 
736     /// \brief Sets this record's ZMW hole number.
737     ///
738     /// \param[in] holeNumber
739     /// \returns reference to this record
740     ///
741     BamRecord& HoleNumber(const int32_t holeNumber);
742 
743     /// \brief Sets this record's local context flags
744     ///
745     /// \param[in] flags
746     /// \returns reference to this record
747     ///
748     BamRecord& LocalContextFlags(const PacBio::BAM::LocalContextFlags flags);
749 
750     /// \brief Sets this record's "number of complete passes of the insert".
751     ///
752     /// \param[in] numPasses
753     /// \returns reference to this record
754     ///
755     BamRecord& NumPasses(const int32_t numPasses);
756 
757     /// \brief Sets this record's query end position.
758     ///
759     /// \note Changing this will modify the name of non-CCS records.
760     ///
761     /// \param[in] pos
762     /// \returns reference to this record
763     ///
764     BamRecord& QueryEnd(const PacBio::BAM::Position pos);
765 
766     /// \brief Sets this record's query start position.
767     ///
768     /// \note Changing this will modify the name of non-CCS records.
769     ///
770     /// \param[in] pos
771     /// \returns reference to this record
772     ///
773     BamRecord& QueryStart(const PacBio::BAM::Position pos);
774 
775     /// \brief Sets this record's expected read accuracy [0, 1000]
776     ///
777     /// \param[in] accuracy
778     /// \returns reference to this record
779     ///
780     BamRecord& ReadAccuracy(const Accuracy& accuracy);
781 
782     /// \brief Attaches this record to the provided read group, changing the
783     ///        record name & 'RG' tag.
784     ///
785     /// \param[in] rg
786     /// \returns reference to this record
787     ///
788     BamRecord& ReadGroup(const ReadGroupInfo& rg);
789 
790     /// \brief Attaches this record to the provided read group, changing the
791     ///        record name & 'RG' tag.
792     ///
793     /// \param[in] id
794     /// \returns reference to this record
795     ///
796     BamRecord& ReadGroupId(const std::string& id);
797 
798     /// \brief Sets this scrap record's ScrapRegionType
799     ///
800     /// \param[in] type
801     /// \returns reference to this record
802     ///
803     BamRecord& ScrapRegionType(const VirtualRegionType type);
804 
805     /// \brief Sets this scrap record's ScrapRegionType
806     ///
807     /// \param[in] type character equivalent of VirtualRegionType
808     /// \returns reference to this record
809     ///
810     BamRecord& ScrapRegionType(const char type);
811 
812     /// \brief Sets this scrap record's ScrapZmwType
813     ///
814     /// \param[in] type
815     /// \returns reference to this record
816     ///
817     BamRecord& ScrapZmwType(const ZmwType type);
818 
819     /// \brief Sets this scrap record's ScrapZmwType
820     ///
821     /// \param[in] type character equivalent of ZmwType
822     /// \returns reference to this record
823     ///
824     BamRecord& ScrapZmwType(const char type);
825 
826     /// \brief Sets this record's average signal-to-noise in each of A, C, G,
827     ///        and T
828     ///
829     /// \param[in] snr average signal-to-noise of A, C, G, and T (in this order)
830     /// \returns reference to this record
831     ///
832     BamRecord& SignalToNoise(const std::vector<float>& snr);
833 
834     /// \}
835 
836 public:
837     /// \name Barcode Data
838     /// \{
839 
840     /// \brief Sets this record's barcode IDs ('bc' tag)
841     ///
842     /// \param[in] barcodeIds
843     /// \returns reference to this record
844     ///
845     BamRecord& Barcodes(const std::pair<int16_t, int16_t>& barcodeIds);
846 
847     /// \brief Sets this record's barcode quality ('bq' tag)
848     ///
849     /// \param[in] quality Phred-scaled confidence call
850     /// \returns reference to this record
851     ///
852     BamRecord& BarcodeQuality(const uint8_t quality);
853 
854     /// \}
855 
856 public:
857     /// \name Sequence & Tag Data
858     /// \{
859 
860     /// \brief Sets this record's AltLabelTag values ("at" tag).
861     ///
862     /// \param[in] tags
863     /// \returns reference to this record
864     ///
865     BamRecord& AltLabelTag(const std::string& tags);
866 
867     /// \brief Sets this record's DeletionTag values ("dt" tag).
868     ///
869     /// \param[in] tags
870     /// \returns reference to this record
871     ///
872     BamRecord& DeletionTag(const std::string& tags);
873 
874     /// \brief Sets this record's SubstitutionTag values ("st" tag).
875     ///
876     /// \param[in] tags
877     /// \returns reference to this record
878     ///
879     BamRecord& SubstitutionTag(const std::string& tags);
880 
881     /// \}
882 
883 public:
884     /// \name Quality Data
885     /// \{
886 
887     /// \brief Sets this record's AltLabelQV values ("pv" tag).
888     ///
889     /// \param[in] altLabelQVs
890     /// \returns reference to this record
891     ///
892     BamRecord& AltLabelQV(const QualityValues& altLabelQVs);
893 
894     /// \brief Sets this record's DeletionQV values ("dq" tag).
895     ///
896     /// \param[in] deletionQVs
897     /// \returns reference to this record
898     ///
899     BamRecord& DeletionQV(const QualityValues& deletionQVs);
900 
901     /// \brief Sets this record's InsertionQV values ("iq" tag).
902     ///
903     /// \param[in] insertionQVs
904     /// \returns reference to this record
905     ///
906     BamRecord& InsertionQV(const QualityValues& insertionQVs);
907 
908     /// \brief Sets this record's LabelQV values ("pq" tag).
909     ///
910     /// \param[in] labelQVs
911     /// \returns reference to this record
912     ///
913     BamRecord& LabelQV(const QualityValues& labelQVs);
914 
915     /// \brief Sets this record's MergeQV values ("mq" tag).
916     ///
917     /// \param[in] mergeQVs
918     /// \returns reference to this record
919     ///
920     BamRecord& MergeQV(const QualityValues& mergeQVs);
921 
922     /// \brief Sets this record's SubstitutionQV values ("sq" tag).
923     ///
924     /// \param[in] substitutionQVs
925     /// \returns reference to this record
926     ///
927     BamRecord& SubstitutionQV(const QualityValues& substitutionQVs);
928 
929     /// \}
930 
931 public:
932     /// \name Pulse Data
933     /// \{
934 
935     /// \brief Sets this record's IPD values ("ip" tag).
936     ///
937     /// \param[in] frames
938     /// \param[in] encoding specify how to encode the data (8-bit lossy, or
939     ///                     16-bit lossless)
940     /// \returns reference to this record
941     ///
942     BamRecord& IPD(const Frames& frames, const FrameEncodingType encoding);
943 
944     /// \brief Sets this record's Pkmean values ("pm" tag).
945     ///
946     /// \param[in] photons
947     /// \returns reference to this record
948     ///
949     BamRecord& Pkmean(const std::vector<float>& photons);
950 
951     /// \brief Sets this record's Pkmean values ("pm" tag).
952     ///
953     /// \param[in] encodedPhotons
954     /// \returns reference to this record
955     ///
956     BamRecord& Pkmean(const std::vector<uint16_t>& encodedPhotons);
957 
958     /// \brief Sets this record's Pkmid values ("pa" tag).
959     ///
960     /// \param[in] photons
961     /// \returns reference to this record
962     ///
963     BamRecord& Pkmid(const std::vector<float>& photons);
964 
965     /// \brief Sets this record's Pkmid values ("pa" tag).
966     ///
967     /// \param[in] encodedPhotons
968     /// \returns reference to this record
969     ///
970     BamRecord& Pkmid(const std::vector<uint16_t>& encodedPhotons);
971 
972     /// \brief Sets this record's Pkmean2 values ("ps" tag).
973     ///
974     /// \param[in] photons
975     /// \returns reference to this record
976     ///
977     BamRecord& Pkmean2(const std::vector<float>& photons);
978 
979     /// \brief Sets this record's Pkmean2 values ("ps" tag).
980     ///
981     /// \param[in] encodedPhotons
982     /// \returns reference to this record
983     ///
984     BamRecord& Pkmean2(const std::vector<uint16_t>& encodedPhotons);
985 
986     /// \brief Sets this record's Pkmid2 values ("pi" tag).
987     ///
988     /// \param[in] photons
989     /// \returns reference to this record
990     ///
991     BamRecord& Pkmid2(const std::vector<float>& photons);
992 
993     /// \brief Sets this record's Pkmid2 values ("pi" tag).
994     ///
995     /// \param[in] encodedPhotons
996     /// \returns reference to this record
997     ///
998     BamRecord& Pkmid2(const std::vector<uint16_t>& encodedPhotons);
999 
1000     /// \brief Sets this record's PreBaseFrames aka IPD values ("ip" tag).
1001     ///
1002     /// \param[in] frames
1003     /// \param[in] encoding specify how to encode the data (8-bit lossy, or
1004     ///                     16-bit lossless)
1005     /// \returns reference to this record
1006     ///
1007     BamRecord& PreBaseFrames(const Frames& frames, const FrameEncodingType encoding);
1008 
1009     /// \brief Sets this record's PrePulseFrames values ("pd" tag).
1010     ///
1011     /// \param[in] frames
1012     /// \param[in] encoding specify how to encode the data (8-bit lossy, or
1013     ///                     16-bit lossless)
1014     /// \returns reference to this record
1015     ///
1016     BamRecord& PrePulseFrames(const Frames& frames, const FrameEncodingType encoding);
1017 
1018     /// \brief Sets this record's PulseCall values ("pc" tag).
1019     ///
1020     /// \param[in] tags
1021     /// \returns reference to this record
1022     ///
1023     BamRecord& PulseCall(const std::string& tags);
1024 
1025     /// \brief Sets this record's PulseCallWidth values ("px" tag).
1026     ///
1027     /// \param[in] frames
1028     /// \param[in] encoding specify how to encode the data (8-bit lossy, or
1029     ///                     16-bit lossless)
1030     /// \returns reference to this record
1031     ///
1032     BamRecord& PulseCallWidth(const Frames& frames, const FrameEncodingType encoding);
1033 
1034     ///
1035     /// \\brief Sets this record's PulseExclusionReason values ("pe" tag).
1036     /// \param[in] reasons
1037     /// \return reference to this record
1038     ///
1039     BamRecord& PulseExclusionReason(const std::vector<PacBio::BAM::PulseExclusionReason>& reasons);
1040 
1041     /// \brief Sets this record's PulseMergeQV values ("pg" tag).
1042     ///
1043     /// \param[in] pulseMergeQVs
1044     /// \returns reference to this record
1045     ///
1046     BamRecord& PulseMergeQV(const QualityValues& pulseMergeQVs);
1047 
1048     /// \brief Sets this record's PulseWidth values ("pw" tag).
1049     ///
1050     /// \param[in] frames
1051     /// \param[in] encoding specify how to encode the data (8-bit lossy, or
1052     ///                     16-bit lossless)
1053     /// \returns reference to this record
1054     ///
1055     BamRecord& PulseWidth(const Frames& frames, const FrameEncodingType encoding);
1056 
1057     /// \brief Sets this record's StartFrame values ("sf" tag).
1058     ///
1059     /// \param[in] startFrame
1060     /// \returns reference to this record
1061     ///
1062     BamRecord& StartFrame(const std::vector<uint32_t>& startFrame);
1063 
1064     /// \}
1065 
1066 public:
1067     /// \name Low-Level Access & Operations
1068     /// \{
1069 
1070     /// \brief Resets cached aligned start/end.
1071     ///
1072     /// \note This method should not be needed in most client code. It exists
1073     ///       primarily as a hook for internal reading loops (queries, index
1074     ///       build, etc.) It's essentially a workaround and will likely be
1075     ///       removed from the API.
1076     ///
1077     void ResetCachedPositions() const;
1078 
1079     /// \brief Resets cached aligned start/end.
1080     ///
1081     /// \note This method should not be needed in most client code. It exists
1082     ///       primarily as a hook for internal reading loops (queries, index
1083     ///       build, etc.) It's essentially a workaround and will likely be
1084     ///       removed from the API.
1085     ///
1086     void ResetCachedPositions();
1087 
1088     /// \brief Updates the record's name (BamRecord::FullName) to reflect
1089     ///        modifications to name components (movie name, ZMW hole number,
1090     ///        etc.)
1091     ///
1092     void UpdateName();
1093 
1094     /// \}
1095 
1096 public:
1097     /// \name Pulse Data
1098     /// \{
1099 
1100     static const float photonFactor;
1101 
1102     static std::vector<uint16_t> EncodePhotons(const std::vector<float>& data);
1103 
1104     /// \}
1105 
1106 public:
1107     /// \name Clipping & Mapping
1108     /// \{
1109 
1110     /// Creates a copied record from input, with clipping applied
1111     static BamRecord Clipped(const BamRecord& input, const ClipType clipType,
1112                              const PacBio::BAM::Position start, const PacBio::BAM::Position end);
1113 
1114     /// Creates a copied record from input, with mapping applied
1115     static BamRecord Mapped(const BamRecord& input, const int32_t referenceId,
1116                             const Position refStart, const Strand strand, const Cigar& cigar,
1117                             const uint8_t mappingQuality);
1118 
1119     /// Applies clipping to this record
1120     BamRecord& Clip(const ClipType clipType, const PacBio::BAM::Position start,
1121                     const PacBio::BAM::Position end);
1122 
1123     /// Creates a copied record from this one, with clipping applied
1124     BamRecord Clipped(const ClipType clipType, const PacBio::BAM::Position start,
1125                       const PacBio::BAM::Position end) const;
1126 
1127     /// Applies mapping to this record
1128     BamRecord& Map(const int32_t referenceId, const Position refStart, const Strand strand,
1129                    const Cigar& cigar, const uint8_t mappingQuality);
1130 
1131     /// Creates a copied record from this one, with mapping applied
1132     BamRecord Mapped(const int32_t referenceId, const Position refStart, const Strand strand,
1133                      const Cigar& cigar, const uint8_t mappingQuality) const;
1134     /// \}
1135 
1136 private:
1137     BamRecordImpl impl_;
1138 
1139 public:
1140     /// public & mutable so that queries can directly set the header info,
1141     /// even on a record that is const from client code's perspective
1142     mutable BamHeader header_;
1143 
1144 private:
1145     /// \internal
1146     /// cached positions (mutable to allow lazy-calc in const methods)
1147     mutable Position alignedStart_;
1148     mutable Position alignedEnd_;
1149 
1150 private:
1151     /// \internal
1152     /// pulse to bam mapping cache
1153     mutable std::unique_ptr<internal::Pulse2BaseCache> p2bCache_;
1154 
1155 public:
1156     /// clips the PacBio tags to a specified length
1157     void ClipTags(const size_t clipPos, const size_t clipLength);
1158 
1159 private:
1160     ///\internal
1161     /// clipping methods
1162 
1163     void ClipFields(const size_t clipPos, const size_t clipLength);
1164 
1165     BamRecord& ClipToQuery(const PacBio::BAM::Position start, const PacBio::BAM::Position end);
1166     BamRecord& ClipToReference(const PacBio::BAM::Position start, const PacBio::BAM::Position end);
1167     BamRecord& ClipToReferenceForward(const PacBio::BAM::Position start,
1168                                       const PacBio::BAM::Position end);
1169     BamRecord& ClipToReferenceReverse(const PacBio::BAM::Position start,
1170                                       const PacBio::BAM::Position end);
1171 
1172 private:
1173     ///\internal
1174     /// raw tag data fetching
1175 
1176     // sequence tags
1177     std::string FetchBasesRaw(const BamRecordTag tag) const;
1178     std::string FetchBases(const BamRecordTag tag,
1179                            const Orientation orientation = Orientation::NATIVE,
1180                            const bool aligned = false, const bool exciseSoftClips = false,
1181                            const PulseBehavior pulseBehavior = PulseBehavior::ALL) const;
1182 
1183     // frame tags
1184     Frames FetchFramesRaw(const BamRecordTag tag) const;
1185     Frames FetchFrames(const BamRecordTag tag, const Orientation orientation = Orientation::NATIVE,
1186                        const bool aligned = false, const bool exciseSoftClips = false,
1187                        const PulseBehavior pulseBehavior = PulseBehavior::ALL) const;
1188 
1189     // pulse tags
1190     std::vector<float> FetchPhotonsRaw(const BamRecordTag tag) const;
1191     std::vector<float> FetchPhotons(const BamRecordTag tag,
1192                                     const Orientation orientation = Orientation::NATIVE,
1193                                     const bool aligned = false, const bool exciseSoftClips = false,
1194                                     const PulseBehavior pulseBehavior = PulseBehavior::ALL) const;
1195 
1196     // QV tags
1197     QualityValues FetchQualitiesRaw(const BamRecordTag tag) const;
1198     QualityValues FetchQualities(const BamRecordTag tag,
1199                                  const Orientation orientation = Orientation::NATIVE,
1200                                  const bool aligned = false, const bool exciseSoftClips = false,
1201                                  const PulseBehavior pulseBehavior = PulseBehavior::ALL) const;
1202 
1203     // UInt tags (e.g. start frame)
1204     //
1205     // TODO (DB): clean this up w.r.t FetchUInt8s
1206     //
1207     std::vector<uint32_t> FetchUInt32sRaw(const BamRecordTag tag) const;
1208     std::vector<uint32_t> FetchUInt32s(
1209         const BamRecordTag tag, const Orientation orientation = Orientation::NATIVE,
1210         const bool aligned = false, const bool exciseSoftClips = false,
1211         const PulseBehavior pulseBehavior = PulseBehavior::ALL) const;
1212 
1213     // UInt tags (e.g. pulse exclusion)
1214     //
1215     // ODO (DB): clean this up w.r.t FetchUInt32s
1216     //
1217     std::vector<uint8_t> FetchUInt8sRaw(const BamRecordTag tag) const;
1218     std::vector<uint8_t> FetchUInt8s(const BamRecordTag tag,
1219                                      const Orientation orientation = Orientation::NATIVE,
1220                                      const bool aligned = false, const bool exciseSoftClips = false,
1221                                      const PulseBehavior pulseBehavior = PulseBehavior::ALL) const;
1222 
1223 private:
1224     ///\internal
1225     /// marked const to allow calling from const methods
1226     /// but updates our mutable cached values
1227     void CalculateAlignedPositions() const;
1228     void CalculatePulse2BaseCache() const;
1229 
1230     friend class internal::BamRecordMemory;
1231 };
1232 
1233 }  // namespace BAM
1234 }  // namespace PacBio
1235 
1236 #include "pbbam/internal/BamRecord.inl"
1237 
1238 #endif  // BAMRECORD_H
1239