1 // File Description
2 /// \file BamRecord.cpp
3 /// \brief Implements the BamRecord class.
4 //
5 // Author: Derek Barnett
6 
7 #include "PbbamInternalConfig.h"
8 
9 #include "pbbam/BamRecord.h"
10 
11 #include <cstddef>
12 #include <cstdint>
13 #include <iostream>
14 #include <stdexcept>
15 
16 #include <htslib/sam.h>
17 #include <boost/numeric/conversion/cast.hpp>
18 
19 #include "BamRecordTags.h"
20 #include "MemoryUtils.h"
21 #include "Pulse2BaseCache.h"
22 #include "SequenceUtils.h"
23 #include "pbbam/MakeUnique.h"
24 #include "pbbam/ZmwTypeMap.h"
25 #include "pbbam/virtual/VirtualRegionTypeMap.h"
26 
27 namespace PacBio {
28 namespace BAM {
29 namespace internal {
30 
31 // record type names
32 static const std::string recordTypeName_ZMW{"ZMW"};
33 static const std::string recordTypeName_Polymerase{"POLYMERASE"};
34 static const std::string recordTypeName_HqRegion{"HQREGION"};
35 static const std::string recordTypeName_Subread{"SUBREAD"};
36 static const std::string recordTypeName_CCS{"CCS"};
37 static const std::string recordTypeName_Scrap{"SCRAP"};
38 static const std::string recordTypeName_Transcript{"TRANSCRIPT"};
39 static const std::string recordTypeName_Unknown{"UNKNOWN"};
40 
HoleNumberFromName(const std::string & fullName)41 static int32_t HoleNumberFromName(const std::string& fullName)
42 {
43     const auto mainTokens = Split(fullName, '/');
44     if (mainTokens.at(0) == "transcript") {
45         if (mainTokens.size() != 2) throw std::runtime_error{"malformed transcript record name"};
46         return std::stoi(mainTokens.at(1));
47     } else {
48         if (mainTokens.size() != 3) throw std::runtime_error("malformed record name");
49         return std::stoi(mainTokens.at(1));
50     }
51 }
52 
QueryEndFromName(const std::string & fullName)53 static Position QueryEndFromName(const std::string& fullName)
54 {
55     const auto mainTokens = Split(fullName, '/');
56     if (mainTokens.size() != 3) throw std::runtime_error{"malformed record name"};
57 
58     const auto queryTokens = Split(mainTokens.at(2), '_');
59     if (queryTokens.size() != 2) throw std::runtime_error{"malformed record name"};
60 
61     return stoi(queryTokens.at(1));
62 }
63 
QueryStartFromName(const std::string & fullName)64 static Position QueryStartFromName(const std::string& fullName)
65 {
66     const auto mainTokens = Split(fullName, '/');
67     if (mainTokens.size() != 3) throw std::runtime_error{"malformed record name"};
68 
69     const auto queryTokens = Split(mainTokens.at(2), '_');
70     if (queryTokens.size() != 2) throw std::runtime_error{"malformed record name"};
71 
72     return stoi(queryTokens.at(0));
73 }
74 
Label(const BamRecordTag tag)75 static inline std::string Label(const BamRecordTag tag) { return BamRecordTags::LabelFor(tag); }
76 
CreateOrEdit(const BamRecordTag tag,const Tag & value,BamRecordImpl * impl)77 static BamRecordImpl* CreateOrEdit(const BamRecordTag tag, const Tag& value, BamRecordImpl* impl)
78 {
79     if (impl->HasTag(tag))
80         impl->EditTag(tag, value);
81     else
82         impl->AddTag(tag, value);
83     return impl;
84 }
85 
AlignedOffsets(const BamRecord & record,const int seqLength)86 static std::pair<int32_t, int32_t> AlignedOffsets(const BamRecord& record, const int seqLength)
87 {
88     int32_t startOffset = 0;
89     int32_t endOffset = seqLength;
90 
91     const auto b = internal::BamRecordMemory::GetRawData(record);
92     uint32_t* cigarData = bam_get_cigar(b.get());
93     const size_t numCigarOps = b->core.n_cigar;
94     if (numCigarOps > 0) {
95 
96         // start offset
97         for (size_t i = 0; i < numCigarOps; ++i) {
98             const auto type = static_cast<CigarOperationType>(bam_cigar_op(cigarData[i]));
99             if (type == CigarOperationType::HARD_CLIP) {
100                 if (startOffset != 0 && startOffset != seqLength) {
101                     startOffset = -1;
102                     break;
103                 }
104             } else if (type == CigarOperationType::SOFT_CLIP)
105                 startOffset += bam_cigar_oplen(cigarData[i]);
106             else
107                 break;
108         }
109 
110         // end offset
111         for (int i = numCigarOps - 1; i >= 0; --i) {
112             const auto type = static_cast<CigarOperationType>(bam_cigar_op(cigarData[i]));
113             if (type == CigarOperationType::HARD_CLIP) {
114                 if (endOffset != 0 && endOffset != seqLength) {
115                     endOffset = -1;
116                     break;
117                 }
118             } else if (type == CigarOperationType::SOFT_CLIP)
119                 endOffset -= bam_cigar_oplen(cigarData[i]);
120             else
121                 break;
122         }
123 
124         if (endOffset == 0) endOffset = seqLength;
125     }
126     return {startOffset, endOffset};
127 }
128 
129 template <typename T>
Clip(const T & input,const size_t pos,const size_t len)130 T Clip(const T& input, const size_t pos, const size_t len)
131 {
132     if (input.empty()) return {};
133     return T{input.cbegin() + pos, input.cbegin() + pos + len};
134 }
135 
136 template <typename T>
ClipPulse(const T & input,internal::Pulse2BaseCache * p2bCache,const size_t pos,const size_t len)137 T ClipPulse(const T& input, internal::Pulse2BaseCache* p2bCache, const size_t pos, const size_t len)
138 {
139     assert(p2bCache);
140     if (input.empty()) return {};
141 
142     // find start
143     size_t start = p2bCache->FindFirst();
144     size_t basesSeen = 0;
145     while (basesSeen < pos) {
146         start = p2bCache->FindNext(start);
147         ++basesSeen;
148     }
149 
150     // find end
151     size_t end = start;
152     size_t seen = 1;
153     while (seen < len) {
154         end = p2bCache->FindNext(end);
155         ++seen;
156     }
157 
158     // return clipped
159     return {input.cbegin() + start, input.cbegin() + end + 1};
160 }
161 
162 template <class InputIt, class Size, class OutputIt>
Move_N(InputIt first,Size count,OutputIt result)163 OutputIt Move_N(InputIt first, Size count, OutputIt result)
164 {
165     return std::move(first, first + count, result);
166 }
167 
168 template <typename F, typename N>
ClipAndGapify(const BamRecordImpl & impl,const bool aligned,const bool exciseSoftClips,F * seq,N paddingNullValue,N deletionNullValue)169 static void ClipAndGapify(const BamRecordImpl& impl, const bool aligned, const bool exciseSoftClips,
170                           F* seq, N paddingNullValue, N deletionNullValue)
171 {
172     assert(seq);
173 
174     const bool clipOrGapRequested = aligned || exciseSoftClips;
175     if (impl.IsMapped() && clipOrGapRequested) {
176         // determine final container length
177         auto incrementsOutputLength = [](const CigarOperationType type, const bool isAligned,
178                                          const bool exciseSoftClipsFromAln) {
179             if (type == CigarOperationType::HARD_CLIP ||
180                 type == CigarOperationType::REFERENCE_SKIP) {
181                 return false;
182             } else if (type == CigarOperationType::SOFT_CLIP && exciseSoftClipsFromAln) {
183                 return false;
184             } else if (!isAligned && (type == CigarOperationType::DELETION ||
185                                       type == CigarOperationType::PADDING)) {
186                 return false;
187             } else
188                 return true;
189         };
190 
191         size_t outputLength = 0;
192         const auto cigar = impl.CigarData();
193         for (const CigarOperation& op : cigar) {
194             if (incrementsOutputLength(op.Type(), aligned, exciseSoftClips))
195                 outputLength += op.Length();
196         }
197 
198         // move original data to temp, prep output container size
199         F originalSeq = std::move(*seq);
200         seq->resize(outputLength);
201 
202         // apply CIGAR ops
203         size_t srcIndex = 0;
204         size_t dstIndex = 0;
205         for (const CigarOperation& op : cigar) {
206             const auto opType = op.Type();
207             const auto opLength = op.Length();
208 
209             // nothing to do for hard-clipped & ref-skipped positions
210             if (opType == CigarOperationType::HARD_CLIP ||
211                 opType == CigarOperationType::REFERENCE_SKIP) {
212                 continue;
213             }
214 
215             // maybe skip soft-clipped positions
216             else if (opType == CigarOperationType::SOFT_CLIP) {
217                 if (exciseSoftClips)
218                     srcIndex += opLength;
219                 else {
220                     Move_N(originalSeq.begin() + srcIndex, opLength, seq->begin() + dstIndex);
221                     srcIndex += opLength;
222                     dstIndex += opLength;
223                 }
224             }
225 
226             // maybe add deletion/padding values
227             else if (aligned && opType == CigarOperationType::DELETION) {
228                 for (size_t i = 0; i < opLength; ++i)
229                     (*seq)[dstIndex++] = deletionNullValue;
230             } else if (aligned && opType == CigarOperationType::PADDING) {
231                 for (size_t i = 0; i < opLength; ++i)
232                     (*seq)[dstIndex++] = paddingNullValue;
233             }
234 
235             // all other CIGAR ops
236             else {
237                 Move_N(originalSeq.begin() + srcIndex, opLength, seq->begin() + dstIndex);
238                 srcIndex += opLength;
239                 dstIndex += opLength;
240             }
241         }
242     }
243 }
244 
ClipAndGapifyBases(const BamRecordImpl & impl,const bool aligned,const bool exciseSoftClips,std::string * seq)245 static inline void ClipAndGapifyBases(const BamRecordImpl& impl, const bool aligned,
246                                       const bool exciseSoftClips, std::string* seq)
247 {
248     ClipAndGapify<std::string, char>(impl, aligned, exciseSoftClips, seq, '*', '-');
249 }
250 
ClipAndGapifyFrames(const BamRecordImpl & impl,const bool aligned,const bool exciseSoftClips,Frames * frames)251 static inline void ClipAndGapifyFrames(const BamRecordImpl& impl, const bool aligned,
252                                        const bool exciseSoftClips, Frames* frames)
253 {
254     assert(frames);
255     std::vector<uint16_t> data{std::move(frames->Data())};
256     ClipAndGapify<std::vector<uint16_t>, uint16_t>(impl, aligned, exciseSoftClips, &data, 0, 0);
257     frames->Data(data);
258 }
259 
ClipAndGapifyPhotons(const BamRecordImpl & impl,const bool aligned,const bool exciseSoftClips,std::vector<float> * data)260 static inline void ClipAndGapifyPhotons(const BamRecordImpl& impl, const bool aligned,
261                                         const bool exciseSoftClips, std::vector<float>* data)
262 {
263     ClipAndGapify<std::vector<float>, float>(impl, aligned, exciseSoftClips, data, 0.0, 0.0);
264 }
265 
ClipAndGapifyQualities(const BamRecordImpl & impl,const bool aligned,const bool exciseSoftClips,QualityValues * quals)266 static inline void ClipAndGapifyQualities(const BamRecordImpl& impl, const bool aligned,
267                                           const bool exciseSoftClips, QualityValues* quals)
268 {
269     ClipAndGapify<QualityValues, QualityValue>(impl, aligned, exciseSoftClips, quals,
270                                                QualityValue(0), QualityValue(0));
271 }
272 
ClipAndGapifyUInts(const BamRecordImpl & impl,const bool aligned,const bool exciseSoftClips,std::vector<uint32_t> * data)273 static inline void ClipAndGapifyUInts(const BamRecordImpl& impl, const bool aligned,
274                                       const bool exciseSoftClips, std::vector<uint32_t>* data)
275 {
276     ClipAndGapify<std::vector<uint32_t>, uint32_t>(impl, aligned, exciseSoftClips, data, 0, 0);
277 }
278 
ClipAndGapifyUInt8s(const BamRecordImpl & impl,const bool aligned,const bool exciseSoftClips,std::vector<uint8_t> * data)279 static inline void ClipAndGapifyUInt8s(const BamRecordImpl& impl, const bool aligned,
280                                        const bool exciseSoftClips, std::vector<uint8_t>* data)
281 {
282     ClipAndGapify<std::vector<uint8_t>, uint8_t>(impl, aligned, exciseSoftClips, data, 0, 0);
283 }
284 
NameToType(const std::string & name)285 static RecordType NameToType(const std::string& name)
286 {
287     if (name == recordTypeName_Subread) return RecordType::SUBREAD;
288     if (name == recordTypeName_ZMW || name == recordTypeName_Polymerase) return RecordType::ZMW;
289     if (name == recordTypeName_HqRegion) return RecordType::HQREGION;
290     if (name == recordTypeName_CCS) return RecordType::CCS;
291     if (name == recordTypeName_Scrap) return RecordType::SCRAP;
292     if (name == recordTypeName_Transcript) return RecordType::TRANSCRIPT;
293     return RecordType::UNKNOWN;
294 }
295 
OrientBasesAsRequested(std::string * bases,Orientation current,Orientation requested,bool isReverseStrand,bool isPulse)296 static void OrientBasesAsRequested(std::string* bases, Orientation current, Orientation requested,
297                                    bool isReverseStrand, bool isPulse)
298 {
299     assert(bases);
300     if (current != requested && isReverseStrand) {
301         if (isPulse)
302             internal::ReverseComplementCaseSens(*bases);
303         else
304             internal::ReverseComplement(*bases);
305     }
306 }
307 
308 template <typename Container>
OrientTagDataAsRequested(Container * data,Orientation current,Orientation requested,bool isReverseStrand)309 inline void OrientTagDataAsRequested(Container* data, Orientation current, Orientation requested,
310                                      bool isReverseStrand)
311 {
312     assert(data);
313     if (current != requested && isReverseStrand) std::reverse(data->begin(), data->end());
314 }
315 
ConsumesQuery(const CigarOperationType type)316 static inline bool ConsumesQuery(const CigarOperationType type)
317 {
318     return (bam_cigar_type(static_cast<int>(type)) & 0x1) != 0;
319 }
320 
ConsumesReference(const CigarOperationType type)321 static inline bool ConsumesReference(const CigarOperationType type)
322 {
323     return (bam_cigar_type(static_cast<int>(type)) & 0x2) != 0;
324 }
325 
326 }  // namespace internal
327 
328 const float BamRecord::photonFactor = 10.0;
329 
BamRecord()330 BamRecord::BamRecord()
331     : alignedStart_{PacBio::BAM::UnmappedPosition}, alignedEnd_{PacBio::BAM::UnmappedPosition}
332 {
333 }
334 
BamRecord(BamHeader header)335 BamRecord::BamRecord(BamHeader header)
336     : header_{std::move(header)}
337     , alignedStart_{PacBio::BAM::UnmappedPosition}
338     , alignedEnd_{PacBio::BAM::UnmappedPosition}
339 {
340 }
341 
BamRecord(BamRecordImpl impl)342 BamRecord::BamRecord(BamRecordImpl impl)
343     : impl_{std::move(impl)}
344     , alignedStart_{PacBio::BAM::UnmappedPosition}
345     , alignedEnd_{PacBio::BAM::UnmappedPosition}
346 {
347 }
348 
BamRecord(const BamRecord & other)349 BamRecord::BamRecord(const BamRecord& other)
350     : impl_{other.impl_}
351     , header_{other.header_}
352     , alignedStart_{other.alignedStart_}
353     , alignedEnd_{other.alignedEnd_}
354 {
355 }
356 
BamRecord(BamRecord && other)357 BamRecord::BamRecord(BamRecord&& other)
358     : impl_{std::move(other.impl_)}
359     , header_{std::move(other.header_)}
360     , alignedStart_{std::move(other.alignedStart_)}
361     , alignedEnd_{std::move(other.alignedEnd_)}
362     , p2bCache_{std::move(other.p2bCache_)}
363 {
364 }
365 
operator =(const BamRecord & other)366 BamRecord& BamRecord::operator=(const BamRecord& other)
367 {
368     if (this != &other) {
369         impl_ = other.impl_;
370         header_ = other.header_;
371         alignedStart_ = other.alignedStart_;
372         alignedEnd_ = other.alignedEnd_;
373         p2bCache_.reset();  // just reset, for now at least
374     }
375     return *this;
376 }
377 
operator =(BamRecord && other)378 BamRecord& BamRecord::operator=(BamRecord&& other)
379 {
380     if (this != &other) {
381         impl_ = std::move(other.impl_);
382         header_ = std::move(other.header_);
383         alignedStart_ = std::move(other.alignedStart_);
384         alignedEnd_ = std::move(other.alignedEnd_);
385         p2bCache_ = std::move(other.p2bCache_);
386     }
387     return *this;
388 }
389 
~BamRecord()390 BamRecord::~BamRecord() {}
391 
AlignedEnd() const392 Position BamRecord::AlignedEnd() const
393 {
394     if (alignedEnd_ == PacBio::BAM::UnmappedPosition) CalculateAlignedPositions();
395     return alignedEnd_;
396 }
397 
AlignedStart() const398 Position BamRecord::AlignedStart() const
399 {
400     if (alignedStart_ == PacBio::BAM::UnmappedPosition) CalculateAlignedPositions();
401     return alignedStart_;
402 }
403 
AlignedStrand() const404 Strand BamRecord::AlignedStrand() const
405 {
406     return impl_.IsReverseStrand() ? Strand::REVERSE : Strand::FORWARD;
407 }
408 
AltLabelQV(Orientation orientation,bool aligned,bool exciseSoftClips,PulseBehavior pulseBehavior) const409 QualityValues BamRecord::AltLabelQV(Orientation orientation, bool aligned, bool exciseSoftClips,
410                                     PulseBehavior pulseBehavior) const
411 {
412     return FetchQualities(BamRecordTag::ALT_LABEL_QV, orientation, aligned, exciseSoftClips,
413                           pulseBehavior);
414 }
415 
AltLabelQV(const QualityValues & altLabelQVs)416 BamRecord& BamRecord::AltLabelQV(const QualityValues& altLabelQVs)
417 {
418     internal::CreateOrEdit(BamRecordTag::ALT_LABEL_QV, altLabelQVs.Fastq(), &impl_);
419     return *this;
420 }
421 
AltLabelTag(Orientation orientation,bool aligned,bool exciseSoftClips,PulseBehavior pulseBehavior) const422 std::string BamRecord::AltLabelTag(Orientation orientation, bool aligned, bool exciseSoftClips,
423                                    PulseBehavior pulseBehavior) const
424 {
425     return FetchBases(BamRecordTag::ALT_LABEL_TAG, orientation, aligned, exciseSoftClips,
426                       pulseBehavior);
427 }
428 
AltLabelTag(const std::string & tags)429 BamRecord& BamRecord::AltLabelTag(const std::string& tags)
430 {
431     internal::CreateOrEdit(BamRecordTag::ALT_LABEL_TAG, tags, &impl_);
432     return *this;
433 }
434 
BarcodeForward() const435 int16_t BamRecord::BarcodeForward() const { return Barcodes().first; }
436 
BarcodeReverse() const437 int16_t BamRecord::BarcodeReverse() const { return Barcodes().second; }
438 
BarcodeQuality() const439 uint8_t BamRecord::BarcodeQuality() const
440 {
441     const auto tagName = internal::BamRecordTags::LabelFor(BamRecordTag::BARCODE_QUALITY);
442     const auto bq = impl_.TagValue(tagName);
443     if (bq.IsNull())
444         return 0;  // ?? "missing" value for tags ?? should we consider boost::optional<T> for these kind of guys ??
445     return bq.ToUInt8();
446 }
447 
BarcodeQuality(const uint8_t quality)448 BamRecord& BamRecord::BarcodeQuality(const uint8_t quality)
449 {
450     internal::CreateOrEdit(BamRecordTag::BARCODE_QUALITY, quality, &impl_);
451     return *this;
452 }
453 
Barcodes() const454 std::pair<int16_t, int16_t> BamRecord::Barcodes() const
455 {
456     const auto tagName = internal::BamRecordTags::LabelFor(BamRecordTag::BARCODES);
457     const Tag bc = impl_.TagValue(tagName);
458     if (bc.IsNull()) throw std::runtime_error{"barcode tag (bc) was requested but is missing"};
459 
460     // NOTE: barcodes are still stored, per the spec, as uint16, even though
461     // we're now using them as int16_t in the API (bug 31511)
462     //
463     if (!bc.IsUInt16Array())
464         throw std::runtime_error{
465             "barcode tag (bc) is malformed: should be a uint16_t array of size==2."};
466     const auto bcArray = bc.ToUInt16Array();
467     if (bcArray.size() != 2)
468         throw std::runtime_error{
469             "barcode tag (bc) is malformed: should be a uint16_t array of size==2."};
470 
471     return {boost::numeric_cast<int16_t>(bcArray[0]), boost::numeric_cast<int16_t>(bcArray[1])};
472 }
473 
Barcodes(const std::pair<int16_t,int16_t> & barcodeIds)474 BamRecord& BamRecord::Barcodes(const std::pair<int16_t, int16_t>& barcodeIds)
475 {
476     const std::vector<uint16_t> data{boost::numeric_cast<uint16_t>(barcodeIds.first),
477                                      boost::numeric_cast<uint16_t>(barcodeIds.second)};
478     internal::CreateOrEdit(BamRecordTag::BARCODES, data, &impl_);
479     return *this;
480 }
481 
CalculateAlignedPositions() const482 void BamRecord::CalculateAlignedPositions() const
483 {
484     // reset
485     ResetCachedPositions();
486 
487     // skip if unmapped, or has no queryStart/End
488     if (!impl_.IsMapped()) return;
489 
490     // get the query start/end
491     const auto seqLength = static_cast<int>(impl_.SequenceLength());
492     const bool isCcsOrTranscript = IsCcsOrTranscript(Type());
493     const Position qStart = isCcsOrTranscript ? 0 : QueryStart();
494     const Position qEnd = isCcsOrTranscript ? seqLength : QueryEnd();
495 
496     if (qStart == PacBio::BAM::UnmappedPosition || qEnd == PacBio::BAM::UnmappedPosition) return;
497 
498     // determine clipped end ranges
499     const auto alignedOffsets = internal::AlignedOffsets(*this, seqLength);
500     const auto startOffset = alignedOffsets.first;
501     const auto endOffset = alignedOffsets.second;
502     if (endOffset == -1 || startOffset == -1) return;  // TODO: handle error more??
503 
504     // store aligned positions (polymerase read coordinates)
505     if (impl_.IsReverseStrand()) {
506         alignedStart_ = qStart + (seqLength - endOffset);
507         alignedEnd_ = qEnd - startOffset;
508     } else {
509         alignedStart_ = qStart + startOffset;
510         alignedEnd_ = qEnd - (seqLength - endOffset);
511     }
512 }
513 
CalculatePulse2BaseCache() const514 void BamRecord::CalculatePulse2BaseCache() const
515 {
516     // skip already calculated
517     if (p2bCache_) return;
518 
519     // else try to calculate p2b cache.
520     if (!HasPulseCall())
521         throw std::runtime_error{"BamRecord cannot calculate pulse2base mapping without 'pc' tag."};
522     const auto pulseCalls =
523         FetchBases(BamRecordTag::PULSE_CALL, Orientation::NATIVE, false, false, PulseBehavior::ALL);
524     p2bCache_ = std::make_unique<internal::Pulse2BaseCache>(pulseCalls);
525 }
526 
CigarData(bool exciseAllClips) const527 Cigar BamRecord::CigarData(bool exciseAllClips) const
528 {
529     auto isClippingOp = [](const CigarOperation& op) {
530         const auto type = op.Type();
531         return type == CigarOperationType::SOFT_CLIP || type == CigarOperationType::HARD_CLIP;
532     };
533 
534     auto cigar = impl_.CigarData();
535     if (exciseAllClips) {
536         cigar.erase(std::remove_if(cigar.begin(), cigar.end(), isClippingOp), cigar.end());
537     }
538     return cigar;
539 }
540 
Clip(const ClipType clipType,const Position start,const Position end)541 BamRecord& BamRecord::Clip(const ClipType clipType, const Position start, const Position end)
542 {
543     switch (clipType) {
544         case ClipType::CLIP_NONE:
545             return *this;
546         case ClipType::CLIP_TO_QUERY:
547             return ClipToQuery(start, end);
548         case ClipType::CLIP_TO_REFERENCE:
549             return ClipToReference(start, end);
550         default:
551             throw std::runtime_error{"unsupported clip type requested"};
552     }
553 }
554 
ClipTags(const size_t clipFrom,const size_t clipLength)555 void BamRecord::ClipTags(const size_t clipFrom, const size_t clipLength)
556 {
557     const auto ipdCodec = ReadGroup().IpdCodec();
558     const auto pwCodec = ReadGroup().PulseWidthCodec();
559 
560     // update BAM tags
561     TagCollection tags = impl_.Tags();
562     if (HasDeletionQV())
563         tags[internal::Label(BamRecordTag::DELETION_QV)] =
564             internal::Clip(DeletionQV(Orientation::NATIVE), clipFrom, clipLength).Fastq();
565     if (HasInsertionQV())
566         tags[internal::Label(BamRecordTag::INSERTION_QV)] =
567             internal::Clip(InsertionQV(Orientation::NATIVE), clipFrom, clipLength).Fastq();
568     if (HasMergeQV())
569         tags[internal::Label(BamRecordTag::MERGE_QV)] =
570             internal::Clip(MergeQV(Orientation::NATIVE), clipFrom, clipLength).Fastq();
571     if (HasSubstitutionQV())
572         tags[internal::Label(BamRecordTag::SUBSTITUTION_QV)] =
573             internal::Clip(SubstitutionQV(Orientation::NATIVE), clipFrom, clipLength).Fastq();
574     if (HasIPD()) {
575         if (ipdCodec == FrameCodec::RAW)
576             tags[internal::Label(BamRecordTag::IPD)] =
577                 internal::Clip(IPD(Orientation::NATIVE).Data(), clipFrom, clipLength);
578         else if (ipdCodec == FrameCodec::V1)
579             tags[internal::Label(BamRecordTag::IPD)] =
580                 internal::Clip(IPD(Orientation::NATIVE).Encode(), clipFrom, clipLength);
581     }
582     if (HasPulseWidth()) {
583         if (pwCodec == FrameCodec::RAW)
584             tags[internal::Label(BamRecordTag::PULSE_WIDTH)] =
585                 internal::Clip(PulseWidth(Orientation::NATIVE).Data(), clipFrom, clipLength);
586         else if (pwCodec == FrameCodec::V1)
587             tags[internal::Label(BamRecordTag::PULSE_WIDTH)] =
588                 internal::Clip(PulseWidth(Orientation::NATIVE).Encode(), clipFrom, clipLength);
589     }
590     if (HasDeletionTag())
591         tags[internal::Label(BamRecordTag::DELETION_TAG)] =
592             internal::Clip(DeletionTag(Orientation::NATIVE), clipFrom, clipLength);
593     if (HasSubstitutionTag())
594         tags[internal::Label(BamRecordTag::SUBSTITUTION_TAG)] =
595             internal::Clip(SubstitutionTag(Orientation::NATIVE), clipFrom, clipLength);
596 
597     // internal BAM tags
598     if (HasPulseCall()) {
599 
600         // ensure p2bCache initialized
601         CalculatePulse2BaseCache();
602         internal::Pulse2BaseCache* p2bCache = p2bCache_.get();
603 
604         if (HasAltLabelQV())
605             tags[internal::Label(BamRecordTag::ALT_LABEL_QV)] =
606                 internal::ClipPulse(AltLabelQV(Orientation::NATIVE), p2bCache, clipFrom, clipLength)
607                     .Fastq();
608         if (HasLabelQV())
609             tags[internal::Label(BamRecordTag::LABEL_QV)] =
610                 internal::ClipPulse(LabelQV(Orientation::NATIVE), p2bCache, clipFrom, clipLength)
611                     .Fastq();
612         if (HasPulseMergeQV())
613             tags[internal::Label(BamRecordTag::PULSE_MERGE_QV)] =
614                 internal::ClipPulse(PulseMergeQV(Orientation::NATIVE), p2bCache, clipFrom,
615                                     clipLength)
616                     .Fastq();
617         if (HasAltLabelTag())
618             tags[internal::Label(BamRecordTag::ALT_LABEL_TAG)] = internal::ClipPulse(
619                 AltLabelTag(Orientation::NATIVE), p2bCache, clipFrom, clipLength);
620         if (HasPulseCall())
621             tags[internal::Label(BamRecordTag::PULSE_CALL)] =
622                 internal::ClipPulse(PulseCall(Orientation::NATIVE), p2bCache, clipFrom, clipLength);
623         if (HasPkmean())
624             tags[internal::Label(BamRecordTag::PKMEAN)] = EncodePhotons(
625                 internal::ClipPulse(Pkmean(Orientation::NATIVE), p2bCache, clipFrom, clipLength));
626         if (HasPkmid())
627             tags[internal::Label(BamRecordTag::PKMID)] = EncodePhotons(
628                 internal::ClipPulse(Pkmid(Orientation::NATIVE), p2bCache, clipFrom, clipLength));
629         if (HasPkmean2())
630             tags[internal::Label(BamRecordTag::PKMEAN_2)] = EncodePhotons(
631                 internal::ClipPulse(Pkmean2(Orientation::NATIVE), p2bCache, clipFrom, clipLength));
632         if (HasPkmid2())
633             tags[internal::Label(BamRecordTag::PKMID_2)] = EncodePhotons(
634                 internal::ClipPulse(Pkmid2(Orientation::NATIVE), p2bCache, clipFrom, clipLength));
635         if (HasPrePulseFrames())
636             tags[internal::Label(BamRecordTag::PRE_PULSE_FRAMES)] = internal::ClipPulse(
637                 PrePulseFrames(Orientation::NATIVE).Data(), p2bCache, clipFrom, clipLength);
638         if (HasPulseCallWidth())
639             tags[internal::Label(BamRecordTag::PULSE_CALL_WIDTH)] = internal::ClipPulse(
640                 PulseCallWidth(Orientation::NATIVE).Data(), p2bCache, clipFrom, clipLength);
641         if (HasStartFrame())
642             tags[internal::Label(BamRecordTag::START_FRAME)] = internal::ClipPulse(
643                 StartFrame(Orientation::NATIVE), p2bCache, clipFrom, clipLength);
644     }
645 
646     impl_.Tags(tags);
647 }
648 
ClipFields(const size_t clipFrom,const size_t clipLength)649 void BamRecord::ClipFields(const size_t clipFrom, const size_t clipLength)
650 {
651     const bool isForwardStrand = (AlignedStrand() == Strand::FORWARD);
652 
653     // clip seq, quals
654     std::string sequence{internal::Clip(Sequence(Orientation::NATIVE), clipFrom, clipLength)};
655     QualityValues qualities{internal::Clip(Qualities(Orientation::NATIVE), clipFrom, clipLength)};
656     if (!isForwardStrand) {
657         internal::ReverseComplement(sequence);
658         internal::Reverse(qualities);
659     }
660     impl_.SetSequenceAndQualities(sequence, qualities.Fastq());
661 
662     ClipTags(clipFrom, clipLength);
663 }
664 
ClipToQuery(const Position start,const Position end)665 BamRecord& BamRecord::ClipToQuery(const Position start, const Position end)
666 {
667     // cache original coords, skip out if clip not needed
668     const auto seqLength = static_cast<int>(impl_.SequenceLength());
669     const bool isCcsOrTranscript = IsCcsOrTranscript(Type());
670     const Position origQStart = isCcsOrTranscript ? 0 : QueryStart();
671     const Position origQEnd = isCcsOrTranscript ? seqLength : QueryEnd();
672     if (start <= origQStart && end >= origQEnd) return *this;
673 
674     // determine new offsets into data
675     const size_t startOffset = start - origQStart;
676     const size_t endOffset = origQEnd - end;
677 
678     // maybe update CIGAR & aligned position
679     if (IsMapped()) {
680 
681         // fetch a 'working copy' of CIGAR data
682         Cigar cigar = impl_.CigarData();
683 
684         // clip leading CIGAR ops
685         size_t referencePositionOffset = 0;
686         size_t remaining = startOffset;
687         while (remaining > 0 && !cigar.empty()) {
688             CigarOperation& firstOp = cigar.front();
689             const auto firstOpLength = firstOp.Length();
690             const bool consumesQuery = internal::ConsumesQuery(firstOp.Type());
691             const bool consumesRef = internal::ConsumesReference(firstOp.Type());
692 
693             // if (!consumesQuery)
694             //    just pop (e.g. deletion) ?
695             // else {
696             //    check bounds, like clip to reference ?
697             // }
698 
699             // CIGAR op ends at or before clip
700             if (firstOpLength <= remaining) {
701                 cigar.erase(cigar.begin());
702                 if (consumesQuery) remaining -= firstOpLength;
703                 if (consumesRef) referencePositionOffset += firstOpLength;
704             }
705 
706             // CIGAR op straddles clip
707             else {
708                 firstOp.Length(firstOpLength - remaining);
709                 if (consumesRef) referencePositionOffset += remaining;
710                 remaining = 0;
711             }
712         }
713 
714         // clip trailing CIGAR ops
715         remaining = endOffset;
716         while (remaining > 0 && !cigar.empty()) {
717             CigarOperation& lastOp = cigar.back();
718             const auto lastOpLength = lastOp.Length();
719             const bool consumesQuery = internal::ConsumesQuery(lastOp.Type());
720 
721             // CIGAR op ends at or after clip
722             if (lastOpLength <= remaining) {
723                 cigar.pop_back();
724                 if (consumesQuery) remaining -= lastOpLength;
725             }
726 
727             // CIGAR op straddles clip
728             else {
729                 lastOp.Length(lastOpLength - remaining);
730                 remaining = 0;
731             }
732         }
733 
734         // update CIGAR & position
735         impl_.CigarData(cigar);
736         impl_.Position(impl_.Position() + referencePositionOffset);
737     }
738 
739     // clip SEQ, QUAL, & tags
740     const size_t clipFrom = startOffset;
741     const size_t clipLength = (end - start);
742     ClipFields(clipFrom, clipLength);
743 
744     // update query start/end
745     // TODO: update name to reflect new QS/QE ???
746     internal::CreateOrEdit(BamRecordTag::QUERY_START, start, &impl_);
747     internal::CreateOrEdit(BamRecordTag::QUERY_END, end, &impl_);
748     //    UpdateName();
749 
750     // reset any cached aligned start/end
751     ResetCachedPositions();
752     return *this;
753 }
754 
ClipToReference(const Position start,const Position end)755 BamRecord& BamRecord::ClipToReference(const Position start, const Position end)
756 {
757     // skip if not mapped, clipping to reference doesn't make sense
758     // or should we even consider throwing here?
759     if (!IsMapped()) return *this;
760 
761     const bool isForwardStrand = (AlignedStrand() == Strand::FORWARD);
762     return (isForwardStrand ? ClipToReferenceForward(start, end)
763                             : ClipToReferenceReverse(start, end));
764 }
765 
ClipToReferenceForward(const PacBio::BAM::Position start,const PacBio::BAM::Position end)766 BamRecord& BamRecord::ClipToReferenceForward(const PacBio::BAM::Position start,
767                                              const PacBio::BAM::Position end)
768 {
769     assert(IsMapped());
770     assert(AlignedStrand() == Strand::FORWARD);
771 
772     // cache original coords
773     const int seqLength = static_cast<int>(impl_.SequenceLength());
774     const bool isCcsOrTranscript = IsCcsOrTranscript(Type());
775     const Position origQStart = isCcsOrTranscript ? 0 : QueryStart();
776     const Position origQEnd = isCcsOrTranscript ? seqLength : QueryEnd();
777     const Position origTStart = ReferenceStart();
778     const Position origTEnd = ReferenceEnd();
779     assert(AlignedStart() >= origQStart);
780     assert(AlignedEnd() <= origQEnd);
781 
782     // skip if already within requested clip range
783     if (start <= origTStart && end >= origTEnd) return *this;
784 
785     const Position newTStart = std::max(origTStart, start);
786     const Position newTEnd = std::min(origTEnd, end);
787 
788     // fetch a 'working copy' of CIGAR data
789     Cigar cigar = impl_.CigarData();
790 
791     // we're going to skip query sequence outside aligned region
792     size_t queryPosRemovedFront = 0;
793     size_t queryPosRemovedBack = 0;
794 
795     // ------------------------
796     // clip leading CIGAR ops
797     // ------------------------
798 
799     size_t remaining = newTStart - origTStart;
800     while (remaining > 0 && !cigar.empty()) {
801         CigarOperation& firstOp = cigar.front();
802         const auto firstOpLength = firstOp.Length();
803         const bool consumesQuery = internal::ConsumesQuery(firstOp.Type());
804         const bool consumesRef = internal::ConsumesReference(firstOp.Type());
805 
806         if (!consumesRef) {
807 
808             // e.g. softclip - just pop it completely
809             cigar.erase(cigar.begin());
810             if (consumesQuery) queryPosRemovedFront += firstOpLength;
811 
812         } else {
813             assert(consumesRef);
814 
815             // CIGAR ends at or before clip
816             if (firstOpLength <= remaining) {
817                 cigar.erase(cigar.begin());
818                 if (consumesQuery) queryPosRemovedFront += firstOpLength;
819                 if (consumesRef) remaining -= firstOpLength;
820             }
821 
822             // CIGAR straddles clip
823             else {
824                 assert(firstOpLength > remaining);
825                 firstOp.Length(firstOpLength - remaining);
826                 if (consumesQuery) queryPosRemovedFront += remaining;
827                 remaining = 0;
828             }
829         }
830     }
831 
832     // -------------------------
833     // clip trailing CIGAR ops
834     // -------------------------
835 
836     remaining = origTEnd - newTEnd;
837     while (remaining > 0 && !cigar.empty()) {
838         CigarOperation& lastOp = cigar.back();
839         const auto lastOpLength = lastOp.Length();
840         const bool consumesQuery = internal::ConsumesQuery(lastOp.Type());
841         const bool consumesRef = internal::ConsumesReference(lastOp.Type());
842 
843         if (!consumesRef) {
844 
845             // e.g. softclip - just pop it completely
846             cigar.pop_back();
847             if (consumesQuery) queryPosRemovedBack += lastOpLength;
848 
849         } else {
850             assert(consumesRef);
851 
852             // CIGAR ends at or after clip
853             if (lastOpLength <= remaining) {
854                 cigar.pop_back();
855                 if (consumesQuery) queryPosRemovedBack += lastOpLength;
856                 if (consumesRef) remaining -= lastOpLength;
857             }
858 
859             // CIGAR straddles clip
860             else {
861                 assert(lastOpLength > remaining);
862                 lastOp.Length(lastOpLength - remaining);
863                 if (consumesQuery) queryPosRemovedBack += remaining;
864                 remaining = 0;
865             }
866         }
867     }
868 
869     // update CIGAR and position
870     impl_.CigarData(cigar);
871     impl_.Position(newTStart);
872 
873     // clip SEQ, QUAL, tags
874     const Position qStart = origQStart + queryPosRemovedFront;
875     const Position qEnd = origQEnd - queryPosRemovedBack;
876     const size_t clipFrom = queryPosRemovedFront;
877     const size_t clipLength = qEnd - qStart;
878     ClipFields(clipFrom, clipLength);
879 
880     // update query start/end
881     internal::CreateOrEdit(BamRecordTag::QUERY_START, qStart, &impl_);
882     internal::CreateOrEdit(BamRecordTag::QUERY_END, qEnd, &impl_);
883     //    UpdateName();
884 
885     // reset any cached aligned start/end
886     ResetCachedPositions();
887     return *this;
888 }
889 
ClipToReferenceReverse(const PacBio::BAM::Position start,const PacBio::BAM::Position end)890 BamRecord& BamRecord::ClipToReferenceReverse(const PacBio::BAM::Position start,
891                                              const PacBio::BAM::Position end)
892 {
893     assert(IsMapped());
894     assert(AlignedStrand() == Strand::REVERSE);
895 
896     // cache original coords
897     const int seqLength = static_cast<int>(impl_.SequenceLength());
898     const bool isCcsOrTranscript = IsCcsOrTranscript(Type());
899     const Position origQStart = isCcsOrTranscript ? 0 : QueryStart();
900     const Position origQEnd = isCcsOrTranscript ? seqLength : QueryEnd();
901     const Position origTStart = ReferenceStart();
902     const Position origTEnd = ReferenceEnd();
903 
904     // skip if already within requested clip range
905     if (start <= origTStart && end >= origTEnd) return *this;
906     assert(AlignedStart() >= origQStart);
907     assert(AlignedEnd() <= origQEnd);
908 
909     const Position newTStart = std::max(origTStart, start);
910     const Position newTEnd = std::min(origTEnd, end);
911 
912     Cigar cigar = impl_.CigarData();
913 
914     size_t queryPosRemovedFront = 0;
915     size_t queryPosRemovedBack = 0;
916 
917     // update CIGAR - clip front ops, then clip back ops
918     size_t remaining = newTStart - origTStart;
919     while (remaining > 0 && !cigar.empty()) {
920         CigarOperation& firstOp = cigar.front();
921         const auto firstOpType = firstOp.Type();
922         const auto firstOpLength = firstOp.Length();
923         const bool consumesQuery = internal::ConsumesQuery(firstOpType);
924         const bool consumesRef = internal::ConsumesReference(firstOpType);
925 
926         if (!consumesRef) {
927 
928             // e.g. softclip - just pop it completely
929             cigar.erase(cigar.begin());
930             if (consumesQuery) queryPosRemovedBack += firstOpLength;
931 
932         } else {
933             assert(consumesRef);
934 
935             // CIGAR ends at or before clip
936             if (firstOpLength <= remaining) {
937                 cigar.erase(cigar.begin());
938                 if (consumesQuery) queryPosRemovedBack += firstOpLength;
939                 if (consumesRef) remaining -= firstOpLength;
940             }
941 
942             // CIGAR straddles clip
943             else {
944                 assert(firstOpLength > remaining);
945                 firstOp.Length(firstOpLength - remaining);
946                 if (consumesQuery) queryPosRemovedBack += remaining;
947                 remaining = 0;
948             }
949         }
950     }
951 
952     remaining = origTEnd - newTEnd;
953     while (remaining > 0 && !cigar.empty()) {
954         CigarOperation& lastOp = cigar.back();
955         const auto lastOpType = lastOp.Type();
956         const auto lastOpLength = lastOp.Length();
957         const bool consumesQuery = internal::ConsumesQuery(lastOpType);
958         const bool consumesRef = internal::ConsumesReference(lastOpType);
959 
960         if (!consumesRef) {
961 
962             // e.g. softclip - just pop it completely
963             cigar.pop_back();
964             if (consumesQuery) queryPosRemovedFront += lastOpLength;
965 
966         } else {
967             assert(consumesRef);
968 
969             // CIGAR ends at or before clip
970             if (lastOpLength <= remaining) {
971                 cigar.pop_back();
972                 if (consumesQuery) queryPosRemovedFront += lastOpLength;
973                 if (consumesRef) remaining -= lastOpLength;
974             }
975 
976             // CIGAR straddles clip
977             else {
978                 assert(lastOpLength > remaining);
979                 lastOp.Length(lastOpLength - remaining);
980                 if (consumesQuery) queryPosRemovedFront += remaining;
981                 remaining = 0;
982             }
983         }
984     }
985     impl_.CigarData(cigar);
986 
987     // update aligned reference position
988     impl_.Position(newTStart);
989 
990     // clip SEQ, QUAL, tags
991     const Position qStart = origQStart + queryPosRemovedFront;
992     const Position qEnd = origQEnd - queryPosRemovedBack;
993     const size_t clipFrom = queryPosRemovedFront;
994     const size_t clipLength = qEnd - qStart;
995     ClipFields(clipFrom, clipLength);
996 
997     // update query start/end
998     internal::CreateOrEdit(BamRecordTag::QUERY_START, qStart, &impl_);
999     internal::CreateOrEdit(BamRecordTag::QUERY_END, qEnd, &impl_);
1000     //    UpdateName();
1001 
1002     // reset any cached aligned start/end
1003     ResetCachedPositions();
1004     return *this;
1005 }
1006 
DeletionQV(Orientation orientation,bool aligned,bool exciseSoftClips) const1007 QualityValues BamRecord::DeletionQV(Orientation orientation, bool aligned,
1008                                     bool exciseSoftClips) const
1009 {
1010     return FetchQualities(BamRecordTag::DELETION_QV, orientation, aligned, exciseSoftClips);
1011 }
1012 
DeletionQV(const QualityValues & deletionQVs)1013 BamRecord& BamRecord::DeletionQV(const QualityValues& deletionQVs)
1014 {
1015     internal::CreateOrEdit(BamRecordTag::DELETION_QV, deletionQVs.Fastq(), &impl_);
1016     return *this;
1017 }
1018 
DeletionTag(Orientation orientation,bool aligned,bool exciseSoftClips) const1019 std::string BamRecord::DeletionTag(Orientation orientation, bool aligned,
1020                                    bool exciseSoftClips) const
1021 {
1022     return FetchBases(BamRecordTag::DELETION_TAG, orientation, aligned, exciseSoftClips);
1023 }
1024 
DeletionTag(const std::string & tags)1025 BamRecord& BamRecord::DeletionTag(const std::string& tags)
1026 {
1027     internal::CreateOrEdit(BamRecordTag::DELETION_TAG, tags, &impl_);
1028     return *this;
1029 }
1030 
EncodePhotons(const std::vector<float> & data)1031 std::vector<uint16_t> BamRecord::EncodePhotons(const std::vector<float>& data)
1032 {
1033     std::vector<uint16_t> encoded;
1034     encoded.reserve(data.size());
1035     for (const auto& d : data)
1036         encoded.emplace_back(d * photonFactor);
1037     return encoded;
1038 }
1039 
FetchBasesRaw(const BamRecordTag tag) const1040 std::string BamRecord::FetchBasesRaw(const BamRecordTag tag) const
1041 {
1042     const Tag seqTag = impl_.TagValue(tag);
1043     return seqTag.ToString();
1044 }
1045 
FetchBases(const BamRecordTag tag,const Orientation orientation,const bool aligned,const bool exciseSoftClips,const PulseBehavior pulseBehavior) const1046 std::string BamRecord::FetchBases(const BamRecordTag tag, const Orientation orientation,
1047                                   const bool aligned, const bool exciseSoftClips,
1048                                   const PulseBehavior pulseBehavior) const
1049 {
1050     const bool isBamSeq = (tag == BamRecordTag::SEQ);
1051     const bool isPulse = internal::BamRecordTags::IsPulse(tag);
1052 
1053     // fetch raw
1054     std::string bases;
1055     Orientation current;
1056     if (isBamSeq) {  // SEQ stored in genomic orientation
1057         bases = impl_.Sequence();
1058         current = Orientation::GENOMIC;
1059     } else {  // all tags stored in native orientation
1060         bases = FetchBasesRaw(tag);
1061         current = Orientation::NATIVE;
1062     }
1063 
1064     // maybe strip 'squashed' pulse loci
1065     if (isPulse && pulseBehavior == PulseBehavior::BASECALLS_ONLY) {
1066         CalculatePulse2BaseCache();
1067         bases = p2bCache_->RemoveSquashedPulses(bases);
1068     }
1069 
1070     // if we need to touch CIGAR
1071     if (aligned || exciseSoftClips) {
1072 
1073         if (isPulse && pulseBehavior != PulseBehavior::BASECALLS_ONLY)
1074             throw std::runtime_error{
1075                 "Cannot return data at all pulses when gapping and/or soft-clipping are requested. "
1076                 "Use PulseBehavior::BASECALLS_ONLY instead."};
1077 
1078         // force into genomic orientation
1079         internal::OrientBasesAsRequested(&bases, current, Orientation::GENOMIC,
1080                                          impl_.IsReverseStrand(), isPulse);
1081         current = Orientation::GENOMIC;
1082 
1083         // clip & gapify as requested
1084         internal::ClipAndGapifyBases(impl_, aligned, exciseSoftClips, &bases);
1085     }
1086 
1087     // return in the orientation requested
1088     internal::OrientBasesAsRequested(&bases, current, orientation, impl_.IsReverseStrand(),
1089                                      isPulse);
1090     return bases;
1091 }
1092 
FetchFramesRaw(const BamRecordTag tag) const1093 Frames BamRecord::FetchFramesRaw(const BamRecordTag tag) const
1094 {
1095     const Tag frameTag = impl_.TagValue(tag);
1096     if (frameTag.IsNull()) return {};  // throw ?
1097 
1098     // lossy frame codes
1099     if (frameTag.IsUInt8Array()) {
1100         const auto codes = frameTag.ToUInt8Array();
1101         return Frames::Decode(codes);
1102     }
1103 
1104     // lossless frame data
1105     else {
1106         assert(frameTag.IsUInt16Array());
1107         return Frames{frameTag.ToUInt16Array()};
1108     }
1109 }
1110 
FetchFrames(const BamRecordTag tag,const Orientation orientation,const bool aligned,const bool exciseSoftClips,const PulseBehavior pulseBehavior) const1111 Frames BamRecord::FetchFrames(const BamRecordTag tag, const Orientation orientation,
1112                               const bool aligned, const bool exciseSoftClips,
1113                               const PulseBehavior pulseBehavior) const
1114 {
1115     const bool isPulse = internal::BamRecordTags::IsPulse(tag);
1116 
1117     // fetch raw
1118     Frames frames = FetchFramesRaw(tag);
1119     Orientation current = Orientation::NATIVE;
1120 
1121     // maybe strip 'squashed' pulse loci
1122     if (isPulse && pulseBehavior == PulseBehavior::BASECALLS_ONLY) {
1123         CalculatePulse2BaseCache();
1124         frames.DataRaw() = p2bCache_->RemoveSquashedPulses(frames.Data());
1125     }
1126 
1127     // if we need to touch the CIGAR
1128     if (aligned || exciseSoftClips) {
1129 
1130         if (isPulse && pulseBehavior != PulseBehavior::BASECALLS_ONLY)
1131             throw std::runtime_error{
1132                 "Cannot return data at all pulses when gapping and/or soft-clipping are requested. "
1133                 "Use PulseBehavior::BASECALLS_ONLY instead."};
1134 
1135         // force into genomic orientation
1136         internal::OrientTagDataAsRequested(&frames, current, Orientation::GENOMIC,
1137                                            impl_.IsReverseStrand());
1138         current = Orientation::GENOMIC;
1139 
1140         // clip & gapify as requested
1141         internal::ClipAndGapifyFrames(impl_, aligned, exciseSoftClips, &frames);
1142     }
1143 
1144     // return in the orientation requested
1145     internal::OrientTagDataAsRequested(&frames, current, orientation, impl_.IsReverseStrand());
1146     return frames;
1147 }
1148 
FetchPhotonsRaw(const BamRecordTag tag) const1149 std::vector<float> BamRecord::FetchPhotonsRaw(const BamRecordTag tag) const
1150 {
1151     const Tag frameTag = impl_.TagValue(tag);
1152     if (frameTag.IsNull()) return {};
1153     if (!frameTag.IsUInt16Array())
1154         throw std::runtime_error{"Photons are not a uint16_t array, tag " +
1155                                  internal::BamRecordTags::LabelFor(tag)};
1156 
1157     const auto data = frameTag.ToUInt16Array();
1158     std::vector<float> photons;
1159     photons.reserve(data.size());
1160     for (const auto& d : data)
1161         photons.emplace_back(d / photonFactor);
1162     return photons;
1163 }
1164 
FetchPhotons(const BamRecordTag tag,const Orientation orientation,const bool aligned,const bool exciseSoftClips,const PulseBehavior pulseBehavior) const1165 std::vector<float> BamRecord::FetchPhotons(const BamRecordTag tag, const Orientation orientation,
1166                                            const bool aligned, const bool exciseSoftClips,
1167                                            const PulseBehavior pulseBehavior) const
1168 {
1169     const bool isPulse = internal::BamRecordTags::IsPulse(tag);
1170 
1171     // fetch raw
1172     auto data = FetchPhotonsRaw(tag);
1173     Orientation current = Orientation::NATIVE;
1174 
1175     if (isPulse && pulseBehavior == PulseBehavior::BASECALLS_ONLY) {
1176         // strip 'squashed' pulse loci
1177         CalculatePulse2BaseCache();
1178         data = p2bCache_->RemoveSquashedPulses(data);
1179     }
1180 
1181     if (aligned || exciseSoftClips) {
1182 
1183         if (isPulse && pulseBehavior != PulseBehavior::BASECALLS_ONLY)
1184             throw std::runtime_error{
1185                 "Cannot return data at all pulses when gapping and/or soft-clipping are requested. "
1186                 "Use PulseBehavior::BASECALLS_ONLY instead."};
1187 
1188         // force into genomic orientation
1189         internal::OrientTagDataAsRequested(&data, current, Orientation::GENOMIC,
1190                                            impl_.IsReverseStrand());
1191         current = Orientation::GENOMIC;
1192 
1193         // clip & gapify as requested
1194         internal::ClipAndGapifyPhotons(impl_, aligned, exciseSoftClips, &data);
1195     }
1196 
1197     // return in the orientation requested
1198     internal::OrientTagDataAsRequested(&data, current, orientation, impl_.IsReverseStrand());
1199     return data;
1200 }
1201 
FetchQualitiesRaw(const BamRecordTag tag) const1202 QualityValues BamRecord::FetchQualitiesRaw(const BamRecordTag tag) const
1203 {
1204     const Tag qvsTag = impl_.TagValue(tag);
1205     return QualityValues::FromFastq(qvsTag.ToString());
1206 }
1207 
FetchQualities(const BamRecordTag tag,const Orientation orientation,const bool aligned,const bool exciseSoftClips,const PulseBehavior pulseBehavior) const1208 QualityValues BamRecord::FetchQualities(const BamRecordTag tag, const Orientation orientation,
1209                                         const bool aligned, const bool exciseSoftClips,
1210                                         const PulseBehavior pulseBehavior) const
1211 {
1212     // requested data info
1213     const bool isBamQual = (tag == BamRecordTag::QUAL);
1214     const bool isPulse = internal::BamRecordTags::IsPulse(tag);
1215 
1216     // fetch raw
1217     QualityValues quals;
1218     Orientation current;
1219     if (isBamQual) {  // QUAL stored in genomic orientation
1220         quals = impl_.Qualities();
1221         current = Orientation::GENOMIC;
1222     } else {  // all tags stored in native orientation
1223         quals = FetchQualitiesRaw(tag);
1224         current = Orientation::NATIVE;
1225     }
1226 
1227     if (isPulse && pulseBehavior == PulseBehavior::BASECALLS_ONLY) {
1228         // strip 'squashed' pulse loci
1229         CalculatePulse2BaseCache();
1230         quals = p2bCache_->RemoveSquashedPulses(quals);
1231     }
1232 
1233     // if we need to touch CIGAR
1234     if (aligned || exciseSoftClips) {
1235 
1236         if (isPulse && pulseBehavior != PulseBehavior::BASECALLS_ONLY)
1237             throw std::runtime_error{
1238                 "Cannot return data at all pulses when gapping and/or soft-clipping are requested. "
1239                 "Use PulseBehavior::BASECALLS_ONLY instead."};
1240 
1241         // force into genomic orientation
1242         internal::OrientTagDataAsRequested(&quals, current, Orientation::GENOMIC,
1243                                            impl_.IsReverseStrand());
1244         current = Orientation::GENOMIC;
1245 
1246         // clip & gapify as requested
1247         internal::ClipAndGapifyQualities(impl_, aligned, exciseSoftClips, &quals);
1248     }
1249 
1250     // return in the orientation requested
1251     internal::OrientTagDataAsRequested(&quals, current, orientation, impl_.IsReverseStrand());
1252     return quals;
1253 }
1254 
FetchUInt32sRaw(const BamRecordTag tag) const1255 std::vector<uint32_t> BamRecord::FetchUInt32sRaw(const BamRecordTag tag) const
1256 {
1257     // fetch tag data
1258     const Tag frameTag = impl_.TagValue(tag);
1259     if (frameTag.IsNull()) return {};
1260     if (!frameTag.IsUInt32Array())
1261         throw std::runtime_error{"Tag data are not a uint32_t array, tag " +
1262                                  internal::BamRecordTags::LabelFor(tag)};
1263     return frameTag.ToUInt32Array();
1264 }
1265 
FetchUInt32s(const BamRecordTag tag,const Orientation orientation,const bool aligned,const bool exciseSoftClips,const PulseBehavior pulseBehavior) const1266 std::vector<uint32_t> BamRecord::FetchUInt32s(const BamRecordTag tag, const Orientation orientation,
1267                                               const bool aligned, const bool exciseSoftClips,
1268                                               const PulseBehavior pulseBehavior) const
1269 {
1270     const bool isPulse = internal::BamRecordTags::IsPulse(tag);
1271 
1272     // fetch raw
1273     auto arr = FetchUInt32sRaw(tag);
1274     Orientation current = Orientation::NATIVE;
1275 
1276     if (isPulse && pulseBehavior == PulseBehavior::BASECALLS_ONLY) {
1277         // strip 'squashed' pulse loci
1278         CalculatePulse2BaseCache();
1279         arr = p2bCache_->RemoveSquashedPulses(arr);
1280     }
1281 
1282     if (aligned || exciseSoftClips) {
1283 
1284         if (isPulse && pulseBehavior != PulseBehavior::BASECALLS_ONLY)
1285             throw std::runtime_error{
1286                 "Cannot return data at all pulses when gapping and/or soft-clipping are requested. "
1287                 "Use PulseBehavior::BASECALLS_ONLY instead."};
1288 
1289         // force into genomic orientation
1290         internal::OrientTagDataAsRequested(&arr, current, Orientation::GENOMIC,
1291                                            impl_.IsReverseStrand());
1292         current = Orientation::GENOMIC;
1293 
1294         // clip & gapify as requested
1295         internal::ClipAndGapifyUInts(impl_, aligned, exciseSoftClips, &arr);
1296     }
1297 
1298     // return in the orientation requested
1299     internal::OrientTagDataAsRequested(&arr, current, orientation, impl_.IsReverseStrand());
1300     return arr;
1301 }
1302 
FetchUInt8sRaw(const BamRecordTag tag) const1303 std::vector<uint8_t> BamRecord::FetchUInt8sRaw(const BamRecordTag tag) const
1304 {
1305     // fetch tag data
1306     const Tag frameTag = impl_.TagValue(tag);
1307     if (frameTag.IsNull()) return {};
1308     if (!frameTag.IsUInt8Array())
1309         throw std::runtime_error{"Tag data are not a uint8_t array, tag " +
1310                                  internal::BamRecordTags::LabelFor(tag)};
1311     return frameTag.ToUInt8Array();
1312 }
1313 
FetchUInt8s(const BamRecordTag tag,const Orientation orientation,const bool aligned,const bool exciseSoftClips,const PulseBehavior pulseBehavior) const1314 std::vector<uint8_t> BamRecord::FetchUInt8s(const BamRecordTag tag, const Orientation orientation,
1315                                             const bool aligned, const bool exciseSoftClips,
1316                                             const PulseBehavior pulseBehavior) const
1317 {
1318     const bool isPulse = internal::BamRecordTags::IsPulse(tag);
1319 
1320     // fetch raw
1321     auto arr = FetchUInt8sRaw(tag);
1322     Orientation current = Orientation::NATIVE;
1323 
1324     if (isPulse && pulseBehavior == PulseBehavior::BASECALLS_ONLY) {
1325         // strip 'squashed' pulse loci
1326         CalculatePulse2BaseCache();
1327         arr = p2bCache_->RemoveSquashedPulses(arr);
1328     }
1329 
1330     if (aligned || exciseSoftClips) {
1331 
1332         if (isPulse && pulseBehavior != PulseBehavior::BASECALLS_ONLY)
1333             throw std::runtime_error{
1334                 "Cannot return data at all pulses when gapping and/or soft-clipping are requested. "
1335                 "Use PulseBehavior::BASECALLS_ONLY instead."};
1336 
1337         // force into genomic orientation
1338         internal::OrientTagDataAsRequested(&arr, current, Orientation::GENOMIC,
1339                                            impl_.IsReverseStrand());
1340         current = Orientation::GENOMIC;
1341 
1342         // clip & gapify as requested
1343         internal::ClipAndGapifyUInt8s(impl_, aligned, exciseSoftClips, &arr);
1344     }
1345 
1346     // return in the orientation requested
1347     internal::OrientTagDataAsRequested(&arr, current, orientation, impl_.IsReverseStrand());
1348     return arr;
1349 }
1350 
FullName() const1351 std::string BamRecord::FullName() const { return impl_.Name(); }
1352 
HasAltLabelQV() const1353 bool BamRecord::HasAltLabelQV() const { return impl_.HasTag(BamRecordTag::ALT_LABEL_QV); }
1354 
HasAltLabelTag() const1355 bool BamRecord::HasAltLabelTag() const { return impl_.HasTag(BamRecordTag::ALT_LABEL_TAG); }
1356 
HasBarcodes() const1357 bool BamRecord::HasBarcodes() const { return impl_.HasTag(BamRecordTag::BARCODES); }
1358 
HasBarcodeQuality() const1359 bool BamRecord::HasBarcodeQuality() const { return impl_.HasTag(BamRecordTag::BARCODE_QUALITY); }
1360 
HasLabelQV() const1361 bool BamRecord::HasLabelQV() const { return impl_.HasTag(BamRecordTag::LABEL_QV); }
1362 
HasDeletionQV() const1363 bool BamRecord::HasDeletionQV() const { return impl_.HasTag(BamRecordTag::DELETION_QV); }
1364 
HasDeletionTag() const1365 bool BamRecord::HasDeletionTag() const { return impl_.HasTag(BamRecordTag::DELETION_TAG); }
1366 
HasHoleNumber() const1367 bool BamRecord::HasHoleNumber() const
1368 {
1369     return impl_.HasTag(BamRecordTag::HOLE_NUMBER) &&
1370            !impl_.TagValue(BamRecordTag::HOLE_NUMBER).IsNull();
1371 }
1372 
HasInsertionQV() const1373 bool BamRecord::HasInsertionQV() const { return impl_.HasTag(BamRecordTag::INSERTION_QV); }
1374 
HasNumPasses() const1375 bool BamRecord::HasNumPasses() const { return impl_.HasTag(BamRecordTag::NUM_PASSES); }
1376 
HasPreBaseFrames() const1377 bool BamRecord::HasPreBaseFrames() const { return HasIPD(); }
1378 
HasIPD() const1379 bool BamRecord::HasIPD() const { return impl_.HasTag(BamRecordTag::IPD); }
1380 
HasLocalContextFlags() const1381 bool BamRecord::HasLocalContextFlags() const { return impl_.HasTag(BamRecordTag::CONTEXT_FLAGS); }
1382 
HasMergeQV() const1383 bool BamRecord::HasMergeQV() const { return impl_.HasTag(BamRecordTag::MERGE_QV); }
1384 
HasPulseMergeQV() const1385 bool BamRecord::HasPulseMergeQV() const { return impl_.HasTag(BamRecordTag::PULSE_MERGE_QV); }
1386 
HasPkmean() const1387 bool BamRecord::HasPkmean() const { return impl_.HasTag(BamRecordTag::PKMEAN); }
1388 
HasPkmean2() const1389 bool BamRecord::HasPkmean2() const { return impl_.HasTag(BamRecordTag::PKMEAN_2); }
1390 
HasPkmid() const1391 bool BamRecord::HasPkmid() const { return impl_.HasTag(BamRecordTag::PKMID); }
1392 
HasPkmid2() const1393 bool BamRecord::HasPkmid2() const { return impl_.HasTag(BamRecordTag::PKMID_2); }
1394 
HasPrePulseFrames() const1395 bool BamRecord::HasPrePulseFrames() const { return impl_.HasTag(BamRecordTag::PRE_PULSE_FRAMES); }
1396 
HasPulseCall() const1397 bool BamRecord::HasPulseCall() const
1398 {
1399     return impl_.HasTag(BamRecordTag::PULSE_CALL) &&
1400            !impl_.TagValue(BamRecordTag::PULSE_CALL).IsNull();
1401 }
1402 
HasPulseExclusion(void) const1403 bool BamRecord::HasPulseExclusion(void) const
1404 {
1405     return impl_.HasTag(BamRecordTag::PULSE_EXCLUSION);
1406 }
1407 
HasPulseCallWidth(void) const1408 bool BamRecord::HasPulseCallWidth(void) const
1409 {
1410     return impl_.HasTag(BamRecordTag::PULSE_CALL_WIDTH);
1411 }
1412 
HasPulseWidth() const1413 bool BamRecord::HasPulseWidth() const { return impl_.HasTag(BamRecordTag::PULSE_WIDTH); }
1414 
HasQueryEnd() const1415 bool BamRecord::HasQueryEnd() const { return impl_.HasTag(BamRecordTag::QUERY_END); }
1416 
HasQueryStart() const1417 bool BamRecord::HasQueryStart() const { return impl_.HasTag(BamRecordTag::QUERY_START); }
1418 
HasReadAccuracy() const1419 bool BamRecord::HasReadAccuracy() const
1420 {
1421     return impl_.HasTag(BamRecordTag::READ_ACCURACY) &&
1422            !impl_.TagValue(BamRecordTag::READ_ACCURACY).IsNull();
1423 }
1424 
HasScrapRegionType() const1425 bool BamRecord::HasScrapRegionType() const
1426 {
1427     return impl_.HasTag(BamRecordTag::SCRAP_REGION_TYPE) &&
1428            !impl_.TagValue(BamRecordTag::SCRAP_REGION_TYPE).IsNull();
1429 }
1430 
HasScrapZmwType() const1431 bool BamRecord::HasScrapZmwType() const
1432 {
1433     return impl_.HasTag(BamRecordTag::SCRAP_ZMW_TYPE) &&
1434            !impl_.TagValue(BamRecordTag::SCRAP_ZMW_TYPE).IsNull();
1435 }
1436 
HasStartFrame() const1437 bool BamRecord::HasStartFrame() const { return impl_.HasTag(BamRecordTag::START_FRAME); }
1438 
HasSignalToNoise() const1439 bool BamRecord::HasSignalToNoise() const { return impl_.HasTag(BamRecordTag::SNR); }
1440 
HasSubstitutionQV() const1441 bool BamRecord::HasSubstitutionQV() const { return impl_.HasTag(BamRecordTag::SUBSTITUTION_QV); }
1442 
HasSubstitutionTag() const1443 bool BamRecord::HasSubstitutionTag() const { return impl_.HasTag(BamRecordTag::SUBSTITUTION_TAG); }
1444 
Header() const1445 BamHeader BamRecord::Header() const { return header_; }
1446 
HoleNumber() const1447 int32_t BamRecord::HoleNumber() const
1448 {
1449     const Tag holeNumber = impl_.TagValue(BamRecordTag::HOLE_NUMBER);
1450     if (!holeNumber.IsNull()) return holeNumber.ToInt32();
1451 
1452     // missing zm tag - try to pull from name
1453     return internal::HoleNumberFromName(FullName());
1454 }
1455 
HoleNumber(const int32_t holeNumber)1456 BamRecord& BamRecord::HoleNumber(const int32_t holeNumber)
1457 {
1458     internal::CreateOrEdit(BamRecordTag::HOLE_NUMBER, holeNumber, &impl_);
1459     return *this;
1460 }
1461 
Impl()1462 BamRecordImpl& BamRecord::Impl() { return impl_; }
1463 
Impl() const1464 const BamRecordImpl& BamRecord::Impl() const { return impl_; }
1465 
InsertionQV(Orientation orientation,bool aligned,bool exciseSoftClips) const1466 QualityValues BamRecord::InsertionQV(Orientation orientation, bool aligned,
1467                                      bool exciseSoftClips) const
1468 {
1469     return FetchQualities(BamRecordTag::INSERTION_QV, orientation, aligned, exciseSoftClips);
1470 }
1471 
InsertionQV(const QualityValues & insertionQVs)1472 BamRecord& BamRecord::InsertionQV(const QualityValues& insertionQVs)
1473 {
1474     internal::CreateOrEdit(BamRecordTag::INSERTION_QV, insertionQVs.Fastq(), &impl_);
1475     return *this;
1476 }
1477 
IPD(Orientation orientation,bool aligned,bool exciseSoftClips) const1478 Frames BamRecord::IPD(Orientation orientation, bool aligned, bool exciseSoftClips) const
1479 {
1480     return FetchFrames(BamRecordTag::IPD, orientation, aligned, exciseSoftClips);
1481 }
1482 
IPD(const Frames & frames,const FrameEncodingType encoding)1483 BamRecord& BamRecord::IPD(const Frames& frames, const FrameEncodingType encoding)
1484 {
1485     if (encoding == FrameEncodingType::LOSSY)
1486         internal::CreateOrEdit(BamRecordTag::IPD, frames.Encode(), &impl_);
1487     else
1488         internal::CreateOrEdit(BamRecordTag::IPD, frames.Data(), &impl_);
1489     return *this;
1490 }
1491 
IPDRaw(Orientation orientation) const1492 Frames BamRecord::IPDRaw(Orientation orientation) const
1493 {
1494     const auto tagName = internal::BamRecordTags::LabelFor(BamRecordTag::IPD);
1495     const Tag frameTag = impl_.TagValue(tagName);
1496     if (frameTag.IsNull()) return {};
1497 
1498     Frames frames;
1499 
1500     // lossy frame codes
1501     if (frameTag.IsUInt8Array()) {
1502         const auto codes = frameTag.ToUInt8Array();
1503         const std::vector<uint16_t> codes16(codes.begin(), codes.end());
1504         frames.Data(std::move(codes16));
1505     }
1506 
1507     // lossless frame data
1508     else {
1509         assert(frameTag.IsUInt16Array());
1510         frames.Data(frameTag.ToUInt16Array());
1511     }
1512 
1513     // return in requested orientation
1514     internal::OrientTagDataAsRequested(&frames,
1515                                        Orientation::NATIVE,  // current
1516                                        orientation,          // requested
1517                                        impl_.IsReverseStrand());
1518     return frames;
1519 }
1520 
IsMapped() const1521 bool BamRecord::IsMapped() const { return impl_.IsMapped(); }
1522 
LabelQV(Orientation orientation,bool aligned,bool exciseSoftClips,PulseBehavior pulseBehavior) const1523 QualityValues BamRecord::LabelQV(Orientation orientation, bool aligned, bool exciseSoftClips,
1524                                  PulseBehavior pulseBehavior) const
1525 {
1526     return FetchQualities(BamRecordTag::LABEL_QV, orientation, aligned, exciseSoftClips,
1527                           pulseBehavior);
1528 }
1529 
LabelQV(const QualityValues & labelQVs)1530 BamRecord& BamRecord::LabelQV(const QualityValues& labelQVs)
1531 {
1532     internal::CreateOrEdit(BamRecordTag::LABEL_QV, labelQVs.Fastq(), &impl_);
1533     return *this;
1534 }
1535 
LocalContextFlags() const1536 LocalContextFlags BamRecord::LocalContextFlags() const
1537 {
1538     const auto tagName = internal::BamRecordTags::LabelFor(BamRecordTag::CONTEXT_FLAGS);
1539     const Tag cxTag = impl_.TagValue(tagName);
1540     return static_cast<PacBio::BAM::LocalContextFlags>(cxTag.ToUInt8());
1541 }
1542 
LocalContextFlags(const PacBio::BAM::LocalContextFlags flags)1543 BamRecord& BamRecord::LocalContextFlags(const PacBio::BAM::LocalContextFlags flags)
1544 {
1545     internal::CreateOrEdit(BamRecordTag::CONTEXT_FLAGS, static_cast<uint8_t>(flags), &impl_);
1546     return *this;
1547 }
1548 
Map(const int32_t referenceId,const Position refStart,const Strand strand,const Cigar & cigar,const uint8_t mappingQuality)1549 BamRecord& BamRecord::Map(const int32_t referenceId, const Position refStart, const Strand strand,
1550                           const Cigar& cigar, const uint8_t mappingQuality)
1551 {
1552     impl_.Position(refStart);
1553     impl_.ReferenceId(referenceId);
1554     impl_.CigarData(cigar);
1555     impl_.MapQuality(mappingQuality);
1556     impl_.SetMapped(true);
1557 
1558     if (strand == Strand::FORWARD)
1559         impl_.SetReverseStrand(false);
1560 
1561     else {
1562         assert(strand == Strand::REVERSE);
1563         impl_.SetReverseStrand(true);
1564 
1565         // switch seq & qual
1566         std::string sequence = impl_.Sequence();
1567         QualityValues qualities = impl_.Qualities();
1568 
1569         internal::ReverseComplement(sequence);
1570         internal::Reverse(qualities);
1571 
1572         impl_.SetSequenceAndQualities(sequence, qualities.Fastq());
1573     }
1574 
1575     // reset any cached aligned start/end
1576     alignedStart_ = PacBio::BAM::UnmappedPosition;
1577     alignedEnd_ = PacBio::BAM::UnmappedPosition;
1578 
1579     return *this;
1580 }
1581 
MapQuality() const1582 uint8_t BamRecord::MapQuality() const { return impl_.MapQuality(); }
1583 
MergeQV(Orientation orientation,bool aligned,bool exciseSoftClips) const1584 QualityValues BamRecord::MergeQV(Orientation orientation, bool aligned, bool exciseSoftClips) const
1585 {
1586     return FetchQualities(BamRecordTag::MERGE_QV, orientation, aligned, exciseSoftClips);
1587 }
1588 
MergeQV(const QualityValues & mergeQVs)1589 BamRecord& BamRecord::MergeQV(const QualityValues& mergeQVs)
1590 {
1591     internal::CreateOrEdit(BamRecordTag::MERGE_QV, mergeQVs.Fastq(), &impl_);
1592     return *this;
1593 }
1594 
MovieName() const1595 std::string BamRecord::MovieName() const { return ReadGroup().MovieName(); }
1596 
NumDeletedBases() const1597 size_t BamRecord::NumDeletedBases() const
1598 {
1599     auto tEnd = ReferenceEnd();
1600     auto tStart = ReferenceStart();
1601     auto numMatchesAndMismatches = NumMatchesAndMismatches();
1602     auto nM = numMatchesAndMismatches.first;
1603     auto nMM = numMatchesAndMismatches.second;
1604     return (tEnd - tStart - nM - nMM);
1605 }
1606 
NumInsertedBases() const1607 size_t BamRecord::NumInsertedBases() const
1608 {
1609     auto aEnd = AlignedEnd();
1610     auto aStart = AlignedStart();
1611     auto numMatchesAndMismatches = NumMatchesAndMismatches();
1612     auto nM = numMatchesAndMismatches.first;
1613     auto nMM = numMatchesAndMismatches.second;
1614     return (aEnd - aStart - nM - nMM);
1615 }
1616 
NumMatches() const1617 size_t BamRecord::NumMatches() const { return NumMatchesAndMismatches().first; }
1618 
NumMatchesAndMismatches() const1619 std::pair<size_t, size_t> BamRecord::NumMatchesAndMismatches() const
1620 {
1621     std::pair<size_t, size_t> result = std::make_pair(0, 0);
1622 
1623     auto b = internal::BamRecordMemory::GetRawData(this);
1624     uint32_t* cigarData = bam_get_cigar(b.get());
1625     for (uint32_t i = 0; i < b->core.n_cigar; ++i) {
1626         const auto type = static_cast<CigarOperationType>(bam_cigar_op(cigarData[i]));
1627         if (type == CigarOperationType::SEQUENCE_MATCH)
1628             result.first += bam_cigar_oplen(cigarData[i]);
1629         else if (type == CigarOperationType::SEQUENCE_MISMATCH)
1630             result.second += bam_cigar_oplen(cigarData[i]);
1631     }
1632     return result;
1633 }
1634 
NumMismatches() const1635 size_t BamRecord::NumMismatches() const { return NumMatchesAndMismatches().second; }
1636 
NumPasses() const1637 int32_t BamRecord::NumPasses() const
1638 {
1639     const auto tagName = internal::BamRecordTags::LabelFor(BamRecordTag::NUM_PASSES);
1640     const Tag numPasses = impl_.TagValue(tagName);
1641     return numPasses.ToInt32();
1642 }
1643 
NumPasses(const int32_t numPasses)1644 BamRecord& BamRecord::NumPasses(const int32_t numPasses)
1645 {
1646     internal::CreateOrEdit(BamRecordTag::NUM_PASSES, numPasses, &impl_);
1647     return *this;
1648 }
1649 
Pkmean(Orientation orientation,bool aligned,bool exciseSoftClips,PulseBehavior pulseBehavior) const1650 std::vector<float> BamRecord::Pkmean(Orientation orientation, bool aligned, bool exciseSoftClips,
1651                                      PulseBehavior pulseBehavior) const
1652 {
1653     return FetchPhotons(BamRecordTag::PKMEAN, orientation, aligned, exciseSoftClips, pulseBehavior);
1654 }
1655 
Pkmean(const std::vector<float> & photons)1656 BamRecord& BamRecord::Pkmean(const std::vector<float>& photons)
1657 {
1658     Pkmean(EncodePhotons(photons));
1659     return *this;
1660 }
1661 
Pkmean(const std::vector<uint16_t> & encodedPhotons)1662 BamRecord& BamRecord::Pkmean(const std::vector<uint16_t>& encodedPhotons)
1663 {
1664     internal::CreateOrEdit(BamRecordTag::PKMEAN, encodedPhotons, &impl_);
1665     return *this;
1666 }
1667 
Pkmid(Orientation orientation,bool aligned,bool exciseSoftClips,PulseBehavior pulseBehavior) const1668 std::vector<float> BamRecord::Pkmid(Orientation orientation, bool aligned, bool exciseSoftClips,
1669                                     PulseBehavior pulseBehavior) const
1670 {
1671     return FetchPhotons(BamRecordTag::PKMID, orientation, aligned, exciseSoftClips, pulseBehavior);
1672 }
1673 
Pkmid(const std::vector<float> & photons)1674 BamRecord& BamRecord::Pkmid(const std::vector<float>& photons)
1675 {
1676     Pkmid(EncodePhotons(photons));
1677     return *this;
1678 }
1679 
Pkmid(const std::vector<uint16_t> & encodedPhotons)1680 BamRecord& BamRecord::Pkmid(const std::vector<uint16_t>& encodedPhotons)
1681 {
1682     internal::CreateOrEdit(BamRecordTag::PKMID, encodedPhotons, &impl_);
1683     return *this;
1684 }
1685 
Pkmean2(Orientation orientation,bool aligned,bool exciseSoftClips,PulseBehavior pulseBehavior) const1686 std::vector<float> BamRecord::Pkmean2(Orientation orientation, bool aligned, bool exciseSoftClips,
1687                                       PulseBehavior pulseBehavior) const
1688 {
1689     return FetchPhotons(BamRecordTag::PKMEAN_2, orientation, aligned, exciseSoftClips,
1690                         pulseBehavior);
1691 }
1692 
Pkmean2(const std::vector<float> & photons)1693 BamRecord& BamRecord::Pkmean2(const std::vector<float>& photons)
1694 {
1695     Pkmean2(EncodePhotons(photons));
1696     return *this;
1697 }
1698 
Pkmean2(const std::vector<uint16_t> & encodedPhotons)1699 BamRecord& BamRecord::Pkmean2(const std::vector<uint16_t>& encodedPhotons)
1700 {
1701     internal::CreateOrEdit(BamRecordTag::PKMEAN_2, encodedPhotons, &impl_);
1702     return *this;
1703 }
1704 
Pkmid2(Orientation orientation,bool aligned,bool exciseSoftClips,PulseBehavior pulseBehavior) const1705 std::vector<float> BamRecord::Pkmid2(Orientation orientation, bool aligned, bool exciseSoftClips,
1706                                      PulseBehavior pulseBehavior) const
1707 {
1708     return FetchPhotons(BamRecordTag::PKMID_2, orientation, aligned, exciseSoftClips,
1709                         pulseBehavior);
1710 }
1711 
Pkmid2(const std::vector<float> & photons)1712 BamRecord& BamRecord::Pkmid2(const std::vector<float>& photons)
1713 {
1714     Pkmid2(EncodePhotons(photons));
1715     return *this;
1716 }
1717 
Pkmid2(const std::vector<uint16_t> & encodedPhotons)1718 BamRecord& BamRecord::Pkmid2(const std::vector<uint16_t>& encodedPhotons)
1719 {
1720     internal::CreateOrEdit(BamRecordTag::PKMID_2, encodedPhotons, &impl_);
1721     return *this;
1722 }
1723 
PreBaseFrames(Orientation orientation,bool aligned,bool exciseSoftClips) const1724 Frames BamRecord::PreBaseFrames(Orientation orientation, bool aligned, bool exciseSoftClips) const
1725 {
1726     return IPD(orientation, aligned, exciseSoftClips);
1727 }
1728 
PreBaseFrames(const Frames & frames,const FrameEncodingType encoding)1729 BamRecord& BamRecord::PreBaseFrames(const Frames& frames, const FrameEncodingType encoding)
1730 {
1731     return IPD(frames, encoding);
1732 }
1733 
PrePulseFrames(Orientation orientation,bool aligned,bool exciseSoftClips,PulseBehavior pulseBehavior) const1734 Frames BamRecord::PrePulseFrames(Orientation orientation, bool aligned, bool exciseSoftClips,
1735                                  PulseBehavior pulseBehavior) const
1736 {
1737     return FetchFrames(BamRecordTag::PRE_PULSE_FRAMES, orientation, aligned, exciseSoftClips,
1738                        pulseBehavior);
1739 }
1740 
PrePulseFrames(const Frames & frames,const FrameEncodingType encoding)1741 BamRecord& BamRecord::PrePulseFrames(const Frames& frames, const FrameEncodingType encoding)
1742 {
1743     if (encoding == FrameEncodingType::LOSSY) {
1744         internal::CreateOrEdit(BamRecordTag::PRE_PULSE_FRAMES, frames.Encode(), &impl_);
1745     } else {
1746         internal::CreateOrEdit(BamRecordTag::PRE_PULSE_FRAMES, frames.Data(), &impl_);
1747     }
1748     return *this;
1749 }
1750 
PulseWidthRaw(Orientation orientation,bool,bool) const1751 Frames BamRecord::PulseWidthRaw(Orientation orientation, bool /* aligned */,
1752                                 bool /* exciseSoftClips */) const
1753 {
1754     const auto tagName = internal::BamRecordTags::LabelFor(BamRecordTag::PULSE_WIDTH);
1755     const Tag frameTag = impl_.TagValue(tagName);
1756     if (frameTag.IsNull()) return {};
1757 
1758     Frames frames;
1759 
1760     // lossy frame codes
1761     if (frameTag.IsUInt8Array()) {
1762         const auto codes = frameTag.ToUInt8Array();
1763         const std::vector<uint16_t> codes16(codes.begin(), codes.end());
1764         frames.Data(std::move(codes16));
1765     }
1766 
1767     // lossless frame data
1768     else {
1769         assert(frameTag.IsUInt16Array());
1770         frames.Data(frameTag.ToUInt16Array());
1771     }
1772 
1773     // return in requested orientation
1774     internal::OrientTagDataAsRequested(&frames,
1775                                        Orientation::NATIVE,  // current
1776                                        orientation,          // requested
1777                                        impl_.IsReverseStrand());
1778     return frames;
1779 }
1780 
PulseMergeQV(Orientation orientation,bool aligned,bool exciseSoftClips,PulseBehavior pulseBehavior) const1781 QualityValues BamRecord::PulseMergeQV(Orientation orientation, bool aligned, bool exciseSoftClips,
1782                                       PulseBehavior pulseBehavior) const
1783 {
1784     return FetchQualities(BamRecordTag::PULSE_MERGE_QV, orientation, aligned, exciseSoftClips,
1785                           pulseBehavior);
1786 }
1787 
PulseMergeQV(const QualityValues & mergeQVs)1788 BamRecord& BamRecord::PulseMergeQV(const QualityValues& mergeQVs)
1789 {
1790     internal::CreateOrEdit(BamRecordTag::PULSE_MERGE_QV, mergeQVs.Fastq(), &impl_);
1791     return *this;
1792 }
1793 
PulseCall(Orientation orientation,bool aligned,bool exciseSoftClips,PulseBehavior pulseBehavior) const1794 std::string BamRecord::PulseCall(Orientation orientation, bool aligned, bool exciseSoftClips,
1795                                  PulseBehavior pulseBehavior) const
1796 {
1797     return FetchBases(BamRecordTag::PULSE_CALL, orientation, aligned, exciseSoftClips,
1798                       pulseBehavior);
1799 }
1800 
PulseCall(const std::string & tags)1801 BamRecord& BamRecord::PulseCall(const std::string& tags)
1802 {
1803     internal::CreateOrEdit(BamRecordTag::PULSE_CALL, tags, &impl_);
1804     return *this;
1805 }
1806 
PulseCallWidth(Orientation orientation,bool aligned,bool exciseSoftClips,PulseBehavior pulseBehavior) const1807 Frames BamRecord::PulseCallWidth(Orientation orientation, bool aligned, bool exciseSoftClips,
1808                                  PulseBehavior pulseBehavior) const
1809 {
1810     return FetchFrames(BamRecordTag::PULSE_CALL_WIDTH, orientation, aligned, exciseSoftClips,
1811                        pulseBehavior);
1812 }
1813 
PulseCallWidth(const Frames & frames,const FrameEncodingType encoding)1814 BamRecord& BamRecord::PulseCallWidth(const Frames& frames, const FrameEncodingType encoding)
1815 {
1816     if (encoding == FrameEncodingType::LOSSY) {
1817         internal::CreateOrEdit(BamRecordTag::PULSE_CALL_WIDTH, frames.Encode(), &impl_);
1818     } else {
1819         internal::CreateOrEdit(BamRecordTag::PULSE_CALL_WIDTH, frames.Data(), &impl_);
1820     }
1821     return *this;
1822 }
1823 
PulseExclusionReason(Orientation orientation,bool aligned,bool exciseSoftClips,PulseBehavior pulseBehavior) const1824 std::vector<PacBio::BAM::PulseExclusionReason> BamRecord::PulseExclusionReason(
1825     Orientation orientation, bool aligned, bool exciseSoftClips, PulseBehavior pulseBehavior) const
1826 {
1827     std::vector<PacBio::BAM::PulseExclusionReason> reasons;
1828 
1829     const auto reasonNums = FetchUInt8s(BamRecordTag::PULSE_EXCLUSION, orientation, aligned,
1830                                         exciseSoftClips, pulseBehavior);
1831 
1832     std::transform(
1833         reasonNums.cbegin(), reasonNums.cend(), std::back_inserter(reasons),
1834         [](const uint8_t num) { return static_cast<PacBio::BAM::PulseExclusionReason>(num); });
1835 
1836     return reasons;
1837 }
1838 
PulseExclusionReason(const std::vector<PacBio::BAM::PulseExclusionReason> & reasons)1839 BamRecord& BamRecord::PulseExclusionReason(
1840     const std::vector<PacBio::BAM::PulseExclusionReason>& reasons)
1841 {
1842     std::vector<uint8_t> reasonNums;
1843     std::transform(reasons.cbegin(), reasons.cend(), std::back_inserter(reasonNums),
1844                    [](const PacBio::BAM::PulseExclusionReason& reason) {
1845                        return static_cast<uint8_t>(reason);
1846                    });
1847 
1848     internal::CreateOrEdit(BamRecordTag::PULSE_EXCLUSION, reasonNums, &impl_);
1849     return *this;
1850 }
1851 
PulseWidth(Orientation orientation,bool aligned,bool exciseSoftClips) const1852 Frames BamRecord::PulseWidth(Orientation orientation, bool aligned, bool exciseSoftClips) const
1853 {
1854     return FetchFrames(BamRecordTag::PULSE_WIDTH, orientation, aligned, exciseSoftClips,
1855                        PulseBehavior::ALL);
1856 }
1857 
PulseWidth(const Frames & frames,const FrameEncodingType encoding)1858 BamRecord& BamRecord::PulseWidth(const Frames& frames, const FrameEncodingType encoding)
1859 {
1860     if (encoding == FrameEncodingType::LOSSY) {
1861         internal::CreateOrEdit(BamRecordTag::PULSE_WIDTH, frames.Encode(), &impl_);
1862     } else {
1863         internal::CreateOrEdit(BamRecordTag::PULSE_WIDTH, frames.Data(), &impl_);
1864     }
1865     return *this;
1866 }
1867 
Qualities(Orientation orientation,bool aligned,bool exciseSoftClips) const1868 QualityValues BamRecord::Qualities(Orientation orientation, bool aligned,
1869                                    bool exciseSoftClips) const
1870 {
1871     return FetchQualities(BamRecordTag::QUAL, orientation, aligned, exciseSoftClips);
1872 }
1873 
QueryEnd() const1874 Position BamRecord::QueryEnd() const
1875 {
1876     // try 'qe' tag
1877     const auto tagName = internal::BamRecordTags::LabelFor(BamRecordTag::QUERY_END);
1878     const Tag qe = impl_.TagValue(tagName);
1879     if (!qe.IsNull()) return qe.ToInt32();
1880 
1881     // tag missing, need to check movie name (fallback for non-PB BAMs, but ignore for CCS reads)
1882     RecordType type;
1883     try {
1884         type = Type();
1885     } catch (std::exception&) {
1886         return 0;
1887     }
1888     if (type == RecordType::CCS) throw std::runtime_error{"no query end for CCS read type"};
1889     if (type == RecordType::TRANSCRIPT)
1890         throw std::runtime_error{"no query end for transcript read type"};
1891 
1892     // PacBio BAM, non-CCS/transcript
1893     try {
1894         return internal::QueryEndFromName(FullName());
1895     } catch (std::exception&) {
1896         // return fallback position
1897         return 0;
1898     }
1899 }
1900 
QueryEnd(const Position pos)1901 BamRecord& BamRecord::QueryEnd(const Position pos)
1902 {
1903     internal::CreateOrEdit(BamRecordTag::QUERY_END, static_cast<int32_t>(pos), &impl_);
1904     UpdateName();
1905     return *this;
1906 }
1907 
QueryStart() const1908 Position BamRecord::QueryStart() const
1909 {
1910     // try 'qs' tag
1911     const auto tagName = internal::BamRecordTags::LabelFor(BamRecordTag::QUERY_START);
1912     const Tag qs = impl_.TagValue(tagName);
1913     if (!qs.IsNull()) return qs.ToInt32();
1914 
1915     // tag missing, need to check movie name (fallback for non-PB BAMs, but ignore for CCS reads)
1916     RecordType type;
1917     try {
1918         type = Type();
1919     } catch (std::exception&) {
1920         return 0;
1921     }
1922     if (type == RecordType::CCS) throw std::runtime_error{"no query start for CCS read type"};
1923     if (type == RecordType::TRANSCRIPT)
1924         throw std::runtime_error{"no query start for transcript read type"};
1925 
1926     // PacBio BAM, non-CCS/transcript
1927     try {
1928         return internal::QueryStartFromName(FullName());
1929     } catch (std::exception&) {
1930         // return fallback position
1931         return 0;
1932     }
1933 }
1934 
QueryStart(const Position pos)1935 BamRecord& BamRecord::QueryStart(const Position pos)
1936 {
1937     internal::CreateOrEdit(BamRecordTag::QUERY_START, static_cast<int32_t>(pos), &impl_);
1938     UpdateName();
1939     return *this;
1940 }
1941 
ReadAccuracy() const1942 Accuracy BamRecord::ReadAccuracy() const
1943 {
1944     const auto tagName = internal::BamRecordTags::LabelFor(BamRecordTag::READ_ACCURACY);
1945     const Tag readAccuracy = impl_.TagValue(tagName);
1946     return {readAccuracy.ToFloat()};
1947 }
1948 
ReadAccuracy(const Accuracy & accuracy)1949 BamRecord& BamRecord::ReadAccuracy(const Accuracy& accuracy)
1950 {
1951     internal::CreateOrEdit(BamRecordTag::READ_ACCURACY, static_cast<float>(accuracy), &impl_);
1952     return *this;
1953 }
1954 
ReadGroup() const1955 ReadGroupInfo BamRecord::ReadGroup() const { return header_.ReadGroup(ReadGroupId()); }
1956 
ReadGroup(const ReadGroupInfo & rg)1957 BamRecord& BamRecord::ReadGroup(const ReadGroupInfo& rg)
1958 {
1959     internal::CreateOrEdit(BamRecordTag::READ_GROUP, rg.Id(), &impl_);
1960     UpdateName();
1961     return *this;
1962 }
1963 
ReadGroupId() const1964 std::string BamRecord::ReadGroupId() const
1965 {
1966     const auto tagName = internal::BamRecordTags::LabelFor(BamRecordTag::READ_GROUP);
1967     const Tag rgTag = impl_.TagValue(tagName);
1968     if (rgTag.IsNull()) return {};
1969     return rgTag.ToString();
1970 }
1971 
ReadGroupId(const std::string & id)1972 BamRecord& BamRecord::ReadGroupId(const std::string& id)
1973 {
1974     internal::CreateOrEdit(BamRecordTag::READ_GROUP, id, &impl_);
1975     UpdateName();
1976     return *this;
1977 }
1978 
ReadGroupNumericId() const1979 int32_t BamRecord::ReadGroupNumericId() const { return ReadGroupInfo::IdToInt(ReadGroupId()); }
1980 
ReferenceEnd() const1981 Position BamRecord::ReferenceEnd() const
1982 {
1983     if (!impl_.IsMapped()) return PacBio::BAM::UnmappedPosition;
1984     const auto htsData = internal::BamRecordMemory::GetRawData(impl_);
1985     if (!htsData) return PacBio::BAM::UnmappedPosition;
1986     return bam_endpos(htsData.get());
1987 }
1988 
ReferenceId() const1989 int32_t BamRecord::ReferenceId() const { return impl_.ReferenceId(); }
1990 
ReferenceName() const1991 std::string BamRecord::ReferenceName() const
1992 {
1993     if (IsMapped())
1994         return Header().SequenceName(ReferenceId());
1995     else
1996         throw std::runtime_error{"unmapped record has no associated reference name"};
1997 }
1998 
ReferenceStart() const1999 Position BamRecord::ReferenceStart() const { return impl_.Position(); }
2000 
ResetCachedPositions() const2001 void BamRecord::ResetCachedPositions() const
2002 {
2003     alignedEnd_ = PacBio::BAM::UnmappedPosition;
2004     alignedStart_ = PacBio::BAM::UnmappedPosition;
2005 }
2006 
ResetCachedPositions()2007 void BamRecord::ResetCachedPositions()
2008 {
2009     alignedEnd_ = PacBio::BAM::UnmappedPosition;
2010     alignedStart_ = PacBio::BAM::UnmappedPosition;
2011 }
2012 
ScrapRegionType() const2013 VirtualRegionType BamRecord::ScrapRegionType() const
2014 {
2015     const auto tagName = internal::BamRecordTags::LabelFor(BamRecordTag::SCRAP_REGION_TYPE);
2016     const Tag srTag = impl_.TagValue(tagName);
2017     return VirtualRegionTypeMap::ParseChar[srTag.ToUInt8()];
2018 }
2019 
ScrapRegionType(const VirtualRegionType type)2020 BamRecord& BamRecord::ScrapRegionType(const VirtualRegionType type)
2021 {
2022     internal::CreateOrEdit(BamRecordTag::SCRAP_REGION_TYPE, static_cast<uint8_t>(type), &impl_);
2023     return *this;
2024 }
2025 
ScrapRegionType(const char type)2026 BamRecord& BamRecord::ScrapRegionType(const char type)
2027 {
2028     internal::CreateOrEdit(BamRecordTag::SCRAP_REGION_TYPE, type, &impl_);
2029     return *this;
2030 }
2031 
ScrapZmwType() const2032 ZmwType BamRecord::ScrapZmwType() const
2033 {
2034     const auto tagName = internal::BamRecordTags::LabelFor(BamRecordTag::SCRAP_ZMW_TYPE);
2035     const Tag szTag = impl_.TagValue(tagName);
2036     return ZmwTypeMap::ParseChar[szTag.ToUInt8()];
2037 }
2038 
ScrapZmwType(const ZmwType type)2039 BamRecord& BamRecord::ScrapZmwType(const ZmwType type)
2040 {
2041     internal::CreateOrEdit(BamRecordTag::SCRAP_ZMW_TYPE, static_cast<uint8_t>(type), &impl_);
2042     return *this;
2043 }
2044 
ScrapZmwType(const char type)2045 BamRecord& BamRecord::ScrapZmwType(const char type)
2046 {
2047     internal::CreateOrEdit(BamRecordTag::SCRAP_ZMW_TYPE, type, &impl_);
2048     return *this;
2049 }
2050 
Sequence(const Orientation orientation,bool aligned,bool exciseSoftClips) const2051 std::string BamRecord::Sequence(const Orientation orientation, bool aligned,
2052                                 bool exciseSoftClips) const
2053 {
2054     return FetchBases(BamRecordTag::SEQ, orientation, aligned, exciseSoftClips);
2055 }
2056 
SignalToNoise() const2057 std::vector<float> BamRecord::SignalToNoise() const
2058 {
2059     const auto tagName = internal::BamRecordTags::LabelFor(BamRecordTag::SNR);
2060     const Tag snTag = impl_.TagValue(tagName);
2061     return snTag.ToFloatArray();
2062 }
2063 
SignalToNoise(const std::vector<float> & snr)2064 BamRecord& BamRecord::SignalToNoise(const std::vector<float>& snr)
2065 {
2066     internal::CreateOrEdit(BamRecordTag::SNR, snr, &impl_);
2067     return *this;
2068 }
2069 
StartFrame(Orientation orientation,bool aligned,bool exciseSoftClips,PulseBehavior pulseBehavior) const2070 std::vector<uint32_t> BamRecord::StartFrame(Orientation orientation, bool aligned,
2071                                             bool exciseSoftClips, PulseBehavior pulseBehavior) const
2072 {
2073     return FetchUInt32s(BamRecordTag::START_FRAME, orientation, aligned, exciseSoftClips,
2074                         pulseBehavior);
2075 }
2076 
StartFrame(const std::vector<uint32_t> & startFrame)2077 BamRecord& BamRecord::StartFrame(const std::vector<uint32_t>& startFrame)
2078 {
2079     internal::CreateOrEdit(BamRecordTag::START_FRAME, startFrame, &impl_);
2080     return *this;
2081 }
2082 
SubstitutionQV(Orientation orientation,bool aligned,bool exciseSoftClips) const2083 QualityValues BamRecord::SubstitutionQV(Orientation orientation, bool aligned,
2084                                         bool exciseSoftClips) const
2085 {
2086     return FetchQualities(BamRecordTag::SUBSTITUTION_QV, orientation, aligned, exciseSoftClips);
2087 }
2088 
SubstitutionQV(const QualityValues & substitutionQVs)2089 BamRecord& BamRecord::SubstitutionQV(const QualityValues& substitutionQVs)
2090 {
2091     internal::CreateOrEdit(BamRecordTag::SUBSTITUTION_QV, substitutionQVs.Fastq(), &impl_);
2092     return *this;
2093 }
2094 
SubstitutionTag(Orientation orientation,bool aligned,bool exciseSoftClips) const2095 std::string BamRecord::SubstitutionTag(Orientation orientation, bool aligned,
2096                                        bool exciseSoftClips) const
2097 {
2098     return FetchBases(BamRecordTag::SUBSTITUTION_TAG, orientation, aligned, exciseSoftClips);
2099 }
2100 
SubstitutionTag(const std::string & tags)2101 BamRecord& BamRecord::SubstitutionTag(const std::string& tags)
2102 {
2103     internal::CreateOrEdit(BamRecordTag::SUBSTITUTION_TAG, tags, &impl_);
2104     return *this;
2105 }
2106 
Type() const2107 RecordType BamRecord::Type() const
2108 {
2109     try {
2110         const auto typeName = ReadGroup().ReadType();
2111         return internal::NameToType(typeName);
2112     } catch (std::exception&) {
2113 
2114         // read group not found, peek at name to see if we're possibly
2115         // CCS or TRANSCRIPT
2116         //
2117         const auto name = FullName();
2118         if (name.find("transcript") == 0)
2119             return RecordType::TRANSCRIPT;
2120         else if (name.find("/ccs") != std::string::npos)
2121             return RecordType::CCS;
2122         else
2123             return RecordType::UNKNOWN;
2124     }
2125 }
2126 
UpdateName()2127 void BamRecord::UpdateName()
2128 {
2129     std::string newName;
2130     newName.reserve(100);
2131 
2132     const auto holeNumber = (HasHoleNumber() ? std::to_string(HoleNumber()) : "?");
2133     if (Type() == RecordType::TRANSCRIPT) {
2134         newName = "transcript/" + holeNumber;
2135     } else {
2136         newName += MovieName();
2137         newName += "/";
2138         newName += holeNumber;
2139         newName += "/";
2140 
2141         if (Type() == RecordType::CCS)
2142             newName += "ccs";
2143 
2144         else {
2145             if (HasQueryStart())
2146                 newName += std::to_string(QueryStart());
2147             else
2148                 newName += "?";
2149 
2150             newName += '_';
2151 
2152             if (HasQueryEnd())
2153                 newName += std::to_string(QueryEnd());
2154             else
2155                 newName += "?";
2156         }
2157     }
2158     impl_.Name(newName);
2159 }
2160 
2161 }  // namespace BAM
2162 }  // namespace PacBio
2163