1 // File Description
2 /// \file BamRecord.cpp
3 /// \brief Implements the BamRecord class.
4 //
5 // Author: Derek Barnett
6
7 #include "PbbamInternalConfig.h"
8
9 #include "pbbam/BamRecord.h"
10
11 #include <cstddef>
12 #include <cstdint>
13 #include <iostream>
14 #include <stdexcept>
15
16 #include <htslib/sam.h>
17 #include <boost/numeric/conversion/cast.hpp>
18
19 #include "BamRecordTags.h"
20 #include "MemoryUtils.h"
21 #include "Pulse2BaseCache.h"
22 #include "SequenceUtils.h"
23 #include "pbbam/MakeUnique.h"
24 #include "pbbam/ZmwTypeMap.h"
25 #include "pbbam/virtual/VirtualRegionTypeMap.h"
26
27 namespace PacBio {
28 namespace BAM {
29 namespace internal {
30
31 // record type names
32 static const std::string recordTypeName_ZMW{"ZMW"};
33 static const std::string recordTypeName_Polymerase{"POLYMERASE"};
34 static const std::string recordTypeName_HqRegion{"HQREGION"};
35 static const std::string recordTypeName_Subread{"SUBREAD"};
36 static const std::string recordTypeName_CCS{"CCS"};
37 static const std::string recordTypeName_Scrap{"SCRAP"};
38 static const std::string recordTypeName_Transcript{"TRANSCRIPT"};
39 static const std::string recordTypeName_Unknown{"UNKNOWN"};
40
HoleNumberFromName(const std::string & fullName)41 static int32_t HoleNumberFromName(const std::string& fullName)
42 {
43 const auto mainTokens = Split(fullName, '/');
44 if (mainTokens.at(0) == "transcript") {
45 if (mainTokens.size() != 2) throw std::runtime_error{"malformed transcript record name"};
46 return std::stoi(mainTokens.at(1));
47 } else {
48 if (mainTokens.size() != 3) throw std::runtime_error("malformed record name");
49 return std::stoi(mainTokens.at(1));
50 }
51 }
52
QueryEndFromName(const std::string & fullName)53 static Position QueryEndFromName(const std::string& fullName)
54 {
55 const auto mainTokens = Split(fullName, '/');
56 if (mainTokens.size() != 3) throw std::runtime_error{"malformed record name"};
57
58 const auto queryTokens = Split(mainTokens.at(2), '_');
59 if (queryTokens.size() != 2) throw std::runtime_error{"malformed record name"};
60
61 return stoi(queryTokens.at(1));
62 }
63
QueryStartFromName(const std::string & fullName)64 static Position QueryStartFromName(const std::string& fullName)
65 {
66 const auto mainTokens = Split(fullName, '/');
67 if (mainTokens.size() != 3) throw std::runtime_error{"malformed record name"};
68
69 const auto queryTokens = Split(mainTokens.at(2), '_');
70 if (queryTokens.size() != 2) throw std::runtime_error{"malformed record name"};
71
72 return stoi(queryTokens.at(0));
73 }
74
Label(const BamRecordTag tag)75 static inline std::string Label(const BamRecordTag tag) { return BamRecordTags::LabelFor(tag); }
76
CreateOrEdit(const BamRecordTag tag,const Tag & value,BamRecordImpl * impl)77 static BamRecordImpl* CreateOrEdit(const BamRecordTag tag, const Tag& value, BamRecordImpl* impl)
78 {
79 if (impl->HasTag(tag))
80 impl->EditTag(tag, value);
81 else
82 impl->AddTag(tag, value);
83 return impl;
84 }
85
AlignedOffsets(const BamRecord & record,const int seqLength)86 static std::pair<int32_t, int32_t> AlignedOffsets(const BamRecord& record, const int seqLength)
87 {
88 int32_t startOffset = 0;
89 int32_t endOffset = seqLength;
90
91 const auto b = internal::BamRecordMemory::GetRawData(record);
92 uint32_t* cigarData = bam_get_cigar(b.get());
93 const size_t numCigarOps = b->core.n_cigar;
94 if (numCigarOps > 0) {
95
96 // start offset
97 for (size_t i = 0; i < numCigarOps; ++i) {
98 const auto type = static_cast<CigarOperationType>(bam_cigar_op(cigarData[i]));
99 if (type == CigarOperationType::HARD_CLIP) {
100 if (startOffset != 0 && startOffset != seqLength) {
101 startOffset = -1;
102 break;
103 }
104 } else if (type == CigarOperationType::SOFT_CLIP)
105 startOffset += bam_cigar_oplen(cigarData[i]);
106 else
107 break;
108 }
109
110 // end offset
111 for (int i = numCigarOps - 1; i >= 0; --i) {
112 const auto type = static_cast<CigarOperationType>(bam_cigar_op(cigarData[i]));
113 if (type == CigarOperationType::HARD_CLIP) {
114 if (endOffset != 0 && endOffset != seqLength) {
115 endOffset = -1;
116 break;
117 }
118 } else if (type == CigarOperationType::SOFT_CLIP)
119 endOffset -= bam_cigar_oplen(cigarData[i]);
120 else
121 break;
122 }
123
124 if (endOffset == 0) endOffset = seqLength;
125 }
126 return {startOffset, endOffset};
127 }
128
129 template <typename T>
Clip(const T & input,const size_t pos,const size_t len)130 T Clip(const T& input, const size_t pos, const size_t len)
131 {
132 if (input.empty()) return {};
133 return T{input.cbegin() + pos, input.cbegin() + pos + len};
134 }
135
136 template <typename T>
ClipPulse(const T & input,internal::Pulse2BaseCache * p2bCache,const size_t pos,const size_t len)137 T ClipPulse(const T& input, internal::Pulse2BaseCache* p2bCache, const size_t pos, const size_t len)
138 {
139 assert(p2bCache);
140 if (input.empty()) return {};
141
142 // find start
143 size_t start = p2bCache->FindFirst();
144 size_t basesSeen = 0;
145 while (basesSeen < pos) {
146 start = p2bCache->FindNext(start);
147 ++basesSeen;
148 }
149
150 // find end
151 size_t end = start;
152 size_t seen = 1;
153 while (seen < len) {
154 end = p2bCache->FindNext(end);
155 ++seen;
156 }
157
158 // return clipped
159 return {input.cbegin() + start, input.cbegin() + end + 1};
160 }
161
162 template <class InputIt, class Size, class OutputIt>
Move_N(InputIt first,Size count,OutputIt result)163 OutputIt Move_N(InputIt first, Size count, OutputIt result)
164 {
165 return std::move(first, first + count, result);
166 }
167
168 template <typename F, typename N>
ClipAndGapify(const BamRecordImpl & impl,const bool aligned,const bool exciseSoftClips,F * seq,N paddingNullValue,N deletionNullValue)169 static void ClipAndGapify(const BamRecordImpl& impl, const bool aligned, const bool exciseSoftClips,
170 F* seq, N paddingNullValue, N deletionNullValue)
171 {
172 assert(seq);
173
174 const bool clipOrGapRequested = aligned || exciseSoftClips;
175 if (impl.IsMapped() && clipOrGapRequested) {
176 // determine final container length
177 auto incrementsOutputLength = [](const CigarOperationType type, const bool isAligned,
178 const bool exciseSoftClipsFromAln) {
179 if (type == CigarOperationType::HARD_CLIP ||
180 type == CigarOperationType::REFERENCE_SKIP) {
181 return false;
182 } else if (type == CigarOperationType::SOFT_CLIP && exciseSoftClipsFromAln) {
183 return false;
184 } else if (!isAligned && (type == CigarOperationType::DELETION ||
185 type == CigarOperationType::PADDING)) {
186 return false;
187 } else
188 return true;
189 };
190
191 size_t outputLength = 0;
192 const auto cigar = impl.CigarData();
193 for (const CigarOperation& op : cigar) {
194 if (incrementsOutputLength(op.Type(), aligned, exciseSoftClips))
195 outputLength += op.Length();
196 }
197
198 // move original data to temp, prep output container size
199 F originalSeq = std::move(*seq);
200 seq->resize(outputLength);
201
202 // apply CIGAR ops
203 size_t srcIndex = 0;
204 size_t dstIndex = 0;
205 for (const CigarOperation& op : cigar) {
206 const auto opType = op.Type();
207 const auto opLength = op.Length();
208
209 // nothing to do for hard-clipped & ref-skipped positions
210 if (opType == CigarOperationType::HARD_CLIP ||
211 opType == CigarOperationType::REFERENCE_SKIP) {
212 continue;
213 }
214
215 // maybe skip soft-clipped positions
216 else if (opType == CigarOperationType::SOFT_CLIP) {
217 if (exciseSoftClips)
218 srcIndex += opLength;
219 else {
220 Move_N(originalSeq.begin() + srcIndex, opLength, seq->begin() + dstIndex);
221 srcIndex += opLength;
222 dstIndex += opLength;
223 }
224 }
225
226 // maybe add deletion/padding values
227 else if (aligned && opType == CigarOperationType::DELETION) {
228 for (size_t i = 0; i < opLength; ++i)
229 (*seq)[dstIndex++] = deletionNullValue;
230 } else if (aligned && opType == CigarOperationType::PADDING) {
231 for (size_t i = 0; i < opLength; ++i)
232 (*seq)[dstIndex++] = paddingNullValue;
233 }
234
235 // all other CIGAR ops
236 else {
237 Move_N(originalSeq.begin() + srcIndex, opLength, seq->begin() + dstIndex);
238 srcIndex += opLength;
239 dstIndex += opLength;
240 }
241 }
242 }
243 }
244
ClipAndGapifyBases(const BamRecordImpl & impl,const bool aligned,const bool exciseSoftClips,std::string * seq)245 static inline void ClipAndGapifyBases(const BamRecordImpl& impl, const bool aligned,
246 const bool exciseSoftClips, std::string* seq)
247 {
248 ClipAndGapify<std::string, char>(impl, aligned, exciseSoftClips, seq, '*', '-');
249 }
250
ClipAndGapifyFrames(const BamRecordImpl & impl,const bool aligned,const bool exciseSoftClips,Frames * frames)251 static inline void ClipAndGapifyFrames(const BamRecordImpl& impl, const bool aligned,
252 const bool exciseSoftClips, Frames* frames)
253 {
254 assert(frames);
255 std::vector<uint16_t> data{std::move(frames->Data())};
256 ClipAndGapify<std::vector<uint16_t>, uint16_t>(impl, aligned, exciseSoftClips, &data, 0, 0);
257 frames->Data(data);
258 }
259
ClipAndGapifyPhotons(const BamRecordImpl & impl,const bool aligned,const bool exciseSoftClips,std::vector<float> * data)260 static inline void ClipAndGapifyPhotons(const BamRecordImpl& impl, const bool aligned,
261 const bool exciseSoftClips, std::vector<float>* data)
262 {
263 ClipAndGapify<std::vector<float>, float>(impl, aligned, exciseSoftClips, data, 0.0, 0.0);
264 }
265
ClipAndGapifyQualities(const BamRecordImpl & impl,const bool aligned,const bool exciseSoftClips,QualityValues * quals)266 static inline void ClipAndGapifyQualities(const BamRecordImpl& impl, const bool aligned,
267 const bool exciseSoftClips, QualityValues* quals)
268 {
269 ClipAndGapify<QualityValues, QualityValue>(impl, aligned, exciseSoftClips, quals,
270 QualityValue(0), QualityValue(0));
271 }
272
ClipAndGapifyUInts(const BamRecordImpl & impl,const bool aligned,const bool exciseSoftClips,std::vector<uint32_t> * data)273 static inline void ClipAndGapifyUInts(const BamRecordImpl& impl, const bool aligned,
274 const bool exciseSoftClips, std::vector<uint32_t>* data)
275 {
276 ClipAndGapify<std::vector<uint32_t>, uint32_t>(impl, aligned, exciseSoftClips, data, 0, 0);
277 }
278
ClipAndGapifyUInt8s(const BamRecordImpl & impl,const bool aligned,const bool exciseSoftClips,std::vector<uint8_t> * data)279 static inline void ClipAndGapifyUInt8s(const BamRecordImpl& impl, const bool aligned,
280 const bool exciseSoftClips, std::vector<uint8_t>* data)
281 {
282 ClipAndGapify<std::vector<uint8_t>, uint8_t>(impl, aligned, exciseSoftClips, data, 0, 0);
283 }
284
NameToType(const std::string & name)285 static RecordType NameToType(const std::string& name)
286 {
287 if (name == recordTypeName_Subread) return RecordType::SUBREAD;
288 if (name == recordTypeName_ZMW || name == recordTypeName_Polymerase) return RecordType::ZMW;
289 if (name == recordTypeName_HqRegion) return RecordType::HQREGION;
290 if (name == recordTypeName_CCS) return RecordType::CCS;
291 if (name == recordTypeName_Scrap) return RecordType::SCRAP;
292 if (name == recordTypeName_Transcript) return RecordType::TRANSCRIPT;
293 return RecordType::UNKNOWN;
294 }
295
OrientBasesAsRequested(std::string * bases,Orientation current,Orientation requested,bool isReverseStrand,bool isPulse)296 static void OrientBasesAsRequested(std::string* bases, Orientation current, Orientation requested,
297 bool isReverseStrand, bool isPulse)
298 {
299 assert(bases);
300 if (current != requested && isReverseStrand) {
301 if (isPulse)
302 internal::ReverseComplementCaseSens(*bases);
303 else
304 internal::ReverseComplement(*bases);
305 }
306 }
307
308 template <typename Container>
OrientTagDataAsRequested(Container * data,Orientation current,Orientation requested,bool isReverseStrand)309 inline void OrientTagDataAsRequested(Container* data, Orientation current, Orientation requested,
310 bool isReverseStrand)
311 {
312 assert(data);
313 if (current != requested && isReverseStrand) std::reverse(data->begin(), data->end());
314 }
315
ConsumesQuery(const CigarOperationType type)316 static inline bool ConsumesQuery(const CigarOperationType type)
317 {
318 return (bam_cigar_type(static_cast<int>(type)) & 0x1) != 0;
319 }
320
ConsumesReference(const CigarOperationType type)321 static inline bool ConsumesReference(const CigarOperationType type)
322 {
323 return (bam_cigar_type(static_cast<int>(type)) & 0x2) != 0;
324 }
325
326 } // namespace internal
327
328 const float BamRecord::photonFactor = 10.0;
329
BamRecord()330 BamRecord::BamRecord()
331 : alignedStart_{PacBio::BAM::UnmappedPosition}, alignedEnd_{PacBio::BAM::UnmappedPosition}
332 {
333 }
334
BamRecord(BamHeader header)335 BamRecord::BamRecord(BamHeader header)
336 : header_{std::move(header)}
337 , alignedStart_{PacBio::BAM::UnmappedPosition}
338 , alignedEnd_{PacBio::BAM::UnmappedPosition}
339 {
340 }
341
BamRecord(BamRecordImpl impl)342 BamRecord::BamRecord(BamRecordImpl impl)
343 : impl_{std::move(impl)}
344 , alignedStart_{PacBio::BAM::UnmappedPosition}
345 , alignedEnd_{PacBio::BAM::UnmappedPosition}
346 {
347 }
348
BamRecord(const BamRecord & other)349 BamRecord::BamRecord(const BamRecord& other)
350 : impl_{other.impl_}
351 , header_{other.header_}
352 , alignedStart_{other.alignedStart_}
353 , alignedEnd_{other.alignedEnd_}
354 {
355 }
356
BamRecord(BamRecord && other)357 BamRecord::BamRecord(BamRecord&& other)
358 : impl_{std::move(other.impl_)}
359 , header_{std::move(other.header_)}
360 , alignedStart_{std::move(other.alignedStart_)}
361 , alignedEnd_{std::move(other.alignedEnd_)}
362 , p2bCache_{std::move(other.p2bCache_)}
363 {
364 }
365
operator =(const BamRecord & other)366 BamRecord& BamRecord::operator=(const BamRecord& other)
367 {
368 if (this != &other) {
369 impl_ = other.impl_;
370 header_ = other.header_;
371 alignedStart_ = other.alignedStart_;
372 alignedEnd_ = other.alignedEnd_;
373 p2bCache_.reset(); // just reset, for now at least
374 }
375 return *this;
376 }
377
operator =(BamRecord && other)378 BamRecord& BamRecord::operator=(BamRecord&& other)
379 {
380 if (this != &other) {
381 impl_ = std::move(other.impl_);
382 header_ = std::move(other.header_);
383 alignedStart_ = std::move(other.alignedStart_);
384 alignedEnd_ = std::move(other.alignedEnd_);
385 p2bCache_ = std::move(other.p2bCache_);
386 }
387 return *this;
388 }
389
~BamRecord()390 BamRecord::~BamRecord() {}
391
AlignedEnd() const392 Position BamRecord::AlignedEnd() const
393 {
394 if (alignedEnd_ == PacBio::BAM::UnmappedPosition) CalculateAlignedPositions();
395 return alignedEnd_;
396 }
397
AlignedStart() const398 Position BamRecord::AlignedStart() const
399 {
400 if (alignedStart_ == PacBio::BAM::UnmappedPosition) CalculateAlignedPositions();
401 return alignedStart_;
402 }
403
AlignedStrand() const404 Strand BamRecord::AlignedStrand() const
405 {
406 return impl_.IsReverseStrand() ? Strand::REVERSE : Strand::FORWARD;
407 }
408
AltLabelQV(Orientation orientation,bool aligned,bool exciseSoftClips,PulseBehavior pulseBehavior) const409 QualityValues BamRecord::AltLabelQV(Orientation orientation, bool aligned, bool exciseSoftClips,
410 PulseBehavior pulseBehavior) const
411 {
412 return FetchQualities(BamRecordTag::ALT_LABEL_QV, orientation, aligned, exciseSoftClips,
413 pulseBehavior);
414 }
415
AltLabelQV(const QualityValues & altLabelQVs)416 BamRecord& BamRecord::AltLabelQV(const QualityValues& altLabelQVs)
417 {
418 internal::CreateOrEdit(BamRecordTag::ALT_LABEL_QV, altLabelQVs.Fastq(), &impl_);
419 return *this;
420 }
421
AltLabelTag(Orientation orientation,bool aligned,bool exciseSoftClips,PulseBehavior pulseBehavior) const422 std::string BamRecord::AltLabelTag(Orientation orientation, bool aligned, bool exciseSoftClips,
423 PulseBehavior pulseBehavior) const
424 {
425 return FetchBases(BamRecordTag::ALT_LABEL_TAG, orientation, aligned, exciseSoftClips,
426 pulseBehavior);
427 }
428
AltLabelTag(const std::string & tags)429 BamRecord& BamRecord::AltLabelTag(const std::string& tags)
430 {
431 internal::CreateOrEdit(BamRecordTag::ALT_LABEL_TAG, tags, &impl_);
432 return *this;
433 }
434
BarcodeForward() const435 int16_t BamRecord::BarcodeForward() const { return Barcodes().first; }
436
BarcodeReverse() const437 int16_t BamRecord::BarcodeReverse() const { return Barcodes().second; }
438
BarcodeQuality() const439 uint8_t BamRecord::BarcodeQuality() const
440 {
441 const auto tagName = internal::BamRecordTags::LabelFor(BamRecordTag::BARCODE_QUALITY);
442 const auto bq = impl_.TagValue(tagName);
443 if (bq.IsNull())
444 return 0; // ?? "missing" value for tags ?? should we consider boost::optional<T> for these kind of guys ??
445 return bq.ToUInt8();
446 }
447
BarcodeQuality(const uint8_t quality)448 BamRecord& BamRecord::BarcodeQuality(const uint8_t quality)
449 {
450 internal::CreateOrEdit(BamRecordTag::BARCODE_QUALITY, quality, &impl_);
451 return *this;
452 }
453
Barcodes() const454 std::pair<int16_t, int16_t> BamRecord::Barcodes() const
455 {
456 const auto tagName = internal::BamRecordTags::LabelFor(BamRecordTag::BARCODES);
457 const Tag bc = impl_.TagValue(tagName);
458 if (bc.IsNull()) throw std::runtime_error{"barcode tag (bc) was requested but is missing"};
459
460 // NOTE: barcodes are still stored, per the spec, as uint16, even though
461 // we're now using them as int16_t in the API (bug 31511)
462 //
463 if (!bc.IsUInt16Array())
464 throw std::runtime_error{
465 "barcode tag (bc) is malformed: should be a uint16_t array of size==2."};
466 const auto bcArray = bc.ToUInt16Array();
467 if (bcArray.size() != 2)
468 throw std::runtime_error{
469 "barcode tag (bc) is malformed: should be a uint16_t array of size==2."};
470
471 return {boost::numeric_cast<int16_t>(bcArray[0]), boost::numeric_cast<int16_t>(bcArray[1])};
472 }
473
Barcodes(const std::pair<int16_t,int16_t> & barcodeIds)474 BamRecord& BamRecord::Barcodes(const std::pair<int16_t, int16_t>& barcodeIds)
475 {
476 const std::vector<uint16_t> data{boost::numeric_cast<uint16_t>(barcodeIds.first),
477 boost::numeric_cast<uint16_t>(barcodeIds.second)};
478 internal::CreateOrEdit(BamRecordTag::BARCODES, data, &impl_);
479 return *this;
480 }
481
CalculateAlignedPositions() const482 void BamRecord::CalculateAlignedPositions() const
483 {
484 // reset
485 ResetCachedPositions();
486
487 // skip if unmapped, or has no queryStart/End
488 if (!impl_.IsMapped()) return;
489
490 // get the query start/end
491 const auto seqLength = static_cast<int>(impl_.SequenceLength());
492 const bool isCcsOrTranscript = IsCcsOrTranscript(Type());
493 const Position qStart = isCcsOrTranscript ? 0 : QueryStart();
494 const Position qEnd = isCcsOrTranscript ? seqLength : QueryEnd();
495
496 if (qStart == PacBio::BAM::UnmappedPosition || qEnd == PacBio::BAM::UnmappedPosition) return;
497
498 // determine clipped end ranges
499 const auto alignedOffsets = internal::AlignedOffsets(*this, seqLength);
500 const auto startOffset = alignedOffsets.first;
501 const auto endOffset = alignedOffsets.second;
502 if (endOffset == -1 || startOffset == -1) return; // TODO: handle error more??
503
504 // store aligned positions (polymerase read coordinates)
505 if (impl_.IsReverseStrand()) {
506 alignedStart_ = qStart + (seqLength - endOffset);
507 alignedEnd_ = qEnd - startOffset;
508 } else {
509 alignedStart_ = qStart + startOffset;
510 alignedEnd_ = qEnd - (seqLength - endOffset);
511 }
512 }
513
CalculatePulse2BaseCache() const514 void BamRecord::CalculatePulse2BaseCache() const
515 {
516 // skip already calculated
517 if (p2bCache_) return;
518
519 // else try to calculate p2b cache.
520 if (!HasPulseCall())
521 throw std::runtime_error{"BamRecord cannot calculate pulse2base mapping without 'pc' tag."};
522 const auto pulseCalls =
523 FetchBases(BamRecordTag::PULSE_CALL, Orientation::NATIVE, false, false, PulseBehavior::ALL);
524 p2bCache_ = std::make_unique<internal::Pulse2BaseCache>(pulseCalls);
525 }
526
CigarData(bool exciseAllClips) const527 Cigar BamRecord::CigarData(bool exciseAllClips) const
528 {
529 auto isClippingOp = [](const CigarOperation& op) {
530 const auto type = op.Type();
531 return type == CigarOperationType::SOFT_CLIP || type == CigarOperationType::HARD_CLIP;
532 };
533
534 auto cigar = impl_.CigarData();
535 if (exciseAllClips) {
536 cigar.erase(std::remove_if(cigar.begin(), cigar.end(), isClippingOp), cigar.end());
537 }
538 return cigar;
539 }
540
Clip(const ClipType clipType,const Position start,const Position end)541 BamRecord& BamRecord::Clip(const ClipType clipType, const Position start, const Position end)
542 {
543 switch (clipType) {
544 case ClipType::CLIP_NONE:
545 return *this;
546 case ClipType::CLIP_TO_QUERY:
547 return ClipToQuery(start, end);
548 case ClipType::CLIP_TO_REFERENCE:
549 return ClipToReference(start, end);
550 default:
551 throw std::runtime_error{"unsupported clip type requested"};
552 }
553 }
554
ClipTags(const size_t clipFrom,const size_t clipLength)555 void BamRecord::ClipTags(const size_t clipFrom, const size_t clipLength)
556 {
557 const auto ipdCodec = ReadGroup().IpdCodec();
558 const auto pwCodec = ReadGroup().PulseWidthCodec();
559
560 // update BAM tags
561 TagCollection tags = impl_.Tags();
562 if (HasDeletionQV())
563 tags[internal::Label(BamRecordTag::DELETION_QV)] =
564 internal::Clip(DeletionQV(Orientation::NATIVE), clipFrom, clipLength).Fastq();
565 if (HasInsertionQV())
566 tags[internal::Label(BamRecordTag::INSERTION_QV)] =
567 internal::Clip(InsertionQV(Orientation::NATIVE), clipFrom, clipLength).Fastq();
568 if (HasMergeQV())
569 tags[internal::Label(BamRecordTag::MERGE_QV)] =
570 internal::Clip(MergeQV(Orientation::NATIVE), clipFrom, clipLength).Fastq();
571 if (HasSubstitutionQV())
572 tags[internal::Label(BamRecordTag::SUBSTITUTION_QV)] =
573 internal::Clip(SubstitutionQV(Orientation::NATIVE), clipFrom, clipLength).Fastq();
574 if (HasIPD()) {
575 if (ipdCodec == FrameCodec::RAW)
576 tags[internal::Label(BamRecordTag::IPD)] =
577 internal::Clip(IPD(Orientation::NATIVE).Data(), clipFrom, clipLength);
578 else if (ipdCodec == FrameCodec::V1)
579 tags[internal::Label(BamRecordTag::IPD)] =
580 internal::Clip(IPD(Orientation::NATIVE).Encode(), clipFrom, clipLength);
581 }
582 if (HasPulseWidth()) {
583 if (pwCodec == FrameCodec::RAW)
584 tags[internal::Label(BamRecordTag::PULSE_WIDTH)] =
585 internal::Clip(PulseWidth(Orientation::NATIVE).Data(), clipFrom, clipLength);
586 else if (pwCodec == FrameCodec::V1)
587 tags[internal::Label(BamRecordTag::PULSE_WIDTH)] =
588 internal::Clip(PulseWidth(Orientation::NATIVE).Encode(), clipFrom, clipLength);
589 }
590 if (HasDeletionTag())
591 tags[internal::Label(BamRecordTag::DELETION_TAG)] =
592 internal::Clip(DeletionTag(Orientation::NATIVE), clipFrom, clipLength);
593 if (HasSubstitutionTag())
594 tags[internal::Label(BamRecordTag::SUBSTITUTION_TAG)] =
595 internal::Clip(SubstitutionTag(Orientation::NATIVE), clipFrom, clipLength);
596
597 // internal BAM tags
598 if (HasPulseCall()) {
599
600 // ensure p2bCache initialized
601 CalculatePulse2BaseCache();
602 internal::Pulse2BaseCache* p2bCache = p2bCache_.get();
603
604 if (HasAltLabelQV())
605 tags[internal::Label(BamRecordTag::ALT_LABEL_QV)] =
606 internal::ClipPulse(AltLabelQV(Orientation::NATIVE), p2bCache, clipFrom, clipLength)
607 .Fastq();
608 if (HasLabelQV())
609 tags[internal::Label(BamRecordTag::LABEL_QV)] =
610 internal::ClipPulse(LabelQV(Orientation::NATIVE), p2bCache, clipFrom, clipLength)
611 .Fastq();
612 if (HasPulseMergeQV())
613 tags[internal::Label(BamRecordTag::PULSE_MERGE_QV)] =
614 internal::ClipPulse(PulseMergeQV(Orientation::NATIVE), p2bCache, clipFrom,
615 clipLength)
616 .Fastq();
617 if (HasAltLabelTag())
618 tags[internal::Label(BamRecordTag::ALT_LABEL_TAG)] = internal::ClipPulse(
619 AltLabelTag(Orientation::NATIVE), p2bCache, clipFrom, clipLength);
620 if (HasPulseCall())
621 tags[internal::Label(BamRecordTag::PULSE_CALL)] =
622 internal::ClipPulse(PulseCall(Orientation::NATIVE), p2bCache, clipFrom, clipLength);
623 if (HasPkmean())
624 tags[internal::Label(BamRecordTag::PKMEAN)] = EncodePhotons(
625 internal::ClipPulse(Pkmean(Orientation::NATIVE), p2bCache, clipFrom, clipLength));
626 if (HasPkmid())
627 tags[internal::Label(BamRecordTag::PKMID)] = EncodePhotons(
628 internal::ClipPulse(Pkmid(Orientation::NATIVE), p2bCache, clipFrom, clipLength));
629 if (HasPkmean2())
630 tags[internal::Label(BamRecordTag::PKMEAN_2)] = EncodePhotons(
631 internal::ClipPulse(Pkmean2(Orientation::NATIVE), p2bCache, clipFrom, clipLength));
632 if (HasPkmid2())
633 tags[internal::Label(BamRecordTag::PKMID_2)] = EncodePhotons(
634 internal::ClipPulse(Pkmid2(Orientation::NATIVE), p2bCache, clipFrom, clipLength));
635 if (HasPrePulseFrames())
636 tags[internal::Label(BamRecordTag::PRE_PULSE_FRAMES)] = internal::ClipPulse(
637 PrePulseFrames(Orientation::NATIVE).Data(), p2bCache, clipFrom, clipLength);
638 if (HasPulseCallWidth())
639 tags[internal::Label(BamRecordTag::PULSE_CALL_WIDTH)] = internal::ClipPulse(
640 PulseCallWidth(Orientation::NATIVE).Data(), p2bCache, clipFrom, clipLength);
641 if (HasStartFrame())
642 tags[internal::Label(BamRecordTag::START_FRAME)] = internal::ClipPulse(
643 StartFrame(Orientation::NATIVE), p2bCache, clipFrom, clipLength);
644 }
645
646 impl_.Tags(tags);
647 }
648
ClipFields(const size_t clipFrom,const size_t clipLength)649 void BamRecord::ClipFields(const size_t clipFrom, const size_t clipLength)
650 {
651 const bool isForwardStrand = (AlignedStrand() == Strand::FORWARD);
652
653 // clip seq, quals
654 std::string sequence{internal::Clip(Sequence(Orientation::NATIVE), clipFrom, clipLength)};
655 QualityValues qualities{internal::Clip(Qualities(Orientation::NATIVE), clipFrom, clipLength)};
656 if (!isForwardStrand) {
657 internal::ReverseComplement(sequence);
658 internal::Reverse(qualities);
659 }
660 impl_.SetSequenceAndQualities(sequence, qualities.Fastq());
661
662 ClipTags(clipFrom, clipLength);
663 }
664
ClipToQuery(const Position start,const Position end)665 BamRecord& BamRecord::ClipToQuery(const Position start, const Position end)
666 {
667 // cache original coords, skip out if clip not needed
668 const auto seqLength = static_cast<int>(impl_.SequenceLength());
669 const bool isCcsOrTranscript = IsCcsOrTranscript(Type());
670 const Position origQStart = isCcsOrTranscript ? 0 : QueryStart();
671 const Position origQEnd = isCcsOrTranscript ? seqLength : QueryEnd();
672 if (start <= origQStart && end >= origQEnd) return *this;
673
674 // determine new offsets into data
675 const size_t startOffset = start - origQStart;
676 const size_t endOffset = origQEnd - end;
677
678 // maybe update CIGAR & aligned position
679 if (IsMapped()) {
680
681 // fetch a 'working copy' of CIGAR data
682 Cigar cigar = impl_.CigarData();
683
684 // clip leading CIGAR ops
685 size_t referencePositionOffset = 0;
686 size_t remaining = startOffset;
687 while (remaining > 0 && !cigar.empty()) {
688 CigarOperation& firstOp = cigar.front();
689 const auto firstOpLength = firstOp.Length();
690 const bool consumesQuery = internal::ConsumesQuery(firstOp.Type());
691 const bool consumesRef = internal::ConsumesReference(firstOp.Type());
692
693 // if (!consumesQuery)
694 // just pop (e.g. deletion) ?
695 // else {
696 // check bounds, like clip to reference ?
697 // }
698
699 // CIGAR op ends at or before clip
700 if (firstOpLength <= remaining) {
701 cigar.erase(cigar.begin());
702 if (consumesQuery) remaining -= firstOpLength;
703 if (consumesRef) referencePositionOffset += firstOpLength;
704 }
705
706 // CIGAR op straddles clip
707 else {
708 firstOp.Length(firstOpLength - remaining);
709 if (consumesRef) referencePositionOffset += remaining;
710 remaining = 0;
711 }
712 }
713
714 // clip trailing CIGAR ops
715 remaining = endOffset;
716 while (remaining > 0 && !cigar.empty()) {
717 CigarOperation& lastOp = cigar.back();
718 const auto lastOpLength = lastOp.Length();
719 const bool consumesQuery = internal::ConsumesQuery(lastOp.Type());
720
721 // CIGAR op ends at or after clip
722 if (lastOpLength <= remaining) {
723 cigar.pop_back();
724 if (consumesQuery) remaining -= lastOpLength;
725 }
726
727 // CIGAR op straddles clip
728 else {
729 lastOp.Length(lastOpLength - remaining);
730 remaining = 0;
731 }
732 }
733
734 // update CIGAR & position
735 impl_.CigarData(cigar);
736 impl_.Position(impl_.Position() + referencePositionOffset);
737 }
738
739 // clip SEQ, QUAL, & tags
740 const size_t clipFrom = startOffset;
741 const size_t clipLength = (end - start);
742 ClipFields(clipFrom, clipLength);
743
744 // update query start/end
745 // TODO: update name to reflect new QS/QE ???
746 internal::CreateOrEdit(BamRecordTag::QUERY_START, start, &impl_);
747 internal::CreateOrEdit(BamRecordTag::QUERY_END, end, &impl_);
748 // UpdateName();
749
750 // reset any cached aligned start/end
751 ResetCachedPositions();
752 return *this;
753 }
754
ClipToReference(const Position start,const Position end)755 BamRecord& BamRecord::ClipToReference(const Position start, const Position end)
756 {
757 // skip if not mapped, clipping to reference doesn't make sense
758 // or should we even consider throwing here?
759 if (!IsMapped()) return *this;
760
761 const bool isForwardStrand = (AlignedStrand() == Strand::FORWARD);
762 return (isForwardStrand ? ClipToReferenceForward(start, end)
763 : ClipToReferenceReverse(start, end));
764 }
765
ClipToReferenceForward(const PacBio::BAM::Position start,const PacBio::BAM::Position end)766 BamRecord& BamRecord::ClipToReferenceForward(const PacBio::BAM::Position start,
767 const PacBio::BAM::Position end)
768 {
769 assert(IsMapped());
770 assert(AlignedStrand() == Strand::FORWARD);
771
772 // cache original coords
773 const int seqLength = static_cast<int>(impl_.SequenceLength());
774 const bool isCcsOrTranscript = IsCcsOrTranscript(Type());
775 const Position origQStart = isCcsOrTranscript ? 0 : QueryStart();
776 const Position origQEnd = isCcsOrTranscript ? seqLength : QueryEnd();
777 const Position origTStart = ReferenceStart();
778 const Position origTEnd = ReferenceEnd();
779 assert(AlignedStart() >= origQStart);
780 assert(AlignedEnd() <= origQEnd);
781
782 // skip if already within requested clip range
783 if (start <= origTStart && end >= origTEnd) return *this;
784
785 const Position newTStart = std::max(origTStart, start);
786 const Position newTEnd = std::min(origTEnd, end);
787
788 // fetch a 'working copy' of CIGAR data
789 Cigar cigar = impl_.CigarData();
790
791 // we're going to skip query sequence outside aligned region
792 size_t queryPosRemovedFront = 0;
793 size_t queryPosRemovedBack = 0;
794
795 // ------------------------
796 // clip leading CIGAR ops
797 // ------------------------
798
799 size_t remaining = newTStart - origTStart;
800 while (remaining > 0 && !cigar.empty()) {
801 CigarOperation& firstOp = cigar.front();
802 const auto firstOpLength = firstOp.Length();
803 const bool consumesQuery = internal::ConsumesQuery(firstOp.Type());
804 const bool consumesRef = internal::ConsumesReference(firstOp.Type());
805
806 if (!consumesRef) {
807
808 // e.g. softclip - just pop it completely
809 cigar.erase(cigar.begin());
810 if (consumesQuery) queryPosRemovedFront += firstOpLength;
811
812 } else {
813 assert(consumesRef);
814
815 // CIGAR ends at or before clip
816 if (firstOpLength <= remaining) {
817 cigar.erase(cigar.begin());
818 if (consumesQuery) queryPosRemovedFront += firstOpLength;
819 if (consumesRef) remaining -= firstOpLength;
820 }
821
822 // CIGAR straddles clip
823 else {
824 assert(firstOpLength > remaining);
825 firstOp.Length(firstOpLength - remaining);
826 if (consumesQuery) queryPosRemovedFront += remaining;
827 remaining = 0;
828 }
829 }
830 }
831
832 // -------------------------
833 // clip trailing CIGAR ops
834 // -------------------------
835
836 remaining = origTEnd - newTEnd;
837 while (remaining > 0 && !cigar.empty()) {
838 CigarOperation& lastOp = cigar.back();
839 const auto lastOpLength = lastOp.Length();
840 const bool consumesQuery = internal::ConsumesQuery(lastOp.Type());
841 const bool consumesRef = internal::ConsumesReference(lastOp.Type());
842
843 if (!consumesRef) {
844
845 // e.g. softclip - just pop it completely
846 cigar.pop_back();
847 if (consumesQuery) queryPosRemovedBack += lastOpLength;
848
849 } else {
850 assert(consumesRef);
851
852 // CIGAR ends at or after clip
853 if (lastOpLength <= remaining) {
854 cigar.pop_back();
855 if (consumesQuery) queryPosRemovedBack += lastOpLength;
856 if (consumesRef) remaining -= lastOpLength;
857 }
858
859 // CIGAR straddles clip
860 else {
861 assert(lastOpLength > remaining);
862 lastOp.Length(lastOpLength - remaining);
863 if (consumesQuery) queryPosRemovedBack += remaining;
864 remaining = 0;
865 }
866 }
867 }
868
869 // update CIGAR and position
870 impl_.CigarData(cigar);
871 impl_.Position(newTStart);
872
873 // clip SEQ, QUAL, tags
874 const Position qStart = origQStart + queryPosRemovedFront;
875 const Position qEnd = origQEnd - queryPosRemovedBack;
876 const size_t clipFrom = queryPosRemovedFront;
877 const size_t clipLength = qEnd - qStart;
878 ClipFields(clipFrom, clipLength);
879
880 // update query start/end
881 internal::CreateOrEdit(BamRecordTag::QUERY_START, qStart, &impl_);
882 internal::CreateOrEdit(BamRecordTag::QUERY_END, qEnd, &impl_);
883 // UpdateName();
884
885 // reset any cached aligned start/end
886 ResetCachedPositions();
887 return *this;
888 }
889
ClipToReferenceReverse(const PacBio::BAM::Position start,const PacBio::BAM::Position end)890 BamRecord& BamRecord::ClipToReferenceReverse(const PacBio::BAM::Position start,
891 const PacBio::BAM::Position end)
892 {
893 assert(IsMapped());
894 assert(AlignedStrand() == Strand::REVERSE);
895
896 // cache original coords
897 const int seqLength = static_cast<int>(impl_.SequenceLength());
898 const bool isCcsOrTranscript = IsCcsOrTranscript(Type());
899 const Position origQStart = isCcsOrTranscript ? 0 : QueryStart();
900 const Position origQEnd = isCcsOrTranscript ? seqLength : QueryEnd();
901 const Position origTStart = ReferenceStart();
902 const Position origTEnd = ReferenceEnd();
903
904 // skip if already within requested clip range
905 if (start <= origTStart && end >= origTEnd) return *this;
906 assert(AlignedStart() >= origQStart);
907 assert(AlignedEnd() <= origQEnd);
908
909 const Position newTStart = std::max(origTStart, start);
910 const Position newTEnd = std::min(origTEnd, end);
911
912 Cigar cigar = impl_.CigarData();
913
914 size_t queryPosRemovedFront = 0;
915 size_t queryPosRemovedBack = 0;
916
917 // update CIGAR - clip front ops, then clip back ops
918 size_t remaining = newTStart - origTStart;
919 while (remaining > 0 && !cigar.empty()) {
920 CigarOperation& firstOp = cigar.front();
921 const auto firstOpType = firstOp.Type();
922 const auto firstOpLength = firstOp.Length();
923 const bool consumesQuery = internal::ConsumesQuery(firstOpType);
924 const bool consumesRef = internal::ConsumesReference(firstOpType);
925
926 if (!consumesRef) {
927
928 // e.g. softclip - just pop it completely
929 cigar.erase(cigar.begin());
930 if (consumesQuery) queryPosRemovedBack += firstOpLength;
931
932 } else {
933 assert(consumesRef);
934
935 // CIGAR ends at or before clip
936 if (firstOpLength <= remaining) {
937 cigar.erase(cigar.begin());
938 if (consumesQuery) queryPosRemovedBack += firstOpLength;
939 if (consumesRef) remaining -= firstOpLength;
940 }
941
942 // CIGAR straddles clip
943 else {
944 assert(firstOpLength > remaining);
945 firstOp.Length(firstOpLength - remaining);
946 if (consumesQuery) queryPosRemovedBack += remaining;
947 remaining = 0;
948 }
949 }
950 }
951
952 remaining = origTEnd - newTEnd;
953 while (remaining > 0 && !cigar.empty()) {
954 CigarOperation& lastOp = cigar.back();
955 const auto lastOpType = lastOp.Type();
956 const auto lastOpLength = lastOp.Length();
957 const bool consumesQuery = internal::ConsumesQuery(lastOpType);
958 const bool consumesRef = internal::ConsumesReference(lastOpType);
959
960 if (!consumesRef) {
961
962 // e.g. softclip - just pop it completely
963 cigar.pop_back();
964 if (consumesQuery) queryPosRemovedFront += lastOpLength;
965
966 } else {
967 assert(consumesRef);
968
969 // CIGAR ends at or before clip
970 if (lastOpLength <= remaining) {
971 cigar.pop_back();
972 if (consumesQuery) queryPosRemovedFront += lastOpLength;
973 if (consumesRef) remaining -= lastOpLength;
974 }
975
976 // CIGAR straddles clip
977 else {
978 assert(lastOpLength > remaining);
979 lastOp.Length(lastOpLength - remaining);
980 if (consumesQuery) queryPosRemovedFront += remaining;
981 remaining = 0;
982 }
983 }
984 }
985 impl_.CigarData(cigar);
986
987 // update aligned reference position
988 impl_.Position(newTStart);
989
990 // clip SEQ, QUAL, tags
991 const Position qStart = origQStart + queryPosRemovedFront;
992 const Position qEnd = origQEnd - queryPosRemovedBack;
993 const size_t clipFrom = queryPosRemovedFront;
994 const size_t clipLength = qEnd - qStart;
995 ClipFields(clipFrom, clipLength);
996
997 // update query start/end
998 internal::CreateOrEdit(BamRecordTag::QUERY_START, qStart, &impl_);
999 internal::CreateOrEdit(BamRecordTag::QUERY_END, qEnd, &impl_);
1000 // UpdateName();
1001
1002 // reset any cached aligned start/end
1003 ResetCachedPositions();
1004 return *this;
1005 }
1006
DeletionQV(Orientation orientation,bool aligned,bool exciseSoftClips) const1007 QualityValues BamRecord::DeletionQV(Orientation orientation, bool aligned,
1008 bool exciseSoftClips) const
1009 {
1010 return FetchQualities(BamRecordTag::DELETION_QV, orientation, aligned, exciseSoftClips);
1011 }
1012
DeletionQV(const QualityValues & deletionQVs)1013 BamRecord& BamRecord::DeletionQV(const QualityValues& deletionQVs)
1014 {
1015 internal::CreateOrEdit(BamRecordTag::DELETION_QV, deletionQVs.Fastq(), &impl_);
1016 return *this;
1017 }
1018
DeletionTag(Orientation orientation,bool aligned,bool exciseSoftClips) const1019 std::string BamRecord::DeletionTag(Orientation orientation, bool aligned,
1020 bool exciseSoftClips) const
1021 {
1022 return FetchBases(BamRecordTag::DELETION_TAG, orientation, aligned, exciseSoftClips);
1023 }
1024
DeletionTag(const std::string & tags)1025 BamRecord& BamRecord::DeletionTag(const std::string& tags)
1026 {
1027 internal::CreateOrEdit(BamRecordTag::DELETION_TAG, tags, &impl_);
1028 return *this;
1029 }
1030
EncodePhotons(const std::vector<float> & data)1031 std::vector<uint16_t> BamRecord::EncodePhotons(const std::vector<float>& data)
1032 {
1033 std::vector<uint16_t> encoded;
1034 encoded.reserve(data.size());
1035 for (const auto& d : data)
1036 encoded.emplace_back(d * photonFactor);
1037 return encoded;
1038 }
1039
FetchBasesRaw(const BamRecordTag tag) const1040 std::string BamRecord::FetchBasesRaw(const BamRecordTag tag) const
1041 {
1042 const Tag seqTag = impl_.TagValue(tag);
1043 return seqTag.ToString();
1044 }
1045
FetchBases(const BamRecordTag tag,const Orientation orientation,const bool aligned,const bool exciseSoftClips,const PulseBehavior pulseBehavior) const1046 std::string BamRecord::FetchBases(const BamRecordTag tag, const Orientation orientation,
1047 const bool aligned, const bool exciseSoftClips,
1048 const PulseBehavior pulseBehavior) const
1049 {
1050 const bool isBamSeq = (tag == BamRecordTag::SEQ);
1051 const bool isPulse = internal::BamRecordTags::IsPulse(tag);
1052
1053 // fetch raw
1054 std::string bases;
1055 Orientation current;
1056 if (isBamSeq) { // SEQ stored in genomic orientation
1057 bases = impl_.Sequence();
1058 current = Orientation::GENOMIC;
1059 } else { // all tags stored in native orientation
1060 bases = FetchBasesRaw(tag);
1061 current = Orientation::NATIVE;
1062 }
1063
1064 // maybe strip 'squashed' pulse loci
1065 if (isPulse && pulseBehavior == PulseBehavior::BASECALLS_ONLY) {
1066 CalculatePulse2BaseCache();
1067 bases = p2bCache_->RemoveSquashedPulses(bases);
1068 }
1069
1070 // if we need to touch CIGAR
1071 if (aligned || exciseSoftClips) {
1072
1073 if (isPulse && pulseBehavior != PulseBehavior::BASECALLS_ONLY)
1074 throw std::runtime_error{
1075 "Cannot return data at all pulses when gapping and/or soft-clipping are requested. "
1076 "Use PulseBehavior::BASECALLS_ONLY instead."};
1077
1078 // force into genomic orientation
1079 internal::OrientBasesAsRequested(&bases, current, Orientation::GENOMIC,
1080 impl_.IsReverseStrand(), isPulse);
1081 current = Orientation::GENOMIC;
1082
1083 // clip & gapify as requested
1084 internal::ClipAndGapifyBases(impl_, aligned, exciseSoftClips, &bases);
1085 }
1086
1087 // return in the orientation requested
1088 internal::OrientBasesAsRequested(&bases, current, orientation, impl_.IsReverseStrand(),
1089 isPulse);
1090 return bases;
1091 }
1092
FetchFramesRaw(const BamRecordTag tag) const1093 Frames BamRecord::FetchFramesRaw(const BamRecordTag tag) const
1094 {
1095 const Tag frameTag = impl_.TagValue(tag);
1096 if (frameTag.IsNull()) return {}; // throw ?
1097
1098 // lossy frame codes
1099 if (frameTag.IsUInt8Array()) {
1100 const auto codes = frameTag.ToUInt8Array();
1101 return Frames::Decode(codes);
1102 }
1103
1104 // lossless frame data
1105 else {
1106 assert(frameTag.IsUInt16Array());
1107 return Frames{frameTag.ToUInt16Array()};
1108 }
1109 }
1110
FetchFrames(const BamRecordTag tag,const Orientation orientation,const bool aligned,const bool exciseSoftClips,const PulseBehavior pulseBehavior) const1111 Frames BamRecord::FetchFrames(const BamRecordTag tag, const Orientation orientation,
1112 const bool aligned, const bool exciseSoftClips,
1113 const PulseBehavior pulseBehavior) const
1114 {
1115 const bool isPulse = internal::BamRecordTags::IsPulse(tag);
1116
1117 // fetch raw
1118 Frames frames = FetchFramesRaw(tag);
1119 Orientation current = Orientation::NATIVE;
1120
1121 // maybe strip 'squashed' pulse loci
1122 if (isPulse && pulseBehavior == PulseBehavior::BASECALLS_ONLY) {
1123 CalculatePulse2BaseCache();
1124 frames.DataRaw() = p2bCache_->RemoveSquashedPulses(frames.Data());
1125 }
1126
1127 // if we need to touch the CIGAR
1128 if (aligned || exciseSoftClips) {
1129
1130 if (isPulse && pulseBehavior != PulseBehavior::BASECALLS_ONLY)
1131 throw std::runtime_error{
1132 "Cannot return data at all pulses when gapping and/or soft-clipping are requested. "
1133 "Use PulseBehavior::BASECALLS_ONLY instead."};
1134
1135 // force into genomic orientation
1136 internal::OrientTagDataAsRequested(&frames, current, Orientation::GENOMIC,
1137 impl_.IsReverseStrand());
1138 current = Orientation::GENOMIC;
1139
1140 // clip & gapify as requested
1141 internal::ClipAndGapifyFrames(impl_, aligned, exciseSoftClips, &frames);
1142 }
1143
1144 // return in the orientation requested
1145 internal::OrientTagDataAsRequested(&frames, current, orientation, impl_.IsReverseStrand());
1146 return frames;
1147 }
1148
FetchPhotonsRaw(const BamRecordTag tag) const1149 std::vector<float> BamRecord::FetchPhotonsRaw(const BamRecordTag tag) const
1150 {
1151 const Tag frameTag = impl_.TagValue(tag);
1152 if (frameTag.IsNull()) return {};
1153 if (!frameTag.IsUInt16Array())
1154 throw std::runtime_error{"Photons are not a uint16_t array, tag " +
1155 internal::BamRecordTags::LabelFor(tag)};
1156
1157 const auto data = frameTag.ToUInt16Array();
1158 std::vector<float> photons;
1159 photons.reserve(data.size());
1160 for (const auto& d : data)
1161 photons.emplace_back(d / photonFactor);
1162 return photons;
1163 }
1164
FetchPhotons(const BamRecordTag tag,const Orientation orientation,const bool aligned,const bool exciseSoftClips,const PulseBehavior pulseBehavior) const1165 std::vector<float> BamRecord::FetchPhotons(const BamRecordTag tag, const Orientation orientation,
1166 const bool aligned, const bool exciseSoftClips,
1167 const PulseBehavior pulseBehavior) const
1168 {
1169 const bool isPulse = internal::BamRecordTags::IsPulse(tag);
1170
1171 // fetch raw
1172 auto data = FetchPhotonsRaw(tag);
1173 Orientation current = Orientation::NATIVE;
1174
1175 if (isPulse && pulseBehavior == PulseBehavior::BASECALLS_ONLY) {
1176 // strip 'squashed' pulse loci
1177 CalculatePulse2BaseCache();
1178 data = p2bCache_->RemoveSquashedPulses(data);
1179 }
1180
1181 if (aligned || exciseSoftClips) {
1182
1183 if (isPulse && pulseBehavior != PulseBehavior::BASECALLS_ONLY)
1184 throw std::runtime_error{
1185 "Cannot return data at all pulses when gapping and/or soft-clipping are requested. "
1186 "Use PulseBehavior::BASECALLS_ONLY instead."};
1187
1188 // force into genomic orientation
1189 internal::OrientTagDataAsRequested(&data, current, Orientation::GENOMIC,
1190 impl_.IsReverseStrand());
1191 current = Orientation::GENOMIC;
1192
1193 // clip & gapify as requested
1194 internal::ClipAndGapifyPhotons(impl_, aligned, exciseSoftClips, &data);
1195 }
1196
1197 // return in the orientation requested
1198 internal::OrientTagDataAsRequested(&data, current, orientation, impl_.IsReverseStrand());
1199 return data;
1200 }
1201
FetchQualitiesRaw(const BamRecordTag tag) const1202 QualityValues BamRecord::FetchQualitiesRaw(const BamRecordTag tag) const
1203 {
1204 const Tag qvsTag = impl_.TagValue(tag);
1205 return QualityValues::FromFastq(qvsTag.ToString());
1206 }
1207
FetchQualities(const BamRecordTag tag,const Orientation orientation,const bool aligned,const bool exciseSoftClips,const PulseBehavior pulseBehavior) const1208 QualityValues BamRecord::FetchQualities(const BamRecordTag tag, const Orientation orientation,
1209 const bool aligned, const bool exciseSoftClips,
1210 const PulseBehavior pulseBehavior) const
1211 {
1212 // requested data info
1213 const bool isBamQual = (tag == BamRecordTag::QUAL);
1214 const bool isPulse = internal::BamRecordTags::IsPulse(tag);
1215
1216 // fetch raw
1217 QualityValues quals;
1218 Orientation current;
1219 if (isBamQual) { // QUAL stored in genomic orientation
1220 quals = impl_.Qualities();
1221 current = Orientation::GENOMIC;
1222 } else { // all tags stored in native orientation
1223 quals = FetchQualitiesRaw(tag);
1224 current = Orientation::NATIVE;
1225 }
1226
1227 if (isPulse && pulseBehavior == PulseBehavior::BASECALLS_ONLY) {
1228 // strip 'squashed' pulse loci
1229 CalculatePulse2BaseCache();
1230 quals = p2bCache_->RemoveSquashedPulses(quals);
1231 }
1232
1233 // if we need to touch CIGAR
1234 if (aligned || exciseSoftClips) {
1235
1236 if (isPulse && pulseBehavior != PulseBehavior::BASECALLS_ONLY)
1237 throw std::runtime_error{
1238 "Cannot return data at all pulses when gapping and/or soft-clipping are requested. "
1239 "Use PulseBehavior::BASECALLS_ONLY instead."};
1240
1241 // force into genomic orientation
1242 internal::OrientTagDataAsRequested(&quals, current, Orientation::GENOMIC,
1243 impl_.IsReverseStrand());
1244 current = Orientation::GENOMIC;
1245
1246 // clip & gapify as requested
1247 internal::ClipAndGapifyQualities(impl_, aligned, exciseSoftClips, &quals);
1248 }
1249
1250 // return in the orientation requested
1251 internal::OrientTagDataAsRequested(&quals, current, orientation, impl_.IsReverseStrand());
1252 return quals;
1253 }
1254
FetchUInt32sRaw(const BamRecordTag tag) const1255 std::vector<uint32_t> BamRecord::FetchUInt32sRaw(const BamRecordTag tag) const
1256 {
1257 // fetch tag data
1258 const Tag frameTag = impl_.TagValue(tag);
1259 if (frameTag.IsNull()) return {};
1260 if (!frameTag.IsUInt32Array())
1261 throw std::runtime_error{"Tag data are not a uint32_t array, tag " +
1262 internal::BamRecordTags::LabelFor(tag)};
1263 return frameTag.ToUInt32Array();
1264 }
1265
FetchUInt32s(const BamRecordTag tag,const Orientation orientation,const bool aligned,const bool exciseSoftClips,const PulseBehavior pulseBehavior) const1266 std::vector<uint32_t> BamRecord::FetchUInt32s(const BamRecordTag tag, const Orientation orientation,
1267 const bool aligned, const bool exciseSoftClips,
1268 const PulseBehavior pulseBehavior) const
1269 {
1270 const bool isPulse = internal::BamRecordTags::IsPulse(tag);
1271
1272 // fetch raw
1273 auto arr = FetchUInt32sRaw(tag);
1274 Orientation current = Orientation::NATIVE;
1275
1276 if (isPulse && pulseBehavior == PulseBehavior::BASECALLS_ONLY) {
1277 // strip 'squashed' pulse loci
1278 CalculatePulse2BaseCache();
1279 arr = p2bCache_->RemoveSquashedPulses(arr);
1280 }
1281
1282 if (aligned || exciseSoftClips) {
1283
1284 if (isPulse && pulseBehavior != PulseBehavior::BASECALLS_ONLY)
1285 throw std::runtime_error{
1286 "Cannot return data at all pulses when gapping and/or soft-clipping are requested. "
1287 "Use PulseBehavior::BASECALLS_ONLY instead."};
1288
1289 // force into genomic orientation
1290 internal::OrientTagDataAsRequested(&arr, current, Orientation::GENOMIC,
1291 impl_.IsReverseStrand());
1292 current = Orientation::GENOMIC;
1293
1294 // clip & gapify as requested
1295 internal::ClipAndGapifyUInts(impl_, aligned, exciseSoftClips, &arr);
1296 }
1297
1298 // return in the orientation requested
1299 internal::OrientTagDataAsRequested(&arr, current, orientation, impl_.IsReverseStrand());
1300 return arr;
1301 }
1302
FetchUInt8sRaw(const BamRecordTag tag) const1303 std::vector<uint8_t> BamRecord::FetchUInt8sRaw(const BamRecordTag tag) const
1304 {
1305 // fetch tag data
1306 const Tag frameTag = impl_.TagValue(tag);
1307 if (frameTag.IsNull()) return {};
1308 if (!frameTag.IsUInt8Array())
1309 throw std::runtime_error{"Tag data are not a uint8_t array, tag " +
1310 internal::BamRecordTags::LabelFor(tag)};
1311 return frameTag.ToUInt8Array();
1312 }
1313
FetchUInt8s(const BamRecordTag tag,const Orientation orientation,const bool aligned,const bool exciseSoftClips,const PulseBehavior pulseBehavior) const1314 std::vector<uint8_t> BamRecord::FetchUInt8s(const BamRecordTag tag, const Orientation orientation,
1315 const bool aligned, const bool exciseSoftClips,
1316 const PulseBehavior pulseBehavior) const
1317 {
1318 const bool isPulse = internal::BamRecordTags::IsPulse(tag);
1319
1320 // fetch raw
1321 auto arr = FetchUInt8sRaw(tag);
1322 Orientation current = Orientation::NATIVE;
1323
1324 if (isPulse && pulseBehavior == PulseBehavior::BASECALLS_ONLY) {
1325 // strip 'squashed' pulse loci
1326 CalculatePulse2BaseCache();
1327 arr = p2bCache_->RemoveSquashedPulses(arr);
1328 }
1329
1330 if (aligned || exciseSoftClips) {
1331
1332 if (isPulse && pulseBehavior != PulseBehavior::BASECALLS_ONLY)
1333 throw std::runtime_error{
1334 "Cannot return data at all pulses when gapping and/or soft-clipping are requested. "
1335 "Use PulseBehavior::BASECALLS_ONLY instead."};
1336
1337 // force into genomic orientation
1338 internal::OrientTagDataAsRequested(&arr, current, Orientation::GENOMIC,
1339 impl_.IsReverseStrand());
1340 current = Orientation::GENOMIC;
1341
1342 // clip & gapify as requested
1343 internal::ClipAndGapifyUInt8s(impl_, aligned, exciseSoftClips, &arr);
1344 }
1345
1346 // return in the orientation requested
1347 internal::OrientTagDataAsRequested(&arr, current, orientation, impl_.IsReverseStrand());
1348 return arr;
1349 }
1350
FullName() const1351 std::string BamRecord::FullName() const { return impl_.Name(); }
1352
HasAltLabelQV() const1353 bool BamRecord::HasAltLabelQV() const { return impl_.HasTag(BamRecordTag::ALT_LABEL_QV); }
1354
HasAltLabelTag() const1355 bool BamRecord::HasAltLabelTag() const { return impl_.HasTag(BamRecordTag::ALT_LABEL_TAG); }
1356
HasBarcodes() const1357 bool BamRecord::HasBarcodes() const { return impl_.HasTag(BamRecordTag::BARCODES); }
1358
HasBarcodeQuality() const1359 bool BamRecord::HasBarcodeQuality() const { return impl_.HasTag(BamRecordTag::BARCODE_QUALITY); }
1360
HasLabelQV() const1361 bool BamRecord::HasLabelQV() const { return impl_.HasTag(BamRecordTag::LABEL_QV); }
1362
HasDeletionQV() const1363 bool BamRecord::HasDeletionQV() const { return impl_.HasTag(BamRecordTag::DELETION_QV); }
1364
HasDeletionTag() const1365 bool BamRecord::HasDeletionTag() const { return impl_.HasTag(BamRecordTag::DELETION_TAG); }
1366
HasHoleNumber() const1367 bool BamRecord::HasHoleNumber() const
1368 {
1369 return impl_.HasTag(BamRecordTag::HOLE_NUMBER) &&
1370 !impl_.TagValue(BamRecordTag::HOLE_NUMBER).IsNull();
1371 }
1372
HasInsertionQV() const1373 bool BamRecord::HasInsertionQV() const { return impl_.HasTag(BamRecordTag::INSERTION_QV); }
1374
HasNumPasses() const1375 bool BamRecord::HasNumPasses() const { return impl_.HasTag(BamRecordTag::NUM_PASSES); }
1376
HasPreBaseFrames() const1377 bool BamRecord::HasPreBaseFrames() const { return HasIPD(); }
1378
HasIPD() const1379 bool BamRecord::HasIPD() const { return impl_.HasTag(BamRecordTag::IPD); }
1380
HasLocalContextFlags() const1381 bool BamRecord::HasLocalContextFlags() const { return impl_.HasTag(BamRecordTag::CONTEXT_FLAGS); }
1382
HasMergeQV() const1383 bool BamRecord::HasMergeQV() const { return impl_.HasTag(BamRecordTag::MERGE_QV); }
1384
HasPulseMergeQV() const1385 bool BamRecord::HasPulseMergeQV() const { return impl_.HasTag(BamRecordTag::PULSE_MERGE_QV); }
1386
HasPkmean() const1387 bool BamRecord::HasPkmean() const { return impl_.HasTag(BamRecordTag::PKMEAN); }
1388
HasPkmean2() const1389 bool BamRecord::HasPkmean2() const { return impl_.HasTag(BamRecordTag::PKMEAN_2); }
1390
HasPkmid() const1391 bool BamRecord::HasPkmid() const { return impl_.HasTag(BamRecordTag::PKMID); }
1392
HasPkmid2() const1393 bool BamRecord::HasPkmid2() const { return impl_.HasTag(BamRecordTag::PKMID_2); }
1394
HasPrePulseFrames() const1395 bool BamRecord::HasPrePulseFrames() const { return impl_.HasTag(BamRecordTag::PRE_PULSE_FRAMES); }
1396
HasPulseCall() const1397 bool BamRecord::HasPulseCall() const
1398 {
1399 return impl_.HasTag(BamRecordTag::PULSE_CALL) &&
1400 !impl_.TagValue(BamRecordTag::PULSE_CALL).IsNull();
1401 }
1402
HasPulseExclusion(void) const1403 bool BamRecord::HasPulseExclusion(void) const
1404 {
1405 return impl_.HasTag(BamRecordTag::PULSE_EXCLUSION);
1406 }
1407
HasPulseCallWidth(void) const1408 bool BamRecord::HasPulseCallWidth(void) const
1409 {
1410 return impl_.HasTag(BamRecordTag::PULSE_CALL_WIDTH);
1411 }
1412
HasPulseWidth() const1413 bool BamRecord::HasPulseWidth() const { return impl_.HasTag(BamRecordTag::PULSE_WIDTH); }
1414
HasQueryEnd() const1415 bool BamRecord::HasQueryEnd() const { return impl_.HasTag(BamRecordTag::QUERY_END); }
1416
HasQueryStart() const1417 bool BamRecord::HasQueryStart() const { return impl_.HasTag(BamRecordTag::QUERY_START); }
1418
HasReadAccuracy() const1419 bool BamRecord::HasReadAccuracy() const
1420 {
1421 return impl_.HasTag(BamRecordTag::READ_ACCURACY) &&
1422 !impl_.TagValue(BamRecordTag::READ_ACCURACY).IsNull();
1423 }
1424
HasScrapRegionType() const1425 bool BamRecord::HasScrapRegionType() const
1426 {
1427 return impl_.HasTag(BamRecordTag::SCRAP_REGION_TYPE) &&
1428 !impl_.TagValue(BamRecordTag::SCRAP_REGION_TYPE).IsNull();
1429 }
1430
HasScrapZmwType() const1431 bool BamRecord::HasScrapZmwType() const
1432 {
1433 return impl_.HasTag(BamRecordTag::SCRAP_ZMW_TYPE) &&
1434 !impl_.TagValue(BamRecordTag::SCRAP_ZMW_TYPE).IsNull();
1435 }
1436
HasStartFrame() const1437 bool BamRecord::HasStartFrame() const { return impl_.HasTag(BamRecordTag::START_FRAME); }
1438
HasSignalToNoise() const1439 bool BamRecord::HasSignalToNoise() const { return impl_.HasTag(BamRecordTag::SNR); }
1440
HasSubstitutionQV() const1441 bool BamRecord::HasSubstitutionQV() const { return impl_.HasTag(BamRecordTag::SUBSTITUTION_QV); }
1442
HasSubstitutionTag() const1443 bool BamRecord::HasSubstitutionTag() const { return impl_.HasTag(BamRecordTag::SUBSTITUTION_TAG); }
1444
Header() const1445 BamHeader BamRecord::Header() const { return header_; }
1446
HoleNumber() const1447 int32_t BamRecord::HoleNumber() const
1448 {
1449 const Tag holeNumber = impl_.TagValue(BamRecordTag::HOLE_NUMBER);
1450 if (!holeNumber.IsNull()) return holeNumber.ToInt32();
1451
1452 // missing zm tag - try to pull from name
1453 return internal::HoleNumberFromName(FullName());
1454 }
1455
HoleNumber(const int32_t holeNumber)1456 BamRecord& BamRecord::HoleNumber(const int32_t holeNumber)
1457 {
1458 internal::CreateOrEdit(BamRecordTag::HOLE_NUMBER, holeNumber, &impl_);
1459 return *this;
1460 }
1461
Impl()1462 BamRecordImpl& BamRecord::Impl() { return impl_; }
1463
Impl() const1464 const BamRecordImpl& BamRecord::Impl() const { return impl_; }
1465
InsertionQV(Orientation orientation,bool aligned,bool exciseSoftClips) const1466 QualityValues BamRecord::InsertionQV(Orientation orientation, bool aligned,
1467 bool exciseSoftClips) const
1468 {
1469 return FetchQualities(BamRecordTag::INSERTION_QV, orientation, aligned, exciseSoftClips);
1470 }
1471
InsertionQV(const QualityValues & insertionQVs)1472 BamRecord& BamRecord::InsertionQV(const QualityValues& insertionQVs)
1473 {
1474 internal::CreateOrEdit(BamRecordTag::INSERTION_QV, insertionQVs.Fastq(), &impl_);
1475 return *this;
1476 }
1477
IPD(Orientation orientation,bool aligned,bool exciseSoftClips) const1478 Frames BamRecord::IPD(Orientation orientation, bool aligned, bool exciseSoftClips) const
1479 {
1480 return FetchFrames(BamRecordTag::IPD, orientation, aligned, exciseSoftClips);
1481 }
1482
IPD(const Frames & frames,const FrameEncodingType encoding)1483 BamRecord& BamRecord::IPD(const Frames& frames, const FrameEncodingType encoding)
1484 {
1485 if (encoding == FrameEncodingType::LOSSY)
1486 internal::CreateOrEdit(BamRecordTag::IPD, frames.Encode(), &impl_);
1487 else
1488 internal::CreateOrEdit(BamRecordTag::IPD, frames.Data(), &impl_);
1489 return *this;
1490 }
1491
IPDRaw(Orientation orientation) const1492 Frames BamRecord::IPDRaw(Orientation orientation) const
1493 {
1494 const auto tagName = internal::BamRecordTags::LabelFor(BamRecordTag::IPD);
1495 const Tag frameTag = impl_.TagValue(tagName);
1496 if (frameTag.IsNull()) return {};
1497
1498 Frames frames;
1499
1500 // lossy frame codes
1501 if (frameTag.IsUInt8Array()) {
1502 const auto codes = frameTag.ToUInt8Array();
1503 const std::vector<uint16_t> codes16(codes.begin(), codes.end());
1504 frames.Data(std::move(codes16));
1505 }
1506
1507 // lossless frame data
1508 else {
1509 assert(frameTag.IsUInt16Array());
1510 frames.Data(frameTag.ToUInt16Array());
1511 }
1512
1513 // return in requested orientation
1514 internal::OrientTagDataAsRequested(&frames,
1515 Orientation::NATIVE, // current
1516 orientation, // requested
1517 impl_.IsReverseStrand());
1518 return frames;
1519 }
1520
IsMapped() const1521 bool BamRecord::IsMapped() const { return impl_.IsMapped(); }
1522
LabelQV(Orientation orientation,bool aligned,bool exciseSoftClips,PulseBehavior pulseBehavior) const1523 QualityValues BamRecord::LabelQV(Orientation orientation, bool aligned, bool exciseSoftClips,
1524 PulseBehavior pulseBehavior) const
1525 {
1526 return FetchQualities(BamRecordTag::LABEL_QV, orientation, aligned, exciseSoftClips,
1527 pulseBehavior);
1528 }
1529
LabelQV(const QualityValues & labelQVs)1530 BamRecord& BamRecord::LabelQV(const QualityValues& labelQVs)
1531 {
1532 internal::CreateOrEdit(BamRecordTag::LABEL_QV, labelQVs.Fastq(), &impl_);
1533 return *this;
1534 }
1535
LocalContextFlags() const1536 LocalContextFlags BamRecord::LocalContextFlags() const
1537 {
1538 const auto tagName = internal::BamRecordTags::LabelFor(BamRecordTag::CONTEXT_FLAGS);
1539 const Tag cxTag = impl_.TagValue(tagName);
1540 return static_cast<PacBio::BAM::LocalContextFlags>(cxTag.ToUInt8());
1541 }
1542
LocalContextFlags(const PacBio::BAM::LocalContextFlags flags)1543 BamRecord& BamRecord::LocalContextFlags(const PacBio::BAM::LocalContextFlags flags)
1544 {
1545 internal::CreateOrEdit(BamRecordTag::CONTEXT_FLAGS, static_cast<uint8_t>(flags), &impl_);
1546 return *this;
1547 }
1548
Map(const int32_t referenceId,const Position refStart,const Strand strand,const Cigar & cigar,const uint8_t mappingQuality)1549 BamRecord& BamRecord::Map(const int32_t referenceId, const Position refStart, const Strand strand,
1550 const Cigar& cigar, const uint8_t mappingQuality)
1551 {
1552 impl_.Position(refStart);
1553 impl_.ReferenceId(referenceId);
1554 impl_.CigarData(cigar);
1555 impl_.MapQuality(mappingQuality);
1556 impl_.SetMapped(true);
1557
1558 if (strand == Strand::FORWARD)
1559 impl_.SetReverseStrand(false);
1560
1561 else {
1562 assert(strand == Strand::REVERSE);
1563 impl_.SetReverseStrand(true);
1564
1565 // switch seq & qual
1566 std::string sequence = impl_.Sequence();
1567 QualityValues qualities = impl_.Qualities();
1568
1569 internal::ReverseComplement(sequence);
1570 internal::Reverse(qualities);
1571
1572 impl_.SetSequenceAndQualities(sequence, qualities.Fastq());
1573 }
1574
1575 // reset any cached aligned start/end
1576 alignedStart_ = PacBio::BAM::UnmappedPosition;
1577 alignedEnd_ = PacBio::BAM::UnmappedPosition;
1578
1579 return *this;
1580 }
1581
MapQuality() const1582 uint8_t BamRecord::MapQuality() const { return impl_.MapQuality(); }
1583
MergeQV(Orientation orientation,bool aligned,bool exciseSoftClips) const1584 QualityValues BamRecord::MergeQV(Orientation orientation, bool aligned, bool exciseSoftClips) const
1585 {
1586 return FetchQualities(BamRecordTag::MERGE_QV, orientation, aligned, exciseSoftClips);
1587 }
1588
MergeQV(const QualityValues & mergeQVs)1589 BamRecord& BamRecord::MergeQV(const QualityValues& mergeQVs)
1590 {
1591 internal::CreateOrEdit(BamRecordTag::MERGE_QV, mergeQVs.Fastq(), &impl_);
1592 return *this;
1593 }
1594
MovieName() const1595 std::string BamRecord::MovieName() const { return ReadGroup().MovieName(); }
1596
NumDeletedBases() const1597 size_t BamRecord::NumDeletedBases() const
1598 {
1599 auto tEnd = ReferenceEnd();
1600 auto tStart = ReferenceStart();
1601 auto numMatchesAndMismatches = NumMatchesAndMismatches();
1602 auto nM = numMatchesAndMismatches.first;
1603 auto nMM = numMatchesAndMismatches.second;
1604 return (tEnd - tStart - nM - nMM);
1605 }
1606
NumInsertedBases() const1607 size_t BamRecord::NumInsertedBases() const
1608 {
1609 auto aEnd = AlignedEnd();
1610 auto aStart = AlignedStart();
1611 auto numMatchesAndMismatches = NumMatchesAndMismatches();
1612 auto nM = numMatchesAndMismatches.first;
1613 auto nMM = numMatchesAndMismatches.second;
1614 return (aEnd - aStart - nM - nMM);
1615 }
1616
NumMatches() const1617 size_t BamRecord::NumMatches() const { return NumMatchesAndMismatches().first; }
1618
NumMatchesAndMismatches() const1619 std::pair<size_t, size_t> BamRecord::NumMatchesAndMismatches() const
1620 {
1621 std::pair<size_t, size_t> result = std::make_pair(0, 0);
1622
1623 auto b = internal::BamRecordMemory::GetRawData(this);
1624 uint32_t* cigarData = bam_get_cigar(b.get());
1625 for (uint32_t i = 0; i < b->core.n_cigar; ++i) {
1626 const auto type = static_cast<CigarOperationType>(bam_cigar_op(cigarData[i]));
1627 if (type == CigarOperationType::SEQUENCE_MATCH)
1628 result.first += bam_cigar_oplen(cigarData[i]);
1629 else if (type == CigarOperationType::SEQUENCE_MISMATCH)
1630 result.second += bam_cigar_oplen(cigarData[i]);
1631 }
1632 return result;
1633 }
1634
NumMismatches() const1635 size_t BamRecord::NumMismatches() const { return NumMatchesAndMismatches().second; }
1636
NumPasses() const1637 int32_t BamRecord::NumPasses() const
1638 {
1639 const auto tagName = internal::BamRecordTags::LabelFor(BamRecordTag::NUM_PASSES);
1640 const Tag numPasses = impl_.TagValue(tagName);
1641 return numPasses.ToInt32();
1642 }
1643
NumPasses(const int32_t numPasses)1644 BamRecord& BamRecord::NumPasses(const int32_t numPasses)
1645 {
1646 internal::CreateOrEdit(BamRecordTag::NUM_PASSES, numPasses, &impl_);
1647 return *this;
1648 }
1649
Pkmean(Orientation orientation,bool aligned,bool exciseSoftClips,PulseBehavior pulseBehavior) const1650 std::vector<float> BamRecord::Pkmean(Orientation orientation, bool aligned, bool exciseSoftClips,
1651 PulseBehavior pulseBehavior) const
1652 {
1653 return FetchPhotons(BamRecordTag::PKMEAN, orientation, aligned, exciseSoftClips, pulseBehavior);
1654 }
1655
Pkmean(const std::vector<float> & photons)1656 BamRecord& BamRecord::Pkmean(const std::vector<float>& photons)
1657 {
1658 Pkmean(EncodePhotons(photons));
1659 return *this;
1660 }
1661
Pkmean(const std::vector<uint16_t> & encodedPhotons)1662 BamRecord& BamRecord::Pkmean(const std::vector<uint16_t>& encodedPhotons)
1663 {
1664 internal::CreateOrEdit(BamRecordTag::PKMEAN, encodedPhotons, &impl_);
1665 return *this;
1666 }
1667
Pkmid(Orientation orientation,bool aligned,bool exciseSoftClips,PulseBehavior pulseBehavior) const1668 std::vector<float> BamRecord::Pkmid(Orientation orientation, bool aligned, bool exciseSoftClips,
1669 PulseBehavior pulseBehavior) const
1670 {
1671 return FetchPhotons(BamRecordTag::PKMID, orientation, aligned, exciseSoftClips, pulseBehavior);
1672 }
1673
Pkmid(const std::vector<float> & photons)1674 BamRecord& BamRecord::Pkmid(const std::vector<float>& photons)
1675 {
1676 Pkmid(EncodePhotons(photons));
1677 return *this;
1678 }
1679
Pkmid(const std::vector<uint16_t> & encodedPhotons)1680 BamRecord& BamRecord::Pkmid(const std::vector<uint16_t>& encodedPhotons)
1681 {
1682 internal::CreateOrEdit(BamRecordTag::PKMID, encodedPhotons, &impl_);
1683 return *this;
1684 }
1685
Pkmean2(Orientation orientation,bool aligned,bool exciseSoftClips,PulseBehavior pulseBehavior) const1686 std::vector<float> BamRecord::Pkmean2(Orientation orientation, bool aligned, bool exciseSoftClips,
1687 PulseBehavior pulseBehavior) const
1688 {
1689 return FetchPhotons(BamRecordTag::PKMEAN_2, orientation, aligned, exciseSoftClips,
1690 pulseBehavior);
1691 }
1692
Pkmean2(const std::vector<float> & photons)1693 BamRecord& BamRecord::Pkmean2(const std::vector<float>& photons)
1694 {
1695 Pkmean2(EncodePhotons(photons));
1696 return *this;
1697 }
1698
Pkmean2(const std::vector<uint16_t> & encodedPhotons)1699 BamRecord& BamRecord::Pkmean2(const std::vector<uint16_t>& encodedPhotons)
1700 {
1701 internal::CreateOrEdit(BamRecordTag::PKMEAN_2, encodedPhotons, &impl_);
1702 return *this;
1703 }
1704
Pkmid2(Orientation orientation,bool aligned,bool exciseSoftClips,PulseBehavior pulseBehavior) const1705 std::vector<float> BamRecord::Pkmid2(Orientation orientation, bool aligned, bool exciseSoftClips,
1706 PulseBehavior pulseBehavior) const
1707 {
1708 return FetchPhotons(BamRecordTag::PKMID_2, orientation, aligned, exciseSoftClips,
1709 pulseBehavior);
1710 }
1711
Pkmid2(const std::vector<float> & photons)1712 BamRecord& BamRecord::Pkmid2(const std::vector<float>& photons)
1713 {
1714 Pkmid2(EncodePhotons(photons));
1715 return *this;
1716 }
1717
Pkmid2(const std::vector<uint16_t> & encodedPhotons)1718 BamRecord& BamRecord::Pkmid2(const std::vector<uint16_t>& encodedPhotons)
1719 {
1720 internal::CreateOrEdit(BamRecordTag::PKMID_2, encodedPhotons, &impl_);
1721 return *this;
1722 }
1723
PreBaseFrames(Orientation orientation,bool aligned,bool exciseSoftClips) const1724 Frames BamRecord::PreBaseFrames(Orientation orientation, bool aligned, bool exciseSoftClips) const
1725 {
1726 return IPD(orientation, aligned, exciseSoftClips);
1727 }
1728
PreBaseFrames(const Frames & frames,const FrameEncodingType encoding)1729 BamRecord& BamRecord::PreBaseFrames(const Frames& frames, const FrameEncodingType encoding)
1730 {
1731 return IPD(frames, encoding);
1732 }
1733
PrePulseFrames(Orientation orientation,bool aligned,bool exciseSoftClips,PulseBehavior pulseBehavior) const1734 Frames BamRecord::PrePulseFrames(Orientation orientation, bool aligned, bool exciseSoftClips,
1735 PulseBehavior pulseBehavior) const
1736 {
1737 return FetchFrames(BamRecordTag::PRE_PULSE_FRAMES, orientation, aligned, exciseSoftClips,
1738 pulseBehavior);
1739 }
1740
PrePulseFrames(const Frames & frames,const FrameEncodingType encoding)1741 BamRecord& BamRecord::PrePulseFrames(const Frames& frames, const FrameEncodingType encoding)
1742 {
1743 if (encoding == FrameEncodingType::LOSSY) {
1744 internal::CreateOrEdit(BamRecordTag::PRE_PULSE_FRAMES, frames.Encode(), &impl_);
1745 } else {
1746 internal::CreateOrEdit(BamRecordTag::PRE_PULSE_FRAMES, frames.Data(), &impl_);
1747 }
1748 return *this;
1749 }
1750
PulseWidthRaw(Orientation orientation,bool,bool) const1751 Frames BamRecord::PulseWidthRaw(Orientation orientation, bool /* aligned */,
1752 bool /* exciseSoftClips */) const
1753 {
1754 const auto tagName = internal::BamRecordTags::LabelFor(BamRecordTag::PULSE_WIDTH);
1755 const Tag frameTag = impl_.TagValue(tagName);
1756 if (frameTag.IsNull()) return {};
1757
1758 Frames frames;
1759
1760 // lossy frame codes
1761 if (frameTag.IsUInt8Array()) {
1762 const auto codes = frameTag.ToUInt8Array();
1763 const std::vector<uint16_t> codes16(codes.begin(), codes.end());
1764 frames.Data(std::move(codes16));
1765 }
1766
1767 // lossless frame data
1768 else {
1769 assert(frameTag.IsUInt16Array());
1770 frames.Data(frameTag.ToUInt16Array());
1771 }
1772
1773 // return in requested orientation
1774 internal::OrientTagDataAsRequested(&frames,
1775 Orientation::NATIVE, // current
1776 orientation, // requested
1777 impl_.IsReverseStrand());
1778 return frames;
1779 }
1780
PulseMergeQV(Orientation orientation,bool aligned,bool exciseSoftClips,PulseBehavior pulseBehavior) const1781 QualityValues BamRecord::PulseMergeQV(Orientation orientation, bool aligned, bool exciseSoftClips,
1782 PulseBehavior pulseBehavior) const
1783 {
1784 return FetchQualities(BamRecordTag::PULSE_MERGE_QV, orientation, aligned, exciseSoftClips,
1785 pulseBehavior);
1786 }
1787
PulseMergeQV(const QualityValues & mergeQVs)1788 BamRecord& BamRecord::PulseMergeQV(const QualityValues& mergeQVs)
1789 {
1790 internal::CreateOrEdit(BamRecordTag::PULSE_MERGE_QV, mergeQVs.Fastq(), &impl_);
1791 return *this;
1792 }
1793
PulseCall(Orientation orientation,bool aligned,bool exciseSoftClips,PulseBehavior pulseBehavior) const1794 std::string BamRecord::PulseCall(Orientation orientation, bool aligned, bool exciseSoftClips,
1795 PulseBehavior pulseBehavior) const
1796 {
1797 return FetchBases(BamRecordTag::PULSE_CALL, orientation, aligned, exciseSoftClips,
1798 pulseBehavior);
1799 }
1800
PulseCall(const std::string & tags)1801 BamRecord& BamRecord::PulseCall(const std::string& tags)
1802 {
1803 internal::CreateOrEdit(BamRecordTag::PULSE_CALL, tags, &impl_);
1804 return *this;
1805 }
1806
PulseCallWidth(Orientation orientation,bool aligned,bool exciseSoftClips,PulseBehavior pulseBehavior) const1807 Frames BamRecord::PulseCallWidth(Orientation orientation, bool aligned, bool exciseSoftClips,
1808 PulseBehavior pulseBehavior) const
1809 {
1810 return FetchFrames(BamRecordTag::PULSE_CALL_WIDTH, orientation, aligned, exciseSoftClips,
1811 pulseBehavior);
1812 }
1813
PulseCallWidth(const Frames & frames,const FrameEncodingType encoding)1814 BamRecord& BamRecord::PulseCallWidth(const Frames& frames, const FrameEncodingType encoding)
1815 {
1816 if (encoding == FrameEncodingType::LOSSY) {
1817 internal::CreateOrEdit(BamRecordTag::PULSE_CALL_WIDTH, frames.Encode(), &impl_);
1818 } else {
1819 internal::CreateOrEdit(BamRecordTag::PULSE_CALL_WIDTH, frames.Data(), &impl_);
1820 }
1821 return *this;
1822 }
1823
PulseExclusionReason(Orientation orientation,bool aligned,bool exciseSoftClips,PulseBehavior pulseBehavior) const1824 std::vector<PacBio::BAM::PulseExclusionReason> BamRecord::PulseExclusionReason(
1825 Orientation orientation, bool aligned, bool exciseSoftClips, PulseBehavior pulseBehavior) const
1826 {
1827 std::vector<PacBio::BAM::PulseExclusionReason> reasons;
1828
1829 const auto reasonNums = FetchUInt8s(BamRecordTag::PULSE_EXCLUSION, orientation, aligned,
1830 exciseSoftClips, pulseBehavior);
1831
1832 std::transform(
1833 reasonNums.cbegin(), reasonNums.cend(), std::back_inserter(reasons),
1834 [](const uint8_t num) { return static_cast<PacBio::BAM::PulseExclusionReason>(num); });
1835
1836 return reasons;
1837 }
1838
PulseExclusionReason(const std::vector<PacBio::BAM::PulseExclusionReason> & reasons)1839 BamRecord& BamRecord::PulseExclusionReason(
1840 const std::vector<PacBio::BAM::PulseExclusionReason>& reasons)
1841 {
1842 std::vector<uint8_t> reasonNums;
1843 std::transform(reasons.cbegin(), reasons.cend(), std::back_inserter(reasonNums),
1844 [](const PacBio::BAM::PulseExclusionReason& reason) {
1845 return static_cast<uint8_t>(reason);
1846 });
1847
1848 internal::CreateOrEdit(BamRecordTag::PULSE_EXCLUSION, reasonNums, &impl_);
1849 return *this;
1850 }
1851
PulseWidth(Orientation orientation,bool aligned,bool exciseSoftClips) const1852 Frames BamRecord::PulseWidth(Orientation orientation, bool aligned, bool exciseSoftClips) const
1853 {
1854 return FetchFrames(BamRecordTag::PULSE_WIDTH, orientation, aligned, exciseSoftClips,
1855 PulseBehavior::ALL);
1856 }
1857
PulseWidth(const Frames & frames,const FrameEncodingType encoding)1858 BamRecord& BamRecord::PulseWidth(const Frames& frames, const FrameEncodingType encoding)
1859 {
1860 if (encoding == FrameEncodingType::LOSSY) {
1861 internal::CreateOrEdit(BamRecordTag::PULSE_WIDTH, frames.Encode(), &impl_);
1862 } else {
1863 internal::CreateOrEdit(BamRecordTag::PULSE_WIDTH, frames.Data(), &impl_);
1864 }
1865 return *this;
1866 }
1867
Qualities(Orientation orientation,bool aligned,bool exciseSoftClips) const1868 QualityValues BamRecord::Qualities(Orientation orientation, bool aligned,
1869 bool exciseSoftClips) const
1870 {
1871 return FetchQualities(BamRecordTag::QUAL, orientation, aligned, exciseSoftClips);
1872 }
1873
QueryEnd() const1874 Position BamRecord::QueryEnd() const
1875 {
1876 // try 'qe' tag
1877 const auto tagName = internal::BamRecordTags::LabelFor(BamRecordTag::QUERY_END);
1878 const Tag qe = impl_.TagValue(tagName);
1879 if (!qe.IsNull()) return qe.ToInt32();
1880
1881 // tag missing, need to check movie name (fallback for non-PB BAMs, but ignore for CCS reads)
1882 RecordType type;
1883 try {
1884 type = Type();
1885 } catch (std::exception&) {
1886 return 0;
1887 }
1888 if (type == RecordType::CCS) throw std::runtime_error{"no query end for CCS read type"};
1889 if (type == RecordType::TRANSCRIPT)
1890 throw std::runtime_error{"no query end for transcript read type"};
1891
1892 // PacBio BAM, non-CCS/transcript
1893 try {
1894 return internal::QueryEndFromName(FullName());
1895 } catch (std::exception&) {
1896 // return fallback position
1897 return 0;
1898 }
1899 }
1900
QueryEnd(const Position pos)1901 BamRecord& BamRecord::QueryEnd(const Position pos)
1902 {
1903 internal::CreateOrEdit(BamRecordTag::QUERY_END, static_cast<int32_t>(pos), &impl_);
1904 UpdateName();
1905 return *this;
1906 }
1907
QueryStart() const1908 Position BamRecord::QueryStart() const
1909 {
1910 // try 'qs' tag
1911 const auto tagName = internal::BamRecordTags::LabelFor(BamRecordTag::QUERY_START);
1912 const Tag qs = impl_.TagValue(tagName);
1913 if (!qs.IsNull()) return qs.ToInt32();
1914
1915 // tag missing, need to check movie name (fallback for non-PB BAMs, but ignore for CCS reads)
1916 RecordType type;
1917 try {
1918 type = Type();
1919 } catch (std::exception&) {
1920 return 0;
1921 }
1922 if (type == RecordType::CCS) throw std::runtime_error{"no query start for CCS read type"};
1923 if (type == RecordType::TRANSCRIPT)
1924 throw std::runtime_error{"no query start for transcript read type"};
1925
1926 // PacBio BAM, non-CCS/transcript
1927 try {
1928 return internal::QueryStartFromName(FullName());
1929 } catch (std::exception&) {
1930 // return fallback position
1931 return 0;
1932 }
1933 }
1934
QueryStart(const Position pos)1935 BamRecord& BamRecord::QueryStart(const Position pos)
1936 {
1937 internal::CreateOrEdit(BamRecordTag::QUERY_START, static_cast<int32_t>(pos), &impl_);
1938 UpdateName();
1939 return *this;
1940 }
1941
ReadAccuracy() const1942 Accuracy BamRecord::ReadAccuracy() const
1943 {
1944 const auto tagName = internal::BamRecordTags::LabelFor(BamRecordTag::READ_ACCURACY);
1945 const Tag readAccuracy = impl_.TagValue(tagName);
1946 return {readAccuracy.ToFloat()};
1947 }
1948
ReadAccuracy(const Accuracy & accuracy)1949 BamRecord& BamRecord::ReadAccuracy(const Accuracy& accuracy)
1950 {
1951 internal::CreateOrEdit(BamRecordTag::READ_ACCURACY, static_cast<float>(accuracy), &impl_);
1952 return *this;
1953 }
1954
ReadGroup() const1955 ReadGroupInfo BamRecord::ReadGroup() const { return header_.ReadGroup(ReadGroupId()); }
1956
ReadGroup(const ReadGroupInfo & rg)1957 BamRecord& BamRecord::ReadGroup(const ReadGroupInfo& rg)
1958 {
1959 internal::CreateOrEdit(BamRecordTag::READ_GROUP, rg.Id(), &impl_);
1960 UpdateName();
1961 return *this;
1962 }
1963
ReadGroupId() const1964 std::string BamRecord::ReadGroupId() const
1965 {
1966 const auto tagName = internal::BamRecordTags::LabelFor(BamRecordTag::READ_GROUP);
1967 const Tag rgTag = impl_.TagValue(tagName);
1968 if (rgTag.IsNull()) return {};
1969 return rgTag.ToString();
1970 }
1971
ReadGroupId(const std::string & id)1972 BamRecord& BamRecord::ReadGroupId(const std::string& id)
1973 {
1974 internal::CreateOrEdit(BamRecordTag::READ_GROUP, id, &impl_);
1975 UpdateName();
1976 return *this;
1977 }
1978
ReadGroupNumericId() const1979 int32_t BamRecord::ReadGroupNumericId() const { return ReadGroupInfo::IdToInt(ReadGroupId()); }
1980
ReferenceEnd() const1981 Position BamRecord::ReferenceEnd() const
1982 {
1983 if (!impl_.IsMapped()) return PacBio::BAM::UnmappedPosition;
1984 const auto htsData = internal::BamRecordMemory::GetRawData(impl_);
1985 if (!htsData) return PacBio::BAM::UnmappedPosition;
1986 return bam_endpos(htsData.get());
1987 }
1988
ReferenceId() const1989 int32_t BamRecord::ReferenceId() const { return impl_.ReferenceId(); }
1990
ReferenceName() const1991 std::string BamRecord::ReferenceName() const
1992 {
1993 if (IsMapped())
1994 return Header().SequenceName(ReferenceId());
1995 else
1996 throw std::runtime_error{"unmapped record has no associated reference name"};
1997 }
1998
ReferenceStart() const1999 Position BamRecord::ReferenceStart() const { return impl_.Position(); }
2000
ResetCachedPositions() const2001 void BamRecord::ResetCachedPositions() const
2002 {
2003 alignedEnd_ = PacBio::BAM::UnmappedPosition;
2004 alignedStart_ = PacBio::BAM::UnmappedPosition;
2005 }
2006
ResetCachedPositions()2007 void BamRecord::ResetCachedPositions()
2008 {
2009 alignedEnd_ = PacBio::BAM::UnmappedPosition;
2010 alignedStart_ = PacBio::BAM::UnmappedPosition;
2011 }
2012
ScrapRegionType() const2013 VirtualRegionType BamRecord::ScrapRegionType() const
2014 {
2015 const auto tagName = internal::BamRecordTags::LabelFor(BamRecordTag::SCRAP_REGION_TYPE);
2016 const Tag srTag = impl_.TagValue(tagName);
2017 return VirtualRegionTypeMap::ParseChar[srTag.ToUInt8()];
2018 }
2019
ScrapRegionType(const VirtualRegionType type)2020 BamRecord& BamRecord::ScrapRegionType(const VirtualRegionType type)
2021 {
2022 internal::CreateOrEdit(BamRecordTag::SCRAP_REGION_TYPE, static_cast<uint8_t>(type), &impl_);
2023 return *this;
2024 }
2025
ScrapRegionType(const char type)2026 BamRecord& BamRecord::ScrapRegionType(const char type)
2027 {
2028 internal::CreateOrEdit(BamRecordTag::SCRAP_REGION_TYPE, type, &impl_);
2029 return *this;
2030 }
2031
ScrapZmwType() const2032 ZmwType BamRecord::ScrapZmwType() const
2033 {
2034 const auto tagName = internal::BamRecordTags::LabelFor(BamRecordTag::SCRAP_ZMW_TYPE);
2035 const Tag szTag = impl_.TagValue(tagName);
2036 return ZmwTypeMap::ParseChar[szTag.ToUInt8()];
2037 }
2038
ScrapZmwType(const ZmwType type)2039 BamRecord& BamRecord::ScrapZmwType(const ZmwType type)
2040 {
2041 internal::CreateOrEdit(BamRecordTag::SCRAP_ZMW_TYPE, static_cast<uint8_t>(type), &impl_);
2042 return *this;
2043 }
2044
ScrapZmwType(const char type)2045 BamRecord& BamRecord::ScrapZmwType(const char type)
2046 {
2047 internal::CreateOrEdit(BamRecordTag::SCRAP_ZMW_TYPE, type, &impl_);
2048 return *this;
2049 }
2050
Sequence(const Orientation orientation,bool aligned,bool exciseSoftClips) const2051 std::string BamRecord::Sequence(const Orientation orientation, bool aligned,
2052 bool exciseSoftClips) const
2053 {
2054 return FetchBases(BamRecordTag::SEQ, orientation, aligned, exciseSoftClips);
2055 }
2056
SignalToNoise() const2057 std::vector<float> BamRecord::SignalToNoise() const
2058 {
2059 const auto tagName = internal::BamRecordTags::LabelFor(BamRecordTag::SNR);
2060 const Tag snTag = impl_.TagValue(tagName);
2061 return snTag.ToFloatArray();
2062 }
2063
SignalToNoise(const std::vector<float> & snr)2064 BamRecord& BamRecord::SignalToNoise(const std::vector<float>& snr)
2065 {
2066 internal::CreateOrEdit(BamRecordTag::SNR, snr, &impl_);
2067 return *this;
2068 }
2069
StartFrame(Orientation orientation,bool aligned,bool exciseSoftClips,PulseBehavior pulseBehavior) const2070 std::vector<uint32_t> BamRecord::StartFrame(Orientation orientation, bool aligned,
2071 bool exciseSoftClips, PulseBehavior pulseBehavior) const
2072 {
2073 return FetchUInt32s(BamRecordTag::START_FRAME, orientation, aligned, exciseSoftClips,
2074 pulseBehavior);
2075 }
2076
StartFrame(const std::vector<uint32_t> & startFrame)2077 BamRecord& BamRecord::StartFrame(const std::vector<uint32_t>& startFrame)
2078 {
2079 internal::CreateOrEdit(BamRecordTag::START_FRAME, startFrame, &impl_);
2080 return *this;
2081 }
2082
SubstitutionQV(Orientation orientation,bool aligned,bool exciseSoftClips) const2083 QualityValues BamRecord::SubstitutionQV(Orientation orientation, bool aligned,
2084 bool exciseSoftClips) const
2085 {
2086 return FetchQualities(BamRecordTag::SUBSTITUTION_QV, orientation, aligned, exciseSoftClips);
2087 }
2088
SubstitutionQV(const QualityValues & substitutionQVs)2089 BamRecord& BamRecord::SubstitutionQV(const QualityValues& substitutionQVs)
2090 {
2091 internal::CreateOrEdit(BamRecordTag::SUBSTITUTION_QV, substitutionQVs.Fastq(), &impl_);
2092 return *this;
2093 }
2094
SubstitutionTag(Orientation orientation,bool aligned,bool exciseSoftClips) const2095 std::string BamRecord::SubstitutionTag(Orientation orientation, bool aligned,
2096 bool exciseSoftClips) const
2097 {
2098 return FetchBases(BamRecordTag::SUBSTITUTION_TAG, orientation, aligned, exciseSoftClips);
2099 }
2100
SubstitutionTag(const std::string & tags)2101 BamRecord& BamRecord::SubstitutionTag(const std::string& tags)
2102 {
2103 internal::CreateOrEdit(BamRecordTag::SUBSTITUTION_TAG, tags, &impl_);
2104 return *this;
2105 }
2106
Type() const2107 RecordType BamRecord::Type() const
2108 {
2109 try {
2110 const auto typeName = ReadGroup().ReadType();
2111 return internal::NameToType(typeName);
2112 } catch (std::exception&) {
2113
2114 // read group not found, peek at name to see if we're possibly
2115 // CCS or TRANSCRIPT
2116 //
2117 const auto name = FullName();
2118 if (name.find("transcript") == 0)
2119 return RecordType::TRANSCRIPT;
2120 else if (name.find("/ccs") != std::string::npos)
2121 return RecordType::CCS;
2122 else
2123 return RecordType::UNKNOWN;
2124 }
2125 }
2126
UpdateName()2127 void BamRecord::UpdateName()
2128 {
2129 std::string newName;
2130 newName.reserve(100);
2131
2132 const auto holeNumber = (HasHoleNumber() ? std::to_string(HoleNumber()) : "?");
2133 if (Type() == RecordType::TRANSCRIPT) {
2134 newName = "transcript/" + holeNumber;
2135 } else {
2136 newName += MovieName();
2137 newName += "/";
2138 newName += holeNumber;
2139 newName += "/";
2140
2141 if (Type() == RecordType::CCS)
2142 newName += "ccs";
2143
2144 else {
2145 if (HasQueryStart())
2146 newName += std::to_string(QueryStart());
2147 else
2148 newName += "?";
2149
2150 newName += '_';
2151
2152 if (HasQueryEnd())
2153 newName += std::to_string(QueryEnd());
2154 else
2155 newName += "?";
2156 }
2157 }
2158 impl_.Name(newName);
2159 }
2160
2161 } // namespace BAM
2162 } // namespace PacBio
2163