1 // Author: Derek Barnett
2 
3 #include "PbbamInternalConfig.h"
4 
5 #include "pbbam/BamRecordImpl.h"
6 
7 #include <algorithm>
8 #include <array>
9 #include <cassert>
10 #include <cstddef>
11 #include <cstdint>
12 #include <cstdlib>
13 #include <cstring>
14 #include <iostream>
15 #include <utility>
16 
17 #include <htslib/hts_endian.h>
18 
19 #include "BamRecordTags.h"
20 #include "MemoryUtils.h"
21 #include "pbbam/BamTagCodec.h"
22 
23 namespace PacBio {
24 namespace BAM {
25 
26 namespace {
27 
FetchRawCigar(const uint32_t * const src,const uint32_t len)28 Cigar FetchRawCigar(const uint32_t* const src, const uint32_t len)
29 {
30     Cigar result;
31     result.reserve(len);
32     for (uint32_t i = 0; i < len; ++i) {
33         const uint32_t length = bam_cigar_oplen(src[i]);
34         const auto type = static_cast<CigarOperationType>(bam_cigar_op(src[i]));
35         result.push_back(CigarOperation(type, length));
36     }
37     return result;
38 }
39 
HasLongCigar(const bam1_t * const b)40 bool HasLongCigar(const bam1_t* const b)
41 {
42     auto* c = &b->core;
43 
44     // if empty CIGAR or unmapped
45     if (c->n_cigar == 0 || c->tid < 0 || c->pos < 0) return false;
46 
47     // if existing CIGAR doesn't look like a 'fake CIGAR'
48     const auto firstCigarOp = *(bam_get_cigar(b));
49     if (bam_cigar_op(firstCigarOp) != static_cast<uint32_t>(CigarOperationType::SOFT_CLIP) ||
50         static_cast<int32_t>(bam_cigar_oplen(firstCigarOp)) != c->l_qseq) {
51         return false;
52     }
53 
54     // if CG tag missing, not expected type
55     const uint8_t* const CG = bam_aux_get(b, "CG");
56     if (CG == nullptr) return false;
57     if (CG[0] != 'B' || CG[1] != 'I') return false;
58 
59     // if CG tag data is empty
60     uint32_t numElements = 0;
61     memcpy(&numElements, &CG[2], sizeof(uint32_t));
62     if (numElements == 0) return false;
63 
64     // we've found long CIGAR data in the CG tag
65     return true;
66 }
67 
68 }  // namespace anonymous
69 
BamRecordImpl()70 BamRecordImpl::BamRecordImpl() : d_(nullptr)
71 {
72     InitializeData();
73     assert(d_);
74 }
75 
BamRecordImpl(const BamRecordImpl & other)76 BamRecordImpl::BamRecordImpl(const BamRecordImpl& other)
77     : d_{bam_dup1(other.d_.get()), internal::HtslibRecordDeleter()}, tagOffsets_{other.tagOffsets_}
78 {
79     assert(d_);
80 }
81 
BamRecordImpl(BamRecordImpl && other)82 BamRecordImpl::BamRecordImpl(BamRecordImpl&& other) : tagOffsets_{std::move(other.tagOffsets_)}
83 {
84     d_.swap(other.d_);
85     other.d_.reset();
86     assert(d_);
87 }
88 
operator =(const BamRecordImpl & other)89 BamRecordImpl& BamRecordImpl::operator=(const BamRecordImpl& other)
90 {
91     if (this != &other) {
92         if (d_ == nullptr) InitializeData();
93         bam_copy1(d_.get(), other.d_.get());
94         tagOffsets_ = other.tagOffsets_;
95     }
96     assert(d_);
97     return *this;
98 }
99 
operator =(BamRecordImpl && other)100 BamRecordImpl& BamRecordImpl::operator=(BamRecordImpl&& other)
101 {
102     if (this != &other) {
103         d_.swap(other.d_);
104         other.d_.reset();
105 
106         tagOffsets_ = std::move(other.tagOffsets_);
107     }
108     assert(d_);
109     return *this;
110 }
111 
AddTag(const std::string & tagName,const Tag & value)112 bool BamRecordImpl::AddTag(const std::string& tagName, const Tag& value)
113 {
114     return AddTag(tagName, value, TagModifier::NONE);
115 }
116 
AddTag(const BamRecordTag tag,const Tag & value)117 bool BamRecordImpl::AddTag(const BamRecordTag tag, const Tag& value)
118 {
119     return AddTag(internal::BamRecordTags::LabelFor(tag), value, TagModifier::NONE);
120 }
121 
AddTag(const std::string & tagName,const Tag & value,const TagModifier additionalModifier)122 bool BamRecordImpl::AddTag(const std::string& tagName, const Tag& value,
123                            const TagModifier additionalModifier)
124 {
125     if (tagName.size() != 2 || HasTag(tagName)) return false;
126     const auto added = AddTagImpl(tagName, value, additionalModifier);
127     if (added) UpdateTagMap();
128     return added;
129 }
130 
AddTag(const BamRecordTag tag,const Tag & value,const TagModifier additionalModifier)131 bool BamRecordImpl::AddTag(const BamRecordTag tag, const Tag& value,
132                            const TagModifier additionalModifier)
133 {
134     return AddTag(internal::BamRecordTags::LabelFor(tag), value, additionalModifier);
135 }
136 
AddTagImpl(const std::string & tagName,const Tag & value,const TagModifier additionalModifier)137 bool BamRecordImpl::AddTagImpl(const std::string& tagName, const Tag& value,
138                                const TagModifier additionalModifier)
139 {
140     const auto rawData = BamTagCodec::ToRawData(value, additionalModifier);
141     if (rawData.empty()) return false;
142 
143     bam_aux_append(d_.get(), tagName.c_str(), BamTagCodec::TagTypeCode(value, additionalModifier),
144                    rawData.size(), const_cast<uint8_t*>(rawData.data()));
145     return true;
146 }
147 
CigarData() const148 Cigar BamRecordImpl::CigarData() const
149 {
150     const auto* b = d_.get();
151     if (HasLongCigar(b)) {
152         // fetch long CIGAR from tag
153         const auto cigarTag = TagValue("CG");
154         const auto cigarTagValue = cigarTag.ToUInt32Array();
155         return FetchRawCigar(cigarTagValue.data(), cigarTagValue.size());
156     } else {
157         // fetch normal, short CIGAR from the standard location
158         return FetchRawCigar(bam_get_cigar(b), b->core.n_cigar);
159     }
160 }
161 
CigarData(const Cigar & cigar)162 BamRecordImpl& BamRecordImpl::CigarData(const Cigar& cigar)
163 {
164     // Set normal, "short" CIGAR and remove CG tag if present.
165     if (cigar.size() < 65536) {
166         SetCigarData(cigar);
167         if (HasTag("CG")) RemoveTag("CG");
168     }
169 
170     // Set long CIGAR data
171     else {
172         // Add the 'fake' CIGAR in normal place.
173         Cigar fake;
174         fake.emplace_back(CigarOperationType::SOFT_CLIP, SequenceLength());
175         const uint32_t alignedLength =
176             static_cast<uint32_t>(bam_cigar2rlen(d_->core.n_cigar, bam_get_cigar(d_.get())));
177         fake.emplace_back(CigarOperationType::REFERENCE_SKIP, alignedLength);
178         SetCigarData(fake);
179 
180         // Add raw CIGAR data to CG tag.
181         std::vector<uint32_t> cigarData(cigar.size());
182         cigarData.reserve(cigar.size());
183         for (size_t i = 0; i < cigar.size(); ++i) {
184             const CigarOperation& op = cigar.at(i);
185             cigarData[i] = bam_cigar_gen(op.Length(), static_cast<int>(op.Type()));
186         }
187         if (HasTag("CG"))
188             EditTag("CG", Tag{cigarData});
189         else
190             AddTag("CG", Tag{cigarData});
191     }
192 
193     return *this;
194 }
195 
CigarData(const std::string & cigarString)196 BamRecordImpl& BamRecordImpl::CigarData(const std::string& cigarString)
197 {
198     return CigarData(Cigar::FromStdString(cigarString));
199 }
200 
EditTag(const std::string & tagName,const Tag & newValue)201 bool BamRecordImpl::EditTag(const std::string& tagName, const Tag& newValue)
202 {
203     return EditTag(tagName, newValue, TagModifier::NONE);
204 }
205 
EditTag(const BamRecordTag tag,const Tag & newValue)206 bool BamRecordImpl::EditTag(const BamRecordTag tag, const Tag& newValue)
207 {
208     return EditTag(internal::BamRecordTags::LabelFor(tag), newValue, TagModifier::NONE);
209 }
210 
EditTag(const std::string & tagName,const Tag & newValue,const TagModifier additionalModifier)211 bool BamRecordImpl::EditTag(const std::string& tagName, const Tag& newValue,
212                             const TagModifier additionalModifier)
213 {
214     // try remove old value (with delayed tag map update)
215     const bool removed = RemoveTagImpl(tagName);
216     if (!removed) return false;
217 
218     // if old value removed, add new value
219     const bool added = AddTagImpl(tagName, newValue, additionalModifier);
220     if (added) UpdateTagMap();
221     return added;
222 }
223 
EditTag(const BamRecordTag tag,const Tag & newValue,const TagModifier additionalModifier)224 bool BamRecordImpl::EditTag(const BamRecordTag tag, const Tag& newValue,
225                             const TagModifier additionalModifier)
226 {
227     return EditTag(internal::BamRecordTags::LabelFor(tag), newValue, additionalModifier);
228 }
229 
FromRawData(const std::shared_ptr<bam1_t> & rawData)230 BamRecordImpl BamRecordImpl::FromRawData(const std::shared_ptr<bam1_t>& rawData)
231 {
232     BamRecordImpl result;
233     bam_copy1(result.d_.get(), rawData.get());
234     return result;
235 }
236 
HasTag(const std::string & tagName) const237 bool BamRecordImpl::HasTag(const std::string& tagName) const
238 {
239     if (tagName.size() != 2) return false;
240     return TagOffset(tagName) != -1;
241 
242     // 27635
243     //    return bam_aux_get(d_.get(), tagName.c_str()) != 0;
244 }
245 
HasTag(const BamRecordTag tag) const246 bool BamRecordImpl::HasTag(const BamRecordTag tag) const
247 {
248     return HasTag(internal::BamRecordTags::LabelFor(tag));
249 }
250 
InitializeData()251 void BamRecordImpl::InitializeData()
252 {
253     d_.reset(bam_init1(), internal::HtslibRecordDeleter());
254     d_->data = static_cast<uint8_t*>(
255         calloc(0x800, sizeof(uint8_t)));  // maybe make this value tune-able later?
256     d_->m_data = 0x800;
257 
258     // init unmapped
259     Position(PacBio::BAM::UnmappedPosition);
260     MatePosition(PacBio::BAM::UnmappedPosition);
261     ReferenceId(-1);
262     MateReferenceId(-1);
263     SetMapped(false);
264     MapQuality(255);
265 
266     // initialized with empty qname (null term + 3 'extra nulls' for alignment
267     d_->core.l_extranul = 3;
268     d_->core.l_qname = 4;
269     d_->l_data = 4;
270 }
271 
MaybeReallocData()272 void BamRecordImpl::MaybeReallocData()
273 {
274     // about to grow data contents to l_data size, but m_data is our current max.
275     // so we may need to grow. if so, use kroundup to double to next power of 2
276     //
277     // from sam.h:
278     //   decltype(m_data) = uint32_t
279     //   decltype(l_data) = int
280     if (d_->m_data < static_cast<uint32_t>(d_->l_data)) {
281         d_->m_data = d_->l_data;
282         kroundup32(d_->m_data);
283         d_->data = static_cast<uint8_t*>(realloc(d_->data, d_->m_data));
284     }
285 }
286 
Name() const287 std::string BamRecordImpl::Name() const { return std::string(bam_get_qname(d_)); }
288 
Name(const std::string & name)289 BamRecordImpl& BamRecordImpl::Name(const std::string& name)
290 {
291     // determine change in memory needed
292     // diffNumBytes: pos -> growing, neg -> shrinking
293     const size_t numChars = name.size() + 1;  // +1 for NULL-term
294     const size_t numExtraNulls = 4 - (numChars % 4);
295     const size_t totalNameSize = numChars + numExtraNulls;
296 
297     const int diffNumBytes = totalNameSize - d_->core.l_qname;
298     const int oldLengthData = d_->l_data;
299     d_->l_data += diffNumBytes;
300     MaybeReallocData();
301 
302     // shift trailing data (cigar, seq, qual, tags) as needed
303     const uint32_t* oldCigarStart = bam_get_cigar(d_);
304     const size_t trailingDataLength =
305         oldLengthData - (reinterpret_cast<const unsigned char*>(oldCigarStart) -
306                          reinterpret_cast<const unsigned char*>(d_->data));
307     d_->core.l_qname = totalNameSize;
308     d_->core.l_extranul = numExtraNulls;
309     uint32_t* newCigarStart = bam_get_cigar(d_);
310     memmove(newCigarStart, oldCigarStart, trailingDataLength);
311 
312     // fill in new name
313     memcpy(d_->data, name.c_str(), numChars);
314     memset(d_->data + numChars, '\0', numExtraNulls);
315     return *this;
316 }
317 
Qualities() const318 QualityValues BamRecordImpl::Qualities() const
319 {
320     if (d_->core.l_qseq == 0) return QualityValues();
321 
322     uint8_t* qualData = bam_get_qual(d_);
323     if (qualData[0] == 0xff) return QualityValues();
324 
325     const size_t numQuals = d_->core.l_qseq;
326     QualityValues result;
327     result.reserve(numQuals);
328     for (size_t i = 0; i < numQuals; ++i)
329         result.push_back(QualityValue(qualData[i]));
330     return result;
331 }
332 
RemoveTag(const std::string & tagName)333 bool BamRecordImpl::RemoveTag(const std::string& tagName)
334 {
335     const bool removed = RemoveTagImpl(tagName);
336     if (removed) UpdateTagMap();
337     return removed;
338 }
339 
RemoveTag(const BamRecordTag tag)340 bool BamRecordImpl::RemoveTag(const BamRecordTag tag)
341 {
342     return RemoveTag(internal::BamRecordTags::LabelFor(tag));
343 }
344 
RemoveTagImpl(const std::string & tagName)345 bool BamRecordImpl::RemoveTagImpl(const std::string& tagName)
346 {
347     if (tagName.size() != 2) return false;
348     uint8_t* data = bam_aux_get(d_.get(), tagName.c_str());
349     if (data == nullptr) return false;
350     const bool ok = bam_aux_del(d_.get(), data) == 0;
351     return ok;
352 }
353 
Sequence() const354 std::string BamRecordImpl::Sequence() const
355 {
356     std::string result(d_->core.l_qseq, '\0');
357     static const constexpr std::array<char, 16> DnaLookup{
358         {'=', 'A', 'C', 'M', 'G', 'R', 'S', 'V', 'T', 'W', 'Y', 'H', 'K', 'D', 'B', 'N'}};
359     const uint8_t* seqData = bam_get_seq(d_);
360     for (int i = 0; i < d_->core.l_qseq; ++i)
361         result[i] = DnaLookup[bam_seqi(seqData, i)];
362     return result;
363 }
364 
SequenceLength() const365 size_t BamRecordImpl::SequenceLength() const { return d_->core.l_qseq; }
366 
SetCigarData(const Cigar & cigar)367 void BamRecordImpl::SetCigarData(const Cigar& cigar)
368 {
369     // determine change in memory needed
370     // diffNumBytes: pos -> growing, neg -> shrinking
371     const size_t numCigarOps = cigar.size();
372     const int diffNumCigars = numCigarOps - d_->core.n_cigar;
373     const int diffNumBytes = diffNumCigars * sizeof(uint32_t);
374     const int oldLengthData = d_->l_data;
375     d_->l_data += diffNumBytes;
376     MaybeReallocData();
377 
378     // shift trailing data (seq, qual, tags) as needed
379     const uint8_t* oldSequenceStart = bam_get_seq(d_);
380     const size_t trailingDataLength = oldLengthData - (oldSequenceStart - d_->data);
381     d_->core.n_cigar = numCigarOps;
382     uint8_t* newSequenceStart = bam_get_seq(d_);
383     memmove(newSequenceStart, oldSequenceStart, trailingDataLength);
384 
385     // fill in new CIGAR data
386     uint32_t* cigarDataStart = bam_get_cigar(d_);
387     for (size_t i = 0; i < numCigarOps; ++i) {
388         const CigarOperation& cigarOp = cigar.at(i);
389         cigarDataStart[i] = bam_cigar_gen(cigarOp.Length(), static_cast<int>(cigarOp.Type()));
390     }
391 }
392 
SetSequenceAndQualities(const std::string & sequence,const std::string & qualities)393 BamRecordImpl& BamRecordImpl::SetSequenceAndQualities(const std::string& sequence,
394                                                       const std::string& qualities)
395 {
396     if (!qualities.empty() && (sequence.size() != qualities.size()))
397         throw std::runtime_error{"If QUAL provided, must be of the same length as SEQ"};
398 
399     return SetSequenceAndQualitiesInternal(sequence.c_str(), sequence.size(), qualities.c_str(),
400                                            false);
401 }
402 
SetSequenceAndQualities(const char * sequence,const size_t sequenceLength,const char * qualities)403 BamRecordImpl& BamRecordImpl::SetSequenceAndQualities(const char* sequence,
404                                                       const size_t sequenceLength,
405                                                       const char* qualities)
406 {
407     return SetSequenceAndQualitiesInternal(sequence, sequenceLength, qualities, false);
408 }
409 
SetPreencodedSequenceAndQualities(const char * encodedSequence,const size_t rawSequenceLength,const char * qualities)410 BamRecordImpl& BamRecordImpl::SetPreencodedSequenceAndQualities(const char* encodedSequence,
411                                                                 const size_t rawSequenceLength,
412                                                                 const char* qualities)
413 {
414     return SetSequenceAndQualitiesInternal(encodedSequence, rawSequenceLength, qualities, true);
415 }
416 
SetSequenceAndQualitiesInternal(const char * sequence,const size_t sequenceLength,const char * qualities,bool isPreencoded)417 BamRecordImpl& BamRecordImpl::SetSequenceAndQualitiesInternal(const char* sequence,
418                                                               const size_t sequenceLength,
419                                                               const char* qualities,
420                                                               bool isPreencoded)
421 {
422     // determine change in memory needed
423     // diffNumBytes: pos -> growing, neg -> shrinking
424     const auto encodedSequenceLength = static_cast<int>((sequenceLength + 1) / 2);
425     const int oldSeqAndQualLength =
426         ((d_->core.l_qseq + 1) / 2) + d_->core.l_qseq;                       // encoded seq + qual
427     const int newSeqAndQualLength = encodedSequenceLength + sequenceLength;  // encoded seq + qual
428     const int diffNumBytes = newSeqAndQualLength - oldSeqAndQualLength;
429     const int oldLengthData = d_->l_data;
430     d_->l_data += diffNumBytes;
431     MaybeReallocData();
432 
433     // shift trailing data (tags) as needed
434     const unsigned char* oldTagStart = bam_get_aux(d_);
435     const size_t trailingDataLength =
436         oldLengthData - (oldTagStart - reinterpret_cast<const unsigned char*>(d_->data));
437     d_->core.l_qseq = sequenceLength;
438     uint8_t* newTagStart = bam_get_aux(d_);
439     memmove(newTagStart, oldTagStart, trailingDataLength);
440 
441     // fill in new sequence
442     uint8_t* pEncodedSequence = bam_get_seq(d_);
443     if (isPreencoded) {
444         memcpy(pEncodedSequence, sequence, encodedSequenceLength);
445     } else {
446         memset(pEncodedSequence, 0, encodedSequenceLength);
447         for (size_t i = 0; i < sequenceLength; ++i)
448             pEncodedSequence[i >> 1] |= seq_nt16_table[static_cast<int>(sequence[i])]
449                                         << ((~i & 1) << 2);
450     }
451 
452     // fill in quality values
453     uint8_t* encodedQualities = bam_get_qual(d_);
454     if ((qualities == nullptr) || (strlen(qualities) == 0))
455         memset(encodedQualities, 0xff, sequenceLength);
456     else {
457         for (size_t i = 0; i < sequenceLength; ++i)
458             encodedQualities[i] = qualities[i] - 33;  // FASTQ ASCII -> int conversion
459     }
460     return *this;
461 }
462 
TagOffset(const std::string & tagName) const463 int BamRecordImpl::TagOffset(const std::string& tagName) const
464 {
465     if (tagName.size() != 2) throw std::runtime_error{"invalid tag name size"};
466 
467     if (tagOffsets_.empty()) UpdateTagMap();
468 
469     const uint16_t tagCode =
470         (static_cast<uint8_t>(tagName.at(0)) << 8) | static_cast<uint8_t>(tagName.at(1));
471     const auto found = tagOffsets_.find(tagCode);
472     return (found != tagOffsets_.cend() ? found->second : -1);
473 }
474 
Tags(const TagCollection & tags)475 BamRecordImpl& BamRecordImpl::Tags(const TagCollection& tags)
476 {
477     // convert tags to binary
478     const std::vector<uint8_t> tagData = BamTagCodec::Encode(tags);
479     const size_t numBytes = tagData.size();
480     const uint8_t* data = tagData.data();
481 
482     // determine change in memory needed
483     uint8_t* tagStart = bam_get_aux(d_);
484     const size_t oldNumBytes = d_->l_data - (tagStart - d_->data);
485     const int diffNumBytes = numBytes - oldNumBytes;
486     d_->l_data += diffNumBytes;
487     MaybeReallocData();
488     tagStart = bam_get_aux(d_);
489 
490     // fill in new tag data
491     memcpy(static_cast<void*>(tagStart), data, numBytes);
492 
493     // update tag info
494     UpdateTagMap();
495     return *this;
496 }
497 
Tags() const498 TagCollection BamRecordImpl::Tags() const
499 {
500     const uint8_t* tagDataStart = bam_get_aux(d_);
501     const size_t numBytes = d_->l_data - (tagDataStart - d_->data);
502     return BamTagCodec::Decode(std::vector<uint8_t>(tagDataStart, tagDataStart + numBytes));
503 }
504 
TagValue(const std::string & tagName) const505 Tag BamRecordImpl::TagValue(const std::string& tagName) const
506 {
507     if (tagName.size() != 2) return {};
508 
509     const int offset = TagOffset(tagName);
510     if (offset == -1) return {};
511 
512     bam1_t* b = d_.get();
513     assert(bam_get_aux(b));
514     uint8_t* tagData = bam_get_aux(b) + offset;
515     if (offset >= b->l_data) return {};
516 
517     // skip tag name
518     return BamTagCodec::FromRawData(tagData);
519 }
520 
TagValue(const BamRecordTag tag) const521 Tag BamRecordImpl::TagValue(const BamRecordTag tag) const
522 {
523     return TagValue(internal::BamRecordTags::LabelFor(tag));
524 }
525 
UpdateTagMap() const526 void BamRecordImpl::UpdateTagMap() const
527 {
528     // clear out offsets, leave map structure basically intact
529     for (auto& tag : tagOffsets_)
530         tag.second = -1;
531 
532     const uint8_t* tagStart = bam_get_aux(d_);
533     if (tagStart == nullptr) return;
534     const ptrdiff_t numBytes = d_->l_data - (tagStart - d_->data);
535 
536     // NOTE: using a 16-bit 'code' for tag name here instead of string, to avoid
537     // a lot of string constructions & comparisons. All valid tags will be 2 chars
538     // anyway, so this should be a nice lookup mechanism.
539     //
540     uint16_t tagNameCode;
541     int64_t i = 0;
542     while (i < numBytes) {
543 
544         // store (tag name code -> start offset into tag data)
545         tagNameCode = static_cast<char>(tagStart[i]) << 8 | static_cast<char>(tagStart[i + 1]);
546         i += 2;
547         tagOffsets_[tagNameCode] = i;
548 
549         // skip tag contents
550         const auto tagType = static_cast<char>(tagStart[i++]);
551         switch (tagType) {
552             case 'A':
553             case 'a':
554             case 'c':
555             case 'C': {
556                 i += 1;
557                 break;
558             }
559             case 's':
560             case 'S': {
561                 i += 2;
562                 break;
563             }
564             case 'i':
565             case 'I':
566             case 'f': {
567                 i += 4;
568                 break;
569             }
570 
571             case 'Z':
572             case 'H': {
573                 // null-terminated string
574                 i += strlen(reinterpret_cast<const char*>(&tagStart[i])) + 1;
575                 break;
576             }
577 
578             case 'B': {
579                 const char subTagType = tagStart[i++];
580                 size_t elementSize = 0;
581                 switch (subTagType) {
582                     case 'c':
583                     case 'C':
584                         elementSize = 1;
585                         break;
586                     case 's':
587                     case 'S':
588                         elementSize = 2;
589                         break;
590                     case 'i':
591                     case 'I':
592                     case 'f':
593                         elementSize = 4;
594                         break;
595 
596                     // unknown subTagType
597                     default:
598                         throw std::runtime_error{"unsupported array-tag-type encountered: " +
599                                                  std::string{1, subTagType}};
600                 }
601 
602                 uint32_t numElements = 0;
603                 memcpy(&numElements, &tagStart[i], sizeof(uint32_t));
604                 i += (4 + (elementSize * numElements));
605                 break;
606             }
607 
608             // unknown tagType
609             default:
610                 throw std::runtime_error{"unsupported tag-type encountered: " +
611                                          std::string{1, tagType}};
612         }
613     }
614 }
615 
616 }  // namespace BAM
617 }  // namespace PacBio
618