1 // Author: Derek Barnett
2 
#include <array>
#include <cstddef>
#include <cstdint>
#include <initializer_list>
#include <stdexcept>
#include <string>
#include <vector>
9 
10 #include <gtest/gtest.h>
11 
12 #include <pbbam/BamRecord.h>
13 #include <pbbam/BamTagCodec.h>
14 #include "../src/MemoryUtils.h"
15 
16 // clang-format off
17 
18 using namespace PacBio;
19 using namespace PacBio::BAM;
20 
21 namespace BamRecordTests {
22 
23 static
CreateBamImpl()24 BamRecordImpl CreateBamImpl()
25 {
26     TagCollection tags;
27     tags["HX"] = std::string("1abc75");
28     tags["HX"].Modifier(TagModifier::HEX_STRING);
29     tags["CA"] = std::vector<uint8_t>({34, 5, 125});
30     tags["XY"] = int32_t{-42};
31 
32     BamRecordImpl bam;
33     bam.Bin(42);
34     bam.Flag(42);
35     bam.InsertSize(42);
36     bam.MapQuality(42);
37     bam.MatePosition(42);
38     bam.MateReferenceId(42);
39     bam.Position(42);
40     bam.ReferenceId(42);
41     bam.Tags(tags);
42     return bam;
43 }
44 
45 static inline
CreateBam()46 BamRecord CreateBam()
47 { return BamRecord{ CreateBamImpl() }; }
48 
49 static
CheckRawData(const BamRecordImpl & bam)50 void CheckRawData(const BamRecordImpl& bam)
51 {
52     // ensure raw data (lengths at least) matches API-facing data
53     const uint32_t expectedNameBytes = bam.Name().size() + 1;  // include NULL term
54     const uint32_t expectedNameNulls = 4 - (expectedNameBytes % 4);
55     const uint32_t expectedNameLength = expectedNameBytes + expectedNameNulls;
56     const uint32_t expectedNumCigarOps = bam.CigarData().size();
57     const int32_t expectedSeqLength = bam.Sequence().length();
58     const size_t expectedTagsLength = BamTagCodec::Encode(bam.Tags()).size();
59 
60     //  Name        CIGAR         Sequence       Quals      Tags
61     // l_qname + (n_cigar * 4) + (l_qseq+1)/2 + l_qseq + <encoded length>
62     const int expectedTotalDataLength = expectedNameLength + (expectedNumCigarOps * 4) +
63                                         (expectedSeqLength + 1) / 2 + expectedSeqLength +
64                                         expectedTagsLength;
65 
66     const auto rawData = PacBio::BAM::internal::BamRecordMemory::GetRawData(bam);
67     ASSERT_TRUE(static_cast<bool>(rawData));
68 
69     EXPECT_EQ(expectedNameNulls, rawData->core.l_extranul);
70     EXPECT_EQ(expectedNameLength, rawData->core.l_qname);
71     EXPECT_EQ(expectedNumCigarOps, rawData->core.n_cigar);
72     EXPECT_EQ(expectedSeqLength, rawData->core.l_qseq);
73     EXPECT_EQ(expectedTotalDataLength, rawData->l_data);
74 }
75 
76 static inline
CheckRawData(const BamRecord & bam)77 void CheckRawData(const BamRecord& bam)
78 { CheckRawData(bam.Impl()); }
79 
80 static
MakeCigaredImpl(const std::string & seq,const std::string & cigar,const Strand strand)81 BamRecordImpl MakeCigaredImpl(const std::string& seq,
82                               const std::string& cigar,
83                               const Strand strand)
84 {
85     BamRecordImpl impl;
86     impl.SetMapped(true).ReferenceId(0).Position(0).MapQuality(0);
87     impl.CigarData(Cigar::FromStdString(cigar));
88     impl.MateReferenceId(-1).MatePosition(-1).InsertSize(0);
89     impl.SetSequenceAndQualities(seq, std::string(seq.size(), '*'));
90     impl.SetReverseStrand(strand == Strand::REVERSE);
91     return impl;
92 }
93 
94 static inline
MakeCigaredRecord(const std::string & seq,const std::string & cigar,const Strand strand)95 BamRecord MakeCigaredRecord(const std::string& seq,
96                             const std::string& cigar,
97                             const Strand strand)
98 { return BamRecord{ MakeCigaredImpl(seq, cigar, strand) }; }
99 
100 static
MakeCigaredBaseRecord(const std::string & bases,const std::string & cigar,const Strand strand)101 BamRecord MakeCigaredBaseRecord(const std::string& bases,
102                                 const std::string& cigar,
103                                 const Strand strand)
104 {
105     TagCollection tags;
106     tags["dt"] = bases;
107     tags["st"] = bases;
108 
109     const std::string seq = std::string(bases.size(), 'N');
110     BamRecordImpl impl = MakeCigaredImpl(seq, cigar, strand);
111     impl.Tags(tags);
112     return BamRecord(std::move(impl));
113 }
114 
115 static
MakeCigaredFrameRecord(const std::vector<uint16_t> & frames,const std::string & cigar,const Strand strand)116 BamRecord MakeCigaredFrameRecord(const std::vector<uint16_t>& frames,
117                                  const std::string& cigar,
118                                  const Strand strand)
119 {
120     TagCollection tags;
121     tags["ip"] = frames;
122     tags["pw"] = frames;
123 
124     const std::string seq = std::string(frames.size(), 'N');
125     BamRecordImpl impl = MakeCigaredImpl(seq, cigar, strand);
126     impl.Tags(tags);
127     return BamRecord(std::move(impl));
128 }
129 
130 static
MakeCigaredQualRecord(const std::string & quals,const std::string & cigar,const Strand strand)131 BamRecord MakeCigaredQualRecord(const std::string& quals,
132                                 const std::string& cigar,
133                                 const Strand strand)
134 {
135     TagCollection tags;
136     tags["dq"] = quals;
137     tags["iq"] = quals;
138     tags["mq"] = quals;
139     tags["sq"] = quals;
140 
141     const std::string seq = std::string(quals.size(), 'N');
142     BamRecordImpl impl = MakeCigaredImpl(seq, cigar, strand);
143     impl.Tags(tags);
144     return BamRecord(std::move(impl));
145 }
146 
147 static
MakeCigaredPulseBaseRecord(const std::string & seqBases,const std::string & pulseCalls,const std::string & pulseBases,const std::string & cigar,const Strand strand)148 BamRecord MakeCigaredPulseBaseRecord(const std::string& seqBases,
149                                      const std::string& pulseCalls,
150                                      const std::string& pulseBases,
151                                      const std::string& cigar,
152                                      const Strand strand)
153 {
154     TagCollection tags;
155     tags["pc"] = pulseCalls; // PulseCall
156     tags["pt"] = pulseBases; // AltLabelTag
157 
158     BamRecordImpl impl = MakeCigaredImpl(seqBases, cigar, strand);
159     impl.Tags(tags);
160     return BamRecord(std::move(impl));
161 }
162 
163 static
MakeCigaredPulseQualRecord(const std::string & seqBases,const std::string & pulseCalls,const std::string & pulseQuals,const std::string & cigar,const Strand strand)164 BamRecord MakeCigaredPulseQualRecord(const std::string& seqBases,
165                                      const std::string& pulseCalls,
166                                      const std::string& pulseQuals,
167                                      const std::string& cigar,
168                                      const Strand strand)
169 {
170     TagCollection tags;
171     tags["pc"] = pulseCalls;
172     tags["pv"] = pulseQuals; // AltLabelQV
173     tags["pq"] = pulseQuals; // LabelQV
174     tags["pg"] = pulseQuals; // PulseMergeQV
175 
176     BamRecordImpl impl = MakeCigaredImpl(seqBases, cigar, strand);
177     impl.Tags(tags);
178     return BamRecord(std::move(impl));
179 }
180 
181 static
MakeCigaredPulseFrameRecord(const std::string & seqBases,const std::string & pulseCalls,const std::vector<uint16_t> & pulseFrames,const std::string & cigar,const Strand strand)182 BamRecord MakeCigaredPulseFrameRecord(const std::string& seqBases,
183                                      const std::string& pulseCalls,
184                                      const std::vector<uint16_t>& pulseFrames,
185                                      const std::string& cigar,
186                                      const Strand strand)
187 {
188     TagCollection tags;
189     tags["pc"] = pulseCalls;
190     tags["pd"] = pulseFrames; // PrePulseFrames
191     tags["px"] = pulseFrames; // PulseCallWidth
192 
193     BamRecordImpl impl = MakeCigaredImpl(seqBases, cigar, strand);
194     impl.Tags(tags);
195     return BamRecord(std::move(impl));
196 }
197 
198 static
MakeCigaredPulseUIntRecord(const std::string & seqBases,const std::string & pulseCalls,const std::vector<uint32_t> & pulseUInts,const std::string & cigar,const Strand strand)199 BamRecord MakeCigaredPulseUIntRecord(const std::string& seqBases,
200                                      const std::string& pulseCalls,
201                                      const std::vector<uint32_t>& pulseUInts,
202                                      const std::string& cigar,
203                                      const Strand strand)
204 {
205     TagCollection tags;
206     tags["pc"] = pulseCalls;
207     tags["sf"] = pulseUInts; // StartFrame
208 
209     BamRecordImpl impl = MakeCigaredImpl(seqBases, cigar, strand);
210     impl.Tags(tags);
211     return BamRecord(std::move(impl));
212 }
213 
214 // ----------------------------------------------------------
215 // helper structs and methods for checking combinations of:
216 //   aligned strand, orientation requested, alignment, clipping
217 // ----------------------------------------------------------
218 
// Generic holder for the expected values of the twelve requested states:
// {forward, reverse} strand x {genomic, native} orientation x
// {as-is, aligned, aligned + clipped}.
//
// Values are supplied in accessor order (ForwardGenomic first,
// ReverseNativeAlignedClipped last). Each accessor returns a copy of its slot.
template<typename T>
struct ExpectedResult
{
public:
    // Stores the twelve expected values.
    //
    // Throws std::invalid_argument unless exactly 12 values are supplied. The
    // check is always active (not only in debug builds), so a malformed table
    // is reported where it is written rather than as a later out-of-range
    // access in one of the accessors.
    ExpectedResult(std::initializer_list<T> init)
        : d_(init)
    {
        if (d_.size() != 12) {
            throw std::invalid_argument{
                "ExpectedResult requires exactly 12 values, but received " +
                std::to_string(d_.size())};
        }
    }

    // mapped to forward strand
    T ForwardGenomic() const               { return d_.at(0); }
    T ForwardNative() const                { return d_.at(1); }
    T ForwardGenomicAligned() const        { return d_.at(2); }
    T ForwardNativeAligned() const         { return d_.at(3); }
    T ForwardGenomicAlignedClipped() const { return d_.at(4); }
    T ForwardNativeAlignedClipped() const  { return d_.at(5); }

    // mapped to reverse strand
    T ReverseGenomic() const               { return d_.at(6); }
    T ReverseNative() const                { return d_.at(7); }
    T ReverseGenomicAligned() const        { return d_.at(8); }
    T ReverseNativeAligned() const         { return d_.at(9); }
    T ReverseGenomicAlignedClipped() const { return d_.at(10); }
    T ReverseNativeAlignedClipped() const  { return d_.at(11); }

private:
    std::vector<T> d_;  // the 12 values, in accessor order
};
246 
247 // generic data type checker on the various requested states
248 template<typename DataType, typename MakeRecordType, typename FetchDataType>
CheckAlignAndClip(const std::string & cigar,const DataType & input,const BamRecordTests::ExpectedResult<DataType> & e,const MakeRecordType & makeRecord,const FetchDataType & fetchData)249 void CheckAlignAndClip(const std::string& cigar,
250                        const DataType& input,
251                        const BamRecordTests::ExpectedResult<DataType>& e,
252                        const MakeRecordType& makeRecord,
253                        const FetchDataType& fetchData)
254 {
255     {   // map to forward strand
256         const BamRecord b = makeRecord(input, cigar, Strand::FORWARD);
257         EXPECT_EQ(e.ForwardGenomic(),               fetchData(b, Orientation::GENOMIC, false, false));
258         EXPECT_EQ(e.ForwardNative(),                fetchData(b, Orientation::NATIVE,  false, false));
259         EXPECT_EQ(e.ForwardGenomicAligned(),        fetchData(b, Orientation::GENOMIC, true,  false));
260         EXPECT_EQ(e.ForwardNativeAligned(),         fetchData(b, Orientation::NATIVE,  true,  false));
261         EXPECT_EQ(e.ForwardGenomicAlignedClipped(), fetchData(b, Orientation::GENOMIC, true,  true));
262         EXPECT_EQ(e.ForwardNativeAlignedClipped(),  fetchData(b, Orientation::NATIVE,  true,  true));
263     }
264     {   // map to reverse strand
265         const BamRecord b = makeRecord(input, cigar, Strand::REVERSE);
266         EXPECT_EQ(e.ReverseGenomic(),               fetchData(b, Orientation::GENOMIC, false, false));
267         EXPECT_EQ(e.ReverseNative(),                fetchData(b, Orientation::NATIVE,  false, false));
268         EXPECT_EQ(e.ReverseGenomicAligned(),        fetchData(b, Orientation::GENOMIC, true,  false));
269         EXPECT_EQ(e.ReverseNativeAligned(),         fetchData(b, Orientation::NATIVE,  true,  false));
270         EXPECT_EQ(e.ReverseGenomicAlignedClipped(), fetchData(b, Orientation::GENOMIC, true,  true));
271         EXPECT_EQ(e.ReverseNativeAlignedClipped(),  fetchData(b, Orientation::NATIVE,  true,  true));
272     }
273 }
274 
275 template<typename DataType, typename MakeRecordType, typename FetchDataType>
CheckPulseDataAlignAndClip(const std::string & cigar,const std::string & seqBases,const std::string & pulseCalls,const DataType & input,const BamRecordTests::ExpectedResult<DataType> & allPulses,const BamRecordTests::ExpectedResult<DataType> & basecallsOnly,const MakeRecordType & makeRecord,const FetchDataType & fetchData)276 void CheckPulseDataAlignAndClip(const std::string& cigar,
277                                 const std::string& seqBases,
278                                 const std::string& pulseCalls,
279                                 const DataType& input,
280                                 const BamRecordTests::ExpectedResult<DataType>& allPulses,
281                                 const BamRecordTests::ExpectedResult<DataType>& basecallsOnly,
282                                 const MakeRecordType& makeRecord,
283                                 const FetchDataType& fetchData)
284 {
285     {   // map to forward strand
286         const BamRecord b = makeRecord(seqBases, pulseCalls, input, cigar, Strand::FORWARD);
287 
288         EXPECT_EQ(allPulses.ForwardGenomic(),               fetchData(b, Orientation::GENOMIC, false, false, PulseBehavior::ALL));
289         EXPECT_EQ(allPulses.ForwardNative(),                fetchData(b, Orientation::NATIVE,  false, false, PulseBehavior::ALL));
290         // no align/clipping operations available on ALL pulses
291 
292         EXPECT_EQ(basecallsOnly.ForwardGenomic(),               fetchData(b, Orientation::GENOMIC, false, false, PulseBehavior::BASECALLS_ONLY));
293         EXPECT_EQ(basecallsOnly.ForwardNative(),                fetchData(b, Orientation::NATIVE,  false, false, PulseBehavior::BASECALLS_ONLY));
294         EXPECT_EQ(basecallsOnly.ForwardGenomicAligned(),        fetchData(b, Orientation::GENOMIC, true,  false, PulseBehavior::BASECALLS_ONLY));
295         EXPECT_EQ(basecallsOnly.ForwardNativeAligned(),         fetchData(b, Orientation::NATIVE,  true,  false, PulseBehavior::BASECALLS_ONLY));
296         EXPECT_EQ(basecallsOnly.ForwardGenomicAlignedClipped(), fetchData(b, Orientation::GENOMIC, true,  true,  PulseBehavior::BASECALLS_ONLY));
297         EXPECT_EQ(basecallsOnly.ForwardNativeAlignedClipped(),  fetchData(b, Orientation::NATIVE,  true,  true,  PulseBehavior::BASECALLS_ONLY));
298     }
299     {   // map to reverse strand
300         const BamRecord b = makeRecord(seqBases, pulseCalls, input, cigar, Strand::REVERSE);
301 
302         EXPECT_EQ(allPulses.ReverseGenomic(),               fetchData(b, Orientation::GENOMIC, false, false, PulseBehavior::ALL));
303         EXPECT_EQ(allPulses.ReverseNative(),                fetchData(b, Orientation::NATIVE,  false, false, PulseBehavior::ALL));
304         // no align/clipping operations available on ALL pulses
305 
306         EXPECT_EQ(basecallsOnly.ReverseGenomic(),               fetchData(b, Orientation::GENOMIC, false, false, PulseBehavior::BASECALLS_ONLY));
307         EXPECT_EQ(basecallsOnly.ReverseNative(),                fetchData(b, Orientation::NATIVE,  false, false, PulseBehavior::BASECALLS_ONLY));
308         EXPECT_EQ(basecallsOnly.ReverseGenomicAligned(),        fetchData(b, Orientation::GENOMIC, true,  false, PulseBehavior::BASECALLS_ONLY));
309         EXPECT_EQ(basecallsOnly.ReverseNativeAligned(),         fetchData(b, Orientation::NATIVE,  true,  false, PulseBehavior::BASECALLS_ONLY));
310         EXPECT_EQ(basecallsOnly.ReverseGenomicAlignedClipped(), fetchData(b, Orientation::GENOMIC, true,  true,  PulseBehavior::BASECALLS_ONLY));
311         EXPECT_EQ(basecallsOnly.ReverseNativeAlignedClipped(),  fetchData(b, Orientation::NATIVE,  true,  true,  PulseBehavior::BASECALLS_ONLY));
312     }
313 }
314 
315 static
CheckBaseTagsClippedAndAligned(const std::string & cigar,const std::string & input,const ExpectedResult<std::string> & e)316 void CheckBaseTagsClippedAndAligned(const std::string& cigar,
317                                     const std::string& input,
318                                     const ExpectedResult<std::string>& e)
319 {
320     // aligned record + DeletionTag, SubstitutionTag
321     auto makeRecord = [](const std::string& newBases,
322                          const std::string& newCigar,
323                          const Strand newStrand)
324     { return MakeCigaredBaseRecord(newBases, newCigar, newStrand); };
325 
326     // DeletionTag
327     CheckAlignAndClip(cigar, input, e, makeRecord,
328                       [](const BamRecord& b,
329                          Orientation orientation,
330                          bool aligned,
331                          bool exciseSoftClips)
332                       { return b.DeletionTag(orientation, aligned, exciseSoftClips); }
333     );
334 
335     // SubstitutionTag
336     CheckAlignAndClip(cigar, input, e, makeRecord,
337                       [](const BamRecord& b,
338                          Orientation orientation,
339                          bool aligned,
340                          bool exciseSoftClips)
341                       { return b.SubstitutionTag(orientation, aligned, exciseSoftClips); }
342     );
343 }
344 
345 static
CheckFrameTagsClippedAndAligned(const std::string & cigar,const std::vector<uint16_t> & input,const ExpectedResult<std::vector<uint16_t>> & e)346 void CheckFrameTagsClippedAndAligned(const std::string& cigar,
347                                      const std::vector<uint16_t>& input,
348                                      const ExpectedResult<std::vector<uint16_t> >& e)
349 {
350 
351     // aligned record + IPD, PulseWidth
352     auto makeRecord = [](const std::vector<uint16_t>& newFrames,
353                          const std::string& newCigar,
354                          const Strand newStrand)
355     { return BamRecordTests::MakeCigaredFrameRecord(newFrames, newCigar, newStrand); };
356 
357     // IPD
358     CheckAlignAndClip(cigar, input, e, makeRecord,
359                       [](const BamRecord& b,
360                          Orientation orientation,
361                          bool aligned,
362                          bool exciseSoftClips)
363                       { return b.IPD(orientation, aligned, exciseSoftClips).Data(); }
364     );
365 
366     // PulseWidth
367     CheckAlignAndClip(cigar, input, e, makeRecord,
368                       [](const BamRecord& b,
369                          Orientation orientation,
370                          bool aligned,
371                          bool exciseSoftClips)
372                       { return b.PulseWidth(orientation, aligned, exciseSoftClips).Data(); }
373     );
374 }
375 
376 static
CheckQualityTagsClippedAndAligned(const std::string & cigar,const std::string & input,const ExpectedResult<std::string> & e)377 void CheckQualityTagsClippedAndAligned(const std::string& cigar,
378                                        const std::string& input,
379                                        const ExpectedResult<std::string>& e)
380 {
381     // aligned record + DeletionQV, InsertionQV, MergeQV, SubstitutionQV
382     auto makeRecord = [](const std::string& newQuals,
383                          const std::string& newCigar,
384                          const Strand newStrand)
385     { return BamRecordTests::MakeCigaredQualRecord(newQuals, newCigar, newStrand); };
386 
387     // DeletionQV
388     CheckAlignAndClip(cigar, input, e, makeRecord,
389                       [](const BamRecord& b,
390                          Orientation orientation,
391                          bool aligned,
392                          bool exciseSoftClips)
393                       { return b.DeletionQV(orientation, aligned, exciseSoftClips).Fastq(); }
394     );
395 
396     // InsertionQV
397     CheckAlignAndClip(cigar, input, e, makeRecord,
398                       [](const BamRecord& b,
399                          Orientation orientation,
400                          bool aligned,
401                          bool exciseSoftClips)
402                       { return b.InsertionQV(orientation, aligned, exciseSoftClips).Fastq(); }
403     );
404 
405     // MergeQV
406     CheckAlignAndClip(cigar, input, e, makeRecord,
407                       [](const BamRecord& b,
408                          Orientation orientation,
409                          bool aligned,
410                          bool exciseSoftClips)
411                       { return b.MergeQV(orientation, aligned, exciseSoftClips).Fastq(); }
412     );
413 
414     // SubstitutionQV
415     CheckAlignAndClip(cigar, input, e, makeRecord,
416                       [](const BamRecord& b,
417                          Orientation orientation,
418                          bool aligned,
419                          bool exciseSoftClips)
420                       { return b.SubstitutionQV(orientation, aligned, exciseSoftClips).Fastq(); }
421     );
422 }
423 
424 static
CheckQualitiesClippedAndAligned(const std::string & cigar,const std::string & input,const ExpectedResult<std::string> & e)425 void CheckQualitiesClippedAndAligned(const std::string& cigar,
426                                      const std::string& input,
427                                      const ExpectedResult<std::string>& e)
428 {
429     // aligned record w/ dummy SEQ & QUALs under test
430     auto makeRecord = [](const std::string& newQuals,
431                          const std::string& newCigar,
432                          const Strand newStrand)
433     {
434         const std::string seq = std::string(newQuals.size(), 'N');
435         auto record = BamRecordTests::MakeCigaredRecord(seq, newCigar, newStrand);
436         record.Impl().SetSequenceAndQualities(seq, newQuals);
437         return record;
438     };
439 
440     // QUAL
441     CheckAlignAndClip(cigar, input, e, makeRecord,
442                       [](const BamRecord& b,
443                          Orientation orientation,
444                          bool aligned,
445                          bool exciseSoftClips)
446                       { return b.Qualities(orientation, aligned, exciseSoftClips).Fastq(); }
447     );
448 }
449 
450 static
CheckSequenceClippedAndAligned(const std::string & cigar,const std::string & input,const ExpectedResult<std::string> & e)451 void CheckSequenceClippedAndAligned(const std::string& cigar,
452                                     const std::string& input,
453                                     const ExpectedResult<std::string>& e)
454 {
455     // aligned record w/ SEQ
456     auto makeRecord = [](const std::string& newSeq,
457                          const std::string& newCigar,
458                          const Strand newStrand)
459     { return BamRecordTests::MakeCigaredRecord(newSeq, newCigar, newStrand); };
460 
461     // SEQ
462     CheckAlignAndClip(cigar, input, e, makeRecord,
463                       [](const BamRecord& b,
464                          Orientation orientation,
465                          bool aligned,
466                          bool exciseSoftClips)
467                       { return b.Sequence(orientation, aligned, exciseSoftClips); }
468     );
469 }
470 
471 static
CheckPulseBaseTags(const std::string & cigar,const std::string & seqBases,const std::string & pulseCalls,const std::string & pulseBases,const ExpectedResult<std::string> & allPulses,const ExpectedResult<std::string> & basecallsOnly)472 void CheckPulseBaseTags(const std::string& cigar,
473                         const std::string& seqBases,
474                         const std::string& pulseCalls,
475                         const std::string& pulseBases,
476                         const ExpectedResult<std::string>& allPulses,
477                         const ExpectedResult<std::string>& basecallsOnly)
478 {
479     // aligned record + AltLabelTag
480     auto makeRecord = [](const std::string& newSeqBases,
481                          const std::string& newPulseCalls,
482                          const std::string& newPulseBases,
483                          const std::string& newCigar,
484                          const Strand newStrand)
485     { return MakeCigaredPulseBaseRecord(newSeqBases, newPulseCalls, newPulseBases, newCigar, newStrand); };
486 
487     // AltLabelTag
488     CheckPulseDataAlignAndClip(cigar, seqBases, pulseCalls, pulseBases, allPulses, basecallsOnly, makeRecord,
489                               [](const BamRecord& b,
490                                  Orientation orientation,
491                                  bool aligned,
492                                  bool exciseSoftClips,
493                                  PulseBehavior pulseBehavior)
494                               { return b.AltLabelTag(orientation, aligned, exciseSoftClips, pulseBehavior); }
495     );
496     // PulseCall
497     CheckPulseDataAlignAndClip(cigar, seqBases, pulseCalls, pulseBases, allPulses, basecallsOnly, makeRecord,
498                               [](const BamRecord& b,
499                                  Orientation orientation,
500                                  bool aligned,
501                                  bool exciseSoftClips,
502                                  PulseBehavior pulseBehavior)
503                               { return b.PulseCall(orientation, aligned, exciseSoftClips, pulseBehavior); }
504     );
505 }
506 
507 static
CheckPulseFrameTags(const std::string & cigar,const std::string & seqBases,const std::string & pulseCalls,const std::vector<uint16_t> & pulseFrames,const ExpectedResult<std::vector<uint16_t>> & allPulses,const ExpectedResult<std::vector<uint16_t>> & basecallsOnly)508 void CheckPulseFrameTags(const std::string& cigar,
509                          const std::string& seqBases,
510                          const std::string& pulseCalls,
511                          const std::vector<uint16_t>& pulseFrames,
512                          const ExpectedResult<std::vector<uint16_t>>& allPulses,
513                          const ExpectedResult<std::vector<uint16_t>>& basecallsOnly)
514 {
515     // aligned record + PrePulseFrames
516     auto makeRecord = [](const std::string& newSeqBases,
517                          const std::string& newPulseCalls,
518                          const std::vector<uint16_t>& newPulseFrames,
519                          const std::string& newCigar,
520                          const Strand newStrand)
521     { return MakeCigaredPulseFrameRecord(newSeqBases, newPulseCalls, newPulseFrames, newCigar, newStrand); };
522 
523     // PrePulseFrame
524     CheckPulseDataAlignAndClip(cigar, seqBases, pulseCalls, pulseFrames, allPulses, basecallsOnly, makeRecord,
525                               [](const BamRecord& b,
526                                  Orientation orientation,
527                                  bool aligned,
528                                  bool exciseSoftClips,
529                                  PulseBehavior pulseBehavior)
530                               { return b.PrePulseFrames(orientation, aligned, exciseSoftClips, pulseBehavior).Data(); }
531     );
532     // PulseCallWidth
533     CheckPulseDataAlignAndClip(cigar, seqBases, pulseCalls, pulseFrames, allPulses, basecallsOnly, makeRecord,
534                               [](const BamRecord& b,
535                                  Orientation orientation,
536                                  bool aligned,
537                                  bool exciseSoftClips,
538                                  PulseBehavior pulseBehavior)
539                               { return b.PulseCallWidth(orientation, aligned, exciseSoftClips, pulseBehavior).Data(); }
540     );
541 }
542 
543 /*
544 
    { BamRecordTag::PKMEAN,            {"pa", true}  },   photons (vector<float>)
546     { BamRecordTag::PKMEAN_2,          {"ps", true}  },   photons
547     { BamRecordTag::PKMID,             {"pm", true}  },   photons
548     { BamRecordTag::PKMID_2,           {"pi", true}  },   photons
549 */
550 
551 static
CheckPulseQualityTags(const std::string & cigar,const std::string & seqBases,const std::string & pulseCalls,const std::string & pulseQuals,const ExpectedResult<std::string> & allPulses,const ExpectedResult<std::string> & basecallsOnly)552 void CheckPulseQualityTags(const std::string& cigar,
553                            const std::string& seqBases,
554                            const std::string& pulseCalls,
555                            const std::string& pulseQuals,
556                            const ExpectedResult<std::string>& allPulses,
557                            const ExpectedResult<std::string>& basecallsOnly)
558 {
559     // aligned record + AltLabelQV
560     auto makeRecord = [](const std::string& newSeqBases,
561                          const std::string& newPulseCalls,
562                          const std::string& newPulseQuals,
563                          const std::string& newCigar,
564                          const Strand newStrand)
565     { return MakeCigaredPulseQualRecord(newSeqBases, newPulseCalls, newPulseQuals, newCigar, newStrand); };
566 
567     // AltLabelQV
568     CheckPulseDataAlignAndClip(cigar, seqBases, pulseCalls, pulseQuals, allPulses, basecallsOnly, makeRecord,
569                               [](const BamRecord& b,
570                                  Orientation orientation,
571                                  bool aligned,
572                                  bool exciseSoftClips,
573                                  PulseBehavior pulseBehavior)
574                               { return b.AltLabelQV(orientation, aligned, exciseSoftClips, pulseBehavior).Fastq(); }
575     );
576     // LabelQV
577     CheckPulseDataAlignAndClip(cigar, seqBases, pulseCalls, pulseQuals, allPulses, basecallsOnly, makeRecord,
578                               [](const BamRecord& b,
579                                  Orientation orientation,
580                                  bool aligned,
581                                  bool exciseSoftClips,
582                                  PulseBehavior pulseBehavior)
583                               { return b.LabelQV(orientation, aligned, exciseSoftClips, pulseBehavior).Fastq(); }
584     );
585     // PulseMergeQV
586     CheckPulseDataAlignAndClip(cigar, seqBases, pulseCalls, pulseQuals, allPulses, basecallsOnly, makeRecord,
587                               [](const BamRecord& b,
588                                  Orientation orientation,
589                                  bool aligned,
590                                  bool exciseSoftClips,
591                                  PulseBehavior pulseBehavior)
592                               { return b.PulseMergeQV(orientation, aligned, exciseSoftClips, pulseBehavior).Fastq(); }
593     );
594 }
595 
596 static
CheckPulseUIntTags(const std::string & cigar,const std::string & seqBases,const std::string & pulseCalls,const std::vector<uint32_t> & startFrames,const ExpectedResult<std::vector<uint32_t>> & allPulses,const ExpectedResult<std::vector<uint32_t>> & basecallsOnly)597 void CheckPulseUIntTags(const std::string& cigar,
598                         const std::string& seqBases,
599                         const std::string& pulseCalls,
600                         const std::vector<uint32_t>& startFrames,
601                         const ExpectedResult<std::vector<uint32_t>>& allPulses,
602                         const ExpectedResult<std::vector<uint32_t>>& basecallsOnly)
603 {
604    // aligned record + StartFrame
605    auto makeRecord = [](const std::string& newSeqBases,
606                         const std::string& newPulseCalls,
607                         const std::vector<uint32_t>& newStartFrames,
608                         const std::string& newCigar,
609                         const Strand newStrand)
610    { return MakeCigaredPulseUIntRecord(newSeqBases, newPulseCalls, newStartFrames, newCigar, newStrand); };
611 
612    // StartFrame
613    CheckPulseDataAlignAndClip(cigar, seqBases, pulseCalls, startFrames, allPulses, basecallsOnly, makeRecord,
614                              [](const BamRecord& b,
615                                 Orientation orientation,
616                                 bool aligned,
617                                 bool exciseSoftClips,
618                                 PulseBehavior pulseBehavior)
619                              { return b.StartFrame(orientation, aligned, exciseSoftClips, pulseBehavior); }
620    );
621 }
622 
623 
624 
625 } // namespace BamRecordTests
626 
TEST(BamRecordTest,DefaultValues)627 TEST(BamRecordTest, DefaultValues)
628 {
629     BamRecord bam;
630     const std::string emptyString;
631 
632     // BamRecordImpl data
633     EXPECT_EQ(0, bam.Impl().Bin());
634     EXPECT_EQ(BamRecordImpl::UNMAPPED, bam.Impl().Flag());  // forced init unmapped
635     EXPECT_EQ(0, bam.Impl().InsertSize());
636     EXPECT_EQ(255, bam.Impl().MapQuality());
637     EXPECT_EQ(-1, bam.Impl().MateReferenceId());
638     EXPECT_EQ(-1, bam.Impl().MatePosition());
639     EXPECT_EQ(-1, bam.Impl().Position());
640     EXPECT_EQ(-1, bam.Impl().ReferenceId());
641     EXPECT_EQ(0, bam.Impl().Tags().size());
642 
643     EXPECT_FALSE(bam.Impl().IsDuplicate());
644     EXPECT_FALSE(bam.Impl().IsFailedQC());
645     EXPECT_FALSE(bam.Impl().IsFirstMate());
646     EXPECT_FALSE(bam.Impl().IsMapped());             // forced init unmapped
647     EXPECT_TRUE(bam.Impl().IsMateMapped());
648     EXPECT_FALSE(bam.Impl().IsMateReverseStrand());
649     EXPECT_FALSE(bam.Impl().IsPaired());
650     EXPECT_TRUE(bam.Impl().IsPrimaryAlignment());
651     EXPECT_FALSE(bam.Impl().IsProperPair());
652     EXPECT_FALSE(bam.Impl().IsReverseStrand());
653     EXPECT_FALSE(bam.Impl().IsSecondMate());
654     EXPECT_FALSE(bam.Impl().IsSupplementaryAlignment());
655 
656     EXPECT_EQ(emptyString, bam.Impl().Name());
657     EXPECT_EQ(emptyString, bam.Impl().CigarData().ToStdString());
658     EXPECT_EQ(emptyString, bam.Impl().Sequence());
659     EXPECT_EQ(emptyString, bam.Impl().Qualities().Fastq());
660 
661     // PacBio data
662     EXPECT_EQ(-1, bam.AlignedStart());
663     EXPECT_EQ(-1, bam.AlignedEnd());
664 
665     EXPECT_FALSE(bam.HasHoleNumber());
666     EXPECT_FALSE(bam.HasNumPasses());
667     EXPECT_FALSE(bam.HasQueryEnd());
668     EXPECT_FALSE(bam.HasQueryStart());
669     EXPECT_FALSE(bam.HasReadAccuracy());
670 
671     EXPECT_THROW(bam.HoleNumber(), std::exception);
672     EXPECT_THROW(bam.NumPasses(), std::exception);
673     EXPECT_EQ(int32_t{0}, bam.QueryEnd());
674     EXPECT_EQ(int32_t{0}, bam.QueryStart());
675     EXPECT_THROW(bam.ReadAccuracy(), std::exception);
676 
677     EXPECT_FALSE(bam.HasDeletionQV());
678     EXPECT_FALSE(bam.HasDeletionTag());
679     EXPECT_FALSE(bam.HasInsertionQV());
680     EXPECT_FALSE(bam.HasMergeQV());
681     EXPECT_FALSE(bam.HasSubstitutionQV());
682     EXPECT_FALSE(bam.HasSubstitutionTag());
683 
684     EXPECT_THROW(bam.DeletionQV(),      std::exception);
685     EXPECT_THROW(bam.DeletionTag(),     std::exception);
686     EXPECT_THROW(bam.InsertionQV(),     std::exception);
687     EXPECT_THROW(bam.MergeQV(),         std::exception);
688     EXPECT_THROW(bam.SubstitutionQV(),  std::exception);
689     EXPECT_THROW(bam.SubstitutionTag(), std::exception);
690 
691     // raw data
692     BamRecordTests::CheckRawData(bam);
693 }
694 
TEST(BamRecordTest,FromBamRecordImpl)695 TEST(BamRecordTest, FromBamRecordImpl)
696 {
697     // check generic data
698     BamRecordImpl genericBam = BamRecordTests::CreateBamImpl();
699 
700     EXPECT_EQ(42, genericBam.Bin());
701     EXPECT_EQ(42, genericBam.Flag());
702     EXPECT_EQ(42, genericBam.InsertSize());
703     EXPECT_EQ(42, genericBam.MapQuality());
704     EXPECT_EQ(42, genericBam.MateReferenceId());
705     EXPECT_EQ(42, genericBam.MatePosition());
706     EXPECT_EQ(42, genericBam.Position());
707     EXPECT_EQ(42, genericBam.ReferenceId());
708 
709     const TagCollection genericTags = genericBam.Tags();
710     EXPECT_TRUE(genericTags.at("HX").HasModifier(TagModifier::HEX_STRING));
711     EXPECT_EQ(std::string("1abc75"), genericTags.at("HX").ToString());
712     EXPECT_EQ(int32_t{-42}, genericTags.at("XY").ToInt32());
713     EXPECT_EQ(std::vector<uint8_t>({34, 5, 125}), genericTags.at("CA").ToUInt8Array());
714 
715     // copy ctor
716     BamRecord bam1(genericBam);
717 
718     EXPECT_EQ(42, bam1.Impl().Bin());
719     EXPECT_EQ(42, bam1.Impl().Flag());
720     EXPECT_EQ(42, bam1.Impl().InsertSize());
721     EXPECT_EQ(42, bam1.Impl().MapQuality());
722     EXPECT_EQ(42, bam1.Impl().MateReferenceId());
723     EXPECT_EQ(42, bam1.Impl().MatePosition());
724     EXPECT_EQ(42, bam1.Impl().Position());
725     EXPECT_EQ(42, bam1.Impl().ReferenceId());
726 
727     const TagCollection bam1Tags = bam1.Impl().Tags();
728     EXPECT_TRUE(bam1Tags.at("HX").HasModifier(TagModifier::HEX_STRING));
729     EXPECT_EQ(std::string("1abc75"), bam1Tags.at("HX").ToString());
730     EXPECT_EQ(int32_t{-42}, bam1Tags.at("XY").ToInt32());
731     EXPECT_EQ(std::vector<uint8_t>({34, 5, 125}), bam1Tags.at("CA").ToUInt8Array());
732 
733     // copy assignment
734     BamRecord bam2;
735     bam2 = genericBam;
736 
737     EXPECT_EQ(42, bam2.Impl().Bin());
738     EXPECT_EQ(42, bam2.Impl().Flag());
739     EXPECT_EQ(42, bam2.Impl().InsertSize());
740     EXPECT_EQ(42, bam2.Impl().MapQuality());
741     EXPECT_EQ(42, bam2.Impl().MateReferenceId());
742     EXPECT_EQ(42, bam2.Impl().MatePosition());
743     EXPECT_EQ(42, bam2.Impl().Position());
744     EXPECT_EQ(42, bam2.Impl().ReferenceId());
745 
746     const TagCollection bam2Tags = bam2.Impl().Tags();
747     EXPECT_TRUE(bam2Tags.at("HX").HasModifier(TagModifier::HEX_STRING));
748     EXPECT_EQ(std::string("1abc75"), bam2Tags.at("HX").ToString());
749     EXPECT_EQ(int32_t{-42}, bam2Tags.at("XY").ToInt32());
750     EXPECT_EQ(std::vector<uint8_t>({34, 5, 125}), bam2Tags.at("CA").ToUInt8Array());
751 
752     // change genericBam, make sure we deep copied bam1 & bam2
753     genericBam.Position(2000);
754 
755     EXPECT_EQ(2000, genericBam.Position());
756     EXPECT_EQ(42, bam1.Impl().Position());
757     EXPECT_EQ(42, bam2.Impl().Position());
758 
759     // move ctor
760 #ifdef __clang__
761 #pragma clang diagnostic push
762 #pragma clang diagnostic ignored "-Wpessimizing-move"
763 #endif
764     BamRecord bam3(std::move(BamRecordTests::CreateBamImpl()));
765 #ifdef __clang__
766 #pragma clang diagnostic pop
767 #endif
768 
769     EXPECT_EQ(42, bam3.Impl().Bin());
770     EXPECT_EQ(42, bam3.Impl().Flag());
771     EXPECT_EQ(42, bam3.Impl().InsertSize());
772     EXPECT_EQ(42, bam3.Impl().MapQuality());
773     EXPECT_EQ(42, bam3.Impl().MateReferenceId());
774     EXPECT_EQ(42, bam3.Impl().MatePosition());
775     EXPECT_EQ(42, bam3.Impl().Position());
776     EXPECT_EQ(42, bam3.Impl().ReferenceId());
777 
778     const TagCollection bam3Tags = bam3.Impl().Tags();
779     EXPECT_TRUE(bam3Tags.at("HX").HasModifier(TagModifier::HEX_STRING));
780     EXPECT_EQ(std::string("1abc75"), bam3Tags.at("HX").ToString());
781     EXPECT_EQ(int32_t{-42}, bam3Tags.at("XY").ToInt32());
782     EXPECT_EQ(std::vector<uint8_t>({34, 5, 125}), bam3Tags.at("CA").ToUInt8Array());
783 
784     // move assignment
785     BamRecord bam4;
786 #ifdef __clang__
787 #pragma clang diagnostic push
788 #pragma clang diagnostic ignored "-Wpessimizing-move"
789 #endif
790     bam4 = std::move(BamRecordTests::CreateBamImpl());
791 #ifdef __clang__
792 #pragma clang diagnostic pop
793 #endif
794 
795     EXPECT_EQ(42, bam4.Impl().Bin());
796     EXPECT_EQ(42, bam4.Impl().Flag());
797     EXPECT_EQ(42, bam4.Impl().InsertSize());
798     EXPECT_EQ(42, bam4.Impl().MapQuality());
799     EXPECT_EQ(42, bam4.Impl().MateReferenceId());
800     EXPECT_EQ(42, bam4.Impl().MatePosition());
801     EXPECT_EQ(42, bam4.Impl().Position());
802     EXPECT_EQ(42, bam4.Impl().ReferenceId());
803 
804     const TagCollection bam4Tags = bam4.Impl().Tags();
805     EXPECT_TRUE(bam4Tags.at("HX").HasModifier(TagModifier::HEX_STRING));
806     EXPECT_EQ(std::string("1abc75"), bam4Tags.at("HX").ToString());
807     EXPECT_EQ(int32_t{-42}, bam4Tags.at("XY").ToInt32());
808     EXPECT_EQ(std::vector<uint8_t>({34, 5, 125}), bam4Tags.at("CA").ToUInt8Array());
809 }
810 
// Assigning a BamRecord to itself must leave all core fields, tags, and the
// raw-data layout intact.
TEST(BamRecordTest, SelfAssignmentTolerated)
{
    BamRecord bam1;
    bam1.Impl().Bin(42);
    bam1.Impl().Flag(42);
    bam1.Impl().InsertSize(42);
    bam1.Impl().MapQuality(42);
    bam1.Impl().MatePosition(42);
    bam1.Impl().MateReferenceId(42);
    bam1.Impl().Position(42);
    bam1.Impl().ReferenceId(42);

    TagCollection tags;
    tags["HX"] = std::string("1abc75");
    tags["HX"].Modifier(TagModifier::HEX_STRING);
    tags["CA"] = std::vector<uint8_t>({34, 5, 125});
    tags["XY"] = int32_t{-42};
    bam1.Impl().Tags(tags);

    // Self-assign through an alias: both sides are still the same object, but
    // a literal 'bam1 = bam1' trips clang's -Wself-assign-overloaded (fatal
    // under -Werror).
    const BamRecord& sameRecord = bam1;
    bam1 = sameRecord;

    EXPECT_EQ(42, bam1.Impl().Bin());
    EXPECT_EQ(42, bam1.Impl().Flag());
    EXPECT_EQ(42, bam1.Impl().InsertSize());
    EXPECT_EQ(42, bam1.Impl().MapQuality());
    EXPECT_EQ(42, bam1.Impl().MateReferenceId());
    EXPECT_EQ(42, bam1.Impl().MatePosition());
    EXPECT_EQ(42, bam1.Impl().Position());
    EXPECT_EQ(42, bam1.Impl().ReferenceId());

    const TagCollection fetchedTags1 = bam1.Impl().Tags();
    EXPECT_TRUE(fetchedTags1.at("HX").HasModifier(TagModifier::HEX_STRING));
    EXPECT_EQ(std::string("1abc75"), fetchedTags1.at("HX").ToString());
    EXPECT_EQ(int32_t{-42}, fetchedTags1.at("XY").ToInt32());
    EXPECT_EQ(std::vector<uint8_t>({34, 5, 125}), fetchedTags1.at("CA").ToUInt8Array());

    BamRecordTests::CheckRawData(bam1);
}
849 
TEST(BamRecordTest,CoreSetters)850 TEST(BamRecordTest, CoreSetters)
851 {
852     // create basic BAM with (generic) data
853     BamRecord bam = BamRecordTests::CreateBam();
854 
855     QualityValues testQVs;
856     testQVs.push_back(0);
857     testQVs.push_back(1);
858 
859     const std::string testTags = "GATTACA";
860 
861     // now set PacBio data
862 //    bam.AlignedStart(42);
863 //    bam.AlignedEnd(42);
864 //    bam.DeletionQVs(testQVs);
865 //    bam.DeletionTags(testTags);
866 //    bam.HoleNumber(42);
867 //    bam.InsertionQVs(testQVs);
868 //    bam.MergeQVs(testQVs);
869 //    bam.NumPasses(42);
870 //    bam.QueryEnd(42);
871 //    bam.QueryStart(42);
872 //    bam.ReadAccuracy(42);
873 //    bam.ReferenceEnd(42);
874 //    bam.ReferenceStart(42);
875 //    bam.SubstitutionQVs(testQVs);
876 //    bam.SubstitutionTags(testTags);
877 
878     // check generic data
879     EXPECT_EQ(42, bam.Impl().Bin());
880     EXPECT_EQ(42, bam.Impl().Flag());
881     EXPECT_EQ(42, bam.Impl().InsertSize());
882     EXPECT_EQ(42, bam.Impl().MapQuality());
883     EXPECT_EQ(42, bam.Impl().MateReferenceId());
884     EXPECT_EQ(42, bam.Impl().MatePosition());
885     EXPECT_EQ(42, bam.Impl().Position());
886     EXPECT_EQ(42, bam.Impl().ReferenceId());
887 
888     // check PacBio data
889 //    EXPECT_EQ(42, bam.AlignedStart());
890 //    EXPECT_EQ(42, bam.AlignedEnd());
891 //    EXPECT_EQ(testQVs, bam.DeletionQVs());
892 //    EXPECT_EQ(testTags, bam.DeletionTags());
893 //    EXPECT_EQ(42, bam.HoleNumber());
894 //    EXPECT_EQ(testQVs, bam.InsertionQVs());
895 //    EXPECT_EQ(testQVs, bam.MergeQVs());
896 
897 //    EXPECT_EQ(42, bam.NumPasses());
898 //    EXPECT_EQ(42, bam.QueryEnd());
899 //    EXPECT_EQ(42, bam.QueryStart());
900 //    EXPECT_EQ(42, bam.ReadAccuracy());
901 //    EXPECT_EQ(42, bam.ReferenceEnd());
902 //    EXPECT_EQ(42, bam.ReferenceStart());
903 //    EXPECT_EQ(testQVs, bam.SubstitutionQVs());
904 //    EXPECT_EQ(testTags, bam.SubstitutionTags());
905 
906     // check tags
907     const TagCollection fetchedTags = bam.Impl().Tags();
908     EXPECT_TRUE(fetchedTags.at("HX").HasModifier(TagModifier::HEX_STRING));
909     EXPECT_EQ(std::string("1abc75"), fetchedTags.at("HX").ToString());
910     EXPECT_EQ(int32_t{-42}, fetchedTags.at("XY").ToInt32());
911     EXPECT_EQ(std::vector<uint8_t>({34, 5, 125}), fetchedTags.at("CA").ToUInt8Array());
912 
913     BamRecordTests::CheckRawData(bam);
914 }
915 
// Sequence orientation on a gap-free alignment: for the reverse strand, the
// native-orientation expectations are the reverse complement of the input.
TEST(BamRecordTest, SequenceOrientation)
{
    SCOPED_TRACE("Simple CIGAR Sequence");

    BamRecordTests::CheckSequenceClippedAndAligned(
        "13=",              // CIGAR
        "ATATATCCCGGCG",    // input
        {
            // forward strand
            "ATATATCCCGGCG",    // genomic
            "ATATATCCCGGCG",    // native
            "ATATATCCCGGCG",    // genomic, aligned
            "ATATATCCCGGCG",    // native,  aligned
            "ATATATCCCGGCG",    // genomic, aligned + clipped
            "ATATATCCCGGCG",    // native,  aligned + clipped

            // reverse strand
            "ATATATCCCGGCG",    // genomic
            "CGCCGGGATATAT",    // native
            "ATATATCCCGGCG",    // genomic, aligned
            "CGCCGGGATATAT",    // native,  aligned
            "ATATATCCCGGCG",    // genomic, aligned + clipped
            "CGCCGGGATATAT"     // native,  aligned + clipped
        }
    );
}
940 
// Qualities orientation on a gap-free alignment: for the reverse strand, the
// native-orientation expectations are the input string reversed.
TEST(BamRecordTest, QualitiesOrientation)
{
    SCOPED_TRACE("Simple CIGAR Qualities");

    BamRecordTests::CheckQualitiesClippedAndAligned(
        "13=",              // CIGAR
        "?]?]?]?]?]?]*",    // input
        {
            // forward strand
            "?]?]?]?]?]?]*",    // genomic
            "?]?]?]?]?]?]*",    // native
            "?]?]?]?]?]?]*",    // genomic, aligned
            "?]?]?]?]?]?]*",    // native,  aligned
            "?]?]?]?]?]?]*",    // genomic, aligned + clipped
            "?]?]?]?]?]?]*",    // native,  aligned + clipped

            // reverse strand
            "?]?]?]?]?]?]*",    // genomic
            "*]?]?]?]?]?]?",    // native
            "?]?]?]?]?]?]*",    // genomic, aligned
            "*]?]?]?]?]?]?",    // native,  aligned
            "?]?]?]?]?]?]*",    // genomic, aligned + clipped
            "*]?]?]?]?]?]?"     // native,  aligned + clipped
        }
    );
}
965 
// Base-tag orientation on a gap-free alignment: for the reverse strand, the
// native expectations equal the input and the genomic expectations are its
// reverse complement.
TEST(BamRecordTest, SequenceTagsOrientation)
{
    SCOPED_TRACE("Simple CIGAR Base Tags");

    BamRecordTests::CheckBaseTagsClippedAndAligned(
        "13=",              // CIGAR
        "ATATATCCCGGCG",    // input
        {
            // forward strand
            "ATATATCCCGGCG",    // genomic
            "ATATATCCCGGCG",    // native
            "ATATATCCCGGCG",    // genomic, aligned
            "ATATATCCCGGCG",    // native,  aligned
            "ATATATCCCGGCG",    // genomic, aligned, clipped
            "ATATATCCCGGCG",    // native,  aligned, clipped

            // reverse strand
            "CGCCGGGATATAT",    // genomic
            "ATATATCCCGGCG",    // native
            "CGCCGGGATATAT",    // genomic, aligned
            "ATATATCCCGGCG",    // native,  aligned
            "CGCCGGGATATAT",    // genomic, aligned, clipped
            "ATATATCCCGGCG"     // native,  aligned, clipped
        }
    );
}
990 
// Frame-tag orientation on a gap-free alignment: for the reverse strand, the
// native expectations equal the input and the genomic expectations are the
// input reversed.
TEST(BamRecordTest, FrameTagsOrientation)
{
    SCOPED_TRACE("Simple CIGAR Frames");

    BamRecordTests::CheckFrameTagsClippedAndAligned(
        "5=",                   // CIGAR
        { 0, 1, 2, 3, 4 },      // input
        {
            // forward strand
            { 0, 1, 2, 3, 4 },  // genomic
            { 0, 1, 2, 3, 4 },  // native
            { 0, 1, 2, 3, 4 },  // genomic, aligned
            { 0, 1, 2, 3, 4 },  // native,  aligned
            { 0, 1, 2, 3, 4 },  // genomic, aligned, clipped
            { 0, 1, 2, 3, 4 },  // native,  aligned, clipped

            // reverse strand
            { 4, 3, 2, 1, 0 },  // genomic
            { 0, 1, 2, 3, 4 },  // native
            { 4, 3, 2, 1, 0 },  // genomic, aligned
            { 0, 1, 2, 3, 4 },  // native,  aligned
            { 4, 3, 2, 1, 0 },  // genomic, aligned, clipped
            { 0, 1, 2, 3, 4 }   // native,  aligned, clipped
        }
    );
}
1015 
// Quality-tag orientation on a gap-free alignment: for the reverse strand, the
// native expectations equal the input and the genomic expectations are the
// input reversed.
TEST(BamRecordTest, QualityTagsOrientation)
{
    SCOPED_TRACE("Simple CIGAR Quality Tags");

    BamRecordTests::CheckQualityTagsClippedAndAligned(
        "13=",              // CIGAR
        "?]?]?]?]?]?]*",    // input
        {
            // forward strand
            "?]?]?]?]?]?]*",    // genomic
            "?]?]?]?]?]?]*",    // native
            "?]?]?]?]?]?]*",    // genomic, aligned
            "?]?]?]?]?]?]*",    // native,  aligned
            "?]?]?]?]?]?]*",    // genomic, aligned + clipped
            "?]?]?]?]?]?]*",    // native,  aligned + clipped

            // reverse strand
            "*]?]?]?]?]?]?",    // genomic
            "?]?]?]?]?]?]*",    // native
            "*]?]?]?]?]?]?",    // genomic, aligned
            "?]?]?]?]?]?]*",    // native,  aligned
            "*]?]?]?]?]?]?",    // genomic, aligned + clipped
            "?]?]?]?]?]?]*"     // native,  aligned + clipped
        }
    );
}
1040 
TEST(BamRecordTest,SequenceClippedAndAligned)1041 TEST(BamRecordTest, SequenceClippedAndAligned)
1042 {
1043     {
1044         SCOPED_TRACE("CIGAR: 10=");
1045         BamRecordTests::CheckSequenceClippedAndAligned(
1046             "10=",              // CIGAR
1047             "ATCCGCGGTT",       // input
1048             {
1049                 "ATCCGCGGTT",   // forward strand, genomic
1050                 "ATCCGCGGTT",   // forward strand, native
1051                 "ATCCGCGGTT",   // forward strand, genomic, aligned
1052                 "ATCCGCGGTT",   // forward strand, native,  aligned
1053                 "ATCCGCGGTT",   // forward strand, genomic, aligned + clipped
1054                 "ATCCGCGGTT",   // forward strand, native,  aligned + clipped
1055                 "ATCCGCGGTT",   // reverse strand, genomic
1056                 "AACCGCGGAT",   // reverse strand, native
1057                 "ATCCGCGGTT",   // reverse strand, genomic, aligned
1058                 "AACCGCGGAT",   // reverse strand, native,  aligned
1059                 "ATCCGCGGTT",   // reverse strand, genomic, aligned + clipped
1060                 "AACCGCGGAT"    // reverse strand, native,  aligned + clipped
1061             }
1062         );
1063     }
1064     {
1065         SCOPED_TRACE("CIGAR: 3=4N3=");
1066         BamRecordTests::CheckSequenceClippedAndAligned(
1067             "3=4N3=",       // CIGAR
1068             "ACGTTT",        // input
1069             {
1070                 "ACGTTT",    // forward strand, genomic
1071                 "ACGTTT",    // forward strand, native
1072                 "ACGTTT",    // forward strand, genomic, aligned
1073                 "ACGTTT",    // forward strand, native,  aligned
1074                 "ACGTTT",    // forward strand, genomic, aligned + clipped
1075                 "ACGTTT",    // forward strand, native,  aligned + clipped
1076                 "ACGTTT",    // reverse strand, genomic
1077                 "AAACGT",    // reverse strand, native
1078                 "ACGTTT",    // reverse strand, genomic, aligned
1079                 "AAACGT",    // reverse strand, native,  aligned
1080                 "ACGTTT",    // reverse strand, genomic, aligned + clipped
1081                 "AAACGT"     // reverse strand, native,  aligned + clipped
1082             }
1083         );
1084     }
1085     {
1086         SCOPED_TRACE("CIGAR: 1S8=1S");
1087         BamRecordTests::CheckSequenceClippedAndAligned(
1088             "1S8=1S",           // CIGAR
1089             "ACCCGCGGTT",       // input
1090             {
1091                 "ACCCGCGGTT",   // forward strand, genomic
1092                 "ACCCGCGGTT",   // forward strand, native
1093                 "ACCCGCGGTT",   // forward strand, genomic, aligned
1094                 "ACCCGCGGTT",   // forward strand, native,  aligned
1095                 "CCCGCGGT",     // forward strand, genomic, aligned + clipped
1096                 "CCCGCGGT",     // forward strand, native,  aligned + clipped
1097                 "ACCCGCGGTT",   // reverse strand, genomic
1098                 "AACCGCGGGT",   // reverse strand, native
1099                 "ACCCGCGGTT",   // reverse strand, genomic, aligned
1100                 "AACCGCGGGT",   // reverse strand, native,  aligned
1101                 "CCCGCGGT",     // reverse strand, genomic, aligned + clipped
1102                 "ACCGCGGG"      // reverse strand, native,  aligned + clipped
1103             }
1104         );
1105     }
1106     {
1107         SCOPED_TRACE("CIGAR: 1H8=1H");
1108         BamRecordTests::CheckSequenceClippedAndAligned(
1109             "1H8=1H",           // CIGAR
1110             "ATCGCGGT",         // input
1111             {
1112                 "ATCGCGGT",     // forward strand, genomic
1113                 "ATCGCGGT",     // forward strand, native
1114                 "ATCGCGGT",     // forward strand, genomic, aligned
1115                 "ATCGCGGT",     // forward strand, native,  aligned
1116                 "ATCGCGGT",     // forward strand, genomic, aligned + clipped
1117                 "ATCGCGGT",     // forward strand, native,  aligned + clipped
1118                 "ATCGCGGT",     // reverse strand, genomic
1119                 "ACCGCGAT",     // reverse strand, native
1120                 "ATCGCGGT",     // reverse strand, genomic, aligned
1121                 "ACCGCGAT",     // reverse strand, native,  aligned
1122                 "ATCGCGGT",     // reverse strand, genomic, aligned + clipped
1123                 "ACCGCGAT"      // reverse strand, native,  aligned + clipped
1124             }
1125         );
1126     }
1127     {
1128         SCOPED_TRACE("CIGAR: 2S6=2S");
1129         BamRecordTests::CheckSequenceClippedAndAligned(
1130             "2S6=2S",           // CIGAR
1131             "AGCCGCGGTT",       // input
1132             {
1133                 "AGCCGCGGTT",   // forward strand, genomic
1134                 "AGCCGCGGTT",   // forward strand, native
1135                 "AGCCGCGGTT",   // forward strand, genomic, aligned
1136                 "AGCCGCGGTT",   // forward strand, native,  aligned
1137                 "CCGCGG",       // forward strand, genomic, aligned + clipped
1138                 "CCGCGG",       // forward strand, native,  aligned + clipped
1139                 "AGCCGCGGTT",   // reverse strand, genomic
1140                 "AACCGCGGCT",   // reverse strand, native
1141                 "AGCCGCGGTT",   // reverse strand, genomic, aligned
1142                 "AACCGCGGCT",   // reverse strand, native,  aligned
1143                 "CCGCGG",       // reverse strand, genomic, aligned + clipped
1144                 "CCGCGG"        // reverse strand, native,  aligned + clipped
1145             }
1146         );
1147     }
1148     {
1149         SCOPED_TRACE("CIGAR: 2S3=2I3=2S");
1150         BamRecordTests::CheckSequenceClippedAndAligned(
1151             "2S3=2I3=2S",           // CIGAR
1152             "ATCCGNNCGGTT",         // input
1153             {
1154                 "ATCCGNNCGGTT",     // forward strand, genomic
1155                 "ATCCGNNCGGTT",     // forward strand, native
1156                 "ATCCGNNCGGTT",     // forward strand, genomic, aligned
1157                 "ATCCGNNCGGTT",     // forward strand, native,  aligned
1158                 "CCGNNCGG",         // forward strand, genomic, aligned + clipped
1159                 "CCGNNCGG",         // forward strand, native,  aligned + clipped
1160                 "ATCCGNNCGGTT",     // reverse strand, genomic
1161                 "AACCGNNCGGAT",     // reverse strand, native
1162                 "ATCCGNNCGGTT",     // reverse strand, genomic, aligned
1163                 "AACCGNNCGGAT",     // reverse strand, native,  aligned
1164                 "CCGNNCGG",         // reverse strand, genomic, aligned + clipped
1165                 "CCGNNCGG"          // reverse strand, native,  aligned + clipped
1166             }
1167         );
1168     }
1169     {
1170         SCOPED_TRACE("CIGAR: 2H6=2H");
1171         BamRecordTests::CheckSequenceClippedAndAligned(
1172             "2H6=2H",       // CIGAR
1173             "CAGCGG",       // input
1174             {
1175                 "CAGCGG",   // forward strand, genomic
1176                 "CAGCGG",   // forward strand, native
1177                 "CAGCGG",   // forward strand, genomic, aligned
1178                 "CAGCGG",   // forward strand, native,  aligned
1179                 "CAGCGG",   // forward strand, genomic, aligned + clipped
1180                 "CAGCGG",   // forward strand, native,  aligned + clipped
1181                 "CAGCGG",   // reverse strand, genomic
1182                 "CCGCTG",   // reverse strand, native
1183                 "CAGCGG",   // reverse strand, genomic, aligned
1184                 "CCGCTG",   // reverse strand, native,  aligned
1185                 "CAGCGG",   // reverse strand, genomic, aligned + clipped
1186                 "CCGCTG"    // reverse strand, native,  aligned + clipped
1187             }
1188         );
1189     }
1190 }
1191 
TEST(BamRecordTest,ClippingOrientationAndAlignment)1192 TEST(BamRecordTest, ClippingOrientationAndAlignment)
1193 {
1194     {
1195         SCOPED_TRACE("CIGAR: 4=3D4=");
1196         BamRecordTests::CheckSequenceClippedAndAligned(
1197             "4=3D4=",           // CIGAR
1198             "AACCGTTA",         // input
1199             {
1200                 "AACCGTTA",     // forward strand, genomic
1201                 "AACCGTTA",     // forward strand, native
1202                 "AACC---GTTA",  // forward strand, genomic, aligned
1203                 "AACC---GTTA",  // forward strand, native,  aligned
1204                 "AACC---GTTA",  // forward strand, genomic, aligned + clipped
1205                 "AACC---GTTA",  // forward strand, native,  aligned + clipped
1206                 "AACCGTTA",     // reverse strand, genomic
1207                 "TAACGGTT",     // reverse strand, native
1208                 "AACC---GTTA",  // reverse strand, genomic, aligned
1209                 "TAAC---GGTT",  // reverse strand, native,  aligned
1210                 "AACC---GTTA",  // reverse strand, genomic, aligned + clipped
1211                 "TAAC---GGTT"   // reverse strand, native,  aligned + clipped
1212             }
1213         );
1214     }
1215     {
1216         SCOPED_TRACE("CIGAR: 4=1D2I2D4=");
1217         BamRecordTests::CheckSequenceClippedAndAligned(
1218             "4=1D2I2D4=",           // CIGAR
1219             "ATCCTAGGTT",           // input
1220             {
1221                 "ATCCTAGGTT",       // forward strand, genomic
1222                 "ATCCTAGGTT",       // forward strand, native
1223                 "ATCC-TA--GGTT",    // forward strand, genomic, aligned
1224                 "ATCC-TA--GGTT",    // forward strand, native,  aligned
1225                 "ATCC-TA--GGTT",    // forward strand, genomic, aligned + clipped
1226                 "ATCC-TA--GGTT",    // forward strand, native,  aligned + clipped
1227                 "ATCCTAGGTT",       // reverse strand, genomic
1228                 "AACCTAGGAT",       // reverse strand, native
1229                 "ATCC-TA--GGTT",    // reverse strand, genomic, aligned
1230                 "AACC--TA-GGAT",    // reverse strand, native,  aligned
1231                 "ATCC-TA--GGTT",    // reverse strand, genomic, aligned + clipped
1232                 "AACC--TA-GGAT"     // reverse strand, native,  aligned + clipped
1233             }
1234         );
1235     }
1236     {
1237         SCOPED_TRACE("CIGAR: 4=1D2P2I2P2D4=");
1238         BamRecordTests::CheckSequenceClippedAndAligned(
1239             "4=1D2P2I2P2D4=",           // CIGAR
1240             "ATCCTAGGTT",               // input
1241             {
1242                 "ATCCTAGGTT",           // forward strand, genomic
1243                 "ATCCTAGGTT",           // forward strand, native
1244                 "ATCC-**TA**--GGTT",    // forward strand, genomic, aligned
1245                 "ATCC-**TA**--GGTT",    // forward strand, native,  aligned
1246                 "ATCC-**TA**--GGTT",    // forward strand, genomic, aligned + clipped
1247                 "ATCC-**TA**--GGTT",    // forward strand, native,  aligned + clipped
1248                 "ATCCTAGGTT",           // reverse strand, genomic
1249                 "AACCTAGGAT",           // reverse strand, native
1250                 "ATCC-**TA**--GGTT",    // reverse strand, genomic, aligned
1251                 "AACC--**TA**-GGAT",    // reverse strand, native,  aligned
1252                 "ATCC-**TA**--GGTT",    // reverse strand, genomic, aligned + clipped
1253                 "AACC--**TA**-GGAT"     // reverse strand, native,  aligned + clipped
1254             }
1255         );
1256     }
1257     {
1258         SCOPED_TRACE("CIGAR: 2S4=3D4=3S");
1259         BamRecordTests::CheckSequenceClippedAndAligned(
1260             "2S4=3D4=3S",               // CIGAR
1261             "TTAACCGTTACCG",            // input
1262             {
1263                 "TTAACCGTTACCG",        // forward strand, genomic
1264                 "TTAACCGTTACCG",        // forward strand, native
1265                 "TTAACC---GTTACCG",     // forward strand, genomic, aligned
1266                 "TTAACC---GTTACCG",     // forward strand, native,  aligned
1267                 "AACC---GTTA",          // forward strand, genomic, aligned + clipped
1268                 "AACC---GTTA",          // forward strand, native,  aligned + clipped
1269                 "TTAACCGTTACCG",        // reverse strand, genomic
1270                 "CGGTAACGGTTAA",        // reverse strand, native
1271                 "TTAACC---GTTACCG",     // reverse strand, genomic, aligned
1272                 "CGGTAAC---GGTTAA",     // reverse strand, native,  aligned
1273                 "AACC---GTTA",          // reverse strand, genomic, aligned + clipped
1274                 "TAAC---GGTT"           // reverse strand, native,  aligned + clipped
1275             }
1276         );
1277     }
1278     {
1279         SCOPED_TRACE("CIGAR: 2H4=3D4=3H");
1280         BamRecordTests::CheckSequenceClippedAndAligned(
1281             "2H4=3D4=3H",       // CIGAR
1282             "AACCGTTA",         // input
1283             {
1284                 "AACCGTTA",     // forward strand, genomic
1285                 "AACCGTTA",     // forward strand, native
1286                 "AACC---GTTA",  // forward strand, genomic, aligned
1287                 "AACC---GTTA",  // forward strand, native,  aligned
1288                 "AACC---GTTA",  // forward strand, genomic, aligned + clipped
1289                 "AACC---GTTA",  // forward strand, native,  aligned + clipped
1290                 "AACCGTTA",     // reverse strand, genomic
1291                 "TAACGGTT",     // reverse strand, native
1292                 "AACC---GTTA",  // reverse strand, genomic, aligned
1293                 "TAAC---GGTT",  // reverse strand, native,  aligned
1294                 "AACC---GTTA",  // reverse strand, genomic, aligned + clipped
1295                 "TAAC---GGTT"   // reverse strand, native,  aligned + clipped
1296             }
1297         );
1298     }
1299     {
1300         SCOPED_TRACE("CIGAR: 2H2S4=3D4=3S3H");
1301         BamRecordTests::CheckSequenceClippedAndAligned(
1302             "2H2S4=3D4=3S3H",           // CIGAR
1303             "TTAACCGTTACCG",            // input
1304             {
1305                 "TTAACCGTTACCG",        // forward strand, genomic
1306                 "TTAACCGTTACCG",        // forward strand, native
1307                 "TTAACC---GTTACCG",     // forward strand, genomic, aligned
1308                 "TTAACC---GTTACCG",     // forward strand, native,  aligned
1309                 "AACC---GTTA",          // forward strand, genomic, aligned + clipped
1310                 "AACC---GTTA",          // forward strand, native,  aligned + clipped
1311                 "TTAACCGTTACCG",        // reverse strand, genomic
1312                 "CGGTAACGGTTAA",        // reverse strand, native
1313                 "TTAACC---GTTACCG",     // reverse strand, genomic, aligned
1314                 "CGGTAAC---GGTTAA",     // reverse strand, native,  aligned
1315                 "AACC---GTTA",          // reverse strand, genomic, aligned + clipped
1316                 "TAAC---GGTT"           // reverse strand, native,  aligned + clipped
1317             }
1318         );
1319     }
1320 }
1321 
TEST(BamRecordTest,QualityTagsClippedAndAligned)1322 TEST(BamRecordTest, QualityTagsClippedAndAligned)
1323 {
1324     {
1325         SCOPED_TRACE("CIGAR: 4=3D4=");
1326         BamRecordTests::CheckQualityTagsClippedAndAligned(
1327             "4=3D4=",           // CIGAR
1328             "?]?]?]?@",         // input
1329             {
1330                 "?]?]?]?@",     // forward strand, genomic
1331                 "?]?]?]?@",     // forward strand, native
1332                 "?]?]!!!?]?@",  // forward strand, genomic, aligned
1333                 "?]?]!!!?]?@",  // forward strand, native,  aligned
1334                 "?]?]!!!?]?@",  // forward strand, genomic, aligned + clipped
1335                 "?]?]!!!?]?@",  // forward strand, native,  aligned + clipped
1336                 "@?]?]?]?",     // reverse strand, genomic
1337                 "?]?]?]?@",     // reverse strand, native
1338                 "@?]?!!!]?]?",  // reverse strand, genomic, aligned
1339                 "?]?]!!!?]?@",  // reverse strand, native,  aligned
1340                 "@?]?!!!]?]?",  // reverse strand, genomic, aligned + clipped
1341                 "?]?]!!!?]?@"   // reverse strand, native,  aligned + clipped
1342             }
1343         );
1344     }
1345     {
1346         SCOPED_TRACE("CIGAR: 4=1D2I2D4=");
1347         BamRecordTests::CheckQualityTagsClippedAndAligned(
1348             "4=1D2I2D4=",           // CIGAR
1349             "?]?]87?]?@",           // input
1350             {
1351                 "?]?]87?]?@",       // forward strand, genomic
1352                 "?]?]87?]?@",       // forward strand, native
1353                 "?]?]!87!!?]?@",    // forward strand, genomic, aligned
1354                 "?]?]!87!!?]?@",    // forward strand, native,  aligned
1355                 "?]?]!87!!?]?@",    // forward strand, genomic, aligned + clipped
1356                 "?]?]!87!!?]?@",    // forward strand, native,  aligned + clipped
1357                 "@?]?78]?]?",       // reverse strand, genomic
1358                 "?]?]87?]?@",       // reverse strand, native
1359                 "@?]?!78!!]?]?",    // reverse strand, genomic, aligned
1360                 "?]?]!!87!?]?@",    // reverse strand, native,  aligned
1361                 "@?]?!78!!]?]?",    // reverse strand, genomic, aligned + clipped
1362                 "?]?]!!87!?]?@"     // reverse strand, native,  aligned + clipped
1363             }
1364         );
1365     }
1366     {
1367         SCOPED_TRACE("CIGAR: 4=1D2P2I2P2D4=");
1368         BamRecordTests::CheckQualityTagsClippedAndAligned(
1369             "4=1D2P2I2P2D4=",       // CIGAR
1370             "?]?]87?]?@",           // input
1371         {
1372             "?]?]87?]?@",           // forward strand, genomic
1373             "?]?]87?]?@",           // forward strand, native
1374             "?]?]!!!87!!!!?]?@",    // forward strand, genomic, aligned
1375             "?]?]!!!87!!!!?]?@",    // forward strand, native,  aligned
1376             "?]?]!!!87!!!!?]?@",    // forward strand, genomic, aligned + clipped
1377             "?]?]!!!87!!!!?]?@",    // forward strand, native,  aligned + clipped
1378             "@?]?78]?]?",           // reverse strand, genomic
1379             "?]?]87?]?@",           // reverse strand, native
1380             "@?]?!!!78!!!!]?]?",    // reverse strand, genomic, aligned
1381             "?]?]!!!!87!!!?]?@",    // reverse strand, native,  aligned
1382             "@?]?!!!78!!!!]?]?",    // reverse strand, genomic, aligned + clipped
1383             "?]?]!!!!87!!!?]?@"     // reverse strand, native,  aligned + clipped
1384         }
1385         );
1386     }
1387     {
1388         SCOPED_TRACE("CIGAR: 3S4=3D4=3S");
1389         BamRecordTests::CheckQualityTagsClippedAndAligned(
1390             "3S4=3D4=3S",               // CIGAR
1391             "vvv?]?]?]?@xxx",           // input
1392             {
1393                 "vvv?]?]?]?@xxx",       // forward strand, genomic
1394                 "vvv?]?]?]?@xxx",       // forward strand, native
1395                 "vvv?]?]!!!?]?@xxx",    // forward strand, genomic, aligned
1396                 "vvv?]?]!!!?]?@xxx",    // forward strand, native, aligned
1397                 "?]?]!!!?]?@",          // forward strand, genomic, aligned, clipped
1398                 "?]?]!!!?]?@",          // forward strand, native, aligned, clipped
1399                 "xxx@?]?]?]?vvv",       // reverse strand, genomic
1400                 "vvv?]?]?]?@xxx",       // reverse strand, native
1401                 "xxx@?]?!!!]?]?vvv",    // reverse strand, genomic, aligned
1402                 "vvv?]?]!!!?]?@xxx",    // reverse strand, native, aligned
1403                 "@?]?!!!]?]?",          // reverse strand, genomic, aligned, clipped
1404                 "?]?]!!!?]?@"           // reverse strand, native, aligned, clipped
1405             }
1406         );
1407     }
1408     {
1409         SCOPED_TRACE("CIGAR: 2H4=3D4=3H");
1410         BamRecordTests::CheckQualityTagsClippedAndAligned(
1411             "2H4=3D4=3H",       // CIGAR
1412             "?]?]?]?@",         // input
1413             {
1414                 "?]?]?]?@",     // forward strand, genomic
1415                 "?]?]?]?@",     // forward strand, native
1416                 "?]?]!!!?]?@",  // forward strand, genomic, aligned
1417                 "?]?]!!!?]?@",  // forward strand, native, aligned
1418                 "?]?]!!!?]?@",  // forward strand, genomic, aligned, clipped
1419                 "?]?]!!!?]?@",  // forward strand, native, aligned, clipped
1420                 "@?]?]?]?",     // reverse strand, genomic
1421                 "?]?]?]?@",     // reverse strand, native
1422                 "@?]?!!!]?]?",  // reverse strand, genomic, aligned
1423                 "?]?]!!!?]?@",  // reverse strand, native, aligned
1424                 "@?]?!!!]?]?",  // reverse strand, genomic, aligned, clipped
1425                 "?]?]!!!?]?@"   // reverse strand, native, aligned, clipped
1426             }
1427         );
1428     }
1429     {
1430         SCOPED_TRACE("CIGAR: 2H3S4=3D4=3S3H");
1431         BamRecordTests::CheckQualityTagsClippedAndAligned(
1432             "2H3S4=3D4=3S3H",           // CIGAR
1433             "vvv?]?]?]?@xxx",           // input
1434             {
1435                 "vvv?]?]?]?@xxx",       // forward strand, genomic
1436                 "vvv?]?]?]?@xxx",       // forward strand, native
1437                 "vvv?]?]!!!?]?@xxx",    // forward strand, genomic, aligned
1438                 "vvv?]?]!!!?]?@xxx",    // forward strand, native, aligned
1439                 "?]?]!!!?]?@",          // forward strand, genomic, aligned, clipped
1440                 "?]?]!!!?]?@",          // forward strand, native, aligned, clipped
1441                 "xxx@?]?]?]?vvv",       // reverse strand, genomic
1442                 "vvv?]?]?]?@xxx",       // reverse strand, native
1443                 "xxx@?]?!!!]?]?vvv",    // reverse strand, genomic, aligned
1444                 "vvv?]?]!!!?]?@xxx",    // reverse strand, native, aligned
1445                 "@?]?!!!]?]?",          // reverse strand, genomic, aligned, clipped
1446                 "?]?]!!!?]?@"           // reverse strand, native, aligned, clipped
1447             }
1448         );
1449     }
1450 }
1451 
TEST(BamRecordTest,BaseTagsClippedAndAligned)1452 TEST(BamRecordTest, BaseTagsClippedAndAligned)
1453 {
1454     {
1455         SCOPED_TRACE("CIGAR: 4=3D4=");
1456         BamRecordTests::CheckBaseTagsClippedAndAligned(
1457             "4=3D4=",           // CIGAR
1458             "AACCGTTA",         // input
1459             {
1460                 "AACCGTTA",     // forward strand, genomic
1461                 "AACCGTTA",     // forward strand, native
1462                 "AACC---GTTA",  // forward strand, genomic, aligned
1463                 "AACC---GTTA",  // forward strand, native, aligned
1464                 "AACC---GTTA",  // forward strand, genomic, aligned, clipped
1465                 "AACC---GTTA",  // forward strand, native, aligned, clipped
1466                 "TAACGGTT",     // reverse strand, genomic
1467                 "AACCGTTA",     // reverse strand, native
1468                 "TAAC---GGTT",  // reverse strand, genomic, aligned
1469                 "AACC---GTTA",  // reverse strand, native, aligned
1470                 "TAAC---GGTT",  // reverse strand, genomic, aligned, clipped
1471                 "AACC---GTTA"   // reverse strand, native, aligned, clipped
1472             }
1473         );
1474     }
1475     {
1476         SCOPED_TRACE("CIGAR: 4=1D2I2D4=");
1477         BamRecordTests::CheckBaseTagsClippedAndAligned(
1478             "4=1D2I2D4=",           // CIGAR
1479             "ATCCTAGGTT",           // input
1480             {
1481                 "ATCCTAGGTT",       // forward strand, genomic
1482                 "ATCCTAGGTT",       // forward strand, native
1483                 "ATCC-TA--GGTT",    // forward strand, genomic, aligned
1484                 "ATCC-TA--GGTT",    // forward strand, native, aligned
1485                 "ATCC-TA--GGTT",    // forward strand, genomic, aligned, clipped
1486                 "ATCC-TA--GGTT",    // forward strand, native, aligned, clipped
1487                 "AACCTAGGAT",       // reverse strand, genomic
1488                 "ATCCTAGGTT",       // reverse strand, native
1489                 "AACC-TA--GGAT",    // reverse strand, genomic, aligned
1490                 "ATCC--TA-GGTT",    // reverse strand, native, aligned
1491                 "AACC-TA--GGAT",    // reverse strand, genomic, aligned, clipped
1492                 "ATCC--TA-GGTT"     // reverse strand, native, aligned, clipped
1493             }
1494         );
1495     }
1496     {
1497         SCOPED_TRACE("CIGAR: 4=1D2P2I2P2D4=");
1498         BamRecordTests::CheckBaseTagsClippedAndAligned(
1499             "4=1D2P2I2P2D4=",           // CIGAR
1500             "ATCCTAGGTT",               // input
1501             {
1502                 "ATCCTAGGTT",           // forward strand, genomic
1503                 "ATCCTAGGTT",           // forward strand, native
1504                 "ATCC-**TA**--GGTT",    // forward strand, genomic, aligned
1505                 "ATCC-**TA**--GGTT",    // forward strand, native, aligned
1506                 "ATCC-**TA**--GGTT",    // forward strand, genomic, aligned, clipped
1507                 "ATCC-**TA**--GGTT",    // forward strand, native, aligned, clipped
1508                 "AACCTAGGAT",           // reverse strand, genomic
1509                 "ATCCTAGGTT",           // reverse strand, native
1510                 "AACC-**TA**--GGAT",    // reverse strand, genomic, aligned
1511                 "ATCC--**TA**-GGTT",    // reverse strand, native, aligned
1512                 "AACC-**TA**--GGAT",    // reverse strand, genomic, aligned, clipped
1513                 "ATCC--**TA**-GGTT"     // reverse strand, native, aligned, clipped
1514             }
1515         );
1516     }
1517     {
1518         SCOPED_TRACE("CIGAR: 3S4=3D4=3S");
1519         BamRecordTests::CheckBaseTagsClippedAndAligned(
1520             "3S4=3D4=3S",               // CIGAR
1521             "TTTAACCGTTACCG",           // input
1522             {
1523                 "TTTAACCGTTACCG",       // forward strand, genomic
1524                 "TTTAACCGTTACCG",       // forward strand, native
1525                 "TTTAACC---GTTACCG",    // forward strand, genomic, aligned
1526                 "TTTAACC---GTTACCG",    // forward strand, native, aligned
1527                 "AACC---GTTA",          // forward strand, genomic, aligned, clipped
1528                 "AACC---GTTA",          // forward strand, native, aligned, clipped
1529                 "CGGTAACGGTTAAA",       // reverse strand, genomic
1530                 "TTTAACCGTTACCG",       // reverse strand, native
1531                 "CGGTAAC---GGTTAAA",    // reverse strand, genomic, aligned
1532                 "TTTAACC---GTTACCG",    // reverse strand, native, aligned
1533                 "TAAC---GGTT",          // reverse strand, genomic, aligned, clipped
1534                 "AACC---GTTA"           // reverse strand, native, aligned, clipped
1535             }
1536         );
1537     }
1538     {
1539         SCOPED_TRACE("CIGAR: 2H4=3D4=3H");
1540         BamRecordTests::CheckBaseTagsClippedAndAligned(
1541             "2H4=3D4=3H",       // CIGAR
1542             "AACCGTTA",         // input
1543             {
1544                 "AACCGTTA",     // forward strand, genomic
1545                 "AACCGTTA",     // forward strand, native
1546                 "AACC---GTTA",  // forward strand, genomic, aligned
1547                 "AACC---GTTA",  // forward strand, native, aligned
1548                 "AACC---GTTA",  // forward strand, genomic, aligned, clipped
1549                 "AACC---GTTA",  // forward strand, native, aligned, clipped
1550                 "TAACGGTT",     // reverse strand, genomic
1551                 "AACCGTTA",     // reverse strand, native
1552                 "TAAC---GGTT",  // reverse strand, genomic, aligned
1553                 "AACC---GTTA",  // reverse strand, native, aligned
1554                 "TAAC---GGTT",  // reverse strand, genomic, aligned, clipped
1555                 "AACC---GTTA"   // reverse strand, native, aligned, clipped
1556             }
1557         );
1558     }
1559     {
1560         SCOPED_TRACE("CIGAR: 2H3S4=3D4=3S3H");
1561         BamRecordTests::CheckBaseTagsClippedAndAligned(
1562             "2H3S4=3D4=3S3H",           // CIGAR
1563             "TTTAACCGTTACCG",           // input
1564             {
1565                 "TTTAACCGTTACCG",       // forward strand, genomic
1566                 "TTTAACCGTTACCG",       // forward strand, native
1567                 "TTTAACC---GTTACCG",    // forward strand, genomic, aligned
1568                 "TTTAACC---GTTACCG",    // forward strand, native, aligned
1569                 "AACC---GTTA",          // forward strand, genomic, aligned, clipped
1570                 "AACC---GTTA",          // forward strand, native, aligned, clipped
1571                 "CGGTAACGGTTAAA",       // reverse strand, genomic
1572                 "TTTAACCGTTACCG",       // reverse strand, native
1573                 "CGGTAAC---GGTTAAA",    // reverse strand, genomic, aligned
1574                 "TTTAACC---GTTACCG",    // reverse strand, native, aligned
1575                 "TAAC---GGTT",          // reverse strand, genomic, aligned, clipped
1576                 "AACC---GTTA"           // reverse strand, native, aligned, clipped
1577             }
1578         );
1579     }
1580 }
1581 
TEST(BamRecordTest,FrameTagsClippedAndAligned)1582 TEST(BamRecordTest, FrameTagsClippedAndAligned)
1583 {
1584     {
1585         SCOPED_TRACE("CIGAR: 4=3D4=");
1586         BamRecordTests::CheckFrameTagsClippedAndAligned(
1587             "4=3D4=",                                           // CIGAR
1588             { 10, 20, 10, 20, 10, 20, 10, 30 },                 // input
1589             {
1590                 { 10, 20, 10, 20, 10, 20, 10, 30 },             // forward strand, genomic
1591                 { 10, 20, 10, 20, 10, 20, 10, 30 },             // forward strand, native
1592                 { 10, 20, 10, 20, 0, 0, 0, 10, 20, 10, 30 },    // forward strand, genomic, aligned
1593                 { 10, 20, 10, 20, 0, 0, 0, 10, 20, 10, 30 },    // forward strand, native, aligned
1594                 { 10, 20, 10, 20, 0, 0, 0, 10, 20, 10, 30 },    // forward strand, genomic, aligned, clipped
1595                 { 10, 20, 10, 20, 0, 0, 0, 10, 20, 10, 30 },    // forward strand, native, aligned, clipped
1596                 { 30, 10, 20, 10, 20, 10, 20, 10 },             // reverse strand, genomic
1597                 { 10, 20, 10, 20, 10, 20, 10, 30 },             // reverse strand, native
1598                 { 30, 10, 20, 10, 0, 0, 0, 20, 10, 20, 10 },    // reverse strand, genomic, aligned
1599                 { 10, 20, 10, 20, 0, 0, 0, 10, 20, 10, 30 },    // reverse strand, native, aligned
1600                 { 30, 10, 20, 10, 0, 0, 0, 20, 10, 20, 10 },    // reverse strand, genomic, aligned, clipped
1601                 { 10, 20, 10, 20, 0, 0, 0, 10, 20, 10, 30 }     // reverse strand, native, aligned, clipped
1602             }
1603         );
1604     }
1605     {
1606         SCOPED_TRACE("CIGAR: 4=1D2I2D4=");
1607         BamRecordTests::CheckFrameTagsClippedAndAligned(
1608             "4=1D2I2D4=",                                               // CIGAR
1609             { 10, 20, 10, 20, 80, 70, 10, 20, 10, 30 },                 // input
1610             {
1611                 { 10, 20, 10, 20, 80, 70, 10, 20, 10, 30 },             // forward strand, genomic
1612                 { 10, 20, 10, 20, 80, 70, 10, 20, 10, 30 },             // forward strand, native
1613                 { 10, 20, 10, 20, 0, 80, 70, 0, 0, 10, 20, 10, 30 },    // forward strand, genomic, aligned
1614                 { 10, 20, 10, 20, 0, 80, 70, 0, 0, 10, 20, 10, 30 },    // forward strand, native, aligned
1615                 { 10, 20, 10, 20, 0, 80, 70, 0, 0, 10, 20, 10, 30 },    // forward strand, genomic, aligned, clipped
1616                 { 10, 20, 10, 20, 0, 80, 70, 0, 0, 10, 20, 10, 30 },    // forward strand, native, aligned, clipped
1617                 { 30, 10, 20, 10, 70, 80, 20, 10, 20, 10 },             // reverse strand, genomic
1618                 { 10, 20, 10, 20, 80, 70, 10, 20, 10, 30 },             // reverse strand, native
1619                 { 30, 10, 20, 10, 0, 70, 80, 0, 0, 20, 10, 20, 10 },    // reverse strand, genomic, aligned
1620                 { 10, 20, 10, 20, 0, 0, 80, 70, 0, 10, 20, 10, 30 },    // reverse strand, native, aligned
1621                 { 30, 10, 20, 10, 0, 70, 80, 0, 0, 20, 10, 20, 10 },    // reverse strand, genomic, aligned, clipped
1622                 { 10, 20, 10, 20, 0, 0, 80, 70, 0, 10, 20, 10, 30 }     // reverse strand, native, aligned, clipped
1623             }
1624         );
1625     }
1626     {
1627         SCOPED_TRACE("CIGAR: 4=1D2P2I2P2D4=");
1628         BamRecordTests::CheckFrameTagsClippedAndAligned(
1629             "4=1D2P2I2P2D4=",                                                   // CIGAR
1630             { 10, 20, 10, 20, 80, 70, 10, 20, 10, 30 },                         // input
1631         {
1632             { 10, 20, 10, 20, 80, 70, 10, 20, 10, 30 },                         // forward strand, genomic
1633             { 10, 20, 10, 20, 80, 70, 10, 20, 10, 30 },                         // forward strand, native
1634             { 10, 20, 10, 20, 0, 0, 0, 80, 70, 0, 0, 0, 0, 10, 20, 10, 30 },    // forward strand, genomic, aligned
1635             { 10, 20, 10, 20, 0, 0, 0, 80, 70, 0, 0, 0, 0, 10, 20, 10, 30 },    // forward strand, native, aligned
1636             { 10, 20, 10, 20, 0, 0, 0, 80, 70, 0, 0, 0, 0, 10, 20, 10, 30 },    // forward strand, genomic, aligned, clipped
1637             { 10, 20, 10, 20, 0, 0, 0, 80, 70, 0, 0, 0, 0, 10, 20, 10, 30 },    // forward strand, native, aligned, clipped
1638             { 30, 10, 20, 10, 70, 80, 20, 10, 20, 10 },                         // reverse strand, genomic
1639             { 10, 20, 10, 20, 80, 70, 10, 20, 10, 30 },                         // reverse strand, native
1640             { 30, 10, 20, 10, 0, 0, 0, 70, 80, 0, 0, 0, 0, 20, 10, 20, 10 },    // reverse strand, genomic, aligned
1641             { 10, 20, 10, 20, 0, 0, 0, 0, 80, 70, 0, 0, 0, 10, 20, 10, 30 },    // reverse strand, native, aligned
1642             { 30, 10, 20, 10, 0, 0, 0, 70, 80, 0, 0, 0, 0, 20, 10, 20, 10 },    // reverse strand, genomic, aligned, clipped
1643             { 10, 20, 10, 20, 0, 0, 0, 0, 80, 70, 0, 0, 0, 10, 20, 10, 30 }     // reverse strand, native, aligned, clipped
1644         }
1645         );
1646     }
1647     {
1648         SCOPED_TRACE("CIGAR: 3S4=3D4=3S");
1649         BamRecordTests::CheckFrameTagsClippedAndAligned(
1650             "3S4=3D4=3S",                                                               // CIGAR
1651             { 40, 40, 40, 10, 20, 10, 20, 10, 20, 10, 30, 50, 50, 50 },                 // input
1652             {
1653                 { 40, 40, 40, 10, 20, 10, 20, 10, 20, 10, 30, 50, 50, 50 },             // forward strand, genomic
1654                 { 40, 40, 40, 10, 20, 10, 20, 10, 20, 10, 30, 50, 50, 50 },             // forward strand, native
1655                 { 40, 40, 40, 10, 20, 10, 20, 0, 0, 0, 10, 20, 10, 30, 50, 50, 50 },    // forward strand, genomic, aligned
1656                 { 40, 40, 40, 10, 20, 10, 20, 0, 0, 0, 10, 20, 10, 30, 50, 50, 50 },    // forward strand, native, aligned
1657                 { 10, 20, 10, 20, 0, 0, 0, 10, 20, 10, 30 },                            // forward strand, genomic, aligned, clipped
1658                 { 10, 20, 10, 20, 0, 0, 0, 10, 20, 10, 30 },                            // forward strand, native, aligned, clipped
1659                 { 50, 50, 50, 30, 10, 20, 10, 20, 10, 20, 10, 40, 40, 40 },             // reverse strand, genomic
1660                 { 40, 40, 40, 10, 20, 10, 20, 10, 20, 10, 30, 50, 50, 50 },             // reverse strand, native
1661                 { 50, 50, 50, 30, 10, 20, 10, 0, 0, 0, 20, 10, 20, 10, 40, 40, 40 },    // reverse strand, genomic, aligned
1662                 { 40, 40, 40, 10, 20, 10, 20, 0, 0, 0, 10, 20, 10, 30, 50, 50, 50 },    // reverse strand, native, aligned
1663                 { 30, 10, 20, 10, 0, 0, 0, 20, 10, 20, 10 },                            // reverse strand, genomic, aligned, clipped
1664                 { 10, 20, 10, 20, 0, 0, 0, 10, 20, 10, 30 }                             // reverse strand, native, aligned, clipped
1665             }
1666         );
1667     }
1668     {
1669         SCOPED_TRACE("CIGAR: 2H4=3D4=3H");
1670         BamRecordTests::CheckFrameTagsClippedAndAligned(
1671             "2H4=3D4=3H",                                       // CIGAR
1672             { 10, 20, 10, 20, 10, 20, 10, 30 },                 // input
1673             {
1674                 { 10, 20, 10, 20, 10, 20, 10, 30 },             // forward strand, genomic
1675                 { 10, 20, 10, 20, 10, 20, 10, 30 },             // forward strand, native
1676                 { 10, 20, 10, 20, 0, 0, 0, 10, 20, 10, 30 },    // forward strand, genomic, aligned
1677                 { 10, 20, 10, 20, 0, 0, 0, 10, 20, 10, 30 },    // forward strand, native, aligned
1678                 { 10, 20, 10, 20, 0, 0, 0, 10, 20, 10, 30 },    // forward strand, genomic, aligned, clipped
1679                 { 10, 20, 10, 20, 0, 0, 0, 10, 20, 10, 30 },    // forward strand, native, aligned, clipped
1680                 { 30, 10, 20, 10, 20, 10, 20, 10 },             // reverse strand, genomic
1681                 { 10, 20, 10, 20, 10, 20, 10, 30 },             // reverse strand, native
1682                 { 30, 10, 20, 10, 0, 0, 0, 20, 10, 20, 10 },    // reverse strand, genomic, aligned
1683                 { 10, 20, 10, 20, 0, 0, 0, 10, 20, 10, 30 },    // reverse strand, native, aligned
1684                 { 30, 10, 20, 10, 0, 0, 0, 20, 10, 20, 10 },    // reverse strand, genomic, aligned, clipped
1685                 { 10, 20, 10, 20, 0, 0, 0, 10, 20, 10, 30 }     // reverse strand, native, aligned, clipped
1686             }
1687         );
1688     }
1689     {
1690         SCOPED_TRACE("CIGAR: 2H3S4=3D4=3S3H");
1691         BamRecordTests::CheckFrameTagsClippedAndAligned(
1692             "2H3S4=3D4=3S3H",                                                           // CIGAR
1693             { 40, 40, 40, 10, 20, 10, 20, 10, 20, 10, 30, 50, 50, 50 },                 // input
1694             {
1695                 { 40, 40, 40, 10, 20, 10, 20, 10, 20, 10, 30, 50, 50, 50 },             // forward strand, genomic
1696                 { 40, 40, 40, 10, 20, 10, 20, 10, 20, 10, 30, 50, 50, 50 },             // forward strand, native
1697                 { 40, 40, 40, 10, 20, 10, 20, 0, 0, 0, 10, 20, 10, 30, 50, 50, 50 },    // forward strand, genomic, aligned
1698                 { 40, 40, 40, 10, 20, 10, 20, 0, 0, 0, 10, 20, 10, 30, 50, 50, 50 },    // forward strand, native, aligned
1699                 { 10, 20, 10, 20, 0, 0, 0, 10, 20, 10, 30 },                            // forward strand, genomic, aligned, clipped
1700                 { 10, 20, 10, 20, 0, 0, 0, 10, 20, 10, 30 },                            // forward strand, native, aligned, clipped
1701                 { 50, 50, 50, 30, 10, 20, 10, 20, 10, 20, 10, 40, 40, 40 },             // reverse strand, genomic
1702                 { 40, 40, 40, 10, 20, 10, 20, 10, 20, 10, 30, 50, 50, 50 },             // reverse strand, native
1703                 { 50, 50, 50, 30, 10, 20, 10, 0, 0, 0, 20, 10, 20, 10, 40, 40, 40 },    // reverse strand, genomic, aligned
1704                 { 40, 40, 40, 10, 20, 10, 20, 0, 0, 0, 10, 20, 10, 30, 50, 50, 50 },    // reverse strand, native, aligned
1705                 { 30, 10, 20, 10, 0, 0, 0, 20, 10, 20, 10 },                            // reverse strand, genomic, aligned, clipped
1706                 { 10, 20, 10, 20, 0, 0, 0, 10, 20, 10, 30 }                             // reverse strand, native, aligned, clipped
1707             }
1708         );
1709     }
1710 }
1711 
TEST(BamRecordTest,PulseBaseTags)1712 TEST(BamRecordTest, PulseBaseTags)
1713 {
1714     {
1715         SCOPED_TRACE("CIGAR: 4=3D4=");
1716         BamRecordTests::CheckPulseBaseTags(
1717             "4=3D4=",           // CIGAR
1718             "AACCGTTA",         // seqBases
1719             "AAaaCCGggTTA",     // pulseCalls
1720             "AAaaCCGggTTA",     // tag data
1721 
1722             {   // all pulses
1723 
1724                 "AAaaCCGggTTA",     // forward strand, genomic
1725                 "AAaaCCGggTTA",     // forward strand, native
1726                 "",  // forward strand, genomic, aligned
1727                 "",  // forward strand, native, aligned
1728                 "",  // forward strand, genomic, aligned, clipped
1729                 "",  // forward strand, native, aligned, clipped
1730                 "TAAccCGGttTT",     // reverse strand, genomic
1731                 "AAaaCCGggTTA",     // reverse strand, native
1732                 "",  // reverse strand, genomic, aligned
1733                 "",  // reverse strand, native, aligned
1734                 "",  // reverse strand, genomic, aligned, clipped
1735                 ""   // reverse strand, native, aligned, clipped
1736             },
1737             {   // basecalls only
1738 
1739                 "AACCGTTA",     // forward strand, genomic
1740                 "AACCGTTA",     // forward strand, native
1741                 "AACC---GTTA",  // forward strand, genomic, aligned
1742                 "AACC---GTTA",  // forward strand, native, aligned
1743                 "AACC---GTTA",  // forward strand, genomic, aligned, clipped
1744                 "AACC---GTTA",  // forward strand, native, aligned, clipped
1745                 "TAACGGTT",     // reverse strand, genomic
1746                 "AACCGTTA",     // reverse strand, native
1747                 "TAAC---GGTT",  // reverse strand, genomic, aligned
1748                 "AACC---GTTA",  // reverse strand, native, aligned
1749                 "TAAC---GGTT",  // reverse strand, genomic, aligned, clipped
1750                 "AACC---GTTA"   // reverse strand, native, aligned, clipped
1751             }
1752         );
1753     }
1754     {
1755         SCOPED_TRACE("CIGAR: 4=1D2I2D4=");
1756         BamRecordTests::CheckPulseBaseTags(
1757             "4=1D2I2D4=",       // CIGAR
1758             "ATCCTAGGTT",       // seqBases
1759             "ATttCCTtAGGggTT",  // pulseCalls
1760             "ATttCCTtAGGggTT",  // tag data
1761 
1762             {   // all pulses
1763 
1764                 "ATttCCTtAGGggTT",       // forward strand, genomic
1765                 "ATttCCTtAGGggTT",       // forward strand, native
1766                 "",    // forward strand, genomic, aligned
1767                 "",    // forward strand, native, aligned
1768                 "",    // forward strand, genomic, aligned, clipped
1769                 "",    // forward strand, native, aligned, clipped
1770                 "AAccCCTaAGGaaAT",       // reverse strand, genomic
1771                 "ATttCCTtAGGggTT",       // reverse strand, native
1772                 "",    // reverse strand, genomic, aligned
1773                 "",    // reverse strand, native, aligned
1774                 "",    // reverse strand, genomic, aligned, clipped
1775                 ""     // reverse strand, native, aligned, clipped
1776             },
1777             {   // basecalls only
1778 
1779                 "ATCCTAGGTT",       // forward strand, genomic
1780                 "ATCCTAGGTT",       // forward strand, native
1781                 "ATCC-TA--GGTT",    // forward strand, genomic, aligned
1782                 "ATCC-TA--GGTT",    // forward strand, native, aligned
1783                 "ATCC-TA--GGTT",    // forward strand, genomic, aligned, clipped
1784                 "ATCC-TA--GGTT",    // forward strand, native, aligned, clipped
1785                 "AACCTAGGAT",       // reverse strand, genomic
1786                 "ATCCTAGGTT",       // reverse strand, native
1787                 "AACC-TA--GGAT",    // reverse strand, genomic, aligned
1788                 "ATCC--TA-GGTT",    // reverse strand, native, aligned
1789                 "AACC-TA--GGAT",    // reverse strand, genomic, aligned, clipped
1790                 "ATCC--TA-GGTT"     // reverse strand, native, aligned, clipped
1791             }
1792         );
1793     }
1794     {
1795         SCOPED_TRACE("CIGAR: 4=1D2P2I2P2D4=");
1796         BamRecordTests::CheckPulseBaseTags(
1797             "4=1D2P2I2P2D4=",   // CIGAR
1798             "ATCCTAGGTT",       // seqBases
1799             "ATttCCTtAGGggTT",  // pulseCalls
1800             "ATttCCTtAGGggTT",  // tag data
1801             {
1802                 "ATttCCTtAGGggTT",           // forward strand, genomic
1803                 "ATttCCTtAGGggTT",           // forward strand, native
1804                 "",    // forward strand, genomic, aligned
1805                 "",    // forward strand, native, aligned
1806                 "",    // forward strand, genomic, aligned, clipped
1807                 "",    // forward strand, native, aligned, clipped
1808                 "AAccCCTaAGGaaAT",           // reverse strand, genomic
1809                 "ATttCCTtAGGggTT",           // reverse strand, native
1810                 "",    // reverse strand, genomic, aligned
1811                 "",    // reverse strand, native, aligned
1812                 "",    // reverse strand, genomic, aligned, clipped
1813                 ""     // reverse strand, native, aligned, clipped
1814             },
1815             {
1816                 "ATCCTAGGTT",           // forward strand, genomic
1817                 "ATCCTAGGTT",           // forward strand, native
1818                 "ATCC-**TA**--GGTT",    // forward strand, genomic, aligned
1819                 "ATCC-**TA**--GGTT",    // forward strand, native, aligned
1820                 "ATCC-**TA**--GGTT",    // forward strand, genomic, aligned, clipped
1821                 "ATCC-**TA**--GGTT",    // forward strand, native, aligned, clipped
1822                 "AACCTAGGAT",           // reverse strand, genomic
1823                 "ATCCTAGGTT",           // reverse strand, native
1824                 "AACC-**TA**--GGAT",    // reverse strand, genomic, aligned
1825                 "ATCC--**TA**-GGTT",    // reverse strand, native, aligned
1826                 "AACC-**TA**--GGAT",    // reverse strand, genomic, aligned, clipped
1827                 "ATCC--**TA**-GGTT"     // reverse strand, native, aligned, clipped
1828             }
1829         );
1830     }
1831     {
1832         SCOPED_TRACE("CIGAR: 3S4=3D4=3S");
1833         BamRecordTests::CheckPulseBaseTags(
1834             "3S4=3D4=3S",               // CIGAR
1835             "TTTAACCGTTACCG",           // seqBases
1836             "TTTttAACCccGTTAaaCCG",     // pulseCalls
1837             "TTTttAACCccGTTAaaCCG",     // tag data
1838 
1839             {   // all pulses
1840 
1841                 "TTTttAACCccGTTAaaCCG",       // forward strand, genomic
1842                 "TTTttAACCccGTTAaaCCG",       // forward strand, native
1843                 "",         // forward strand, genomic, aligned
1844                 "",         // forward strand, native, aligned
1845                 "",          // forward strand, genomic, aligned, clipped
1846                 "",          // forward strand, native, aligned, clipped
1847                 "CGGttTAACggGGTTaaAAA",       // reverse strand, genomic
1848                 "TTTttAACCccGTTAaaCCG",       // reverse strand, native
1849                 "",    // reverse strand, genomic, aligned
1850                 "",    // reverse strand, native, aligned
1851                 "",     // reverse strand, genomic, aligned, clipped
1852                 ""           // reverse strand, native, aligned, clipped
1853             },
1854             {   // basecalls only
1855 
1856                 "TTTAACCGTTACCG",       // forward strand, genomic
1857                 "TTTAACCGTTACCG",       // forward strand, native
1858                 "TTTAACC---GTTACCG",    // forward strand, genomic, aligned
1859                 "TTTAACC---GTTACCG",    // forward strand, native, aligned
1860                 "AACC---GTTA",          // forward strand, genomic, aligned, clipped
1861                 "AACC---GTTA",          // forward strand, native, aligned, clipped
1862                 "CGGTAACGGTTAAA",       // reverse strand, genomic
1863                 "TTTAACCGTTACCG",       // reverse strand, native
1864                 "CGGTAAC---GGTTAAA",    // reverse strand, genomic, aligned
1865                 "TTTAACC---GTTACCG",    // reverse strand, native, aligned
1866                 "TAAC---GGTT",          // reverse strand, genomic, aligned, clipped
1867                 "AACC---GTTA"           // reverse strand, native, aligned, clipped
1868             }
1869         );
1870     }
1871     {
1872         SCOPED_TRACE("CIGAR: 2H4=3D4=3H");
1873         BamRecordTests::CheckPulseBaseTags(
1874             "2H4=3D4=3H",       // CIGAR
1875             "AACCGTTA",         // seqBases
1876             "AAaaCCGggTTA",     // pulseCalls
1877             "AAaaCCGggTTA",     // tag data
1878 
1879             {   // all pulses
1880 
1881                 "AAaaCCGggTTA",     // forward strand, genomic
1882                 "AAaaCCGggTTA",     // forward strand, native
1883                 "",  // forward strand, genomic, aligned
1884                 "",  // forward strand, native, aligned
1885                 "",  // forward strand, genomic, aligned, clipped
1886                 "",  // forward strand, native, aligned, clipped
1887                 "TAAccCGGttTT",     // reverse strand, genomic
1888                 "AAaaCCGggTTA",     // reverse strand, native
1889                 "",  // reverse strand, genomic, aligned
1890                 "",  // reverse strand, native, aligned
1891                 "",  // reverse strand, genomic, aligned, clipped
1892                 ""   // reverse strand, native, aligned, clipped
1893             },
1894             {   // basecalls only
1895 
1896                 "AACCGTTA",     // forward strand, genomic
1897                 "AACCGTTA",     // forward strand, native
1898                 "AACC---GTTA",  // forward strand, genomic, aligned
1899                 "AACC---GTTA",  // forward strand, native, aligned
1900                 "AACC---GTTA",  // forward strand, genomic, aligned, clipped
1901                 "AACC---GTTA",  // forward strand, native, aligned, clipped
1902                 "TAACGGTT",     // reverse strand, genomic
1903                 "AACCGTTA",     // reverse strand, native
1904                 "TAAC---GGTT",  // reverse strand, genomic, aligned
1905                 "AACC---GTTA",  // reverse strand, native, aligned
1906                 "TAAC---GGTT",  // reverse strand, genomic, aligned, clipped
1907                 "AACC---GTTA"   // reverse strand, native, aligned, clipped
1908             }
1909         );
1910     }
1911     {
1912         SCOPED_TRACE("CIGAR: 2H3S4=3D4=3S3H");
1913         BamRecordTests::CheckPulseBaseTags(
1914             "2H3S4=3D4=3S3H",           // CIGAR
1915             "TTTAACCGTTACCG",           // seqBases
1916             "TTTttAACCccGTTAaaCCG",     // pulseCalls
1917             "TTTttAACCccGTTAaaCCG",     // tag data
1918 
1919             {   // all pulses
1920 
1921                 "TTTttAACCccGTTAaaCCG",       // forward strand, genomic
1922                 "TTTttAACCccGTTAaaCCG",       // forward strand, native
1923                 "",         // forward strand, genomic, aligned
1924                 "",         // forward strand, native, aligned
1925                 "",          // forward strand, genomic, aligned, clipped
1926                 "",          // forward strand, native, aligned, clipped
1927                 "CGGttTAACggGGTTaaAAA",       // reverse strand, genomic
1928                 "TTTttAACCccGTTAaaCCG",       // reverse strand, native
1929                 "",         // reverse strand, genomic, aligned
1930                 "",         // reverse strand, native, aligned
1931                 "",          // reverse strand, genomic, aligned, clipped
1932                 ""           // reverse strand, native, aligned, clipped
1933             },
1934             {   // basecalls only
1935 
1936                 "TTTAACCGTTACCG",       // forward strand, genomic
1937                 "TTTAACCGTTACCG",       // forward strand, native
1938                 "TTTAACC---GTTACCG",    // forward strand, genomic, aligned
1939                 "TTTAACC---GTTACCG",    // forward strand, native, aligned
1940                 "AACC---GTTA",          // forward strand, genomic, aligned, clipped
1941                 "AACC---GTTA",          // forward strand, native, aligned, clipped
1942                 "CGGTAACGGTTAAA",       // reverse strand, genomic
1943                 "TTTAACCGTTACCG",       // reverse strand, native
1944                 "CGGTAAC---GGTTAAA",    // reverse strand, genomic, aligned
1945                 "TTTAACC---GTTACCG",    // reverse strand, native, aligned
1946                 "TAAC---GGTT",          // reverse strand, genomic, aligned, clipped
1947                 "AACC---GTTA"           // reverse strand, native, aligned, clipped
1948             }
1949         );
1950     }
1951 }
1952 
TEST(BamRecordTest,PulseQualityTags)1953 TEST(BamRecordTest, PulseQualityTags)
1954 {
1955     {
1956         SCOPED_TRACE("CIGAR: 4=3D4=");
1957         BamRecordTests::CheckPulseQualityTags(
1958             "4=3D4=",           // CIGAR
1959             "AACCGTTA",         // seqBases
1960             "AAaaCCGggTTA",     // pulseCalls
1961             "?]!!?]?!!]?@",     // tag data
1962 
1963             {   // all pulses
1964 
1965                 "?]!!?]?!!]?@",     // forward strand, genomic
1966                 "?]!!?]?!!]?@",     // forward strand, native
1967                 "",  // forward strand, genomic, aligned
1968                 "",  // forward strand, native,  aligned
1969                 "",  // forward strand, genomic, aligned + clipped
1970                 "",  // forward strand, native,  aligned + clipped
1971                 "@?]!!?]?!!]?",     // reverse strand, genomic
1972                 "?]!!?]?!!]?@",     // reverse strand, native
1973                 "",  // reverse strand, genomic, aligned
1974                 "",  // reverse strand, native,  aligned
1975                 "",  // reverse strand, genomic, aligned + clipped
1976                 ""   // reverse strand, native,  aligned + clipped
1977             },
1978             {   // basecalls only
1979 
1980                 "?]?]?]?@",     // forward strand, genomic
1981                 "?]?]?]?@",     // forward strand, native
1982                 "?]?]!!!?]?@",  // forward strand, genomic, aligned
1983                 "?]?]!!!?]?@",  // forward strand, native,  aligned
1984                 "?]?]!!!?]?@",  // forward strand, genomic, aligned + clipped
1985                 "?]?]!!!?]?@",  // forward strand, native,  aligned + clipped
1986                 "@?]?]?]?",     // reverse strand, genomic
1987                 "?]?]?]?@",     // reverse strand, native
1988                 "@?]?!!!]?]?",  // reverse strand, genomic, aligned
1989                 "?]?]!!!?]?@",  // reverse strand, native,  aligned
1990                 "@?]?!!!]?]?",  // reverse strand, genomic, aligned + clipped
1991                 "?]?]!!!?]?@"   // reverse strand, native,  aligned + clipped
1992             }
1993         );
1994     }
1995     {
1996         SCOPED_TRACE("CIGAR: 4=1D2I2D4=");
1997         BamRecordTests::CheckPulseQualityTags(
1998             "4=1D2I2D4=",       // CIGAR
1999             "ATCCTAGGTT",       // seqBases
2000             "ATttCCTtAGGggTT",  // pulseCalls
2001             "?]!!?]8!7?]!!?@",  // tag data
2002 
2003             {   // all pulses
2004 
2005                 "?]!!?]8!7?]!!?@",       // forward strand, genomic
2006                 "?]!!?]8!7?]!!?@",       // forward strand, native
2007                 "",    // forward strand, genomic, aligned
2008                 "",    // forward strand, native,  aligned
2009                 "",    // forward strand, genomic, aligned + clipped
2010                 "",    // forward strand, native,  aligned + clipped
2011                 "@?!!]?7!8]?!!]?",       // reverse strand, genomic
2012                 "?]!!?]8!7?]!!?@",       // reverse strand, native
2013                 "",    // reverse strand, genomic, aligned
2014                 "",    // reverse strand, native,  aligned
2015                 "",    // reverse strand, genomic, aligned + clipped
2016                 ""     // reverse strand, native,  aligned + clipped
2017             },
2018             {   // basecalls only
2019 
2020                 "?]?]87?]?@",       // forward strand, genomic
2021                 "?]?]87?]?@",       // forward strand, native
2022                 "?]?]!87!!?]?@",    // forward strand, genomic, aligned
2023                 "?]?]!87!!?]?@",    // forward strand, native,  aligned
2024                 "?]?]!87!!?]?@",    // forward strand, genomic, aligned + clipped
2025                 "?]?]!87!!?]?@",    // forward strand, native,  aligned + clipped
2026                 "@?]?78]?]?",       // reverse strand, genomic
2027                 "?]?]87?]?@",       // reverse strand, native
2028                 "@?]?!78!!]?]?",    // reverse strand, genomic, aligned
2029                 "?]?]!!87!?]?@",    // reverse strand, native,  aligned
2030                 "@?]?!78!!]?]?",    // reverse strand, genomic, aligned + clipped
2031                 "?]?]!!87!?]?@"     // reverse strand, native,  aligned + clipped
2032             }
2033         );
2034     }
2035     {
2036         SCOPED_TRACE("CIGAR: 4=1D2P2I2P2D4=");
2037         BamRecordTests::CheckPulseQualityTags(
2038             "4=1D2P2I2P2D4=",   // CIGAR
2039             "ATCCTAGGTT",       // seqBases
2040             "ATttCCTtAGGggTT",  // pulseCalls
2041             "?]!!?]8!7?]!!?@",  // tag data
2042         {
2043             "?]!!?]8!7?]!!?@",           // forward strand, genomic
2044             "?]!!?]8!7?]!!?@",           // forward strand, native
2045             "",    // forward strand, genomic, aligned
2046             "",    // forward strand, native,  aligned
2047             "",    // forward strand, genomic, aligned + clipped
2048             "",    // forward strand, native,  aligned + clipped
2049             "@?!!]?7!8]?!!]?",           // reverse strand, genomic
2050             "?]!!?]8!7?]!!?@",           // reverse strand, native
2051             "",    // reverse strand, genomic, aligned
2052             "",    // reverse strand, native,  aligned
2053             "",    // reverse strand, genomic, aligned + clipped
2054             ""     // reverse strand, native,  aligned + clipped
2055         },
2056         {
2057             "?]?]87?]?@",           // forward strand, genomic
2058             "?]?]87?]?@",           // forward strand, native
2059             "?]?]!!!87!!!!?]?@",    // forward strand, genomic, aligned
2060             "?]?]!!!87!!!!?]?@",    // forward strand, native,  aligned
2061             "?]?]!!!87!!!!?]?@",    // forward strand, genomic, aligned + clipped
2062             "?]?]!!!87!!!!?]?@",    // forward strand, native,  aligned + clipped
2063             "@?]?78]?]?",           // reverse strand, genomic
2064             "?]?]87?]?@",           // reverse strand, native
2065             "@?]?!!!78!!!!]?]?",    // reverse strand, genomic, aligned
2066             "?]?]!!!!87!!!?]?@",    // reverse strand, native,  aligned
2067             "@?]?!!!78!!!!]?]?",    // reverse strand, genomic, aligned + clipped
2068             "?]?]!!!!87!!!?]?@"     // reverse strand, native,  aligned + clipped
2069         }
2070         );
2071     }
2072     {
2073         SCOPED_TRACE("CIGAR: 3S4=3D4=3S");
2074         BamRecordTests::CheckPulseQualityTags(
2075             "3S4=3D4=3S",               // CIGAR
2076             "TTTAACCGTTACCG",           // seqBases
2077             "TTTttAACCccGTTAaaCCG",     // pulseCalls
2078             "vvv!!?]?]!!?]?@!!xxx",     // tag data
2079 
2080             {   // all pulses
2081 
2082                 "vvv!!?]?]!!?]?@!!xxx",       // forward strand, genomic
2083                 "vvv!!?]?]!!?]?@!!xxx",       // forward strand, native
2084                 "",    // forward strand, genomic, aligned
2085                 "",    // forward strand, native, aligned
2086                 "",          // forward strand, genomic, aligned, clipped
2087                 "",          // forward strand, native, aligned, clipped
2088                 "xxx!!@?]?!!]?]?!!vvv",       // reverse strand, genomic
2089                 "vvv!!?]?]!!?]?@!!xxx",       // reverse strand, native
2090                 "",    // reverse strand, genomic, aligned
2091                 "",    // reverse strand, native, aligned
2092                 "",          // reverse strand, genomic, aligned, clipped
2093                 ""           // reverse strand, native, aligned, clipped
2094             },
2095             {   // basecalls only
2096 
2097                 "vvv?]?]?]?@xxx",       // forward strand, genomic
2098                 "vvv?]?]?]?@xxx",       // forward strand, native
2099                 "vvv?]?]!!!?]?@xxx",    // forward strand, genomic, aligned
2100                 "vvv?]?]!!!?]?@xxx",    // forward strand, native, aligned
2101                 "?]?]!!!?]?@",          // forward strand, genomic, aligned, clipped
2102                 "?]?]!!!?]?@",          // forward strand, native, aligned, clipped
2103                 "xxx@?]?]?]?vvv",       // reverse strand, genomic
2104                 "vvv?]?]?]?@xxx",       // reverse strand, native
2105                 "xxx@?]?!!!]?]?vvv",    // reverse strand, genomic, aligned
2106                 "vvv?]?]!!!?]?@xxx",    // reverse strand, native, aligned
2107                 "@?]?!!!]?]?",          // reverse strand, genomic, aligned, clipped
2108                 "?]?]!!!?]?@"           // reverse strand, native, aligned, clipped
2109             }
2110         );
2111     }
2112     {
2113         SCOPED_TRACE("CIGAR: 2H4=3D4=3H");
2114         BamRecordTests::CheckPulseQualityTags(
2115             "2H4=3D4=3H",       // CIGAR
2116             "AACCGTTA",         // seqBases
2117             "AAaaCCGggTTA",     // pulseCalls
2118             "?]!!?]?!!]?@",     // tag data
2119 
2120             {   // all pulses
2121 
2122                 "?]!!?]?!!]?@",     // forward strand, genomic
2123                 "?]!!?]?!!]?@",     // forward strand, native
2124                 "",  // forward strand, genomic, aligned
2125                 "",  // forward strand, native, aligned
2126                 "",  // forward strand, genomic, aligned, clipped
2127                 "",  // forward strand, native, aligned, clipped
2128                 "@?]!!?]?!!]?",     // reverse strand, genomic
2129                 "?]!!?]?!!]?@",     // reverse strand, native
2130                 "",  // reverse strand, genomic, aligned
2131                 "",  // reverse strand, native, aligned
2132                 "",  // reverse strand, genomic, aligned, clipped
2133                 ""   // reverse strand, native, aligned, clipped
2134             },
2135             {   // basecalls only
2136 
2137                 "?]?]?]?@",     // forward strand, genomic
2138                 "?]?]?]?@",     // forward strand, native
2139                 "?]?]!!!?]?@",  // forward strand, genomic, aligned
2140                 "?]?]!!!?]?@",  // forward strand, native, aligned
2141                 "?]?]!!!?]?@",  // forward strand, genomic, aligned, clipped
2142                 "?]?]!!!?]?@",  // forward strand, native, aligned, clipped
2143                 "@?]?]?]?",     // reverse strand, genomic
2144                 "?]?]?]?@",     // reverse strand, native
2145                 "@?]?!!!]?]?",  // reverse strand, genomic, aligned
2146                 "?]?]!!!?]?@",  // reverse strand, native, aligned
2147                 "@?]?!!!]?]?",  // reverse strand, genomic, aligned, clipped
2148                 "?]?]!!!?]?@"   // reverse strand, native, aligned, clipped
2149             }
2150         );
2151     }
2152     {
2153         SCOPED_TRACE("CIGAR: 2H3S4=3D4=3S3H");
2154         BamRecordTests::CheckPulseQualityTags(
2155             "2H3S4=3D4=3S3H",           // CIGAR
2156             "TTTAACCGTTACCG",           // seqBases
2157             "TTTttAACCccGTTAaaCCG",     // pulseCalls
2158             "vvv!!?]?]!!?]?@!!xxx",     // tag data
2159 
2160             {   // all pulses
2161 
2162                 "vvv!!?]?]!!?]?@!!xxx",       // forward strand, genomic
2163                 "vvv!!?]?]!!?]?@!!xxx",       // forward strand, native
2164                 "",    // forward strand, genomic, aligned
2165                 "",    // forward strand, native, aligned
2166                 "",          // forward strand, genomic, aligned, clipped
2167                 "",          // forward strand, native, aligned, clipped
2168                 "xxx!!@?]?!!]?]?!!vvv",       // reverse strand, genomic
2169                 "vvv!!?]?]!!?]?@!!xxx",       // reverse strand, native
2170                 "",    // reverse strand, genomic, aligned
2171                 "",    // reverse strand, native, aligned
2172                 "",          // reverse strand, genomic, aligned, clipped
2173                 ""           // reverse strand, native, aligned, clipped
2174             },
2175             {   // basecalls only
2176 
2177                 "vvv?]?]?]?@xxx",       // forward strand, genomic
2178                 "vvv?]?]?]?@xxx",       // forward strand, native
2179                 "vvv?]?]!!!?]?@xxx",    // forward strand, genomic, aligned
2180                 "vvv?]?]!!!?]?@xxx",    // forward strand, native, aligned
2181                 "?]?]!!!?]?@",          // forward strand, genomic, aligned, clipped
2182                 "?]?]!!!?]?@",          // forward strand, native, aligned, clipped
2183                 "xxx@?]?]?]?vvv",       // reverse strand, genomic
2184                 "vvv?]?]?]?@xxx",       // reverse strand, native
2185                 "xxx@?]?!!!]?]?vvv",    // reverse strand, genomic, aligned
2186                 "vvv?]?]!!!?]?@xxx",    // reverse strand, native, aligned
2187                 "@?]?!!!]?]?",          // reverse strand, genomic, aligned, clipped
2188                 "?]?]!!!?]?@"           // reverse strand, native, aligned, clipped
2189             }
2190         );
2191     }
2192 }
2193 
TEST(BamRecordTest,PulseFrameTags)2194 TEST(BamRecordTest, PulseFrameTags)
2195 {
2196     {
2197         SCOPED_TRACE("CIGAR: 4=3D4=");
2198         BamRecordTests::CheckPulseFrameTags(
2199             "4=3D4=",       // CIGAR
2200             "AACCGTTA",     // seqBases
2201             "AAaaCCGggTTA", // pulseCalls
2202             { 10, 20, 0,0, 10, 20, 10, 0,0, 20, 10, 30 },   // tag data
2203 
2204             {   // all pulses
2205 
2206                 { 10, 20, 0,0, 10, 20, 10, 0,0, 20, 10, 30 },             // forward strand, genomic
2207                 { 10, 20, 0,0, 10, 20, 10, 0,0, 20, 10, 30 },             // forward strand, native
2208                 { },    // forward strand, genomic, aligned
2209                 { },    // forward strand, native, aligned
2210                 { },    // forward strand, genomic, aligned, clipped
2211                 { },    // forward strand, native, aligned, clipped
2212                 { 30, 10, 20, 0,0, 10, 20, 10, 0,0, 20, 10 },             // reverse strand, genomic
2213                 { 10, 20, 0,0, 10, 20, 10, 0,0, 20, 10, 30 },             // reverse strand, native
2214                 { },    // reverse strand, genomic, aligned
2215                 { },    // reverse strand, native, aligned
2216                 { },    // reverse strand, genomic, aligned, clipped
2217                 { }     // reverse strand, native, aligned, clipped
2218             },
2219             {   // basecalls only
2220 
2221                 { 10, 20, 10, 20, 10, 20, 10, 30 },             // forward strand, genomic
2222                 { 10, 20, 10, 20, 10, 20, 10, 30 },             // forward strand, native
2223                 { 10, 20, 10, 20, 0, 0, 0, 10, 20, 10, 30 },    // forward strand, genomic, aligned
2224                 { 10, 20, 10, 20, 0, 0, 0, 10, 20, 10, 30 },    // forward strand, native, aligned
2225                 { 10, 20, 10, 20, 0, 0, 0, 10, 20, 10, 30 },    // forward strand, genomic, aligned, clipped
2226                 { 10, 20, 10, 20, 0, 0, 0, 10, 20, 10, 30 },    // forward strand, native, aligned, clipped
2227                 { 30, 10, 20, 10, 20, 10, 20, 10 },             // reverse strand, genomic
2228                 { 10, 20, 10, 20, 10, 20, 10, 30 },             // reverse strand, native
2229                 { 30, 10, 20, 10, 0, 0, 0, 20, 10, 20, 10 },    // reverse strand, genomic, aligned
2230                 { 10, 20, 10, 20, 0, 0, 0, 10, 20, 10, 30 },    // reverse strand, native, aligned
2231                 { 30, 10, 20, 10, 0, 0, 0, 20, 10, 20, 10 },    // reverse strand, genomic, aligned, clipped
2232                 { 10, 20, 10, 20, 0, 0, 0, 10, 20, 10, 30 }     // reverse strand, native, aligned, clipped
2233             }
2234         );
2235     }
2236     {
2237         SCOPED_TRACE("CIGAR: 4=1D2I2D4=");
2238         BamRecordTests::CheckPulseFrameTags(
2239             "4=1D2I2D4=",       // CIGAR
2240             "ATCCTAGGTT",       // seqBases
2241             "ATttCCTtAGGggTT",  // pulseCalls
2242             { 10, 20, 0, 0, 10, 20, 80, 0, 70, 10, 20, 0, 0, 10, 30 }, // tag data
2243 
2244             {   // all pulses
2245 
2246                 { 10, 20, 0, 0, 10, 20, 80, 0, 70, 10, 20, 0, 0, 10, 30 },             // forward strand, genomic
2247                 { 10, 20, 0, 0, 10, 20, 80, 0, 70, 10, 20, 0, 0, 10, 30 },             // forward strand, native
2248                 { },    // forward strand, genomic, aligned
2249                 { },    // forward strand, native, aligned
2250                 { },    // forward strand, genomic, aligned, clipped
2251                 { },    // forward strand, native, aligned, clipped
2252                 { 30, 10, 0, 0, 20, 10, 70, 0, 80, 20, 10, 0, 0, 20, 10 },             // reverse strand, genomic
2253                 { 10, 20, 0, 0, 10, 20, 80, 0, 70, 10, 20, 0, 0, 10, 30 },             // reverse strand, native
2254                 { },    // reverse strand, genomic, aligned
2255                 { },    // reverse strand, native, aligned
2256                 { },    // reverse strand, genomic, aligned, clipped
2257                 { }     // reverse strand, native, aligned, clipped
2258             },
2259             {   // basecalls only
2260 
2261                 { 10, 20, 10, 20, 80, 70, 10, 20, 10, 30 },             // forward strand, genomic
2262                 { 10, 20, 10, 20, 80, 70, 10, 20, 10, 30 },             // forward strand, native
2263                 { 10, 20, 10, 20, 0, 80, 70, 0, 0, 10, 20, 10, 30 },    // forward strand, genomic, aligned
2264                 { 10, 20, 10, 20, 0, 80, 70, 0, 0, 10, 20, 10, 30 },    // forward strand, native, aligned
2265                 { 10, 20, 10, 20, 0, 80, 70, 0, 0, 10, 20, 10, 30 },    // forward strand, genomic, aligned, clipped
2266                 { 10, 20, 10, 20, 0, 80, 70, 0, 0, 10, 20, 10, 30 },    // forward strand, native, aligned, clipped
2267                 { 30, 10, 20, 10, 70, 80, 20, 10, 20, 10 },             // reverse strand, genomic
2268                 { 10, 20, 10, 20, 80, 70, 10, 20, 10, 30 },             // reverse strand, native
2269                 { 30, 10, 20, 10, 0, 70, 80, 0, 0, 20, 10, 20, 10 },    // reverse strand, genomic, aligned
2270                 { 10, 20, 10, 20, 0, 0, 80, 70, 0, 10, 20, 10, 30 },    // reverse strand, native, aligned
2271                 { 30, 10, 20, 10, 0, 70, 80, 0, 0, 20, 10, 20, 10 },    // reverse strand, genomic, aligned, clipped
2272                 { 10, 20, 10, 20, 0, 0, 80, 70, 0, 10, 20, 10, 30 }     // reverse strand, native, aligned, clipped
2273             }
2274         );
2275     }
2276     {
2277         SCOPED_TRACE("CIGAR: 4=1D2P2I2P2D4=");
2278         BamRecordTests::CheckPulseFrameTags(
2279             "4=1D2P2I2P2D4=",   // CIGAR
2280             "ATCCTAGGTT",       // seqBases
2281             "ATttCCTtAGGggTT",  // pulseCalls
2282             { 10, 20, 0, 0, 10, 20, 80, 0, 70, 10, 20, 0, 0, 10, 30 }, // tag data
2283 
2284             {   // all pulses
2285 
2286                 { 10, 20, 0, 0, 10, 20, 80, 0, 70, 10, 20, 0, 0, 10, 30 },             // forward strand, genomic
2287                 { 10, 20, 0, 0, 10, 20, 80, 0, 70, 10, 20, 0, 0, 10, 30 },             // forward strand, native
2288                 { },    // forward strand, genomic, aligned
2289                 { },    // forward strand, native, aligned
2290                 { },    // forward strand, genomic, aligned, clipped
2291                 { },    // forward strand, native, aligned, clipped
2292                 { 30, 10, 0, 0, 20, 10, 70, 0, 80, 20, 10, 0, 0, 20, 10 },             // reverse strand, genomic
2293                 { 10, 20, 0, 0, 10, 20, 80, 0, 70, 10, 20, 0, 0, 10, 30 },             // reverse strand, native
2294                 { },    // reverse strand, genomic, aligned
2295                 { },    // reverse strand, native, aligned
2296                 { },    // reverse strand, genomic, aligned, clipped
2297                 { }     // reverse strand, native, aligned, clipped
2298             },
2299             {   // basecalls only
2300 
2301                 { 10, 20, 10, 20, 80, 70, 10, 20, 10, 30 },                         // forward strand, genomic
2302                 { 10, 20, 10, 20, 80, 70, 10, 20, 10, 30 },                         // forward strand, native
2303                 { 10, 20, 10, 20, 0, 0, 0, 80, 70, 0, 0, 0, 0, 10, 20, 10, 30 },    // forward strand, genomic, aligned
2304                 { 10, 20, 10, 20, 0, 0, 0, 80, 70, 0, 0, 0, 0, 10, 20, 10, 30 },    // forward strand, native, aligned
2305                 { 10, 20, 10, 20, 0, 0, 0, 80, 70, 0, 0, 0, 0, 10, 20, 10, 30 },    // forward strand, genomic, aligned, clipped
2306                 { 10, 20, 10, 20, 0, 0, 0, 80, 70, 0, 0, 0, 0, 10, 20, 10, 30 },    // forward strand, native, aligned, clipped
2307                 { 30, 10, 20, 10, 70, 80, 20, 10, 20, 10 },                         // reverse strand, genomic
2308                 { 10, 20, 10, 20, 80, 70, 10, 20, 10, 30 },                         // reverse strand, native
2309                 { 30, 10, 20, 10, 0, 0, 0, 70, 80, 0, 0, 0, 0, 20, 10, 20, 10 },    // reverse strand, genomic, aligned
2310                 { 10, 20, 10, 20, 0, 0, 0, 0, 80, 70, 0, 0, 0, 10, 20, 10, 30 },    // reverse strand, native, aligned
2311                 { 30, 10, 20, 10, 0, 0, 0, 70, 80, 0, 0, 0, 0, 20, 10, 20, 10 },    // reverse strand, genomic, aligned, clipped
2312                 { 10, 20, 10, 20, 0, 0, 0, 0, 80, 70, 0, 0, 0, 10, 20, 10, 30 }     // reverse strand, native, aligned, clipped
2313             }
2314         );
2315     }
2316     {
2317         SCOPED_TRACE("CIGAR: 3S4=3D4=3S");
2318         BamRecordTests::CheckPulseFrameTags(
2319             "3S4=3D4=3S",               // CIGAR
2320             "TTTAACCGTTACCG",           // seqBases
2321             "TTTttAACCccGTTAaaCCG",     // pulseCalls
2322             { 40, 40, 40, 0, 0, 10, 20, 10, 20, 0, 0, 10, 20, 10, 30, 0, 0, 50, 50, 50 },   // tag data
2323 
2324             {   // all pulses
2325 
2326                 { 40, 40, 40, 0, 0, 10, 20, 10, 20, 0, 0, 10, 20, 10, 30, 0, 0, 50, 50, 50 },             // forward strand, genomic
2327                 { 40, 40, 40, 0, 0, 10, 20, 10, 20, 0, 0, 10, 20, 10, 30, 0, 0, 50, 50, 50 },             // forward strand, native
2328                 { },    // forward strand, genomic, aligned
2329                 { },    // forward strand, native, aligned
2330                 { },    // forward strand, genomic, aligned, clipped
2331                 { },    // forward strand, native, aligned, clipped
2332                 { 50, 50, 50, 0, 0, 30, 10, 20, 10, 0, 0, 20, 10, 20, 10, 0, 0, 40, 40, 40 },             // reverse strand, genomic
2333                 { 40, 40, 40, 0, 0, 10, 20, 10, 20, 0, 0, 10, 20, 10, 30, 0, 0, 50, 50, 50 },             // reverse strand, native
2334                 { },    // reverse strand, genomic, aligned
2335                 { },    // reverse strand, native, aligned
2336                 { },    // reverse strand, genomic, aligned, clipped
2337                 { }     // reverse strand, native, aligned, clipped
2338             },
2339             {   // basecalls only
2340 
2341                 { 40, 40, 40, 10, 20, 10, 20, 10, 20, 10, 30, 50, 50, 50 },             // forward strand, genomic
2342                 { 40, 40, 40, 10, 20, 10, 20, 10, 20, 10, 30, 50, 50, 50 },             // forward strand, native
2343                 { 40, 40, 40, 10, 20, 10, 20, 0, 0, 0, 10, 20, 10, 30, 50, 50, 50 },    // forward strand, genomic, aligned
2344                 { 40, 40, 40, 10, 20, 10, 20, 0, 0, 0, 10, 20, 10, 30, 50, 50, 50 },    // forward strand, native, aligned
2345                 { 10, 20, 10, 20, 0, 0, 0, 10, 20, 10, 30 },                            // forward strand, genomic, aligned, clipped
2346                 { 10, 20, 10, 20, 0, 0, 0, 10, 20, 10, 30 },                            // forward strand, native, aligned, clipped
2347                 { 50, 50, 50, 30, 10, 20, 10, 20, 10, 20, 10, 40, 40, 40 },             // reverse strand, genomic
2348                 { 40, 40, 40, 10, 20, 10, 20, 10, 20, 10, 30, 50, 50, 50 },             // reverse strand, native
2349                 { 50, 50, 50, 30, 10, 20, 10, 0, 0, 0, 20, 10, 20, 10, 40, 40, 40 },    // reverse strand, genomic, aligned
2350                 { 40, 40, 40, 10, 20, 10, 20, 0, 0, 0, 10, 20, 10, 30, 50, 50, 50 },    // reverse strand, native, aligned
2351                 { 30, 10, 20, 10, 0, 0, 0, 20, 10, 20, 10 },                            // reverse strand, genomic, aligned, clipped
2352                 { 10, 20, 10, 20, 0, 0, 0, 10, 20, 10, 30 }                             // reverse strand, native, aligned, clipped
2353             }
2354         );
2355     }
2356     {
2357         SCOPED_TRACE("CIGAR: 2H4=3D4=3H");
2358         BamRecordTests::CheckPulseFrameTags(
2359             "2H4=3D4=3H",       // CIGAR
2360             "AACCGTTA",         // seqBases
2361             "AAaaCCGggTTA",     // pulseCalls
2362             { 10, 20, 0, 0, 10, 20, 10, 0, 0, 20, 10, 30 }, // tag data
2363 
2364             {   // all pulses
2365 
2366                 { 10, 20, 0, 0, 10, 20, 10, 0, 0, 20, 10, 30 },             // forward strand, genomic
2367                 { 10, 20, 0, 0, 10, 20, 10, 0, 0, 20, 10, 30 },             // forward strand, native
2368                 { },    // forward strand, genomic, aligned
2369                 { },    // forward strand, native, aligned
2370                 { },    // forward strand, genomic, aligned, clipped
2371                 { },    // forward strand, native, aligned, clipped
2372                 { 30, 10, 20, 0, 0, 10, 20, 10, 0, 0, 20, 10 },             // reverse strand, genomic
2373                 { 10, 20, 0, 0, 10, 20, 10, 0, 0, 20, 10, 30 },             // reverse strand, native
2374                 { },    // reverse strand, genomic, aligned
2375                 { },    // reverse strand, native, aligned
2376                 { },    // reverse strand, genomic, aligned, clipped
2377                 { }     // reverse strand, native, aligned, clipped
2378             },
2379             {   // basecalls only
2380 
2381                 { 10, 20, 10, 20, 10, 20, 10, 30 },             // forward strand, genomic
2382                 { 10, 20, 10, 20, 10, 20, 10, 30 },             // forward strand, native
2383                 { 10, 20, 10, 20, 0, 0, 0, 10, 20, 10, 30 },    // forward strand, genomic, aligned
2384                 { 10, 20, 10, 20, 0, 0, 0, 10, 20, 10, 30 },    // forward strand, native, aligned
2385                 { 10, 20, 10, 20, 0, 0, 0, 10, 20, 10, 30 },    // forward strand, genomic, aligned, clipped
2386                 { 10, 20, 10, 20, 0, 0, 0, 10, 20, 10, 30 },    // forward strand, native, aligned, clipped
2387                 { 30, 10, 20, 10, 20, 10, 20, 10 },             // reverse strand, genomic
2388                 { 10, 20, 10, 20, 10, 20, 10, 30 },             // reverse strand, native
2389                 { 30, 10, 20, 10, 0, 0, 0, 20, 10, 20, 10 },    // reverse strand, genomic, aligned
2390                 { 10, 20, 10, 20, 0, 0, 0, 10, 20, 10, 30 },    // reverse strand, native, aligned
2391                 { 30, 10, 20, 10, 0, 0, 0, 20, 10, 20, 10 },    // reverse strand, genomic, aligned, clipped
2392                 { 10, 20, 10, 20, 0, 0, 0, 10, 20, 10, 30 }     // reverse strand, native, aligned, clipped
2393             }
2394         );
2395     }
2396     {
2397         SCOPED_TRACE("CIGAR: 2H3S4=3D4=3S3H");
2398         BamRecordTests::CheckPulseFrameTags(
2399             "2H3S4=3D4=3S3H",           // CIGAR
2400             "TTTAACCGTTACCG",           // seqBases
2401             "TTTttAACCccGTTAaaCCG",     // pulseCalls
2402             { 40, 40, 40, 0, 0, 10, 20, 10, 20, 0, 0, 10, 20, 10, 30, 0, 0, 50, 50, 50 },                 // tag data
2403 
2404             {   // all pulses
2405 
2406                 { 40, 40, 40, 0, 0, 10, 20, 10, 20, 0, 0, 10, 20, 10, 30, 0, 0, 50, 50, 50 },             // forward strand, genomic
2407                 { 40, 40, 40, 0, 0, 10, 20, 10, 20, 0, 0, 10, 20, 10, 30, 0, 0, 50, 50, 50 },             // forward strand, native
2408                 { },    // forward strand, genomic, aligned
2409                 { },    // forward strand, native, aligned
2410                 { },    // forward strand, genomic, aligned, clipped
2411                 { },    // forward strand, native, aligned, clipped
2412                 { 50, 50, 50, 0, 0, 30, 10, 20, 10, 0, 0, 20, 10, 20, 10, 0, 0, 40, 40, 40 },             // reverse strand, genomic
2413                 { 40, 40, 40, 0, 0, 10, 20, 10, 20, 0, 0, 10, 20, 10, 30, 0, 0, 50, 50, 50 },             // reverse strand, native
2414                 { },    // reverse strand, genomic, aligned
2415                 { },    // reverse strand, native, aligned
2416                 { },    // reverse strand, genomic, aligned, clipped
2417                 { }     // reverse strand, native, aligned, clipped
2418             },
2419             {   // basecalls only
2420 
2421                 { 40, 40, 40, 10, 20, 10, 20, 10, 20, 10, 30, 50, 50, 50 },             // forward strand, genomic
2422                 { 40, 40, 40, 10, 20, 10, 20, 10, 20, 10, 30, 50, 50, 50 },             // forward strand, native
2423                 { 40, 40, 40, 10, 20, 10, 20, 0, 0, 0, 10, 20, 10, 30, 50, 50, 50 },    // forward strand, genomic, aligned
2424                 { 40, 40, 40, 10, 20, 10, 20, 0, 0, 0, 10, 20, 10, 30, 50, 50, 50 },    // forward strand, native, aligned
2425                 { 10, 20, 10, 20, 0, 0, 0, 10, 20, 10, 30 },                            // forward strand, genomic, aligned, clipped
2426                 { 10, 20, 10, 20, 0, 0, 0, 10, 20, 10, 30 },                            // forward strand, native, aligned, clipped
2427                 { 50, 50, 50, 30, 10, 20, 10, 20, 10, 20, 10, 40, 40, 40 },             // reverse strand, genomic
2428                 { 40, 40, 40, 10, 20, 10, 20, 10, 20, 10, 30, 50, 50, 50 },             // reverse strand, native
2429                 { 50, 50, 50, 30, 10, 20, 10, 0, 0, 0, 20, 10, 20, 10, 40, 40, 40 },    // reverse strand, genomic, aligned
2430                 { 40, 40, 40, 10, 20, 10, 20, 0, 0, 0, 10, 20, 10, 30, 50, 50, 50 },    // reverse strand, native, aligned
2431                 { 30, 10, 20, 10, 0, 0, 0, 20, 10, 20, 10 },                            // reverse strand, genomic, aligned, clipped
2432                 { 10, 20, 10, 20, 0, 0, 0, 10, 20, 10, 30 }                             // reverse strand, native, aligned, clipped
2433             }
2434         );
2435     }
2436 }
2437 
// Table-driven test: feeds six CIGAR layouts through
// BamRecordTests::CheckPulseUIntTags.
//
// Each call passes, in order: the CIGAR string, the base sequence, the pulse
// calls, the raw tag data, and two groups of twelve expected vectors
// ("all pulses" and "basecalls only"). The twelve rows in each group are
// labelled by strand (forward/reverse), orientation (genomic/native) and
// whether the result is aligned and/or clipped.
//
// Patterns visible in the fixtures below:
//   - Positions written in lowercase in pulseCalls carry 0 in the tag data and
//     are absent from the "basecalls only" rows (presumably squashed pulses -
//     confirm against CheckPulseUIntTags).
//   - Every aligned / clipped row of the "all pulses" group is expected to be
//     empty.
//   - "reverse strand, genomic" rows are the element-wise reverse of the
//     corresponding native rows.
TEST(BamRecordTest, PulseUIntTags)
{
    {
        // Matches split by a 3-base deletion; aligned rows carry three 0s in the gap.
        SCOPED_TRACE("CIGAR: 4=3D4=");
        BamRecordTests::CheckPulseUIntTags(
            "4=3D4=",       // CIGAR
            "AACCGTTA",     // seqBases
            "AAaaCCGggTTA", // pulseCalls
            { 10, 20, 0,0, 10, 20, 10, 0,0, 20, 10, 30 },   // tag data

            {   // all pulses

                { 10, 20, 0,0, 10, 20, 10, 0,0, 20, 10, 30 },             // forward strand, genomic
                { 10, 20, 0,0, 10, 20, 10, 0,0, 20, 10, 30 },             // forward strand, native
                { },    // forward strand, genomic, aligned
                { },    // forward strand, native, aligned
                { },    // forward strand, genomic, aligned, clipped
                { },    // forward strand, native, aligned, clipped
                { 30, 10, 20, 0,0, 10, 20, 10, 0,0, 20, 10 },             // reverse strand, genomic
                { 10, 20, 0,0, 10, 20, 10, 0,0, 20, 10, 30 },             // reverse strand, native
                { },    // reverse strand, genomic, aligned
                { },    // reverse strand, native, aligned
                { },    // reverse strand, genomic, aligned, clipped
                { }     // reverse strand, native, aligned, clipped
            },
            {   // basecalls only

                { 10, 20, 10, 20, 10, 20, 10, 30 },             // forward strand, genomic
                { 10, 20, 10, 20, 10, 20, 10, 30 },             // forward strand, native
                { 10, 20, 10, 20, 0, 0, 0, 10, 20, 10, 30 },    // forward strand, genomic, aligned
                { 10, 20, 10, 20, 0, 0, 0, 10, 20, 10, 30 },    // forward strand, native, aligned
                { 10, 20, 10, 20, 0, 0, 0, 10, 20, 10, 30 },    // forward strand, genomic, aligned, clipped
                { 10, 20, 10, 20, 0, 0, 0, 10, 20, 10, 30 },    // forward strand, native, aligned, clipped
                { 30, 10, 20, 10, 20, 10, 20, 10 },             // reverse strand, genomic
                { 10, 20, 10, 20, 10, 20, 10, 30 },             // reverse strand, native
                { 30, 10, 20, 10, 0, 0, 0, 20, 10, 20, 10 },    // reverse strand, genomic, aligned
                { 10, 20, 10, 20, 0, 0, 0, 10, 20, 10, 30 },    // reverse strand, native, aligned
                { 30, 10, 20, 10, 0, 0, 0, 20, 10, 20, 10 },    // reverse strand, genomic, aligned, clipped
                { 10, 20, 10, 20, 0, 0, 0, 10, 20, 10, 30 }     // reverse strand, native, aligned, clipped
            }
        );
    }
    {
        // Deletion, 2-base insertion (values 80, 70), then another deletion.
        SCOPED_TRACE("CIGAR: 4=1D2I2D4=");
        BamRecordTests::CheckPulseUIntTags(
            "4=1D2I2D4=",       // CIGAR
            "ATCCTAGGTT",       // seqBases
            "ATttCCTtAGGggTT",  // pulseCalls
            { 10, 20, 0, 0, 10, 20, 80, 0, 70, 10, 20, 0, 0, 10, 30 }, // tag data

            {   // all pulses

                { 10, 20, 0, 0, 10, 20, 80, 0, 70, 10, 20, 0, 0, 10, 30 },             // forward strand, genomic
                { 10, 20, 0, 0, 10, 20, 80, 0, 70, 10, 20, 0, 0, 10, 30 },             // forward strand, native
                { },    // forward strand, genomic, aligned
                { },    // forward strand, native, aligned
                { },    // forward strand, genomic, aligned, clipped
                { },    // forward strand, native, aligned, clipped
                { 30, 10, 0, 0, 20, 10, 70, 0, 80, 20, 10, 0, 0, 20, 10 },             // reverse strand, genomic
                { 10, 20, 0, 0, 10, 20, 80, 0, 70, 10, 20, 0, 0, 10, 30 },             // reverse strand, native
                { },    // reverse strand, genomic, aligned
                { },    // reverse strand, native, aligned
                { },    // reverse strand, genomic, aligned, clipped
                { }     // reverse strand, native, aligned, clipped
            },
            {   // basecalls only

                { 10, 20, 10, 20, 80, 70, 10, 20, 10, 30 },             // forward strand, genomic
                { 10, 20, 10, 20, 80, 70, 10, 20, 10, 30 },             // forward strand, native
                { 10, 20, 10, 20, 0, 80, 70, 0, 0, 10, 20, 10, 30 },    // forward strand, genomic, aligned
                { 10, 20, 10, 20, 0, 80, 70, 0, 0, 10, 20, 10, 30 },    // forward strand, native, aligned
                { 10, 20, 10, 20, 0, 80, 70, 0, 0, 10, 20, 10, 30 },    // forward strand, genomic, aligned, clipped
                { 10, 20, 10, 20, 0, 80, 70, 0, 0, 10, 20, 10, 30 },    // forward strand, native, aligned, clipped
                { 30, 10, 20, 10, 70, 80, 20, 10, 20, 10 },             // reverse strand, genomic
                { 10, 20, 10, 20, 80, 70, 10, 20, 10, 30 },             // reverse strand, native
                { 30, 10, 20, 10, 0, 70, 80, 0, 0, 20, 10, 20, 10 },    // reverse strand, genomic, aligned
                { 10, 20, 10, 20, 0, 0, 80, 70, 0, 10, 20, 10, 30 },    // reverse strand, native, aligned
                { 30, 10, 20, 10, 0, 70, 80, 0, 0, 20, 10, 20, 10 },    // reverse strand, genomic, aligned, clipped
                { 10, 20, 10, 20, 0, 0, 80, 70, 0, 10, 20, 10, 30 }     // reverse strand, native, aligned, clipped
            }
        );
    }
    {
        // Same reads as the previous case with 2P padding ops added around the
        // insertion; the aligned rows gain extra 0 entries accordingly.
        SCOPED_TRACE("CIGAR: 4=1D2P2I2P2D4=");
        BamRecordTests::CheckPulseUIntTags(
            "4=1D2P2I2P2D4=",   // CIGAR
            "ATCCTAGGTT",       // seqBases
            "ATttCCTtAGGggTT",  // pulseCalls
            { 10, 20, 0, 0, 10, 20, 80, 0, 70, 10, 20, 0, 0, 10, 30 }, // tag data

            {   // all pulses

                { 10, 20, 0, 0, 10, 20, 80, 0, 70, 10, 20, 0, 0, 10, 30 },             // forward strand, genomic
                { 10, 20, 0, 0, 10, 20, 80, 0, 70, 10, 20, 0, 0, 10, 30 },             // forward strand, native
                { },    // forward strand, genomic, aligned
                { },    // forward strand, native, aligned
                { },    // forward strand, genomic, aligned, clipped
                { },    // forward strand, native, aligned, clipped
                { 30, 10, 0, 0, 20, 10, 70, 0, 80, 20, 10, 0, 0, 20, 10 },             // reverse strand, genomic
                { 10, 20, 0, 0, 10, 20, 80, 0, 70, 10, 20, 0, 0, 10, 30 },             // reverse strand, native
                { },    // reverse strand, genomic, aligned
                { },    // reverse strand, native, aligned
                { },    // reverse strand, genomic, aligned, clipped
                { }     // reverse strand, native, aligned, clipped
            },
            {   // basecalls only

                { 10, 20, 10, 20, 80, 70, 10, 20, 10, 30 },                         // forward strand, genomic
                { 10, 20, 10, 20, 80, 70, 10, 20, 10, 30 },                         // forward strand, native
                { 10, 20, 10, 20, 0, 0, 0, 80, 70, 0, 0, 0, 0, 10, 20, 10, 30 },    // forward strand, genomic, aligned
                { 10, 20, 10, 20, 0, 0, 0, 80, 70, 0, 0, 0, 0, 10, 20, 10, 30 },    // forward strand, native, aligned
                { 10, 20, 10, 20, 0, 0, 0, 80, 70, 0, 0, 0, 0, 10, 20, 10, 30 },    // forward strand, genomic, aligned, clipped
                { 10, 20, 10, 20, 0, 0, 0, 80, 70, 0, 0, 0, 0, 10, 20, 10, 30 },    // forward strand, native, aligned, clipped
                { 30, 10, 20, 10, 70, 80, 20, 10, 20, 10 },                         // reverse strand, genomic
                { 10, 20, 10, 20, 80, 70, 10, 20, 10, 30 },                         // reverse strand, native
                { 30, 10, 20, 10, 0, 0, 0, 70, 80, 0, 0, 0, 0, 20, 10, 20, 10 },    // reverse strand, genomic, aligned
                { 10, 20, 10, 20, 0, 0, 0, 0, 80, 70, 0, 0, 0, 10, 20, 10, 30 },    // reverse strand, native, aligned
                { 30, 10, 20, 10, 0, 0, 0, 70, 80, 0, 0, 0, 0, 20, 10, 20, 10 },    // reverse strand, genomic, aligned, clipped
                { 10, 20, 10, 20, 0, 0, 0, 0, 80, 70, 0, 0, 0, 10, 20, 10, 30 }     // reverse strand, native, aligned, clipped
            }
        );
    }
    {
        // Soft clips on both ends (values 40 / 50); the "clipped" rows omit them.
        SCOPED_TRACE("CIGAR: 3S4=3D4=3S");
        BamRecordTests::CheckPulseUIntTags(
            "3S4=3D4=3S",               // CIGAR
            "TTTAACCGTTACCG",           // seqBases
            "TTTttAACCccGTTAaaCCG",     // pulseCalls
            { 40, 40, 40, 0, 0, 10, 20, 10, 20, 0, 0, 10, 20, 10, 30, 0, 0, 50, 50, 50 },   // tag data

            {   // all pulses

                { 40, 40, 40, 0, 0, 10, 20, 10, 20, 0, 0, 10, 20, 10, 30, 0, 0, 50, 50, 50 },             // forward strand, genomic
                { 40, 40, 40, 0, 0, 10, 20, 10, 20, 0, 0, 10, 20, 10, 30, 0, 0, 50, 50, 50 },             // forward strand, native
                { },    // forward strand, genomic, aligned
                { },    // forward strand, native, aligned
                { },    // forward strand, genomic, aligned, clipped
                { },    // forward strand, native, aligned, clipped
                { 50, 50, 50, 0, 0, 30, 10, 20, 10, 0, 0, 20, 10, 20, 10, 0, 0, 40, 40, 40 },             // reverse strand, genomic
                { 40, 40, 40, 0, 0, 10, 20, 10, 20, 0, 0, 10, 20, 10, 30, 0, 0, 50, 50, 50 },             // reverse strand, native
                { },    // reverse strand, genomic, aligned
                { },    // reverse strand, native, aligned
                { },    // reverse strand, genomic, aligned, clipped
                { }     // reverse strand, native, aligned, clipped
            },
            {   // basecalls only

                { 40, 40, 40, 10, 20, 10, 20, 10, 20, 10, 30, 50, 50, 50 },             // forward strand, genomic
                { 40, 40, 40, 10, 20, 10, 20, 10, 20, 10, 30, 50, 50, 50 },             // forward strand, native
                { 40, 40, 40, 10, 20, 10, 20, 0, 0, 0, 10, 20, 10, 30, 50, 50, 50 },    // forward strand, genomic, aligned
                { 40, 40, 40, 10, 20, 10, 20, 0, 0, 0, 10, 20, 10, 30, 50, 50, 50 },    // forward strand, native, aligned
                { 10, 20, 10, 20, 0, 0, 0, 10, 20, 10, 30 },                            // forward strand, genomic, aligned, clipped
                { 10, 20, 10, 20, 0, 0, 0, 10, 20, 10, 30 },                            // forward strand, native, aligned, clipped
                { 50, 50, 50, 30, 10, 20, 10, 20, 10, 20, 10, 40, 40, 40 },             // reverse strand, genomic
                { 40, 40, 40, 10, 20, 10, 20, 10, 20, 10, 30, 50, 50, 50 },             // reverse strand, native
                { 50, 50, 50, 30, 10, 20, 10, 0, 0, 0, 20, 10, 20, 10, 40, 40, 40 },    // reverse strand, genomic, aligned
                { 40, 40, 40, 10, 20, 10, 20, 0, 0, 0, 10, 20, 10, 30, 50, 50, 50 },    // reverse strand, native, aligned
                { 30, 10, 20, 10, 0, 0, 0, 20, 10, 20, 10 },                            // reverse strand, genomic, aligned, clipped
                { 10, 20, 10, 20, 0, 0, 0, 10, 20, 10, 30 }                             // reverse strand, native, aligned, clipped
            }
        );
    }
    {
        // Hard clips only: the expectations carry the same values as the plain
        // "4=3D4=" case above.
        SCOPED_TRACE("CIGAR: 2H4=3D4=3H");
        BamRecordTests::CheckPulseUIntTags(
            "2H4=3D4=3H",       // CIGAR
            "AACCGTTA",         // seqBases
            "AAaaCCGggTTA",     // pulseCalls
            { 10, 20, 0, 0, 10, 20, 10, 0, 0, 20, 10, 30 }, // tag data

            {   // all pulses

                { 10, 20, 0, 0, 10, 20, 10, 0, 0, 20, 10, 30 },             // forward strand, genomic
                { 10, 20, 0, 0, 10, 20, 10, 0, 0, 20, 10, 30 },             // forward strand, native
                { },    // forward strand, genomic, aligned
                { },    // forward strand, native, aligned
                { },    // forward strand, genomic, aligned, clipped
                { },    // forward strand, native, aligned, clipped
                { 30, 10, 20, 0, 0, 10, 20, 10, 0, 0, 20, 10 },             // reverse strand, genomic
                { 10, 20, 0, 0, 10, 20, 10, 0, 0, 20, 10, 30 },             // reverse strand, native
                { },    // reverse strand, genomic, aligned
                { },    // reverse strand, native, aligned
                { },    // reverse strand, genomic, aligned, clipped
                { }     // reverse strand, native, aligned, clipped
            },
            {   // basecalls only

                { 10, 20, 10, 20, 10, 20, 10, 30 },             // forward strand, genomic
                { 10, 20, 10, 20, 10, 20, 10, 30 },             // forward strand, native
                { 10, 20, 10, 20, 0, 0, 0, 10, 20, 10, 30 },    // forward strand, genomic, aligned
                { 10, 20, 10, 20, 0, 0, 0, 10, 20, 10, 30 },    // forward strand, native, aligned
                { 10, 20, 10, 20, 0, 0, 0, 10, 20, 10, 30 },    // forward strand, genomic, aligned, clipped
                { 10, 20, 10, 20, 0, 0, 0, 10, 20, 10, 30 },    // forward strand, native, aligned, clipped
                { 30, 10, 20, 10, 20, 10, 20, 10 },             // reverse strand, genomic
                { 10, 20, 10, 20, 10, 20, 10, 30 },             // reverse strand, native
                { 30, 10, 20, 10, 0, 0, 0, 20, 10, 20, 10 },    // reverse strand, genomic, aligned
                { 10, 20, 10, 20, 0, 0, 0, 10, 20, 10, 30 },    // reverse strand, native, aligned
                { 30, 10, 20, 10, 0, 0, 0, 20, 10, 20, 10 },    // reverse strand, genomic, aligned, clipped
                { 10, 20, 10, 20, 0, 0, 0, 10, 20, 10, 30 }     // reverse strand, native, aligned, clipped
            }
        );
    }
    {
        // Hard clips wrapped around soft clips: the expectations carry the same
        // values as the "3S4=3D4=3S" case above.
        SCOPED_TRACE("CIGAR: 2H3S4=3D4=3S3H");
        BamRecordTests::CheckPulseUIntTags(
            "2H3S4=3D4=3S3H",           // CIGAR
            "TTTAACCGTTACCG",           // seqBases
            "TTTttAACCccGTTAaaCCG",     // pulseCalls
            { 40, 40, 40, 0, 0, 10, 20, 10, 20, 0, 0, 10, 20, 10, 30, 0, 0, 50, 50, 50 },                 // tag data

            {   // all pulses

                { 40, 40, 40, 0, 0, 10, 20, 10, 20, 0, 0, 10, 20, 10, 30, 0, 0, 50, 50, 50 },             // forward strand, genomic
                { 40, 40, 40, 0, 0, 10, 20, 10, 20, 0, 0, 10, 20, 10, 30, 0, 0, 50, 50, 50 },             // forward strand, native
                { },    // forward strand, genomic, aligned
                { },    // forward strand, native, aligned
                { },    // forward strand, genomic, aligned, clipped
                { },    // forward strand, native, aligned, clipped
                { 50, 50, 50, 0, 0, 30, 10, 20, 10, 0, 0, 20, 10, 20, 10, 0, 0, 40, 40, 40 },             // reverse strand, genomic
                { 40, 40, 40, 0, 0, 10, 20, 10, 20, 0, 0, 10, 20, 10, 30, 0, 0, 50, 50, 50 },             // reverse strand, native
                { },    // reverse strand, genomic, aligned
                { },    // reverse strand, native, aligned
                { },    // reverse strand, genomic, aligned, clipped
                { }     // reverse strand, native, aligned, clipped
            },
            {   // basecalls only

                { 40, 40, 40, 10, 20, 10, 20, 10, 20, 10, 30, 50, 50, 50 },             // forward strand, genomic
                { 40, 40, 40, 10, 20, 10, 20, 10, 20, 10, 30, 50, 50, 50 },             // forward strand, native
                { 40, 40, 40, 10, 20, 10, 20, 0, 0, 0, 10, 20, 10, 30, 50, 50, 50 },    // forward strand, genomic, aligned
                { 40, 40, 40, 10, 20, 10, 20, 0, 0, 0, 10, 20, 10, 30, 50, 50, 50 },    // forward strand, native, aligned
                { 10, 20, 10, 20, 0, 0, 0, 10, 20, 10, 30 },                            // forward strand, genomic, aligned, clipped
                { 10, 20, 10, 20, 0, 0, 0, 10, 20, 10, 30 },                            // forward strand, native, aligned, clipped
                { 50, 50, 50, 30, 10, 20, 10, 20, 10, 20, 10, 40, 40, 40 },             // reverse strand, genomic
                { 40, 40, 40, 10, 20, 10, 20, 10, 20, 10, 30, 50, 50, 50 },             // reverse strand, native
                { 50, 50, 50, 30, 10, 20, 10, 0, 0, 0, 20, 10, 20, 10, 40, 40, 40 },    // reverse strand, genomic, aligned
                { 40, 40, 40, 10, 20, 10, 20, 0, 0, 0, 10, 20, 10, 30, 50, 50, 50 },    // reverse strand, native, aligned
                { 30, 10, 20, 10, 0, 0, 0, 20, 10, 20, 10 },                            // reverse strand, genomic, aligned, clipped
                { 10, 20, 10, 20, 0, 0, 0, 10, 20, 10, 30 }                             // reverse strand, native, aligned, clipped
            }
        );
    }
}
2681 
// Round-trip check for the pulse-exclusion tag: store a list of exclusion
// reasons on a freshly created record, then confirm the record reports the tag
// as present and hands back the same list.
TEST(BamRecordTest, PulseExclusionTag)
{
    using Reason = PacBio::BAM::PulseExclusionReason;

    // Includes repeated values (BASE, PAUSE) so duplicates are covered too.
    const std::vector<Reason> expected{
        Reason::BASE,
        Reason::PAUSE,
        Reason::SHORT_PULSE,
        Reason::BURST,
        Reason::BASE,
        Reason::PAUSE
    };

    auto record = BamRecordTests::CreateBam();
    record.PulseExclusionReason(expected);

    EXPECT_TRUE(record.HasPulseExclusion());

    const auto observed = record.PulseExclusionReason();
    EXPECT_EQ(expected, observed);
}
2702 
// Checks how a BamRecord behaves when it belongs to a TRANSCRIPT read group:
// its type is reported as TRANSCRIPT, the hole number is 1234 (matching the
// "transcript/1234" name), and QueryStart()/QueryEnd() throw
// std::runtime_error.
TEST(BamRecordTest, TranscriptRecord)
{
    const std::string readTypeStr{"TRANSCRIPT"};
    const auto readGroupId = MakeReadGroupId("transcript", readTypeStr);

    ReadGroupInfo rg{readGroupId};
    rg.ReadType(readTypeStr);

    // Register the read group with the header. Before this fix `rg` was built
    // but never attached to anything, so the read-group setup above had no
    // effect on the record under test.
    BamHeader header;
    header.Version("1.1")
        .SortOrder("queryname")
        .PacBioBamVersion("3.0.1")
        .AddReadGroup(rg);

    // Tie the record to that read group as well, so the header entry is the
    // one the record refers to.
    BamRecord bam{header};
    bam.ReadGroup(rg);
    bam.Impl().Name("transcript/1234");

    EXPECT_EQ(RecordType::TRANSCRIPT, bam.Type());
    EXPECT_EQ(1234, bam.HoleNumber());
    EXPECT_THROW({bam.QueryStart();}, std::runtime_error);
    EXPECT_THROW({bam.QueryEnd();}, std::runtime_error);
}
2724 
2725 // clang-format on
2726