1 // Author: Derek Barnett
2
#include <array>
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <exception>
#include <initializer_list>
#include <stdexcept>
#include <string>
#include <utility>
#include <vector>
9
10 #include <gtest/gtest.h>
11
12 #include <pbbam/BamRecord.h>
13 #include <pbbam/BamTagCodec.h>
14 #include "../src/MemoryUtils.h"
15
16 // clang-format off
17
18 using namespace PacBio;
19 using namespace PacBio::BAM;
20
21 namespace BamRecordTests {
22
23 static
CreateBamImpl()24 BamRecordImpl CreateBamImpl()
25 {
26 TagCollection tags;
27 tags["HX"] = std::string("1abc75");
28 tags["HX"].Modifier(TagModifier::HEX_STRING);
29 tags["CA"] = std::vector<uint8_t>({34, 5, 125});
30 tags["XY"] = int32_t{-42};
31
32 BamRecordImpl bam;
33 bam.Bin(42);
34 bam.Flag(42);
35 bam.InsertSize(42);
36 bam.MapQuality(42);
37 bam.MatePosition(42);
38 bam.MateReferenceId(42);
39 bam.Position(42);
40 bam.ReferenceId(42);
41 bam.Tags(tags);
42 return bam;
43 }
44
45 static inline
CreateBam()46 BamRecord CreateBam()
47 { return BamRecord{ CreateBamImpl() }; }
48
49 static
CheckRawData(const BamRecordImpl & bam)50 void CheckRawData(const BamRecordImpl& bam)
51 {
52 // ensure raw data (lengths at least) matches API-facing data
53 const uint32_t expectedNameBytes = bam.Name().size() + 1; // include NULL term
54 const uint32_t expectedNameNulls = 4 - (expectedNameBytes % 4);
55 const uint32_t expectedNameLength = expectedNameBytes + expectedNameNulls;
56 const uint32_t expectedNumCigarOps = bam.CigarData().size();
57 const int32_t expectedSeqLength = bam.Sequence().length();
58 const size_t expectedTagsLength = BamTagCodec::Encode(bam.Tags()).size();
59
60 // Name CIGAR Sequence Quals Tags
61 // l_qname + (n_cigar * 4) + (l_qseq+1)/2 + l_qseq + <encoded length>
62 const int expectedTotalDataLength = expectedNameLength + (expectedNumCigarOps * 4) +
63 (expectedSeqLength + 1) / 2 + expectedSeqLength +
64 expectedTagsLength;
65
66 const auto rawData = PacBio::BAM::internal::BamRecordMemory::GetRawData(bam);
67 ASSERT_TRUE(static_cast<bool>(rawData));
68
69 EXPECT_EQ(expectedNameNulls, rawData->core.l_extranul);
70 EXPECT_EQ(expectedNameLength, rawData->core.l_qname);
71 EXPECT_EQ(expectedNumCigarOps, rawData->core.n_cigar);
72 EXPECT_EQ(expectedSeqLength, rawData->core.l_qseq);
73 EXPECT_EQ(expectedTotalDataLength, rawData->l_data);
74 }
75
76 static inline
CheckRawData(const BamRecord & bam)77 void CheckRawData(const BamRecord& bam)
78 { CheckRawData(bam.Impl()); }
79
80 static
MakeCigaredImpl(const std::string & seq,const std::string & cigar,const Strand strand)81 BamRecordImpl MakeCigaredImpl(const std::string& seq,
82 const std::string& cigar,
83 const Strand strand)
84 {
85 BamRecordImpl impl;
86 impl.SetMapped(true).ReferenceId(0).Position(0).MapQuality(0);
87 impl.CigarData(Cigar::FromStdString(cigar));
88 impl.MateReferenceId(-1).MatePosition(-1).InsertSize(0);
89 impl.SetSequenceAndQualities(seq, std::string(seq.size(), '*'));
90 impl.SetReverseStrand(strand == Strand::REVERSE);
91 return impl;
92 }
93
94 static inline
MakeCigaredRecord(const std::string & seq,const std::string & cigar,const Strand strand)95 BamRecord MakeCigaredRecord(const std::string& seq,
96 const std::string& cigar,
97 const Strand strand)
98 { return BamRecord{ MakeCigaredImpl(seq, cigar, strand) }; }
99
100 static
MakeCigaredBaseRecord(const std::string & bases,const std::string & cigar,const Strand strand)101 BamRecord MakeCigaredBaseRecord(const std::string& bases,
102 const std::string& cigar,
103 const Strand strand)
104 {
105 TagCollection tags;
106 tags["dt"] = bases;
107 tags["st"] = bases;
108
109 const std::string seq = std::string(bases.size(), 'N');
110 BamRecordImpl impl = MakeCigaredImpl(seq, cigar, strand);
111 impl.Tags(tags);
112 return BamRecord(std::move(impl));
113 }
114
115 static
MakeCigaredFrameRecord(const std::vector<uint16_t> & frames,const std::string & cigar,const Strand strand)116 BamRecord MakeCigaredFrameRecord(const std::vector<uint16_t>& frames,
117 const std::string& cigar,
118 const Strand strand)
119 {
120 TagCollection tags;
121 tags["ip"] = frames;
122 tags["pw"] = frames;
123
124 const std::string seq = std::string(frames.size(), 'N');
125 BamRecordImpl impl = MakeCigaredImpl(seq, cigar, strand);
126 impl.Tags(tags);
127 return BamRecord(std::move(impl));
128 }
129
130 static
MakeCigaredQualRecord(const std::string & quals,const std::string & cigar,const Strand strand)131 BamRecord MakeCigaredQualRecord(const std::string& quals,
132 const std::string& cigar,
133 const Strand strand)
134 {
135 TagCollection tags;
136 tags["dq"] = quals;
137 tags["iq"] = quals;
138 tags["mq"] = quals;
139 tags["sq"] = quals;
140
141 const std::string seq = std::string(quals.size(), 'N');
142 BamRecordImpl impl = MakeCigaredImpl(seq, cigar, strand);
143 impl.Tags(tags);
144 return BamRecord(std::move(impl));
145 }
146
147 static
MakeCigaredPulseBaseRecord(const std::string & seqBases,const std::string & pulseCalls,const std::string & pulseBases,const std::string & cigar,const Strand strand)148 BamRecord MakeCigaredPulseBaseRecord(const std::string& seqBases,
149 const std::string& pulseCalls,
150 const std::string& pulseBases,
151 const std::string& cigar,
152 const Strand strand)
153 {
154 TagCollection tags;
155 tags["pc"] = pulseCalls; // PulseCall
156 tags["pt"] = pulseBases; // AltLabelTag
157
158 BamRecordImpl impl = MakeCigaredImpl(seqBases, cigar, strand);
159 impl.Tags(tags);
160 return BamRecord(std::move(impl));
161 }
162
163 static
MakeCigaredPulseQualRecord(const std::string & seqBases,const std::string & pulseCalls,const std::string & pulseQuals,const std::string & cigar,const Strand strand)164 BamRecord MakeCigaredPulseQualRecord(const std::string& seqBases,
165 const std::string& pulseCalls,
166 const std::string& pulseQuals,
167 const std::string& cigar,
168 const Strand strand)
169 {
170 TagCollection tags;
171 tags["pc"] = pulseCalls;
172 tags["pv"] = pulseQuals; // AltLabelQV
173 tags["pq"] = pulseQuals; // LabelQV
174 tags["pg"] = pulseQuals; // PulseMergeQV
175
176 BamRecordImpl impl = MakeCigaredImpl(seqBases, cigar, strand);
177 impl.Tags(tags);
178 return BamRecord(std::move(impl));
179 }
180
181 static
MakeCigaredPulseFrameRecord(const std::string & seqBases,const std::string & pulseCalls,const std::vector<uint16_t> & pulseFrames,const std::string & cigar,const Strand strand)182 BamRecord MakeCigaredPulseFrameRecord(const std::string& seqBases,
183 const std::string& pulseCalls,
184 const std::vector<uint16_t>& pulseFrames,
185 const std::string& cigar,
186 const Strand strand)
187 {
188 TagCollection tags;
189 tags["pc"] = pulseCalls;
190 tags["pd"] = pulseFrames; // PrePulseFrames
191 tags["px"] = pulseFrames; // PulseCallWidth
192
193 BamRecordImpl impl = MakeCigaredImpl(seqBases, cigar, strand);
194 impl.Tags(tags);
195 return BamRecord(std::move(impl));
196 }
197
198 static
MakeCigaredPulseUIntRecord(const std::string & seqBases,const std::string & pulseCalls,const std::vector<uint32_t> & pulseUInts,const std::string & cigar,const Strand strand)199 BamRecord MakeCigaredPulseUIntRecord(const std::string& seqBases,
200 const std::string& pulseCalls,
201 const std::vector<uint32_t>& pulseUInts,
202 const std::string& cigar,
203 const Strand strand)
204 {
205 TagCollection tags;
206 tags["pc"] = pulseCalls;
207 tags["sf"] = pulseUInts; // StartFrame
208
209 BamRecordImpl impl = MakeCigaredImpl(seqBases, cigar, strand);
210 impl.Tags(tags);
211 return BamRecord(std::move(impl));
212 }
213
214 // ----------------------------------------------------------
215 // helper structs and methods for checking combinations of:
216 // aligned strand, orientation requested, alignment, clipping
217 // ----------------------------------------------------------
218
// Generic holder for the expected values of the 12 requested states:
// {forward, reverse} strand x {genomic, native} orientation x
// {as stored, aligned, aligned + soft-clips excised}.
//
// Values are supplied positionally; the named accessors below document the
// required order (index 0 = ForwardGenomic ... index 11 = ReverseNativeAlignedClipped).
//
// The constructor is intentionally not `explicit`, so a braced list can be
// converted implicitly wherever an ExpectedResult is expected.
template<typename T>
struct ExpectedResult
{
public:
    // Stores `init`. Throws std::invalid_argument unless exactly 12 values are
    // given. The check is always active (unlike assert, which is removed when
    // NDEBUG is defined), so a list that is too long is rejected as well.
    ExpectedResult(std::initializer_list<T> init)
        : d_(init)
    {
        constexpr std::size_t requiredCount = 12;
        if (init.size() != requiredCount)
            throw std::invalid_argument{"ExpectedResult requires exactly 12 values"};
    }

    // mapped to forward strand
    T ForwardGenomic() const               { return d_.at(0); }
    T ForwardNative() const                { return d_.at(1); }
    T ForwardGenomicAligned() const        { return d_.at(2); }
    T ForwardNativeAligned() const         { return d_.at(3); }
    T ForwardGenomicAlignedClipped() const { return d_.at(4); }
    T ForwardNativeAlignedClipped() const  { return d_.at(5); }

    // mapped to reverse strand
    T ReverseGenomic() const               { return d_.at(6); }
    T ReverseNative() const                { return d_.at(7); }
    T ReverseGenomicAligned() const        { return d_.at(8); }
    T ReverseNativeAligned() const         { return d_.at(9); }
    T ReverseGenomicAlignedClipped() const { return d_.at(10); }
    T ReverseNativeAlignedClipped() const  { return d_.at(11); }

private:
    std::vector<T> d_;
};
246
247 // generic data type checker on the various requested states
248 template<typename DataType, typename MakeRecordType, typename FetchDataType>
CheckAlignAndClip(const std::string & cigar,const DataType & input,const BamRecordTests::ExpectedResult<DataType> & e,const MakeRecordType & makeRecord,const FetchDataType & fetchData)249 void CheckAlignAndClip(const std::string& cigar,
250 const DataType& input,
251 const BamRecordTests::ExpectedResult<DataType>& e,
252 const MakeRecordType& makeRecord,
253 const FetchDataType& fetchData)
254 {
255 { // map to forward strand
256 const BamRecord b = makeRecord(input, cigar, Strand::FORWARD);
257 EXPECT_EQ(e.ForwardGenomic(), fetchData(b, Orientation::GENOMIC, false, false));
258 EXPECT_EQ(e.ForwardNative(), fetchData(b, Orientation::NATIVE, false, false));
259 EXPECT_EQ(e.ForwardGenomicAligned(), fetchData(b, Orientation::GENOMIC, true, false));
260 EXPECT_EQ(e.ForwardNativeAligned(), fetchData(b, Orientation::NATIVE, true, false));
261 EXPECT_EQ(e.ForwardGenomicAlignedClipped(), fetchData(b, Orientation::GENOMIC, true, true));
262 EXPECT_EQ(e.ForwardNativeAlignedClipped(), fetchData(b, Orientation::NATIVE, true, true));
263 }
264 { // map to reverse strand
265 const BamRecord b = makeRecord(input, cigar, Strand::REVERSE);
266 EXPECT_EQ(e.ReverseGenomic(), fetchData(b, Orientation::GENOMIC, false, false));
267 EXPECT_EQ(e.ReverseNative(), fetchData(b, Orientation::NATIVE, false, false));
268 EXPECT_EQ(e.ReverseGenomicAligned(), fetchData(b, Orientation::GENOMIC, true, false));
269 EXPECT_EQ(e.ReverseNativeAligned(), fetchData(b, Orientation::NATIVE, true, false));
270 EXPECT_EQ(e.ReverseGenomicAlignedClipped(), fetchData(b, Orientation::GENOMIC, true, true));
271 EXPECT_EQ(e.ReverseNativeAlignedClipped(), fetchData(b, Orientation::NATIVE, true, true));
272 }
273 }
274
275 template<typename DataType, typename MakeRecordType, typename FetchDataType>
CheckPulseDataAlignAndClip(const std::string & cigar,const std::string & seqBases,const std::string & pulseCalls,const DataType & input,const BamRecordTests::ExpectedResult<DataType> & allPulses,const BamRecordTests::ExpectedResult<DataType> & basecallsOnly,const MakeRecordType & makeRecord,const FetchDataType & fetchData)276 void CheckPulseDataAlignAndClip(const std::string& cigar,
277 const std::string& seqBases,
278 const std::string& pulseCalls,
279 const DataType& input,
280 const BamRecordTests::ExpectedResult<DataType>& allPulses,
281 const BamRecordTests::ExpectedResult<DataType>& basecallsOnly,
282 const MakeRecordType& makeRecord,
283 const FetchDataType& fetchData)
284 {
285 { // map to forward strand
286 const BamRecord b = makeRecord(seqBases, pulseCalls, input, cigar, Strand::FORWARD);
287
288 EXPECT_EQ(allPulses.ForwardGenomic(), fetchData(b, Orientation::GENOMIC, false, false, PulseBehavior::ALL));
289 EXPECT_EQ(allPulses.ForwardNative(), fetchData(b, Orientation::NATIVE, false, false, PulseBehavior::ALL));
290 // no align/clipping operations available on ALL pulses
291
292 EXPECT_EQ(basecallsOnly.ForwardGenomic(), fetchData(b, Orientation::GENOMIC, false, false, PulseBehavior::BASECALLS_ONLY));
293 EXPECT_EQ(basecallsOnly.ForwardNative(), fetchData(b, Orientation::NATIVE, false, false, PulseBehavior::BASECALLS_ONLY));
294 EXPECT_EQ(basecallsOnly.ForwardGenomicAligned(), fetchData(b, Orientation::GENOMIC, true, false, PulseBehavior::BASECALLS_ONLY));
295 EXPECT_EQ(basecallsOnly.ForwardNativeAligned(), fetchData(b, Orientation::NATIVE, true, false, PulseBehavior::BASECALLS_ONLY));
296 EXPECT_EQ(basecallsOnly.ForwardGenomicAlignedClipped(), fetchData(b, Orientation::GENOMIC, true, true, PulseBehavior::BASECALLS_ONLY));
297 EXPECT_EQ(basecallsOnly.ForwardNativeAlignedClipped(), fetchData(b, Orientation::NATIVE, true, true, PulseBehavior::BASECALLS_ONLY));
298 }
299 { // map to reverse strand
300 const BamRecord b = makeRecord(seqBases, pulseCalls, input, cigar, Strand::REVERSE);
301
302 EXPECT_EQ(allPulses.ReverseGenomic(), fetchData(b, Orientation::GENOMIC, false, false, PulseBehavior::ALL));
303 EXPECT_EQ(allPulses.ReverseNative(), fetchData(b, Orientation::NATIVE, false, false, PulseBehavior::ALL));
304 // no align/clipping operations available on ALL pulses
305
306 EXPECT_EQ(basecallsOnly.ReverseGenomic(), fetchData(b, Orientation::GENOMIC, false, false, PulseBehavior::BASECALLS_ONLY));
307 EXPECT_EQ(basecallsOnly.ReverseNative(), fetchData(b, Orientation::NATIVE, false, false, PulseBehavior::BASECALLS_ONLY));
308 EXPECT_EQ(basecallsOnly.ReverseGenomicAligned(), fetchData(b, Orientation::GENOMIC, true, false, PulseBehavior::BASECALLS_ONLY));
309 EXPECT_EQ(basecallsOnly.ReverseNativeAligned(), fetchData(b, Orientation::NATIVE, true, false, PulseBehavior::BASECALLS_ONLY));
310 EXPECT_EQ(basecallsOnly.ReverseGenomicAlignedClipped(), fetchData(b, Orientation::GENOMIC, true, true, PulseBehavior::BASECALLS_ONLY));
311 EXPECT_EQ(basecallsOnly.ReverseNativeAlignedClipped(), fetchData(b, Orientation::NATIVE, true, true, PulseBehavior::BASECALLS_ONLY));
312 }
313 }
314
315 static
CheckBaseTagsClippedAndAligned(const std::string & cigar,const std::string & input,const ExpectedResult<std::string> & e)316 void CheckBaseTagsClippedAndAligned(const std::string& cigar,
317 const std::string& input,
318 const ExpectedResult<std::string>& e)
319 {
320 // aligned record + DeletionTag, SubstitutionTag
321 auto makeRecord = [](const std::string& newBases,
322 const std::string& newCigar,
323 const Strand newStrand)
324 { return MakeCigaredBaseRecord(newBases, newCigar, newStrand); };
325
326 // DeletionTag
327 CheckAlignAndClip(cigar, input, e, makeRecord,
328 [](const BamRecord& b,
329 Orientation orientation,
330 bool aligned,
331 bool exciseSoftClips)
332 { return b.DeletionTag(orientation, aligned, exciseSoftClips); }
333 );
334
335 // SubstitutionTag
336 CheckAlignAndClip(cigar, input, e, makeRecord,
337 [](const BamRecord& b,
338 Orientation orientation,
339 bool aligned,
340 bool exciseSoftClips)
341 { return b.SubstitutionTag(orientation, aligned, exciseSoftClips); }
342 );
343 }
344
345 static
CheckFrameTagsClippedAndAligned(const std::string & cigar,const std::vector<uint16_t> & input,const ExpectedResult<std::vector<uint16_t>> & e)346 void CheckFrameTagsClippedAndAligned(const std::string& cigar,
347 const std::vector<uint16_t>& input,
348 const ExpectedResult<std::vector<uint16_t> >& e)
349 {
350
351 // aligned record + IPD, PulseWidth
352 auto makeRecord = [](const std::vector<uint16_t>& newFrames,
353 const std::string& newCigar,
354 const Strand newStrand)
355 { return BamRecordTests::MakeCigaredFrameRecord(newFrames, newCigar, newStrand); };
356
357 // IPD
358 CheckAlignAndClip(cigar, input, e, makeRecord,
359 [](const BamRecord& b,
360 Orientation orientation,
361 bool aligned,
362 bool exciseSoftClips)
363 { return b.IPD(orientation, aligned, exciseSoftClips).Data(); }
364 );
365
366 // PulseWidth
367 CheckAlignAndClip(cigar, input, e, makeRecord,
368 [](const BamRecord& b,
369 Orientation orientation,
370 bool aligned,
371 bool exciseSoftClips)
372 { return b.PulseWidth(orientation, aligned, exciseSoftClips).Data(); }
373 );
374 }
375
376 static
CheckQualityTagsClippedAndAligned(const std::string & cigar,const std::string & input,const ExpectedResult<std::string> & e)377 void CheckQualityTagsClippedAndAligned(const std::string& cigar,
378 const std::string& input,
379 const ExpectedResult<std::string>& e)
380 {
381 // aligned record + DeletionQV, InsertionQV, MergeQV, SubstitutionQV
382 auto makeRecord = [](const std::string& newQuals,
383 const std::string& newCigar,
384 const Strand newStrand)
385 { return BamRecordTests::MakeCigaredQualRecord(newQuals, newCigar, newStrand); };
386
387 // DeletionQV
388 CheckAlignAndClip(cigar, input, e, makeRecord,
389 [](const BamRecord& b,
390 Orientation orientation,
391 bool aligned,
392 bool exciseSoftClips)
393 { return b.DeletionQV(orientation, aligned, exciseSoftClips).Fastq(); }
394 );
395
396 // InsertionQV
397 CheckAlignAndClip(cigar, input, e, makeRecord,
398 [](const BamRecord& b,
399 Orientation orientation,
400 bool aligned,
401 bool exciseSoftClips)
402 { return b.InsertionQV(orientation, aligned, exciseSoftClips).Fastq(); }
403 );
404
405 // MergeQV
406 CheckAlignAndClip(cigar, input, e, makeRecord,
407 [](const BamRecord& b,
408 Orientation orientation,
409 bool aligned,
410 bool exciseSoftClips)
411 { return b.MergeQV(orientation, aligned, exciseSoftClips).Fastq(); }
412 );
413
414 // SubstitutionQV
415 CheckAlignAndClip(cigar, input, e, makeRecord,
416 [](const BamRecord& b,
417 Orientation orientation,
418 bool aligned,
419 bool exciseSoftClips)
420 { return b.SubstitutionQV(orientation, aligned, exciseSoftClips).Fastq(); }
421 );
422 }
423
424 static
CheckQualitiesClippedAndAligned(const std::string & cigar,const std::string & input,const ExpectedResult<std::string> & e)425 void CheckQualitiesClippedAndAligned(const std::string& cigar,
426 const std::string& input,
427 const ExpectedResult<std::string>& e)
428 {
429 // aligned record w/ dummy SEQ & QUALs under test
430 auto makeRecord = [](const std::string& newQuals,
431 const std::string& newCigar,
432 const Strand newStrand)
433 {
434 const std::string seq = std::string(newQuals.size(), 'N');
435 auto record = BamRecordTests::MakeCigaredRecord(seq, newCigar, newStrand);
436 record.Impl().SetSequenceAndQualities(seq, newQuals);
437 return record;
438 };
439
440 // QUAL
441 CheckAlignAndClip(cigar, input, e, makeRecord,
442 [](const BamRecord& b,
443 Orientation orientation,
444 bool aligned,
445 bool exciseSoftClips)
446 { return b.Qualities(orientation, aligned, exciseSoftClips).Fastq(); }
447 );
448 }
449
450 static
CheckSequenceClippedAndAligned(const std::string & cigar,const std::string & input,const ExpectedResult<std::string> & e)451 void CheckSequenceClippedAndAligned(const std::string& cigar,
452 const std::string& input,
453 const ExpectedResult<std::string>& e)
454 {
455 // aligned record w/ SEQ
456 auto makeRecord = [](const std::string& newSeq,
457 const std::string& newCigar,
458 const Strand newStrand)
459 { return BamRecordTests::MakeCigaredRecord(newSeq, newCigar, newStrand); };
460
461 // SEQ
462 CheckAlignAndClip(cigar, input, e, makeRecord,
463 [](const BamRecord& b,
464 Orientation orientation,
465 bool aligned,
466 bool exciseSoftClips)
467 { return b.Sequence(orientation, aligned, exciseSoftClips); }
468 );
469 }
470
471 static
CheckPulseBaseTags(const std::string & cigar,const std::string & seqBases,const std::string & pulseCalls,const std::string & pulseBases,const ExpectedResult<std::string> & allPulses,const ExpectedResult<std::string> & basecallsOnly)472 void CheckPulseBaseTags(const std::string& cigar,
473 const std::string& seqBases,
474 const std::string& pulseCalls,
475 const std::string& pulseBases,
476 const ExpectedResult<std::string>& allPulses,
477 const ExpectedResult<std::string>& basecallsOnly)
478 {
479 // aligned record + AltLabelTag
480 auto makeRecord = [](const std::string& newSeqBases,
481 const std::string& newPulseCalls,
482 const std::string& newPulseBases,
483 const std::string& newCigar,
484 const Strand newStrand)
485 { return MakeCigaredPulseBaseRecord(newSeqBases, newPulseCalls, newPulseBases, newCigar, newStrand); };
486
487 // AltLabelTag
488 CheckPulseDataAlignAndClip(cigar, seqBases, pulseCalls, pulseBases, allPulses, basecallsOnly, makeRecord,
489 [](const BamRecord& b,
490 Orientation orientation,
491 bool aligned,
492 bool exciseSoftClips,
493 PulseBehavior pulseBehavior)
494 { return b.AltLabelTag(orientation, aligned, exciseSoftClips, pulseBehavior); }
495 );
496 // PulseCall
497 CheckPulseDataAlignAndClip(cigar, seqBases, pulseCalls, pulseBases, allPulses, basecallsOnly, makeRecord,
498 [](const BamRecord& b,
499 Orientation orientation,
500 bool aligned,
501 bool exciseSoftClips,
502 PulseBehavior pulseBehavior)
503 { return b.PulseCall(orientation, aligned, exciseSoftClips, pulseBehavior); }
504 );
505 }
506
507 static
CheckPulseFrameTags(const std::string & cigar,const std::string & seqBases,const std::string & pulseCalls,const std::vector<uint16_t> & pulseFrames,const ExpectedResult<std::vector<uint16_t>> & allPulses,const ExpectedResult<std::vector<uint16_t>> & basecallsOnly)508 void CheckPulseFrameTags(const std::string& cigar,
509 const std::string& seqBases,
510 const std::string& pulseCalls,
511 const std::vector<uint16_t>& pulseFrames,
512 const ExpectedResult<std::vector<uint16_t>>& allPulses,
513 const ExpectedResult<std::vector<uint16_t>>& basecallsOnly)
514 {
515 // aligned record + PrePulseFrames
516 auto makeRecord = [](const std::string& newSeqBases,
517 const std::string& newPulseCalls,
518 const std::vector<uint16_t>& newPulseFrames,
519 const std::string& newCigar,
520 const Strand newStrand)
521 { return MakeCigaredPulseFrameRecord(newSeqBases, newPulseCalls, newPulseFrames, newCigar, newStrand); };
522
523 // PrePulseFrame
524 CheckPulseDataAlignAndClip(cigar, seqBases, pulseCalls, pulseFrames, allPulses, basecallsOnly, makeRecord,
525 [](const BamRecord& b,
526 Orientation orientation,
527 bool aligned,
528 bool exciseSoftClips,
529 PulseBehavior pulseBehavior)
530 { return b.PrePulseFrames(orientation, aligned, exciseSoftClips, pulseBehavior).Data(); }
531 );
532 // PulseCallWidth
533 CheckPulseDataAlignAndClip(cigar, seqBases, pulseCalls, pulseFrames, allPulses, basecallsOnly, makeRecord,
534 [](const BamRecord& b,
535 Orientation orientation,
536 bool aligned,
537 bool exciseSoftClips,
538 PulseBehavior pulseBehavior)
539 { return b.PulseCallWidth(orientation, aligned, exciseSoftClips, pulseBehavior).Data(); }
540 );
541 }
542
/*
    Float-valued pulse tags, listed for reference (no Check* helper in this
    section exercises them):

    { BamRecordTag::PKMEAN,   {"pa", true} },   photons (vector<float>)
    { BamRecordTag::PKMEAN_2, {"ps", true} },   photons
    { BamRecordTag::PKMID,    {"pm", true} },   photons
    { BamRecordTag::PKMID_2,  {"pi", true} },   photons
*/
550
551 static
CheckPulseQualityTags(const std::string & cigar,const std::string & seqBases,const std::string & pulseCalls,const std::string & pulseQuals,const ExpectedResult<std::string> & allPulses,const ExpectedResult<std::string> & basecallsOnly)552 void CheckPulseQualityTags(const std::string& cigar,
553 const std::string& seqBases,
554 const std::string& pulseCalls,
555 const std::string& pulseQuals,
556 const ExpectedResult<std::string>& allPulses,
557 const ExpectedResult<std::string>& basecallsOnly)
558 {
559 // aligned record + AltLabelQV
560 auto makeRecord = [](const std::string& newSeqBases,
561 const std::string& newPulseCalls,
562 const std::string& newPulseQuals,
563 const std::string& newCigar,
564 const Strand newStrand)
565 { return MakeCigaredPulseQualRecord(newSeqBases, newPulseCalls, newPulseQuals, newCigar, newStrand); };
566
567 // AltLabelQV
568 CheckPulseDataAlignAndClip(cigar, seqBases, pulseCalls, pulseQuals, allPulses, basecallsOnly, makeRecord,
569 [](const BamRecord& b,
570 Orientation orientation,
571 bool aligned,
572 bool exciseSoftClips,
573 PulseBehavior pulseBehavior)
574 { return b.AltLabelQV(orientation, aligned, exciseSoftClips, pulseBehavior).Fastq(); }
575 );
576 // LabelQV
577 CheckPulseDataAlignAndClip(cigar, seqBases, pulseCalls, pulseQuals, allPulses, basecallsOnly, makeRecord,
578 [](const BamRecord& b,
579 Orientation orientation,
580 bool aligned,
581 bool exciseSoftClips,
582 PulseBehavior pulseBehavior)
583 { return b.LabelQV(orientation, aligned, exciseSoftClips, pulseBehavior).Fastq(); }
584 );
585 // PulseMergeQV
586 CheckPulseDataAlignAndClip(cigar, seqBases, pulseCalls, pulseQuals, allPulses, basecallsOnly, makeRecord,
587 [](const BamRecord& b,
588 Orientation orientation,
589 bool aligned,
590 bool exciseSoftClips,
591 PulseBehavior pulseBehavior)
592 { return b.PulseMergeQV(orientation, aligned, exciseSoftClips, pulseBehavior).Fastq(); }
593 );
594 }
595
596 static
CheckPulseUIntTags(const std::string & cigar,const std::string & seqBases,const std::string & pulseCalls,const std::vector<uint32_t> & startFrames,const ExpectedResult<std::vector<uint32_t>> & allPulses,const ExpectedResult<std::vector<uint32_t>> & basecallsOnly)597 void CheckPulseUIntTags(const std::string& cigar,
598 const std::string& seqBases,
599 const std::string& pulseCalls,
600 const std::vector<uint32_t>& startFrames,
601 const ExpectedResult<std::vector<uint32_t>>& allPulses,
602 const ExpectedResult<std::vector<uint32_t>>& basecallsOnly)
603 {
604 // aligned record + StartFrame
605 auto makeRecord = [](const std::string& newSeqBases,
606 const std::string& newPulseCalls,
607 const std::vector<uint32_t>& newStartFrames,
608 const std::string& newCigar,
609 const Strand newStrand)
610 { return MakeCigaredPulseUIntRecord(newSeqBases, newPulseCalls, newStartFrames, newCigar, newStrand); };
611
612 // StartFrame
613 CheckPulseDataAlignAndClip(cigar, seqBases, pulseCalls, startFrames, allPulses, basecallsOnly, makeRecord,
614 [](const BamRecord& b,
615 Orientation orientation,
616 bool aligned,
617 bool exciseSoftClips,
618 PulseBehavior pulseBehavior)
619 { return b.StartFrame(orientation, aligned, exciseSoftClips, pulseBehavior); }
620 );
621 }
622
623
624
625 } // namespace BamRecordTests
626
// A default-constructed BamRecord must report the expected defaults for its
// core fields, flags, variable-length data and PacBio tag accessors, and its
// raw data lengths must be consistent with those values.
TEST(BamRecordTest, DefaultValues)
{
    BamRecord bam;
    BamRecordImpl& impl = bam.Impl();
    const std::string noData;

    // ---- BamRecordImpl: core numeric fields ----
    EXPECT_EQ(0, impl.Bin());
    EXPECT_EQ(BamRecordImpl::UNMAPPED, impl.Flag());  // forced init unmapped
    EXPECT_EQ(0, impl.InsertSize());
    EXPECT_EQ(255, impl.MapQuality());
    EXPECT_EQ(-1, impl.MateReferenceId());
    EXPECT_EQ(-1, impl.MatePosition());
    EXPECT_EQ(-1, impl.Position());
    EXPECT_EQ(-1, impl.ReferenceId());
    EXPECT_EQ(0, impl.Tags().size());

    // ---- BamRecordImpl: flag queries ----
    EXPECT_FALSE(impl.IsDuplicate());
    EXPECT_FALSE(impl.IsFailedQC());
    EXPECT_FALSE(impl.IsFirstMate());
    EXPECT_FALSE(impl.IsMapped());  // forced init unmapped
    EXPECT_TRUE(impl.IsMateMapped());
    EXPECT_FALSE(impl.IsMateReverseStrand());
    EXPECT_FALSE(impl.IsPaired());
    EXPECT_TRUE(impl.IsPrimaryAlignment());
    EXPECT_FALSE(impl.IsProperPair());
    EXPECT_FALSE(impl.IsReverseStrand());
    EXPECT_FALSE(impl.IsSecondMate());
    EXPECT_FALSE(impl.IsSupplementaryAlignment());

    // ---- BamRecordImpl: variable-length data is empty ----
    EXPECT_EQ(noData, impl.Name());
    EXPECT_EQ(noData, impl.CigarData().ToStdString());
    EXPECT_EQ(noData, impl.Sequence());
    EXPECT_EQ(noData, impl.Qualities().Fastq());

    // ---- PacBio data: alignment extents ----
    EXPECT_EQ(-1, bam.AlignedStart());
    EXPECT_EQ(-1, bam.AlignedEnd());

    // ---- PacBio data: per-read values ----
    EXPECT_FALSE(bam.HasHoleNumber());
    EXPECT_FALSE(bam.HasNumPasses());
    EXPECT_FALSE(bam.HasQueryEnd());
    EXPECT_FALSE(bam.HasQueryStart());
    EXPECT_FALSE(bam.HasReadAccuracy());

    // QueryStart/QueryEnd are expected to yield 0 here; the other accessors throw
    EXPECT_THROW(bam.HoleNumber(), std::exception);
    EXPECT_THROW(bam.NumPasses(), std::exception);
    EXPECT_EQ(int32_t{0}, bam.QueryEnd());
    EXPECT_EQ(int32_t{0}, bam.QueryStart());
    EXPECT_THROW(bam.ReadAccuracy(), std::exception);

    // ---- PacBio data: per-base values ----
    EXPECT_FALSE(bam.HasDeletionQV());
    EXPECT_FALSE(bam.HasDeletionTag());
    EXPECT_FALSE(bam.HasInsertionQV());
    EXPECT_FALSE(bam.HasMergeQV());
    EXPECT_FALSE(bam.HasSubstitutionQV());
    EXPECT_FALSE(bam.HasSubstitutionTag());

    EXPECT_THROW(bam.DeletionQV(), std::exception);
    EXPECT_THROW(bam.DeletionTag(), std::exception);
    EXPECT_THROW(bam.InsertionQV(), std::exception);
    EXPECT_THROW(bam.MergeQV(), std::exception);
    EXPECT_THROW(bam.SubstitutionQV(), std::exception);
    EXPECT_THROW(bam.SubstitutionTag(), std::exception);

    // ---- raw data lengths ----
    BamRecordTests::CheckRawData(bam);
}
694
// Constructing or assigning a BamRecord from a BamRecordImpl (by copy and by
// move) must carry over every core field and tag, and copies must be deep.
TEST(BamRecordTest, FromBamRecordImpl)
{
    // Checks that `impl` holds exactly what BamRecordTests::CreateBamImpl() sets:
    // all core fields == 42, plus the HX / XY / CA tags. `label` is attached to
    // any failure via SCOPED_TRACE so the report names the record being checked.
    const auto expectFixtureData = [](const BamRecordImpl& impl, const char* label)
    {
        SCOPED_TRACE(label);

        EXPECT_EQ(42, impl.Bin());
        EXPECT_EQ(42, impl.Flag());
        EXPECT_EQ(42, impl.InsertSize());
        EXPECT_EQ(42, impl.MapQuality());
        EXPECT_EQ(42, impl.MateReferenceId());
        EXPECT_EQ(42, impl.MatePosition());
        EXPECT_EQ(42, impl.Position());
        EXPECT_EQ(42, impl.ReferenceId());

        const TagCollection tags = impl.Tags();
        EXPECT_TRUE(tags.at("HX").HasModifier(TagModifier::HEX_STRING));
        EXPECT_EQ(std::string("1abc75"), tags.at("HX").ToString());
        EXPECT_EQ(int32_t{-42}, tags.at("XY").ToInt32());
        EXPECT_EQ(std::vector<uint8_t>({34, 5, 125}), tags.at("CA").ToUInt8Array());
    };

    // check generic data
    BamRecordImpl genericBam = BamRecordTests::CreateBamImpl();
    expectFixtureData(genericBam, "source BamRecordImpl");

    // copy ctor
    BamRecord bam1(genericBam);
    expectFixtureData(bam1.Impl(), "bam1: copy-constructed from BamRecordImpl");

    // copy assignment
    BamRecord bam2;
    bam2 = genericBam;
    expectFixtureData(bam2.Impl(), "bam2: copy-assigned from BamRecordImpl");

    // change genericBam, make sure we deep copied bam1 & bam2
    genericBam.Position(2000);

    EXPECT_EQ(2000, genericBam.Position());
    EXPECT_EQ(42, bam1.Impl().Position());
    EXPECT_EQ(42, bam2.Impl().Position());

    // move ctor
    // (std::move on the temporary is deliberate; the pragma silences clang's
    //  -Wpessimizing-move for it)
#ifdef __clang__
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wpessimizing-move"
#endif
    BamRecord bam3(std::move(BamRecordTests::CreateBamImpl()));
#ifdef __clang__
#pragma clang diagnostic pop
#endif
    expectFixtureData(bam3.Impl(), "bam3: move-constructed from BamRecordImpl");

    // move assignment
    BamRecord bam4;
#ifdef __clang__
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wpessimizing-move"
#endif
    bam4 = std::move(BamRecordTests::CreateBamImpl());
#ifdef __clang__
#pragma clang diagnostic pop
#endif
    expectFixtureData(bam4.Impl(), "bam4: move-assigned from BamRecordImpl");
}
810
// Populates a default-constructed BamRecord, assigns it to itself, and then
// confirms that the core fields and tags still read back unchanged and that
// BamRecordTests::CheckRawData accepts the record.
TEST(BamRecordTest, SelfAssignmentTolerated)
{
    // set every core field through the record's implementation object
    BamRecord bam1;
    bam1.Impl().Bin(42);
    bam1.Impl().Flag(42);
    bam1.Impl().InsertSize(42);
    bam1.Impl().MapQuality(42);
    bam1.Impl().MatePosition(42);
    bam1.Impl().MateReferenceId(42);
    bam1.Impl().Position(42);
    bam1.Impl().ReferenceId(42);

    // attach one hex-string tag, one uint8 array tag, and one int32 tag
    TagCollection tags;
    tags["HX"] = std::string("1abc75");
    tags["HX"].Modifier(TagModifier::HEX_STRING);
    tags["CA"] = std::vector<uint8_t>({34, 5, 125});
    tags["XY"] = int32_t{-42};
    bam1.Impl().Tags(tags);

    // The self-assignment is the behavior under test. Clang reports it via
    // -Wself-assign-overloaded, so silence that warning for this statement only
    // (checked with __has_warning so older clang versions are not handed an
    // unknown warning group).
#ifdef __clang__
#pragma clang diagnostic push
#if __has_warning("-Wself-assign-overloaded")
#pragma clang diagnostic ignored "-Wself-assign-overloaded"
#endif
#endif
    bam1 = bam1;
#ifdef __clang__
#pragma clang diagnostic pop
#endif

    // core fields must be untouched
    EXPECT_EQ(42, bam1.Impl().Bin());
    EXPECT_EQ(42, bam1.Impl().Flag());
    EXPECT_EQ(42, bam1.Impl().InsertSize());
    EXPECT_EQ(42, bam1.Impl().MapQuality());
    EXPECT_EQ(42, bam1.Impl().MateReferenceId());
    EXPECT_EQ(42, bam1.Impl().MatePosition());
    EXPECT_EQ(42, bam1.Impl().Position());
    EXPECT_EQ(42, bam1.Impl().ReferenceId());

    // tags must be untouched as well
    const TagCollection fetchedTags1 = bam1.Impl().Tags();
    EXPECT_TRUE(fetchedTags1.at("HX").HasModifier(TagModifier::HEX_STRING));
    EXPECT_EQ(std::string("1abc75"), fetchedTags1.at("HX").ToString());
    EXPECT_EQ(int32_t{-42}, fetchedTags1.at("XY").ToInt32());
    EXPECT_EQ(std::vector<uint8_t>({34, 5, 125}), fetchedTags1.at("CA").ToUInt8Array());

    BamRecordTests::CheckRawData(bam1);
}
849
// Builds a record with BamRecordTests::CreateBam() and confirms that its core
// fields and tags read back with the generic values, then hands the record to
// BamRecordTests::CheckRawData. The PacBio-specific setter/getter checks are
// present but commented out.
TEST(BamRecordTest, CoreSetters)
{
    // start from the generic, pre-populated record
    BamRecord bam = BamRecordTests::CreateBam();

    // fixtures referenced only by the commented-out PacBio checks below
    QualityValues testQVs;
    testQVs.push_back(0);
    testQVs.push_back(1);

    const std::string testTags = "GATTACA";

    // PacBio setters (currently disabled)
    //    bam.AlignedStart(42);
    //    bam.AlignedEnd(42);
    //    bam.DeletionQVs(testQVs);
    //    bam.DeletionTags(testTags);
    //    bam.HoleNumber(42);
    //    bam.InsertionQVs(testQVs);
    //    bam.MergeQVs(testQVs);
    //    bam.NumPasses(42);
    //    bam.QueryEnd(42);
    //    bam.QueryStart(42);
    //    bam.ReadAccuracy(42);
    //    bam.ReferenceEnd(42);
    //    bam.ReferenceStart(42);
    //    bam.SubstitutionQVs(testQVs);
    //    bam.SubstitutionTags(testTags);

    // generic core fields, read through the implementation object
    auto& impl = bam.Impl();
    EXPECT_EQ(42, impl.Bin());
    EXPECT_EQ(42, impl.Flag());
    EXPECT_EQ(42, impl.InsertSize());
    EXPECT_EQ(42, impl.MapQuality());
    EXPECT_EQ(42, impl.MateReferenceId());
    EXPECT_EQ(42, impl.MatePosition());
    EXPECT_EQ(42, impl.Position());
    EXPECT_EQ(42, impl.ReferenceId());

    // PacBio getters (currently disabled)
    //    EXPECT_EQ(42, bam.AlignedStart());
    //    EXPECT_EQ(42, bam.AlignedEnd());
    //    EXPECT_EQ(testQVs, bam.DeletionQVs());
    //    EXPECT_EQ(testTags, bam.DeletionTags());
    //    EXPECT_EQ(42, bam.HoleNumber());
    //    EXPECT_EQ(testQVs, bam.InsertionQVs());
    //    EXPECT_EQ(testQVs, bam.MergeQVs());
    //    EXPECT_EQ(42, bam.NumPasses());
    //    EXPECT_EQ(42, bam.QueryEnd());
    //    EXPECT_EQ(42, bam.QueryStart());
    //    EXPECT_EQ(42, bam.ReadAccuracy());
    //    EXPECT_EQ(42, bam.ReferenceEnd());
    //    EXPECT_EQ(42, bam.ReferenceStart());
    //    EXPECT_EQ(testQVs, bam.SubstitutionQVs());
    //    EXPECT_EQ(testTags, bam.SubstitutionTags());

    // tags: hex string, int32, and uint8 array
    const TagCollection tagsOut = impl.Tags();
    EXPECT_TRUE(tagsOut.at("HX").HasModifier(TagModifier::HEX_STRING));
    EXPECT_EQ(std::string("1abc75"), tagsOut.at("HX").ToString());
    EXPECT_EQ(int32_t{-42}, tagsOut.at("XY").ToInt32());
    EXPECT_EQ(std::vector<uint8_t>({34, 5, 125}), tagsOut.at("CA").ToUInt8Array());

    BamRecordTests::CheckRawData(bam);
}
915
// Runs CheckSequenceClippedAndAligned on a single "13=" CIGAR. The expected
// list holds six forward-strand entries followed by six reverse-strand entries
// (genomic/native, then aligned, then aligned + clipped).
TEST(BamRecordTest, SequenceOrientation)
{
    SCOPED_TRACE("Simple CIGAR Sequence");
    BamRecordTests::CheckSequenceClippedAndAligned(
        "13=",            // CIGAR
        "ATATATCCCGGCG",  // input
        {
            // -- forward strand --
            "ATATATCCCGGCG",  // genomic
            "ATATATCCCGGCG",  // native
            "ATATATCCCGGCG",  // genomic, aligned
            "ATATATCCCGGCG",  // native, aligned
            "ATATATCCCGGCG",  // genomic, aligned + clipped
            "ATATATCCCGGCG",  // native, aligned + clipped
            // -- reverse strand --
            "ATATATCCCGGCG",  // genomic
            "CGCCGGGATATAT",  // native
            "ATATATCCCGGCG",  // genomic, aligned
            "CGCCGGGATATAT",  // native, aligned
            "ATATATCCCGGCG",  // genomic, aligned + clipped
            "CGCCGGGATATAT"   // native, aligned + clipped
        }
    );
}
940
// Runs CheckQualitiesClippedAndAligned on a single "13=" CIGAR. The expected
// list holds six forward-strand entries followed by six reverse-strand entries
// (genomic/native, then aligned, then aligned + clipped).
TEST(BamRecordTest, QualitiesOrientation)
{
    SCOPED_TRACE("Simple CIGAR Qualities");
    BamRecordTests::CheckQualitiesClippedAndAligned(
        "13=",            // CIGAR
        "?]?]?]?]?]?]*",  // input
        {
            // -- forward strand --
            "?]?]?]?]?]?]*",  // genomic
            "?]?]?]?]?]?]*",  // native
            "?]?]?]?]?]?]*",  // genomic, aligned
            "?]?]?]?]?]?]*",  // native, aligned
            "?]?]?]?]?]?]*",  // genomic, aligned + clipped
            "?]?]?]?]?]?]*",  // native, aligned + clipped
            // -- reverse strand --
            "?]?]?]?]?]?]*",  // genomic
            "*]?]?]?]?]?]?",  // native
            "?]?]?]?]?]?]*",  // genomic, aligned
            "*]?]?]?]?]?]?",  // native, aligned
            "?]?]?]?]?]?]*",  // genomic, aligned + clipped
            "*]?]?]?]?]?]?"   // native, aligned + clipped
        }
    );
}
965
// Runs CheckBaseTagsClippedAndAligned on a single "13=" CIGAR. The expected
// list holds six forward-strand entries followed by six reverse-strand entries
// (genomic/native, then aligned, then aligned + clipped).
TEST(BamRecordTest, SequenceTagsOrientation)
{
    SCOPED_TRACE("Simple CIGAR Base Tags");
    BamRecordTests::CheckBaseTagsClippedAndAligned(
        "13=",            // CIGAR
        "ATATATCCCGGCG",  // input
        {
            // -- forward strand --
            "ATATATCCCGGCG",  // genomic
            "ATATATCCCGGCG",  // native
            "ATATATCCCGGCG",  // genomic, aligned
            "ATATATCCCGGCG",  // native, aligned
            "ATATATCCCGGCG",  // genomic, aligned, clipped
            "ATATATCCCGGCG",  // native, aligned, clipped
            // -- reverse strand --
            "CGCCGGGATATAT",  // genomic
            "ATATATCCCGGCG",  // native
            "CGCCGGGATATAT",  // genomic, aligned
            "ATATATCCCGGCG",  // native, aligned
            "CGCCGGGATATAT",  // genomic, aligned, clipped
            "ATATATCCCGGCG"   // native, aligned, clipped
        }
    );
}
990
// Runs CheckFrameTagsClippedAndAligned on a single "5=" CIGAR. The expected
// list holds six forward-strand entries followed by six reverse-strand entries
// (genomic/native, then aligned, then aligned + clipped).
TEST(BamRecordTest, FrameTagsOrientation)
{
    SCOPED_TRACE("Simple CIGAR Frames");
    BamRecordTests::CheckFrameTagsClippedAndAligned(
        "5=",               // CIGAR
        { 0, 1, 2, 3, 4 },  // input
        {
            // -- forward strand --
            { 0, 1, 2, 3, 4 },  // genomic
            { 0, 1, 2, 3, 4 },  // native
            { 0, 1, 2, 3, 4 },  // genomic, aligned
            { 0, 1, 2, 3, 4 },  // native, aligned
            { 0, 1, 2, 3, 4 },  // genomic, aligned, clipped
            { 0, 1, 2, 3, 4 },  // native, aligned, clipped
            // -- reverse strand --
            { 4, 3, 2, 1, 0 },  // genomic
            { 0, 1, 2, 3, 4 },  // native
            { 4, 3, 2, 1, 0 },  // genomic, aligned
            { 0, 1, 2, 3, 4 },  // native, aligned
            { 4, 3, 2, 1, 0 },  // genomic, aligned, clipped
            { 0, 1, 2, 3, 4 }   // native, aligned, clipped
        }
    );
}
1015
// Runs CheckQualityTagsClippedAndAligned on a single "13=" CIGAR. The expected
// list holds six forward-strand entries followed by six reverse-strand entries
// (genomic/native, then aligned, then aligned + clipped).
TEST(BamRecordTest, QualityTagsOrientation)
{
    SCOPED_TRACE("Simple CIGAR Quality Tags");
    BamRecordTests::CheckQualityTagsClippedAndAligned(
        "13=",            // CIGAR
        "?]?]?]?]?]?]*",  // input
        {
            // -- forward strand --
            "?]?]?]?]?]?]*",  // genomic
            "?]?]?]?]?]?]*",  // native
            "?]?]?]?]?]?]*",  // genomic, aligned
            "?]?]?]?]?]?]*",  // native, aligned
            "?]?]?]?]?]?]*",  // genomic, aligned + clipped
            "?]?]?]?]?]?]*",  // native, aligned + clipped
            // -- reverse strand --
            "*]?]?]?]?]?]?",  // genomic
            "?]?]?]?]?]?]*",  // native
            "*]?]?]?]?]?]?",  // genomic, aligned
            "?]?]?]?]?]?]*",  // native, aligned
            "*]?]?]?]?]?]?",  // genomic, aligned + clipped
            "?]?]?]?]?]?]*"   // native, aligned + clipped
        }
    );
}
1040
// Runs CheckSequenceClippedAndAligned over seven CIGARs built from the
// =, N, S, H and I operations. Every case lives in its own scope so that its
// SCOPED_TRACE label applies to that case only. Each expected list holds six
// forward-strand entries followed by six reverse-strand entries.
TEST(BamRecordTest, SequenceClippedAndAligned)
{
    {   // case 1
        SCOPED_TRACE("CIGAR: 10=");
        BamRecordTests::CheckSequenceClippedAndAligned(
            "10=",         // CIGAR
            "ATCCGCGGTT",  // input
            {
                // -- forward strand --
                "ATCCGCGGTT",  // genomic
                "ATCCGCGGTT",  // native
                "ATCCGCGGTT",  // genomic, aligned
                "ATCCGCGGTT",  // native, aligned
                "ATCCGCGGTT",  // genomic, aligned + clipped
                "ATCCGCGGTT",  // native, aligned + clipped
                // -- reverse strand --
                "ATCCGCGGTT",  // genomic
                "AACCGCGGAT",  // native
                "ATCCGCGGTT",  // genomic, aligned
                "AACCGCGGAT",  // native, aligned
                "ATCCGCGGTT",  // genomic, aligned + clipped
                "AACCGCGGAT"   // native, aligned + clipped
            }
        );
    }
    {   // case 2
        SCOPED_TRACE("CIGAR: 3=4N3=");
        BamRecordTests::CheckSequenceClippedAndAligned(
            "3=4N3=",  // CIGAR
            "ACGTTT",  // input
            {
                // -- forward strand --
                "ACGTTT",  // genomic
                "ACGTTT",  // native
                "ACGTTT",  // genomic, aligned
                "ACGTTT",  // native, aligned
                "ACGTTT",  // genomic, aligned + clipped
                "ACGTTT",  // native, aligned + clipped
                // -- reverse strand --
                "ACGTTT",  // genomic
                "AAACGT",  // native
                "ACGTTT",  // genomic, aligned
                "AAACGT",  // native, aligned
                "ACGTTT",  // genomic, aligned + clipped
                "AAACGT"   // native, aligned + clipped
            }
        );
    }
    {   // case 3
        SCOPED_TRACE("CIGAR: 1S8=1S");
        BamRecordTests::CheckSequenceClippedAndAligned(
            "1S8=1S",      // CIGAR
            "ACCCGCGGTT",  // input
            {
                // -- forward strand --
                "ACCCGCGGTT",  // genomic
                "ACCCGCGGTT",  // native
                "ACCCGCGGTT",  // genomic, aligned
                "ACCCGCGGTT",  // native, aligned
                "CCCGCGGT",    // genomic, aligned + clipped
                "CCCGCGGT",    // native, aligned + clipped
                // -- reverse strand --
                "ACCCGCGGTT",  // genomic
                "AACCGCGGGT",  // native
                "ACCCGCGGTT",  // genomic, aligned
                "AACCGCGGGT",  // native, aligned
                "CCCGCGGT",    // genomic, aligned + clipped
                "ACCGCGGG"     // native, aligned + clipped
            }
        );
    }
    {   // case 4
        SCOPED_TRACE("CIGAR: 1H8=1H");
        BamRecordTests::CheckSequenceClippedAndAligned(
            "1H8=1H",    // CIGAR
            "ATCGCGGT",  // input
            {
                // -- forward strand --
                "ATCGCGGT",  // genomic
                "ATCGCGGT",  // native
                "ATCGCGGT",  // genomic, aligned
                "ATCGCGGT",  // native, aligned
                "ATCGCGGT",  // genomic, aligned + clipped
                "ATCGCGGT",  // native, aligned + clipped
                // -- reverse strand --
                "ATCGCGGT",  // genomic
                "ACCGCGAT",  // native
                "ATCGCGGT",  // genomic, aligned
                "ACCGCGAT",  // native, aligned
                "ATCGCGGT",  // genomic, aligned + clipped
                "ACCGCGAT"   // native, aligned + clipped
            }
        );
    }
    {   // case 5
        SCOPED_TRACE("CIGAR: 2S6=2S");
        BamRecordTests::CheckSequenceClippedAndAligned(
            "2S6=2S",      // CIGAR
            "AGCCGCGGTT",  // input
            {
                // -- forward strand --
                "AGCCGCGGTT",  // genomic
                "AGCCGCGGTT",  // native
                "AGCCGCGGTT",  // genomic, aligned
                "AGCCGCGGTT",  // native, aligned
                "CCGCGG",      // genomic, aligned + clipped
                "CCGCGG",      // native, aligned + clipped
                // -- reverse strand --
                "AGCCGCGGTT",  // genomic
                "AACCGCGGCT",  // native
                "AGCCGCGGTT",  // genomic, aligned
                "AACCGCGGCT",  // native, aligned
                "CCGCGG",      // genomic, aligned + clipped
                "CCGCGG"       // native, aligned + clipped
            }
        );
    }
    {   // case 6
        SCOPED_TRACE("CIGAR: 2S3=2I3=2S");
        BamRecordTests::CheckSequenceClippedAndAligned(
            "2S3=2I3=2S",    // CIGAR
            "ATCCGNNCGGTT",  // input
            {
                // -- forward strand --
                "ATCCGNNCGGTT",  // genomic
                "ATCCGNNCGGTT",  // native
                "ATCCGNNCGGTT",  // genomic, aligned
                "ATCCGNNCGGTT",  // native, aligned
                "CCGNNCGG",      // genomic, aligned + clipped
                "CCGNNCGG",      // native, aligned + clipped
                // -- reverse strand --
                "ATCCGNNCGGTT",  // genomic
                "AACCGNNCGGAT",  // native
                "ATCCGNNCGGTT",  // genomic, aligned
                "AACCGNNCGGAT",  // native, aligned
                "CCGNNCGG",      // genomic, aligned + clipped
                "CCGNNCGG"       // native, aligned + clipped
            }
        );
    }
    {   // case 7
        SCOPED_TRACE("CIGAR: 2H6=2H");
        BamRecordTests::CheckSequenceClippedAndAligned(
            "2H6=2H",  // CIGAR
            "CAGCGG",  // input
            {
                // -- forward strand --
                "CAGCGG",  // genomic
                "CAGCGG",  // native
                "CAGCGG",  // genomic, aligned
                "CAGCGG",  // native, aligned
                "CAGCGG",  // genomic, aligned + clipped
                "CAGCGG",  // native, aligned + clipped
                // -- reverse strand --
                "CAGCGG",  // genomic
                "CCGCTG",  // native
                "CAGCGG",  // genomic, aligned
                "CCGCTG",  // native, aligned
                "CAGCGG",  // genomic, aligned + clipped
                "CCGCTG"   // native, aligned + clipped
            }
        );
    }
}
1191
// Runs CheckSequenceClippedAndAligned over six CIGARs that add D and P
// operations (optionally wrapped in S and/or H). In the aligned expectations
// below, each D position shows up as '-' and each P position as '*'. Every
// case lives in its own scope so that its SCOPED_TRACE label applies to that
// case only.
TEST(BamRecordTest, ClippingOrientationAndAlignment)
{
    {   // case 1
        SCOPED_TRACE("CIGAR: 4=3D4=");
        BamRecordTests::CheckSequenceClippedAndAligned(
            "4=3D4=",    // CIGAR
            "AACCGTTA",  // input
            {
                // -- forward strand --
                "AACCGTTA",     // genomic
                "AACCGTTA",     // native
                "AACC---GTTA",  // genomic, aligned
                "AACC---GTTA",  // native, aligned
                "AACC---GTTA",  // genomic, aligned + clipped
                "AACC---GTTA",  // native, aligned + clipped
                // -- reverse strand --
                "AACCGTTA",     // genomic
                "TAACGGTT",     // native
                "AACC---GTTA",  // genomic, aligned
                "TAAC---GGTT",  // native, aligned
                "AACC---GTTA",  // genomic, aligned + clipped
                "TAAC---GGTT"   // native, aligned + clipped
            }
        );
    }
    {   // case 2
        SCOPED_TRACE("CIGAR: 4=1D2I2D4=");
        BamRecordTests::CheckSequenceClippedAndAligned(
            "4=1D2I2D4=",  // CIGAR
            "ATCCTAGGTT",  // input
            {
                // -- forward strand --
                "ATCCTAGGTT",     // genomic
                "ATCCTAGGTT",     // native
                "ATCC-TA--GGTT",  // genomic, aligned
                "ATCC-TA--GGTT",  // native, aligned
                "ATCC-TA--GGTT",  // genomic, aligned + clipped
                "ATCC-TA--GGTT",  // native, aligned + clipped
                // -- reverse strand --
                "ATCCTAGGTT",     // genomic
                "AACCTAGGAT",     // native
                "ATCC-TA--GGTT",  // genomic, aligned
                "AACC--TA-GGAT",  // native, aligned
                "ATCC-TA--GGTT",  // genomic, aligned + clipped
                "AACC--TA-GGAT"   // native, aligned + clipped
            }
        );
    }
    {   // case 3
        SCOPED_TRACE("CIGAR: 4=1D2P2I2P2D4=");
        BamRecordTests::CheckSequenceClippedAndAligned(
            "4=1D2P2I2P2D4=",  // CIGAR
            "ATCCTAGGTT",      // input
            {
                // -- forward strand --
                "ATCCTAGGTT",         // genomic
                "ATCCTAGGTT",         // native
                "ATCC-**TA**--GGTT",  // genomic, aligned
                "ATCC-**TA**--GGTT",  // native, aligned
                "ATCC-**TA**--GGTT",  // genomic, aligned + clipped
                "ATCC-**TA**--GGTT",  // native, aligned + clipped
                // -- reverse strand --
                "ATCCTAGGTT",         // genomic
                "AACCTAGGAT",         // native
                "ATCC-**TA**--GGTT",  // genomic, aligned
                "AACC--**TA**-GGAT",  // native, aligned
                "ATCC-**TA**--GGTT",  // genomic, aligned + clipped
                "AACC--**TA**-GGAT"   // native, aligned + clipped
            }
        );
    }
    {   // case 4
        SCOPED_TRACE("CIGAR: 2S4=3D4=3S");
        BamRecordTests::CheckSequenceClippedAndAligned(
            "2S4=3D4=3S",     // CIGAR
            "TTAACCGTTACCG",  // input
            {
                // -- forward strand --
                "TTAACCGTTACCG",     // genomic
                "TTAACCGTTACCG",     // native
                "TTAACC---GTTACCG",  // genomic, aligned
                "TTAACC---GTTACCG",  // native, aligned
                "AACC---GTTA",       // genomic, aligned + clipped
                "AACC---GTTA",       // native, aligned + clipped
                // -- reverse strand --
                "TTAACCGTTACCG",     // genomic
                "CGGTAACGGTTAA",     // native
                "TTAACC---GTTACCG",  // genomic, aligned
                "CGGTAAC---GGTTAA",  // native, aligned
                "AACC---GTTA",       // genomic, aligned + clipped
                "TAAC---GGTT"        // native, aligned + clipped
            }
        );
    }
    {   // case 5
        SCOPED_TRACE("CIGAR: 2H4=3D4=3H");
        BamRecordTests::CheckSequenceClippedAndAligned(
            "2H4=3D4=3H",  // CIGAR
            "AACCGTTA",    // input
            {
                // -- forward strand --
                "AACCGTTA",     // genomic
                "AACCGTTA",     // native
                "AACC---GTTA",  // genomic, aligned
                "AACC---GTTA",  // native, aligned
                "AACC---GTTA",  // genomic, aligned + clipped
                "AACC---GTTA",  // native, aligned + clipped
                // -- reverse strand --
                "AACCGTTA",     // genomic
                "TAACGGTT",     // native
                "AACC---GTTA",  // genomic, aligned
                "TAAC---GGTT",  // native, aligned
                "AACC---GTTA",  // genomic, aligned + clipped
                "TAAC---GGTT"   // native, aligned + clipped
            }
        );
    }
    {   // case 6
        SCOPED_TRACE("CIGAR: 2H2S4=3D4=3S3H");
        BamRecordTests::CheckSequenceClippedAndAligned(
            "2H2S4=3D4=3S3H",  // CIGAR
            "TTAACCGTTACCG",   // input
            {
                // -- forward strand --
                "TTAACCGTTACCG",     // genomic
                "TTAACCGTTACCG",     // native
                "TTAACC---GTTACCG",  // genomic, aligned
                "TTAACC---GTTACCG",  // native, aligned
                "AACC---GTTA",       // genomic, aligned + clipped
                "AACC---GTTA",       // native, aligned + clipped
                // -- reverse strand --
                "TTAACCGTTACCG",     // genomic
                "CGGTAACGGTTAA",     // native
                "TTAACC---GTTACCG",  // genomic, aligned
                "CGGTAAC---GGTTAA",  // native, aligned
                "AACC---GTTA",       // genomic, aligned + clipped
                "TAAC---GGTT"        // native, aligned + clipped
            }
        );
    }
}
1321
// Runs CheckQualityTagsClippedAndAligned over six CIGARs containing D and P
// operations (optionally wrapped in S and/or H). In the aligned expectations
// below, each D and each P position shows up as '!'. Every case lives in its
// own scope so that its SCOPED_TRACE label applies to that case only.
TEST(BamRecordTest, QualityTagsClippedAndAligned)
{
    {   // case 1
        SCOPED_TRACE("CIGAR: 4=3D4=");
        BamRecordTests::CheckQualityTagsClippedAndAligned(
            "4=3D4=",    // CIGAR
            "?]?]?]?@",  // input
            {
                // -- forward strand --
                "?]?]?]?@",     // genomic
                "?]?]?]?@",     // native
                "?]?]!!!?]?@",  // genomic, aligned
                "?]?]!!!?]?@",  // native, aligned
                "?]?]!!!?]?@",  // genomic, aligned + clipped
                "?]?]!!!?]?@",  // native, aligned + clipped
                // -- reverse strand --
                "@?]?]?]?",     // genomic
                "?]?]?]?@",     // native
                "@?]?!!!]?]?",  // genomic, aligned
                "?]?]!!!?]?@",  // native, aligned
                "@?]?!!!]?]?",  // genomic, aligned + clipped
                "?]?]!!!?]?@"   // native, aligned + clipped
            }
        );
    }
    {   // case 2
        SCOPED_TRACE("CIGAR: 4=1D2I2D4=");
        BamRecordTests::CheckQualityTagsClippedAndAligned(
            "4=1D2I2D4=",  // CIGAR
            "?]?]87?]?@",  // input
            {
                // -- forward strand --
                "?]?]87?]?@",     // genomic
                "?]?]87?]?@",     // native
                "?]?]!87!!?]?@",  // genomic, aligned
                "?]?]!87!!?]?@",  // native, aligned
                "?]?]!87!!?]?@",  // genomic, aligned + clipped
                "?]?]!87!!?]?@",  // native, aligned + clipped
                // -- reverse strand --
                "@?]?78]?]?",     // genomic
                "?]?]87?]?@",     // native
                "@?]?!78!!]?]?",  // genomic, aligned
                "?]?]!!87!?]?@",  // native, aligned
                "@?]?!78!!]?]?",  // genomic, aligned + clipped
                "?]?]!!87!?]?@"   // native, aligned + clipped
            }
        );
    }
    {   // case 3
        SCOPED_TRACE("CIGAR: 4=1D2P2I2P2D4=");
        BamRecordTests::CheckQualityTagsClippedAndAligned(
            "4=1D2P2I2P2D4=",  // CIGAR
            "?]?]87?]?@",      // input
            {
                // -- forward strand --
                "?]?]87?]?@",         // genomic
                "?]?]87?]?@",         // native
                "?]?]!!!87!!!!?]?@",  // genomic, aligned
                "?]?]!!!87!!!!?]?@",  // native, aligned
                "?]?]!!!87!!!!?]?@",  // genomic, aligned + clipped
                "?]?]!!!87!!!!?]?@",  // native, aligned + clipped
                // -- reverse strand --
                "@?]?78]?]?",         // genomic
                "?]?]87?]?@",         // native
                "@?]?!!!78!!!!]?]?",  // genomic, aligned
                "?]?]!!!!87!!!?]?@",  // native, aligned
                "@?]?!!!78!!!!]?]?",  // genomic, aligned + clipped
                "?]?]!!!!87!!!?]?@"   // native, aligned + clipped
            }
        );
    }
    {   // case 4
        SCOPED_TRACE("CIGAR: 3S4=3D4=3S");
        BamRecordTests::CheckQualityTagsClippedAndAligned(
            "3S4=3D4=3S",      // CIGAR
            "vvv?]?]?]?@xxx",  // input
            {
                // -- forward strand --
                "vvv?]?]?]?@xxx",     // genomic
                "vvv?]?]?]?@xxx",     // native
                "vvv?]?]!!!?]?@xxx",  // genomic, aligned
                "vvv?]?]!!!?]?@xxx",  // native, aligned
                "?]?]!!!?]?@",        // genomic, aligned, clipped
                "?]?]!!!?]?@",        // native, aligned, clipped
                // -- reverse strand --
                "xxx@?]?]?]?vvv",     // genomic
                "vvv?]?]?]?@xxx",     // native
                "xxx@?]?!!!]?]?vvv",  // genomic, aligned
                "vvv?]?]!!!?]?@xxx",  // native, aligned
                "@?]?!!!]?]?",        // genomic, aligned, clipped
                "?]?]!!!?]?@"         // native, aligned, clipped
            }
        );
    }
    {   // case 5
        SCOPED_TRACE("CIGAR: 2H4=3D4=3H");
        BamRecordTests::CheckQualityTagsClippedAndAligned(
            "2H4=3D4=3H",  // CIGAR
            "?]?]?]?@",    // input
            {
                // -- forward strand --
                "?]?]?]?@",     // genomic
                "?]?]?]?@",     // native
                "?]?]!!!?]?@",  // genomic, aligned
                "?]?]!!!?]?@",  // native, aligned
                "?]?]!!!?]?@",  // genomic, aligned, clipped
                "?]?]!!!?]?@",  // native, aligned, clipped
                // -- reverse strand --
                "@?]?]?]?",     // genomic
                "?]?]?]?@",     // native
                "@?]?!!!]?]?",  // genomic, aligned
                "?]?]!!!?]?@",  // native, aligned
                "@?]?!!!]?]?",  // genomic, aligned, clipped
                "?]?]!!!?]?@"   // native, aligned, clipped
            }
        );
    }
    {   // case 6
        SCOPED_TRACE("CIGAR: 2H3S4=3D4=3S3H");
        BamRecordTests::CheckQualityTagsClippedAndAligned(
            "2H3S4=3D4=3S3H",  // CIGAR
            "vvv?]?]?]?@xxx",  // input
            {
                // -- forward strand --
                "vvv?]?]?]?@xxx",     // genomic
                "vvv?]?]?]?@xxx",     // native
                "vvv?]?]!!!?]?@xxx",  // genomic, aligned
                "vvv?]?]!!!?]?@xxx",  // native, aligned
                "?]?]!!!?]?@",        // genomic, aligned, clipped
                "?]?]!!!?]?@",        // native, aligned, clipped
                // -- reverse strand --
                "xxx@?]?]?]?vvv",     // genomic
                "vvv?]?]?]?@xxx",     // native
                "xxx@?]?!!!]?]?vvv",  // genomic, aligned
                "vvv?]?]!!!?]?@xxx",  // native, aligned
                "@?]?!!!]?]?",        // genomic, aligned, clipped
                "?]?]!!!?]?@"         // native, aligned, clipped
            }
        );
    }
}
1451
// Runs CheckBaseTagsClippedAndAligned over six CIGARs containing D and P
// operations (optionally wrapped in S and/or H). In the aligned expectations
// below, each D position shows up as '-' and each P position as '*'. Every
// case lives in its own scope so that its SCOPED_TRACE label applies to that
// case only.
TEST(BamRecordTest, BaseTagsClippedAndAligned)
{
    {   // case 1
        SCOPED_TRACE("CIGAR: 4=3D4=");
        BamRecordTests::CheckBaseTagsClippedAndAligned(
            "4=3D4=",    // CIGAR
            "AACCGTTA",  // input
            {
                // -- forward strand --
                "AACCGTTA",     // genomic
                "AACCGTTA",     // native
                "AACC---GTTA",  // genomic, aligned
                "AACC---GTTA",  // native, aligned
                "AACC---GTTA",  // genomic, aligned, clipped
                "AACC---GTTA",  // native, aligned, clipped
                // -- reverse strand --
                "TAACGGTT",     // genomic
                "AACCGTTA",     // native
                "TAAC---GGTT",  // genomic, aligned
                "AACC---GTTA",  // native, aligned
                "TAAC---GGTT",  // genomic, aligned, clipped
                "AACC---GTTA"   // native, aligned, clipped
            }
        );
    }
    {   // case 2
        SCOPED_TRACE("CIGAR: 4=1D2I2D4=");
        BamRecordTests::CheckBaseTagsClippedAndAligned(
            "4=1D2I2D4=",  // CIGAR
            "ATCCTAGGTT",  // input
            {
                // -- forward strand --
                "ATCCTAGGTT",     // genomic
                "ATCCTAGGTT",     // native
                "ATCC-TA--GGTT",  // genomic, aligned
                "ATCC-TA--GGTT",  // native, aligned
                "ATCC-TA--GGTT",  // genomic, aligned, clipped
                "ATCC-TA--GGTT",  // native, aligned, clipped
                // -- reverse strand --
                "AACCTAGGAT",     // genomic
                "ATCCTAGGTT",     // native
                "AACC-TA--GGAT",  // genomic, aligned
                "ATCC--TA-GGTT",  // native, aligned
                "AACC-TA--GGAT",  // genomic, aligned, clipped
                "ATCC--TA-GGTT"   // native, aligned, clipped
            }
        );
    }
    {   // case 3
        SCOPED_TRACE("CIGAR: 4=1D2P2I2P2D4=");
        BamRecordTests::CheckBaseTagsClippedAndAligned(
            "4=1D2P2I2P2D4=",  // CIGAR
            "ATCCTAGGTT",      // input
            {
                // -- forward strand --
                "ATCCTAGGTT",         // genomic
                "ATCCTAGGTT",         // native
                "ATCC-**TA**--GGTT",  // genomic, aligned
                "ATCC-**TA**--GGTT",  // native, aligned
                "ATCC-**TA**--GGTT",  // genomic, aligned, clipped
                "ATCC-**TA**--GGTT",  // native, aligned, clipped
                // -- reverse strand --
                "AACCTAGGAT",         // genomic
                "ATCCTAGGTT",         // native
                "AACC-**TA**--GGAT",  // genomic, aligned
                "ATCC--**TA**-GGTT",  // native, aligned
                "AACC-**TA**--GGAT",  // genomic, aligned, clipped
                "ATCC--**TA**-GGTT"   // native, aligned, clipped
            }
        );
    }
    {   // case 4
        SCOPED_TRACE("CIGAR: 3S4=3D4=3S");
        BamRecordTests::CheckBaseTagsClippedAndAligned(
            "3S4=3D4=3S",      // CIGAR
            "TTTAACCGTTACCG",  // input
            {
                // -- forward strand --
                "TTTAACCGTTACCG",     // genomic
                "TTTAACCGTTACCG",     // native
                "TTTAACC---GTTACCG",  // genomic, aligned
                "TTTAACC---GTTACCG",  // native, aligned
                "AACC---GTTA",        // genomic, aligned, clipped
                "AACC---GTTA",        // native, aligned, clipped
                // -- reverse strand --
                "CGGTAACGGTTAAA",     // genomic
                "TTTAACCGTTACCG",     // native
                "CGGTAAC---GGTTAAA",  // genomic, aligned
                "TTTAACC---GTTACCG",  // native, aligned
                "TAAC---GGTT",        // genomic, aligned, clipped
                "AACC---GTTA"         // native, aligned, clipped
            }
        );
    }
    {   // case 5
        SCOPED_TRACE("CIGAR: 2H4=3D4=3H");
        BamRecordTests::CheckBaseTagsClippedAndAligned(
            "2H4=3D4=3H",  // CIGAR
            "AACCGTTA",    // input
            {
                // -- forward strand --
                "AACCGTTA",     // genomic
                "AACCGTTA",     // native
                "AACC---GTTA",  // genomic, aligned
                "AACC---GTTA",  // native, aligned
                "AACC---GTTA",  // genomic, aligned, clipped
                "AACC---GTTA",  // native, aligned, clipped
                // -- reverse strand --
                "TAACGGTT",     // genomic
                "AACCGTTA",     // native
                "TAAC---GGTT",  // genomic, aligned
                "AACC---GTTA",  // native, aligned
                "TAAC---GGTT",  // genomic, aligned, clipped
                "AACC---GTTA"   // native, aligned, clipped
            }
        );
    }
    {   // case 6
        SCOPED_TRACE("CIGAR: 2H3S4=3D4=3S3H");
        BamRecordTests::CheckBaseTagsClippedAndAligned(
            "2H3S4=3D4=3S3H",  // CIGAR
            "TTTAACCGTTACCG",  // input
            {
                // -- forward strand --
                "TTTAACCGTTACCG",     // genomic
                "TTTAACCGTTACCG",     // native
                "TTTAACC---GTTACCG",  // genomic, aligned
                "TTTAACC---GTTACCG",  // native, aligned
                "AACC---GTTA",        // genomic, aligned, clipped
                "AACC---GTTA",        // native, aligned, clipped
                // -- reverse strand --
                "CGGTAACGGTTAAA",     // genomic
                "TTTAACCGTTACCG",     // native
                "CGGTAAC---GGTTAAA",  // genomic, aligned
                "TTTAACC---GTTACCG",  // native, aligned
                "TAAC---GGTT",        // genomic, aligned, clipped
                "AACC---GTTA"         // native, aligned, clipped
            }
        );
    }
}
1581
// Checks frame-valued tag data against six CIGAR layouts: deletion only, mixed
// deletion/insertion, padded insertion, soft clips, hard clips, and both clip kinds.
//
// Each call passes the CIGAR string, the input frame values, and twelve expected
// rows, labelled inline by strand, orientation, and aligned/clipped state.
// What the expected rows show:
//   - aligned rows hold a 0 at every D and P position of the CIGAR;
//   - clipped rows drop the values that sit under S operations;
//   - H operations do not change any row;
//   - each reverse-strand genomic row is the reversal of its native counterpart.
// NOTE(review): CheckFrameTagsClippedAndAligned is defined outside this chunk;
// confirm there how the twelve rows are consumed.
TEST(BamRecordTest,FrameTagsClippedAndAligned)1582 TEST(BamRecordTest, FrameTagsClippedAndAligned)
1583 {
1584 { // deletion only: aligned rows gain three 0 frames at the 3D gap
1585 SCOPED_TRACE("CIGAR: 4=3D4=");
1586 BamRecordTests::CheckFrameTagsClippedAndAligned(
1587 "4=3D4=", // CIGAR
1588 { 10, 20, 10, 20, 10, 20, 10, 30 }, // input
1589 {
1590 { 10, 20, 10, 20, 10, 20, 10, 30 }, // forward strand, genomic
1591 { 10, 20, 10, 20, 10, 20, 10, 30 }, // forward strand, native
1592 { 10, 20, 10, 20, 0, 0, 0, 10, 20, 10, 30 }, // forward strand, genomic, aligned
1593 { 10, 20, 10, 20, 0, 0, 0, 10, 20, 10, 30 }, // forward strand, native, aligned
1594 { 10, 20, 10, 20, 0, 0, 0, 10, 20, 10, 30 }, // forward strand, genomic, aligned, clipped
1595 { 10, 20, 10, 20, 0, 0, 0, 10, 20, 10, 30 }, // forward strand, native, aligned, clipped
1596 { 30, 10, 20, 10, 20, 10, 20, 10 }, // reverse strand, genomic
1597 { 10, 20, 10, 20, 10, 20, 10, 30 }, // reverse strand, native
1598 { 30, 10, 20, 10, 0, 0, 0, 20, 10, 20, 10 }, // reverse strand, genomic, aligned
1599 { 10, 20, 10, 20, 0, 0, 0, 10, 20, 10, 30 }, // reverse strand, native, aligned
1600 { 30, 10, 20, 10, 0, 0, 0, 20, 10, 20, 10 }, // reverse strand, genomic, aligned, clipped
1601 { 10, 20, 10, 20, 0, 0, 0, 10, 20, 10, 30 } // reverse strand, native, aligned, clipped
1602 }
1603 );
1604 }
1605 { // 1D, 2I, 2D: the insertion frames (80, 70) sit between the 0-filled deletion gaps
1606 SCOPED_TRACE("CIGAR: 4=1D2I2D4=");
1607 BamRecordTests::CheckFrameTagsClippedAndAligned(
1608 "4=1D2I2D4=", // CIGAR
1609 { 10, 20, 10, 20, 80, 70, 10, 20, 10, 30 }, // input
1610 {
1611 { 10, 20, 10, 20, 80, 70, 10, 20, 10, 30 }, // forward strand, genomic
1612 { 10, 20, 10, 20, 80, 70, 10, 20, 10, 30 }, // forward strand, native
1613 { 10, 20, 10, 20, 0, 80, 70, 0, 0, 10, 20, 10, 30 }, // forward strand, genomic, aligned
1614 { 10, 20, 10, 20, 0, 80, 70, 0, 0, 10, 20, 10, 30 }, // forward strand, native, aligned
1615 { 10, 20, 10, 20, 0, 80, 70, 0, 0, 10, 20, 10, 30 }, // forward strand, genomic, aligned, clipped
1616 { 10, 20, 10, 20, 0, 80, 70, 0, 0, 10, 20, 10, 30 }, // forward strand, native, aligned, clipped
1617 { 30, 10, 20, 10, 70, 80, 20, 10, 20, 10 }, // reverse strand, genomic
1618 { 10, 20, 10, 20, 80, 70, 10, 20, 10, 30 }, // reverse strand, native
1619 { 30, 10, 20, 10, 0, 70, 80, 0, 0, 20, 10, 20, 10 }, // reverse strand, genomic, aligned
1620 { 10, 20, 10, 20, 0, 0, 80, 70, 0, 10, 20, 10, 30 }, // reverse strand, native, aligned
1621 { 30, 10, 20, 10, 0, 70, 80, 0, 0, 20, 10, 20, 10 }, // reverse strand, genomic, aligned, clipped
1622 { 10, 20, 10, 20, 0, 0, 80, 70, 0, 10, 20, 10, 30 } // reverse strand, native, aligned, clipped
1623 }
1624 );
1625 }
1626 { // 2P padding on each side of the insertion: P positions are 0-filled like D positions
1627 SCOPED_TRACE("CIGAR: 4=1D2P2I2P2D4=");
1628 BamRecordTests::CheckFrameTagsClippedAndAligned(
1629 "4=1D2P2I2P2D4=", // CIGAR
1630 { 10, 20, 10, 20, 80, 70, 10, 20, 10, 30 }, // input
1631 {
1632 { 10, 20, 10, 20, 80, 70, 10, 20, 10, 30 }, // forward strand, genomic
1633 { 10, 20, 10, 20, 80, 70, 10, 20, 10, 30 }, // forward strand, native
1634 { 10, 20, 10, 20, 0, 0, 0, 80, 70, 0, 0, 0, 0, 10, 20, 10, 30 }, // forward strand, genomic, aligned
1635 { 10, 20, 10, 20, 0, 0, 0, 80, 70, 0, 0, 0, 0, 10, 20, 10, 30 }, // forward strand, native, aligned
1636 { 10, 20, 10, 20, 0, 0, 0, 80, 70, 0, 0, 0, 0, 10, 20, 10, 30 }, // forward strand, genomic, aligned, clipped
1637 { 10, 20, 10, 20, 0, 0, 0, 80, 70, 0, 0, 0, 0, 10, 20, 10, 30 }, // forward strand, native, aligned, clipped
1638 { 30, 10, 20, 10, 70, 80, 20, 10, 20, 10 }, // reverse strand, genomic
1639 { 10, 20, 10, 20, 80, 70, 10, 20, 10, 30 }, // reverse strand, native
1640 { 30, 10, 20, 10, 0, 0, 0, 70, 80, 0, 0, 0, 0, 20, 10, 20, 10 }, // reverse strand, genomic, aligned
1641 { 10, 20, 10, 20, 0, 0, 0, 0, 80, 70, 0, 0, 0, 10, 20, 10, 30 }, // reverse strand, native, aligned
1642 { 30, 10, 20, 10, 0, 0, 0, 70, 80, 0, 0, 0, 0, 20, 10, 20, 10 }, // reverse strand, genomic, aligned, clipped
1643 { 10, 20, 10, 20, 0, 0, 0, 0, 80, 70, 0, 0, 0, 10, 20, 10, 30 } // reverse strand, native, aligned, clipped
1644 }
1645 );
1646 }
1647 { // soft clips: the 40s/50s under 3S appear in every row except the clipped ones
1648 SCOPED_TRACE("CIGAR: 3S4=3D4=3S");
1649 BamRecordTests::CheckFrameTagsClippedAndAligned(
1650 "3S4=3D4=3S", // CIGAR
1651 { 40, 40, 40, 10, 20, 10, 20, 10, 20, 10, 30, 50, 50, 50 }, // input
1652 {
1653 { 40, 40, 40, 10, 20, 10, 20, 10, 20, 10, 30, 50, 50, 50 }, // forward strand, genomic
1654 { 40, 40, 40, 10, 20, 10, 20, 10, 20, 10, 30, 50, 50, 50 }, // forward strand, native
1655 { 40, 40, 40, 10, 20, 10, 20, 0, 0, 0, 10, 20, 10, 30, 50, 50, 50 }, // forward strand, genomic, aligned
1656 { 40, 40, 40, 10, 20, 10, 20, 0, 0, 0, 10, 20, 10, 30, 50, 50, 50 }, // forward strand, native, aligned
1657 { 10, 20, 10, 20, 0, 0, 0, 10, 20, 10, 30 }, // forward strand, genomic, aligned, clipped
1658 { 10, 20, 10, 20, 0, 0, 0, 10, 20, 10, 30 }, // forward strand, native, aligned, clipped
1659 { 50, 50, 50, 30, 10, 20, 10, 20, 10, 20, 10, 40, 40, 40 }, // reverse strand, genomic
1660 { 40, 40, 40, 10, 20, 10, 20, 10, 20, 10, 30, 50, 50, 50 }, // reverse strand, native
1661 { 50, 50, 50, 30, 10, 20, 10, 0, 0, 0, 20, 10, 20, 10, 40, 40, 40 }, // reverse strand, genomic, aligned
1662 { 40, 40, 40, 10, 20, 10, 20, 0, 0, 0, 10, 20, 10, 30, 50, 50, 50 }, // reverse strand, native, aligned
1663 { 30, 10, 20, 10, 0, 0, 0, 20, 10, 20, 10 }, // reverse strand, genomic, aligned, clipped
1664 { 10, 20, 10, 20, 0, 0, 0, 10, 20, 10, 30 } // reverse strand, native, aligned, clipped
1665 }
1666 );
1667 }
1668 { // hard clips only: input and expectations are identical to the plain 4=3D4= case
1669 SCOPED_TRACE("CIGAR: 2H4=3D4=3H");
1670 BamRecordTests::CheckFrameTagsClippedAndAligned(
1671 "2H4=3D4=3H", // CIGAR
1672 { 10, 20, 10, 20, 10, 20, 10, 30 }, // input
1673 {
1674 { 10, 20, 10, 20, 10, 20, 10, 30 }, // forward strand, genomic
1675 { 10, 20, 10, 20, 10, 20, 10, 30 }, // forward strand, native
1676 { 10, 20, 10, 20, 0, 0, 0, 10, 20, 10, 30 }, // forward strand, genomic, aligned
1677 { 10, 20, 10, 20, 0, 0, 0, 10, 20, 10, 30 }, // forward strand, native, aligned
1678 { 10, 20, 10, 20, 0, 0, 0, 10, 20, 10, 30 }, // forward strand, genomic, aligned, clipped
1679 { 10, 20, 10, 20, 0, 0, 0, 10, 20, 10, 30 }, // forward strand, native, aligned, clipped
1680 { 30, 10, 20, 10, 20, 10, 20, 10 }, // reverse strand, genomic
1681 { 10, 20, 10, 20, 10, 20, 10, 30 }, // reverse strand, native
1682 { 30, 10, 20, 10, 0, 0, 0, 20, 10, 20, 10 }, // reverse strand, genomic, aligned
1683 { 10, 20, 10, 20, 0, 0, 0, 10, 20, 10, 30 }, // reverse strand, native, aligned
1684 { 30, 10, 20, 10, 0, 0, 0, 20, 10, 20, 10 }, // reverse strand, genomic, aligned, clipped
1685 { 10, 20, 10, 20, 0, 0, 0, 10, 20, 10, 30 } // reverse strand, native, aligned, clipped
1686 }
1687 );
1688 }
1689 { // hard + soft clips: input and expectations are identical to the 3S4=3D4=3S case
1690 SCOPED_TRACE("CIGAR: 2H3S4=3D4=3S3H");
1691 BamRecordTests::CheckFrameTagsClippedAndAligned(
1692 "2H3S4=3D4=3S3H", // CIGAR
1693 { 40, 40, 40, 10, 20, 10, 20, 10, 20, 10, 30, 50, 50, 50 }, // input
1694 {
1695 { 40, 40, 40, 10, 20, 10, 20, 10, 20, 10, 30, 50, 50, 50 }, // forward strand, genomic
1696 { 40, 40, 40, 10, 20, 10, 20, 10, 20, 10, 30, 50, 50, 50 }, // forward strand, native
1697 { 40, 40, 40, 10, 20, 10, 20, 0, 0, 0, 10, 20, 10, 30, 50, 50, 50 }, // forward strand, genomic, aligned
1698 { 40, 40, 40, 10, 20, 10, 20, 0, 0, 0, 10, 20, 10, 30, 50, 50, 50 }, // forward strand, native, aligned
1699 { 10, 20, 10, 20, 0, 0, 0, 10, 20, 10, 30 }, // forward strand, genomic, aligned, clipped
1700 { 10, 20, 10, 20, 0, 0, 0, 10, 20, 10, 30 }, // forward strand, native, aligned, clipped
1701 { 50, 50, 50, 30, 10, 20, 10, 20, 10, 20, 10, 40, 40, 40 }, // reverse strand, genomic
1702 { 40, 40, 40, 10, 20, 10, 20, 10, 20, 10, 30, 50, 50, 50 }, // reverse strand, native
1703 { 50, 50, 50, 30, 10, 20, 10, 0, 0, 0, 20, 10, 20, 10, 40, 40, 40 }, // reverse strand, genomic, aligned
1704 { 40, 40, 40, 10, 20, 10, 20, 0, 0, 0, 10, 20, 10, 30, 50, 50, 50 }, // reverse strand, native, aligned
1705 { 30, 10, 20, 10, 0, 0, 0, 20, 10, 20, 10 }, // reverse strand, genomic, aligned, clipped
1706 { 10, 20, 10, 20, 0, 0, 0, 10, 20, 10, 30 } // reverse strand, native, aligned, clipped
1707 }
1708 );
1709 }
1710 }
1711
// Checks base-valued pulse tag data against six CIGAR layouts: deletion only, mixed
// deletion/insertion, padded insertion, soft clips, hard clips, and both clip kinds.
//
// Each call passes the CIGAR string, the basecalled sequence (seqBases), the pulse
// calls, the raw tag data, and two sets of twelve expected strings: one for all
// pulses and one for basecalls only.
// What the data shows:
//   - the upper-case characters of the pulse calls spell out seqBases, and the
//     basecalls-only rows contain exactly those positions of the tag data;
//   - aligned rows show '-' at D positions and '*' at P positions;
//   - clipped rows drop the bases that sit under S operations; H operations change nothing;
//   - reverse-strand genomic rows are the reverse complement of the native rows;
//   - every aligned entry of the "all pulses" set is the empty string.
// NOTE(review): presumably aligned output is not produced for pulse-level data;
// confirm against CheckPulseBaseTags, which is defined outside this chunk.
TEST(BamRecordTest,PulseBaseTags)1712 TEST(BamRecordTest, PulseBaseTags)
1713 {
1714 { // deletion only
1715 SCOPED_TRACE("CIGAR: 4=3D4=");
1716 BamRecordTests::CheckPulseBaseTags(
1717 "4=3D4=", // CIGAR
1718 "AACCGTTA", // seqBases
1719 "AAaaCCGggTTA", // pulseCalls
1720 "AAaaCCGggTTA", // tag data
1721
1722 { // all pulses
1723
1724 "AAaaCCGggTTA", // forward strand, genomic
1725 "AAaaCCGggTTA", // forward strand, native
1726 "", // forward strand, genomic, aligned
1727 "", // forward strand, native, aligned
1728 "", // forward strand, genomic, aligned, clipped
1729 "", // forward strand, native, aligned, clipped
1730 "TAAccCGGttTT", // reverse strand, genomic
1731 "AAaaCCGggTTA", // reverse strand, native
1732 "", // reverse strand, genomic, aligned
1733 "", // reverse strand, native, aligned
1734 "", // reverse strand, genomic, aligned, clipped
1735 "" // reverse strand, native, aligned, clipped
1736 },
1737 { // basecalls only
1738
1739 "AACCGTTA", // forward strand, genomic
1740 "AACCGTTA", // forward strand, native
1741 "AACC---GTTA", // forward strand, genomic, aligned
1742 "AACC---GTTA", // forward strand, native, aligned
1743 "AACC---GTTA", // forward strand, genomic, aligned, clipped
1744 "AACC---GTTA", // forward strand, native, aligned, clipped
1745 "TAACGGTT", // reverse strand, genomic
1746 "AACCGTTA", // reverse strand, native
1747 "TAAC---GGTT", // reverse strand, genomic, aligned
1748 "AACC---GTTA", // reverse strand, native, aligned
1749 "TAAC---GGTT", // reverse strand, genomic, aligned, clipped
1750 "AACC---GTTA" // reverse strand, native, aligned, clipped
1751 }
1752 );
1753 }
1754 { // 1D, 2I, 2D: inserted bases "TA" sit between the '-' gaps
1755 SCOPED_TRACE("CIGAR: 4=1D2I2D4=");
1756 BamRecordTests::CheckPulseBaseTags(
1757 "4=1D2I2D4=", // CIGAR
1758 "ATCCTAGGTT", // seqBases
1759 "ATttCCTtAGGggTT", // pulseCalls
1760 "ATttCCTtAGGggTT", // tag data
1761
1762 { // all pulses
1763
1764 "ATttCCTtAGGggTT", // forward strand, genomic
1765 "ATttCCTtAGGggTT", // forward strand, native
1766 "", // forward strand, genomic, aligned
1767 "", // forward strand, native, aligned
1768 "", // forward strand, genomic, aligned, clipped
1769 "", // forward strand, native, aligned, clipped
1770 "AAccCCTaAGGaaAT", // reverse strand, genomic
1771 "ATttCCTtAGGggTT", // reverse strand, native
1772 "", // reverse strand, genomic, aligned
1773 "", // reverse strand, native, aligned
1774 "", // reverse strand, genomic, aligned, clipped
1775 "" // reverse strand, native, aligned, clipped
1776 },
1777 { // basecalls only
1778
1779 "ATCCTAGGTT", // forward strand, genomic
1780 "ATCCTAGGTT", // forward strand, native
1781 "ATCC-TA--GGTT", // forward strand, genomic, aligned
1782 "ATCC-TA--GGTT", // forward strand, native, aligned
1783 "ATCC-TA--GGTT", // forward strand, genomic, aligned, clipped
1784 "ATCC-TA--GGTT", // forward strand, native, aligned, clipped
1785 "AACCTAGGAT", // reverse strand, genomic
1786 "ATCCTAGGTT", // reverse strand, native
1787 "AACC-TA--GGAT", // reverse strand, genomic, aligned
1788 "ATCC--TA-GGTT", // reverse strand, native, aligned
1789 "AACC-TA--GGAT", // reverse strand, genomic, aligned, clipped
1790 "ATCC--TA-GGTT" // reverse strand, native, aligned, clipped
1791 }
1792 );
1793 }
1794 { // padded insertion: '*' marks the P positions in the aligned rows
1795 SCOPED_TRACE("CIGAR: 4=1D2P2I2P2D4=");
1796 BamRecordTests::CheckPulseBaseTags(
1797 "4=1D2P2I2P2D4=", // CIGAR
1798 "ATCCTAGGTT", // seqBases
1799 "ATttCCTtAGGggTT", // pulseCalls
1800 "ATttCCTtAGGggTT", // tag data
1801 { // all pulses
1802 "ATttCCTtAGGggTT", // forward strand, genomic
1803 "ATttCCTtAGGggTT", // forward strand, native
1804 "", // forward strand, genomic, aligned
1805 "", // forward strand, native, aligned
1806 "", // forward strand, genomic, aligned, clipped
1807 "", // forward strand, native, aligned, clipped
1808 "AAccCCTaAGGaaAT", // reverse strand, genomic
1809 "ATttCCTtAGGggTT", // reverse strand, native
1810 "", // reverse strand, genomic, aligned
1811 "", // reverse strand, native, aligned
1812 "", // reverse strand, genomic, aligned, clipped
1813 "" // reverse strand, native, aligned, clipped
1814 },
1815 { // basecalls only
1816 "ATCCTAGGTT", // forward strand, genomic
1817 "ATCCTAGGTT", // forward strand, native
1818 "ATCC-**TA**--GGTT", // forward strand, genomic, aligned
1819 "ATCC-**TA**--GGTT", // forward strand, native, aligned
1820 "ATCC-**TA**--GGTT", // forward strand, genomic, aligned, clipped
1821 "ATCC-**TA**--GGTT", // forward strand, native, aligned, clipped
1822 "AACCTAGGAT", // reverse strand, genomic
1823 "ATCCTAGGTT", // reverse strand, native
1824 "AACC-**TA**--GGAT", // reverse strand, genomic, aligned
1825 "ATCC--**TA**-GGTT", // reverse strand, native, aligned
1826 "AACC-**TA**--GGAT", // reverse strand, genomic, aligned, clipped
1827 "ATCC--**TA**-GGTT" // reverse strand, native, aligned, clipped
1828 }
1829 );
1830 }
1831 { // soft clips: clipped rows drop the three bases under each 3S
1832 SCOPED_TRACE("CIGAR: 3S4=3D4=3S");
1833 BamRecordTests::CheckPulseBaseTags(
1834 "3S4=3D4=3S", // CIGAR
1835 "TTTAACCGTTACCG", // seqBases
1836 "TTTttAACCccGTTAaaCCG", // pulseCalls
1837 "TTTttAACCccGTTAaaCCG", // tag data
1838
1839 { // all pulses
1840
1841 "TTTttAACCccGTTAaaCCG", // forward strand, genomic
1842 "TTTttAACCccGTTAaaCCG", // forward strand, native
1843 "", // forward strand, genomic, aligned
1844 "", // forward strand, native, aligned
1845 "", // forward strand, genomic, aligned, clipped
1846 "", // forward strand, native, aligned, clipped
1847 "CGGttTAACggGGTTaaAAA", // reverse strand, genomic
1848 "TTTttAACCccGTTAaaCCG", // reverse strand, native
1849 "", // reverse strand, genomic, aligned
1850 "", // reverse strand, native, aligned
1851 "", // reverse strand, genomic, aligned, clipped
1852 "" // reverse strand, native, aligned, clipped
1853 },
1854 { // basecalls only
1855
1856 "TTTAACCGTTACCG", // forward strand, genomic
1857 "TTTAACCGTTACCG", // forward strand, native
1858 "TTTAACC---GTTACCG", // forward strand, genomic, aligned
1859 "TTTAACC---GTTACCG", // forward strand, native, aligned
1860 "AACC---GTTA", // forward strand, genomic, aligned, clipped
1861 "AACC---GTTA", // forward strand, native, aligned, clipped
1862 "CGGTAACGGTTAAA", // reverse strand, genomic
1863 "TTTAACCGTTACCG", // reverse strand, native
1864 "CGGTAAC---GGTTAAA", // reverse strand, genomic, aligned
1865 "TTTAACC---GTTACCG", // reverse strand, native, aligned
1866 "TAAC---GGTT", // reverse strand, genomic, aligned, clipped
1867 "AACC---GTTA" // reverse strand, native, aligned, clipped
1868 }
1869 );
1870 }
1871 { // hard clips only: data and expectations are identical to the plain 4=3D4= case
1872 SCOPED_TRACE("CIGAR: 2H4=3D4=3H");
1873 BamRecordTests::CheckPulseBaseTags(
1874 "2H4=3D4=3H", // CIGAR
1875 "AACCGTTA", // seqBases
1876 "AAaaCCGggTTA", // pulseCalls
1877 "AAaaCCGggTTA", // tag data
1878
1879 { // all pulses
1880
1881 "AAaaCCGggTTA", // forward strand, genomic
1882 "AAaaCCGggTTA", // forward strand, native
1883 "", // forward strand, genomic, aligned
1884 "", // forward strand, native, aligned
1885 "", // forward strand, genomic, aligned, clipped
1886 "", // forward strand, native, aligned, clipped
1887 "TAAccCGGttTT", // reverse strand, genomic
1888 "AAaaCCGggTTA", // reverse strand, native
1889 "", // reverse strand, genomic, aligned
1890 "", // reverse strand, native, aligned
1891 "", // reverse strand, genomic, aligned, clipped
1892 "" // reverse strand, native, aligned, clipped
1893 },
1894 { // basecalls only
1895
1896 "AACCGTTA", // forward strand, genomic
1897 "AACCGTTA", // forward strand, native
1898 "AACC---GTTA", // forward strand, genomic, aligned
1899 "AACC---GTTA", // forward strand, native, aligned
1900 "AACC---GTTA", // forward strand, genomic, aligned, clipped
1901 "AACC---GTTA", // forward strand, native, aligned, clipped
1902 "TAACGGTT", // reverse strand, genomic
1903 "AACCGTTA", // reverse strand, native
1904 "TAAC---GGTT", // reverse strand, genomic, aligned
1905 "AACC---GTTA", // reverse strand, native, aligned
1906 "TAAC---GGTT", // reverse strand, genomic, aligned, clipped
1907 "AACC---GTTA" // reverse strand, native, aligned, clipped
1908 }
1909 );
1910 }
1911 { // hard + soft clips: data and expectations are identical to the 3S4=3D4=3S case
1912 SCOPED_TRACE("CIGAR: 2H3S4=3D4=3S3H");
1913 BamRecordTests::CheckPulseBaseTags(
1914 "2H3S4=3D4=3S3H", // CIGAR
1915 "TTTAACCGTTACCG", // seqBases
1916 "TTTttAACCccGTTAaaCCG", // pulseCalls
1917 "TTTttAACCccGTTAaaCCG", // tag data
1918
1919 { // all pulses
1920
1921 "TTTttAACCccGTTAaaCCG", // forward strand, genomic
1922 "TTTttAACCccGTTAaaCCG", // forward strand, native
1923 "", // forward strand, genomic, aligned
1924 "", // forward strand, native, aligned
1925 "", // forward strand, genomic, aligned, clipped
1926 "", // forward strand, native, aligned, clipped
1927 "CGGttTAACggGGTTaaAAA", // reverse strand, genomic
1928 "TTTttAACCccGTTAaaCCG", // reverse strand, native
1929 "", // reverse strand, genomic, aligned
1930 "", // reverse strand, native, aligned
1931 "", // reverse strand, genomic, aligned, clipped
1932 "" // reverse strand, native, aligned, clipped
1933 },
1934 { // basecalls only
1935
1936 "TTTAACCGTTACCG", // forward strand, genomic
1937 "TTTAACCGTTACCG", // forward strand, native
1938 "TTTAACC---GTTACCG", // forward strand, genomic, aligned
1939 "TTTAACC---GTTACCG", // forward strand, native, aligned
1940 "AACC---GTTA", // forward strand, genomic, aligned, clipped
1941 "AACC---GTTA", // forward strand, native, aligned, clipped
1942 "CGGTAACGGTTAAA", // reverse strand, genomic
1943 "TTTAACCGTTACCG", // reverse strand, native
1944 "CGGTAAC---GGTTAAA", // reverse strand, genomic, aligned
1945 "TTTAACC---GTTACCG", // reverse strand, native, aligned
1946 "TAAC---GGTT", // reverse strand, genomic, aligned, clipped
1947 "AACC---GTTA" // reverse strand, native, aligned, clipped
1948 }
1949 );
1950 }
1951 }
1952
// Checks quality-valued pulse tag data against six CIGAR layouts: deletion only, mixed
// deletion/insertion, padded insertion, soft clips, hard clips, and both clip kinds.
//
// Each call passes the CIGAR string, the basecalled sequence (seqBases), the pulse
// calls, the raw quality string (tag data), and two sets of twelve expected strings:
// one for all pulses and one for basecalls only.
// What the data shows:
//   - the tag data carries '!' at every lower-case pulse call, and the basecalls-only
//     rows keep exactly the characters at the upper-case positions;
//   - aligned rows show '!' at D and P positions;
//   - clipped rows drop the characters that sit under S operations; H operations change nothing;
//   - reverse-strand genomic rows are the native rows reversed;
//   - every aligned entry of the "all pulses" set is the empty string.
// NOTE(review): presumably aligned output is not produced for pulse-level data;
// confirm against CheckPulseQualityTags, which is defined outside this chunk.
TEST(BamRecordTest,PulseQualityTags)1953 TEST(BamRecordTest, PulseQualityTags)
1954 {
1955 { // deletion only
1956 SCOPED_TRACE("CIGAR: 4=3D4=");
1957 BamRecordTests::CheckPulseQualityTags(
1958 "4=3D4=", // CIGAR
1959 "AACCGTTA", // seqBases
1960 "AAaaCCGggTTA", // pulseCalls
1961 "?]!!?]?!!]?@", // tag data
1962
1963 { // all pulses
1964
1965 "?]!!?]?!!]?@", // forward strand, genomic
1966 "?]!!?]?!!]?@", // forward strand, native
1967 "", // forward strand, genomic, aligned
1968 "", // forward strand, native, aligned
1969 "", // forward strand, genomic, aligned, clipped
1970 "", // forward strand, native, aligned, clipped
1971 "@?]!!?]?!!]?", // reverse strand, genomic
1972 "?]!!?]?!!]?@", // reverse strand, native
1973 "", // reverse strand, genomic, aligned
1974 "", // reverse strand, native, aligned
1975 "", // reverse strand, genomic, aligned, clipped
1976 "" // reverse strand, native, aligned, clipped
1977 },
1978 { // basecalls only
1979
1980 "?]?]?]?@", // forward strand, genomic
1981 "?]?]?]?@", // forward strand, native
1982 "?]?]!!!?]?@", // forward strand, genomic, aligned
1983 "?]?]!!!?]?@", // forward strand, native, aligned
1984 "?]?]!!!?]?@", // forward strand, genomic, aligned, clipped
1985 "?]?]!!!?]?@", // forward strand, native, aligned, clipped
1986 "@?]?]?]?", // reverse strand, genomic
1987 "?]?]?]?@", // reverse strand, native
1988 "@?]?!!!]?]?", // reverse strand, genomic, aligned
1989 "?]?]!!!?]?@", // reverse strand, native, aligned
1990 "@?]?!!!]?]?", // reverse strand, genomic, aligned, clipped
1991 "?]?]!!!?]?@" // reverse strand, native, aligned, clipped
1992 }
1993 );
1994 }
1995 { // 1D, 2I, 2D: inserted qualities "87" sit between the '!'-filled gaps
1996 SCOPED_TRACE("CIGAR: 4=1D2I2D4=");
1997 BamRecordTests::CheckPulseQualityTags(
1998 "4=1D2I2D4=", // CIGAR
1999 "ATCCTAGGTT", // seqBases
2000 "ATttCCTtAGGggTT", // pulseCalls
2001 "?]!!?]8!7?]!!?@", // tag data
2002
2003 { // all pulses
2004
2005 "?]!!?]8!7?]!!?@", // forward strand, genomic
2006 "?]!!?]8!7?]!!?@", // forward strand, native
2007 "", // forward strand, genomic, aligned
2008 "", // forward strand, native, aligned
2009 "", // forward strand, genomic, aligned, clipped
2010 "", // forward strand, native, aligned, clipped
2011 "@?!!]?7!8]?!!]?", // reverse strand, genomic
2012 "?]!!?]8!7?]!!?@", // reverse strand, native
2013 "", // reverse strand, genomic, aligned
2014 "", // reverse strand, native, aligned
2015 "", // reverse strand, genomic, aligned, clipped
2016 "" // reverse strand, native, aligned, clipped
2017 },
2018 { // basecalls only
2019
2020 "?]?]87?]?@", // forward strand, genomic
2021 "?]?]87?]?@", // forward strand, native
2022 "?]?]!87!!?]?@", // forward strand, genomic, aligned
2023 "?]?]!87!!?]?@", // forward strand, native, aligned
2024 "?]?]!87!!?]?@", // forward strand, genomic, aligned, clipped
2025 "?]?]!87!!?]?@", // forward strand, native, aligned, clipped
2026 "@?]?78]?]?", // reverse strand, genomic
2027 "?]?]87?]?@", // reverse strand, native
2028 "@?]?!78!!]?]?", // reverse strand, genomic, aligned
2029 "?]?]!!87!?]?@", // reverse strand, native, aligned
2030 "@?]?!78!!]?]?", // reverse strand, genomic, aligned, clipped
2031 "?]?]!!87!?]?@" // reverse strand, native, aligned, clipped
2032 }
2033 );
2034 }
2035 { // padded insertion: P positions are '!'-filled like D positions
2036 SCOPED_TRACE("CIGAR: 4=1D2P2I2P2D4=");
2037 BamRecordTests::CheckPulseQualityTags(
2038 "4=1D2P2I2P2D4=", // CIGAR
2039 "ATCCTAGGTT", // seqBases
2040 "ATttCCTtAGGggTT", // pulseCalls
2041 "?]!!?]8!7?]!!?@", // tag data
2042 { // all pulses
2043 "?]!!?]8!7?]!!?@", // forward strand, genomic
2044 "?]!!?]8!7?]!!?@", // forward strand, native
2045 "", // forward strand, genomic, aligned
2046 "", // forward strand, native, aligned
2047 "", // forward strand, genomic, aligned, clipped
2048 "", // forward strand, native, aligned, clipped
2049 "@?!!]?7!8]?!!]?", // reverse strand, genomic
2050 "?]!!?]8!7?]!!?@", // reverse strand, native
2051 "", // reverse strand, genomic, aligned
2052 "", // reverse strand, native, aligned
2053 "", // reverse strand, genomic, aligned, clipped
2054 "" // reverse strand, native, aligned, clipped
2055 },
2056 { // basecalls only
2057 "?]?]87?]?@", // forward strand, genomic
2058 "?]?]87?]?@", // forward strand, native
2059 "?]?]!!!87!!!!?]?@", // forward strand, genomic, aligned
2060 "?]?]!!!87!!!!?]?@", // forward strand, native, aligned
2061 "?]?]!!!87!!!!?]?@", // forward strand, genomic, aligned, clipped
2062 "?]?]!!!87!!!!?]?@", // forward strand, native, aligned, clipped
2063 "@?]?78]?]?", // reverse strand, genomic
2064 "?]?]87?]?@", // reverse strand, native
2065 "@?]?!!!78!!!!]?]?", // reverse strand, genomic, aligned
2066 "?]?]!!!!87!!!?]?@", // reverse strand, native, aligned
2067 "@?]?!!!78!!!!]?]?", // reverse strand, genomic, aligned, clipped
2068 "?]?]!!!!87!!!?]?@" // reverse strand, native, aligned, clipped
2069 }
2070 );
2071 }
2072 { // soft clips: clipped rows drop the 'vvv' / 'xxx' qualities under each 3S
2073 SCOPED_TRACE("CIGAR: 3S4=3D4=3S");
2074 BamRecordTests::CheckPulseQualityTags(
2075 "3S4=3D4=3S", // CIGAR
2076 "TTTAACCGTTACCG", // seqBases
2077 "TTTttAACCccGTTAaaCCG", // pulseCalls
2078 "vvv!!?]?]!!?]?@!!xxx", // tag data
2079
2080 { // all pulses
2081
2082 "vvv!!?]?]!!?]?@!!xxx", // forward strand, genomic
2083 "vvv!!?]?]!!?]?@!!xxx", // forward strand, native
2084 "", // forward strand, genomic, aligned
2085 "", // forward strand, native, aligned
2086 "", // forward strand, genomic, aligned, clipped
2087 "", // forward strand, native, aligned, clipped
2088 "xxx!!@?]?!!]?]?!!vvv", // reverse strand, genomic
2089 "vvv!!?]?]!!?]?@!!xxx", // reverse strand, native
2090 "", // reverse strand, genomic, aligned
2091 "", // reverse strand, native, aligned
2092 "", // reverse strand, genomic, aligned, clipped
2093 "" // reverse strand, native, aligned, clipped
2094 },
2095 { // basecalls only
2096
2097 "vvv?]?]?]?@xxx", // forward strand, genomic
2098 "vvv?]?]?]?@xxx", // forward strand, native
2099 "vvv?]?]!!!?]?@xxx", // forward strand, genomic, aligned
2100 "vvv?]?]!!!?]?@xxx", // forward strand, native, aligned
2101 "?]?]!!!?]?@", // forward strand, genomic, aligned, clipped
2102 "?]?]!!!?]?@", // forward strand, native, aligned, clipped
2103 "xxx@?]?]?]?vvv", // reverse strand, genomic
2104 "vvv?]?]?]?@xxx", // reverse strand, native
2105 "xxx@?]?!!!]?]?vvv", // reverse strand, genomic, aligned
2106 "vvv?]?]!!!?]?@xxx", // reverse strand, native, aligned
2107 "@?]?!!!]?]?", // reverse strand, genomic, aligned, clipped
2108 "?]?]!!!?]?@" // reverse strand, native, aligned, clipped
2109 }
2110 );
2111 }
2112 { // hard clips only: data and expectations are identical to the plain 4=3D4= case
2113 SCOPED_TRACE("CIGAR: 2H4=3D4=3H");
2114 BamRecordTests::CheckPulseQualityTags(
2115 "2H4=3D4=3H", // CIGAR
2116 "AACCGTTA", // seqBases
2117 "AAaaCCGggTTA", // pulseCalls
2118 "?]!!?]?!!]?@", // tag data
2119
2120 { // all pulses
2121
2122 "?]!!?]?!!]?@", // forward strand, genomic
2123 "?]!!?]?!!]?@", // forward strand, native
2124 "", // forward strand, genomic, aligned
2125 "", // forward strand, native, aligned
2126 "", // forward strand, genomic, aligned, clipped
2127 "", // forward strand, native, aligned, clipped
2128 "@?]!!?]?!!]?", // reverse strand, genomic
2129 "?]!!?]?!!]?@", // reverse strand, native
2130 "", // reverse strand, genomic, aligned
2131 "", // reverse strand, native, aligned
2132 "", // reverse strand, genomic, aligned, clipped
2133 "" // reverse strand, native, aligned, clipped
2134 },
2135 { // basecalls only
2136
2137 "?]?]?]?@", // forward strand, genomic
2138 "?]?]?]?@", // forward strand, native
2139 "?]?]!!!?]?@", // forward strand, genomic, aligned
2140 "?]?]!!!?]?@", // forward strand, native, aligned
2141 "?]?]!!!?]?@", // forward strand, genomic, aligned, clipped
2142 "?]?]!!!?]?@", // forward strand, native, aligned, clipped
2143 "@?]?]?]?", // reverse strand, genomic
2144 "?]?]?]?@", // reverse strand, native
2145 "@?]?!!!]?]?", // reverse strand, genomic, aligned
2146 "?]?]!!!?]?@", // reverse strand, native, aligned
2147 "@?]?!!!]?]?", // reverse strand, genomic, aligned, clipped
2148 "?]?]!!!?]?@" // reverse strand, native, aligned, clipped
2149 }
2150 );
2151 }
2152 { // hard + soft clips: data and expectations are identical to the 3S4=3D4=3S case
2153 SCOPED_TRACE("CIGAR: 2H3S4=3D4=3S3H");
2154 BamRecordTests::CheckPulseQualityTags(
2155 "2H3S4=3D4=3S3H", // CIGAR
2156 "TTTAACCGTTACCG", // seqBases
2157 "TTTttAACCccGTTAaaCCG", // pulseCalls
2158 "vvv!!?]?]!!?]?@!!xxx", // tag data
2159
2160 { // all pulses
2161
2162 "vvv!!?]?]!!?]?@!!xxx", // forward strand, genomic
2163 "vvv!!?]?]!!?]?@!!xxx", // forward strand, native
2164 "", // forward strand, genomic, aligned
2165 "", // forward strand, native, aligned
2166 "", // forward strand, genomic, aligned, clipped
2167 "", // forward strand, native, aligned, clipped
2168 "xxx!!@?]?!!]?]?!!vvv", // reverse strand, genomic
2169 "vvv!!?]?]!!?]?@!!xxx", // reverse strand, native
2170 "", // reverse strand, genomic, aligned
2171 "", // reverse strand, native, aligned
2172 "", // reverse strand, genomic, aligned, clipped
2173 "" // reverse strand, native, aligned, clipped
2174 },
2175 { // basecalls only
2176
2177 "vvv?]?]?]?@xxx", // forward strand, genomic
2178 "vvv?]?]?]?@xxx", // forward strand, native
2179 "vvv?]?]!!!?]?@xxx", // forward strand, genomic, aligned
2180 "vvv?]?]!!!?]?@xxx", // forward strand, native, aligned
2181 "?]?]!!!?]?@", // forward strand, genomic, aligned, clipped
2182 "?]?]!!!?]?@", // forward strand, native, aligned, clipped
2183 "xxx@?]?]?]?vvv", // reverse strand, genomic
2184 "vvv?]?]?]?@xxx", // reverse strand, native
2185 "xxx@?]?!!!]?]?vvv", // reverse strand, genomic, aligned
2186 "vvv?]?]!!!?]?@xxx", // reverse strand, native, aligned
2187 "@?]?!!!]?]?", // reverse strand, genomic, aligned, clipped
2188 "?]?]!!!?]?@" // reverse strand, native, aligned, clipped
2189 }
2190 );
2191 }
2192 }
2193
TEST(BamRecordTest,PulseFrameTags)2194 TEST(BamRecordTest, PulseFrameTags)
2195 {
2196 {
2197 SCOPED_TRACE("CIGAR: 4=3D4=");
2198 BamRecordTests::CheckPulseFrameTags(
2199 "4=3D4=", // CIGAR
2200 "AACCGTTA", // seqBases
2201 "AAaaCCGggTTA", // pulseCalls
2202 { 10, 20, 0,0, 10, 20, 10, 0,0, 20, 10, 30 }, // tag data
2203
2204 { // all pulses
2205
2206 { 10, 20, 0,0, 10, 20, 10, 0,0, 20, 10, 30 }, // forward strand, genomic
2207 { 10, 20, 0,0, 10, 20, 10, 0,0, 20, 10, 30 }, // forward strand, native
2208 { }, // forward strand, genomic, aligned
2209 { }, // forward strand, native, aligned
2210 { }, // forward strand, genomic, aligned, clipped
2211 { }, // forward strand, native, aligned, clipped
2212 { 30, 10, 20, 0,0, 10, 20, 10, 0,0, 20, 10 }, // reverse strand, genomic
2213 { 10, 20, 0,0, 10, 20, 10, 0,0, 20, 10, 30 }, // reverse strand, native
2214 { }, // reverse strand, genomic, aligned
2215 { }, // reverse strand, native, aligned
2216 { }, // reverse strand, genomic, aligned, clipped
2217 { } // reverse strand, native, aligned, clipped
2218 },
2219 { // basecalls only
2220
2221 { 10, 20, 10, 20, 10, 20, 10, 30 }, // forward strand, genomic
2222 { 10, 20, 10, 20, 10, 20, 10, 30 }, // forward strand, native
2223 { 10, 20, 10, 20, 0, 0, 0, 10, 20, 10, 30 }, // forward strand, genomic, aligned
2224 { 10, 20, 10, 20, 0, 0, 0, 10, 20, 10, 30 }, // forward strand, native, aligned
2225 { 10, 20, 10, 20, 0, 0, 0, 10, 20, 10, 30 }, // forward strand, genomic, aligned, clipped
2226 { 10, 20, 10, 20, 0, 0, 0, 10, 20, 10, 30 }, // forward strand, native, aligned, clipped
2227 { 30, 10, 20, 10, 20, 10, 20, 10 }, // reverse strand, genomic
2228 { 10, 20, 10, 20, 10, 20, 10, 30 }, // reverse strand, native
2229 { 30, 10, 20, 10, 0, 0, 0, 20, 10, 20, 10 }, // reverse strand, genomic, aligned
2230 { 10, 20, 10, 20, 0, 0, 0, 10, 20, 10, 30 }, // reverse strand, native, aligned
2231 { 30, 10, 20, 10, 0, 0, 0, 20, 10, 20, 10 }, // reverse strand, genomic, aligned, clipped
2232 { 10, 20, 10, 20, 0, 0, 0, 10, 20, 10, 30 } // reverse strand, native, aligned, clipped
2233 }
2234 );
2235 }
2236 {
2237 SCOPED_TRACE("CIGAR: 4=1D2I2D4=");
2238 BamRecordTests::CheckPulseFrameTags(
2239 "4=1D2I2D4=", // CIGAR
2240 "ATCCTAGGTT", // seqBases
2241 "ATttCCTtAGGggTT", // pulseCalls
2242 { 10, 20, 0, 0, 10, 20, 80, 0, 70, 10, 20, 0, 0, 10, 30 }, // tag data
2243
2244 { // all pulses
2245
2246 { 10, 20, 0, 0, 10, 20, 80, 0, 70, 10, 20, 0, 0, 10, 30 }, // forward strand, genomic
2247 { 10, 20, 0, 0, 10, 20, 80, 0, 70, 10, 20, 0, 0, 10, 30 }, // forward strand, native
2248 { }, // forward strand, genomic, aligned
2249 { }, // forward strand, native, aligned
2250 { }, // forward strand, genomic, aligned, clipped
2251 { }, // forward strand, native, aligned, clipped
2252 { 30, 10, 0, 0, 20, 10, 70, 0, 80, 20, 10, 0, 0, 20, 10 }, // reverse strand, genomic
2253 { 10, 20, 0, 0, 10, 20, 80, 0, 70, 10, 20, 0, 0, 10, 30 }, // reverse strand, native
2254 { }, // reverse strand, genomic, aligned
2255 { }, // reverse strand, native, aligned
2256 { }, // reverse strand, genomic, aligned, clipped
2257 { } // reverse strand, native, aligned, clipped
2258 },
2259 { // basecalls only
2260
2261 { 10, 20, 10, 20, 80, 70, 10, 20, 10, 30 }, // forward strand, genomic
2262 { 10, 20, 10, 20, 80, 70, 10, 20, 10, 30 }, // forward strand, native
2263 { 10, 20, 10, 20, 0, 80, 70, 0, 0, 10, 20, 10, 30 }, // forward strand, genomic, aligned
2264 { 10, 20, 10, 20, 0, 80, 70, 0, 0, 10, 20, 10, 30 }, // forward strand, native, aligned
2265 { 10, 20, 10, 20, 0, 80, 70, 0, 0, 10, 20, 10, 30 }, // forward strand, genomic, aligned, clipped
2266 { 10, 20, 10, 20, 0, 80, 70, 0, 0, 10, 20, 10, 30 }, // forward strand, native, aligned, clipped
2267 { 30, 10, 20, 10, 70, 80, 20, 10, 20, 10 }, // reverse strand, genomic
2268 { 10, 20, 10, 20, 80, 70, 10, 20, 10, 30 }, // reverse strand, native
2269 { 30, 10, 20, 10, 0, 70, 80, 0, 0, 20, 10, 20, 10 }, // reverse strand, genomic, aligned
2270 { 10, 20, 10, 20, 0, 0, 80, 70, 0, 10, 20, 10, 30 }, // reverse strand, native, aligned
2271 { 30, 10, 20, 10, 0, 70, 80, 0, 0, 20, 10, 20, 10 }, // reverse strand, genomic, aligned, clipped
2272 { 10, 20, 10, 20, 0, 0, 80, 70, 0, 10, 20, 10, 30 } // reverse strand, native, aligned, clipped
2273 }
2274 );
2275 }
2276 {
2277 SCOPED_TRACE("CIGAR: 4=1D2P2I2P2D4=");
2278 BamRecordTests::CheckPulseFrameTags(
2279 "4=1D2P2I2P2D4=", // CIGAR
2280 "ATCCTAGGTT", // seqBases
2281 "ATttCCTtAGGggTT", // pulseCalls
2282 { 10, 20, 0, 0, 10, 20, 80, 0, 70, 10, 20, 0, 0, 10, 30 }, // tag data
2283
2284 { // all pulses
2285
2286 { 10, 20, 0, 0, 10, 20, 80, 0, 70, 10, 20, 0, 0, 10, 30 }, // forward strand, genomic
2287 { 10, 20, 0, 0, 10, 20, 80, 0, 70, 10, 20, 0, 0, 10, 30 }, // forward strand, native
2288 { }, // forward strand, genomic, aligned
2289 { }, // forward strand, native, aligned
2290 { }, // forward strand, genomic, aligned, clipped
2291 { }, // forward strand, native, aligned, clipped
2292 { 30, 10, 0, 0, 20, 10, 70, 0, 80, 20, 10, 0, 0, 20, 10 }, // reverse strand, genomic
2293 { 10, 20, 0, 0, 10, 20, 80, 0, 70, 10, 20, 0, 0, 10, 30 }, // reverse strand, native
2294 { }, // reverse strand, genomic, aligned
2295 { }, // reverse strand, native, aligned
2296 { }, // reverse strand, genomic, aligned, clipped
2297 { } // reverse strand, native, aligned, clipped
2298 },
2299 { // basecalls only
2300
2301 { 10, 20, 10, 20, 80, 70, 10, 20, 10, 30 }, // forward strand, genomic
2302 { 10, 20, 10, 20, 80, 70, 10, 20, 10, 30 }, // forward strand, native
2303 { 10, 20, 10, 20, 0, 0, 0, 80, 70, 0, 0, 0, 0, 10, 20, 10, 30 }, // forward strand, genomic, aligned
2304 { 10, 20, 10, 20, 0, 0, 0, 80, 70, 0, 0, 0, 0, 10, 20, 10, 30 }, // forward strand, native, aligned
2305 { 10, 20, 10, 20, 0, 0, 0, 80, 70, 0, 0, 0, 0, 10, 20, 10, 30 }, // forward strand, genomic, aligned, clipped
2306 { 10, 20, 10, 20, 0, 0, 0, 80, 70, 0, 0, 0, 0, 10, 20, 10, 30 }, // forward strand, native, aligned, clipped
2307 { 30, 10, 20, 10, 70, 80, 20, 10, 20, 10 }, // reverse strand, genomic
2308 { 10, 20, 10, 20, 80, 70, 10, 20, 10, 30 }, // reverse strand, native
2309 { 30, 10, 20, 10, 0, 0, 0, 70, 80, 0, 0, 0, 0, 20, 10, 20, 10 }, // reverse strand, genomic, aligned
2310 { 10, 20, 10, 20, 0, 0, 0, 0, 80, 70, 0, 0, 0, 10, 20, 10, 30 }, // reverse strand, native, aligned
2311 { 30, 10, 20, 10, 0, 0, 0, 70, 80, 0, 0, 0, 0, 20, 10, 20, 10 }, // reverse strand, genomic, aligned, clipped
2312 { 10, 20, 10, 20, 0, 0, 0, 0, 80, 70, 0, 0, 0, 10, 20, 10, 30 } // reverse strand, native, aligned, clipped
2313 }
2314 );
2315 }
2316 {
2317 SCOPED_TRACE("CIGAR: 3S4=3D4=3S");
2318 BamRecordTests::CheckPulseFrameTags(
2319 "3S4=3D4=3S", // CIGAR
2320 "TTTAACCGTTACCG", // seqBases
2321 "TTTttAACCccGTTAaaCCG", // pulseCalls
2322 { 40, 40, 40, 0, 0, 10, 20, 10, 20, 0, 0, 10, 20, 10, 30, 0, 0, 50, 50, 50 }, // tag data
2323
2324 { // all pulses
2325
2326 { 40, 40, 40, 0, 0, 10, 20, 10, 20, 0, 0, 10, 20, 10, 30, 0, 0, 50, 50, 50 }, // forward strand, genomic
2327 { 40, 40, 40, 0, 0, 10, 20, 10, 20, 0, 0, 10, 20, 10, 30, 0, 0, 50, 50, 50 }, // forward strand, native
2328 { }, // forward strand, genomic, aligned
2329 { }, // forward strand, native, aligned
2330 { }, // forward strand, genomic, aligned, clipped
2331 { }, // forward strand, native, aligned, clipped
2332 { 50, 50, 50, 0, 0, 30, 10, 20, 10, 0, 0, 20, 10, 20, 10, 0, 0, 40, 40, 40 }, // reverse strand, genomic
2333 { 40, 40, 40, 0, 0, 10, 20, 10, 20, 0, 0, 10, 20, 10, 30, 0, 0, 50, 50, 50 }, // reverse strand, native
2334 { }, // reverse strand, genomic, aligned
2335 { }, // reverse strand, native, aligned
2336 { }, // reverse strand, genomic, aligned, clipped
2337 { } // reverse strand, native, aligned, clipped
2338 },
2339 { // basecalls only
2340
2341 { 40, 40, 40, 10, 20, 10, 20, 10, 20, 10, 30, 50, 50, 50 }, // forward strand, genomic
2342 { 40, 40, 40, 10, 20, 10, 20, 10, 20, 10, 30, 50, 50, 50 }, // forward strand, native
2343 { 40, 40, 40, 10, 20, 10, 20, 0, 0, 0, 10, 20, 10, 30, 50, 50, 50 }, // forward strand, genomic, aligned
2344 { 40, 40, 40, 10, 20, 10, 20, 0, 0, 0, 10, 20, 10, 30, 50, 50, 50 }, // forward strand, native, aligned
2345 { 10, 20, 10, 20, 0, 0, 0, 10, 20, 10, 30 }, // forward strand, genomic, aligned, clipped
2346 { 10, 20, 10, 20, 0, 0, 0, 10, 20, 10, 30 }, // forward strand, native, aligned, clipped
2347 { 50, 50, 50, 30, 10, 20, 10, 20, 10, 20, 10, 40, 40, 40 }, // reverse strand, genomic
2348 { 40, 40, 40, 10, 20, 10, 20, 10, 20, 10, 30, 50, 50, 50 }, // reverse strand, native
2349 { 50, 50, 50, 30, 10, 20, 10, 0, 0, 0, 20, 10, 20, 10, 40, 40, 40 }, // reverse strand, genomic, aligned
2350 { 40, 40, 40, 10, 20, 10, 20, 0, 0, 0, 10, 20, 10, 30, 50, 50, 50 }, // reverse strand, native, aligned
2351 { 30, 10, 20, 10, 0, 0, 0, 20, 10, 20, 10 }, // reverse strand, genomic, aligned, clipped
2352 { 10, 20, 10, 20, 0, 0, 0, 10, 20, 10, 30 } // reverse strand, native, aligned, clipped
2353 }
2354 );
2355 }
2356 {
2357 SCOPED_TRACE("CIGAR: 2H4=3D4=3H");
2358 BamRecordTests::CheckPulseFrameTags(
2359 "2H4=3D4=3H", // CIGAR
2360 "AACCGTTA", // seqBases
2361 "AAaaCCGggTTA", // pulseCalls
2362 { 10, 20, 0, 0, 10, 20, 10, 0, 0, 20, 10, 30 }, // tag data
2363
2364 { // all pulses
2365
2366 { 10, 20, 0, 0, 10, 20, 10, 0, 0, 20, 10, 30 }, // forward strand, genomic
2367 { 10, 20, 0, 0, 10, 20, 10, 0, 0, 20, 10, 30 }, // forward strand, native
2368 { }, // forward strand, genomic, aligned
2369 { }, // forward strand, native, aligned
2370 { }, // forward strand, genomic, aligned, clipped
2371 { }, // forward strand, native, aligned, clipped
2372 { 30, 10, 20, 0, 0, 10, 20, 10, 0, 0, 20, 10 }, // reverse strand, genomic
2373 { 10, 20, 0, 0, 10, 20, 10, 0, 0, 20, 10, 30 }, // reverse strand, native
2374 { }, // reverse strand, genomic, aligned
2375 { }, // reverse strand, native, aligned
2376 { }, // reverse strand, genomic, aligned, clipped
2377 { } // reverse strand, native, aligned, clipped
2378 },
2379 { // basecalls only
2380
2381 { 10, 20, 10, 20, 10, 20, 10, 30 }, // forward strand, genomic
2382 { 10, 20, 10, 20, 10, 20, 10, 30 }, // forward strand, native
2383 { 10, 20, 10, 20, 0, 0, 0, 10, 20, 10, 30 }, // forward strand, genomic, aligned
2384 { 10, 20, 10, 20, 0, 0, 0, 10, 20, 10, 30 }, // forward strand, native, aligned
2385 { 10, 20, 10, 20, 0, 0, 0, 10, 20, 10, 30 }, // forward strand, genomic, aligned, clipped
2386 { 10, 20, 10, 20, 0, 0, 0, 10, 20, 10, 30 }, // forward strand, native, aligned, clipped
2387 { 30, 10, 20, 10, 20, 10, 20, 10 }, // reverse strand, genomic
2388 { 10, 20, 10, 20, 10, 20, 10, 30 }, // reverse strand, native
2389 { 30, 10, 20, 10, 0, 0, 0, 20, 10, 20, 10 }, // reverse strand, genomic, aligned
2390 { 10, 20, 10, 20, 0, 0, 0, 10, 20, 10, 30 }, // reverse strand, native, aligned
2391 { 30, 10, 20, 10, 0, 0, 0, 20, 10, 20, 10 }, // reverse strand, genomic, aligned, clipped
2392 { 10, 20, 10, 20, 0, 0, 0, 10, 20, 10, 30 } // reverse strand, native, aligned, clipped
2393 }
2394 );
2395 }
2396 {
2397 SCOPED_TRACE("CIGAR: 2H3S4=3D4=3S3H");
2398 BamRecordTests::CheckPulseFrameTags(
2399 "2H3S4=3D4=3S3H", // CIGAR
2400 "TTTAACCGTTACCG", // seqBases
2401 "TTTttAACCccGTTAaaCCG", // pulseCalls
2402 { 40, 40, 40, 0, 0, 10, 20, 10, 20, 0, 0, 10, 20, 10, 30, 0, 0, 50, 50, 50 }, // tag data
2403
2404 { // all pulses
2405
2406 { 40, 40, 40, 0, 0, 10, 20, 10, 20, 0, 0, 10, 20, 10, 30, 0, 0, 50, 50, 50 }, // forward strand, genomic
2407 { 40, 40, 40, 0, 0, 10, 20, 10, 20, 0, 0, 10, 20, 10, 30, 0, 0, 50, 50, 50 }, // forward strand, native
2408 { }, // forward strand, genomic, aligned
2409 { }, // forward strand, native, aligned
2410 { }, // forward strand, genomic, aligned, clipped
2411 { }, // forward strand, native, aligned, clipped
2412 { 50, 50, 50, 0, 0, 30, 10, 20, 10, 0, 0, 20, 10, 20, 10, 0, 0, 40, 40, 40 }, // reverse strand, genomic
2413 { 40, 40, 40, 0, 0, 10, 20, 10, 20, 0, 0, 10, 20, 10, 30, 0, 0, 50, 50, 50 }, // reverse strand, native
2414 { }, // reverse strand, genomic, aligned
2415 { }, // reverse strand, native, aligned
2416 { }, // reverse strand, genomic, aligned, clipped
2417 { } // reverse strand, native, aligned, clipped
2418 },
2419 { // basecalls only
2420
2421 { 40, 40, 40, 10, 20, 10, 20, 10, 20, 10, 30, 50, 50, 50 }, // forward strand, genomic
2422 { 40, 40, 40, 10, 20, 10, 20, 10, 20, 10, 30, 50, 50, 50 }, // forward strand, native
2423 { 40, 40, 40, 10, 20, 10, 20, 0, 0, 0, 10, 20, 10, 30, 50, 50, 50 }, // forward strand, genomic, aligned
2424 { 40, 40, 40, 10, 20, 10, 20, 0, 0, 0, 10, 20, 10, 30, 50, 50, 50 }, // forward strand, native, aligned
2425 { 10, 20, 10, 20, 0, 0, 0, 10, 20, 10, 30 }, // forward strand, genomic, aligned, clipped
2426 { 10, 20, 10, 20, 0, 0, 0, 10, 20, 10, 30 }, // forward strand, native, aligned, clipped
2427 { 50, 50, 50, 30, 10, 20, 10, 20, 10, 20, 10, 40, 40, 40 }, // reverse strand, genomic
2428 { 40, 40, 40, 10, 20, 10, 20, 10, 20, 10, 30, 50, 50, 50 }, // reverse strand, native
2429 { 50, 50, 50, 30, 10, 20, 10, 0, 0, 0, 20, 10, 20, 10, 40, 40, 40 }, // reverse strand, genomic, aligned
2430 { 40, 40, 40, 10, 20, 10, 20, 0, 0, 0, 10, 20, 10, 30, 50, 50, 50 }, // reverse strand, native, aligned
2431 { 30, 10, 20, 10, 0, 0, 0, 20, 10, 20, 10 }, // reverse strand, genomic, aligned, clipped
2432 { 10, 20, 10, 20, 0, 0, 0, 10, 20, 10, 30 } // reverse strand, native, aligned, clipped
2433 }
2434 );
2435 }
2436 }
2437
// Drives BamRecordTests::CheckPulseUIntTags over a set of CIGARs.
//
// Each call supplies, in order: the CIGAR, the basecalled sequence, the pulse
// calls, the raw per-pulse tag values, and two tables of expected values (one
// for "all pulses", one for "basecalls only"). Every table lists 12 rows, in
// the fixed order noted beside the rows. Empty rows are part of the expected
// data and must stay empty.
TEST(BamRecordTest, PulseUIntTags)
{
    // Shared by "4=3D4=" and "2H4=3D4=3H": both CIGARs are checked against
    // exactly the same inputs and expectations; they differ only in the
    // leading/trailing H operations.
    const auto checkDeletionOnly = [](const char* cigar)
    {
        BamRecordTests::CheckPulseUIntTags(
            cigar,
            "AACCGTTA",       // seqBases
            "AAaaCCGggTTA",   // pulseCalls
            { 10, 20, 0, 0, 10, 20, 10, 0, 0, 20, 10, 30 },   // tag data

            {   // all pulses
                { 10, 20, 0, 0, 10, 20, 10, 0, 0, 20, 10, 30 },   // fwd, genomic
                { 10, 20, 0, 0, 10, 20, 10, 0, 0, 20, 10, 30 },   // fwd, native
                { },                                              // fwd, genomic, aligned
                { },                                              // fwd, native,  aligned
                { },                                              // fwd, genomic, aligned, clipped
                { },                                              // fwd, native,  aligned, clipped
                { 30, 10, 20, 0, 0, 10, 20, 10, 0, 0, 20, 10 },   // rev, genomic
                { 10, 20, 0, 0, 10, 20, 10, 0, 0, 20, 10, 30 },   // rev, native
                { },                                              // rev, genomic, aligned
                { },                                              // rev, native,  aligned
                { },                                              // rev, genomic, aligned, clipped
                { }                                               // rev, native,  aligned, clipped
            },
            {   // basecalls only
                { 10, 20, 10, 20, 10, 20, 10, 30 },            // fwd, genomic
                { 10, 20, 10, 20, 10, 20, 10, 30 },            // fwd, native
                { 10, 20, 10, 20, 0, 0, 0, 10, 20, 10, 30 },   // fwd, genomic, aligned
                { 10, 20, 10, 20, 0, 0, 0, 10, 20, 10, 30 },   // fwd, native,  aligned
                { 10, 20, 10, 20, 0, 0, 0, 10, 20, 10, 30 },   // fwd, genomic, aligned, clipped
                { 10, 20, 10, 20, 0, 0, 0, 10, 20, 10, 30 },   // fwd, native,  aligned, clipped
                { 30, 10, 20, 10, 20, 10, 20, 10 },            // rev, genomic
                { 10, 20, 10, 20, 10, 20, 10, 30 },            // rev, native
                { 30, 10, 20, 10, 0, 0, 0, 20, 10, 20, 10 },   // rev, genomic, aligned
                { 10, 20, 10, 20, 0, 0, 0, 10, 20, 10, 30 },   // rev, native,  aligned
                { 30, 10, 20, 10, 0, 0, 0, 20, 10, 20, 10 },   // rev, genomic, aligned, clipped
                { 10, 20, 10, 20, 0, 0, 0, 10, 20, 10, 30 }    // rev, native,  aligned, clipped
            }
        );
    };

    // Shared by "3S4=3D4=3S" and "2H3S4=3D4=3S3H": same inputs and
    // expectations; the CIGARs differ only in the leading/trailing H operations.
    const auto checkSoftClipped = [](const char* cigar)
    {
        BamRecordTests::CheckPulseUIntTags(
            cigar,
            "TTTAACCGTTACCG",         // seqBases
            "TTTttAACCccGTTAaaCCG",   // pulseCalls
            { 40, 40, 40, 0, 0, 10, 20, 10, 20, 0, 0, 10, 20, 10, 30, 0, 0, 50, 50, 50 },   // tag data

            {   // all pulses
                { 40, 40, 40, 0, 0, 10, 20, 10, 20, 0, 0, 10, 20, 10, 30, 0, 0, 50, 50, 50 },   // fwd, genomic
                { 40, 40, 40, 0, 0, 10, 20, 10, 20, 0, 0, 10, 20, 10, 30, 0, 0, 50, 50, 50 },   // fwd, native
                { },                                                                            // fwd, genomic, aligned
                { },                                                                            // fwd, native,  aligned
                { },                                                                            // fwd, genomic, aligned, clipped
                { },                                                                            // fwd, native,  aligned, clipped
                { 50, 50, 50, 0, 0, 30, 10, 20, 10, 0, 0, 20, 10, 20, 10, 0, 0, 40, 40, 40 },   // rev, genomic
                { 40, 40, 40, 0, 0, 10, 20, 10, 20, 0, 0, 10, 20, 10, 30, 0, 0, 50, 50, 50 },   // rev, native
                { },                                                                            // rev, genomic, aligned
                { },                                                                            // rev, native,  aligned
                { },                                                                            // rev, genomic, aligned, clipped
                { }                                                                             // rev, native,  aligned, clipped
            },
            {   // basecalls only
                { 40, 40, 40, 10, 20, 10, 20, 10, 20, 10, 30, 50, 50, 50 },            // fwd, genomic
                { 40, 40, 40, 10, 20, 10, 20, 10, 20, 10, 30, 50, 50, 50 },            // fwd, native
                { 40, 40, 40, 10, 20, 10, 20, 0, 0, 0, 10, 20, 10, 30, 50, 50, 50 },   // fwd, genomic, aligned
                { 40, 40, 40, 10, 20, 10, 20, 0, 0, 0, 10, 20, 10, 30, 50, 50, 50 },   // fwd, native,  aligned
                { 10, 20, 10, 20, 0, 0, 0, 10, 20, 10, 30 },                           // fwd, genomic, aligned, clipped
                { 10, 20, 10, 20, 0, 0, 0, 10, 20, 10, 30 },                           // fwd, native,  aligned, clipped
                { 50, 50, 50, 30, 10, 20, 10, 20, 10, 20, 10, 40, 40, 40 },            // rev, genomic
                { 40, 40, 40, 10, 20, 10, 20, 10, 20, 10, 30, 50, 50, 50 },            // rev, native
                { 50, 50, 50, 30, 10, 20, 10, 0, 0, 0, 20, 10, 20, 10, 40, 40, 40 },   // rev, genomic, aligned
                { 40, 40, 40, 10, 20, 10, 20, 0, 0, 0, 10, 20, 10, 30, 50, 50, 50 },   // rev, native,  aligned
                { 30, 10, 20, 10, 0, 0, 0, 20, 10, 20, 10 },                           // rev, genomic, aligned, clipped
                { 10, 20, 10, 20, 0, 0, 0, 10, 20, 10, 30 }                            // rev, native,  aligned, clipped
            }
        );
    };

    {
        SCOPED_TRACE("CIGAR: 4=3D4=");
        checkDeletionOnly("4=3D4=");
    }
    {
        SCOPED_TRACE("CIGAR: 4=1D2I2D4=");
        BamRecordTests::CheckPulseUIntTags(
            "4=1D2I2D4=",        // CIGAR
            "ATCCTAGGTT",        // seqBases
            "ATttCCTtAGGggTT",   // pulseCalls
            { 10, 20, 0, 0, 10, 20, 80, 0, 70, 10, 20, 0, 0, 10, 30 },   // tag data

            {   // all pulses
                { 10, 20, 0, 0, 10, 20, 80, 0, 70, 10, 20, 0, 0, 10, 30 },   // fwd, genomic
                { 10, 20, 0, 0, 10, 20, 80, 0, 70, 10, 20, 0, 0, 10, 30 },   // fwd, native
                { },                                                         // fwd, genomic, aligned
                { },                                                         // fwd, native,  aligned
                { },                                                         // fwd, genomic, aligned, clipped
                { },                                                         // fwd, native,  aligned, clipped
                { 30, 10, 0, 0, 20, 10, 70, 0, 80, 20, 10, 0, 0, 20, 10 },   // rev, genomic
                { 10, 20, 0, 0, 10, 20, 80, 0, 70, 10, 20, 0, 0, 10, 30 },   // rev, native
                { },                                                         // rev, genomic, aligned
                { },                                                         // rev, native,  aligned
                { },                                                         // rev, genomic, aligned, clipped
                { }                                                          // rev, native,  aligned, clipped
            },
            {   // basecalls only
                { 10, 20, 10, 20, 80, 70, 10, 20, 10, 30 },            // fwd, genomic
                { 10, 20, 10, 20, 80, 70, 10, 20, 10, 30 },            // fwd, native
                { 10, 20, 10, 20, 0, 80, 70, 0, 0, 10, 20, 10, 30 },   // fwd, genomic, aligned
                { 10, 20, 10, 20, 0, 80, 70, 0, 0, 10, 20, 10, 30 },   // fwd, native,  aligned
                { 10, 20, 10, 20, 0, 80, 70, 0, 0, 10, 20, 10, 30 },   // fwd, genomic, aligned, clipped
                { 10, 20, 10, 20, 0, 80, 70, 0, 0, 10, 20, 10, 30 },   // fwd, native,  aligned, clipped
                { 30, 10, 20, 10, 70, 80, 20, 10, 20, 10 },            // rev, genomic
                { 10, 20, 10, 20, 80, 70, 10, 20, 10, 30 },            // rev, native
                { 30, 10, 20, 10, 0, 70, 80, 0, 0, 20, 10, 20, 10 },   // rev, genomic, aligned
                { 10, 20, 10, 20, 0, 0, 80, 70, 0, 10, 20, 10, 30 },   // rev, native,  aligned
                { 30, 10, 20, 10, 0, 70, 80, 0, 0, 20, 10, 20, 10 },   // rev, genomic, aligned, clipped
                { 10, 20, 10, 20, 0, 0, 80, 70, 0, 10, 20, 10, 30 }    // rev, native,  aligned, clipped
            }
        );
    }
    {
        SCOPED_TRACE("CIGAR: 4=1D2P2I2P2D4=");
        BamRecordTests::CheckPulseUIntTags(
            "4=1D2P2I2P2D4=",    // CIGAR
            "ATCCTAGGTT",        // seqBases
            "ATttCCTtAGGggTT",   // pulseCalls
            { 10, 20, 0, 0, 10, 20, 80, 0, 70, 10, 20, 0, 0, 10, 30 },   // tag data

            {   // all pulses
                { 10, 20, 0, 0, 10, 20, 80, 0, 70, 10, 20, 0, 0, 10, 30 },   // fwd, genomic
                { 10, 20, 0, 0, 10, 20, 80, 0, 70, 10, 20, 0, 0, 10, 30 },   // fwd, native
                { },                                                         // fwd, genomic, aligned
                { },                                                         // fwd, native,  aligned
                { },                                                         // fwd, genomic, aligned, clipped
                { },                                                         // fwd, native,  aligned, clipped
                { 30, 10, 0, 0, 20, 10, 70, 0, 80, 20, 10, 0, 0, 20, 10 },   // rev, genomic
                { 10, 20, 0, 0, 10, 20, 80, 0, 70, 10, 20, 0, 0, 10, 30 },   // rev, native
                { },                                                         // rev, genomic, aligned
                { },                                                         // rev, native,  aligned
                { },                                                         // rev, genomic, aligned, clipped
                { }                                                          // rev, native,  aligned, clipped
            },
            {   // basecalls only
                { 10, 20, 10, 20, 80, 70, 10, 20, 10, 30 },                        // fwd, genomic
                { 10, 20, 10, 20, 80, 70, 10, 20, 10, 30 },                        // fwd, native
                { 10, 20, 10, 20, 0, 0, 0, 80, 70, 0, 0, 0, 0, 10, 20, 10, 30 },   // fwd, genomic, aligned
                { 10, 20, 10, 20, 0, 0, 0, 80, 70, 0, 0, 0, 0, 10, 20, 10, 30 },   // fwd, native,  aligned
                { 10, 20, 10, 20, 0, 0, 0, 80, 70, 0, 0, 0, 0, 10, 20, 10, 30 },   // fwd, genomic, aligned, clipped
                { 10, 20, 10, 20, 0, 0, 0, 80, 70, 0, 0, 0, 0, 10, 20, 10, 30 },   // fwd, native,  aligned, clipped
                { 30, 10, 20, 10, 70, 80, 20, 10, 20, 10 },                        // rev, genomic
                { 10, 20, 10, 20, 80, 70, 10, 20, 10, 30 },                        // rev, native
                { 30, 10, 20, 10, 0, 0, 0, 70, 80, 0, 0, 0, 0, 20, 10, 20, 10 },   // rev, genomic, aligned
                { 10, 20, 10, 20, 0, 0, 0, 0, 80, 70, 0, 0, 0, 10, 20, 10, 30 },   // rev, native,  aligned
                { 30, 10, 20, 10, 0, 0, 0, 70, 80, 0, 0, 0, 0, 20, 10, 20, 10 },   // rev, genomic, aligned, clipped
                { 10, 20, 10, 20, 0, 0, 0, 0, 80, 70, 0, 0, 0, 10, 20, 10, 30 }    // rev, native,  aligned, clipped
            }
        );
    }
    {
        SCOPED_TRACE("CIGAR: 3S4=3D4=3S");
        checkSoftClipped("3S4=3D4=3S");
    }
    {
        SCOPED_TRACE("CIGAR: 2H4=3D4=3H");
        checkDeletionOnly("2H4=3D4=3H");
    }
    {
        SCOPED_TRACE("CIGAR: 2H3S4=3D4=3S3H");
        checkSoftClipped("2H3S4=3D4=3S3H");
    }
}
2681
// Stores a list of pulse exclusion reasons on a record, then checks that the
// record reports having pulse exclusion data and hands back an equal list.
TEST(BamRecordTest, PulseExclusionTag)
{
    // BASE and PAUSE each appear twice in the input list.
    const std::vector<PacBio::BAM::PulseExclusionReason> reasons{
        PulseExclusionReason::BASE,
        PulseExclusionReason::PAUSE,
        PulseExclusionReason::SHORT_PULSE,
        PulseExclusionReason::BURST,
        PulseExclusionReason::BASE,
        PulseExclusionReason::PAUSE
    };

    BamRecord bam = BamRecordTests::CreateBam();
    bam.PulseExclusionReason(reasons);
    EXPECT_TRUE(bam.HasPulseExclusion());

    // Read the values back through the getter overload and compare.
    const auto result = bam.PulseExclusionReason();
    EXPECT_EQ(reasons, result);
}
2702
// Builds a record that belongs to a read group whose read type is
// "TRANSCRIPT" and checks that:
//   - the record's type is RecordType::TRANSCRIPT,
//   - a record named "transcript/1234" reports hole number 1234,
//   - QueryStart() and QueryEnd() throw std::runtime_error.
TEST(BamRecordTest, TranscriptRecord)
{
    const std::string readTypeStr{"TRANSCRIPT"};
    const auto readGroupId = MakeReadGroupId("transcript", readTypeStr);

    ReadGroupInfo rg{readGroupId};
    rg.ReadType(readTypeStr);

    BamHeader header;
    header.Version("1.1")
          .SortOrder("queryname")
          .PacBioBamVersion("3.0.1");
    // Register the read group with the header so the record can look it up.
    header.AddReadGroup(rg);

    BamRecord bam{header};
    // Tie the record to the transcript read group; otherwise 'rg' is built but
    // never used and the type check cannot go through the read group.
    bam.ReadGroup(rg);
    bam.Impl().Name("transcript/1234");

    EXPECT_EQ(RecordType::TRANSCRIPT, bam.Type());
    EXPECT_EQ(1234, bam.HoleNumber());
    EXPECT_THROW({bam.QueryStart();}, std::runtime_error);
    EXPECT_THROW({bam.QueryEnd();}, std::runtime_error);
}
2724
2725 // clang-format on
2726