1 // Author: Derek Barnett
2
3 #include <iostream>
4 #include <sstream>
5 #include <string>
6
7 #include <gtest/gtest.h>
8
9 #include "PbbamTestData.h"
10
11 #include "pbbam/BamFile.h"
12 #include "pbbam/BamRecord.h"
13 #include "pbbam/EntireFileQuery.h"
14 #include "pbbam/IndexedFastaReader.h"
15
16 using namespace PacBio;
17 using namespace PacBio::BAM;
18
19 namespace IndexedFastaReaderTests {
20
21 const std::string lambdaFasta = PbbamTestsConfig::Data_Dir + "/lambdaNEB.fa";
22 const std::string singleInsertionBam = PbbamTestsConfig::Data_Dir + "/aligned.bam";
23
24 } // namespace IndexedFastaReaderTests
25
TEST(IndexedFastaReaderTests,PrintSingleInsertion)26 TEST(IndexedFastaReaderTests, PrintSingleInsertion)
27 {
28 IndexedFastaReader r(IndexedFastaReaderTests::lambdaFasta);
29
30 // Open BAM file
31 BamFile bamFile(IndexedFastaReaderTests::singleInsertionBam);
32 EntireFileQuery bamQuery(bamFile);
33
34 auto it = bamQuery.begin();
35 auto record = *it++;
36 EXPECT_EQ("GGCTGCAGTGTACAGCGGTCAGGAGGCC-ATTGATGCCGGACTGGCTGAT",
37 r.ReferenceSubsequence(record, Orientation::NATIVE, true));
38 EXPECT_EQ("GGCTGCAGTGTACAGCGGTCAGGAGGCC-ATTGATGCCGGACTGGCTGAT",
39 r.ReferenceSubsequence(record, Orientation::NATIVE, true, true));
40 EXPECT_EQ("GGCTGCAGTGTACAGCGGTCAGGAGGCC-ATTGATGCCGGACTGGCTGAT",
41 r.ReferenceSubsequence(record, Orientation::GENOMIC, true));
42 EXPECT_EQ("GGCTGCAGTGTACAGCGGTCAGGAGGCC-ATTGATGCCGGACTGGCTGAT",
43 r.ReferenceSubsequence(record, Orientation::GENOMIC, true, true));
44 record = *it++;
45 EXPECT_EQ("GGCTGCAGTGTACAGCGGTCAGGAGGCC-ATTGATGCCGGACTGGCTGAT",
46 r.ReferenceSubsequence(record, Orientation::NATIVE, true));
47 EXPECT_EQ("GGCTGCAGTGTACAGCGGTCAGGAGGCC-ATTGATGCCGGACTGGCTGAT",
48 r.ReferenceSubsequence(record, Orientation::NATIVE, true, true));
49 EXPECT_EQ("GGCTGCAGTGTACAGCGGTCAGGAGGCC-ATTGATGCCGGACTGGCTGAT",
50 r.ReferenceSubsequence(record, Orientation::GENOMIC, true));
51 EXPECT_EQ("GGCTGCAGTGTACAGCGGTCAGGAGGCC-ATTGATGCCGGACTGGCTGAT",
52 r.ReferenceSubsequence(record, Orientation::GENOMIC, true, true));
53 record = *it++;
54 EXPECT_EQ(
55 "----------------------------------------------------"
56 "AAGTCACCAATGTGGGACGTCCGTCGATGGCAGAAGATCGCAGCACGGT-AACAGCGGCAA",
57 r.ReferenceSubsequence(record, Orientation::NATIVE, true));
58 EXPECT_EQ("AAGTCACCAATGTGGGACGTCCGTCGATGGCAGAAGATCGCAGCACGGT-AACAGCGGCAA",
59 r.ReferenceSubsequence(record, Orientation::NATIVE, true, true));
60 EXPECT_EQ(
61 "----------------------------------------------------"
62 "AAGTCACCAATGTGGGACGTCCGTCGATGGCAGAAGATCGCAGCACGGT-AACAGCGGCAA",
63 r.ReferenceSubsequence(record, Orientation::GENOMIC, true));
64 EXPECT_EQ("AAGTCACCAATGTGGGACGTCCGTCGATGGCAGAAGATCGCAGCACGGT-AACAGCGGCAA",
65 r.ReferenceSubsequence(record, Orientation::GENOMIC, true, true));
66 record = *it++;
67 EXPECT_EQ(
68 "AAGTCACCAATGTGGGACGTCCGTCGATGGCAGAAGATCGCAGCACGGT-AACAGCGGCAA-----------------------------"
69 "-----------------------",
70 r.ReferenceSubsequence(record, Orientation::GENOMIC, true));
71 EXPECT_EQ(
72 "----------------------------------------------------TTGCCGCTGTT-"
73 "ACCGTGCTGCGATCTTCTGCCATCGACGGACGTCCCACATTGGTGACTT",
74 r.ReferenceSubsequence(record, Orientation::NATIVE, true));
75 EXPECT_EQ("AAGTCACCAATGTGGGACGTCCGTCGATGGCAGAAGATCGCAGCACGGT-AACAGCGGCAA",
76 r.ReferenceSubsequence(record, Orientation::GENOMIC, true, true));
77 EXPECT_EQ("TTGCCGCTGTT-ACCGTGCTGCGATCTTCTGCCATCGACGGACGTCCCACATTGGTGACTT",
78 r.ReferenceSubsequence(record, Orientation::NATIVE, true, true));
79
80 // {
81 // std::ostringstream output;
82 // auto itSS = bamQuery.begin();
83 // {
84 // const auto recordSS = *itSS;
85 // output << r.ReferenceSubsequence(recordSS, Orientation::NATIVE, true) << std::endl;
86 // output << recordSS.Sequence(Orientation::NATIVE, true) << std::endl;
87 // output << std::endl;
88 // output << r.ReferenceSubsequence(recordSS, Orientation::NATIVE, true, true) << std::endl;
89 // output << recordSS.Sequence(Orientation::NATIVE, true, true) << std::endl;
90 // output << std::endl;
91 // output << r.ReferenceSubsequence(recordSS, Orientation::GENOMIC, true) << std::endl;
92 // output << recordSS.Sequence(Orientation::GENOMIC, true) << std::endl;
93 // output << std::endl;
94 // output << r.ReferenceSubsequence(recordSS, Orientation::GENOMIC, true, true) << std::endl;
95 // output << recordSS.Sequence(Orientation::GENOMIC, true, true) << std::endl;
96 // output << std::endl;
97 // }
98 // ++itSS;
99 // {
100 // const auto recordSS = *itSS;
101 // output << r.ReferenceSubsequence(recordSS, Orientation::NATIVE, true) << std::endl;
102 // output << recordSS.Sequence(Orientation::NATIVE, true) << std::endl;
103 // output << std::endl;
104 // output << r.ReferenceSubsequence(recordSS, Orientation::NATIVE, true, true) << std::endl;
105 // output << recordSS.Sequence(Orientation::NATIVE, true, true) << std::endl;
106 // output << std::endl;
107 // output << r.ReferenceSubsequence(recordSS, Orientation::GENOMIC, true) << std::endl;
108 // output << recordSS.Sequence(Orientation::GENOMIC, true) << std::endl;
109 // output << std::endl;
110 // output << r.ReferenceSubsequence(recordSS, Orientation::GENOMIC, true, true) << std::endl;
111 // output << recordSS.Sequence(Orientation::GENOMIC, true, true) << std::endl;
112 // output << std::endl;
113 // }
114 // ++itSS;
115 // {
116 // const auto recordSS = *itSS;
117 // output << r.ReferenceSubsequence(recordSS, Orientation::NATIVE, true) << std::endl;
118 // output << recordSS.Sequence(Orientation::NATIVE, true) << std::endl;
119 // output << std::endl;
120 // output << r.ReferenceSubsequence(recordSS, Orientation::NATIVE, true, true) << std::endl;
121 // output << recordSS.Sequence(Orientation::NATIVE, true, true) << std::endl;
122 // output << std::endl;
123 // output << r.ReferenceSubsequence(recordSS, Orientation::GENOMIC, true) << std::endl;
124 // output << recordSS.Sequence(Orientation::GENOMIC, true) << std::endl;
125 // output << std::endl;
126 // output << r.ReferenceSubsequence(recordSS, Orientation::GENOMIC, true, true) << std::endl;
127 // output << recordSS.Sequence(Orientation::GENOMIC, true, true) << std::endl;
128 // output << std::endl;
129 // }
130 // ++itSS;
131 // {
132 // const auto recordSS = *itSS;
133 // output << r.ReferenceSubsequence(recordSS, Orientation::GENOMIC, true) << std::endl;
134 // output << recordSS.Sequence(Orientation::GENOMIC, true) << std::endl;
135 // output << std::endl;
136 // output << r.ReferenceSubsequence(recordSS, Orientation::NATIVE, true) << std::endl;
137 // output << recordSS.Sequence(Orientation::NATIVE, true) << std::endl;
138 // output << std::endl;
139 // output << r.ReferenceSubsequence(recordSS, Orientation::GENOMIC, true, true) << std::endl;
140 // output << recordSS.Sequence(Orientation::GENOMIC, true, true) << std::endl;
141 // output << std::endl;
142 // output << r.ReferenceSubsequence(recordSS, Orientation::NATIVE, true, true) << std::endl;
143 // output << recordSS.Sequence(Orientation::NATIVE, true, true) << std::endl;
144 // }
145 // std::cerr << output.str();
146 // }
147 }
148
TEST(IndexedFastaReaderTests,ReadLambda)149 TEST(IndexedFastaReaderTests, ReadLambda)
150 {
151 IndexedFastaReader r(IndexedFastaReaderTests::lambdaFasta);
152
153 EXPECT_TRUE(r.HasSequence("lambda_NEB3011"));
154 EXPECT_FALSE(r.HasSequence("dog"));
155 EXPECT_EQ(1, r.NumSequences());
156 EXPECT_EQ(48502, r.SequenceLength("lambda_NEB3011"));
157
158 std::string seq = r.Subsequence("lambda_NEB3011:0-10");
159 EXPECT_EQ("GGGCGGCGAC", seq);
160
161 std::string seq2 = r.Subsequence("lambda_NEB3011", 0, 10);
162 EXPECT_EQ("GGGCGGCGAC", seq2);
163
164 // subsequence extending beyond bounds returns clipped
165 std::string seq3 = r.Subsequence("lambda_NEB3011", 48400, 48600);
166 EXPECT_EQ(102, seq3.length());
167
168 // bad subsequence
169 }
170
TEST(IndexedFastaReaderTests,Errors)171 TEST(IndexedFastaReaderTests, Errors)
172 {
173 IndexedFastaReader r(IndexedFastaReaderTests::lambdaFasta);
174
175 //
176 // attempt access without "opening"
177 //
178 // EXPECT_THROW(r.NumSequences(), std::exception);
179 // EXPECT_THROW(r.HasSequence("lambda_NEB3011"), std::exception);
180 // EXPECT_THROW(r.SequenceLength("lambda_NEB3011"), std::exception);
181 // EXPECT_THROW(r.Subsequence("lambda_NEB3011:0-10"), std::exception);
182
183 //
184 // invalid accesses after opening
185 //
186 EXPECT_THROW(r.SequenceLength("dog"), std::exception);
187 EXPECT_THROW(r.Subsequence("dog:0-10"), std::exception);
188 }
189
TEST(IndexedFastaReaderTests,Names)190 TEST(IndexedFastaReaderTests, Names)
191 {
192 IndexedFastaReader r(IndexedFastaReaderTests::lambdaFasta);
193 std::vector<std::string> names = {"lambda_NEB3011"};
194
195 // Test all-name request
196 EXPECT_EQ(names, r.Names());
197
198 // Test single-name query
199 EXPECT_EQ(names[0], r.Name(0));
200
201 // invalid name acces (out of range)
202 EXPECT_THROW(r.Name(1), std::exception);
203 }
204