1 // Author: Derek Barnett
2 
3 #include <iostream>
4 #include <sstream>
5 #include <string>
6 
7 #include <gtest/gtest.h>
8 
9 #include "PbbamTestData.h"
10 
11 #include "pbbam/BamFile.h"
12 #include "pbbam/BamRecord.h"
13 #include "pbbam/EntireFileQuery.h"
14 #include "pbbam/IndexedFastaReader.h"
15 
16 using namespace PacBio;
17 using namespace PacBio::BAM;
18 
19 namespace IndexedFastaReaderTests {
20 
21 const std::string lambdaFasta = PbbamTestsConfig::Data_Dir + "/lambdaNEB.fa";
22 const std::string singleInsertionBam = PbbamTestsConfig::Data_Dir + "/aligned.bam";
23 
24 }  // namespace IndexedFastaReaderTests
25 
TEST(IndexedFastaReaderTests,PrintSingleInsertion)26 TEST(IndexedFastaReaderTests, PrintSingleInsertion)
27 {
28     IndexedFastaReader r(IndexedFastaReaderTests::lambdaFasta);
29 
30     // Open BAM file
31     BamFile bamFile(IndexedFastaReaderTests::singleInsertionBam);
32     EntireFileQuery bamQuery(bamFile);
33 
34     auto it = bamQuery.begin();
35     auto record = *it++;
36     EXPECT_EQ("GGCTGCAGTGTACAGCGGTCAGGAGGCC-ATTGATGCCGGACTGGCTGAT",
37               r.ReferenceSubsequence(record, Orientation::NATIVE, true));
38     EXPECT_EQ("GGCTGCAGTGTACAGCGGTCAGGAGGCC-ATTGATGCCGGACTGGCTGAT",
39               r.ReferenceSubsequence(record, Orientation::NATIVE, true, true));
40     EXPECT_EQ("GGCTGCAGTGTACAGCGGTCAGGAGGCC-ATTGATGCCGGACTGGCTGAT",
41               r.ReferenceSubsequence(record, Orientation::GENOMIC, true));
42     EXPECT_EQ("GGCTGCAGTGTACAGCGGTCAGGAGGCC-ATTGATGCCGGACTGGCTGAT",
43               r.ReferenceSubsequence(record, Orientation::GENOMIC, true, true));
44     record = *it++;
45     EXPECT_EQ("GGCTGCAGTGTACAGCGGTCAGGAGGCC-ATTGATGCCGGACTGGCTGAT",
46               r.ReferenceSubsequence(record, Orientation::NATIVE, true));
47     EXPECT_EQ("GGCTGCAGTGTACAGCGGTCAGGAGGCC-ATTGATGCCGGACTGGCTGAT",
48               r.ReferenceSubsequence(record, Orientation::NATIVE, true, true));
49     EXPECT_EQ("GGCTGCAGTGTACAGCGGTCAGGAGGCC-ATTGATGCCGGACTGGCTGAT",
50               r.ReferenceSubsequence(record, Orientation::GENOMIC, true));
51     EXPECT_EQ("GGCTGCAGTGTACAGCGGTCAGGAGGCC-ATTGATGCCGGACTGGCTGAT",
52               r.ReferenceSubsequence(record, Orientation::GENOMIC, true, true));
53     record = *it++;
54     EXPECT_EQ(
55         "----------------------------------------------------"
56         "AAGTCACCAATGTGGGACGTCCGTCGATGGCAGAAGATCGCAGCACGGT-AACAGCGGCAA",
57         r.ReferenceSubsequence(record, Orientation::NATIVE, true));
58     EXPECT_EQ("AAGTCACCAATGTGGGACGTCCGTCGATGGCAGAAGATCGCAGCACGGT-AACAGCGGCAA",
59               r.ReferenceSubsequence(record, Orientation::NATIVE, true, true));
60     EXPECT_EQ(
61         "----------------------------------------------------"
62         "AAGTCACCAATGTGGGACGTCCGTCGATGGCAGAAGATCGCAGCACGGT-AACAGCGGCAA",
63         r.ReferenceSubsequence(record, Orientation::GENOMIC, true));
64     EXPECT_EQ("AAGTCACCAATGTGGGACGTCCGTCGATGGCAGAAGATCGCAGCACGGT-AACAGCGGCAA",
65               r.ReferenceSubsequence(record, Orientation::GENOMIC, true, true));
66     record = *it++;
67     EXPECT_EQ(
68         "AAGTCACCAATGTGGGACGTCCGTCGATGGCAGAAGATCGCAGCACGGT-AACAGCGGCAA-----------------------------"
69         "-----------------------",
70         r.ReferenceSubsequence(record, Orientation::GENOMIC, true));
71     EXPECT_EQ(
72         "----------------------------------------------------TTGCCGCTGTT-"
73         "ACCGTGCTGCGATCTTCTGCCATCGACGGACGTCCCACATTGGTGACTT",
74         r.ReferenceSubsequence(record, Orientation::NATIVE, true));
75     EXPECT_EQ("AAGTCACCAATGTGGGACGTCCGTCGATGGCAGAAGATCGCAGCACGGT-AACAGCGGCAA",
76               r.ReferenceSubsequence(record, Orientation::GENOMIC, true, true));
77     EXPECT_EQ("TTGCCGCTGTT-ACCGTGCTGCGATCTTCTGCCATCGACGGACGTCCCACATTGGTGACTT",
78               r.ReferenceSubsequence(record, Orientation::NATIVE, true, true));
79 
80     // {
81     //     std::ostringstream output;
82     //     auto itSS = bamQuery.begin();
83     //     {
84     //         const auto recordSS = *itSS;
85     //         output << r.ReferenceSubsequence(recordSS, Orientation::NATIVE, true) << std::endl;
86     //         output << recordSS.Sequence(Orientation::NATIVE, true) << std::endl;
87     //         output << std::endl;
88     //         output << r.ReferenceSubsequence(recordSS, Orientation::NATIVE, true, true) << std::endl;
89     //         output << recordSS.Sequence(Orientation::NATIVE, true, true) << std::endl;
90     //         output << std::endl;
91     //         output << r.ReferenceSubsequence(recordSS, Orientation::GENOMIC, true) << std::endl;
92     //         output << recordSS.Sequence(Orientation::GENOMIC, true) << std::endl;
93     //         output << std::endl;
94     //         output << r.ReferenceSubsequence(recordSS, Orientation::GENOMIC, true, true) << std::endl;
95     //         output << recordSS.Sequence(Orientation::GENOMIC, true, true) << std::endl;
96     //         output << std::endl;
97     //     }
98     //     ++itSS;
99     //     {
100     //         const auto recordSS = *itSS;
101     //         output << r.ReferenceSubsequence(recordSS, Orientation::NATIVE, true) << std::endl;
102     //         output << recordSS.Sequence(Orientation::NATIVE, true) << std::endl;
103     //         output << std::endl;
104     //         output << r.ReferenceSubsequence(recordSS, Orientation::NATIVE, true, true) << std::endl;
105     //         output << recordSS.Sequence(Orientation::NATIVE, true, true) << std::endl;
106     //         output << std::endl;
107     //         output << r.ReferenceSubsequence(recordSS, Orientation::GENOMIC, true) << std::endl;
108     //         output << recordSS.Sequence(Orientation::GENOMIC, true) << std::endl;
109     //         output << std::endl;
110     //         output << r.ReferenceSubsequence(recordSS, Orientation::GENOMIC, true, true) << std::endl;
111     //         output << recordSS.Sequence(Orientation::GENOMIC, true, true) << std::endl;
112     //         output << std::endl;
113     //     }
114     //     ++itSS;
115     //     {
116     //         const auto recordSS = *itSS;
117     //         output << r.ReferenceSubsequence(recordSS, Orientation::NATIVE, true) << std::endl;
118     //         output << recordSS.Sequence(Orientation::NATIVE, true) << std::endl;
119     //         output << std::endl;
120     //         output << r.ReferenceSubsequence(recordSS, Orientation::NATIVE, true, true) << std::endl;
121     //         output << recordSS.Sequence(Orientation::NATIVE, true, true) << std::endl;
122     //         output << std::endl;
123     //         output << r.ReferenceSubsequence(recordSS, Orientation::GENOMIC, true) << std::endl;
124     //         output << recordSS.Sequence(Orientation::GENOMIC, true) << std::endl;
125     //         output << std::endl;
126     //         output << r.ReferenceSubsequence(recordSS, Orientation::GENOMIC, true, true) << std::endl;
127     //         output << recordSS.Sequence(Orientation::GENOMIC, true, true) << std::endl;
128     //         output << std::endl;
129     //     }
130     //     ++itSS;
131     //     {
132     //         const auto recordSS = *itSS;
133     //         output << r.ReferenceSubsequence(recordSS, Orientation::GENOMIC, true) << std::endl;
134     //         output << recordSS.Sequence(Orientation::GENOMIC, true) << std::endl;
135     //         output << std::endl;
136     //         output << r.ReferenceSubsequence(recordSS, Orientation::NATIVE, true) << std::endl;
137     //         output << recordSS.Sequence(Orientation::NATIVE, true) << std::endl;
138     //         output << std::endl;
139     //         output << r.ReferenceSubsequence(recordSS, Orientation::GENOMIC, true, true) << std::endl;
140     //         output << recordSS.Sequence(Orientation::GENOMIC, true, true) << std::endl;
141     //         output << std::endl;
142     //         output << r.ReferenceSubsequence(recordSS, Orientation::NATIVE, true, true) << std::endl;
143     //         output << recordSS.Sequence(Orientation::NATIVE, true, true) << std::endl;
144     //     }
145     //     std::cerr << output.str();
146     // }
147 }
148 
TEST(IndexedFastaReaderTests,ReadLambda)149 TEST(IndexedFastaReaderTests, ReadLambda)
150 {
151     IndexedFastaReader r(IndexedFastaReaderTests::lambdaFasta);
152 
153     EXPECT_TRUE(r.HasSequence("lambda_NEB3011"));
154     EXPECT_FALSE(r.HasSequence("dog"));
155     EXPECT_EQ(1, r.NumSequences());
156     EXPECT_EQ(48502, r.SequenceLength("lambda_NEB3011"));
157 
158     std::string seq = r.Subsequence("lambda_NEB3011:0-10");
159     EXPECT_EQ("GGGCGGCGAC", seq);
160 
161     std::string seq2 = r.Subsequence("lambda_NEB3011", 0, 10);
162     EXPECT_EQ("GGGCGGCGAC", seq2);
163 
164     // subsequence extending beyond bounds returns clipped
165     std::string seq3 = r.Subsequence("lambda_NEB3011", 48400, 48600);
166     EXPECT_EQ(102, seq3.length());
167 
168     // bad subsequence
169 }
170 
TEST(IndexedFastaReaderTests,Errors)171 TEST(IndexedFastaReaderTests, Errors)
172 {
173     IndexedFastaReader r(IndexedFastaReaderTests::lambdaFasta);
174 
175     //
176     // attempt access without "opening"
177     //
178     // EXPECT_THROW(r.NumSequences(), std::exception);
179     // EXPECT_THROW(r.HasSequence("lambda_NEB3011"), std::exception);
180     // EXPECT_THROW(r.SequenceLength("lambda_NEB3011"), std::exception);
181     // EXPECT_THROW(r.Subsequence("lambda_NEB3011:0-10"), std::exception);
182 
183     //
184     // invalid accesses after opening
185     //
186     EXPECT_THROW(r.SequenceLength("dog"), std::exception);
187     EXPECT_THROW(r.Subsequence("dog:0-10"), std::exception);
188 }
189 
TEST(IndexedFastaReaderTests,Names)190 TEST(IndexedFastaReaderTests, Names)
191 {
192     IndexedFastaReader r(IndexedFastaReaderTests::lambdaFasta);
193     std::vector<std::string> names = {"lambda_NEB3011"};
194 
195     // Test all-name request
196     EXPECT_EQ(names, r.Names());
197 
198     // Test single-name query
199     EXPECT_EQ(names[0], r.Name(0));
200 
201     // invalid name acces (out of range)
202     EXPECT_THROW(r.Name(1), std::exception);
203 }
204