1 /*===========================================================================
2 *
3 * PUBLIC DOMAIN NOTICE
4 * National Center for Biotechnology Information
5 *
6 * This software/database is a "United States Government Work" under the
7 * terms of the United States Copyright Act. It was written as part of
8 * the author's official duties as a United States Government employee and
9 * thus cannot be copyrighted. This software/database is freely available
10 * to the public for use. The National Library of Medicine and the U.S.
11 * Government have not placed any restriction on its use or reproduction.
12 *
13 * Although all reasonable efforts have been taken to ensure the accuracy
14 * and reliability of the software and data, the NLM and the U.S.
15 * Government do not and cannot warrant the performance or results that
16 * may be obtained by using this software or data. The NLM and the U.S.
17 * Government disclaim all warranties, express or implied, including
18 * warranties of performance, merchantability or fitness for any particular
19 * purpose.
20 *
21 * Please cite the author in any work or product based on this material.
22 *
23 * ===========================================================================
24 *
25 */
26
27 #include <iostream>
28 #include <string>
29 #include <cstdlib> // for abort, getenv, random
30
31 #define TESTING 1
32 #include "../refseq.hpp"
33
34 #undef TESTING
35
36 static bool verbose = false;
37 #define VERBOSE if (!verbose) {} else std::cerr
38
main(int argc,char ** argv)39 int main(int argc, char **argv)
40 {
41 {
42 auto const envvar = getenv("VERBOSE");
43 auto const verbose_arg = (argc > 1 && std::string(argv[1]) == "-v");
44 auto const verbose_env = (envvar != NULL && std::string(envvar) == "1");
45
46 verbose = verbose_arg || verbose_env;
47 }
48 srandom(time(0));
49
50 if (RefSeq::test())
51 std::cerr << "RefSeq::test ok" << std::endl;
52 else
53 return 1;
54
55 return 0;
56 }
57
58 #include <sstream>
59
test()60 bool RefSeq::test() {
61 auto const seq = std::string("NNNACGTNNN");
62 auto testSequence = [&](RefSeq const &rs) -> bool {
63 char test[10];
64 unsigned len;
65
66 memset(test, 0, 10);
67 len = rs.getBases(test, 0, 10);
68 if (len != 10) {
69 std::cerr << "RefSeq test failed: length should be 10, not " << len << std::endl;
70 return false;
71 }
72 if (std::string(test, len) != seq) {
73 std::cerr << "RefSeq test failed: sequence should be '" << seq << "', not '" << std::string(test, len) << "'" << std::endl;
74 return false;
75 }
76
77 memset(test, 0, 10);
78 len = rs.getBases(test, 0, 5);
79 if (len != 5) {
80 std::cerr << "RefSeq test failed: length should be 5, not " << len << std::endl;
81 return false;
82 }
83 if (std::string(test, 5) != seq.substr(0, 5)) {
84 std::cerr << "RefSeq test failed: sequence should be '" << seq.substr(0, 5) << "', not '" << std::string(test, len) << "'" << std::endl;
85 return false;
86 }
87
88 memset(test, 0, 10);
89 len = rs.getBases(test, 5, 5);
90 if (len != 5) {
91 std::cerr << "RefSeq test failed: length should be 5, not " << len << std::endl;
92 return false;
93 }
94 if (std::string(test, len) != seq.substr(5)) {
95 std::cerr << "RefSeq test failed: sequence should be '" << seq.substr(5) << "', not '" << std::string(test, len) << "'" << std::endl;
96 return false;
97 }
98 return true;
99 };
100 {
101 auto const &rs = RefSeq::loadFromStream(std::istringstream(seq));
102
103 if (rs.length != 10) {
104 std::cerr << "RefSeq test failed: length should be 10, not " << rs.length << std::endl;
105 return false;
106 }
107 if (std::distance(rs.Ns.begin(), rs.Ns.end()) != 2) {
108 std::cerr << "RefSeq test failed: " << rs.Ns << " should have 2 ranges" << std::endl;
109 return false;
110 }
111 if (!testSequence(rs))
112 return false;
113 }
114 {
115 auto const &rs = RefSeq::loadFromStreamCircular(std::istringstream(seq));
116
117 if (rs.length != 10) {
118 std::cerr << "RefSeq test failed: length should be 10, not " << rs.length << std::endl;
119 return false;
120 }
121 if (!rs.Ns.empty()) {
122 std::cerr << "RefSeq test failed: " << rs.Ns << " should be empty" << std::endl;
123 return false;
124 }
125 if (!testSequence(rs))
126 return false;
127 }
128 return true;
129 }
130
131 #include <cctype>
loadFromStream(std::istream && input)132 RefSeq RefSeq::loadFromStream(std::istream && input) {
133 RangeList Ns;
134 std::vector<uint8_t> bases;
135 unsigned position = 0;
136
137 int n = 0;
138 int accum = 0;
139
140 char ch;
141 while (input.get(ch)) {
142 if (isspace(ch)) continue;
143
144 int b4na = 0;
145 int N = 1;
146
147 switch (ch) {
148 case 'A':
149 case 'a':
150 b4na = 0;
151 N = 0;
152 break;
153 case 'C':
154 case 'c':
155 b4na = 1;
156 N = 0;
157 break;
158 case 'G':
159 case 'g':
160 b4na = 2;
161 N = 0;
162 break;
163 case 'T':
164 case 't':
165 b4na = 3;
166 N = 0;
167 break;
168 default:
169 break;
170 }
171 accum = (accum << 2) | b4na;
172 n += 1;
173 if (n == 4) {
174 bases.push_back(accum);
175 accum = 0;
176 n = 0;
177 }
178 if (N)
179 Ns.add(position);
180 position += 1;
181 }
182 if (n != 0) {
183 while (n < 4) {
184 accum <<= 2;
185 n += 1;
186 }
187 bases.push_back(accum);
188 }
189 return RefSeq(std::move(Ns), std::move(bases), position);
190 }
191
loadFromStreamCircular(std::istream && input)192 RefSeq RefSeq::loadFromStreamCircular(std::istream && input) {
193 std::vector<uint8_t> bases;
194 unsigned position = 0;
195
196 int n = 0;
197 int accum = 0;
198
199 char ch;
200 while (input.get(ch)) {
201 if (isspace(ch)) continue;
202
203 int b2na = 0;
204
205 switch (ch) {
206 case 'A':
207 case 'a':
208 b2na = 1;
209 break;
210 case 'C':
211 case 'c':
212 b2na = 2;
213 break;
214 case 'G':
215 case 'g':
216 b2na = 4;
217 break;
218 case 'T':
219 case 't':
220 b2na = 8;
221 break;
222 default:
223 break;
224 }
225 accum = (accum << 4) | b2na;
226 n += 1;
227 if (n == 2) {
228 bases.push_back(accum);
229 accum = 0;
230 n = 0;
231 }
232 position += 1;
233 }
234 if (n != 0) {
235 while (n < 2) {
236 accum <<= 4;
237 n += 1;
238 }
239 bases.push_back(accum);
240 }
241 return RefSeq(std::move(bases), position);
242 }
243