1 /*===========================================================================
2  *
3  *                            PUBLIC DOMAIN NOTICE
4  *               National Center for Biotechnology Information
5  *
6  *  This software/database is a "United States Government Work" under the
7  *  terms of the United States Copyright Act.  It was written as part of
8  *  the author's official duties as a United States Government employee and
9  *  thus cannot be copyrighted.  This software/database is freely available
10  *  to the public for use. The National Library of Medicine and the U.S.
11  *  Government have not placed any restriction on its use or reproduction.
12  *
13  *  Although all reasonable efforts have been taken to ensure the accuracy
14  *  and reliability of the software and data, the NLM and the U.S.
15  *  Government do not and cannot warrant the performance or results that
16  *  may be obtained by using this software or data. The NLM and the U.S.
17  *  Government disclaim all warranties, express or implied, including
18  *  warranties of performance, merchantability or fitness for any particular
19  *  purpose.
20  *
21  *  Please cite the author in any work or product based on this material.
22  *
23  * ===========================================================================
24  *
25  */
26 
27 #include <iostream>
28 #include <string>
29 #include <cstdlib> // for abort, getenv, random
30 
31 #define TESTING 1
32 #include "../refseq.hpp"
33 
34 #undef TESTING
35 
36 static bool verbose = false;
37 #define VERBOSE if (!verbose) {} else std::cerr
38 
main(int argc,char ** argv)39 int main(int argc, char **argv)
40 {
41     {
42         auto const envvar = getenv("VERBOSE");
43         auto const verbose_arg = (argc > 1 && std::string(argv[1]) == "-v");
44         auto const verbose_env = (envvar != NULL && std::string(envvar) == "1");
45 
46         verbose = verbose_arg || verbose_env;
47     }
48     srandom(time(0));
49 
50     if (RefSeq::test())
51         std::cerr << "RefSeq::test ok" << std::endl;
52     else
53         return 1;
54 
55     return 0;
56 }
57 
58 #include <sstream>
59 
test()60 bool RefSeq::test() {
61     auto const seq = std::string("NNNACGTNNN");
62     auto testSequence = [&](RefSeq const &rs) -> bool {
63         char test[10];
64         unsigned len;
65 
66         memset(test, 0, 10);
67         len = rs.getBases(test, 0, 10);
68         if (len != 10) {
69             std::cerr << "RefSeq test failed: length should be 10, not " << len << std::endl;
70             return false;
71         }
72         if (std::string(test, len) != seq) {
73             std::cerr << "RefSeq test failed: sequence should be '" << seq << "', not '" << std::string(test, len) << "'" << std::endl;
74             return false;
75         }
76 
77         memset(test, 0, 10);
78         len = rs.getBases(test, 0, 5);
79         if (len != 5) {
80             std::cerr << "RefSeq test failed: length should be 5, not " << len << std::endl;
81             return false;
82         }
83         if (std::string(test, 5) != seq.substr(0, 5)) {
84             std::cerr << "RefSeq test failed: sequence should be '" << seq.substr(0, 5) << "', not '" << std::string(test, len) << "'" << std::endl;
85             return false;
86         }
87 
88         memset(test, 0, 10);
89         len = rs.getBases(test, 5, 5);
90         if (len != 5) {
91             std::cerr << "RefSeq test failed: length should be 5, not " << len << std::endl;
92             return false;
93         }
94         if (std::string(test, len) != seq.substr(5)) {
95             std::cerr << "RefSeq test failed: sequence should be '" << seq.substr(5) << "', not '" << std::string(test, len) << "'" << std::endl;
96             return false;
97         }
98         return true;
99     };
100     {
101         auto const &rs = RefSeq::loadFromStream(std::istringstream(seq));
102 
103         if (rs.length != 10) {
104             std::cerr << "RefSeq test failed: length should be 10, not " << rs.length << std::endl;
105             return false;
106         }
107         if (std::distance(rs.Ns.begin(), rs.Ns.end()) != 2) {
108             std::cerr << "RefSeq test failed: " << rs.Ns << " should have 2 ranges" << std::endl;
109             return false;
110         }
111         if (!testSequence(rs))
112             return false;
113     }
114     {
115         auto const &rs = RefSeq::loadFromStreamCircular(std::istringstream(seq));
116 
117         if (rs.length != 10) {
118             std::cerr << "RefSeq test failed: length should be 10, not " << rs.length << std::endl;
119             return false;
120         }
121         if (!rs.Ns.empty()) {
122             std::cerr << "RefSeq test failed: " << rs.Ns << " should be empty" << std::endl;
123             return false;
124         }
125         if (!testSequence(rs))
126             return false;
127     }
128     return true;
129 }
130 
131 #include <cctype>
loadFromStream(std::istream && input)132 RefSeq RefSeq::loadFromStream(std::istream && input) {
133     RangeList Ns;
134     std::vector<uint8_t> bases;
135     unsigned position = 0;
136 
137     int n = 0;
138     int accum = 0;
139 
140     char ch;
141     while (input.get(ch)) {
142         if (isspace(ch)) continue;
143 
144         int b4na = 0;
145         int N = 1;
146 
147         switch (ch) {
148         case 'A':
149         case 'a':
150             b4na = 0;
151             N = 0;
152             break;
153         case 'C':
154         case 'c':
155             b4na = 1;
156             N = 0;
157             break;
158         case 'G':
159         case 'g':
160             b4na = 2;
161             N = 0;
162             break;
163         case 'T':
164         case 't':
165             b4na = 3;
166             N = 0;
167             break;
168         default:
169             break;
170         }
171         accum = (accum << 2) | b4na;
172         n += 1;
173         if (n == 4) {
174             bases.push_back(accum);
175             accum = 0;
176             n = 0;
177         }
178         if (N)
179             Ns.add(position);
180         position += 1;
181     }
182     if (n != 0) {
183         while (n < 4) {
184             accum <<= 2;
185             n += 1;
186         }
187         bases.push_back(accum);
188     }
189     return RefSeq(std::move(Ns), std::move(bases), position);
190 }
191 
loadFromStreamCircular(std::istream && input)192 RefSeq RefSeq::loadFromStreamCircular(std::istream && input) {
193     std::vector<uint8_t> bases;
194     unsigned position = 0;
195 
196     int n = 0;
197     int accum = 0;
198 
199     char ch;
200     while (input.get(ch)) {
201         if (isspace(ch)) continue;
202 
203         int b2na = 0;
204 
205         switch (ch) {
206         case 'A':
207         case 'a':
208             b2na = 1;
209             break;
210         case 'C':
211         case 'c':
212             b2na = 2;
213             break;
214         case 'G':
215         case 'g':
216             b2na = 4;
217             break;
218         case 'T':
219         case 't':
220             b2na = 8;
221             break;
222         default:
223             break;
224         }
225         accum = (accum << 4) | b2na;
226         n += 1;
227         if (n == 2) {
228             bases.push_back(accum);
229             accum = 0;
230             n = 0;
231         }
232         position += 1;
233     }
234     if (n != 0) {
235         while (n < 2) {
236             accum <<= 4;
237             n += 1;
238         }
239         bases.push_back(accum);
240     }
241     return RefSeq(std::move(bases), position);
242 }
243