1 #include "DNASeq4.h"
2
DNASeq4(const char * s,size_t len)3 DNASeq4::DNASeq4(const char* s, size_t len) : l_(len), v_() {
4 reserve(l_);
5 for (size_t i=0; i<len; i+=2)
6 v_.push_back(DiNuc(s[i], s[i+1])); //n.b. `s` is null-terminated
7 }
8
str() const9 string DNASeq4::str() const {
10 string s;
11 s.reserve(l_);
12 for (auto nt=begin(); nt!=end(); ++nt)
13 s.push_back(char(*nt));
14 return s;
15 }
16
rev_compl() const17 DNASeq4 DNASeq4::rev_compl() const {
18 DNASeq4 rev;
19 rev.l_ = l_;
20 rev.reserve(rev.l_);
21
22 for (iterator nt=end(); nt!=begin();) {
23 --nt;
24 Nt4 first = (*nt).rev_compl();
25 if (nt != begin()) {
26 --nt;
27 Nt4 second = (*nt).rev_compl();
28 rev.v_.push_back(DiNuc(first, second));
29 } else {
30 rev.v_.push_back(DiNuc((*nt).rev_compl(), Nt4::$));
31 break;
32 }
33 }
34
35 return rev;
36 }
37
resize(size_t len)38 void DNASeq4::resize(size_t len) {
39 if (l_ < len) {
40 while (l_ != len) {
41 push_back(Nt4::n);
42 ++l_;
43 }
44 } else if (l_ > len) {
45 l_ = len;
46 v_.resize(l_/2 + l_%2);
47 if (l_%2)
48 v_.back().second(Nt4::$);
49 }
50 }
51
append(iterator first,iterator past)52 void DNASeq4::append(iterator first, iterator past) {
53
54 if (!(first != past))
55 return;
56
57 if (l_%2==1) {
58 v_.back().second(*first);
59 ++l_;
60 ++first;
61 }
62
63 l_ += past - first;
64 reserve(l_);
65
66 while(first != past) {
67 Nt4 prev = *first;
68 ++first;
69 if (first != past) {
70 v_.push_back(DiNuc(prev, *first));
71 ++first;
72 } else {
73 v_.push_back(DiNuc(prev, Nt4::$));
74 break;
75 }
76 }
77 }
78
remove_Ns()79 void DNASeq4::remove_Ns() {
80 auto itr = begin();
81 while (itr != end() && *itr != Nt4::n)
82 ++itr;
83 if (itr == end())
84 // Sequence didn't contain N's.
85 return;
86 DNASeq4 seq;
87 auto start = begin();
88 while (start != end()) {
89 while (itr != end() && *itr != Nt4::n)
90 ++itr;
91 seq.append(start, itr);
92 while (itr != end() && *itr == Nt4::n)
93 ++itr;
94 start = itr;
95 }
96 *this = move(seq);
97 }
98
shift_Ns_towards_the_end()99 void DNASeq4::shift_Ns_towards_the_end() {
100 vector<DiNuc>::iterator v_itr = v_.begin();
101 iterator first = begin();
102 while(first != end()) {
103 while (first != end() && *first == Nt4::n)
104 ++first;
105 if (first == end())
106 break;
107 assert(*first != Nt4::n);
108
109 iterator second = first;
110 ++second;
111 while (second != end() && *second == Nt4::n)
112 ++second;
113 if (second == end())
114 break;
115 assert(*second != Nt4::n);
116
117 assert(v_itr != v_.end());
118 *v_itr = DiNuc(*first, *second);
119 ++v_itr;
120
121 first = second;
122 ++first;
123 }
124 if (first != end()) {
125 // Loop ended while scanning for a `second`, and nucleotide `*first`
126 // hasn't been written yet.
127 assert(v_itr != v_.end());
128 *v_itr = DiNuc(*first, Nt4::n);
129 ++v_itr;
130 }
131 while (v_itr != v_.end()) {
132 *v_itr = DiNuc(Nt4::n, Nt4::n);
133 ++v_itr;
134 }
135 if (l_%2)
136 v_.back().second(Nt4::$);
137 }
138