1 // -*-mode:c++; c-style:k&r; c-basic-offset:4;-*-
2 //
3 // Copyright 2010, Julian Catchen <jcatchen@uoregon.edu>
4 //
5 // This file is part of Stacks.
6 //
7 // Stacks is free software: you can redistribute it and/or modify
8 // it under the terms of the GNU General Public License as published by
9 // the Free Software Foundation, either version 3 of the License, or
10 // (at your option) any later version.
11 //
12 // Stacks is distributed in the hope that it will be useful,
13 // but WITHOUT ANY WARRANTY; without even the implied warranty of
14 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 // GNU General Public License for more details.
16 //
17 // You should have received a copy of the GNU General Public License
18 // along with Stacks. If not, see <http://www.gnu.org/licenses/>.
19 //
20
21 #ifndef __STACKS_H__
22 #define __STACKS_H__
23
24 #include <cstring>
25 #include <string>
26 #include <vector>
27 #include <map>
28 #include <set>
29 #include <algorithm>
30 #include <utility>
31 #include<iostream>
32 #include<sstream>
33
34 #include "constants.h"
35 #include "nucleotides.h"
36 #include "utils.h"
37 #include "Seq.h"
38 #include "DNASeq.h"
39 #include "DNANSeq.h"
40 #include "DNASeq4.h"
41 #include "MetaPopInfo.h"
42
43 typedef unsigned int uint;
44 typedef string allele_type;
45
46 enum snp_type {snp_type_het, snp_type_hom, snp_type_unk, snp_type_discarded};
47 enum read_type {primary, secondary};
48 enum searcht {sequence, genomic_loc};
49 enum corr_type {p_value, bonferroni_win, bonferroni_gen, no_correction};
50
51 class SNP {
52 public:
53 snp_type type; // Heterozygous or homozygous
54 uint col;
55 float lratio;
56 char rank_1;
57 char rank_2;
58 char rank_3;
59 char rank_4;
60
SNP()61 SNP() {
62 col = 0;
63 lratio = 0.0;
64 rank_1 = 0;
65 rank_2 = 0;
66 rank_3 = 0;
67 rank_4 = 0;
68 }
SNP(const SNP & other)69 SNP(const SNP &other) {
70 col = other.col;
71 type = other.type;
72 lratio = other.lratio;
73 rank_1 = other.rank_1;
74 rank_2 = other.rank_2;
75 rank_3 = other.rank_3;
76 rank_4 = other.rank_4;
77 }
78 };
79
80 inline
compare_pair_snp(const pair<string,SNP * > & lhs,const pair<string,SNP * > & rhs)81 bool compare_pair_snp(const pair<string, SNP*>& lhs, const pair<string, SNP*>& rhs)
82 {
83 return lhs.second->col < rhs.second->col;
84 }
85
86 class Gap {
87 public:
88 uint start;
89 uint end;
90
Gap(uint s,uint e)91 Gap(uint s, uint e) {
92 start = s;
93 end = e;
94 }
95 };
96
97 class Aln {
98 public:
99 uint id;
100 uint gap_cnt;
101 double pct_id;
102 string cigar;
Aln()103 Aln() {
104 this->id = 0;
105 this->pct_id = 0.0;
106 this->gap_cnt = 0;
107 }
Aln(uint id,string cigar,double pct_id,uint gap_cnt)108 Aln(uint id, string cigar, double pct_id, uint gap_cnt) {
109 this->id = id;
110 this->cigar = cigar;
111 this->pct_id = pct_id;
112 this->gap_cnt = gap_cnt;
113 }
114 };
115
116 class PStack {
117 public:
118 uint id;
119 uint count; // Number of identical reads forming this stack
120 DNANSeq *seq; // Sequence read
121 vector<char *> map; // List of sequence read IDs merged into this stack
122 PhyLoc loc; // Physical genome location of this stack.
123
PStack()124 PStack() {
125 id = 0;
126 count = 0;
127 seq = NULL;
128 }
129 PStack(const PStack& other);
130 PStack& operator= (PStack&& other);
131 PStack& operator= (const PStack& other) = delete;
132
~PStack()133 ~PStack() {
134 if (seq!=NULL)
135 delete seq;
136 for (unsigned int i = 0; i < this->map.size(); i++)
137 delete [] this->map[i];
138 }
139
140 int add_id(const char *);
141 int add_seq(const char *);
142 int add_seq(const DNANSeq *);
add_read(const char * read_name)143 void add_read(const char* read_name) {
144 char* copy = new char[strlen(read_name)+1];
145 strcpy(copy, read_name);
146 map.push_back(copy);
147 ++count;
148 }
149
150 // extend(): Extends the PStack to the desired span.
151 void extend(const PhyLoc& phyloc, int length);
152
153 void clear();
154 bool operator< (const PStack& other) const;
155
set_id_of(set<PStack>::iterator pstack,int id)156 static void set_id_of(set<PStack>::iterator pstack, int id) {
157 const_cast<PStack&>(*pstack).id = id;
158 }
add_read_to(set<PStack>::iterator pstack,const char * read_name)159 static void add_read_to(set<PStack>::iterator pstack, const char* read_name) {
160 const_cast<PStack&>(*pstack).add_read(read_name);
161 }
162
163 };
164
165 class Stack {
166 public:
167 uint id;
168 DNANSeq *seq; // Sequence read
169 vector<uint> map; // List of sequence read IDs merged into this stack
170
Stack()171 Stack() {
172 id = 0;
173 seq = NULL;
174 }
~Stack()175 ~Stack() {
176 delete this->seq;
177 }
count()178 uint count() { return this->map.size(); }
179 int add_id(uint);
180 int add_seq(const char *);
181 int add_seq(const DNANSeq *);
182 };
183
184 class Rem {
185 public:
186 uint id;
187 vector<uint> map; // List of sequence read IDs merged into this stack
188 DNANSeq *seq; // Sequence read
189 bool utilized;
190
191 Rem();
192 Rem(int, uint, DNANSeq *);
~Rem()193 ~Rem() {
194 delete this->seq;
195 }
count()196 uint count() { return this->map.size(); }
197 int add_id(uint);
198 int add_seq(const char *);
199 int add_seq(const DNANSeq *);
200 };
201
202 class CatMatch {
203 public:
204 int batch_id;
205 int cat_id; // c-locus ID
206 int sample_id; // sample ID
207 int tag_id; // s-locus ID
208 int depth;
209 double lnl;
210 char *haplotype;
211 char *cigar;
212
CatMatch()213 CatMatch() {
214 batch_id = 0;
215 cat_id = 0;
216 sample_id = 0;
217 tag_id = 0;
218 depth = 0;
219 lnl = 0.0;
220 haplotype = NULL;
221 cigar = NULL;
222 }
~CatMatch()223 ~CatMatch() {
224 delete [] haplotype;
225 delete [] cigar;
226 }
227 };
228
229 class ModRes {
230 public:
231 int sample_id;
232 int tag_id;
233 char *model;
234
ModRes(int samp_id,int tag_id,const char * model)235 ModRes(int samp_id, int tag_id, const char *model) {
236 this->sample_id = samp_id;
237 this->tag_id = tag_id;
238 this->model = new char[strlen(model) + 1];
239 strcpy(this->model, model);
240 }
~ModRes()241 ~ModRes() {
242 delete [] this->model;
243 }
244 };
245
246 class SNPRes {
247 public:
248 int sample_id;
249 int tag_id;
250 vector<SNP *> snps;
251
SNPRes(int samp_id,int tag_id)252 SNPRes(int samp_id, int tag_id) {
253 this->sample_id = samp_id;
254 this->tag_id = tag_id;
255 }
~SNPRes()256 ~SNPRes() {
257 for (uint i = 0; i < this->snps.size(); i++)
258 delete this->snps[i];
259 this->snps.clear();
260 }
261 };
262
263 struct Read {
264 DNASeq4 seq;
265 string name;
266
267 Read() = default;
ReadRead268 Read(DNASeq4&& s, string&& n)
269 : seq(move(s)), name(move(n))
270 {}
271 Read(Read&&) = default;
272 Read& operator= (Read&&) = default;
273
274 bool is_read2() const;
275 };
276
277 struct SiteCounts {
278 Counts<Nt2> tot; // The sum over all samples.
279 vector<Counts<Nt2>> samples; // With size() == mpopi->samples().size()/
280 const MetaPopInfo* mpopi;
281 };
282
283 //
284 // Inline definitions
285 // ----------
286 //
287
288 inline
PStack(const PStack & other)289 PStack::PStack(const PStack& other)
290 : id(other.id)
291 , count (other.count)
292 , seq (new DNANSeq(*other.seq))
293 , map ()
294 , loc (other.loc)
295 {
296 map.reserve(other.map.size());
297 for (const char* readname : other.map) {
298 char* copy = new char[strlen(readname)+1];
299 strcpy(copy, readname);
300 map.push_back(copy);
301 }
302 }
303
304 inline
305 PStack& PStack::operator= (PStack&& other) {
306 id = other.id;
307 count = other.count;
308 seq = other.seq;
309 other.seq = NULL;
310 swap(map, other.map);
311 swap(loc, other.loc);
312 return *this;
313 }
314
315 inline
clear()316 void PStack::clear() {
317 id = 0;
318 count = 0;
319 if(seq!=NULL) {
320 delete seq;
321 seq = NULL;
322 }
323 map.clear();
324 loc.clear();
325 }
326
327 inline
328 bool PStack::operator< (const PStack& other) const {
329 if (loc < other.loc)
330 return true;
331 else if (other.loc < loc)
332 return false;
333 else
334 // Same genomic loci, compare sequences.
335 return *seq < *other.seq;
336 }
337
338 inline
is_read2()339 bool Read::is_read2() const {
340 return name.length() >= 2
341 && name.back() == '2'
342 && name[name.length()-2] == '/';
343 }
344
345 #endif // __STACKS_H__
346