1 // -*-mode:c++; c-style:k&r; c-basic-offset:4;-*-
2 //
3 // Copyright 2010, Julian Catchen <jcatchen@uoregon.edu>
4 //
5 // This file is part of Stacks.
6 //
7 // Stacks is free software: you can redistribute it and/or modify
8 // it under the terms of the GNU General Public License as published by
9 // the Free Software Foundation, either version 3 of the License, or
10 // (at your option) any later version.
11 //
12 // Stacks is distributed in the hope that it will be useful,
13 // but WITHOUT ANY WARRANTY; without even the implied warranty of
14 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 // GNU General Public License for more details.
16 //
17 // You should have received a copy of the GNU General Public License
18 // along with Stacks.  If not, see <http://www.gnu.org/licenses/>.
19 //
20 
21 #ifndef __STACKS_H__
22 #define __STACKS_H__
23 
24 #include <cstring>
25 #include <string>
26 #include <vector>
27 #include <map>
28 #include <set>
29 #include <algorithm>
30 #include <utility>
31 #include<iostream>
32 #include<sstream>
33 
34 #include "constants.h"
35 #include "nucleotides.h"
36 #include "utils.h"
37 #include "Seq.h"
38 #include "DNASeq.h"
39 #include "DNANSeq.h"
40 #include "DNASeq4.h"
41 #include "MetaPopInfo.h"
42 
43 typedef unsigned int uint;
44 typedef string allele_type;
45 
46 enum snp_type    {snp_type_het, snp_type_hom, snp_type_unk, snp_type_discarded};
47 enum read_type   {primary, secondary};
48 enum searcht     {sequence, genomic_loc};
49 enum corr_type   {p_value, bonferroni_win, bonferroni_gen, no_correction};
50 
51 class SNP {
52  public:
53     snp_type type;   // Heterozygous or homozygous
54     uint     col;
55     float    lratio;
56     char     rank_1;
57     char     rank_2;
58     char     rank_3;
59     char     rank_4;
60 
SNP()61     SNP() {
62         col    = 0;
63         lratio = 0.0;
64         rank_1 = 0;
65         rank_2 = 0;
66         rank_3 = 0;
67         rank_4 = 0;
68     }
SNP(const SNP & other)69     SNP(const SNP &other) {
70         col    = other.col;
71         type   = other.type;
72         lratio = other.lratio;
73         rank_1 = other.rank_1;
74         rank_2 = other.rank_2;
75         rank_3 = other.rank_3;
76         rank_4 = other.rank_4;
77     }
78 };
79 
80 inline
compare_pair_snp(const pair<string,SNP * > & lhs,const pair<string,SNP * > & rhs)81 bool compare_pair_snp(const pair<string, SNP*>& lhs, const pair<string, SNP*>& rhs)
82 {
83     return lhs.second->col < rhs.second->col;
84 }
85 
86 class Gap {
87 public:
88     uint start;
89     uint end;
90 
Gap(uint s,uint e)91     Gap(uint s, uint e) {
92         start = s;
93         end   = e;
94     }
95 };
96 
97 class Aln {
98 public:
99     uint   id;
100     uint   gap_cnt;
101     double pct_id;
102     string cigar;
Aln()103     Aln() {
104         this->id      = 0;
105         this->pct_id  = 0.0;
106         this->gap_cnt = 0;
107     }
Aln(uint id,string cigar,double pct_id,uint gap_cnt)108     Aln(uint id, string cigar, double pct_id, uint gap_cnt) {
109         this->id      = id;
110         this->cigar   = cigar;
111         this->pct_id  = pct_id;
112         this->gap_cnt = gap_cnt;
113     }
114 };
115 
116 class PStack {
117  public:
118     uint            id;
119     uint         count; // Number of identical reads forming this stack
120     DNANSeq       *seq; // Sequence read
121     vector<char *> map; // List of sequence read IDs merged into this stack
122     PhyLoc         loc; // Physical genome location of this stack.
123 
PStack()124     PStack()  {
125         id     = 0;
126         count  = 0;
127         seq    = NULL;
128     }
129     PStack(const PStack& other);
130     PStack& operator= (PStack&& other);
131     PStack& operator= (const PStack& other) = delete;
132 
~PStack()133     ~PStack() {
134         if (seq!=NULL)
135             delete seq;
136         for (unsigned int i = 0; i < this->map.size(); i++)
137             delete [] this->map[i];
138     }
139 
140     int  add_id(const char *);
141     int  add_seq(const char *);
142     int  add_seq(const DNANSeq *);
add_read(const char * read_name)143     void add_read(const char* read_name) {
144         char* copy = new char[strlen(read_name)+1];
145         strcpy(copy, read_name);
146         map.push_back(copy);
147         ++count;
148     }
149 
150     // extend(): Extends the PStack to the desired span.
151     void extend(const PhyLoc& phyloc, int length);
152 
153     void clear();
154     bool operator< (const PStack& other) const;
155 
set_id_of(set<PStack>::iterator pstack,int id)156     static void set_id_of(set<PStack>::iterator pstack, int id) {
157         const_cast<PStack&>(*pstack).id = id;
158     }
add_read_to(set<PStack>::iterator pstack,const char * read_name)159     static void add_read_to(set<PStack>::iterator pstack, const char* read_name) {
160         const_cast<PStack&>(*pstack).add_read(read_name);
161     }
162 
163 };
164 
165 class Stack {
166  public:
167     uint         id;
168     DNANSeq     *seq; // Sequence read
169     vector<uint> map; // List of sequence read IDs merged into this stack
170 
Stack()171     Stack()  {
172         id  = 0;
173         seq = NULL;
174     }
~Stack()175     ~Stack() {
176         delete this->seq;
177     }
count()178     uint count() { return this->map.size(); }
179     int  add_id(uint);
180     int  add_seq(const char *);
181     int  add_seq(const DNANSeq *);
182 };
183 
184 class Rem {
185  public:
186     uint          id;
187     vector<uint> map; // List of sequence read IDs merged into this stack
188     DNANSeq     *seq; // Sequence read
189     bool    utilized;
190 
191     Rem();
192     Rem(int, uint, DNANSeq *);
~Rem()193     ~Rem() {
194         delete this->seq;
195     }
count()196     uint count() { return this->map.size(); }
197     int  add_id(uint);
198     int  add_seq(const char *);
199     int  add_seq(const DNANSeq *);
200 };
201 
202 class CatMatch {
203 public:
204     int    batch_id;
205     int    cat_id; // c-locus ID
206     int    sample_id; // sample ID
207     int    tag_id; // s-locus ID
208     int    depth;
209     double lnl;
210     char  *haplotype;
211     char  *cigar;
212 
CatMatch()213     CatMatch() {
214         batch_id  = 0;
215         cat_id    = 0;
216         sample_id = 0;
217         tag_id    = 0;
218         depth     = 0;
219         lnl       = 0.0;
220         haplotype = NULL;
221         cigar     = NULL;
222     }
~CatMatch()223     ~CatMatch() {
224         delete [] haplotype;
225         delete [] cigar;
226     }
227 };
228 
229 class ModRes {
230 public:
231     int   sample_id;
232     int   tag_id;
233     char *model;
234 
ModRes(int samp_id,int tag_id,const char * model)235     ModRes(int samp_id, int tag_id, const char *model) {
236         this->sample_id = samp_id;
237         this->tag_id    = tag_id;
238         this->model     = new char[strlen(model) + 1];
239         strcpy(this->model, model);
240     }
~ModRes()241     ~ModRes() {
242         delete [] this->model;
243     }
244 };
245 
246 class SNPRes {
247 public:
248     int   sample_id;
249     int   tag_id;
250     vector<SNP *> snps;
251 
SNPRes(int samp_id,int tag_id)252     SNPRes(int samp_id, int tag_id) {
253         this->sample_id = samp_id;
254         this->tag_id    = tag_id;
255     }
~SNPRes()256     ~SNPRes() {
257         for (uint i = 0; i < this->snps.size(); i++)
258             delete this->snps[i];
259         this->snps.clear();
260     }
261 };
262 
263 struct Read {
264     DNASeq4 seq;
265     string name;
266 
267     Read() = default;
ReadRead268     Read(DNASeq4&& s, string&& n)
269         : seq(move(s)), name(move(n))
270         {}
271     Read(Read&&) = default;
272     Read& operator= (Read&&) = default;
273 
274     bool is_read2() const;
275 };
276 
277 struct SiteCounts {
278     Counts<Nt2> tot; // The sum over all samples.
279     vector<Counts<Nt2>> samples; // With size() == mpopi->samples().size()/
280     const MetaPopInfo* mpopi;
281 };
282 
283 //
284 // Inline definitions
285 // ----------
286 //
287 
288 inline
PStack(const PStack & other)289 PStack::PStack(const PStack& other)
290         : id(other.id)
291         , count (other.count)
292         , seq (new DNANSeq(*other.seq))
293         , map ()
294         , loc (other.loc)
295         {
296     map.reserve(other.map.size());
297     for (const char* readname : other.map) {
298         char* copy = new char[strlen(readname)+1];
299         strcpy(copy, readname);
300         map.push_back(copy);
301     }
302 }
303 
304 inline
305 PStack& PStack::operator= (PStack&& other) {
306     id = other.id;
307     count = other.count;
308     seq = other.seq;
309     other.seq = NULL;
310     swap(map, other.map);
311     swap(loc, other.loc);
312     return *this;
313 }
314 
315 inline
clear()316 void PStack::clear() {
317     id = 0;
318     count = 0;
319     if(seq!=NULL) {
320         delete seq;
321         seq = NULL;
322     }
323     map.clear();
324     loc.clear();
325 }
326 
327 inline
328 bool PStack::operator< (const PStack& other) const {
329     if (loc < other.loc)
330         return true;
331     else if (other.loc < loc)
332         return false;
333     else
334         // Same genomic loci, compare sequences.
335         return *seq < *other.seq;
336 }
337 
338 inline
is_read2()339 bool Read::is_read2() const {
340     return name.length() >= 2
341             && name.back() == '2'
342             && name[name.length()-2] == '/';
343 }
344 
345 #endif // __STACKS_H__
346