1 // -*-mode:c++; c-style:k&r; c-basic-offset:4;-*-
2 //
3 // Copyright 2013, Julian Catchen <jcatchen@uoregon.edu>
4 //
5 // This file is part of Stacks.
6 //
7 // Stacks is free software: you can redistribute it and/or modify
8 // it under the terms of the GNU General Public License as published by
9 // the Free Software Foundation, either version 3 of the License, or
10 // (at your option) any later version.
11 //
12 // Stacks is distributed in the hope that it will be useful,
13 // but WITHOUT ANY WARRANTY; without even the implied warranty of
14 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 // GNU General Public License for more details.
16 //
17 // You should have received a copy of the GNU General Public License
18 // along with Stacks.  If not, see <http://www.gnu.org/licenses/>.
19 //
20 
21 #ifndef __PHASEDSTACKS_H__
22 #define __PHASEDSTACKS_H__
23 
24 #ifdef _OPENMP
25 #include <omp.h>    // OpenMP library
26 #endif
27 #include <getopt.h> // Process command-line options
28 #include <dirent.h> // Open/Read contents of a directory
29 #include <cmath>
30 #include <cstdlib>
31 #include <cstring>
32 #include <ctime>
33 #include <utility>
34 #include <string>
35 #include <iostream>
36 #include <fstream>
37 #include <sstream>
38 #include <iomanip>
39 #include <vector>
40 #include <map>
41 #include <set>
42 
43 #include "constants.h"
44 #include "utils.h"
45 #include "log_utils.h"
46 #include "catalog_utils.h"
47 #include "input.h"
48 #include "sql_utilities.h"
49 #include "locus.h"
50 
51 #ifdef HAVE_LIBZ
52 #include <cerrno>
53 #include <zlib.h>
54 #endif
55 
56 enum loc_t {strong_ld, recomb, uninformative};
57 
58 class PhasedSample {
59 public:
60     string name;
61     int    id;
62     int    size;
63     char  *nucs_1;
64     char  *nucs_2;
65 
PhasedSample()66     PhasedSample() {
67         this->id   = 0;
68         this->size = 0;
69         this->nucs_1 = NULL;
70         this->nucs_2 = NULL;
71     }
~PhasedSample()72     ~PhasedSample() {
73         if (this->nucs_1 != NULL)
74             delete [] this->nucs_1;
75         if (this->nucs_2 != NULL)
76             delete [] this->nucs_2;
77     }
78 };
79 
80 class NucSum {
81 public:
82     uint  bp;
83     uint  col;
84     uint  clocus;
85     float freq;
86     uint  nuc[4];
87     // nuc[0] == A
88     // nuc[1] == C
89     // nuc[2] == G
90     // nuc[3] == T
91 
NucSum()92     NucSum() {
93         this->freq    = 1.0;
94         this->bp      = 0;
95         this->clocus  = 0;
96         for (uint i = 0; i < 4; i++)
97             this->nuc[i] = 0;
98     }
99 };
100 
101 class dPrime {
102 public:
103     double dprime;
104     bool   chisq_p;
105     double var;
106     double ci_high;
107     double ci_low;
108     bool   informative;
109     loc_t  type;
110 
dPrime()111     dPrime() {
112         this->dprime  = 0.0;
113         this->chisq_p = false;
114         this->var     = 0.0;
115         this->ci_high = 0.0;
116         this->ci_low  = 0.0;
117         this->type    = uninformative;
118     }
119 };
120 
121 class PhasedSummary {
122     map<string, int> sample_map;
123 
124 public:
125     uint     size;
126     uint     sample_cnt;
127     NucSum  *nucs;
128     PhasedSample  *samples;
129     dPrime **dprime;
130     bool   **recomb;
131 
PhasedSummary(uint num_samples,uint num_genotypes)132     PhasedSummary(uint num_samples, uint num_genotypes) {
133         this->sample_cnt = num_samples;
134         this->samples    = new PhasedSample[this->sample_cnt];
135         this->size       = num_genotypes;
136         this->nucs       = new NucSum[this->size];
137         this->dprime     = new dPrime *[this->size];
138         for (uint i = 0; i < this->size; i++)
139             this->dprime[i] = new dPrime[this->size];
140 
141         this->recomb     = new bool *[this->size];
142         for (uint i = 0; i < this->size; i++) {
143             this->recomb[i] = new bool[this->size];
144             memset(this->recomb[i], 0, this->size);
145         }
146     }
~PhasedSummary()147     ~PhasedSummary() {
148         if (this->nucs != NULL)
149             delete [] this->nucs;
150         if (this->dprime != NULL) {
151             for (uint i = 0; i < this->size; i++)
152                 delete [] this->dprime[i];
153             delete [] this->dprime;
154         }
155         if (this->recomb != NULL) {
156             for (uint i = 0; i < this->size; i++)
157                 delete [] this->recomb[i];
158             delete [] this->recomb;
159         }
160         if (this->samples != NULL)
161             delete [] this->samples;
162     }
add_sample(string name)163     int add_sample(string name) {
164         uint i = this->sample_map.size();
165         this->sample_map[name] = i;
166         this->samples[i].name = name;
167         return i;
168     }
169 };
170 
171 class HBlock {
172 public:
173     vector<int> loci;
174     HBlock *next;
175 
HBlock()176     HBlock() {
177         this->next = NULL;
178     }
179 };
180 
181 class dPrimeBlocks {
182     HBlock *_head;
183 
184 public:
dPrimeBlocks()185     dPrimeBlocks() {
186         this->_head = NULL;
187     }
~dPrimeBlocks()188     ~dPrimeBlocks() {
189         HBlock *cur, *next;
190         cur  = this->_head;
191         next = cur->next;
192 
193         while (next != NULL) {
194             delete cur;
195             cur  = next;
196             next = cur->next;
197         }
198     }
head()199     HBlock *head() {
200         return this->_head;
201     }
202     HBlock *initialize(set<int> &);
203     HBlock *merge_adjacent(HBlock *);
204     int     print();
205 };
206 
207 void  help( void );
208 void  version( void );
209 int   parse_command_line(int, char**);
210 int   build_file_list(vector<pair<int, string> > &);
211 int   parse_population_map(string, map<string, int> &, map<int, int> &);
212 
213 PhasedSummary *parse_fastphase(string);
214 PhasedSummary *parse_beagle(map<int, CSLocus *> &, string);
215 PhasedSummary *parse_beagle_haplotypes(map<int, CSLocus *> &, string);
216 
217 int   summarize_phased_genotypes(PhasedSummary *);
218 int   calc_dprime(PhasedSummary *);
219 int   assign_alleles(NucSum, char &, char &, double &, double &);
220 int   write_dprime(string, PhasedSummary *);
221 int   four_gamete_test(string, map<string, int> &, PhasedSummary *, map<int, int> &, map<int, int> &);
222 int   dprime_blocks(string, map<string, int> &, PhasedSummary *, map<int, int> &, map<int, int> &);
223 bool  check_adjacent_blocks(PhasedSummary *, HBlock *);
224 int   enumerate_haplotypes(ofstream &, map<string, int> &, PhasedSummary *, uint, uint);
225 int   bucket_dprime(vector<double> &, vector<double> &, PhasedSummary *);
226 int   write_buckets(string, vector<double> &, vector<double> &);
227 
228 #endif // __PHASEDSTACKS_H__
229