1 // -*-mode:c++; c-style:k&r; c-basic-offset:4;-*- 2 // 3 // Copyright 2013, Julian Catchen <jcatchen@uoregon.edu> 4 // 5 // This file is part of Stacks. 6 // 7 // Stacks is free software: you can redistribute it and/or modify 8 // it under the terms of the GNU General Public License as published by 9 // the Free Software Foundation, either version 3 of the License, or 10 // (at your option) any later version. 11 // 12 // Stacks is distributed in the hope that it will be useful, 13 // but WITHOUT ANY WARRANTY; without even the implied warranty of 14 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15 // GNU General Public License for more details. 16 // 17 // You should have received a copy of the GNU General Public License 18 // along with Stacks. If not, see <http://www.gnu.org/licenses/>. 19 // 20 21 #ifndef __PHASEDSTACKS_H__ 22 #define __PHASEDSTACKS_H__ 23 24 #ifdef _OPENMP 25 #include <omp.h> // OpenMP library 26 #endif 27 #include <getopt.h> // Process command-line options 28 #include <dirent.h> // Open/Read contents of a directory 29 #include <cmath> 30 #include <cstdlib> 31 #include <cstring> 32 #include <ctime> 33 #include <utility> 34 #include <string> 35 #include <iostream> 36 #include <fstream> 37 #include <sstream> 38 #include <iomanip> 39 #include <vector> 40 #include <map> 41 #include <set> 42 43 #include "constants.h" 44 #include "utils.h" 45 #include "log_utils.h" 46 #include "catalog_utils.h" 47 #include "input.h" 48 #include "sql_utilities.h" 49 #include "locus.h" 50 51 #ifdef HAVE_LIBZ 52 #include <cerrno> 53 #include <zlib.h> 54 #endif 55 56 enum loc_t {strong_ld, recomb, uninformative}; 57 58 class PhasedSample { 59 public: 60 string name; 61 int id; 62 int size; 63 char *nucs_1; 64 char *nucs_2; 65 PhasedSample()66 PhasedSample() { 67 this->id = 0; 68 this->size = 0; 69 this->nucs_1 = NULL; 70 this->nucs_2 = NULL; 71 } ~PhasedSample()72 ~PhasedSample() { 73 if (this->nucs_1 != NULL) 74 delete [] this->nucs_1; 75 if (this->nucs_2 != NULL) 76 delete [] this->nucs_2; 77 } 78 }; 79 80 class NucSum { 81 public: 82 uint bp; 83 uint col; 84 uint clocus; 85 float freq; 86 uint nuc[4]; 87 // nuc[0] == A 88 // nuc[1] == C 89 // nuc[2] == G 90 // nuc[3] == T 91 NucSum()92 NucSum() { 93 this->freq = 1.0; 94 this->bp = 0; 95 this->clocus = 0; 96 for (uint i = 0; i < 4; i++) 97 this->nuc[i] = 0; 98 } 99 }; 100 101 class dPrime { 102 public: 103 double dprime; 104 bool chisq_p; 105 double var; 106 double ci_high; 107 double ci_low; 108 bool informative; 109 loc_t type; 110 dPrime()111 dPrime() { 112 this->dprime = 0.0; 113 this->chisq_p = false; 114 this->var = 0.0; 115 this->ci_high = 0.0; 116 this->ci_low = 0.0; 117 this->type = uninformative; 118 } 119 }; 120 121 class PhasedSummary { 122 map<string, int> sample_map; 123 124 public: 125 uint size; 126 uint sample_cnt; 127 NucSum *nucs; 128 PhasedSample *samples; 129 dPrime **dprime; 130 bool **recomb; 131 PhasedSummary(uint num_samples,uint num_genotypes)132 PhasedSummary(uint num_samples, uint num_genotypes) { 133 this->sample_cnt = num_samples; 134 this->samples = new PhasedSample[this->sample_cnt]; 135 this->size = num_genotypes; 136 this->nucs = new NucSum[this->size]; 137 this->dprime = new dPrime *[this->size]; 138 for (uint i = 0; i < this->size; i++) 139 this->dprime[i] = new dPrime[this->size]; 140 141 this->recomb = new bool *[this->size]; 142 for (uint i = 0; i < this->size; i++) { 143 this->recomb[i] = new bool[this->size]; 144 memset(this->recomb[i], 0, this->size); 145 } 146 } ~PhasedSummary()147 ~PhasedSummary() { 148 if (this->nucs != NULL) 149 delete [] this->nucs; 150 if (this->dprime != NULL) { 151 for (uint i = 0; i < this->size; i++) 152 delete [] this->dprime[i]; 153 delete [] this->dprime; 154 } 155 if (this->recomb != NULL) { 156 for (uint i = 0; i < this->size; i++) 157 delete [] this->recomb[i]; 158 delete [] this->recomb; 159 } 160 if (this->samples != NULL) 161 delete [] this->samples; 162 } add_sample(string name)163 int add_sample(string name) { 164 uint i = this->sample_map.size(); 165 this->sample_map[name] = i; 166 this->samples[i].name = name; 167 return i; 168 } 169 }; 170 171 class HBlock { 172 public: 173 vector<int> loci; 174 HBlock *next; 175 HBlock()176 HBlock() { 177 this->next = NULL; 178 } 179 }; 180 181 class dPrimeBlocks { 182 HBlock *_head; 183 184 public: dPrimeBlocks()185 dPrimeBlocks() { 186 this->_head = NULL; 187 } ~dPrimeBlocks()188 ~dPrimeBlocks() { 189 HBlock *cur, *next; 190 cur = this->_head; 191 next = cur->next; 192 193 while (next != NULL) { 194 delete cur; 195 cur = next; 196 next = cur->next; 197 } 198 } head()199 HBlock *head() { 200 return this->_head; 201 } 202 HBlock *initialize(set<int> &); 203 HBlock *merge_adjacent(HBlock *); 204 int print(); 205 }; 206 207 void help( void ); 208 void version( void ); 209 int parse_command_line(int, char**); 210 int build_file_list(vector<pair<int, string> > &); 211 int parse_population_map(string, map<string, int> &, map<int, int> &); 212 213 PhasedSummary *parse_fastphase(string); 214 PhasedSummary *parse_beagle(map<int, CSLocus *> &, string); 215 PhasedSummary *parse_beagle_haplotypes(map<int, CSLocus *> &, string); 216 217 int summarize_phased_genotypes(PhasedSummary *); 218 int calc_dprime(PhasedSummary *); 219 int assign_alleles(NucSum, char &, char &, double &, double &); 220 int write_dprime(string, PhasedSummary *); 221 int four_gamete_test(string, map<string, int> &, PhasedSummary *, map<int, int> &, map<int, int> &); 222 int dprime_blocks(string, map<string, int> &, PhasedSummary *, map<int, int> &, map<int, int> &); 223 bool check_adjacent_blocks(PhasedSummary *, HBlock *); 224 int enumerate_haplotypes(ofstream &, map<string, int> &, PhasedSummary *, uint, uint); 225 int bucket_dprime(vector<double> &, vector<double> &, PhasedSummary *); 226 int write_buckets(string, vector<double> &, vector<double> &); 227 228 #endif // __PHASEDSTACKS_H__ 229