1 // -*-mode:c++; c-style:k&r; c-basic-offset:4;-*- 2 // 3 // Copyright 2012-2019, Julian Catchen <jcatchen@illinois.edu> 4 // 5 // This file is part of Stacks. 6 // 7 // Stacks is free software: you can redistribute it and/or modify 8 // it under the terms of the GNU General Public License as published by 9 // the Free Software Foundation, either version 3 of the License, or 10 // (at your option) any later version. 11 // 12 // Stacks is distributed in the hope that it will be useful, 13 // but WITHOUT ANY WARRANTY; without even the implied warranty of 14 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15 // GNU General Public License for more details. 16 // 17 // You should have received a copy of the GNU General Public License 18 // along with Stacks. If not, see <http://www.gnu.org/licenses/>. 19 // 20 #ifndef EXPORT_FORMATS_H 21 #define EXPORT_FORMATS_H 22 23 #include <iostream> 24 #include <utility> 25 #include <map> 26 #include <typeinfo> 27 #include <typeindex> 28 29 #include "locus.h" 30 #include "PopMap.h" 31 #include "PopSum.h" 32 #include "ordered.h" // for "snp" 33 34 void tally_complete_haplotypes( 35 Datum const*const* data, 36 size_t n_samples, 37 strand_type loc_strand, 38 vector<pair<const char*, size_t>>& haps_sorted_decr_freq, 39 map<const char*, size_t, LessCStrs>& hap_indexes_map 40 ); 41 42 class Export { 43 protected: 44 string _path; 45 ofstream _fh; 46 47 public: Export()48 Export() {} ~Export()49 virtual ~Export() {} 50 virtual int open(const MetaPopInfo *) = 0; 51 virtual int write_header() = 0; 52 virtual int write_batch(const vector<LocBin *> &) = 0; post_processing()53 virtual int post_processing() {return 0;} close()54 virtual void close() {this->_fh.close();} 55 56 bool is_hap_export(); tmp_path()57 string tmp_path() const {return this->_path + ".part";} 58 static int transpose(ifstream &ifh, vector<string> &transposed); 59 }; 60 61 class OrderableExport : public Export { 62 public: OrderableExport()63 OrderableExport() {} ~OrderableExport()64 virtual ~OrderableExport() {} 65 int write_batch(const vector<LocBin*>& loci); 66 67 protected: 68 virtual int write_site(const CSLocus* cloc, const LocPopSum* psum, Datum const*const* datums, size_t col, size_t index) = 0; 69 }; 70 71 class GenPos { 72 public: 73 uint id; 74 uint bp; 75 uint snp_index; 76 loc_type type; 77 GenPos(int id,int snp_index,int bp)78 GenPos(int id, int snp_index, int bp) { 79 this->id = id; 80 this->snp_index = snp_index; 81 this->bp = bp; 82 this->type = snp; 83 } GenPos(int id,int snp_index,int bp,loc_type type)84 GenPos(int id, int snp_index, int bp, loc_type type) { 85 this->id = id; 86 this->snp_index = snp_index; 87 this->bp = bp; 88 this->type = type; 89 } 90 91 bool operator<(const GenPos& other) const {return bp < other.bp;} 92 }; 93 94 class MarkersExport: public Export { 95 // 96 // Output a list of heterozygous loci and the associated haplotype frequencies. 97 // 98 const MetaPopInfo *_mpopi; 99 100 public: MarkersExport()101 MarkersExport() : _mpopi(NULL) {} ~MarkersExport()102 ~MarkersExport() {} 103 int open(const MetaPopInfo *mpopi); 104 int write_header(); 105 int write_batch(const vector<LocBin *> &); 106 }; 107 108 class GenotypesExport: public Export { 109 // 110 // Output a list of heterozygous loci and the associated haplotype frequencies. 111 // 112 const MetaPopInfo *_mpopi; 113 114 public: GenotypesExport()115 GenotypesExport() : _mpopi(NULL) {} ~GenotypesExport()116 ~GenotypesExport() {} 117 int open(const MetaPopInfo *mpopi); 118 int write_header(); 119 int write_batch(const vector<LocBin *> &); 120 }; 121 122 class SumstatsExport: public Export { 123 // 124 // Output the locus-level summary statistics. 125 // 126 const MetaPopInfo *_mpopi; 127 uint _pop_cnt; 128 129 public: SumstatsExport()130 SumstatsExport() : _mpopi(NULL), _pop_cnt(UINT_MAX) {} ~SumstatsExport()131 ~SumstatsExport() {} 132 int open(const MetaPopInfo *mpopi); 133 int write_header(); 134 int write_batch(const vector<LocBin *> &); 135 }; 136 137 class HapstatsExport: public Export { 138 // 139 // Output the locus-level haplotype statistics. 140 // 141 const MetaPopInfo *_mpopi; 142 uint _pop_cnt; 143 144 public: HapstatsExport()145 HapstatsExport() : _mpopi(NULL), _pop_cnt(UINT_MAX) {} ~HapstatsExport()146 ~HapstatsExport() {} 147 int open(const MetaPopInfo *mpopi); 148 int write_header(); 149 int write_batch(const vector<LocBin *> &); 150 }; 151 152 class SnpDivergenceExport: public Export { 153 // 154 // Output the SNP-level divergence statistics. 155 // 156 const MetaPopInfo *_mpopi; 157 vector<ofstream *> _fhs; 158 OPopPair<PopPair> *_order; 159 160 public: 161 SnpDivergenceExport(ofstream &log_fh); ~SnpDivergenceExport()162 ~SnpDivergenceExport() { 163 for (uint i = 0; i < this->_fhs.size(); i++) 164 delete this->_fhs[i]; 165 delete this->_order; 166 } 167 int open(const MetaPopInfo *mpopi); 168 int write_header(); write_batch(const vector<LocBin * > &)169 int write_batch(const vector<LocBin *> &) { return 0; } 170 int write_batch_pairwise(const vector<LocBin *> &, const vector<vector<PopPair **>> &); close()171 void close() { 172 for (uint i = 0; i < this->_fhs.size(); i++) 173 this->_fhs[i]->close(); 174 return; 175 } 176 177 private: 178 int write_site(ofstream *fh, const PopPair *pp, string chr); 179 }; 180 181 class HapDivergenceExport: public Export { 182 // 183 // Output the SNP-level divergence statistics. 184 // 185 const MetaPopInfo *_mpopi; 186 vector<ofstream *> _fhs; 187 ofstream *_metapop_fh; 188 189 public: HapDivergenceExport()190 HapDivergenceExport() : _mpopi(NULL), _metapop_fh(NULL) {} ~HapDivergenceExport()191 ~HapDivergenceExport() { 192 for (uint i = 0; i < this->_fhs.size(); i++) 193 delete this->_fhs[i]; 194 } 195 int open(const MetaPopInfo *mpopi); 196 int write_header(); write_batch(const vector<LocBin * > &)197 int write_batch(const vector<LocBin *> &) { return 0; } 198 int write_batch_pairwise(const vector<LocBin *> &, const vector<vector<HapStat *>> &, const vector<HapStat *> &); close()199 void close() { 200 for (uint i = 0; i < this->_fhs.size(); i++) 201 this->_fhs[i]->close(); 202 return; 203 } 204 }; 205 206 class PlinkExport: public OrderableExport { 207 const MetaPopInfo *_mpopi; 208 ofstream _tmpfh; 209 ifstream _intmpfh; 210 string _markers_path; 211 ofstream _markers_fh; 212 213 public: PlinkExport()214 PlinkExport() : _mpopi(NULL) {} ~PlinkExport()215 ~PlinkExport() {} 216 int open(const MetaPopInfo *mpopi); 217 int write_header(); 218 int write_batch(const vector<LocBin *> &loci); 219 int post_processing(); 220 void close(); 221 222 private: 223 int write_site(const CSLocus* cloc, const LocPopSum* psum, Datum const*const* datums, size_t col, size_t index); 224 }; 225 226 class GenePopExport: public OrderableExport { 227 const MetaPopInfo *_mpopi; 228 ofstream _tmpfh; 229 230 public: GenePopExport()231 GenePopExport() : _mpopi(NULL) {} ~GenePopExport()232 ~GenePopExport() {} 233 int open(const MetaPopInfo *mpopi); 234 int write_header(); 235 int post_processing(); close()236 void close() {this->_fh.close(); remove(this->tmp_path().c_str());} 237 238 private: 239 int write_site(const CSLocus* cloc, const LocPopSum* psum, Datum const*const* datums, size_t col, size_t index); 240 }; 241 242 class GenePopHapsExport: public Export { 243 const MetaPopInfo *_mpopi; 244 ofstream _tmpfh; 245 size_t _n_digits; 246 247 public: GenePopHapsExport()248 GenePopHapsExport() : _mpopi(NULL), _n_digits(2) {} ~GenePopHapsExport()249 ~GenePopHapsExport() {} 250 int open(const MetaPopInfo *mpopi); 251 int write_header(); 252 int write_batch(const vector<LocBin*>& loci); 253 int post_processing(); close()254 void close() {this->_fh.close(); remove(this->tmp_path().c_str());} 255 set_digits(size_t n)256 void set_digits(size_t n) { assert(n==2 || n==3); _n_digits=n; } 257 258 private: 259 int write_site(const CSLocus* cloc, const LocPopSum* psum, Datum const*const* datums, size_t col, size_t index); 260 }; 261 262 class StructureExport: public OrderableExport { 263 const MetaPopInfo *_mpopi; 264 string _tmp_path; 265 ofstream _tmpfh; 266 ifstream _intmpfh; 267 268 public: StructureExport()269 StructureExport() : _mpopi(NULL) {} ~StructureExport()270 ~StructureExport() {} 271 int open(const MetaPopInfo *mpopi); 272 int write_header(); 273 int post_processing(); 274 void close(); 275 276 private: 277 int write_site(const CSLocus* cloc, const LocPopSum* psum, Datum const*const* datums, size_t col, size_t index); 278 }; 279 280 class FineRADStructureExport: public Export { 281 const MetaPopInfo *_mpopi; 282 string _tmp_path; 283 ofstream _tmpfh; 284 ifstream _intmpfh; 285 286 public: FineRADStructureExport()287 FineRADStructureExport() : _mpopi(NULL) {} ~FineRADStructureExport()288 ~FineRADStructureExport() {} 289 int open(const MetaPopInfo *mpopi); 290 int write_header(); 291 int write_batch(const vector<LocBin *> &); 292 }; 293 294 class PhylipExport: public OrderableExport { 295 protected: 296 const MetaPopInfo *_mpopi; 297 string _log_path; 298 ofstream _logfh; 299 string _tmp_path; 300 ofstream _tmpfh; 301 ifstream _intmpfh; 302 size_t _site_index; 303 304 public: PhylipExport()305 PhylipExport() : _mpopi(NULL), _site_index(0) {} 306 int open(const MetaPopInfo *mpopi); 307 int write_header(); 308 int post_processing(); 309 void close(); 310 }; 311 312 class PhylipVarExport: public PhylipExport { 313 public: PhylipVarExport()314 PhylipVarExport() {} 315 private: 316 int write_site(const CSLocus* cloc, const LocPopSum* psum, Datum const*const* datums, size_t col, size_t index); 317 }; 318 319 class PhylipFixedExport: public PhylipExport { 320 public: PhylipFixedExport()321 PhylipFixedExport() {} 322 private: 323 int write_site(const CSLocus* cloc, const LocPopSum* psum, Datum const*const* datums, size_t col, size_t index); 324 }; 325 326 class HzarExport: public OrderableExport { 327 const MetaPopInfo *_mpopi; 328 ofstream _tmpfh; 329 string _tmp_path; 330 ifstream _intmpfh; 331 332 public: HzarExport()333 HzarExport() : _mpopi(NULL) {} ~HzarExport()334 ~HzarExport() {} 335 int open(const MetaPopInfo *mpopi); write_header()336 int write_header() { return 0; }; 337 int post_processing(); 338 void close(); 339 340 private: 341 int write_site(const CSLocus* cloc, const LocPopSum* psum, Datum const*const* datums, size_t col, size_t index); 342 }; 343 344 class FastaLociExport: public Export { 345 // 346 // Output a list of heterozygous loci and the associated haplotype frequencies. 347 // 348 const MetaPopInfo *_mpopi; 349 350 public: FastaLociExport()351 FastaLociExport() : _mpopi(NULL) {} ~FastaLociExport()352 ~FastaLociExport() {} 353 int open(const MetaPopInfo *mpopi); 354 int write_header(); 355 int write_batch(const vector<LocBin *> &); 356 }; 357 358 class FastaRawExport: public Export { 359 // 360 // Output a list of heterozygous loci and the associated haplotype frequencies. 361 // 362 const MetaPopInfo *_mpopi; 363 364 public: FastaRawExport()365 FastaRawExport() : _mpopi(NULL) {} ~FastaRawExport()366 ~FastaRawExport() {} 367 int open(const MetaPopInfo *mpopi); 368 int write_header(); 369 int write_batch(const vector<LocBin *> &); 370 }; 371 372 class FastaSamplesExport: public Export { 373 // 374 // Output a list of heterozygous loci and the associated haplotype frequencies. 375 // 376 const MetaPopInfo *_mpopi; 377 378 public: FastaSamplesExport()379 FastaSamplesExport() : _mpopi(NULL) {} ~FastaSamplesExport()380 ~FastaSamplesExport() {} 381 int open(const MetaPopInfo *mpopi); 382 int write_header(); 383 int write_batch(const vector<LocBin *> &); 384 }; 385 386 class VcfExport: public OrderableExport { 387 const MetaPopInfo*_mpopi; 388 VcfWriter* _writer; 389 390 public: VcfExport()391 VcfExport() : _mpopi(NULL), _writer(NULL) {} ~VcfExport()392 ~VcfExport() { delete this->_writer; } 393 int open(const MetaPopInfo *mpopi); write_header()394 int write_header() { return 0; } 395 396 private: 397 int write_site(const CSLocus* cloc, const LocPopSum* psum, Datum const*const* datums, size_t col, size_t index); 398 }; 399 400 class VcfHapsExport: public Export { 401 const MetaPopInfo*_mpopi; 402 VcfWriter* _writer; 403 404 public: VcfHapsExport()405 VcfHapsExport() : _mpopi(NULL), _writer(NULL) {} ~VcfHapsExport()406 ~VcfHapsExport() { delete this->_writer; } 407 int open(const MetaPopInfo *mpopi); 408 int write_batch(const vector<LocBin*>& loci); write_header()409 int write_header() { return 0; } 410 }; 411 412 class TreemixExport: public OrderableExport { 413 const MetaPopInfo*_mpopi; 414 ofstream _writer; 415 416 public: TreemixExport()417 TreemixExport() : _mpopi(NULL), _writer(NULL) {} 418 int open(const MetaPopInfo *mpopi); write_header()419 int write_header() { return 0; } 420 421 private: 422 int write_site(const CSLocus* cloc, const LocPopSum* psum, Datum const*const* datums, size_t col, size_t index); 423 }; 424 425 /* 426 int write_generic(map<int, CSLocus *> &, PopMap<CSLocus> *, bool); 427 int write_vcf_haplotypes(map<int, CSLocus *> &, PopMap<CSLocus> *, PopSum<CSLocus> *); 428 int write_phase(map<int, CSLocus *> &, PopMap<CSLocus> *, PopSum<CSLocus> *); 429 int write_fastphase(map<int, CSLocus *> &, PopMap<CSLocus> *, PopSum<CSLocus> *); 430 int write_plink(map<int, CSLocus *> &, PopMap<CSLocus> *, PopSum<CSLocus> *); 431 int write_hzar(map<int, CSLocus *> &, PopMap<CSLocus> *, PopSum<CSLocus> *); 432 int write_fullseq_phylip(map<int, CSLocus *> &, PopMap<CSLocus> *, PopSum<CSLocus> *); 433 */ 434 435 int find_datum_allele_depths(const Datum*, int, char, char, int &, int &); 436 int tally_observed_haplotypes(const vector<char *> &, int, char &, char &); 437 int tally_haplotype_freq(CSLocus *, Datum **, uint, int &, double &, string &); 438 439 #endif // EXPORT_FORMATS_H 440