1 // -*-mode:c++; c-style:k&r; c-basic-offset:4;-*-
2 //
3 // Copyright 2012-2019, Julian Catchen <jcatchen@illinois.edu>
4 //
5 // This file is part of Stacks.
6 //
7 // Stacks is free software: you can redistribute it and/or modify
8 // it under the terms of the GNU General Public License as published by
9 // the Free Software Foundation, either version 3 of the License, or
10 // (at your option) any later version.
11 //
12 // Stacks is distributed in the hope that it will be useful,
13 // but WITHOUT ANY WARRANTY; without even the implied warranty of
14 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 // GNU General Public License for more details.
16 //
17 // You should have received a copy of the GNU General Public License
18 // along with Stacks.  If not, see <http://www.gnu.org/licenses/>.
19 //
20 #ifndef EXPORT_FORMATS_H
21 #define EXPORT_FORMATS_H
22 
23 #include <iostream>
24 #include <utility>
25 #include <map>
26 #include <typeinfo>
27 #include <typeindex>
28 
29 #include "locus.h"
30 #include "PopMap.h"
31 #include "PopSum.h"
32 #include "ordered.h" // for "snp"
33 
34 void tally_complete_haplotypes(
35         Datum const*const* data,
36         size_t n_samples,
37         strand_type loc_strand,
38         vector<pair<const char*, size_t>>& haps_sorted_decr_freq,
39         map<const char*, size_t, LessCStrs>& hap_indexes_map
40         );
41 
42 class Export {
43  protected:
44     string     _path;
45     ofstream   _fh;
46 
47  public:
Export()48     Export() {}
~Export()49     virtual ~Export() {}
50     virtual int  open(const MetaPopInfo *) = 0;
51     virtual int  write_header()    = 0;
52     virtual int  write_batch(const vector<LocBin *> &) = 0;
post_processing()53     virtual int  post_processing() {return 0;}
close()54     virtual void close()           {this->_fh.close();}
55 
56     bool is_hap_export();
tmp_path()57     string tmp_path() const {return this->_path + ".part";}
58     static int transpose(ifstream &ifh, vector<string> &transposed);
59 };
60 
61 class OrderableExport : public Export {
62  public:
OrderableExport()63     OrderableExport() {}
~OrderableExport()64     virtual ~OrderableExport() {}
65     int write_batch(const vector<LocBin*>& loci);
66 
67  protected:
68     virtual int write_site(const CSLocus* cloc, const LocPopSum* psum, Datum const*const* datums, size_t col, size_t index) = 0;
69 };
70 
71 class GenPos {
72  public:
73     uint     id;
74     uint     bp;
75     uint     snp_index;
76     loc_type type;
77 
GenPos(int id,int snp_index,int bp)78     GenPos(int id, int snp_index, int bp) {
79         this->id        = id;
80         this->snp_index = snp_index;
81         this->bp        = bp;
82         this->type      = snp;
83     }
GenPos(int id,int snp_index,int bp,loc_type type)84     GenPos(int id, int snp_index, int bp, loc_type type) {
85         this->id        = id;
86         this->snp_index = snp_index;
87         this->bp        = bp;
88         this->type      = type;
89     }
90 
91     bool operator<(const GenPos& other) const {return bp < other.bp;}
92 };
93 
94 class MarkersExport: public Export {
95     //
96     // Output a list of heterozygous loci and the associated haplotype frequencies.
97     //
98     const MetaPopInfo *_mpopi;
99 
100  public:
MarkersExport()101     MarkersExport() : _mpopi(NULL) {}
~MarkersExport()102     ~MarkersExport() {}
103     int  open(const MetaPopInfo *mpopi);
104     int  write_header();
105     int  write_batch(const vector<LocBin *> &);
106 };
107 
108 class GenotypesExport: public Export {
109     //
110     // Output a list of heterozygous loci and the associated haplotype frequencies.
111     //
112     const MetaPopInfo *_mpopi;
113 
114  public:
GenotypesExport()115     GenotypesExport() : _mpopi(NULL) {}
~GenotypesExport()116     ~GenotypesExport() {}
117     int  open(const MetaPopInfo *mpopi);
118     int  write_header();
119     int  write_batch(const vector<LocBin *> &);
120 };
121 
122 class SumstatsExport: public Export {
123     //
124     // Output the locus-level summary statistics.
125     //
126     const MetaPopInfo *_mpopi;
127     uint  _pop_cnt;
128 
129  public:
SumstatsExport()130     SumstatsExport() : _mpopi(NULL), _pop_cnt(UINT_MAX) {}
~SumstatsExport()131     ~SumstatsExport() {}
132     int  open(const MetaPopInfo *mpopi);
133     int  write_header();
134     int  write_batch(const vector<LocBin *> &);
135 };
136 
137 class HapstatsExport: public Export {
138     //
139     // Output the locus-level haplotype statistics.
140     //
141     const MetaPopInfo *_mpopi;
142     uint  _pop_cnt;
143 
144  public:
HapstatsExport()145     HapstatsExport() : _mpopi(NULL), _pop_cnt(UINT_MAX) {}
~HapstatsExport()146     ~HapstatsExport() {}
147     int  open(const MetaPopInfo *mpopi);
148     int  write_header();
149     int  write_batch(const vector<LocBin *> &);
150 };
151 
152 class SnpDivergenceExport: public Export {
153     //
154     // Output the SNP-level divergence statistics.
155     //
156     const MetaPopInfo *_mpopi;
157     vector<ofstream *> _fhs;
158     OPopPair<PopPair> *_order;
159 
160  public:
161     SnpDivergenceExport(ofstream &log_fh);
~SnpDivergenceExport()162     ~SnpDivergenceExport() {
163         for (uint i = 0; i < this->_fhs.size(); i++)
164             delete this->_fhs[i];
165         delete this->_order;
166     }
167     int  open(const MetaPopInfo *mpopi);
168     int  write_header();
write_batch(const vector<LocBin * > &)169     int  write_batch(const vector<LocBin *> &) { return 0; }
170     int  write_batch_pairwise(const vector<LocBin *> &, const vector<vector<PopPair **>> &);
close()171     void close() {
172         for (uint i = 0; i < this->_fhs.size(); i++)
173             this->_fhs[i]->close();
174         return;
175     }
176 
177 private:
178     int write_site(ofstream *fh, const PopPair *pp, string chr);
179 };
180 
181 class HapDivergenceExport: public Export {
182     //
183     // Output the SNP-level divergence statistics.
184     //
185     const MetaPopInfo *_mpopi;
186     vector<ofstream *> _fhs;
187     ofstream *_metapop_fh;
188 
189  public:
HapDivergenceExport()190     HapDivergenceExport() : _mpopi(NULL), _metapop_fh(NULL) {}
~HapDivergenceExport()191     ~HapDivergenceExport() {
192         for (uint i = 0; i < this->_fhs.size(); i++)
193             delete this->_fhs[i];
194     }
195     int  open(const MetaPopInfo *mpopi);
196     int  write_header();
write_batch(const vector<LocBin * > &)197     int  write_batch(const vector<LocBin *> &) { return 0; }
198     int  write_batch_pairwise(const vector<LocBin *> &, const vector<vector<HapStat *>> &, const vector<HapStat *> &);
close()199     void close() {
200         for (uint i = 0; i < this->_fhs.size(); i++)
201             this->_fhs[i]->close();
202         return;
203     }
204 };
205 
206 class PlinkExport: public OrderableExport {
207     const MetaPopInfo *_mpopi;
208     ofstream _tmpfh;
209     ifstream _intmpfh;
210     string   _markers_path;
211     ofstream _markers_fh;
212 
213  public:
PlinkExport()214     PlinkExport() : _mpopi(NULL) {}
~PlinkExport()215     ~PlinkExport() {}
216     int  open(const MetaPopInfo *mpopi);
217     int  write_header();
218     int  write_batch(const vector<LocBin *> &loci);
219     int  post_processing();
220     void close();
221 
222  private:
223     int write_site(const CSLocus* cloc, const LocPopSum* psum, Datum const*const* datums, size_t col, size_t index);
224 };
225 
226 class GenePopExport: public OrderableExport {
227     const MetaPopInfo *_mpopi;
228     ofstream _tmpfh;
229 
230  public:
GenePopExport()231     GenePopExport() : _mpopi(NULL) {}
~GenePopExport()232     ~GenePopExport() {}
233     int  open(const MetaPopInfo *mpopi);
234     int  write_header();
235     int  post_processing();
close()236     void close() {this->_fh.close(); remove(this->tmp_path().c_str());}
237 
238  private:
239     int write_site(const CSLocus* cloc, const LocPopSum* psum, Datum const*const* datums, size_t col, size_t index);
240 };
241 
242 class GenePopHapsExport: public Export {
243     const MetaPopInfo *_mpopi;
244     ofstream _tmpfh;
245     size_t _n_digits;
246 
247  public:
GenePopHapsExport()248     GenePopHapsExport() : _mpopi(NULL), _n_digits(2) {}
~GenePopHapsExport()249     ~GenePopHapsExport() {}
250     int  open(const MetaPopInfo *mpopi);
251     int  write_header();
252     int  write_batch(const vector<LocBin*>& loci);
253     int  post_processing();
close()254     void close() {this->_fh.close(); remove(this->tmp_path().c_str());}
255 
set_digits(size_t n)256     void set_digits(size_t n) { assert(n==2 || n==3); _n_digits=n; }
257 
258  private:
259     int write_site(const CSLocus* cloc, const LocPopSum* psum, Datum const*const* datums, size_t col, size_t index);
260 };
261 
262 class StructureExport: public OrderableExport {
263     const MetaPopInfo *_mpopi;
264     string   _tmp_path;
265     ofstream _tmpfh;
266     ifstream _intmpfh;
267 
268  public:
StructureExport()269     StructureExport() : _mpopi(NULL) {}
~StructureExport()270     ~StructureExport() {}
271     int  open(const MetaPopInfo *mpopi);
272     int  write_header();
273     int  post_processing();
274     void close();
275 
276  private:
277     int write_site(const CSLocus* cloc, const LocPopSum* psum, Datum const*const* datums, size_t col, size_t index);
278 };
279 
280 class FineRADStructureExport: public Export {
281     const MetaPopInfo *_mpopi;
282     string   _tmp_path;
283     ofstream _tmpfh;
284     ifstream _intmpfh;
285 
286  public:
FineRADStructureExport()287     FineRADStructureExport() : _mpopi(NULL) {}
~FineRADStructureExport()288     ~FineRADStructureExport() {}
289     int  open(const MetaPopInfo *mpopi);
290     int  write_header();
291     int  write_batch(const vector<LocBin *> &);
292 };
293 
294 class PhylipExport: public OrderableExport {
295 protected:
296     const MetaPopInfo *_mpopi;
297     string   _log_path;
298     ofstream _logfh;
299     string   _tmp_path;
300     ofstream _tmpfh;
301     ifstream _intmpfh;
302     size_t   _site_index;
303 
304  public:
PhylipExport()305     PhylipExport() : _mpopi(NULL), _site_index(0) {}
306     int open(const MetaPopInfo *mpopi);
307     int  write_header();
308     int  post_processing();
309     void close();
310 };
311 
312 class PhylipVarExport: public PhylipExport {
313  public:
PhylipVarExport()314     PhylipVarExport() {}
315  private:
316     int write_site(const CSLocus* cloc, const LocPopSum* psum, Datum const*const* datums, size_t col, size_t index);
317 };
318 
319 class PhylipFixedExport: public PhylipExport {
320  public:
PhylipFixedExport()321     PhylipFixedExport() {}
322  private:
323     int write_site(const CSLocus* cloc, const LocPopSum* psum, Datum const*const* datums, size_t col, size_t index);
324 };
325 
326 class HzarExport: public OrderableExport {
327     const MetaPopInfo *_mpopi;
328     ofstream _tmpfh;
329     string   _tmp_path;
330     ifstream _intmpfh;
331 
332  public:
HzarExport()333     HzarExport() : _mpopi(NULL) {}
~HzarExport()334     ~HzarExport() {}
335     int  open(const MetaPopInfo *mpopi);
write_header()336     int  write_header() { return 0; };
337     int  post_processing();
338     void close();
339 
340  private:
341     int write_site(const CSLocus* cloc, const LocPopSum* psum, Datum const*const* datums, size_t col, size_t index);
342 };
343 
344 class FastaLociExport: public Export {
345     //
346     // Output a list of heterozygous loci and the associated haplotype frequencies.
347     //
348     const MetaPopInfo *_mpopi;
349 
350  public:
FastaLociExport()351     FastaLociExport() : _mpopi(NULL) {}
~FastaLociExport()352     ~FastaLociExport() {}
353     int  open(const MetaPopInfo *mpopi);
354     int  write_header();
355     int  write_batch(const vector<LocBin *> &);
356 };
357 
358 class FastaRawExport: public Export {
359     //
360     // Output a list of heterozygous loci and the associated haplotype frequencies.
361     //
362     const MetaPopInfo *_mpopi;
363 
364  public:
FastaRawExport()365     FastaRawExport() : _mpopi(NULL) {}
~FastaRawExport()366     ~FastaRawExport() {}
367     int open(const MetaPopInfo *mpopi);
368     int write_header();
369     int write_batch(const vector<LocBin *> &);
370 };
371 
372 class FastaSamplesExport: public Export {
373     //
374     // Output a list of heterozygous loci and the associated haplotype frequencies.
375     //
376     const MetaPopInfo *_mpopi;
377 
378  public:
FastaSamplesExport()379     FastaSamplesExport() : _mpopi(NULL) {}
~FastaSamplesExport()380     ~FastaSamplesExport() {}
381     int open(const MetaPopInfo *mpopi);
382     int write_header();
383     int write_batch(const vector<LocBin *> &);
384 };
385 
386 class VcfExport: public OrderableExport {
387     const MetaPopInfo*_mpopi;
388     VcfWriter* _writer;
389 
390  public:
VcfExport()391     VcfExport() : _mpopi(NULL), _writer(NULL) {}
~VcfExport()392     ~VcfExport() { delete this->_writer; }
393     int open(const MetaPopInfo *mpopi);
write_header()394     int write_header() { return 0; }
395 
396  private:
397     int write_site(const CSLocus* cloc, const LocPopSum* psum, Datum const*const* datums, size_t col, size_t index);
398 };
399 
400 class VcfHapsExport: public Export {
401     const MetaPopInfo*_mpopi;
402     VcfWriter* _writer;
403 
404  public:
VcfHapsExport()405     VcfHapsExport() : _mpopi(NULL), _writer(NULL) {}
~VcfHapsExport()406     ~VcfHapsExport() { delete this->_writer; }
407     int open(const MetaPopInfo *mpopi);
408     int write_batch(const vector<LocBin*>& loci);
write_header()409     int write_header() { return 0; }
410 };
411 
412 class TreemixExport: public OrderableExport {
413     const MetaPopInfo*_mpopi;
414     ofstream _writer;
415 
416  public:
TreemixExport()417     TreemixExport() : _mpopi(NULL), _writer(NULL) {}
418     int open(const MetaPopInfo *mpopi);
write_header()419     int write_header() { return 0; }
420 
421  private:
422     int write_site(const CSLocus* cloc, const LocPopSum* psum, Datum const*const* datums, size_t col, size_t index);
423 };
424 
425 /*
426 int write_generic(map<int, CSLocus *> &, PopMap<CSLocus> *, bool);
427 int write_vcf_haplotypes(map<int, CSLocus *> &, PopMap<CSLocus> *, PopSum<CSLocus> *);
428 int write_phase(map<int, CSLocus *> &, PopMap<CSLocus> *, PopSum<CSLocus> *);
429 int write_fastphase(map<int, CSLocus *> &, PopMap<CSLocus> *, PopSum<CSLocus> *);
430 int write_plink(map<int, CSLocus *> &, PopMap<CSLocus> *, PopSum<CSLocus> *);
431 int write_hzar(map<int, CSLocus *> &, PopMap<CSLocus> *, PopSum<CSLocus> *);
432 int write_fullseq_phylip(map<int, CSLocus *> &, PopMap<CSLocus> *, PopSum<CSLocus> *);
433 */
434 
435 int find_datum_allele_depths(const Datum*, int, char, char, int &, int &);
436 int tally_observed_haplotypes(const vector<char *> &, int, char &, char &);
437 int tally_haplotype_freq(CSLocus *, Datum **, uint, int &, double &, string &);
438 
439 #endif // EXPORT_FORMATS_H
440