/*
 * parameters.h
 *
 *  Created on: Nov 11, 2009
 *      Author: Adam Auton
 *      ($Revision: 249 $)
 */
8 
9 // Class for reading in, checking and storing user parameters
10 #ifndef PARAMETERS_H_
11 #define PARAMETERS_H_
12 
13 #if HAVE_CONFIG_H
14 # include "config.h"
15 #endif
16 
17 #include <algorithm>
18 #include <cstdio>
19 #include <cstdlib>
20 #include <iostream>
21 #include <limits>
22 #include <string>
23 #include <vector>
24 #include <set>
25 #include <stdint.h>
26 #include <unistd.h>
27 
28 #include "output_log.h"
29 
30 extern output_log LOG;
31 
32 using namespace std;
33 
34 const string VCFTOOLS_VERSION=PACKAGE_VERSION;
// Leading bytes of a BGZF block header, spelled out byte by byte.
// The array is 19 bytes long: 18 header bytes plus one trailing zero (the
// original definition got that final zero from a string literal's NUL).
// NOTE(review): per the BGZF layout, bytes 16-17 would be the per-block BSIZE
// field, so presumably only the first 16 bytes are meaningful to compare --
// the original author's comment raised the same question; confirm at call sites.
static const uint8_t bgzf_magic[19] = {
	0x1f, 0x8b,             // same two bytes as gzip_magic below
	0x08, 0x04,
	0x00, 0x00, 0x00, 0x00,
	0x00, 0xff,
	0x06, 0x00,
	0x42, 0x43,             // 'B', 'C'
	0x02, 0x00,
	0x00, 0x00,
	0x00                    // trailing zero (formerly the implicit NUL)
};

// First two bytes of a gzip stream.
static const uint8_t gzip_magic[2] = { 0x1f, 0x8b };
37 
// Container for the program's user parameters (per the file comment: reading
// in, checking and storing them).
//
// Every option is a public data member. None of them has an in-class
// initializer, so any default values must be assigned by the constructor,
// whose definition is not part of this header.
//
// Standard-library types are spelled with std:: so the declaration does not
// depend on a using-directive being in effect. Member order is unchanged.
class parameters
{
public:
	bool stream_in;
	bool bcf_format;
	bool BED_exclude;
	std::string BED_file;
	std::set<std::string> chrs_to_exclude;
	std::set<std::string> chrs_to_keep;
	std::string chrom_map_file;
	std::string contigs_file;
	bool derived;
	bool diff_discordance_matrix;
	std::string diff_file;
	bool diff_file_bcf;
	bool diff_file_compressed;
	bool diff_indv;
	bool diff_indv_discordance;
	std::string diff_indv_map_file;
	bool diff_site;
	bool diff_site_discordance;
	bool diff_switch_error;
	int end_pos;
	std::string exclude_positions_file;
	std::string exclude_positions_overlap_file;
	std::string FORMAT_id_to_extract;
	std::set<std::string> geno_filter_flags_to_exclude;
	std::string geno_rsq_position_list;
	std::string hap_rsq_position_list;
	std::string hapcount_BED;
	std::vector<std::string> weir_fst_populations;
	int fst_window_size;
	int fst_window_step;
	std::vector<std::string> indv_exclude_files;
	std::vector<std::string> indv_keep_files;
	std::set<std::string> indv_to_exclude;
	std::set<std::string> indv_to_keep;
	std::vector<std::string> INFO_to_extract;
	bool invert_mask;
	bool keep_only_indels;
	int ld_bp_window_size;
	int ld_snp_window_size;
	int ld_bp_window_min;
	int ld_snp_window_min;
	int min_mac;
	double min_maf;
	std::string mask_file;
	int max_alleles;
	int max_genotype_depth;
	int max_mac;
	double max_maf;
	double max_mean_depth;
	int max_missing_call_count;
	int max_non_ref_ac;
	double max_non_ref_af;
	int max_non_ref_ac_any;
	double max_non_ref_af_any;
	int max_N_indv;
	std::string mendel_ped_file;
	int min_alleles;
	int min_genotype_depth;
	double min_genotype_quality;
	double min_HWE_pvalue;
	int min_interSNP_distance;
	int min_kept_mask_value;
	double min_mean_depth;
	int min_non_ref_ac;
	double min_non_ref_af;
	int min_non_ref_ac_any;
	double min_non_ref_af_any;
	double min_quality;
	double min_r2;
	double min_site_call_rate;
	int num_outputs;
	bool output_012_matrix;
	bool output_as_IMPUTE;
	bool output_as_ldhat_phased;
	bool output_as_ldhat_unphased;
	bool output_as_ldhelmet;
	bool output_BEAGLE_genotype_likelihoods_GL;
	bool output_BEAGLE_genotype_likelihoods_PL;
	bool output_counts;
	bool output_filter_summary;
	bool output_freq;
	bool output_geno_depth;
	bool output_geno_chisq;
	bool output_geno_rsq;
	bool output_hap_rsq;
	bool output_het;
	bool output_HWE;
	bool output_indel_hist;
	bool output_indv_burden;
	bool output_indv_depth;
	bool output_indv_freq_burden;
	bool output_indv_freq_burden2;
	bool output_indv_missingness;
	bool output_interchromosomal_hap_rsq;
	bool output_interchromosomal_geno_rsq;
	bool output_kept_sites;
	bool output_LROH;
	int output_N_PCA_SNP_loadings;
	bool output_PCA;
	std::string output_prefix;
	bool output_relatedness_Yang;
	bool output_relatedness_Manichaikul;
	bool output_removed_sites;
	bool output_singletons;
	bool output_site_depth;
	bool output_site_mean_depth;
	bool output_site_missingness;
	bool output_site_pi;
	bool output_site_quality;
	int output_SNP_density_bin_size;
	int output_Tajima_D_bin_size;
	int output_TsTv_bin_size;
	bool output_TsTv_by_count;
	bool output_TsTv_by_qual;
	bool output_TsTv_summary;
	bool phased_only;
	bool PCA_no_normalisation;
	int pi_window_size;
	int pi_window_step;
	bool plink_output;
	bool plink_tped_output;
	std::string positions_file;
	std::string positions_overlap_file;
	bool recode;
	bool recode_bcf;
	std::set<std::string> recode_INFO_to_keep;
	bool recode_all_INFO;
	bool remove_all_filtered_genotypes;
	bool remove_all_filtered_sites;
	bool remove_indels;
	std::set<std::string> site_filter_flags_to_exclude;
	std::set<std::string> site_filter_flags_to_keep;
	std::set<std::string> site_INFO_flags_to_keep;
	std::set<std::string> site_INFO_flags_to_remove;
	std::string snps_to_exclude_file;
	std::string snps_to_keep_file;
	std::set<std::string> snps_to_keep;
	int start_pos;
	bool stream_err;
	bool stream_out;
	bool suppress_allele_output;
	std::string temp_dir;
	std::string vcf_filename;
	bool vcf_format;
	bool vcf_compressed;

	// Takes the command-line argument count and vector. Defined outside this
	// header; presumably fills the private `argv` copy below -- confirm there.
	parameters(int argc, char *argv[]);

	// Nothing to release by hand; members clean up after themselves.
	~parameters() {}

	// The three operations below are declared here and defined elsewhere.
	// NOTE(review): judging by their names they parse the stored arguments,
	// print usage text, and print the current settings -- confirm in the
	// implementation file.
	void read_parameters();
	void print_help();
	void print_params();

private:
	// Validation step and error reporter; both are defined elsewhere.
	// error() is static, so it needs no parameters instance.
	void check_parameters();
	static void error(std::string err_msg, int code);

	// Arguments held as strings (note: shares its name with the
	// constructor's `argv` parameter).
	std::vector<std::string> argv;

	// Accessor taking an index; defined elsewhere.
	// NOTE(review): presumably returns element i of `argv` -- confirm.
	std::string get_arg(unsigned int i);
};
202 
203 
204 #endif /* PARAMETERS_H_ */
205