1 /* The MIT License
2
3 Copyright (c) 2015 Adrian Tan <atks@umich.edu>
4
5 Permission is hereby granted, free of charge, to any person obtaining a copy
6 of this software and associated documentation files (the "Software"), to deal
7 in the Software without restriction, including without limitation the rights
8 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 copies of the Software, and to permit persons to whom the Software is
10 furnished to do so, subject to the following conditions:
11
12 The above copyright notice and this permission notice shall be included in
13 all copies or substantial portions of the Software.
14
15 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21 THE SOFTWARE.
22 */
23
24 #include "hfilter.h"
25
26 namespace
27 {
28
29 class Igor : Program
30 {
31 public:
32
33 std::string version;
34
35 ///////////
36 //options//
37 ///////////
38 std::string input_vcf_file;
39 std::string ref_fasta_file;
40 std::string output_vcf_file;
41 std::vector<GenomeInterval> intervals;
42 std::string interval_list;
43 std::string regions_bed_file;
44 std::string filter_tag;
45 std::string filter_tag_desc;
46 bool clear_filter;
47
48 ///////
49 //i/o//
50 ///////
51 BCFOrderedReader *odr;
52 BCFOrderedWriter *odw;
53
54 //////////
55 //filter//
56 //////////
57 std::string fexp;
58 Filter filter;
59 bool filter_exists;
60
61 /////////
62 //stats//
63 /////////
64 int32_t no_variants_filtered;
65 int32_t no_variants;
66
67 ////////////////
68 //common tools//
69 ////////////////
70 VariantManip *vm;
71
Igor(int argc,char ** argv)72 Igor(int argc, char **argv)
73 {
74 version = "0.5";
75
76 //////////////////////////
77 //options initialization//
78 //////////////////////////
79 try
80 {
81 std::string desc = "filters variants in a VCF file";
82
83 TCLAP::CmdLine cmd(desc, ' ', version);
84 VTOutput my; cmd.setOutput(&my);
85 TCLAP::ValueArg<std::string> arg_intervals("i", "i", "intervals", false, "", "str", cmd);
86 TCLAP::ValueArg<std::string> arg_interval_list("I", "I", "file containing list of intervals []", false, "", "str", cmd);
87 TCLAP::ValueArg<std::string> arg_output_vcf_file("o", "o", "output VCF file [-]", false, "-", "str", cmd);
88 TCLAP::ValueArg<std::string> arg_filter_tag("t", "t", "filter tag []", true, "", "str", cmd);
89 TCLAP::ValueArg<std::string> arg_filter_tag_desc("d", "d", "filter tag description []", true, "", "str", cmd);
90 TCLAP::ValueArg<std::string> arg_fexp("f", "f", "filter expression []", true, "", "str", cmd);
91 TCLAP::SwitchArg arg_clear_filter("x", "x", "clear filter [false]", cmd, false);
92 TCLAP::UnlabeledValueArg<std::string> arg_input_vcf_file("<in.vcf>", "input VCF file", true, "","file", cmd);
93
94 cmd.parse(argc, argv);
95
96 input_vcf_file = arg_input_vcf_file.getValue();
97 output_vcf_file = arg_output_vcf_file.getValue();
98 parse_intervals(intervals, arg_interval_list.getValue(), arg_intervals.getValue());
99 fexp = arg_fexp.getValue();
100 clear_filter = arg_clear_filter.getValue();
101 filter_tag = arg_filter_tag.getValue();
102 filter_tag_desc = arg_filter_tag_desc.getValue();
103 }
104 catch (TCLAP::ArgException &e)
105 {
106 std::cerr << "error: " << e.error() << " for arg " << e.argId() << std::endl;
107 abort();
108 }
109 };
110
~Igor()111 ~Igor() {};
112
initialize()113 void initialize()
114 {
115 //******************
116 //i/o initialization
117 //******************
118 odr = new BCFOrderedReader(input_vcf_file, intervals);
119 odw = new BCFOrderedWriter(output_vcf_file);
120 odw->link_hdr(odr->hdr);
121
122 std::string hrec = "##FILTER=<ID=" + filter_tag + ",Description=\"" + filter_tag_desc + "\">";
123 bcf_hdr_append(odw->hdr, hrec.c_str());
124
125 /////////////////////////
126 //filter initialization//
127 /////////////////////////
128 filter.parse(fexp.c_str(), false);
129 filter_exists = fexp=="" ? false : true;
130
131 ///////////////////////
132 //tool initialization//
133 ///////////////////////
134 vm = new VariantManip();
135
136 ////////////////////////
137 //stats initialization//
138 ////////////////////////
139 no_variants_filtered = 0;
140 no_variants = 0;
141 }
142
print_options()143 void print_options()
144 {
145 std::clog << "hfilter v" << version << "\n";
146 std::clog << "\n";
147 std::clog << "options: input VCF file(s) " << input_vcf_file << "\n";
148 std::clog << " [o] output VCF file " << output_vcf_file << "\n";
149 print_str_op(" [f] filter expression ", fexp);
150 print_str_op(" [t] filter tag ", filter_tag);
151 print_str_op(" [d] filter description ", filter_tag_desc);
152 print_int_op(" [i] intervals ", intervals);
153 std::clog << "\n";
154 }
155
print_stats()156 void print_stats()
157 {
158 std::clog << "\n";
159 std::cerr << "stats: no. of variants filtered " << no_variants_filtered << "\n";
160 std::cerr << " total no. of variants " << no_variants << "\n";std::clog << "\n";
161 }
162
hfilter()163 void hfilter()
164 {
165 odw->write_hdr();
166
167 bcf1_t *v = odw->get_bcf1_from_pool();
168 bcf_hdr_t *h = odr->hdr;
169 Variant variant;
170 int32_t filter_id = bcf_hdr_id2int(h, BCF_DT_ID, filter_tag.c_str());
171 while (odr->read(v))
172 {
173 vm->classify_variant(h, v, variant);
174 if (filter.apply(h, v, &variant, false))
175 {
176 if (clear_filter)
177 {
178 bcf_update_filter(h, v, &filter_id, 1);
179 }
180 else
181 {
182 bcf_add_filter(h, v, filter_id);
183 }
184
185 ++no_variants_filtered;
186 }
187
188 ++no_variants;
189 odw->write(v);
190 v = odw->get_bcf1_from_pool();
191 }
192
193 odw->close();
194 };
195
196 private:
197 };
198 }
199
hfilter(int argc,char ** argv)200 void hfilter(int argc, char ** argv)
201 {
202 Igor igor(argc, argv);
203 igor.print_options();
204 igor.initialize();
205 igor.hfilter();
206 igor.print_stats();
207 };