1 /* The MIT License
2 
3    Copyright (c) 2015 Adrian Tan <atks@umich.edu>
4 
5    Permission is hereby granted, free of charge, to any person obtaining a copy
6    of this software and associated documentation files (the "Software"), to deal
7    in the Software without restriction, including without limitation the rights
8    to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9    copies of the Software, and to permit persons to whom the Software is
10    furnished to do so, subject to the following conditions:
11 
12    The above copyright notice and this permission notice shall be included in
13    all copies or substantial portions of the Software.
14 
15    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16    IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17    FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18    AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19    LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20    OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21    THE SOFTWARE.
22 */
23 
24 #include "hfilter.h"
25 
26 namespace
27 {
28 
29 class Igor : Program
30 {
31     public:
32 
33     std::string version;
34 
35     ///////////
36     //options//
37     ///////////
38     std::string input_vcf_file;
39     std::string ref_fasta_file;
40     std::string output_vcf_file;
41     std::vector<GenomeInterval> intervals;
42     std::string interval_list;
43     std::string regions_bed_file;
44     std::string filter_tag;
45     std::string filter_tag_desc;
46     bool clear_filter;
47 
48     ///////
49     //i/o//
50     ///////
51     BCFOrderedReader *odr;
52     BCFOrderedWriter *odw;
53 
54     //////////
55     //filter//
56     //////////
57     std::string fexp;
58     Filter filter;
59     bool filter_exists;
60 
61     /////////
62     //stats//
63     /////////
64     int32_t no_variants_filtered;
65     int32_t no_variants;
66 
67     ////////////////
68     //common tools//
69     ////////////////
70     VariantManip *vm;
71 
Igor(int argc,char ** argv)72     Igor(int argc, char **argv)
73     {
74         version = "0.5";
75 
76         //////////////////////////
77         //options initialization//
78         //////////////////////////
79         try
80         {
81             std::string desc = "filters variants in a VCF file";
82 
83             TCLAP::CmdLine cmd(desc, ' ', version);
84             VTOutput my; cmd.setOutput(&my);
85             TCLAP::ValueArg<std::string> arg_intervals("i", "i", "intervals", false, "", "str", cmd);
86             TCLAP::ValueArg<std::string> arg_interval_list("I", "I", "file containing list of intervals []", false, "", "str", cmd);
87             TCLAP::ValueArg<std::string> arg_output_vcf_file("o", "o", "output VCF file [-]", false, "-", "str", cmd);
88             TCLAP::ValueArg<std::string> arg_filter_tag("t", "t", "filter tag []", true, "", "str", cmd);
89             TCLAP::ValueArg<std::string> arg_filter_tag_desc("d", "d", "filter tag description []", true, "", "str", cmd);
90             TCLAP::ValueArg<std::string> arg_fexp("f", "f", "filter expression []", true, "", "str", cmd);
91             TCLAP::SwitchArg arg_clear_filter("x", "x", "clear filter [false]", cmd, false);
92             TCLAP::UnlabeledValueArg<std::string> arg_input_vcf_file("<in.vcf>", "input VCF file", true, "","file", cmd);
93 
94             cmd.parse(argc, argv);
95 
96             input_vcf_file = arg_input_vcf_file.getValue();
97             output_vcf_file = arg_output_vcf_file.getValue();
98             parse_intervals(intervals, arg_interval_list.getValue(), arg_intervals.getValue());
99             fexp = arg_fexp.getValue();
100             clear_filter = arg_clear_filter.getValue();
101             filter_tag = arg_filter_tag.getValue();
102             filter_tag_desc = arg_filter_tag_desc.getValue();
103         }
104         catch (TCLAP::ArgException &e)
105         {
106             std::cerr << "error: " << e.error() << " for arg " << e.argId() << std::endl;
107             abort();
108         }
109     };
110 
~Igor()111     ~Igor() {};
112 
initialize()113     void initialize()
114     {
115         //******************
116         //i/o initialization
117         //******************
118         odr = new BCFOrderedReader(input_vcf_file, intervals);
119         odw = new BCFOrderedWriter(output_vcf_file);
120         odw->link_hdr(odr->hdr);
121 
122         std::string hrec = "##FILTER=<ID=" + filter_tag + ",Description=\"" + filter_tag_desc + "\">";
123         bcf_hdr_append(odw->hdr, hrec.c_str());
124 
125         /////////////////////////
126         //filter initialization//
127         /////////////////////////
128         filter.parse(fexp.c_str(), false);
129         filter_exists = fexp=="" ? false : true;
130 
131         ///////////////////////
132         //tool initialization//
133         ///////////////////////
134         vm = new VariantManip();
135 
136         ////////////////////////
137         //stats initialization//
138         ////////////////////////
139         no_variants_filtered = 0;
140         no_variants = 0;
141     }
142 
print_options()143     void print_options()
144     {
145         std::clog << "hfilter v" << version << "\n";
146         std::clog << "\n";
147         std::clog << "options:     input VCF file(s)     " << input_vcf_file << "\n";
148         std::clog << "         [o] output VCF file       " << output_vcf_file << "\n";
149         print_str_op("         [f] filter expression     ", fexp);
150         print_str_op("         [t] filter tag            ", filter_tag);
151         print_str_op("         [d] filter description    ", filter_tag_desc);
152         print_int_op("         [i] intervals             ", intervals);
153         std::clog << "\n";
154     }
155 
print_stats()156     void print_stats()
157     {
158         std::clog << "\n";
159         std::cerr << "stats: no. of variants filtered     " << no_variants_filtered << "\n";
160         std::cerr << "       total no. of variants        " << no_variants << "\n";std::clog << "\n";
161     }
162 
hfilter()163     void hfilter()
164     {
165         odw->write_hdr();
166 
167         bcf1_t *v = odw->get_bcf1_from_pool();
168         bcf_hdr_t *h = odr->hdr;
169         Variant variant;
170         int32_t filter_id = bcf_hdr_id2int(h, BCF_DT_ID, filter_tag.c_str());
171         while (odr->read(v))
172         {
173             vm->classify_variant(h, v, variant);
174             if (filter.apply(h, v, &variant, false))
175             {
176                 if (clear_filter)
177                 {
178                     bcf_update_filter(h, v, &filter_id, 1);
179                 }
180                 else
181                 {
182                     bcf_add_filter(h, v, filter_id);
183                 }
184 
185                 ++no_variants_filtered;
186             }
187 
188             ++no_variants;
189             odw->write(v);
190             v = odw->get_bcf1_from_pool();
191         }
192 
193         odw->close();
194     };
195 
196     private:
197 };
198 }
199 
hfilter(int argc,char ** argv)200 void hfilter(int argc, char ** argv)
201 {
202     Igor igor(argc, argv);
203     igor.print_options();
204     igor.initialize();
205     igor.hfilter();
206     igor.print_stats();
207 };