1 /* The MIT License
2 
3    Copyright (c) 2015 Adrian Tan <atks@umich.edu>
4 
5    Permission is hereby granted, free of charge, to any person obtaining a copy
6    of this software and associated documentation files (the "Software"), to deal
7    in the Software without restriction, including without limitation the rights
8    to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9    copies of the Software, and to permit persons to whom the Software is
10    furnished to do so, subject to the following conditions:
11 
12    The above copyright notice and this permission notice shall be included in
13    all copies or substantial portions of the Software.
14 
15    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16    IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17    FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18    AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19    LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20    OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21    THE SOFTWARE.
22 */
23 
24 #include "rminfo.h"
25 
26 namespace
27 {
28 
29 class Igor : Program
30 {
31     public:
32 
33     ///////////
34     //options//
35     ///////////
36     std::string input_vcf_file;
37     std::string output_vcf_file;
38     std::vector<GenomeInterval> intervals;
39     std::vector<std::string> info_tags;
40     std::vector<std::string> format_tags;
41     bool print;
42     bool remove_filters;
43 
44     ///////
45     //i/o//
46     ///////
47     BCFOrderedReader *odr;
48     BCFOrderedWriter *odw;
49 
50     /////////
51     //stats//
52     /////////
53     uint32_t no_variants;
54     uint32_t no_variants_with_removed_info;
55 
56     /////////
57     //tools//
58     /////////
59     VariantManip *vm;
60 
Igor(int argc,char ** argv)61     Igor(int argc, char **argv)
62     {
63         version = "0.5";
64 
65         //////////////////////////
66         //options initialization//
67         //////////////////////////
68         try
69         {
70             std::string desc = "removes INFO/FORMAT tags from a VCF/BCF file";
71 
72             TCLAP::CmdLine cmd(desc, ' ', version);
73             VTOutput my; cmd.setOutput(&my);
74             TCLAP::ValueArg<std::string> arg_intervals("i", "i", "intervals []", false, "", "str", cmd);
75             TCLAP::ValueArg<std::string> arg_interval_list("I", "I", "file containing list of intervals []", false, "", "file", cmd);
76             TCLAP::ValueArg<std::string> arg_info_tags("t", "t", "list of info tags to be removed []", false, "", "str", cmd);
77             TCLAP::ValueArg<std::string> arg_format_tags("u", "u", "list of format tags to be removed []", false, "", "str", cmd);
78             TCLAP::SwitchArg arg_quiet("q", "q", "do not print options and summary [false]", cmd, false);
79             TCLAP::SwitchArg arg_remove_filters("x", "x", "remove filters [false]", cmd, false);
80             TCLAP::ValueArg<std::string> arg_output_vcf_file("o", "o", "output VCF file [-]", false, "-", "str", cmd);
81             TCLAP::UnlabeledValueArg<std::string> arg_input_vcf_file("<in.vcf>", "input VCF file", true, "","file", cmd);
82 
83             cmd.parse(argc, argv);
84 
85             input_vcf_file = arg_input_vcf_file.getValue();
86             output_vcf_file = arg_output_vcf_file.getValue();
87             parse_intervals(intervals, arg_interval_list.getValue(), arg_intervals.getValue());
88             parse_string_list(info_tags, arg_info_tags.getValue());
89             parse_string_list(format_tags, arg_format_tags.getValue());
90             print = !arg_quiet.getValue();
91             remove_filters = arg_remove_filters.getValue();
92         }
93         catch (TCLAP::ArgException &e)
94         {
95             std::cerr << "error: " << e.error() << " for arg " << e.argId() << "\n";
96             abort();
97         }
98     };
99 
initialize()100     void initialize()
101     {
102         if (info_tags.size()==0 && format_tags.size()==0)
103         {
104             fprintf(stderr, "[%s:%d %s] At least one of -u or -t options must be specified.\n", __FILE__, __LINE__, __FUNCTION__);
105             exit(1);
106         }
107 
108         //////////////////////
109         //i/o initialization//
110         //////////////////////
111         odr = new BCFOrderedReader(input_vcf_file, intervals);
112         odw = new BCFOrderedWriter(output_vcf_file);
113         odw->link_hdr(odr->hdr);
114         odw->write_hdr();
115 
116         ////////////////////////
117         //stats initialization//
118         ////////////////////////
119         no_variants = 0;
120         no_variants_with_removed_info = 0;
121 
122         ////////////////////////
123         //tools initialization//
124         ////////////////////////
125     }
126 
rminfo()127     void rminfo()
128     {
129         bcf1_t *v = odw->get_bcf1_from_pool();
130         bcf_hdr_t *h = odr->hdr;
131 
132         while (odr->read(v))
133         {
134             bcf_unpack(v, BCF_UN_INFO);
135 
136             int32_t ret = 0;
137             for (uint32_t i=0; i<info_tags.size(); ++i)
138             {
139                 ret += bcf_update_info(h, v, info_tags[i].c_str(), NULL, 0, 0);
140             }
141 
142             for (uint32_t i=0; i<format_tags.size(); ++i)
143             {
144                 ret += bcf_update_format(h, v, format_tags[i].c_str(), NULL, 0, 0);
145             }
146 
147             //todo: this is not correct, ret only returns non 0 upon an error.
148             if (!ret) ++no_variants_with_removed_info;
149 
150             if (remove_filters) bcf_update_filter(h, v, NULL, 0);
151 
152             ++no_variants;
153 
154             odw->write(v);
155             v = odw->get_bcf1_from_pool();
156         }
157 
158         odw->close();
159         odr->close();
160     };
161 
print_options()162     void print_options()
163     {
164         if (!print) return;
165 
166         std::clog << "rminfo v" << version << "\n";
167         std::clog << "\n";
168         std::clog << "options:     input VCF file                   " << input_vcf_file << "\n";
169         std::clog << "         [o] output VCF file                  " << output_vcf_file << "\n";
170         std::clog << "         [q] quiet                            " << (!print ? "true" : "false") << "\n";
171         print_strvec("         [t] info tags                        ", info_tags);
172         print_int_op("         [i] intervals                        ", intervals);
173         std::clog << "\n";
174     }
175 
print_stats()176     void print_stats()
177     {
178         if (!print) return;
179 
180         std::clog << "\n";
181         std::clog << "stats: total no. variants                   : " << no_variants << "\n";
182         std::clog << "       total no. variants with removed info : " << no_variants_with_removed_info << "\n";
183         std::clog << "\n";
184     };
185 
~Igor()186     ~Igor() {};
187 
188     private:
189 };
190 
191 }
192 
rminfo(int argc,char ** argv)193 bool rminfo(int argc, char ** argv)
194 {
195     Igor igor(argc, argv);
196     igor.print_options();
197     igor.initialize();
198     igor.rminfo();
199     igor.print_stats();
200 
201     return igor.print;
202 };
203