1 /* The MIT License
2
3 Copyright (c) 2015 Adrian Tan <atks@umich.edu>
4
5 Permission is hereby granted, free of charge, to any person obtaining a copy
6 of this software and associated documentation files (the "Software"), to deal
7 in the Software without restriction, including without limitation the rights
8 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 copies of the Software, and to permit persons to whom the Software is
10 furnished to do so, subject to the following conditions:
11
12 The above copyright notice and this permission notice shall be included in
13 all copies or substantial portions of the Software.
14
15 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21 THE SOFTWARE.
22 */
23
24 #include "rminfo.h"
25
26 namespace
27 {
28
29 class Igor : Program
30 {
31 public:
32
33 ///////////
34 //options//
35 ///////////
36 std::string input_vcf_file;
37 std::string output_vcf_file;
38 std::vector<GenomeInterval> intervals;
39 std::vector<std::string> info_tags;
40 std::vector<std::string> format_tags;
41 bool print;
42 bool remove_filters;
43
44 ///////
45 //i/o//
46 ///////
47 BCFOrderedReader *odr;
48 BCFOrderedWriter *odw;
49
50 /////////
51 //stats//
52 /////////
53 uint32_t no_variants;
54 uint32_t no_variants_with_removed_info;
55
56 /////////
57 //tools//
58 /////////
59 VariantManip *vm;
60
Igor(int argc,char ** argv)61 Igor(int argc, char **argv)
62 {
63 version = "0.5";
64
65 //////////////////////////
66 //options initialization//
67 //////////////////////////
68 try
69 {
70 std::string desc = "removes INFO/FORMAT tags from a VCF/BCF file";
71
72 TCLAP::CmdLine cmd(desc, ' ', version);
73 VTOutput my; cmd.setOutput(&my);
74 TCLAP::ValueArg<std::string> arg_intervals("i", "i", "intervals []", false, "", "str", cmd);
75 TCLAP::ValueArg<std::string> arg_interval_list("I", "I", "file containing list of intervals []", false, "", "file", cmd);
76 TCLAP::ValueArg<std::string> arg_info_tags("t", "t", "list of info tags to be removed []", false, "", "str", cmd);
77 TCLAP::ValueArg<std::string> arg_format_tags("u", "u", "list of format tags to be removed []", false, "", "str", cmd);
78 TCLAP::SwitchArg arg_quiet("q", "q", "do not print options and summary [false]", cmd, false);
79 TCLAP::SwitchArg arg_remove_filters("x", "x", "remove filters [false]", cmd, false);
80 TCLAP::ValueArg<std::string> arg_output_vcf_file("o", "o", "output VCF file [-]", false, "-", "str", cmd);
81 TCLAP::UnlabeledValueArg<std::string> arg_input_vcf_file("<in.vcf>", "input VCF file", true, "","file", cmd);
82
83 cmd.parse(argc, argv);
84
85 input_vcf_file = arg_input_vcf_file.getValue();
86 output_vcf_file = arg_output_vcf_file.getValue();
87 parse_intervals(intervals, arg_interval_list.getValue(), arg_intervals.getValue());
88 parse_string_list(info_tags, arg_info_tags.getValue());
89 parse_string_list(format_tags, arg_format_tags.getValue());
90 print = !arg_quiet.getValue();
91 remove_filters = arg_remove_filters.getValue();
92 }
93 catch (TCLAP::ArgException &e)
94 {
95 std::cerr << "error: " << e.error() << " for arg " << e.argId() << "\n";
96 abort();
97 }
98 };
99
initialize()100 void initialize()
101 {
102 if (info_tags.size()==0 && format_tags.size()==0)
103 {
104 fprintf(stderr, "[%s:%d %s] At least one of -u or -t options must be specified.\n", __FILE__, __LINE__, __FUNCTION__);
105 exit(1);
106 }
107
108 //////////////////////
109 //i/o initialization//
110 //////////////////////
111 odr = new BCFOrderedReader(input_vcf_file, intervals);
112 odw = new BCFOrderedWriter(output_vcf_file);
113 odw->link_hdr(odr->hdr);
114 odw->write_hdr();
115
116 ////////////////////////
117 //stats initialization//
118 ////////////////////////
119 no_variants = 0;
120 no_variants_with_removed_info = 0;
121
122 ////////////////////////
123 //tools initialization//
124 ////////////////////////
125 }
126
rminfo()127 void rminfo()
128 {
129 bcf1_t *v = odw->get_bcf1_from_pool();
130 bcf_hdr_t *h = odr->hdr;
131
132 while (odr->read(v))
133 {
134 bcf_unpack(v, BCF_UN_INFO);
135
136 int32_t ret = 0;
137 for (uint32_t i=0; i<info_tags.size(); ++i)
138 {
139 ret += bcf_update_info(h, v, info_tags[i].c_str(), NULL, 0, 0);
140 }
141
142 for (uint32_t i=0; i<format_tags.size(); ++i)
143 {
144 ret += bcf_update_format(h, v, format_tags[i].c_str(), NULL, 0, 0);
145 }
146
147 //todo: this is not correct, ret only returns non 0 upon an error.
148 if (!ret) ++no_variants_with_removed_info;
149
150 if (remove_filters) bcf_update_filter(h, v, NULL, 0);
151
152 ++no_variants;
153
154 odw->write(v);
155 v = odw->get_bcf1_from_pool();
156 }
157
158 odw->close();
159 odr->close();
160 };
161
print_options()162 void print_options()
163 {
164 if (!print) return;
165
166 std::clog << "rminfo v" << version << "\n";
167 std::clog << "\n";
168 std::clog << "options: input VCF file " << input_vcf_file << "\n";
169 std::clog << " [o] output VCF file " << output_vcf_file << "\n";
170 std::clog << " [q] quiet " << (!print ? "true" : "false") << "\n";
171 print_strvec(" [t] info tags ", info_tags);
172 print_int_op(" [i] intervals ", intervals);
173 std::clog << "\n";
174 }
175
print_stats()176 void print_stats()
177 {
178 if (!print) return;
179
180 std::clog << "\n";
181 std::clog << "stats: total no. variants : " << no_variants << "\n";
182 std::clog << " total no. variants with removed info : " << no_variants_with_removed_info << "\n";
183 std::clog << "\n";
184 };
185
~Igor()186 ~Igor() {};
187
188 private:
189 };
190
191 }
192
rminfo(int argc,char ** argv)193 bool rminfo(int argc, char ** argv)
194 {
195 Igor igor(argc, argv);
196 igor.print_options();
197 igor.initialize();
198 igor.rminfo();
199 igor.print_stats();
200
201 return igor.print;
202 };
203