1 /* The MIT License
2
3 Copyright (c) 2013 Adrian Tan <atks@umich.edu>
4
5 Permission is hereby granted, free of charge, to any person obtaining a copy
6 of this software and associated documentation files (the "Software"), to deal
7 in the Software without restriction, including without limitation the rights
8 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 copies of the Software, and to permit persons to whom the Software is
10 furnished to do so, subject to the following conditions:
11
12 The above copyright notice and this permission notice shall be included in
13 all copies or substantial portions of the Software.
14
15 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21 THE SOFTWARE.
22 */
23
24 #include "program.h"
25
failure(TCLAP::CmdLineInterface & c,TCLAP::ArgException & e)26 void VTOutput::failure(TCLAP::CmdLineInterface& c, TCLAP::ArgException& e)
27 {
28 std::clog << "\n";
29 std::clog << " " << e.what() << "\n\n";
30 usage(c);
31 exit(1);
32 }
33
usage(TCLAP::CmdLineInterface & c)34 void VTOutput::usage(TCLAP::CmdLineInterface& c)
35 {
36 std::string s = "";
37 std::list<TCLAP::Arg*> args = c.getArgList();
38 //prints unlabeled arument list first
39 for (TCLAP::ArgListIterator it = args.begin(); it != args.end(); it++)
40 {
41 TCLAP::Arg& arg = **it;
42 if (typeid(arg)==typeid(TCLAP::UnlabeledValueArg<std::string>))
43 {
44 TCLAP::UnlabeledValueArg<std::string> *i = (TCLAP::UnlabeledValueArg<std::string> *) (*it);
45 s = i->getName();
46 }
47 else if (typeid(arg)==typeid(TCLAP::UnlabeledMultiArg<std::string>))
48 {
49 TCLAP::UnlabeledMultiArg<std::string> *i = (TCLAP::UnlabeledMultiArg<std::string> *) (*it);
50 s = i->getName();
51 }
52 }
53
54 std::clog << c.getProgramName() << " v" << c.getVersion() << "\n\n";
55 std::clog << "description : " << c.getMessage() << "\n\n";
56 std::clog << "usage : vt " << c.getProgramName() << " [options] " << s << "\n\n";
57
58 //prints rest of arguments
59 for (TCLAP::ArgListIterator it = args.begin(); it != args.end(); it++)
60 {
61 if (it==args.begin())
62 {
63 std::clog << "options : ";
64 }
65 else
66 {
67 std::clog << " ";
68 }
69
70 TCLAP::Arg& arg = **it;
71 if (typeid(arg)==typeid(TCLAP::ValueArg<std::string>) ||
72 typeid(arg)==typeid(TCLAP::ValueArg<uint32_t>) ||
73 typeid(arg)==typeid(TCLAP::ValueArg<int32_t>) ||
74 typeid(arg)==typeid(TCLAP::ValueArg<double>) ||
75 typeid(arg)==typeid(TCLAP::ValueArg<float>))
76 {
77 TCLAP::ValueArg<std::string> *i = (TCLAP::ValueArg<std::string> *) (*it);
78
79 std::clog << "-" << (i->getFlag()=="" ? i->getName() : i->getFlag())
80 << " " << i->getDescription() << "\n";
81 }
82 else if (typeid(arg)==typeid(TCLAP::SwitchArg))
83 {
84 TCLAP::SwitchArg *i = (TCLAP::SwitchArg *) (*it);
85
86 std::clog << "-" << i->getFlag()
87 << " " << i->getDescription() << "\n";
88 }
89 else if (typeid(arg)==typeid(TCLAP::UnlabeledValueArg<std::string>))
90 {
91 //ignored
92 }
93 else if (typeid(arg)==typeid(TCLAP::UnlabeledMultiArg<std::string>))
94 {
95 //ignored
96 }
97 else
98 {
99 std::clog << "oops, argument type not handled\n";
100 }
101 }
102
103 std::clog << "\n";
104 }
105
106 /**
107 * Parse multiple files from command line unlabeled arguments or -L denoted file list. If both are defined, the files are merged.
108 *
109 * @files - file names are stored in this vector
110 * @argument_files - vector of input files
111 * @file_list - file names stored in a file
112 *
113 */
parse_files(std::vector<std::string> & files,const std::vector<std::string> & arg_files,std::string file_list)114 void Program::parse_files(std::vector<std::string>& files, const std::vector<std::string>& arg_files, std::string file_list)
115 {
116 files.clear();
117
118 if (arg_files.size()!=0)
119 {
120 files = arg_files;
121 }
122
123 if (file_list != "")
124 {
125 htsFile *file = hts_open(file_list.c_str(), "r");
126 if (file==NULL)
127 {
128 std::cerr << "cannot open " << file_list << "\n";
129 exit(1);
130 }
131 kstring_t *s = &file->line;
132 while (hts_getline(file, '\n', s) >= 0)
133 {
134 if (s->s[0]!='#')
135 {
136 files.push_back(std::string(s->s));
137 }
138 }
139 hts_close(file);
140 }
141 }
142
143 /**
144 * Parse intervals. Processes the interval list first followed by the interval string. Duplicates are dropped.
145 *
146 * @intervals - intervals stored in this vector
147 * @interval_list - file containing intervals
148 * @interval_string - comma delimited intervals in a string
149 *
150 * todo: merge overlapping sites?
151 */
parse_intervals(std::vector<GenomeInterval> & intervals,std::string interval_list,std::string interval_string)152 void Program::parse_intervals(std::vector<GenomeInterval>& intervals, std::string interval_list, std::string interval_string)
153 {
154 intervals.clear();
155 std::map<std::string, uint32_t> m;
156
157 if (interval_list!="")
158 {
159 htsFile *file = hts_open(interval_list.c_str(), "r");
160 if (file)
161 {
162 kstring_t *s = &file->line;
163 while (hts_getline(file, '\n', s)>=0)
164 {
165 std::string ss = std::string(s->s);
166 if (m.find(ss)==m.end())
167 {
168 m[ss] = 1;
169 GenomeInterval interval(ss);
170 intervals.push_back(interval);
171 }
172 }
173 hts_close(file);
174 }
175 }
176
177 std::vector<std::string> v;
178 if (interval_string!="")
179 split(v, ",", interval_string);
180
181 for (size_t i=0; i<v.size(); ++i)
182 {
183 if (m.find(v[i])==m.end())
184 {
185 m[v[i]] = 1;
186 GenomeInterval interval(v[i]);
187 intervals.push_back(interval);
188 }
189 }
190 }
191
192 /**
193 * Parse filters. Processes the filter list.
194 *
195 * @filters - filters stored in this vector
196 * @filter_string - comma delimited filters in a string
197 * @n - ensure that filters vector had n filters.
198 * if there are less, just pad with empty strings
199 * if there are more, thrown an error.
200 * if n is 0, ignore the previous contraints.
201 * @pad - if there are less than expected variant expressions
202 * when true, the remaining filter expressions are padded with the empty string.
203 * when false and only one expression is observed, the remaining filter expressions
204 * duplicated with that filter expression.
205 */
parse_filters(std::vector<std::string> & filters,std::string filter_string,int32_t n,bool pad)206 void Program::parse_filters(std::vector<std::string>& filters, std::string filter_string, int32_t n, bool pad)
207 {
208 filters.clear();
209 if (filter_string!="")
210 split(filters, ",", filter_string);
211
212 if (n && filters.size()!=0)
213 {
214 if (filters.size()<n)
215 {
216 if (pad)
217 {
218 while(filters.size()!=n) filters.push_back("");
219 fprintf(stderr, "[%s:%d %s] Number of filters less than expected, padding remaining filters with empty string\n", __FILE__, __LINE__, __FUNCTION__);
220 }
221 else
222 {
223 if (filters.size()==1)
224 {
225 filters.resize(n, filters[0]);
226 }
227 else
228 {
229 fprintf(stderr, "[%s:%d %s] %d filter expressions are expected : %s\n", __FILE__, __LINE__, __FUNCTION__, n, filter_string.c_str());
230 exit(1);
231 }
232 }
233 }
234 else if (filters.size()>n)
235 {
236 fprintf(stderr, "[%s:%d %s] %d filter expressions are expected : %s\n", __FILE__, __LINE__, __FUNCTION__, n, filter_string.c_str());
237 exit(1);
238 }
239 else
240 {
241 //all is good
242 }
243 }
244
245 if (filters.size()==0)
246 {
247 filters.push_back("");
248 }
249 }
250
251 /**
252 * Parse a list of strings delimited by commas.
253 *
254 * @strings - list of strings
255 * @string_list - comma delimited strings
256 */
parse_string_list(std::vector<std::string> & strings,std::string string_list)257 void Program::parse_string_list(std::vector<std::string>& strings, std::string string_list)
258 {
259 strings.clear();
260 if (string_list!="")
261 split(strings, ",", string_list);
262 }
263
264 /**
265 * Print reference FASTA file option.
266 */
print_ref_op(const char * option_line,std::string ref_fasta_file)267 void Program::print_ref_op(const char* option_line, std::string ref_fasta_file)
268 {
269 if (ref_fasta_file!="")
270 {
271 std::clog << option_line << ref_fasta_file << "\n";
272 }
273 }
274
275 /**
276 * Print string option, hide if not present.
277 */
print_str_op(const char * option_line,std::string str_value)278 void Program::print_str_op(const char* option_line, std::string str_value)
279 {
280 if (str_value!="")
281 {
282 std::clog << option_line << str_value << "\n";
283 }
284 }
285
286 /**
287 * Print number option, hide if 0.
288 */
print_num_op(const char * option_line,uint32_t num_value)289 void Program::print_num_op(const char* option_line, uint32_t num_value)
290 {
291 if (num_value)
292 {
293 std::clog << option_line << num_value << "\n";
294 }
295 }
296
297 /**
298 * Print switch option, hide if not switched on.
299 */
print_boo_op(const char * option_line,bool value)300 void Program::print_boo_op(const char* option_line, bool value)
301 {
302 if (value)
303 {
304 std::clog << option_line << "true" << "\n";
305 }
306 else
307 {
308 std::clog << option_line << "false" << "\n";
309 }
310 }
311
312 /**
313 * Print intervals option.
314 */
print_int_op(const char * option_line,std::vector<GenomeInterval> & intervals)315 void Program::print_int_op(const char* option_line, std::vector<GenomeInterval>& intervals)
316 {
317 if (intervals.size()!=0)
318 {
319 std::clog << option_line;
320 for (size_t i=0; i<std::min((uint32_t)intervals.size(),(uint32_t)5); ++i)
321 {
322 if (i) std::clog << ",";
323 std::clog << intervals[i].to_string();
324 }
325 if (intervals.size()>5)
326 {
327 std::clog << " and " << (intervals.size()-5) << " other intervals\n";
328 }
329 else
330 {
331 std::clog << "\n";
332 }
333 }
334 }
335
336 /**
337 * Print string vector.
338 */
print_strvec(const char * option_line,std::vector<std::string> & vec)339 void Program::print_strvec(const char* option_line, std::vector<std::string>& vec)
340 {
341 if (vec.size()!=0)
342 {
343 std::clog << option_line;
344 for (size_t i=0; i<std::min((uint32_t)vec.size(),(uint32_t)4); ++i)
345 {
346 if (i) std::clog << ",";
347 std::clog << vec[i];
348 }
349
350 if (vec.size()>4)
351 {
352 std::clog << " and " << (vec.size()-4) << " other values\n";
353 }
354 else
355 {
356 std::clog << "\n";
357 }
358 }
359 }
360
361 /**
362 * Print input files.
363 */
print_ifiles(const char * option_line,std::vector<std::string> & files)364 void Program::print_ifiles(const char* option_line, std::vector<std::string>& files)
365 {
366 if (files.size()!=0)
367 {
368 std::clog << option_line;
369 for (size_t i=0; i<std::min((uint32_t)files.size(),(uint32_t)2); ++i)
370 {
371 if (i) std::clog << ",";
372 std::clog << files[i];
373 }
374 if (files.size()>2)
375 {
376 std::clog << " and " << (files.size()-2) << " other files\n";
377 }
378 else
379 {
380 std::clog << "\n";
381 }
382 }
383 }
384
385 /**
386 * Parse samples. Processes the sample list. Duplicates are dropped.
387 *
388 * @nsamples - number of unique samples found in list
389 * @sample_list - file containing sample names
390 */
read_sample_list(int32_t & nsamples,std::string sample_list)391 char** Program::read_sample_list(int32_t& nsamples, std::string sample_list)
392 {
393 std::vector<std::string> vsamples;
394 std::map<std::string, int32_t> map;
395
396 if (sample_list!="")
397 {
398 htsFile *file = hts_open(sample_list.c_str(), "r");
399 if (file)
400 {
401 kstring_t *s = &file->line;
402 while (hts_getline(file, '\n', s)>=0)
403 {
404 std::string ss = std::string(s->s);
405 if (map.find(ss)==map.end())
406 {
407 map[ss] = 1;
408 vsamples.push_back(ss);
409 }
410 }
411 hts_close(file);
412 }
413
414 nsamples = vsamples.size();
415 char** samples = (char**) malloc(sizeof(char*)*nsamples);
416
417 for (int32_t i=0; i<vsamples.size(); ++i)
418 {
419 samples[i] = strdup(vsamples[i].c_str());
420 }
421
422 return samples;
423 }
424
425 return NULL;
426 }