1 /* The MIT License
2 
3    Copyright (c) 2013 Adrian Tan <atks@umich.edu>
4 
5    Permission is hereby granted, free of charge, to any person obtaining a copy
6    of this software and associated documentation files (the "Software"), to deal
7    in the Software without restriction, including without limitation the rights
8    to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9    copies of the Software, and to permit persons to whom the Software is
10    furnished to do so, subject to the following conditions:
11 
12    The above copyright notice and this permission notice shall be included in
13    all copies or substantial portions of the Software.
14 
15    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16    IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17    FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18    AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19    LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20    OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21    THE SOFTWARE.
22 */
23 
24 #include "program.h"
25 
failure(TCLAP::CmdLineInterface & c,TCLAP::ArgException & e)26 void VTOutput::failure(TCLAP::CmdLineInterface& c, TCLAP::ArgException& e)
27 {
28     std::clog << "\n";
29     std::clog << "  " << e.what() << "\n\n";
30     usage(c);
31     exit(1);
32 }
33 
usage(TCLAP::CmdLineInterface & c)34 void VTOutput::usage(TCLAP::CmdLineInterface& c)
35 {
36     std::string s = "";
37     std::list<TCLAP::Arg*> args = c.getArgList();
38     //prints unlabeled arument list first
39     for (TCLAP::ArgListIterator it = args.begin(); it != args.end(); it++)
40     {
41         TCLAP::Arg& arg = **it;
42         if (typeid(arg)==typeid(TCLAP::UnlabeledValueArg<std::string>))
43         {
44             TCLAP::UnlabeledValueArg<std::string> *i = (TCLAP::UnlabeledValueArg<std::string> *) (*it);
45             s = i->getName();
46         }
47         else if (typeid(arg)==typeid(TCLAP::UnlabeledMultiArg<std::string>))
48         {
49             TCLAP::UnlabeledMultiArg<std::string> *i = (TCLAP::UnlabeledMultiArg<std::string> *) (*it);
50             s = i->getName();
51         }
52     }
53 
54     std::clog << c.getProgramName() << " v" << c.getVersion() << "\n\n";
55     std::clog << "description : " << c.getMessage() << "\n\n";
56     std::clog << "usage : vt "  << c.getProgramName() << " [options] " << s << "\n\n";
57 
58     //prints rest of arguments
59     for (TCLAP::ArgListIterator it = args.begin(); it != args.end(); it++)
60     {
61         if (it==args.begin())
62         {
63             std::clog << "options : ";
64         }
65         else
66         {
67             std::clog << "          ";
68         }
69 
70         TCLAP::Arg& arg = **it;
71         if (typeid(arg)==typeid(TCLAP::ValueArg<std::string>) ||
72             typeid(arg)==typeid(TCLAP::ValueArg<uint32_t>) ||
73             typeid(arg)==typeid(TCLAP::ValueArg<int32_t>) ||
74             typeid(arg)==typeid(TCLAP::ValueArg<double>) ||
75             typeid(arg)==typeid(TCLAP::ValueArg<float>))
76         {
77             TCLAP::ValueArg<std::string> *i = (TCLAP::ValueArg<std::string> *) (*it);
78 
79             std::clog  << "-" << (i->getFlag()=="" ? i->getName() : i->getFlag())
80                        << "  " << i->getDescription() << "\n";
81         }
82         else if (typeid(arg)==typeid(TCLAP::SwitchArg))
83         {
84             TCLAP::SwitchArg *i = (TCLAP::SwitchArg *) (*it);
85 
86             std::clog  << "-" << i->getFlag()
87                        << "  " << i->getDescription() << "\n";
88         }
89         else if (typeid(arg)==typeid(TCLAP::UnlabeledValueArg<std::string>))
90         {
91             //ignored
92         }
93         else if (typeid(arg)==typeid(TCLAP::UnlabeledMultiArg<std::string>))
94         {
95             //ignored
96         }
97         else
98         {
99             std::clog << "oops, argument type not handled\n";
100         }
101     }
102 
103     std::clog  <<  "\n";
104 }
105 
106 /**
107  * Parse multiple files from command line unlabeled arguments or -L denoted file list.  If both are defined, the files are merged.
108  *
109  * @files          - file names are stored in this vector
110  * @argument_files - vector of input files
111  * @file_list      - file names stored in a file
112  *
113  */
parse_files(std::vector<std::string> & files,const std::vector<std::string> & arg_files,std::string file_list)114 void Program::parse_files(std::vector<std::string>& files, const std::vector<std::string>& arg_files, std::string file_list)
115 {
116     files.clear();
117 
118     if (arg_files.size()!=0)
119     {
120         files = arg_files;
121     }
122 
123     if (file_list != "")
124     {
125         htsFile *file = hts_open(file_list.c_str(), "r");
126         if (file==NULL)
127         {
128             std::cerr << "cannot open " << file_list << "\n";
129             exit(1);
130         }
131         kstring_t *s = &file->line;
132         while (hts_getline(file, '\n', s) >= 0)
133         {
134             if (s->s[0]!='#')
135             {
136                 files.push_back(std::string(s->s));
137             }
138         }
139         hts_close(file);
140     }
141 }
142 
143 /**
144  * Parse intervals. Processes the interval list first followed by the interval string. Duplicates are dropped.
145  *
146  * @intervals       - intervals stored in this vector
147  * @interval_list   - file containing intervals
148  * @interval_string - comma delimited intervals in a string
149  *
150  * todo: merge overlapping sites?
151  */
parse_intervals(std::vector<GenomeInterval> & intervals,std::string interval_list,std::string interval_string)152 void Program::parse_intervals(std::vector<GenomeInterval>& intervals, std::string interval_list, std::string interval_string)
153 {
154     intervals.clear();
155     std::map<std::string, uint32_t> m;
156 
157     if (interval_list!="")
158     {
159         htsFile *file = hts_open(interval_list.c_str(), "r");
160         if (file)
161         {
162             kstring_t *s = &file->line;
163             while (hts_getline(file, '\n', s)>=0)
164             {
165                 std::string ss = std::string(s->s);
166                 if (m.find(ss)==m.end())
167                 {
168                     m[ss] = 1;
169                     GenomeInterval interval(ss);
170                     intervals.push_back(interval);
171                 }
172             }
173             hts_close(file);
174         }
175     }
176 
177     std::vector<std::string> v;
178     if (interval_string!="")
179         split(v, ",", interval_string);
180 
181     for (size_t i=0; i<v.size(); ++i)
182     {
183         if (m.find(v[i])==m.end())
184         {
185             m[v[i]] = 1;
186             GenomeInterval interval(v[i]);
187             intervals.push_back(interval);
188         }
189     }
190 }
191 
192 /**
193  * Parse filters. Processes the filter list.
194  *
195  * @filters       - filters stored in this vector
196  * @filter_string - comma delimited filters in a string
197  * @n             - ensure that filters vector had n filters.
198  *                  if there are less, just pad with empty strings
199  *                  if there are more, thrown an error.
200  *                  if n is 0, ignore the previous contraints.
201  * @pad           - if there are less than expected variant expressions
202  *                      when true, the remaining filter expressions are padded with the empty string.
203  *                      when false and only one expression is observed, the remaining filter expressions
204  *                      duplicated with that filter expression.
205  */
parse_filters(std::vector<std::string> & filters,std::string filter_string,int32_t n,bool pad)206 void Program::parse_filters(std::vector<std::string>& filters, std::string filter_string, int32_t n, bool pad)
207 {
208     filters.clear();
209     if (filter_string!="")
210         split(filters, ",", filter_string);
211 
212     if (n && filters.size()!=0)
213     {
214         if (filters.size()<n)
215         {
216             if (pad)
217             {
218                 while(filters.size()!=n) filters.push_back("");
219                 fprintf(stderr, "[%s:%d %s] Number of filters less than expected, padding remaining filters with empty string\n", __FILE__, __LINE__, __FUNCTION__);
220             }
221             else
222             {
223                 if (filters.size()==1)
224                 {
225                     filters.resize(n, filters[0]);
226                 }
227                 else
228                 {
229                     fprintf(stderr, "[%s:%d %s] %d filter expressions are expected : %s\n", __FILE__, __LINE__, __FUNCTION__, n, filter_string.c_str());
230                     exit(1);
231                 }
232             }
233         }
234         else if (filters.size()>n)
235         {
236             fprintf(stderr, "[%s:%d %s] %d filter expressions are expected : %s\n", __FILE__, __LINE__, __FUNCTION__, n, filter_string.c_str());
237             exit(1);
238         }
239         else
240         {
241             //all is good
242         }
243     }
244 
245     if (filters.size()==0)
246     {
247         filters.push_back("");
248     }
249 }
250 
251 /**
252  * Parse a list of strings delimited by commas.
253  *
254  * @strings        - list of strings
255  * @string_list    - comma delimited strings
256  */
parse_string_list(std::vector<std::string> & strings,std::string string_list)257 void Program::parse_string_list(std::vector<std::string>& strings, std::string string_list)
258 {
259     strings.clear();
260     if (string_list!="")
261         split(strings, ",", string_list);
262 }
263 
264 /**
265  * Print reference FASTA file option.
266  */
print_ref_op(const char * option_line,std::string ref_fasta_file)267 void Program::print_ref_op(const char* option_line, std::string ref_fasta_file)
268 {
269     if (ref_fasta_file!="")
270     {
271         std::clog << option_line << ref_fasta_file << "\n";
272     }
273 }
274 
275 /**
276  * Print string option, hide if not present.
277  */
print_str_op(const char * option_line,std::string str_value)278 void Program::print_str_op(const char* option_line, std::string str_value)
279 {
280     if (str_value!="")
281     {
282         std::clog << option_line << str_value << "\n";
283     }
284 }
285 
286 /**
287  * Print number option, hide if 0.
288  */
print_num_op(const char * option_line,uint32_t num_value)289 void Program::print_num_op(const char* option_line, uint32_t num_value)
290 {
291     if (num_value)
292     {
293         std::clog << option_line << num_value << "\n";
294     }
295 }
296 
297 /**
298  * Print switch option, hide if not switched on.
299  */
print_boo_op(const char * option_line,bool value)300 void Program::print_boo_op(const char* option_line, bool value)
301 {
302     if (value)
303     {
304         std::clog << option_line << "true" << "\n";
305     }
306     else
307     {
308         std::clog << option_line << "false" << "\n";
309     }
310 }
311 
312 /**
313  * Print intervals option.
314  */
print_int_op(const char * option_line,std::vector<GenomeInterval> & intervals)315 void Program::print_int_op(const char* option_line, std::vector<GenomeInterval>& intervals)
316 {
317     if (intervals.size()!=0)
318     {
319         std::clog << option_line;
320         for (size_t i=0; i<std::min((uint32_t)intervals.size(),(uint32_t)5); ++i)
321         {
322             if (i) std::clog << ",";
323             std::clog << intervals[i].to_string();
324         }
325         if (intervals.size()>5)
326         {
327             std::clog << " and " << (intervals.size()-5) <<  " other intervals\n";
328         }
329         else
330         {
331             std::clog << "\n";
332         }
333     }
334 }
335 
336 /**
337  * Print string vector.
338  */
print_strvec(const char * option_line,std::vector<std::string> & vec)339 void Program::print_strvec(const char* option_line, std::vector<std::string>& vec)
340 {
341     if (vec.size()!=0)
342     {
343         std::clog << option_line;
344         for (size_t i=0; i<std::min((uint32_t)vec.size(),(uint32_t)4); ++i)
345         {
346             if (i) std::clog << ",";
347             std::clog << vec[i];
348         }
349 
350         if (vec.size()>4)
351         {
352             std::clog << " and " << (vec.size()-4) <<  " other values\n";
353         }
354         else
355         {
356             std::clog << "\n";
357         }
358     }
359 }
360 
361 /**
362  * Print input files.
363  */
print_ifiles(const char * option_line,std::vector<std::string> & files)364 void Program::print_ifiles(const char* option_line, std::vector<std::string>& files)
365 {
366     if (files.size()!=0)
367     {
368         std::clog << option_line;
369         for (size_t i=0; i<std::min((uint32_t)files.size(),(uint32_t)2); ++i)
370         {
371             if (i) std::clog << ",";
372             std::clog << files[i];
373         }
374         if (files.size()>2)
375         {
376             std::clog << " and " << (files.size()-2) <<  " other files\n";
377         }
378         else
379         {
380             std::clog << "\n";
381         }
382     }
383 }
384 
385 /**
386  * Parse samples. Processes the sample list. Duplicates are dropped.
387  *
388  * @nsamples     - number of unique samples found in list
389  * @sample_list  - file containing sample names
390  */
read_sample_list(int32_t & nsamples,std::string sample_list)391 char** Program::read_sample_list(int32_t& nsamples, std::string sample_list)
392 {
393     std::vector<std::string> vsamples;
394     std::map<std::string, int32_t> map;
395 
396     if (sample_list!="")
397     {
398         htsFile *file = hts_open(sample_list.c_str(), "r");
399         if (file)
400         {
401             kstring_t *s = &file->line;
402             while (hts_getline(file, '\n', s)>=0)
403             {
404                 std::string ss = std::string(s->s);
405                 if (map.find(ss)==map.end())
406                 {
407                     map[ss] = 1;
408                     vsamples.push_back(ss);
409                 }
410             }
411             hts_close(file);
412         }
413 
414         nsamples = vsamples.size();
415         char** samples = (char**) malloc(sizeof(char*)*nsamples);
416 
417         for (int32_t i=0; i<vsamples.size(); ++i)
418         {
419             samples[i] = strdup(vsamples[i].c_str());
420         }
421 
422         return samples;
423     }
424 
425     return NULL;
426 }