1 /* The MIT License
2 
3    Copyright (c) 2013 Adrian Tan <atks@umich.edu>
4 
5    Permission is hereby granted, free of charge, to any person obtaining a copy
6    of this software and associated documentation files (the "Software"), to deal
7    in the Software without restriction, including without limitation the rights
8    to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9    copies of the Software, and to permit persons to whom the Software is
10    furnished to do so, subject to the following conditions:
11 
12    The above copyright notice and this permission notice shall be included in
13    all copies or substantial portions of the Software.
14 
15    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16    IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17    FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18    AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19    LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20    OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21    THE SOFTWARE.
22 */
23 
24 #ifndef PROGRAM_H
25 #define PROGRAM_H
26 
27 #include <fstream>
28 #include <iostream>
29 #include <typeinfo>
30 #include "tclap/CmdLine.h"
31 #include "tclap/Arg.h"
32 #include "bcf_ordered_reader.h"
33 #include "bcf_ordered_writer.h"
34 #include "bcf_synced_reader.h"
35 #include "ordered_bcf_overlap_matcher.h"
36 #include "ordered_region_overlap_matcher.h"
37 #include "hts_utils.h"
38 #include "utils.h"
39 #include "variant_manip.h"
40 #include "filter.h"
41 #include "genome_interval.h"
42 #include "reference_sequence.h"
43 
44 class VTOutput : public TCLAP::StdOutput
45 {
46     public:
47 
48     void failure(TCLAP::CmdLineInterface& c, TCLAP::ArgException& e);
49 
50     void usage(TCLAP::CmdLineInterface& c);
51 };
52 
53 /**
54  * Provides an interface for programs in vt.
55  *
56  *
57  */
58 class Program
59 {
60     public:
61 
62     std::string version;
63     std::ofstream out;
64 
65     /**
66      * Process arguments.
67      */
Program()68     Program(){};
69 
70     /**
71      * Parse multiple files from command line unlabeled arguments or -L denoted file list.  If both are defined, the files are merged.
72      *
73      * @files          - file names are stored in this vector
74      * @argument_files - vector of input files
75      * @file_list      - file names stored in a file
76      *
77      */
78     void parse_files(std::vector<std::string>& files, const std::vector<std::string>& arg_files, std::string file_list);
79 
80     /**
81      * Parse intervals. Processes the interval list first followed by the interval string. Duplicates are dropped.
82      *
83      * @intervals       - intervals stored in this vector
84      * @interval_list   - file containing intervals
85      * @interval_string - comma delimited intervals in a string
86      */
87     void parse_intervals(std::vector<GenomeInterval>& intervals, std::string interval_list, std::string interval_string);
88 
89     /**
90      * Parse filters. Processes the filter list.
91      *
92      * @filters       - filters stored in this vector
93      * @filter_string - comma delimited filters in a string
94      * @n             - ensure that filters vector had n filters.
95      *                  if there are less, just pad with empty strings
96      *                  if there are more, thrown an error.
97      *                  if n is 0, ignore the previous contraints.
98      * @pad           - if there are less than expected variant expressions
99      *                      when true, the remaining filter expressions are padded with the empty string.
100      *                      when false and only one expression is observed, the remaining filter expressions
101      *                      duplicated with that filter expression.
102      */
103     void parse_filters(std::vector<std::string>& filters, std::string filter_string, int32_t n=0, bool pad=false);
104 
105     /**
106      * Parse a list of strings delimited by commas.
107      *
108      * @strings        - list of strings
109      * @string_list    - comma delimited strings
110      */
111     void parse_string_list(std::vector<std::string>& strings, std::string string_list);
112 
113     /**
114      * Parse samples. Processes the sample list. Duplicates are dropped.
115      *
116      * @nsamples     - number of unique samples found in list
117      * @sample_list  - file containing sample names
118      */
119     char** read_sample_list(int32_t& nsamples, std::string sample_list);
120 
121     /**
122      * Initialize I/O and shared objects.
123      */
initialize()124     void initialize(){};
125 
126     /**
127      * Print options.
128      */
print_options()129     void print_options(){};
130 
131     /**
132      * Print run stats.
133      */
print_stats()134     void print_stats(){};
135 
136     /**
137      * Print reference FASTA file option.
138      */
139     void print_ref_op(const char* option_line, std::string ref_fasta_file);
140 
141     /**
142      * Print string option, hide if not present.
143      */
144     void print_str_op(const char* option_line, std::string str_value);
145 
146     /**
147      * Print number option, hide if 0.
148      */
149     void print_num_op(const char* option_line, uint32_t num_value);
150 
151     /**
152      * Print switch option, hide if not switched on.
153      */
154     void print_boo_op(const char* option_line, bool value);
155 
156     /**
157      * Print intervals option.
158      */
159     void print_int_op(const char* option_line, std::vector<GenomeInterval>& intervals);
160 
161     /**
162      * Print string vector.
163      */
164     void print_strvec(const char* option_line, std::vector<std::string>& vec);
165 
166     /**
167      * Print input files.
168      */
169     void print_ifiles(const char* option_line, std::vector<std::string>& files);
170 
171     private:
172 };
173 
174 #endif