1 /*
2 Copyright (c) 2003 by Stefan Kurtz and The Institute for
3 Genomic Research. This is OSI Certified Open Source Software.
4 Please see the file LICENSE for licensing information and
5 the file ACKNOWLEDGEMENTS for names of contributors to the
6 code base.
7 */
8
9 //\IgnoreLatex{
10
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include "types.h"
#include "optdesc.h"
#include "debugdef.h"
#include "errordef.h"
#include "protodef.h"
#include "maxmatdef.h"
19
20 //}
21
22 /*EE
23 This file contains functions to parse the possible
24 options of \texttt{maxmat3} and to appropriately initialize
25 the \texttt{mmcallinfo}-record according to the given options.
26 */
27
28 //\IgnoreLatex{
29
30 /*
31 The default value for the minimal unique match length.
32 */
33
34 #define DEFAULTMINUNIQUEMATCHLEN 20
35
36 //}
37
38 /*EE
39 The following type declares symbolic constants for the options.
40 */
41
typedef enum
{
  OPTMUM = 0,                // -mum
  OPTMUMCAND,                // -mumcand
  OPTMUMREF,                 // -mumreference
  OPTMAXMATCH,               // -maxmatch
  OPTMATCHNUCLEOTIDESONLY,   // -n
  OPTLEASTLENGTH,            // -l
  OPTCOMPUTEBOTHDIRECTIONS,  // -b
  OPTONLYREVERSECOMPLEMENT,  // -r
  OPTSHOWSTRING,             // -s
  OPTSHOWREVERSEPOSITIONS,   // -c
  OPTFOURCOLUMN,             // -F
  OPTSHOWSEQUENCELENGTHS,    // -L
  OPTH,                      // -h
  OPTHELP,                   // -help
  NUMOFOPTIONS               // number of options above, used as table size
} Optionnumber;
60
61 /*
62 The following function stores the help-text for the option \texttt{-l}.
63 This is necessary, since the text depends on the value of the
64 symbolic constant \texttt{DEFAULTMINUNIQUEMATCHLEN}.
65 */
66
makeleastlengthtext(char * spacefortext)67 static void makeleastlengthtext(char *spacefortext)
68 {
69 sprintf(spacefortext,"set the minimum length of a match\n"
70 "if not set, the default value is %lu",
71 (Showuint) DEFAULTMINUNIQUEMATCHLEN);
72 }
73
74 /*
75 The following function shows a usage line including the
76 possible options.
77 */
78
static void showusage(char *program,OptionDescription *options,
                      Uint numofoptions)
{
  // First the synopsis and a short description of what the program does.
  (void) printf("Usage: %s [options] <reference-file> <query-files>\n\n"
                "Find and output (to stdout) the positions and length of all\n"
                "sufficiently long maximal matches of a substring in\n"
                "<query-file> and <reference-file>\n\n",
                program);

  // Then the heading, followed by the option table printed by showoptions.
  (void) fputs("Options:\n",stdout);
  showoptions(stdout,program,options,numofoptions);
}
89
90 /*EE
91 The following function declares the possible options
92 in a record \texttt{options}. It then ananlyzes the \texttt{argv}-vector
93 step by step. If everything is okay, 0 is returned and the
94 \texttt{mmcallinfo} is correctly initialized.
95 Otherwise, a negative value is returned.
96 */
97
parsemaxmatoptions(MMcallinfo * mmcallinfo,Argctype argc,char ** argv)98 Sint parsemaxmatoptions(MMcallinfo *mmcallinfo,Argctype argc, char **argv)
99 {
100 OptionDescription options[NUMOFOPTIONS]; // store the options
101 Sint optval; // neg. return val. if error, otherwise option number
102 Uint argnum; // pointer to argv
103 Scaninteger readint; // temporary integer to read value from string
104 char leastlengthtext[128+1];
105
106 DEBUGLEVELSET;
107 initoptions(&options[0],(Uint) NUMOFOPTIONS);
108 ADDOPTION(OPTMUM,"-mum",
109 "compute maximal matches that are unique in both sequences");
110 ADDOPTION(OPTMUMREF,"-mumreference",
111 "compute maximal matches that are unique in the reference-\n"
112 "sequence but not necessarily in the query-sequence (default)");
113 ADDOPTION(OPTMUMCAND,"-mumcand",
114 "same as -mumreference");
115 ADDOPTION(OPTMAXMATCH,"-maxmatch",
116 "compute all maximal matches regardless of their uniqueness");
117 ADDOPTION(OPTMATCHNUCLEOTIDESONLY,"-n",
118 "match only the characters a, c, g, or t\n"
119 "they can be in upper or in lower case");
120 makeleastlengthtext(&leastlengthtext[0]);
121 ADDOPTION(OPTLEASTLENGTH,"-l",&leastlengthtext[0]);
122 ADDOPTION(OPTCOMPUTEBOTHDIRECTIONS,"-b",
123 "compute forward and reverse complement matches");
124 ADDOPTION(OPTONLYREVERSECOMPLEMENT,"-r",
125 "only compute reverse complement matches");
126 ADDOPTION(OPTSHOWSTRING,"-s",
127 "show the matching substrings");
128 ADDOPTION(OPTSHOWREVERSEPOSITIONS,"-c",
129 "report the query-position of a reverse complement match\n"
130 "relative to the original query sequence");
131 ADDOPTION(OPTFOURCOLUMN,"-F",
132 "force 4 column output format regardless of the number of\n"
133 "reference sequence inputs");
134 ADDOPTION(OPTSHOWSEQUENCELENGTHS,"-L",
135 "show the length of the query sequences on the header line");
136 ADDOPTION(OPTH,"-h",
137 "show possible options");
138 ADDOPTION(OPTHELP,"-help",
139 "show possible options");
140 mmcallinfo->showstring = False;
141 mmcallinfo->reversecomplement = False;
142 mmcallinfo->forward = True;
143 mmcallinfo->showreversepositions = False;
144 mmcallinfo->fourcolumn = False;
145 mmcallinfo->showsequencelengths = False;
146 mmcallinfo->matchnucleotidesonly = False;
147 mmcallinfo->cmum = False;
148 mmcallinfo->cmumcand = False;
149 mmcallinfo->cmaxmatch = False;
150 mmcallinfo->minmatchlength = (Uint) DEFAULTMINUNIQUEMATCHLEN;
151
152 if(argc == 1)
153 {
154 showusage(argv[0],&options[0],(Uint) NUMOFOPTIONS);
155 return 1;
156 }
157
158 for(argnum = UintConst(1); argnum < (Uint) argc && argv[argnum][0] == '-';
159 argnum++)
160 {
161 optval = procoption(options,(Uint) NUMOFOPTIONS,argv[argnum]);
162 if(optval < 0)
163 {
164 return -1;
165 }
166 switch(optval)
167 {
168 case OPTSHOWSTRING:
169 mmcallinfo->showstring = True;
170 break;
171 case OPTCOMPUTEBOTHDIRECTIONS:
172 mmcallinfo->reversecomplement = True;
173 break;
174 case OPTSHOWREVERSEPOSITIONS:
175 mmcallinfo->showreversepositions = True;
176 break;
177 case OPTLEASTLENGTH: // additionally check the length parameter
178 argnum++;
179 if(argnum > (Uint) (argc-2))
180 {
181 ERROR1("missing argument for option %s",
182 options[OPTLEASTLENGTH].optname);
183 return -2;
184 }
185 if(sscanf(argv[argnum],"%ld",&readint) != 1 || readint <= 0)
186 {
187 ERROR2("argument %s for option %s is not a positive integer",
188 argv[argnum],options[OPTLEASTLENGTH].optname);
189 return -3;
190 }
191 mmcallinfo->minmatchlength = (Uint) readint;
192 break;
193 case OPTFOURCOLUMN:
194 mmcallinfo->fourcolumn = True;
195 break;
196 case OPTSHOWSEQUENCELENGTHS:
197 mmcallinfo->showsequencelengths = True;
198 break;
199 case OPTMATCHNUCLEOTIDESONLY:
200 mmcallinfo->matchnucleotidesonly = True;
201 break;
202 case OPTONLYREVERSECOMPLEMENT:
203 mmcallinfo->forward = False;
204 mmcallinfo->reversecomplement = True;
205 break;
206 case OPTMAXMATCH:
207 mmcallinfo->cmaxmatch = True;
208 break;
209 case OPTMUMREF:
210 case OPTMUMCAND:
211 mmcallinfo->cmumcand = True;
212 break;
213 case OPTMUM:
214 mmcallinfo->cmum = True;
215 break;
216 case OPTH:
217 case OPTHELP:
218 showusage(argv[0],&options[0],(Uint) NUMOFOPTIONS);
219 return 1;
220 }
221 }
222 if(argnum > (Uint) (argc-2))
223 {
224 ERROR0("missing file arguments");
225 return -4;
226 }
227 if(safestringcopy(&mmcallinfo->program[0],argv[0],PATH_MAX) != 0)
228 {
229 return -5;
230 }
231 if(safestringcopy(&mmcallinfo->subjectfile[0],argv[argnum],PATH_MAX) != 0)
232 {
233 return -6;
234 }
235 for(argnum++, mmcallinfo->numofqueryfiles = 0;
236 argnum < (Uint) argc; mmcallinfo->numofqueryfiles++, argnum++)
237 {
238 if(mmcallinfo->numofqueryfiles >= (Uint) MAXNUMOFQUERYFILES)
239 {
240 ERROR1("too many query files, maximal number is %lu",
241 (Showuint) MAXNUMOFQUERYFILES);
242 return -7;
243 }
244 if(safestringcopy(&mmcallinfo->queryfilelist
245 [mmcallinfo->numofqueryfiles][0],
246 argv[argnum],PATH_MAX) != 0)
247 {
248 return -8;
249 }
250 }
251 /*
252 verify that mum options are not interchanged
253 */
254 OPTIONEXCLUDE(OPTMUM,OPTMUMCAND);
255 OPTIONEXCLUDE(OPTMUM,OPTMUMREF);
256 OPTIONEXCLUDE(OPTMUM,OPTMAXMATCH);
257 OPTIONEXCLUDE(OPTMUMCAND,OPTMAXMATCH);
258 OPTIONEXCLUDE(OPTMUMREF,OPTMAXMATCH);
259 if ( mmcallinfo->cmaxmatch )
260 {
261 mmcallinfo->cmum = False;
262 mmcallinfo->cmumcand = False;
263 }
264 else if ( mmcallinfo->cmum )
265 {
266
267 }
268 else /* default to cmumcand */
269 {
270 mmcallinfo->cmumcand = True;
271 }
272 /*
273 verify that the options -b and -r are not used at the same time
274 */
275 OPTIONEXCLUDE(OPTCOMPUTEBOTHDIRECTIONS,OPTONLYREVERSECOMPLEMENT);
276 /*
277 verify that -c is only used in combination with either -b or -r
278 */
279 OPTIONIMPLYEITHER2(OPTSHOWREVERSEPOSITIONS,
280 OPTCOMPUTEBOTHDIRECTIONS,OPTONLYREVERSECOMPLEMENT);
281 return 0;
282 }
283