1 /*
2   Copyright (c) 2003 by Stefan Kurtz and The Institute for
3   Genomic Research.  This is OSI Certified Open Source Software.
4   Please see the file LICENSE for licensing information and
5   the file ACKNOWLEDGEMENTS for names of contributors to the
6   code base.
7 */
8 
9 //\IgnoreLatex{
10 
11 #include <stdio.h>
12 #include <stdlib.h>
13 #include "types.h"
14 #include "optdesc.h"
15 #include "debugdef.h"
16 #include "errordef.h"
17 #include "protodef.h"
18 #include "maxmatdef.h"
19 
20 //}
21 
22 /*EE
23   This file contains functions to parse the possible
24   options of \texttt{maxmat3} and to appropriately initialize
25   the \texttt{mmcallinfo}-record according to the given options.
26 */
27 
28 //\IgnoreLatex{
29 
/*
  Default minimal match length. It is stored in the
  \texttt{minmatchlength} component of the \texttt{mmcallinfo}-record
  unless option \texttt{-l} supplies a different value.
*/

#define DEFAULTMINUNIQUEMATCHLEN 20
35 
36 //}
37 
/*EE
  The following type declares symbolic constants for the options.
  Each constant is used as the index of the corresponding entry in the
  option table; \texttt{NUMOFOPTIONS} is the number of entries.
*/

typedef enum
{
  OPTMUM = 0,                  // -mum
  OPTMUMCAND,                  // -mumcand
  OPTMUMREF,                   // -mumreference
  OPTMAXMATCH,                 // -maxmatch
  OPTMATCHNUCLEOTIDESONLY,     // -n
  OPTLEASTLENGTH,              // -l
  OPTCOMPUTEBOTHDIRECTIONS,    // -b
  OPTONLYREVERSECOMPLEMENT,    // -r
  OPTSHOWSTRING,               // -s
  OPTSHOWREVERSEPOSITIONS,     // -c
  OPTFOURCOLUMN,               // -F
  OPTSHOWSEQUENCELENGTHS,      // -L
  OPTH,                        // -h
  OPTHELP,                     // -help
  NUMOFOPTIONS                 // number of options; not an option itself
} Optionnumber;
60 
61 /*
62   The following function stores the help-text for the option \texttt{-l}.
63   This is necessary, since the text depends on the value of the
64   symbolic constant \texttt{DEFAULTMINUNIQUEMATCHLEN};
65 */
66 
makeleastlengthtext(char * spacefortext)67 static void makeleastlengthtext(char *spacefortext)
68 {
69   sprintf(spacefortext,"set the minimum length of a match\n"
70                        "if not set, the default value is %lu",
71                        (Showuint) DEFAULTMINUNIQUEMATCHLEN);
72 }
73 
74 /*
75   The following function shows a usage line including the
76   possible options.
77 */
78 
showusage(char * program,OptionDescription * options,Uint numofoptions)79 static void showusage(char *program,OptionDescription *options,
80                       Uint numofoptions)
81 {
82   printf("Usage: %s [options] <reference-file> <query-files>\n\n"
83          "Find and output (to stdout) the positions and length of all\n"
84          "sufficiently long maximal matches of a substring in\n"
85          "<query-file> and <reference-file>\n\n",program);
86   printf("Options:\n");
87   showoptions(stdout,program,options,numofoptions);
88 }
89 
90 /*EE
91   The following function declares the possible options
92   in a record \texttt{options}. It then ananlyzes the \texttt{argv}-vector
93   step by step. If everything is okay, 0 is returned and the
94   \texttt{mmcallinfo} is correctly initialized.
95   Otherwise, a negative value is returned.
96 */
97 
parsemaxmatoptions(MMcallinfo * mmcallinfo,Argctype argc,char ** argv)98 Sint parsemaxmatoptions(MMcallinfo *mmcallinfo,Argctype argc, char **argv)
99 {
100   OptionDescription options[NUMOFOPTIONS];   // store the options
101   Sint optval;         // neg. return val. if error, otherwise option number
102   Uint argnum;         // pointer to argv
103   Scaninteger readint; // temporary integer to read value from string
104   char leastlengthtext[128+1];
105 
106   DEBUGLEVELSET;
107   initoptions(&options[0],(Uint) NUMOFOPTIONS);
108   ADDOPTION(OPTMUM,"-mum",
109             "compute maximal matches that are unique in both sequences");
110   ADDOPTION(OPTMUMREF,"-mumreference",
111 	    "compute maximal matches that are unique in the reference-\n"
112             "sequence but not necessarily in the query-sequence (default)");
113   ADDOPTION(OPTMUMCAND,"-mumcand",
114             "same as -mumreference");
115   ADDOPTION(OPTMAXMATCH,"-maxmatch",
116 	    "compute all maximal matches regardless of their uniqueness");
117   ADDOPTION(OPTMATCHNUCLEOTIDESONLY,"-n",
118             "match only the characters a, c, g, or t\n"
119             "they can be in upper or in lower case");
120   makeleastlengthtext(&leastlengthtext[0]);
121   ADDOPTION(OPTLEASTLENGTH,"-l",&leastlengthtext[0]);
122   ADDOPTION(OPTCOMPUTEBOTHDIRECTIONS,"-b",
123             "compute forward and reverse complement matches");
124   ADDOPTION(OPTONLYREVERSECOMPLEMENT,"-r",
125             "only compute reverse complement matches");
126   ADDOPTION(OPTSHOWSTRING,"-s",
127             "show the matching substrings");
128   ADDOPTION(OPTSHOWREVERSEPOSITIONS,"-c",
129             "report the query-position of a reverse complement match\n"
130             "relative to the original query sequence");
131   ADDOPTION(OPTFOURCOLUMN,"-F",
132 	    "force 4 column output format regardless of the number of\n"
133 	    "reference sequence inputs");
134   ADDOPTION(OPTSHOWSEQUENCELENGTHS,"-L",
135             "show the length of the query sequences on the header line");
136   ADDOPTION(OPTH,"-h",
137 	    "show possible options");
138   ADDOPTION(OPTHELP,"-help",
139             "show possible options");
140   mmcallinfo->showstring = False;
141   mmcallinfo->reversecomplement = False;
142   mmcallinfo->forward = True;
143   mmcallinfo->showreversepositions = False;
144   mmcallinfo->fourcolumn = False;
145   mmcallinfo->showsequencelengths = False;
146   mmcallinfo->matchnucleotidesonly = False;
147   mmcallinfo->cmum = False;
148   mmcallinfo->cmumcand = False;
149   mmcallinfo->cmaxmatch = False;
150   mmcallinfo->minmatchlength = (Uint) DEFAULTMINUNIQUEMATCHLEN;
151 
152   if(argc == 1)
153   {
154     showusage(argv[0],&options[0],(Uint) NUMOFOPTIONS);
155     return 1;
156   }
157 
158   for(argnum = UintConst(1); argnum < (Uint) argc && argv[argnum][0] == '-';
159       argnum++)
160   {
161     optval = procoption(options,(Uint) NUMOFOPTIONS,argv[argnum]);
162     if(optval < 0)
163     {
164       return -1;
165     }
166     switch(optval)
167     {
168       case OPTSHOWSTRING:
169         mmcallinfo->showstring = True;
170         break;
171       case OPTCOMPUTEBOTHDIRECTIONS:
172         mmcallinfo->reversecomplement = True;
173         break;
174       case OPTSHOWREVERSEPOSITIONS:
175         mmcallinfo->showreversepositions = True;
176         break;
177       case OPTLEASTLENGTH:  // additionally check the length parameter
178         argnum++;
179         if(argnum > (Uint) (argc-2))
180         {
181           ERROR1("missing argument for option %s",
182                   options[OPTLEASTLENGTH].optname);
183           return -2;
184         }
185         if(sscanf(argv[argnum],"%ld",&readint) != 1 || readint <= 0)
186         {
187           ERROR2("argument %s for option %s is not a positive integer",
188                   argv[argnum],options[OPTLEASTLENGTH].optname);
189           return -3;
190         }
191         mmcallinfo->minmatchlength = (Uint) readint;
192         break;
193       case OPTFOURCOLUMN:
194 	mmcallinfo->fourcolumn = True;
195 	break;
196       case OPTSHOWSEQUENCELENGTHS:
197         mmcallinfo->showsequencelengths = True;
198         break;
199       case OPTMATCHNUCLEOTIDESONLY:
200         mmcallinfo->matchnucleotidesonly = True;
201         break;
202       case OPTONLYREVERSECOMPLEMENT:
203         mmcallinfo->forward = False;
204         mmcallinfo->reversecomplement = True;
205         break;
206       case OPTMAXMATCH:
207 	mmcallinfo->cmaxmatch = True;
208 	break;
209       case OPTMUMREF:
210       case OPTMUMCAND:
211         mmcallinfo->cmumcand = True;
212         break;
213       case OPTMUM:
214         mmcallinfo->cmum = True;
215         break;
216       case OPTH:
217       case OPTHELP:
218         showusage(argv[0],&options[0],(Uint) NUMOFOPTIONS);
219         return 1;
220     }
221   }
222   if(argnum > (Uint) (argc-2))
223   {
224     ERROR0("missing file arguments");
225     return -4;
226   }
227   if(safestringcopy(&mmcallinfo->program[0],argv[0],PATH_MAX) != 0)
228   {
229     return -5;
230   }
231   if(safestringcopy(&mmcallinfo->subjectfile[0],argv[argnum],PATH_MAX) != 0)
232   {
233     return -6;
234   }
235   for(argnum++, mmcallinfo->numofqueryfiles = 0;
236       argnum < (Uint) argc; mmcallinfo->numofqueryfiles++, argnum++)
237   {
238     if(mmcallinfo->numofqueryfiles >= (Uint) MAXNUMOFQUERYFILES)
239     {
240       ERROR1("too many query files, maximal number is %lu",
241               (Showuint) MAXNUMOFQUERYFILES);
242       return -7;
243     }
244     if(safestringcopy(&mmcallinfo->queryfilelist
245                        [mmcallinfo->numofqueryfiles][0],
246                       argv[argnum],PATH_MAX) != 0)
247     {
248       return -8;
249     }
250   }
251   /*
252     verify that mum options are not interchanged
253   */
254   OPTIONEXCLUDE(OPTMUM,OPTMUMCAND);
255   OPTIONEXCLUDE(OPTMUM,OPTMUMREF);
256   OPTIONEXCLUDE(OPTMUM,OPTMAXMATCH);
257   OPTIONEXCLUDE(OPTMUMCAND,OPTMAXMATCH);
258   OPTIONEXCLUDE(OPTMUMREF,OPTMAXMATCH);
259   if ( mmcallinfo->cmaxmatch )
260     {
261       mmcallinfo->cmum = False;
262       mmcallinfo->cmumcand = False;
263     }
264   else if ( mmcallinfo->cmum )
265     {
266 
267     }
268   else /* default to cmumcand */
269     {
270       mmcallinfo->cmumcand = True;
271     }
272   /*
273     verify that the options -b and -r are not used at the same time
274   */
275   OPTIONEXCLUDE(OPTCOMPUTEBOTHDIRECTIONS,OPTONLYREVERSECOMPLEMENT);
276   /*
277     verify that -c is only used in combination with either -b or -r
278   */
279   OPTIONIMPLYEITHER2(OPTSHOWREVERSEPOSITIONS,
280                      OPTCOMPUTEBOTHDIRECTIONS,OPTONLYREVERSECOMPLEMENT);
281   return 0;
282 }
283